diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index f07ecedf535..ca9c71938c1 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1797,6 +1797,28 @@ ulint buf_flush_LRU(ulint max_n, bool evict) buf_pool.try_LRU_scan= true; pthread_cond_broadcast(&buf_pool.done_free); } + else if (!pages && !buf_pool.try_LRU_scan && + buf_pool.LRU_warned.test_and_set(std::memory_order_acquire)) + { + /* For example, with the minimum innodb_buffer_pool_size=5M and + the default innodb_page_size=16k there are only a little over 316 + pages in the buffer pool. The buffer pool can easily be exhausted + by a workload of some dozen concurrent connections. The system could + reach a deadlock like the following: + + (1) Many threads are waiting in buf_LRU_get_free_block() + for buf_pool.done_free. + (2) Some threads are waiting for a page latch which is held by + another thread that is waiting in buf_LRU_get_free_block(). + (3) This thread is the only one that could make progress, but + we fail to do so because all the pages that we scanned are + buffer-fixed or latched by some thread. */ + sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" + " %zu blocks are in use and %zu free." + " Consider increasing innodb_buffer_pool_size.", + UT_LIST_GET_LEN(buf_pool.LRU), + UT_LIST_GET_LEN(buf_pool.free)); + } return pages; } @@ -2287,6 +2309,16 @@ func_exit: goto func_exit; } +TPOOL_SUPPRESS_TSAN +bool buf_pool_t::need_LRU_eviction() const +{ + /* try_LRU_scan==false means that buf_LRU_get_free_block() is waiting + for buf_flush_page_cleaner() to evict some blocks */ + return UNIV_UNLIKELY(!try_LRU_scan || + (UT_LIST_GET_LEN(LRU) > BUF_LRU_MIN_LEN && + UT_LIST_GET_LEN(free) < srv_LRU_scan_depth / 2)); +} + /** page_cleaner thread tasked with flushing dirty pages from the buffer pools. As of now we'll have only one coordinator. */ static void buf_flush_page_cleaner() @@ -2319,21 +2351,24 @@ static void buf_flush_page_cleaner() } mysql_mutex_lock(&buf_pool.flush_list_mutex); - if (buf_pool.ran_out()) - goto no_wait; - else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) - break; + if (!buf_pool.need_LRU_eviction()) + { + if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) + break; - if (buf_pool.page_cleaner_idle() && - (!UT_LIST_GET_LEN(buf_pool.flush_list) || - srv_max_dirty_pages_pct_lwm == 0.0)) - /* We are idle; wait for buf_pool.page_cleaner_wakeup() */ - my_cond_wait(&buf_pool.do_flush_list, - &buf_pool.flush_list_mutex.m_mutex); - else - my_cond_timedwait(&buf_pool.do_flush_list, - &buf_pool.flush_list_mutex.m_mutex, &abstime); - no_wait: + if (buf_pool.page_cleaner_idle() && + (!UT_LIST_GET_LEN(buf_pool.flush_list) || + srv_max_dirty_pages_pct_lwm == 0.0)) + { + buf_pool.LRU_warned.clear(std::memory_order_release); + /* We are idle; wait for buf_pool.page_cleaner_wakeup() */ + my_cond_wait(&buf_pool.do_flush_list, + &buf_pool.flush_list_mutex.m_mutex); + } + else + my_cond_timedwait(&buf_pool.do_flush_list, + &buf_pool.flush_list_mutex.m_mutex, &abstime); + } set_timespec(abstime, 1); lsn_limit= buf_flush_sync_lsn; @@ -2365,7 +2400,7 @@ static void buf_flush_page_cleaner() } while (false); - if (!buf_pool.ran_out()) + if (!buf_pool.need_LRU_eviction()) continue; mysql_mutex_lock(&buf_pool.flush_list_mutex); oldest_lsn= buf_pool.get_oldest_modification(0); @@ -2394,7 +2429,7 @@ static void buf_flush_page_cleaner() if (oldest_lsn >= soft_lsn_limit) buf_flush_async_lsn= soft_lsn_limit= 0; } - else if (buf_pool.ran_out()) + else if (buf_pool.need_LRU_eviction()) { buf_pool.page_cleaner_set_idle(false); buf_pool.n_flush_inc(); @@ -2509,9 +2544,11 @@ static void buf_flush_page_cleaner() MONITOR_FLUSH_ADAPTIVE_PAGES, n_flushed); } - else if (buf_flush_async_lsn <= oldest_lsn) + else if (buf_flush_async_lsn <= oldest_lsn && + !buf_pool.need_LRU_eviction()) goto check_oldest_and_set_idle; + n= srv_max_io_capacity; n= n >= n_flushed ? n - n_flushed : 0; goto LRU_flush; } diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index b80b99a9b12..ce37209f154 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -60,10 +60,6 @@ static constexpr ulint BUF_LRU_OLD_TOLERANCE = 20; frames in the buffer pool, we set this to TRUE */ static bool buf_lru_switched_on_innodb_mon = false; -/** True if diagnostic message about difficult to find free blocks -in the buffer bool has already printed. */ -static bool buf_lru_free_blocks_error_printed; - /******************************************************************//** These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O and page_zip_decompress() operations. Based on the statistics, @@ -408,6 +404,7 @@ got_mutex: buf_LRU_check_size_of_non_data_objects(); buf_block_t* block; + IF_DBUG(static bool buf_lru_free_blocks_error_printed,); DBUG_EXECUTE_IF("ib_lru_force_no_free_page", if (!buf_lru_free_blocks_error_printed) { n_iterations = 21; @@ -417,9 +414,25 @@ retry: /* If there is a block in the free list, take it */ if ((block = buf_LRU_get_free_only()) != nullptr) { got_block: + const ulint LRU_size = UT_LIST_GET_LEN(buf_pool.LRU); + const ulint available = UT_LIST_GET_LEN(buf_pool.free); + const ulint scan_depth = srv_LRU_scan_depth / 2; + ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth + || buf_pool.need_LRU_eviction()); + if (!have_mutex) { mysql_mutex_unlock(&buf_pool.mutex); } + + if (UNIV_UNLIKELY(available < scan_depth) + && LRU_size > BUF_LRU_MIN_LEN) { + mysql_mutex_lock(&buf_pool.flush_list_mutex); + if (!buf_pool.page_cleaner_active()) { + buf_pool.page_cleaner_wakeup(true); + } + mysql_mutex_unlock(&buf_pool.flush_list_mutex); + } + block->page.zip.clear(); return block; } @@ -445,10 +458,11 @@ got_block: if ((block = buf_LRU_get_free_only()) != nullptr) { goto got_block; } + const bool wake = buf_pool.need_LRU_eviction(); mysql_mutex_unlock(&buf_pool.mutex); mysql_mutex_lock(&buf_pool.flush_list_mutex); const auto n_flush = buf_pool.n_flush(); - if (!buf_pool.try_LRU_scan) { + if (wake && !buf_pool.page_cleaner_active()) { buf_pool.page_cleaner_wakeup(true); } mysql_mutex_unlock(&buf_pool.flush_list_mutex); @@ -467,9 +481,10 @@ not_found: MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS ); } - if (n_iterations == 21 && !buf_lru_free_blocks_error_printed - && srv_buf_pool_old_size == srv_buf_pool_size) { - buf_lru_free_blocks_error_printed = true; + if (n_iterations == 21 + && srv_buf_pool_old_size == srv_buf_pool_size + && buf_pool.LRU_warned.test_and_set(std::memory_order_acquire)) { + IF_DBUG(buf_lru_free_blocks_error_printed = true,); mysql_mutex_unlock(&buf_pool.mutex); ib::warn() << "Difficult to find free blocks in the buffer pool" " (" << n_iterations << " search iterations)! " diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 5626af17bb7..4986e83cfb9 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1488,10 +1488,8 @@ public: n_chunks_new / 4 * chunks->size; } - /** @return whether the buffer pool has run out */ - TPOOL_SUPPRESS_TSAN - bool ran_out() const - { return UNIV_UNLIKELY(!try_LRU_scan || !UT_LIST_GET_LEN(free)); } + /** @return whether the buffer pool is running low */ + bool need_LRU_eviction() const; /** @return whether the buffer pool is shrinking */ inline bool is_shrinking() const @@ -1811,6 +1809,9 @@ public: Set whenever the free list grows, along with a broadcast of done_free. Protected by buf_pool.mutex. */ Atomic_relaxed try_LRU_scan; + /** Whether we have warned to be running out of buffer pool */ + std::atomic_flag LRU_warned; + /* @} */ /** @name LRU replacement algorithm fields */