MDEV-33053 InnoDB LRU flushing does not run before running out of buffer pool

buf_flush_LRU(): Display a warning if no pages could be evicted and
no writes initiated.

buf_pool_t::need_LRU_eviction(): Renamed from buf_pool_t::ran_out().
Check if the number of free pages is smaller than half of
innodb_lru_scan_depth instead of checking if it is 0.

buf_flush_page_cleaner(): For the final LRU flush after a checkpoint
flush, use a "budget" of innodb_io_capacity_max, like we do in the
case when we are not in "furious" checkpoint flushing.

Co-developed by: Debarun Banerjee
Reviewed by: Debarun Banerjee
Tested by: Matthias Leich
This commit is contained in:
Marko Mäkelä 2024-01-19 12:40:16 +02:00
parent 16f2f8e5a7
commit d34479dc66
3 changed files with 82 additions and 29 deletions

View file

@ -1797,6 +1797,28 @@ ulint buf_flush_LRU(ulint max_n, bool evict)
buf_pool.try_LRU_scan= true;
pthread_cond_broadcast(&buf_pool.done_free);
}
else if (!pages && !buf_pool.try_LRU_scan &&
buf_pool.LRU_warned.test_and_set(std::memory_order_acquire))
{
/* For example, with the minimum innodb_buffer_pool_size=5M and
the default innodb_page_size=16k there are only a little over 316
pages in the buffer pool. The buffer pool can easily be exhausted
by a workload of some dozen concurrent connections. The system could
reach a deadlock like the following:
(1) Many threads are waiting in buf_LRU_get_free_block()
for buf_pool.done_free.
(2) Some threads are waiting for a page latch which is held by
another thread that is waiting in buf_LRU_get_free_block().
(3) This thread is the only one that could make progress, but
we fail to do so because all the pages that we scanned are
buffer-fixed or latched by some thread. */
sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!"
" %zu blocks are in use and %zu free."
" Consider increasing innodb_buffer_pool_size.",
UT_LIST_GET_LEN(buf_pool.LRU),
UT_LIST_GET_LEN(buf_pool.free));
}
return pages;
}
@ -2287,6 +2309,16 @@ func_exit:
goto func_exit;
}
TPOOL_SUPPRESS_TSAN
bool buf_pool_t::need_LRU_eviction() const
{
  /* A cleared try_LRU_scan means that buf_LRU_get_free_block() is
  waiting for buf_flush_page_cleaner() to evict some blocks. */
  bool low= !try_LRU_scan;

  /* Otherwise, eviction is needed when the LRU list is longer than
  BUF_LRU_MIN_LEN and the free list is shorter than half of
  srv_LRU_scan_depth. */
  if (!low)
    low= UT_LIST_GET_LEN(LRU) > BUF_LRU_MIN_LEN &&
      UT_LIST_GET_LEN(free) < srv_LRU_scan_depth / 2;

  return UNIV_UNLIKELY(low);
}
/** page_cleaner thread tasked with flushing dirty pages from the buffer
pools. As of now we'll have only one coordinator. */
static void buf_flush_page_cleaner()
@ -2319,21 +2351,24 @@ static void buf_flush_page_cleaner()
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (buf_pool.ran_out())
goto no_wait;
else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break;
if (!buf_pool.need_LRU_eviction())
{
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED)
break;
if (buf_pool.page_cleaner_idle() &&
(!UT_LIST_GET_LEN(buf_pool.flush_list) ||
srv_max_dirty_pages_pct_lwm == 0.0))
/* We are idle; wait for buf_pool.page_cleaner_wakeup() */
my_cond_wait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex);
else
my_cond_timedwait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
no_wait:
if (buf_pool.page_cleaner_idle() &&
(!UT_LIST_GET_LEN(buf_pool.flush_list) ||
srv_max_dirty_pages_pct_lwm == 0.0))
{
buf_pool.LRU_warned.clear(std::memory_order_release);
/* We are idle; wait for buf_pool.page_cleaner_wakeup() */
my_cond_wait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex);
}
else
my_cond_timedwait(&buf_pool.do_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
}
set_timespec(abstime, 1);
lsn_limit= buf_flush_sync_lsn;
@ -2365,7 +2400,7 @@ static void buf_flush_page_cleaner()
}
while (false);
if (!buf_pool.ran_out())
if (!buf_pool.need_LRU_eviction())
continue;
mysql_mutex_lock(&buf_pool.flush_list_mutex);
oldest_lsn= buf_pool.get_oldest_modification(0);
@ -2394,7 +2429,7 @@ static void buf_flush_page_cleaner()
if (oldest_lsn >= soft_lsn_limit)
buf_flush_async_lsn= soft_lsn_limit= 0;
}
else if (buf_pool.ran_out())
else if (buf_pool.need_LRU_eviction())
{
buf_pool.page_cleaner_set_idle(false);
buf_pool.n_flush_inc();
@ -2509,9 +2544,11 @@ static void buf_flush_page_cleaner()
MONITOR_FLUSH_ADAPTIVE_PAGES,
n_flushed);
}
else if (buf_flush_async_lsn <= oldest_lsn)
else if (buf_flush_async_lsn <= oldest_lsn &&
!buf_pool.need_LRU_eviction())
goto check_oldest_and_set_idle;
n= srv_max_io_capacity;
n= n >= n_flushed ? n - n_flushed : 0;
goto LRU_flush;
}

View file

@ -60,10 +60,6 @@ static constexpr ulint BUF_LRU_OLD_TOLERANCE = 20;
frames in the buffer pool, we set this to TRUE */
static bool buf_lru_switched_on_innodb_mon = false;
/** True if the diagnostic message about difficult to find free blocks
in the buffer pool has already been printed. */
static bool buf_lru_free_blocks_error_printed;
/******************************************************************//**
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics,
@ -408,6 +404,7 @@ got_mutex:
buf_LRU_check_size_of_non_data_objects();
buf_block_t* block;
IF_DBUG(static bool buf_lru_free_blocks_error_printed,);
DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
if (!buf_lru_free_blocks_error_printed) {
n_iterations = 21;
@ -417,9 +414,25 @@ retry:
/* If there is a block in the free list, take it */
if ((block = buf_LRU_get_free_only()) != nullptr) {
got_block:
const ulint LRU_size = UT_LIST_GET_LEN(buf_pool.LRU);
const ulint available = UT_LIST_GET_LEN(buf_pool.free);
const ulint scan_depth = srv_LRU_scan_depth / 2;
ut_ad(LRU_size <= BUF_LRU_MIN_LEN || available >= scan_depth
|| buf_pool.need_LRU_eviction());
if (!have_mutex) {
mysql_mutex_unlock(&buf_pool.mutex);
}
if (UNIV_UNLIKELY(available < scan_depth)
&& LRU_size > BUF_LRU_MIN_LEN) {
mysql_mutex_lock(&buf_pool.flush_list_mutex);
if (!buf_pool.page_cleaner_active()) {
buf_pool.page_cleaner_wakeup(true);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
block->page.zip.clear();
return block;
}
@ -445,10 +458,11 @@ got_block:
if ((block = buf_LRU_get_free_only()) != nullptr) {
goto got_block;
}
const bool wake = buf_pool.need_LRU_eviction();
mysql_mutex_unlock(&buf_pool.mutex);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
const auto n_flush = buf_pool.n_flush();
if (!buf_pool.try_LRU_scan) {
if (wake && !buf_pool.page_cleaner_active()) {
buf_pool.page_cleaner_wakeup(true);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
@ -467,9 +481,10 @@ not_found:
MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
}
if (n_iterations == 21 && !buf_lru_free_blocks_error_printed
&& srv_buf_pool_old_size == srv_buf_pool_size) {
buf_lru_free_blocks_error_printed = true;
if (n_iterations == 21
&& srv_buf_pool_old_size == srv_buf_pool_size
&& buf_pool.LRU_warned.test_and_set(std::memory_order_acquire)) {
IF_DBUG(buf_lru_free_blocks_error_printed = true,);
mysql_mutex_unlock(&buf_pool.mutex);
ib::warn() << "Difficult to find free blocks in the buffer pool"
" (" << n_iterations << " search iterations)! "

View file

@ -1488,10 +1488,8 @@ public:
n_chunks_new / 4 * chunks->size;
}
/** @return whether the buffer pool has run out */
TPOOL_SUPPRESS_TSAN
bool ran_out() const
{ return UNIV_UNLIKELY(!try_LRU_scan || !UT_LIST_GET_LEN(free)); }
/** @return whether the buffer pool is running low */
bool need_LRU_eviction() const;
/** @return whether the buffer pool is shrinking */
inline bool is_shrinking() const
@ -1811,6 +1809,9 @@ public:
Set whenever the free list grows, along with a broadcast of done_free.
Protected by buf_pool.mutex. */
Atomic_relaxed<bool> try_LRU_scan;
/** Whether a warning about running out of buffer pool has been issued */
std::atomic_flag LRU_warned;
/* @} */
/** @name LRU replacement algorithm fields */