MDEV-26200 buf_pool.flush_list corrupted by buffer pool resizing or ROW_FORMAT=COMPRESSED

The lazy deletion of clean blocks from buf_pool.flush_list that was
introduced in commit 6441bc614a (MDEV-25113)
introduced a race condition around the function
buf_flush_relocate_on_flush_list().

The test innodb_zip.wl5522_debug_zip as well as the buffer pool
resizing tests would occasionally fail in debug builds due to
buf_pool.flush_list.count disagreeing with the actual length of the
doubly-linked list.

The safe procedure for relocating a block in buf_pool.flush_list should be
as follows, now that we implement lazy deletion from buf_pool.flush_list:

1. Acquire buf_pool.mutex.
2. Acquire the exclusive buf_pool.page_hash.latch.
3. Acquire buf_pool.flush_list_mutex.
4. Copy the block descriptor.
5. Invoke buf_flush_relocate_on_flush_list().
6. Release buf_pool.flush_list_mutex.

buf_flush_relocate_on_flush_list(): Assert that
buf_pool.flush_list_mutex is being held. Invoke
buf_page_t::oldest_modification() only once, using
std::memory_order_relaxed, now that the mutex protects us.

buf_LRU_free_page(), buf_LRU_block_remove_hashed(): Avoid
an unlock-lock cycle on hash_lock. (We must not acquire hash_lock
while already holding buf_pool.flush_list_mutex, because that
could lead to a deadlock due to latching order violation.)
This commit is contained in:
Marko Mäkelä 2021-07-22 08:34:49 +03:00
parent 316a8cebf5
commit bf435a3f4d
3 changed files with 25 additions and 26 deletions

View file

@ -1566,6 +1566,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
if (block->page.can_relocate()) {
memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
new_block->frame, block->frame, srv_page_size);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
new (&new_block->page) buf_page_t(block->page);
/* relocate LRU list */
@ -1625,6 +1626,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
buf_flush_relocate_on_flush_list(&block->page,
&new_block->page);
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
block->page.set_corrupt_id();
/* set other flags of buf_block_t */
@ -3131,12 +3133,14 @@ evict_from_pool:
/* Note: this is the uncompressed block and it is not
accessible by other threads yet because it is not in
any list or hash table */
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_relocate(bpage, &block->page);
/* Set after buf_relocate(). */
block->page.set_buf_fix_count(1);
buf_flush_relocate_on_flush_list(bpage, &block->page);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
/* Buffer-fix, I/O-fix, and X-latch the block
for the duration of the decompression.
@ -3646,8 +3650,10 @@ loop:
}
rw_lock_x_lock(&free_block->lock);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_relocate(&block->page, &free_block->page);
buf_flush_relocate_on_flush_list(&block->page, &free_block->page);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
free_block->page.set_state(BUF_BLOCK_FILE_PAGE);
buf_unzip_LRU_add_block(free_block, FALSE);

View file

@ -286,43 +286,29 @@ buf_flush_relocate_on_flush_list(
{
buf_page_t* prev;
mysql_mutex_assert_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
const lsn_t lsn = bpage->oldest_modification_acquire();
const lsn_t lsn = bpage->oldest_modification();
if (!lsn) {
return;
}
ut_ad(lsn == 1 || lsn > 2);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
/* FIXME: Can we avoid holding buf_pool.mutex here? */
ut_ad(dpage->oldest_modification() == lsn);
if (ut_d(const lsn_t o_lsn =) bpage->oldest_modification()) {
ut_ad(o_lsn == lsn);
/* Important that we adjust the hazard pointer before removing
the bpage from the flush list. */
buf_pool.flush_hp.adjust(bpage);
/* Important that we adjust the hazard pointer before removing
the bpage from the flush list. */
buf_pool.flush_hp.adjust(bpage);
prev = UT_LIST_GET_PREV(list, bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
prev = UT_LIST_GET_PREV(list, bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
bpage->clear_oldest_modification();
} else {
/* bpage was removed from buf_pool.flush_list
since we last checked, and before we acquired
buf_pool.flush_list_mutex. */
goto was_clean;
}
bpage->clear_oldest_modification();
if (lsn == 1) {
buf_pool.stat.flush_list_bytes -= dpage->physical_size();
was_clean:
dpage->list.prev = nullptr;
dpage->list.next = nullptr;
dpage->clear_oldest_modification();
@ -334,7 +320,6 @@ was_clean:
}
ut_d(buf_flush_validate_low());
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
/** Complete write of a file page from buf_pool.

View file

@ -845,6 +845,7 @@ func_exit:
} else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
b = buf_page_alloc_descriptor();
ut_a(b);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
new (b) buf_page_t(*bpage);
b->set_state(BUF_BLOCK_ZIP_PAGE);
}
@ -859,6 +860,8 @@ func_exit:
ut_ad(bpage->can_relocate());
if (!buf_LRU_block_remove_hashed(bpage, id, hash_lock, zip)) {
ut_ad(!b);
mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
return(true);
}
@ -872,8 +875,6 @@ func_exit:
if (UNIV_LIKELY_NULL(b)) {
buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
hash_lock->write_lock();
ut_ad(!buf_pool.page_hash_get_low(id, fold));
ut_ad(b->zip_size());
@ -940,6 +941,7 @@ func_exit:
}
buf_flush_relocate_on_flush_list(bpage, b);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
bpage->zip.data = nullptr;
@ -950,6 +952,8 @@ func_exit:
hash_lock. */
b->set_io_fix(BUF_IO_PIN);
hash_lock->write_unlock();
} else if (!zip) {
hash_lock->write_unlock();
}
buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
@ -1182,6 +1186,10 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
MEM_UNDEFINED(((buf_block_t*) bpage)->frame, srv_page_size);
bpage->set_state(BUF_BLOCK_REMOVE_HASH);
if (!zip) {
return true;
}
/* Question: If we release hash_lock here
then what protects us against:
1) Some other thread buffer fixing this page
@ -1203,7 +1211,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
page_hash. */
hash_lock->write_unlock();
if (zip && bpage->zip.data) {
if (bpage->zip.data) {
/* Free the compressed page. */
void* data = bpage->zip.data;
bpage->zip.data = NULL;