mirror of
https://github.com/MariaDB/server.git
synced 2025-01-17 12:32:27 +01:00
MDEV-26200 buf_pool.flush_list corrupted by buffer pool resizing or ROW_FORMAT=COMPRESSED
The lazy deletion of clean blocks from buf_pool.flush_list that was
introduced in commit 6441bc614a
(MDEV-25113)
introduced a race condition around the function
buf_flush_relocate_on_flush_list().
The test innodb_zip.wl5522_debug_zip as well as the buffer pool
resizing tests would occasionally fail in debug builds due to
buf_pool.flush_list.count disagreeing with the actual length of the
doubly-linked list.
The safe procedure for relocating a block in buf_pool.flush_list should be
as follows, now that we implement lazy deletion from buf_pool.flush_list:
1. Acquire buf_pool.mutex.
2. Acquire the exclusive buf_pool.page_hash.latch.
3. Acquire buf_pool.flush_list_mutex.
4. Copy the block descriptor.
5. Invoke buf_flush_relocate_on_flush_list().
6. Release buf_pool.flush_list_mutex.
buf_flush_relocate_on_flush_list(): Assert that
buf_pool.flush_list_mutex is being held. Invoke
buf_page_t::oldest_modification() only once, using
std::memory_order_relaxed, now that the mutex protects us.
buf_LRU_free_page(), buf_LRU_block_remove_hashed(): Avoid
an unlock-lock cycle on hash_lock. (We must not acquire hash_lock
while already holding buf_pool.flush_list_mutex, because that
could lead to a deadlock due to latching order violation.)
This commit is contained in:
parent
316a8cebf5
commit
bf435a3f4d
3 changed files with 25 additions and 26 deletions
|
@@ -1566,6 +1566,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
 	if (block->page.can_relocate()) {
 		memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
 			new_block->frame, block->frame, srv_page_size);
+		mysql_mutex_lock(&buf_pool.flush_list_mutex);
 		new (&new_block->page) buf_page_t(block->page);

 		/* relocate LRU list */
@@ -1625,6 +1626,7 @@ inline bool buf_pool_t::realloc(buf_block_t *block)
 			buf_flush_relocate_on_flush_list(&block->page,
 							 &new_block->page);
 		}
+		mysql_mutex_unlock(&buf_pool.flush_list_mutex);
 		block->page.set_corrupt_id();

 		/* set other flags of buf_block_t */
@@ -3131,12 +3133,14 @@ evict_from_pool:
 		/* Note: this is the uncompressed block and it is not
 		accessible by other threads yet because it is not in
 		any list or hash table */
+		mysql_mutex_lock(&buf_pool.flush_list_mutex);
 		buf_relocate(bpage, &block->page);

 		/* Set after buf_relocate(). */
 		block->page.set_buf_fix_count(1);

 		buf_flush_relocate_on_flush_list(bpage, &block->page);
+		mysql_mutex_unlock(&buf_pool.flush_list_mutex);

 		/* Buffer-fix, I/O-fix, and X-latch the block
 		for the duration of the decompression.
@@ -3646,8 +3650,10 @@ loop:
 	}

 	rw_lock_x_lock(&free_block->lock);
+	mysql_mutex_lock(&buf_pool.flush_list_mutex);
 	buf_relocate(&block->page, &free_block->page);
 	buf_flush_relocate_on_flush_list(&block->page, &free_block->page);
+	mysql_mutex_unlock(&buf_pool.flush_list_mutex);

 	free_block->page.set_state(BUF_BLOCK_FILE_PAGE);
 	buf_unzip_LRU_add_block(free_block, FALSE);
@@ -286,43 +286,29 @@ buf_flush_relocate_on_flush_list(
 {
 	buf_page_t*	prev;

 	mysql_mutex_assert_owner(&buf_pool.mutex);
+	mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
 	ut_ad(!fsp_is_system_temporary(bpage->id().space()));

-	const lsn_t lsn = bpage->oldest_modification_acquire();
+	const lsn_t lsn = bpage->oldest_modification();

 	if (!lsn) {
 		return;
 	}

 	ut_ad(lsn == 1 || lsn > 2);
-
-	mysql_mutex_lock(&buf_pool.flush_list_mutex);
-
-	/* FIXME: Can we avoid holding buf_pool.mutex here? */
 	ut_ad(dpage->oldest_modification() == lsn);

-	if (ut_d(const lsn_t o_lsn =) bpage->oldest_modification()) {
-		ut_ad(o_lsn == lsn);
-
-		/* Important that we adjust the hazard pointer before removing
-		the bpage from the flush list. */
-		buf_pool.flush_hp.adjust(bpage);
-
-		prev = UT_LIST_GET_PREV(list, bpage);
-		UT_LIST_REMOVE(buf_pool.flush_list, bpage);
-
-		bpage->clear_oldest_modification();
-	} else {
-		/* bpage was removed from buf_pool.flush_list
-		since we last checked, and before we acquired
-		buf_pool.flush_list_mutex. */
-		goto was_clean;
-	}
+	/* Important that we adjust the hazard pointer before removing
+	the bpage from the flush list. */
+	buf_pool.flush_hp.adjust(bpage);
+
+	prev = UT_LIST_GET_PREV(list, bpage);
+	UT_LIST_REMOVE(buf_pool.flush_list, bpage);
+
+	bpage->clear_oldest_modification();

 	if (lsn == 1) {
 		buf_pool.stat.flush_list_bytes -= dpage->physical_size();
-was_clean:
 		dpage->list.prev = nullptr;
 		dpage->list.next = nullptr;
 		dpage->clear_oldest_modification();
@@ -334,7 +320,6 @@ was_clean:
 	}

 	ut_d(buf_flush_validate_low());
-	mysql_mutex_unlock(&buf_pool.flush_list_mutex);
 }

 /** Complete write of a file page from buf_pool.
@@ -845,6 +845,7 @@ func_exit:
 	} else if (bpage->state() == BUF_BLOCK_FILE_PAGE) {
 		b = buf_page_alloc_descriptor();
 		ut_a(b);
+		mysql_mutex_lock(&buf_pool.flush_list_mutex);
 		new (b) buf_page_t(*bpage);
 		b->set_state(BUF_BLOCK_ZIP_PAGE);
 	}
@@ -859,6 +860,8 @@ func_exit:
 	ut_ad(bpage->can_relocate());

 	if (!buf_LRU_block_remove_hashed(bpage, id, hash_lock, zip)) {
+		ut_ad(!b);
+		mysql_mutex_assert_not_owner(&buf_pool.flush_list_mutex);
 		return(true);
 	}

@@ -872,8 +875,6 @@ func_exit:
 	if (UNIV_LIKELY_NULL(b)) {
 		buf_page_t*	prev_b = UT_LIST_GET_PREV(LRU, b);

-		hash_lock->write_lock();
-
 		ut_ad(!buf_pool.page_hash_get_low(id, fold));
 		ut_ad(b->zip_size());

@@ -940,6 +941,7 @@ func_exit:
 		}

 		buf_flush_relocate_on_flush_list(bpage, b);
+		mysql_mutex_unlock(&buf_pool.flush_list_mutex);

 		bpage->zip.data = nullptr;

@@ -950,6 +952,8 @@ func_exit:
 		hash_lock. */
 		b->set_io_fix(BUF_IO_PIN);
 		hash_lock->write_unlock();
+	} else if (!zip) {
+		hash_lock->write_unlock();
 	}

 	buf_block_t*	block = reinterpret_cast<buf_block_t*>(bpage);
@@ -1182,6 +1186,10 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
 		MEM_UNDEFINED(((buf_block_t*) bpage)->frame, srv_page_size);
 		bpage->set_state(BUF_BLOCK_REMOVE_HASH);

+		if (!zip) {
+			return true;
+		}
+
 		/* Question: If we release hash_lock here
 		then what protects us against:
 		1) Some other thread buffer fixing this page
@@ -1203,7 +1211,7 @@ static bool buf_LRU_block_remove_hashed(buf_page_t *bpage, const page_id_t id,
 		page_hash. */
 		hash_lock->write_unlock();

-		if (zip && bpage->zip.data) {
+		if (bpage->zip.data) {
 			/* Free the compressed page. */
 			void*	data = bpage->zip.data;
 			bpage->zip.data = NULL;
Loading…
Reference in a new issue