mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
Bug #58549 Race condition in buf_LRU_drop_page_hash_for_tablespace()
and compressed tables buf_LRU_drop_page_hash_for_tablespace(): after releasing and reacquiring the buffer pool mutex, do not dereference any block descriptor pointer that is not known to be a pointer to an uncompressed page frame (type buf_block_t; state == BUF_BLOCK_FILE_PAGE). Also, defer the acquisition of the block_mutex until it is needed. buf_page_get_gen(): Add mode == BUF_GET_IF_IN_POOL_PEEK for buffer-fixing a block without making it young in the LRU list. buf_page_get_gen(), buf_page_init(), buf_LRU_block_remove_hashed_page(): Set bpage->state = BUF_BLOCK_ZIP_FREE before buf_buddy_free(bpage), so that similar race conditions might be detected a little easier. btr_search_drop_page_hash_when_freed(): Use BUF_GET_IF_IN_POOL_PEEK when dropping the hash indexes. rb://528 approved by Jimmy Yang
This commit is contained in:
parent
5a805fe7c4
commit
0f8ae318c7
5 changed files with 81 additions and 52 deletions
|
@ -1,3 +1,9 @@
|
||||||
|
2011-02-28 The InnoDB Team
|
||||||
|
|
||||||
|
* btr/btr0sea.c, buf/buf0buf.c, buf/buf0lru.c:
|
||||||
|
Fix Bug#58549 Race condition in buf_LRU_drop_page_hash_for_tablespace()
|
||||||
|
and compressed tables
|
||||||
|
|
||||||
2011-02-15 The InnoDB Team
|
2011-02-15 The InnoDB Team
|
||||||
|
|
||||||
* sync/sync0rw.c, innodb_bug59307.test:
|
* sync/sync0rw.c, innodb_bug59307.test:
|
||||||
|
|
|
@ -1201,8 +1201,8 @@ btr_search_drop_page_hash_when_freed(
|
||||||
having to fear a deadlock. */
|
having to fear a deadlock. */
|
||||||
|
|
||||||
block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
|
block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH, NULL,
|
||||||
BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
|
BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
|
||||||
&mtr);
|
&mtr);
|
||||||
/* Because the buffer pool mutex was released by
|
/* Because the buffer pool mutex was released by
|
||||||
buf_page_peek_if_search_hashed(), it is possible that the
|
buf_page_peek_if_search_hashed(), it is possible that the
|
||||||
block was removed from the buffer pool by another thread
|
block was removed from the buffer pool by another thread
|
||||||
|
|
|
@ -2031,7 +2031,7 @@ buf_page_get_gen(
|
||||||
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
|
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
|
||||||
buf_block_t* guess, /*!< in: guessed block or NULL */
|
buf_block_t* guess, /*!< in: guessed block or NULL */
|
||||||
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
|
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
|
||||||
BUF_GET_NO_LATCH */
|
BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
|
||||||
const char* file, /*!< in: file name */
|
const char* file, /*!< in: file name */
|
||||||
ulint line, /*!< in: line where called */
|
ulint line, /*!< in: line where called */
|
||||||
mtr_t* mtr) /*!< in: mini-transaction */
|
mtr_t* mtr) /*!< in: mini-transaction */
|
||||||
|
@ -2047,9 +2047,19 @@ buf_page_get_gen(
|
||||||
ut_ad((rw_latch == RW_S_LATCH)
|
ut_ad((rw_latch == RW_S_LATCH)
|
||||||
|| (rw_latch == RW_X_LATCH)
|
|| (rw_latch == RW_X_LATCH)
|
||||||
|| (rw_latch == RW_NO_LATCH));
|
|| (rw_latch == RW_NO_LATCH));
|
||||||
ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
|
#ifdef UNIV_DEBUG
|
||||||
ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
|
switch (mode) {
|
||||||
|| (mode == BUF_GET_NO_LATCH));
|
case BUF_GET_NO_LATCH:
|
||||||
|
ut_ad(rw_latch == RW_NO_LATCH);
|
||||||
|
break;
|
||||||
|
case BUF_GET:
|
||||||
|
case BUF_GET_IF_IN_POOL:
|
||||||
|
case BUF_PEEK_IF_IN_POOL:
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
ut_error;
|
||||||
|
}
|
||||||
|
#endif /* UNIV_DEBUG */
|
||||||
ut_ad(zip_size == fil_space_get_zip_size(space));
|
ut_ad(zip_size == fil_space_get_zip_size(space));
|
||||||
ut_ad(ut_is_2pow(zip_size));
|
ut_ad(ut_is_2pow(zip_size));
|
||||||
#ifndef UNIV_LOG_DEBUG
|
#ifndef UNIV_LOG_DEBUG
|
||||||
|
@ -2091,7 +2101,8 @@ loop2:
|
||||||
|
|
||||||
buf_pool_mutex_exit();
|
buf_pool_mutex_exit();
|
||||||
|
|
||||||
if (mode == BUF_GET_IF_IN_POOL) {
|
if (mode == BUF_GET_IF_IN_POOL
|
||||||
|
|| mode == BUF_PEEK_IF_IN_POOL) {
|
||||||
|
|
||||||
return(NULL);
|
return(NULL);
|
||||||
}
|
}
|
||||||
|
@ -2130,7 +2141,8 @@ loop2:
|
||||||
|
|
||||||
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
|
must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
|
||||||
|
|
||||||
if (must_read && mode == BUF_GET_IF_IN_POOL) {
|
if (must_read && (mode == BUF_GET_IF_IN_POOL
|
||||||
|
|| mode == BUF_PEEK_IF_IN_POOL)) {
|
||||||
/* The page is only being read to buffer */
|
/* The page is only being read to buffer */
|
||||||
buf_pool_mutex_exit();
|
buf_pool_mutex_exit();
|
||||||
|
|
||||||
|
@ -2248,6 +2260,7 @@ wait_until_unfixed:
|
||||||
mutex_exit(&buf_pool_zip_mutex);
|
mutex_exit(&buf_pool_zip_mutex);
|
||||||
buf_pool->n_pend_unzip++;
|
buf_pool->n_pend_unzip++;
|
||||||
|
|
||||||
|
bpage->state = BUF_BLOCK_ZIP_FREE;
|
||||||
buf_buddy_free(bpage, sizeof *bpage);
|
buf_buddy_free(bpage, sizeof *bpage);
|
||||||
|
|
||||||
buf_pool_mutex_exit();
|
buf_pool_mutex_exit();
|
||||||
|
@ -2324,7 +2337,9 @@ wait_until_unfixed:
|
||||||
|
|
||||||
buf_pool_mutex_exit();
|
buf_pool_mutex_exit();
|
||||||
|
|
||||||
buf_page_set_accessed_make_young(&block->page, access_time);
|
if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
|
||||||
|
buf_page_set_accessed_make_young(&block->page, access_time);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
|
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
|
||||||
ut_a(!block->page.file_page_was_freed);
|
ut_a(!block->page.file_page_was_freed);
|
||||||
|
@ -2377,7 +2392,7 @@ wait_until_unfixed:
|
||||||
|
|
||||||
mtr_memo_push(mtr, block, fix_type);
|
mtr_memo_push(mtr, block, fix_type);
|
||||||
|
|
||||||
if (!access_time) {
|
if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
|
||||||
/* In the case of a first access, try to apply linear
|
/* In the case of a first access, try to apply linear
|
||||||
read-ahead */
|
read-ahead */
|
||||||
|
|
||||||
|
@ -2926,6 +2941,7 @@ err_exit:
|
||||||
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
|
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
|
||||||
|
|
||||||
/* The block was added by some other thread. */
|
/* The block was added by some other thread. */
|
||||||
|
bpage->state = BUF_BLOCK_ZIP_FREE;
|
||||||
buf_buddy_free(bpage, sizeof *bpage);
|
buf_buddy_free(bpage, sizeof *bpage);
|
||||||
buf_buddy_free(data, zip_size);
|
buf_buddy_free(data, zip_size);
|
||||||
|
|
||||||
|
|
|
@ -246,71 +246,75 @@ buf_LRU_drop_page_hash_for_tablespace(
|
||||||
page_arr = ut_malloc(sizeof(ulint)
|
page_arr = ut_malloc(sizeof(ulint)
|
||||||
* BUF_LRU_DROP_SEARCH_HASH_SIZE);
|
* BUF_LRU_DROP_SEARCH_HASH_SIZE);
|
||||||
buf_pool_mutex_enter();
|
buf_pool_mutex_enter();
|
||||||
|
num_entries = 0;
|
||||||
|
|
||||||
scan_again:
|
scan_again:
|
||||||
num_entries = 0;
|
|
||||||
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
|
bpage = UT_LIST_GET_LAST(buf_pool->LRU);
|
||||||
|
|
||||||
while (bpage != NULL) {
|
while (bpage != NULL) {
|
||||||
mutex_t* block_mutex = buf_page_get_mutex(bpage);
|
|
||||||
buf_page_t* prev_bpage;
|
buf_page_t* prev_bpage;
|
||||||
|
ibool is_fixed;
|
||||||
|
|
||||||
mutex_enter(block_mutex);
|
|
||||||
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
|
prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
|
||||||
|
|
||||||
ut_a(buf_page_in_file(bpage));
|
ut_a(buf_page_in_file(bpage));
|
||||||
|
|
||||||
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
|
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
|
||||||
|| bpage->space != id
|
|| bpage->space != id
|
||||||
|| bpage->buf_fix_count > 0
|
|
||||||
|| bpage->io_fix != BUF_IO_NONE) {
|
|| bpage->io_fix != BUF_IO_NONE) {
|
||||||
/* We leave the fixed pages as is in this scan.
|
/* Compressed pages are never hashed.
|
||||||
To be dealt with later in the final scan. */
|
Skip blocks of other tablespaces.
|
||||||
mutex_exit(block_mutex);
|
Skip I/O-fixed blocks (to be dealt with later). */
|
||||||
|
next_page:
|
||||||
|
bpage = prev_bpage;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_enter(&((buf_block_t*) bpage)->mutex);
|
||||||
|
is_fixed = bpage->buf_fix_count > 0
|
||||||
|
|| !((buf_block_t*) bpage)->is_hashed;
|
||||||
|
mutex_exit(&((buf_block_t*) bpage)->mutex);
|
||||||
|
|
||||||
|
if (is_fixed) {
|
||||||
goto next_page;
|
goto next_page;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (((buf_block_t*) bpage)->is_hashed) {
|
/* Store the page number so that we can drop the hash
|
||||||
|
index in a batch later. */
|
||||||
|
page_arr[num_entries] = bpage->offset;
|
||||||
|
ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
|
||||||
|
++num_entries;
|
||||||
|
|
||||||
/* Store the offset(i.e.: page_no) in the array
|
if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
|
||||||
so that we can drop hash index in a batch
|
goto next_page;
|
||||||
later. */
|
|
||||||
page_arr[num_entries] = bpage->offset;
|
|
||||||
mutex_exit(block_mutex);
|
|
||||||
ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
|
|
||||||
++num_entries;
|
|
||||||
|
|
||||||
if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
|
|
||||||
goto next_page;
|
|
||||||
}
|
|
||||||
/* Array full. We release the buf_pool_mutex to
|
|
||||||
obey the latching order. */
|
|
||||||
buf_pool_mutex_exit();
|
|
||||||
|
|
||||||
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
|
|
||||||
num_entries);
|
|
||||||
num_entries = 0;
|
|
||||||
buf_pool_mutex_enter();
|
|
||||||
} else {
|
|
||||||
mutex_exit(block_mutex);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
next_page:
|
/* Array full. We release the buf_pool_mutex to
|
||||||
/* Note that we may have released the buf_pool mutex
|
obey the latching order. */
|
||||||
above after reading the prev_bpage during processing
|
buf_pool_mutex_exit();
|
||||||
of a page_hash_batch (i.e.: when the array was full).
|
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
|
||||||
This means that prev_bpage can change in LRU list.
|
num_entries);
|
||||||
This is OK because this function is a 'best effort'
|
buf_pool_mutex_enter();
|
||||||
to drop as many search hash entries as possible and
|
num_entries = 0;
|
||||||
it does not guarantee that ALL such entries will be
|
|
||||||
dropped. */
|
/* Note that we released the buf_pool mutex above
|
||||||
bpage = prev_bpage;
|
after reading the prev_bpage during processing of a
|
||||||
|
page_hash_batch (i.e.: when the array was full).
|
||||||
|
Because prev_bpage could belong to a compressed-only
|
||||||
|
block, it may have been relocated, and thus the
|
||||||
|
pointer cannot be trusted. Because bpage is of type
|
||||||
|
buf_block_t, it is safe to dereference.
|
||||||
|
|
||||||
|
bpage can change in the LRU list. This is OK because
|
||||||
|
this function is a 'best effort' to drop as many
|
||||||
|
search hash entries as possible and it does not
|
||||||
|
guarantee that ALL such entries will be dropped. */
|
||||||
|
|
||||||
/* If, however, bpage has been removed from LRU list
|
/* If, however, bpage has been removed from LRU list
|
||||||
to the free list then we should restart the scan.
|
to the free list then we should restart the scan.
|
||||||
bpage->state is protected by buf_pool mutex. */
|
bpage->state is protected by buf_pool mutex. */
|
||||||
if (bpage && !buf_page_in_file(bpage)) {
|
if (bpage
|
||||||
ut_a(num_entries == 0);
|
&& buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
|
||||||
goto scan_again;
|
goto scan_again;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1799,6 +1803,7 @@ buf_LRU_block_remove_hashed_page(
|
||||||
buf_pool_mutex_exit_forbid();
|
buf_pool_mutex_exit_forbid();
|
||||||
buf_buddy_free(bpage->zip.data,
|
buf_buddy_free(bpage->zip.data,
|
||||||
page_zip_get_size(&bpage->zip));
|
page_zip_get_size(&bpage->zip));
|
||||||
|
bpage->state = BUF_BLOCK_ZIP_FREE;
|
||||||
buf_buddy_free(bpage, sizeof(*bpage));
|
buf_buddy_free(bpage, sizeof(*bpage));
|
||||||
buf_pool_mutex_exit_allow();
|
buf_pool_mutex_exit_allow();
|
||||||
UNIV_MEM_UNDESC(bpage);
|
UNIV_MEM_UNDESC(bpage);
|
||||||
|
|
|
@ -41,6 +41,8 @@ Created 11/5/1995 Heikki Tuuri
|
||||||
/* @{ */
|
/* @{ */
|
||||||
#define BUF_GET 10 /*!< get always */
|
#define BUF_GET 10 /*!< get always */
|
||||||
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
|
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
|
||||||
|
#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
|
||||||
|
the block young in the LRU list */
|
||||||
#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
|
#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
|
||||||
set no latch; we have
|
set no latch; we have
|
||||||
separated this case, because
|
separated this case, because
|
||||||
|
@ -284,7 +286,7 @@ buf_page_get_gen(
|
||||||
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
|
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
|
||||||
buf_block_t* guess, /*!< in: guessed block or NULL */
|
buf_block_t* guess, /*!< in: guessed block or NULL */
|
||||||
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
|
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
|
||||||
BUF_GET_NO_LATCH */
|
BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH */
|
||||||
const char* file, /*!< in: file name */
|
const char* file, /*!< in: file name */
|
||||||
ulint line, /*!< in: line where called */
|
ulint line, /*!< in: line where called */
|
||||||
mtr_t* mtr); /*!< in: mini-transaction */
|
mtr_t* mtr); /*!< in: mini-transaction */
|
||||||
|
|
Loading…
Reference in a new issue