diff --git a/buf/buf0buddy.c b/buf/buf0buddy.c index 2323dc56585..6069b5b19cd 100644 --- a/buf/buf0buddy.c +++ b/buf/buf0buddy.c @@ -356,16 +356,21 @@ free_LRU: } /************************************************************************** -Allocate a block. */ +Allocate a block. The thread calling this function must hold +buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. +The buf_pool->mutex may only be released and reacquired if lru != NULL. */ void* buf_buddy_alloc_low( /*================*/ - /* out: allocated block, or NULL - if buf_pool->zip_free[] was empty */ + /* out: allocated block, + possibly NULL if lru==NULL */ ulint i, /* in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ - ibool lru) /* in: TRUE=allocate from the LRU list if needed */ + ibool* lru) /* in: pointer to a variable that will be assigned + TRUE if storage was allocated from the LRU list + and buf_pool->mutex was temporarily released, + or NULL if the LRU list should not be used */ { buf_block_t* block; @@ -407,6 +412,7 @@ buf_buddy_alloc_low( /* Try replacing an uncompressed page in the buffer pool. */ mutex_exit(&buf_pool->mutex); block = buf_LRU_get_free_block(0); + *lru = TRUE; mutex_enter(&buf_pool->mutex); alloc_big: diff --git a/buf/buf0buf.c b/buf/buf0buf.c index 76ee4cffcb0..ed21c7b81ea 100644 --- a/buf/buf0buf.c +++ b/buf/buf0buf.c @@ -2341,6 +2341,8 @@ buf_page_init_for_read( buf_block_t* block; buf_page_t* bpage; mtr_t mtr; + ibool lru = FALSE; + void* data; ut_ad(buf_pool); @@ -2365,47 +2367,26 @@ buf_page_init_for_read( } if (zip_size && UNIV_LIKELY(!recv_recovery_is_on())) { - void* data; - mutex_enter(&buf_pool->mutex); - - /* This must be allocated before bpage, in order to - avoid the invocation of buf_buddy_relocate_block() - on uninitialized data. */ - data = buf_buddy_alloc(zip_size, TRUE); - - bpage = buf_buddy_alloc(sizeof *bpage, TRUE); - page_zip_des_init(&bpage->zip); - page_zip_set_size(&bpage->zip, zip_size); - bpage->zip.data = data; - block = NULL; - mutex_enter(&buf_pool->zip_mutex); } else { block = buf_LRU_get_free_block(0); - ut_ad(block); - bpage = &block->page; - mutex_enter(&buf_pool->mutex); - mutex_enter(&block->mutex); } + mutex_enter(&buf_pool->mutex); + if (buf_page_hash_get(space, offset)) { /* The page is already in the buffer pool. */ err_exit: if (block) { + mutex_enter(&block->mutex); buf_LRU_block_free_non_file_page(block); - mutex_exit(&buf_pool->mutex); mutex_exit(&block->mutex); - } else { - void* data = bpage->zip.data; - bpage->zip.data = NULL; - - mutex_exit(&buf_pool->zip_mutex); - buf_buddy_free(data, zip_size); - buf_buddy_free(bpage, sizeof *bpage); - mutex_exit(&buf_pool->mutex); } +err_exit2: + mutex_exit(&buf_pool->mutex); + if (mode == BUF_READ_IBUF_PAGES_ONLY) { mtr_commit(&mtr); @@ -2424,7 +2405,9 @@ err_exit: } if (block) { - buf_page_init(space, offset, (buf_block_t*) bpage); + bpage = &block->page; + mutex_enter(&block->mutex); + buf_page_init(space, offset, block); /* The block must be put to the LRU list, to the old blocks */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); @@ -2438,12 +2421,11 @@ err_exit: read is completed. The x-lock is cleared by the io-handler thread. */ - rw_lock_x_lock_gen(&((buf_block_t*) bpage)->lock, BUF_IO_READ); + rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); if (UNIV_UNLIKELY(zip_size)) { - void* data; page_zip_set_size(&block->page.zip, zip_size); - mutex_exit(&block->mutex); + /* buf_pool->mutex may be released and reacquired by buf_buddy_alloc(). Thus, we must release block->mutex in order not to @@ -2452,27 +2434,51 @@ err_exit: operation until after the block descriptor has been added to buf_pool->LRU and buf_pool->page_hash. */ - data = buf_buddy_alloc(zip_size, TRUE); + mutex_exit(&block->mutex); + data = buf_buddy_alloc(zip_size, &lru); mutex_enter(&block->mutex); block->page.zip.data = data; } buf_page_set_io_fix(bpage, BUF_IO_READ); - buf_pool->n_pend_reads++; - mutex_exit(&block->mutex); - mutex_exit(&buf_pool->mutex); } else { + /* Defer buf_buddy_alloc() until after the block has + been found not to exist. The buf_buddy_alloc() and + buf_buddy_free() calls may be expensive because of + buf_buddy_relocate(). */ + + /* The compressed page must be allocated before the + control block (bpage), in order to avoid the + invocation of buf_buddy_relocate_block() on + uninitialized data. */ + data = buf_buddy_alloc(zip_size, &lru); + bpage = buf_buddy_alloc(sizeof *bpage, &lru); + + /* If buf_buddy_alloc() allocated storage from the LRU list, + it released and reacquired buf_pool->mutex. Thus, we must + check the page_hash again, as it may have been modified. */ + if (UNIV_UNLIKELY(lru) + && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) { + + /* The block was added by some other thread. */ + buf_buddy_free(data, zip_size); + buf_buddy_free(bpage, sizeof *bpage); + goto err_exit2; + } + + page_zip_des_init(&bpage->zip); + page_zip_set_size(&bpage->zip, zip_size); + bpage->zip.data = data; + + mutex_enter(&buf_pool->zip_mutex); UNIV_MEM_DESC(bpage->zip.data, page_zip_get_size(&bpage->zip), bpage); buf_page_init_low(bpage); bpage->state = BUF_BLOCK_ZIP_PAGE; bpage->space = space; bpage->offset = offset; -#ifdef UNIV_DEBUG_FILE_ACCESSES - bpage->file_page_was_freed = FALSE; -#endif /* UNIV_DEBUG_FILE_ACCESSES */ #ifdef UNIV_DEBUG bpage->in_page_hash = FALSE; @@ -2492,12 +2498,12 @@ err_exit: buf_page_set_io_fix(bpage, BUF_IO_READ); - buf_pool->n_pend_reads++; - mutex_exit(&buf_pool->zip_mutex); - mutex_exit(&buf_pool->mutex); } + buf_pool->n_pend_reads++; + mutex_exit(&buf_pool->mutex); + if (mode == BUF_READ_IBUF_PAGES_ONLY) { mtr_commit(&mtr); @@ -2575,6 +2581,7 @@ buf_page_create( if (zip_size) { void* data; + ibool lru; /* Prevent race conditions during buf_buddy_alloc(), which may release and reacquire buf_pool->mutex, @@ -2591,7 +2598,7 @@ buf_page_create( the reacquisition of buf_pool->mutex. We also must defer this operation until after the block descriptor has been added to buf_pool->LRU and buf_pool->page_hash. */ - data = buf_buddy_alloc(zip_size, TRUE); + data = buf_buddy_alloc(zip_size, &lru); mutex_enter(&block->mutex); block->page.zip.data = data; diff --git a/buf/buf0lru.c b/buf/buf0lru.c index d33371e9c2b..78486dec3dc 100644 --- a/buf/buf0lru.c +++ b/buf/buf0lru.c @@ -492,8 +492,9 @@ loop: block->page.zip.n_blobs = 0; if (zip_size) { + ibool lru; page_zip_set_size(&block->page.zip, zip_size); - block->page.zip.data = buf_buddy_alloc(zip_size, TRUE); + block->page.zip.data = buf_buddy_alloc(zip_size, &lru); UNIV_MEM_DESC(block->page.zip.data, zip_size, block); } else { page_zip_set_size(&block->page.zip, 0); @@ -924,9 +925,9 @@ buf_LRU_free_block( If it cannot be allocated (without freeing a block from the LRU list), refuse to free bpage. */ alloc: - b = buf_buddy_alloc(sizeof *b, FALSE); + b = buf_buddy_alloc(sizeof *b, NULL); - if (!b) { + if (UNIV_UNLIKELY(!b)) { return(FALSE); } } diff --git a/include/buf0buddy.h b/include/buf0buddy.h index 7b8536ea42c..5bd2dbc779f 100644 --- a/include/buf0buddy.h +++ b/include/buf0buddy.h @@ -18,14 +18,21 @@ Created December 2006 by Marko Makela #include "buf0types.h" /************************************************************************** -Allocate a block. */ +Allocate a block. The thread calling this function must hold +buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. +The buf_pool->mutex may only be released and reacquired if +lru == BUF_BUDDY_USE_LRU. */ UNIV_INLINE void* buf_buddy_alloc( /*============*/ - /* out: pointer to the start of the block */ + /* out: allocated block, + possibly NULL if lru == NULL */ ulint size, /* in: block size, up to UNIV_PAGE_SIZE */ - ibool lru) /* in: TRUE=allocate from the LRU list if needed */ + ibool* lru) /* in: pointer to a variable that will be assigned + TRUE if storage was allocated from the LRU list + and buf_pool->mutex was temporarily released, + or NULL if the LRU list should not be used */ __attribute__((malloc)); /************************************************************************** diff --git a/include/buf0buddy.ic b/include/buf0buddy.ic index 5b1657b37cb..cffa1a23ab4 100644 --- a/include/buf0buddy.ic +++ b/include/buf0buddy.ic @@ -17,15 +17,22 @@ Created December 2006 by Marko Makela #include "sync0sync.h" /************************************************************************** -Allocate a block. */ +Allocate a block. The thread calling this function must hold +buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. +The buf_pool->mutex may only be released and reacquired if +lru == BUF_BUDDY_USE_LRU. */ void* buf_buddy_alloc_low( /*================*/ - /* out: pointer to the start of the block */ + /* out: allocated block, + possibly NULL if lru==NULL */ ulint i, /* in: index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */ - ibool lru) /* in: TRUE=allocate from the LRU list if needed */ + ibool* lru) /* in: pointer to a variable that will be assigned + TRUE if storage was allocated from the LRU list + and buf_pool->mutex was temporarily released, + or NULL if the LRU list should not be used */ __attribute__((malloc)); /************************************************************************** @@ -60,14 +67,21 @@ buf_buddy_get_slot( } /************************************************************************** -Allocate a block. */ +Allocate a block. The thread calling this function must hold +buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex. +The buf_pool->mutex may only be released and reacquired if +lru == BUF_BUDDY_USE_LRU. */ UNIV_INLINE void* buf_buddy_alloc( /*============*/ - /* out: pointer to the start of the block */ + /* out: allocated block, + possibly NULL if lru == NULL */ ulint size, /* in: block size, up to UNIV_PAGE_SIZE */ - ibool lru) /* in: TRUE=allocate from the LRU list if needed */ + ibool* lru) /* in: pointer to a variable that will be assigned + TRUE if storage was allocated from the LRU list + and buf_pool->mutex was temporarily released, + or NULL if the LRU list should not be used */ { ut_ad(mutex_own(&buf_pool->mutex));