From 3bd1a9fbfdeb1f357fa987b94a842cdc05732804 Mon Sep 17 00:00:00 2001 From: marko <> Date: Thu, 10 Sep 2009 09:47:09 +0000 Subject: [PATCH] branches/zip: Reduce mutex contention that was introduced when addressing Bug #45015 (Issue #316), in r5703. buf_page_set_accessed_make_young(): New auxiliary function, called by buf_page_get_zip(), buf_page_get_gen(), buf_page_optimistic_get_func(). Call ut_time_ms() outside of buf_pool_mutex. Use cached access_time. buf_page_set_accessed(): Add the parameter time_ms, so that ut_time_ms() need not be called while holding buf_pool_mutex. buf_page_optimistic_get_func(), buf_page_get_known_nowait(): Read buf_page_t::access_time without holding buf_pool_mutex. This should be OK, because the field is only used for heuristic purposes. buf_page_peek_if_too_old(): If buf_pool->freed_page_clock == 0, return FALSE, so that we will not waste time moving blocks in the LRU list in the warm-up phase or when the workload fits in the buffer pool. rb://156 approved by Sunny Bains --- buf/buf0buf.c | 84 ++++++++++++++++++++++++++++++---------------- include/buf0buf.h | 3 +- include/buf0buf.ic | 23 ++++++++----- 3 files changed, 72 insertions(+), 38 deletions(-) diff --git a/buf/buf0buf.c b/buf/buf0buf.c index d4b6afa1593..32435bd1f87 100644 --- a/buf/buf0buf.c +++ b/buf/buf0buf.c @@ -1506,6 +1506,36 @@ buf_page_make_young( buf_pool_mutex_exit(); } +/********************************************************************//** +Sets the time of the first access of a page and moves a page to the +start of the buffer pool LRU list if it is too old. This high-level +function can be used to prevent an important page from slipping +out of the buffer pool. */ +static +void +buf_page_set_accessed_make_young( +/*=============================*/ + buf_page_t* bpage, /*!< in/out: buffer block of a + file page */ + unsigned access_time) /*!< in: bpage->access_time + read under mutex protection, + or 0 if unknown */ +{ + ut_ad(!buf_pool_mutex_own()); + ut_a(buf_page_in_file(bpage)); + + if (buf_page_peek_if_too_old(bpage)) { + buf_pool_mutex_enter(); + buf_LRU_make_block_young(bpage); + buf_pool_mutex_exit(); + } else if (!access_time) { + ulint time_ms = ut_time_ms(); + buf_pool_mutex_enter(); + buf_page_set_accessed(bpage, time_ms); + buf_pool_mutex_exit(); + } +} + /********************************************************************//** Resets the check_index_page_at_flush field of a page if found in the buffer pool. */ @@ -1637,6 +1667,7 @@ buf_page_get_zip( buf_page_t* bpage; mutex_t* block_mutex; ibool must_read; + unsigned access_time; #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside()); @@ -1704,17 +1735,14 @@ err_exit: got_block: must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ; - - if (buf_page_peek_if_too_old(bpage)) { - buf_LRU_make_block_young(bpage); - } - - buf_page_set_accessed(bpage); + access_time = buf_page_is_accessed(bpage); buf_pool_mutex_exit(); mutex_exit(block_mutex); + buf_page_set_accessed_make_young(bpage, access_time); + #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(!bpage->file_page_was_freed); #endif @@ -2244,14 +2272,10 @@ wait_until_unfixed: access_time = buf_page_is_accessed(&block->page); - if (buf_page_peek_if_too_old(&block->page)) { - buf_LRU_make_block_young(&block->page); - } - - buf_page_set_accessed(&block->page); - buf_pool_mutex_exit(); + buf_page_set_accessed_make_young(&block->page, access_time); + #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(!block->page.file_page_was_freed); #endif @@ -2353,18 +2377,13 @@ buf_page_optimistic_get_func( mutex_exit(&block->mutex); - buf_pool_mutex_enter(); + /* Check if this is the first access to the page. + We do a dirty read on purpose, to avoid mutex contention. + This field is only used for heuristic purposes; it does not + affect correctness. */ - /* Check if this is the first access to the page */ access_time = buf_page_is_accessed(&block->page); - - if (buf_page_peek_if_too_old(&block->page)) { - buf_LRU_make_block_young(&block->page); - } - - buf_page_set_accessed(&block->page); - - buf_pool_mutex_exit(); + buf_page_set_accessed_make_young(&block->page, access_time); ut_ad(!ibuf_inside() || ibuf_page(buf_block_get_space(block), @@ -2477,16 +2496,22 @@ buf_page_get_known_nowait( mutex_exit(&block->mutex); - buf_pool_mutex_enter(); - if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) { + buf_pool_mutex_enter(); buf_LRU_make_block_young(&block->page); + buf_pool_mutex_exit(); + } else if (!buf_page_is_accessed(&block->page)) { + /* Above, we do a dirty read on purpose, to avoid + mutex contention. The field buf_page_t::access_time + is only used for heuristic purposes. Writes to the + field must be protected by mutex, however. */ + ulint time_ms = ut_time_ms(); + + buf_pool_mutex_enter(); + buf_page_set_accessed(&block->page, time_ms); + buf_pool_mutex_exit(); } - buf_page_set_accessed(&block->page); - - buf_pool_mutex_exit(); - ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { @@ -2917,6 +2942,7 @@ buf_page_create( buf_frame_t* frame; buf_block_t* block; buf_block_t* free_block = NULL; + ulint time_ms = ut_time_ms(); ut_ad(mtr); ut_ad(space || !zip_size); @@ -3000,7 +3026,7 @@ buf_page_create( rw_lock_x_unlock(&block->lock); } - buf_page_set_accessed(&block->page); + buf_page_set_accessed(&block->page, time_ms); buf_pool_mutex_exit(); diff --git a/include/buf0buf.h b/include/buf0buf.h index 3c2bd9a97cf..e9d95a14f1b 100644 --- a/include/buf0buf.h +++ b/include/buf0buf.h @@ -821,7 +821,8 @@ UNIV_INLINE void buf_page_set_accessed( /*==================*/ - buf_page_t* bpage) /*!< in/out: control block */ + buf_page_t* bpage, /*!< in/out: control block */ + ulint time_ms) /*!< in: ut_time_ms() */ __attribute__((nonnull)); /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed diff --git a/include/buf0buf.ic b/include/buf0buf.ic index 18ee4551eb4..8b1f904a090 100644 --- a/include/buf0buf.ic +++ b/include/buf0buf.ic @@ -72,10 +72,16 @@ buf_page_peek_if_too_old( /*=====================*/ const buf_page_t* bpage) /*!< in: block to make younger */ { - if (buf_LRU_old_threshold_ms && bpage->old) { + if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) { + /* If eviction has not started yet, do not update the + statistics or move blocks in the LRU list. This is + either the warm-up phase or an in-memory workload. */ + return(FALSE); + } else if (buf_LRU_old_threshold_ms && bpage->old) { unsigned access_time = buf_page_is_accessed(bpage); - if (access_time && ut_time_ms() - access_time + if (access_time > 0 + && (ut_time_ms() - access_time) >= buf_LRU_old_threshold_ms) { return(TRUE); } @@ -85,10 +91,10 @@ buf_page_peek_if_too_old( } else { /* FIXME: bpage->freed_page_clock is 31 bits */ return((buf_pool->freed_page_clock & ((1UL << 31) - 1)) - > bpage->freed_page_clock - + (buf_pool->curr_size - * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio) - / (BUF_LRU_OLD_RATIO_DIV * 4))); + > ((ulint) bpage->freed_page_clock + + (buf_pool->curr_size + * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio) + / (BUF_LRU_OLD_RATIO_DIV * 4)))); } } @@ -490,14 +496,15 @@ UNIV_INLINE void buf_page_set_accessed( /*==================*/ - buf_page_t* bpage) /*!< in/out: control block */ + buf_page_t* bpage, /*!< in/out: control block */ + ulint time_ms) /*!< in: ut_time_ms() */ { ut_a(buf_page_in_file(bpage)); ut_ad(buf_pool_mutex_own()); if (!bpage->access_time) { /* Make this the time of the first access. */ - bpage->access_time = ut_time_ms(); + bpage->access_time = time_ms; } }