/* Source: mirror of https://github.com/MariaDB/server.git,
   commit 5155a300fa (synced 2025-01-31 02:51:44 +01:00). */
/*****************************************************************************

Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file include/buf0lru.h
The database buffer pool LRU replacement algorithm

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#ifndef buf0lru_h
#define buf0lru_h

#include "ut0byte.h"
#include "buf0types.h"

// Forward declaration
struct trx_t;
struct fil_space_t;

/*#######################################################################
These are low-level functions
#########################################################################*/

/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN	512	/* 8 megabytes of 16k pages */

/** Empty the flush list for all pages belonging to a tablespace.
@param[in]	id	tablespace identifier
@param[in]	flush	whether to write the pages to files
@param[in]	first	first page to be flushed or evicted */
void buf_LRU_flush_or_remove_pages(ulint id, bool flush, ulint first = 0);

#ifdef UNIV_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool.zip_clean in the LRU order. */
void
buf_LRU_insert_zip_clean(
/*=====================*/
	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
#endif /* UNIV_DEBUG */

/** Try to free a block. If bpage is a descriptor of a compressed-only
ROW_FORMAT=COMPRESSED page, the buf_page_t object will be freed as well.
The caller must hold buf_pool.mutex.
@param bpage	block to be freed
@param zip	whether to remove both copies of a ROW_FORMAT=COMPRESSED page
@retval true if freed and buf_pool.mutex may have been temporarily released
@retval false if the page was not freed */
bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
  MY_ATTRIBUTE((nonnull));

/** Try to free a replaceable block.
@param[in]	scan_all	true=scan the whole LRU list,
				false=use BUF_LRU_SEARCH_SCAN_THRESHOLD
@return true if found and freed */
bool buf_LRU_scan_and_free_block(bool scan_all);

/** @return a buffer block from the buf_pool.free list
@retval	NULL	if the free list is empty */
buf_block_t* buf_LRU_get_free_only();

/** Get a free block from the buf_pool. The block is taken off the
free list. If free list is empty, blocks are moved from the end of the
LRU list to the free list.

This function is called from a user thread when it needs a clean
block to read in a page. Note that we only ever get a block from
the free list. Even when we flush a page or find a page in LRU scan
we put it to free list to be used.
* iteration 0:
  * get a block from free list, success:done
  * if buf_pool.try_LRU_scan is set
    * scan LRU up to srv_LRU_scan_depth to find a clean block
    * the above will put the block on free list
    * success:retry the free list
  * flush one dirty page from tail of LRU to disk
    * the above will put the block on free list
    * success: retry the free list
* iteration 1:
  * same as iteration 0 except:
    * scan whole LRU list
    * scan LRU list even if buf_pool.try_LRU_scan is not set
* iteration > 1:
  * same as iteration 1 but sleep 10ms

@param have_mutex	whether buf_pool.mutex is already being held
@return the free control block, in state BUF_BLOCK_MEMORY */
buf_block_t* buf_LRU_get_free_block(bool have_mutex)
  MY_ATTRIBUTE((malloc,warn_unused_result));

/** @return whether the unzip_LRU list should be used for evicting a victim
instead of the general LRU list */
bool buf_LRU_evict_from_unzip_LRU();

/** Puts a block back to the free list.
@param[in]	block	block; not containing a file page */
void
buf_LRU_block_free_non_file_page(buf_block_t* block);
/******************************************************************//**
Adds a block to the LRU list. Please make sure that the page_size is
already set when invoking the function, so that we can get correct
page_size from the buffer page when adding a block into LRU */
void
buf_LRU_add_block(
/*==============*/
	buf_page_t*	bpage,	/*!< in: control block */
	bool		old);	/*!< in: true if should be put to the old
				blocks in the LRU list, else put to the
				start; if the LRU list is very short, added to
				the start regardless of this parameter */
/******************************************************************//**
Adds a block to the LRU list of decompressed zip pages. */
void
buf_unzip_LRU_add_block(
/*====================*/
	buf_block_t*	block,	/*!< in: control block */
	ibool		old);	/*!< in: TRUE if should be put to the end
				of the list, else put to the start */

/** Update buf_pool.LRU_old_ratio.
@param[in]	old_pct		Reserve this percentage of
				the buffer pool for "old" blocks
@param[in]	adjust		true=adjust the LRU list;
				false=just assign buf_pool.LRU_old_ratio
				during the initialization of InnoDB
@return updated old_pct */
uint buf_LRU_old_ratio_update(uint old_pct, bool adjust);
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
void
buf_LRU_stat_update();

/** Remove one page from LRU list and put it to free list.
@param bpage		file page to be freed
@param id		page identifier
@param hash_lock	buf_pool.page_hash latch (will be released here) */
void buf_LRU_free_one_page(buf_page_t *bpage, const page_id_t id,
                           page_hash_latch *hash_lock)
  MY_ATTRIBUTE((nonnull));

#ifdef UNIV_DEBUG
/** Validate the LRU list. */
void buf_LRU_validate();
#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG
/** Dump the LRU list to stderr. */
void buf_LRU_print();
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG */

/** @name Heuristics for detecting index scan @{ */
/** The denominator of buf_pool.LRU_old_ratio. */
#define BUF_LRU_OLD_RATIO_DIV	1024
/** Maximum value of buf_pool.LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_pool.LRU_old_ratio_update */
#define BUF_LRU_OLD_RATIO_MAX	BUF_LRU_OLD_RATIO_DIV
/** Minimum value of buf_pool.LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_pool.LRU_old_ratio_update
The minimum must exceed
(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
#define BUF_LRU_OLD_RATIO_MIN	51

#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
#endif
#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
#endif

/** Move blocks to "new" LRU list only if the first access was at
least this many milliseconds ago. Not protected by any mutex or latch. */
extern uint	buf_LRU_old_threshold_ms;
/* @} */

/** @brief Statistics for selecting the LRU list for eviction.

These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics we decide
if we want to evict from buf_pool.unzip_LRU or buf_pool.LRU. */
struct buf_LRU_stat_t
{
	ulint	io;	/**< Counter of buffer pool I/O operations. */
	ulint	unzip;	/**< Counter of page_zip_decompress operations. */
};

/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t	buf_LRU_stat_cur;

/** Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Protected by buf_pool.mutex. */
extern buf_LRU_stat_t	buf_LRU_stat_sum;

/********************************************************************//**
Increments the I/O counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
/********************************************************************//**
Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++

#endif