mirror of https://github.com/MariaDB/server.git
commit ad6171b91c
If the InnoDB buffer pool contains many pages for a table or index
that is being dropped or rebuilt, and if many such pages are
pointed to by the adaptive hash index, dropping the adaptive hash index
may consume a lot of time. The time-consuming operation of dropping the
adaptive hash index entries is being executed while the InnoDB data
dictionary cache dict_sys is exclusively locked.

It is not actually necessary to drop all adaptive hash index entries
at the time a table or index is being dropped or rebuilt. We can let
the LRU replacement policy of the buffer pool take care of this
gradually. For this to work, we must detach the dict_table_t and
dict_index_t objects from the main dict_sys cache, and once the last
adaptive hash index entry for the detached table is removed (when the
garbage page is evicted from the buffer pool), we can free the
dict_table_t and dict_index_t object.

Related to this, in MDEV-16283, we made ALTER TABLE...DISCARD TABLESPACE
skip both the buffer pool eviction and the drop of the adaptive hash
index. We shifted the burden to ALTER TABLE...IMPORT TABLESPACE or
DROP TABLE. We can remove the eviction from DROP TABLE. We must retain
the eviction in the ALTER TABLE...IMPORT TABLESPACE code path, so that
in case the discarded table is being re-imported with the same
tablespace identifier, the fresh data from the imported tablespace will
replace any stale pages in the buffer pool.

rpl.rpl_failed_drop_tbl_binlog: Remove the test. DROP TABLE can no
longer be interrupted inside InnoDB.

fseg_free_page(), fseg_free_step(), fseg_free_step_not_header(),
fseg_free_page_low(), fseg_free_extent(): Remove the parameter that
specifies whether the adaptive hash index should be dropped.

btr_search_lazy_free(): Lazily free an index when the last reference
to it is dropped from the adaptive hash index.

buf_pool_clear_hash_index(): Declare static, and move to the same
compilation unit with the bulk of the adaptive hash index code.

dict_index_t::clone(), dict_index_t::clone_if_needed(): Clone an index
that is being rebuilt while adaptive hash index entries exist. The
original index will be inserted into dict_table_t::freed_indexes and
dict_index_t::set_freed() will be called.

dict_index_t::set_freed(), dict_index_t::freed(): Mark the index as
freed, or check whether it has been freed. We will use the impossible
page number 1 to denote this condition.

dict_index_t::n_ahi_pages(): Replaces btr_search_info_get_ref_count().

dict_index_t::detach_columns(): Move the assignment n_fields=0 to
ha_innobase_inplace_ctx::clear_added_indexes(). We must have access to
the columns when freeing the adaptive hash index.
Note: dict_table_t::v_cols[] will remain valid. If virtual columns are
dropped or added, the table definition will be reloaded in
ha_innobase::commit_inplace_alter_table().

buf_page_mtr_lock(): Drop a stale adaptive hash index if needed.

We will also reduce the number of btr_get_search_latch() calls and
enclose some more code inside #ifdef BTR_CUR_HASH_ADAPT in order to
benefit cmake -DWITH_INNODB_AHI=OFF.
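As a rough illustration of the lazy-freeing scheme, consider the
following C++ sketch. The names set_freed(), freed() and n_ahi_pages()
are taken from the commit message above; everything else (the
simplified struct, the atomic reference counter, sketch_drop_ahi_ref())
is a hypothetical stand-in for the real InnoDB definitions, not a quote
of them.

/* Simplified stand-ins for dict_index_t and its AHI reference count. */
#include <atomic>
#include <cstdint>

struct sketch_index_t {
        /* Root page number. Page 1 can never be an index root (it is
        reserved in every tablespace), which is why the commit reuses
        it as the "freed" marker instead of adding another flag. */
        uint32_t                page;
        /* Number of buffer pool pages whose adaptive hash index
        entries still point into this index. */
        std::atomic<uint32_t>   ref_count{0};

        void     set_freed()            { page = 1; }
        bool     freed() const          { return page == 1; }
        uint32_t n_ahi_pages() const    { return ref_count.load(); }
};

/* Invoked when the AHI entries of one page are removed, e.g. because
the LRU policy evicted a stale page of a dropped table. Once a detached
index loses its last AHI reference, the object itself can be freed;
this is the idea behind btr_search_lazy_free(). */
void sketch_drop_ahi_ref(sketch_index_t* index)
{
        if (index->ref_count.fetch_sub(1) == 1 && index->freed()) {
                delete index;   /* last adaptive hash index entry gone */
        }
}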
277 lines
11 KiB
C
/*****************************************************************************

Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file include/buf0lru.h
The database buffer pool LRU replacement algorithm

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#ifndef buf0lru_h
#define buf0lru_h

#include "ut0byte.h"
#include "buf0types.h"

// Forward declaration
struct trx_t;

/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
pool for their locks.
@return TRUE if less than 25 % of buffer pool left */
ibool
buf_LRU_buf_pool_running_out(void);
/*==============================*/

/*#######################################################################
These are low-level functions
#########################################################################*/

/** Minimum LRU list length for which the LRU_old pointer is defined */
#define BUF_LRU_OLD_MIN_LEN     512     /* 8 megabytes of 16k pages */

/** Empty the flush list for all pages belonging to a tablespace.
@param[in]      id              tablespace identifier
@param[in,out]  observer        flush observer,
                                or NULL if nothing is to be written
@param[in]      first           first page to be flushed or evicted */
void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer,
                                   ulint first = 0);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
void
buf_LRU_insert_zip_clean(
/*=====================*/
        buf_page_t*     bpage); /*!< in: pointer to the block in question */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

/******************************************************************//**
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.

NOTE: If this function returns true, it will temporarily
release buf_pool->mutex. Furthermore, the page frame will no longer be
accessible via bpage.

The caller must hold buf_pool->mutex and must not hold any
buf_page_get_mutex() when calling this function.
@return true if freed, false otherwise. */
bool
buf_LRU_free_page(
/*==============*/
        buf_page_t*     bpage,  /*!< in: block to be freed */
        bool            zip)    /*!< in: true if should remove also the
                                compressed page of an uncompressed page */
        MY_ATTRIBUTE((nonnull));
/******************************************************************//**
Try to free a replaceable block.
@return true if found and freed */
bool
buf_LRU_scan_and_free_block(
/*========================*/
        buf_pool_t*     buf_pool,       /*!< in: buffer pool instance */
        bool            scan_all)       /*!< in: scan whole LRU list
                                        if true, otherwise scan only
                                        'old' blocks. */
        MY_ATTRIBUTE((nonnull,warn_unused_result));
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL.
@return a free control block, or NULL if the buf_block->free list is empty */
buf_block_t*
buf_LRU_get_free_only(
/*==================*/
        buf_pool_t*     buf_pool);      /*!< buffer pool instance */
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If free list is empty, blocks are moved from the end of the
LRU list to the free list.
This function is called from a user thread when it needs a clean
block to read in a page. Note that we only ever get a block from
the free list. Even when we flush a page or find a page in LRU scan
we put it to free list to be used.
* iteration 0:
  * get a block from free list, success:done
  * if buf_pool->try_LRU_scan is set
    * scan LRU up to srv_LRU_scan_depth to find a clean block
    * the above will put the block on free list
    * success:retry the free list
  * flush one dirty page from tail of LRU to disk
    * the above will put the block on free list
    * success: retry the free list
* iteration 1:
  * same as iteration 0 except:
    * scan whole LRU list
    * scan LRU list even if buf_pool->try_LRU_scan is not set
* iteration > 1:
  * same as iteration 1 but sleep 10ms
@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
buf_block_t*
buf_LRU_get_free_block(
/*===================*/
        buf_pool_t*     buf_pool)       /*!< in/out: buffer pool instance */
        MY_ATTRIBUTE((nonnull,warn_unused_result));
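/* Illustrative sketch (not part of the original header): the iteration
policy documented above, restated as code. buf_LRU_get_free_only() and
buf_LRU_scan_and_free_block() are the declarations from this file;
flush_one_dirty_page() is a hypothetical stand-in for the single-page
flush from the tail of the LRU. Kept inside #if 0 on purpose. */
#if 0
inline buf_block_t* sketch_get_free_block(buf_pool_t* buf_pool)
{
        for (ulint iteration = 0; ; iteration++) {
                if (buf_block_t* block = buf_LRU_get_free_only(buf_pool)) {
                        return block;   /* success: done */
                }
                /* iteration 0 scans only 'old' blocks and honors
                buf_pool->try_LRU_scan; later iterations scan all. */
                const bool scan_all = iteration > 0;
                if ((scan_all || buf_pool->try_LRU_scan)
                    && buf_LRU_scan_and_free_block(buf_pool, scan_all)) {
                        continue;       /* retry the free list */
                }
                flush_one_dirty_page(buf_pool); /* tail of the LRU */
                if (iteration > 1) {
                        os_thread_sleep(10000); /* back off for 10ms */
                }
        }
}
#endif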
/******************************************************************//**
Determines if the unzip_LRU list should be used for evicting a victim
instead of the general LRU list.
@return TRUE if should use unzip_LRU */
ibool
buf_LRU_evict_from_unzip_LRU(
/*=========================*/
        buf_pool_t*     buf_pool);
/******************************************************************//**
Puts a block back to the free list. */
void
buf_LRU_block_free_non_file_page(
/*=============================*/
        buf_block_t*    block); /*!< in: block, must not contain a file page */
/******************************************************************//**
Adds a block to the LRU list. Please make sure that the page_size is
already set when invoking the function, so that we can get correct
page_size from the buffer page when adding a block into LRU */
void
buf_LRU_add_block(
/*==============*/
        buf_page_t*     bpage,  /*!< in: control block */
        ibool           old);   /*!< in: TRUE if should be put to the old
                                blocks in the LRU list, else put to the
                                start; if the LRU list is very short, added to
                                the start regardless of this parameter */
/******************************************************************//**
Adds a block to the LRU list of decompressed zip pages. */
void
buf_unzip_LRU_add_block(
/*====================*/
        buf_block_t*    block,  /*!< in: control block */
        ibool           old);   /*!< in: TRUE if should be put to the end
                                of the list, else put to the start */
/******************************************************************//**
Moves a block to the start of the LRU list. */
void
buf_LRU_make_block_young(
/*=====================*/
        buf_page_t*     bpage); /*!< in: control block */
/**********************************************************************//**
Updates buf_pool->LRU_old_ratio.
@return updated old_pct */
uint
buf_LRU_old_ratio_update(
/*=====================*/
        uint    old_pct,/*!< in: Reserve this percentage of
                        the buffer pool for "old" blocks. */
        bool    adjust);/*!< in: true=adjust the LRU list;
                        false=just assign buf_pool->LRU_old_ratio
                        during the initialization of InnoDB */
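/* Worked example (illustration added here, not in the original header;
the conversion is a sketch of the documented semantics): the default
innodb_old_blocks_pct=37 corresponds to
37 * BUF_LRU_OLD_RATIO_DIV / 100 = 37 * 1024 / 100 = 378,
i.e. roughly 3/8 of the LRU list is reserved for "old" blocks. */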
/********************************************************************//**
Update the historical stats that we are collecting for LRU eviction
policy at the end of each interval. */
void
buf_LRU_stat_update(void);
/*=====================*/

/** Remove one page from LRU list and put it to free list.
@param[in,out]  bpage           block, must contain a file page and be in
                                a freeable state; there may or may not be a
                                hash index to the page
@param[in]      old_page_id     page number before bpage->id was invalidated */
void buf_LRU_free_one_page(buf_page_t* bpage, page_id_t old_page_id)
        MY_ATTRIBUTE((nonnull));

/******************************************************************//**
Adjust LRU hazard pointers if needed. */
void
buf_LRU_adjust_hp(
/*==============*/
        buf_pool_t*             buf_pool,/*!< in: buffer pool instance */
        const buf_page_t*       bpage); /*!< in: control block */

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Validates the LRU list.
@return TRUE */
ibool
buf_LRU_validate(void);
/*==================*/
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Prints the LRU list. */
void
buf_LRU_print(void);
/*===============*/
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */

/** @name Heuristics for detecting index scan @{ */
/** The denominator of buf_pool->LRU_old_ratio. */
#define BUF_LRU_OLD_RATIO_DIV   1024
/** Maximum value of buf_pool->LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_pool->LRU_old_ratio_update */
#define BUF_LRU_OLD_RATIO_MAX   BUF_LRU_OLD_RATIO_DIV
/** Minimum value of buf_pool->LRU_old_ratio.
@see buf_LRU_old_adjust_len
@see buf_pool->LRU_old_ratio_update
The minimum must exceed
(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
#define BUF_LRU_OLD_RATIO_MIN   51
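/* Sanity arithmetic for the comment above (illustrative; assumes
BUF_LRU_OLD_TOLERANCE is 20, as defined in buf0lru.cc):
(20 + 5) * 1024 / 512 = 50, so the minimum of 51 indeed exceeds it. */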

#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
#endif
#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
#endif

/** Move blocks to "new" LRU list only if the first access was at
least this many milliseconds ago. Not protected by any mutex or latch. */
extern uint buf_LRU_old_threshold_ms;
/* @} */

/** @brief Statistics for selecting the LRU list for eviction.

These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
and page_zip_decompress() operations. Based on the statistics we decide
if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
struct buf_LRU_stat_t
{
        ulint   io;     /**< Counter of buffer pool I/O operations. */
        ulint   unzip;  /**< Counter of page_zip_decompress operations. */
};

/** Current operation counters. Not protected by any mutex.
Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t   buf_LRU_stat_cur;

/** Running sum of past values of buf_LRU_stat_cur.
Updated by buf_LRU_stat_update(). Protected by buf_pool->mutex. */
extern buf_LRU_stat_t   buf_LRU_stat_sum;
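/* Illustrative sketch, not InnoDB's actual decision rule: the counters
above could be combined as follows to pick the eviction source; the
weighting factor is hypothetical. See buf_LRU_evict_from_unzip_LRU()
for the real heuristics. Kept inside #if 0 on purpose. */
#if 0
inline bool sketch_prefer_unzip_LRU()
{
        const ulint io = buf_LRU_stat_sum.io + buf_LRU_stat_cur.io;
        const ulint unzip = buf_LRU_stat_sum.unzip + buf_LRU_stat_cur.unzip;
        /* Evict uncompressed frames from unzip_LRU (keeping the
        compressed copies) when decompression dominates disk I/O. */
        return unzip * 10 > io;
}
#endif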

/********************************************************************//**
Increments the I/O counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_io()   buf_LRU_stat_cur.io++
/********************************************************************//**
Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
#define buf_LRU_stat_inc_unzip()        buf_LRU_stat_cur.unzip++

#endif