2014-02-26 19:11:54 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2016-06-21 14:21:03 +02:00
|
|
|
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
2018-01-09 11:37:58 +01:00
|
|
|
Copyright (c) 2017, 2018, MariaDB Corporation.
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/**************************************************//**
|
|
|
|
@file buf/buf0lru.cc
|
|
|
|
The database buffer replacement algorithm
|
|
|
|
|
|
|
|
Created 11/5/1995 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
|
|
|
#include "buf0lru.h"
|
|
|
|
#include "ut0byte.h"
|
|
|
|
#include "ut0rnd.h"
|
|
|
|
#include "sync0rw.h"
|
|
|
|
#include "hash0hash.h"
|
2016-08-12 10:17:45 +02:00
|
|
|
#include "os0event.h"
|
2014-02-26 19:11:54 +01:00
|
|
|
#include "fil0fil.h"
|
|
|
|
#include "btr0btr.h"
|
|
|
|
#include "buf0buddy.h"
|
|
|
|
#include "buf0buf.h"
|
|
|
|
#include "buf0dblwr.h"
|
|
|
|
#include "buf0flu.h"
|
|
|
|
#include "buf0rea.h"
|
|
|
|
#include "btr0sea.h"
|
|
|
|
#include "ibuf0ibuf.h"
|
|
|
|
#include "os0file.h"
|
|
|
|
#include "page0zip.h"
|
|
|
|
#include "log0recv.h"
|
|
|
|
#include "srv0srv.h"
|
|
|
|
#include "srv0mon.h"
|
|
|
|
|
|
|
|
/** The number of blocks from the LRU_old pointer onward, including
|
|
|
|
the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
|
|
|
|
of the whole LRU list length, except that the tolerance defined below
|
|
|
|
is allowed. Note that the tolerance must be small enough such that for
|
|
|
|
even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
|
|
|
|
allowed to point to either end of the LRU list. */
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
static const ulint BUF_LRU_OLD_TOLERANCE = 20;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** The minimum amount of non-old blocks when the LRU_old list exists
|
|
|
|
(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
|
|
|
|
@see buf_LRU_old_adjust_len */
|
|
|
|
#define BUF_LRU_NON_OLD_MIN_LEN 5
|
|
|
|
#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
|
|
|
|
# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/** When dropping the search hash index entries before deleting an ibd
|
|
|
|
file, we build a local array of pages belonging to that tablespace
|
|
|
|
in the buffer pool. Following is the size of that array.
|
|
|
|
We also release buf_pool->mutex after scanning this many pages of the
|
|
|
|
flush_list when dropping a table. This is to ensure that other threads
|
|
|
|
are not blocked for extended period of time when using very large
|
|
|
|
buffer pools. */
|
2016-08-12 10:17:45 +02:00
|
|
|
static const ulint BUF_LRU_DROP_SEARCH_SIZE = 1024;
|
|
|
|
|
|
|
|
/** We scan these many blocks when looking for a clean page to evict
|
|
|
|
during LRU eviction. */
|
|
|
|
static const ulint BUF_LRU_SEARCH_SCAN_THRESHOLD = 100;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** If we switch on the InnoDB monitor because there are too few available
|
|
|
|
frames in the buffer pool, we set this to TRUE */
|
2016-08-12 10:17:45 +02:00
|
|
|
static bool buf_lru_switched_on_innodb_mon = false;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-01-09 11:37:58 +01:00
|
|
|
/** True if diagnostic message about difficult to find free blocks
|
|
|
|
in the buffer pool has already been printed. */
|
|
|
|
static bool buf_lru_free_blocks_error_printed;
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/******************************************************************//**
|
|
|
|
These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
|
|
|
|
and page_zip_decompress() operations. Based on the statistics,
|
|
|
|
buf_LRU_evict_from_unzip_LRU() decides if we want to evict from
|
|
|
|
unzip_LRU or the regular LRU. From unzip_LRU, we will only evict the
|
|
|
|
uncompressed frame (meaning we can evict dirty blocks as well). From
|
|
|
|
the regular LRU, we will evict the entire block (i.e.: both the
|
|
|
|
uncompressed and compressed data), which must be clean. */
|
|
|
|
|
|
|
|
/* @{ */
|
|
|
|
|
|
|
|
/** Number of intervals for which we keep the history of these stats.
|
|
|
|
Each interval is 1 second, defined by the rate at which
|
|
|
|
srv_error_monitor_thread() calls buf_LRU_stat_update(). */
|
2016-08-12 10:17:45 +02:00
|
|
|
static const ulint BUF_LRU_STAT_N_INTERVAL = 50;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** Co-efficient with which we multiply I/O operations to equate them
|
|
|
|
with page_zip_decompress() operations. */
|
2016-08-12 10:17:45 +02:00
|
|
|
static const ulint BUF_LRU_IO_TO_UNZIP_FACTOR = 50;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** Sampled values of buf_LRU_stat_cur.
|
|
|
|
Not protected by any mutex. Updated by buf_LRU_stat_update(). */
|
|
|
|
static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
|
|
|
|
|
|
|
|
/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */
|
|
|
|
static ulint buf_LRU_stat_arr_ind;
|
|
|
|
|
|
|
|
/** Current operation counters. Not protected by any mutex. Cleared
|
|
|
|
by buf_LRU_stat_update(). */
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_LRU_stat_t buf_LRU_stat_cur;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** Running sum of past values of buf_LRU_stat_cur.
|
|
|
|
Updated by buf_LRU_stat_update(). Not protected by any mutex. */
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_LRU_stat_t buf_LRU_stat_sum;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* @} */
|
|
|
|
|
|
|
|
/** @name Heuristics for detecting index scan @{ */
|
|
|
|
/** Move blocks to "new" LRU list only if the first access was at
|
|
|
|
least this many milliseconds ago. Not protected by any mutex or latch. */
|
2016-08-12 10:17:45 +02:00
|
|
|
uint buf_LRU_old_threshold_ms;
|
2014-02-26 19:11:54 +01:00
|
|
|
/* @} */
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Takes a block out of the LRU list and page hash table.
|
|
|
|
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
|
|
|
|
the object will be freed.
|
|
|
|
|
|
|
|
The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex
|
|
|
|
and the appropriate hash_lock. This function will release the
|
|
|
|
buf_page_get_mutex() and the hash_lock.
|
|
|
|
|
|
|
|
If a compressed page is freed other compressed pages may be relocated.
|
|
|
|
@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
|
|
|
|
caller needs to free the page to the free list
|
|
|
|
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
|
|
|
|
this case the block is already returned to the buddy allocator. */
|
2016-09-06 08:43:16 +02:00
|
|
|
static MY_ATTRIBUTE((warn_unused_result))
|
2014-02-26 19:11:54 +01:00
|
|
|
bool
|
|
|
|
buf_LRU_block_remove_hashed(
|
|
|
|
/*========================*/
|
|
|
|
buf_page_t* bpage, /*!< in: block, must contain a file page and
|
|
|
|
be in a state where it can be freed; there
|
|
|
|
may or may not be a hash index to the page */
|
|
|
|
bool zip); /*!< in: true if should remove also the
|
|
|
|
compressed page of an uncompressed page */
|
|
|
|
/******************************************************************//**
|
|
|
|
Puts a file page that has no hash index to the free list. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
buf_LRU_block_free_hashed_page(
|
|
|
|
/*===========================*/
|
|
|
|
buf_block_t* block); /*!< in: block, must contain a file page and
|
|
|
|
be in a state where it can be freed */
|
|
|
|
|
|
|
|
/******************************************************************//**
|
2016-08-12 10:17:45 +02:00
|
|
|
Increases LRU size in bytes with page size inline function */
|
2014-02-26 19:11:54 +01:00
|
|
|
static inline
|
|
|
|
void
|
|
|
|
incr_LRU_size_in_bytes(
|
|
|
|
/*===================*/
|
|
|
|
buf_page_t* bpage, /*!< in: control block */
|
|
|
|
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
|
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
buf_pool->stat.LRU_bytes += bpage->size.physical();
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Determines if the unzip_LRU list should be used for evicting a victim
|
|
|
|
instead of the general LRU list.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE if should use unzip_LRU */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool
|
|
|
|
buf_LRU_evict_from_unzip_LRU(
|
|
|
|
/*=========================*/
|
|
|
|
buf_pool_t* buf_pool)
|
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
/* If the unzip_LRU list is empty, we can only use the LRU. */
|
|
|
|
if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
|
|
|
|
return(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If unzip_LRU is at most 10% of the size of the LRU list,
|
|
|
|
then use the LRU. This slack allows us to keep hot
|
|
|
|
decompressed pages in the buffer pool. */
|
|
|
|
if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
|
|
|
|
<= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
|
|
|
|
return(FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If eviction hasn't started yet, we assume by default
|
|
|
|
that a workload is disk bound. */
|
|
|
|
if (buf_pool->freed_page_clock == 0) {
|
|
|
|
return(TRUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Calculate the average over past intervals, and add the values
|
|
|
|
of the current interval. */
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
|
2014-02-26 19:11:54 +01:00
|
|
|
+ buf_LRU_stat_cur.io;
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
ulint unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
|
2014-02-26 19:11:54 +01:00
|
|
|
+ buf_LRU_stat_cur.unzip;
|
|
|
|
|
|
|
|
/* Decide based on our formula. If the load is I/O bound
|
|
|
|
(unzip_avg is smaller than the weighted io_avg), evict an
|
|
|
|
uncompressed frame from unzip_LRU. Otherwise we assume that
|
|
|
|
the load is CPU bound and evict from the regular LRU. */
|
|
|
|
return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
|
|
|
|
}
|
|
|
|
|
2017-02-23 22:05:12 +01:00
|
|
|
#ifdef BTR_CUR_HASH_ADAPT
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Attempts to drop page hash index on a batch of pages belonging to a
|
|
|
|
particular space id.
|
|
|
|
@param[in] space_id space id
|
|
|
|
@param[in] arr array of page_no
|
|
|
|
@param[in] count number of entries in array */
|
2014-02-26 19:11:54 +01:00
|
|
|
static
|
|
|
|
void
|
2018-05-29 15:36:16 +02:00
|
|
|
buf_LRU_drop_page_hash_batch(ulint space_id, const ulint* arr, ulint count)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE);
|
|
|
|
|
2018-05-29 15:36:16 +02:00
|
|
|
for (const ulint* const end = arr + count; arr != end; ) {
|
2016-08-12 10:17:45 +02:00
|
|
|
/* While our only caller
|
|
|
|
buf_LRU_drop_page_hash_for_tablespace()
|
|
|
|
is being executed for DROP TABLE or similar,
|
2018-05-29 15:36:16 +02:00
|
|
|
the table cannot be evicted from the buffer pool. */
|
2016-08-12 10:17:45 +02:00
|
|
|
btr_search_drop_page_hash_when_freed(
|
2018-05-29 15:36:16 +02:00
|
|
|
page_id_t(space_id, *arr++));
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
|
|
|
|
hash index entries belonging to that table. This function tries to
|
|
|
|
do that in batch. Note that this is a 'best effort' attempt and does
|
|
|
|
not guarantee that ALL hash entries will be removed. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
buf_LRU_drop_page_hash_for_tablespace(
|
|
|
|
/*==================================*/
|
|
|
|
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
|
|
|
|
ulint id) /*!< in: space id */
|
|
|
|
{
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint* page_arr = static_cast<ulint*>(ut_malloc_nokey(
|
|
|
|
sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE));
|
|
|
|
|
|
|
|
ulint num_entries = 0;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
scan_again:
|
2016-08-12 10:17:45 +02:00
|
|
|
for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU);
|
|
|
|
bpage != NULL;
|
|
|
|
/* No op */) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_a(buf_page_in_file(bpage));
|
|
|
|
|
|
|
|
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
|
2016-08-12 10:17:45 +02:00
|
|
|
|| bpage->id.space() != id
|
2014-02-26 19:11:54 +01:00
|
|
|
|| bpage->io_fix != BUF_IO_NONE) {
|
|
|
|
/* Compressed pages are never hashed.
|
|
|
|
Skip blocks of other tablespaces.
|
|
|
|
Skip I/O-fixed blocks (to be dealt with later). */
|
|
|
|
next_page:
|
|
|
|
bpage = prev_bpage;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-09-02 16:28:54 +02:00
|
|
|
buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-09-02 16:28:54 +02:00
|
|
|
mutex_enter(&block->mutex);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2016-09-02 16:28:54 +02:00
|
|
|
/* This debug check uses a dirty read that could
|
|
|
|
theoretically cause false positives while
|
|
|
|
buf_pool_clear_hash_index() is executing.
|
|
|
|
(Other conflicting access paths to the adaptive hash
|
|
|
|
index should not be possible, because when a
|
|
|
|
tablespace is being discarded or dropped, there must
|
|
|
|
be no concurrect access to the contained tables.) */
|
|
|
|
assert_block_ahi_valid(block);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2016-09-02 16:28:54 +02:00
|
|
|
bool skip = bpage->buf_fix_count > 0 || !block->index;
|
|
|
|
|
|
|
|
mutex_exit(&block->mutex);
|
|
|
|
|
|
|
|
if (skip) {
|
|
|
|
/* Skip this block, because there are
|
|
|
|
no adaptive hash index entries
|
|
|
|
pointing to it, or because we cannot
|
|
|
|
drop them due to the buffer-fix. */
|
|
|
|
goto next_page;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Store the page number so that we can drop the hash
|
|
|
|
index in a batch later. */
|
2016-08-12 10:17:45 +02:00
|
|
|
page_arr[num_entries] = bpage->id.page_no();
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE);
|
|
|
|
++num_entries;
|
|
|
|
|
|
|
|
if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) {
|
|
|
|
goto next_page;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Array full. We release the buf_pool->mutex to obey
|
|
|
|
the latching order. */
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
|
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727, MDEV-14491
InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0
by replacing the flawed wait logic in dict_index_remove_from_cache_low().
On DISCARD TABLESPACE, there is no need to drop the adaptive hash index.
We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or
DROP INDEX. As long as the dict_index_t object remains in the cache
and the table remains inaccessible, the adaptive hash index entries
to orphaned pages would not do any harm. They would be dropped when
buffer pool pages are reused for something else.
btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch():
Remove the parameter zip_size, and pass 0 to buf_page_get_gen().
buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL.
buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index
even if the tablespace is inaccessible.
buf_LRU_drop_page_hash_for_tablespace(): New global function, to drop
the adaptive hash index.
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Remove the parameter drop_ahi.
dict_index_remove_from_cache_low(): Actively drop the adaptive hash index
if entries exist. This should prevent InnoDB hangs on DROP TABLE or
DROP INDEX.
row_import_for_mysql(): Drop any adaptive hash index entries for the table.
row_drop_table_for_mysql(): Drop any adaptive hash index for the table,
except if the table resides in the system tablespace. (DISCARD TABLESPACE
does not apply to the system tablespace, and we do no want to drop the
adaptive hash index for other tables than the one that is being dropped.)
row_truncate_table_for_mysql(): Drop any adaptive hash index entries for
the table, except if the table resides in the system tablespace.
2018-05-29 12:52:43 +02:00
|
|
|
buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
num_entries = 0;
|
|
|
|
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
/* Note that we released the buf_pool mutex above
|
|
|
|
after reading the prev_bpage during processing of a
|
|
|
|
page_hash_batch (i.e.: when the array was full).
|
|
|
|
Because prev_bpage could belong to a compressed-only
|
|
|
|
block, it may have been relocated, and thus the
|
|
|
|
pointer cannot be trusted. Because bpage is of type
|
|
|
|
buf_block_t, it is safe to dereference.
|
|
|
|
|
|
|
|
bpage can change in the LRU list. This is OK because
|
|
|
|
this function is a 'best effort' to drop as many
|
|
|
|
search hash entries as possible and it does not
|
|
|
|
guarantee that ALL such entries will be dropped. */
|
|
|
|
|
|
|
|
/* If, however, bpage has been removed from LRU list
|
|
|
|
to the free list then we should restart the scan.
|
|
|
|
bpage->state is protected by buf_pool mutex. */
|
2016-08-12 10:17:45 +02:00
|
|
|
if (bpage != NULL
|
2014-02-26 19:11:54 +01:00
|
|
|
&& buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
goto scan_again;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
|
|
|
|
/* Drop any remaining batch of search hashed pages. */
|
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727, MDEV-14491
InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0
by replacing the flawed wait logic in dict_index_remove_from_cache_low().
On DISCARD TABLESPACE, there is no need to drop the adaptive hash index.
We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or
DROP INDEX. As long as the dict_index_t object remains in the cache
and the table remains inaccessible, the adaptive hash index entries
to orphaned pages would not do any harm. They would be dropped when
buffer pool pages are reused for something else.
btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch():
Remove the parameter zip_size, and pass 0 to buf_page_get_gen().
buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL.
buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index
even if the tablespace is inaccessible.
buf_LRU_drop_page_hash_for_tablespace(): New global function, to drop
the adaptive hash index.
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Remove the parameter drop_ahi.
dict_index_remove_from_cache_low(): Actively drop the adaptive hash index
if entries exist. This should prevent InnoDB hangs on DROP TABLE or
DROP INDEX.
row_import_for_mysql(): Drop any adaptive hash index entries for the table.
row_drop_table_for_mysql(): Drop any adaptive hash index for the table,
except if the table resides in the system tablespace. (DISCARD TABLESPACE
does not apply to the system tablespace, and we do no want to drop the
adaptive hash index for other tables than the one that is being dropped.)
row_truncate_table_for_mysql(): Drop any adaptive hash index entries for
the table, except if the table resides in the system tablespace.
2018-05-29 12:52:43 +02:00
|
|
|
buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_free(page_arr);
|
|
|
|
}
|
|
|
|
|
2018-06-26 10:34:51 +02:00
|
|
|
/** Try to drop the adaptive hash index for a tablespace.
|
|
|
|
@param[in,out] table table
|
|
|
|
@return whether anything was dropped */
|
2018-06-26 14:12:58 +02:00
|
|
|
bool buf_LRU_drop_page_hash_for_tablespace(dict_table_t* table)
|
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727, MDEV-14491
InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0
by replacing the flawed wait logic in dict_index_remove_from_cache_low().
On DISCARD TABLESPACE, there is no need to drop the adaptive hash index.
We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or
DROP INDEX. As long as the dict_index_t object remains in the cache
and the table remains inaccessible, the adaptive hash index entries
to orphaned pages would not do any harm. They would be dropped when
buffer pool pages are reused for something else.
btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch():
Remove the parameter zip_size, and pass 0 to buf_page_get_gen().
buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL.
buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index
even if the tablespace is inaccessible.
buf_LRU_drop_page_hash_for_tablespace(): New global function, to drop
the adaptive hash index.
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Remove the parameter drop_ahi.
dict_index_remove_from_cache_low(): Actively drop the adaptive hash index
if entries exist. This should prevent InnoDB hangs on DROP TABLE or
DROP INDEX.
row_import_for_mysql(): Drop any adaptive hash index entries for the table.
row_drop_table_for_mysql(): Drop any adaptive hash index for the table,
except if the table resides in the system tablespace. (DISCARD TABLESPACE
does not apply to the system tablespace, and we do no want to drop the
adaptive hash index for other tables than the one that is being dropped.)
row_truncate_table_for_mysql(): Drop any adaptive hash index entries for
the table, except if the table resides in the system tablespace.
2018-05-29 12:52:43 +02:00
|
|
|
{
|
|
|
|
for (dict_index_t* index = dict_table_get_first_index(table);
|
|
|
|
index != NULL;
|
|
|
|
index = dict_table_get_next_index(index)) {
|
2018-05-29 15:36:16 +02:00
|
|
|
if (btr_search_info_get_ref_count(btr_search_get_info(index),
|
|
|
|
index)) {
|
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727, MDEV-14491
InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0
by replacing the flawed wait logic in dict_index_remove_from_cache_low().
On DISCARD TABLESPACE, there is no need to drop the adaptive hash index.
We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or
DROP INDEX. As long as the dict_index_t object remains in the cache
and the table remains inaccessible, the adaptive hash index entries
to orphaned pages would not do any harm. They would be dropped when
buffer pool pages are reused for something else.
btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch():
Remove the parameter zip_size, and pass 0 to buf_page_get_gen().
buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL.
buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index
even if the tablespace is inaccessible.
buf_LRU_drop_page_hash_for_tablespace(): New global function, to drop
the adaptive hash index.
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Remove the parameter drop_ahi.
dict_index_remove_from_cache_low(): Actively drop the adaptive hash index
if entries exist. This should prevent InnoDB hangs on DROP TABLE or
DROP INDEX.
row_import_for_mysql(): Drop any adaptive hash index entries for the table.
row_drop_table_for_mysql(): Drop any adaptive hash index for the table,
except if the table resides in the system tablespace. (DISCARD TABLESPACE
does not apply to the system tablespace, and we do no want to drop the
adaptive hash index for other tables than the one that is being dropped.)
row_truncate_table_for_mysql(): Drop any adaptive hash index entries for
the table, except if the table resides in the system tablespace.
2018-05-29 12:52:43 +02:00
|
|
|
goto drop_ahi;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-26 10:34:51 +02:00
|
|
|
return false;
|
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727, MDEV-14491
InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0
by replacing the flawed wait logic in dict_index_remove_from_cache_low().
On DISCARD TABLESPACE, there is no need to drop the adaptive hash index.
We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or
DROP INDEX. As long as the dict_index_t object remains in the cache
and the table remains inaccessible, the adaptive hash index entries
to orphaned pages would not do any harm. They would be dropped when
buffer pool pages are reused for something else.
btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch():
Remove the parameter zip_size, and pass 0 to buf_page_get_gen().
buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL.
buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index
even if the tablespace is inaccessible.
buf_LRU_drop_page_hash_for_tablespace(): New global function, to drop
the adaptive hash index.
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Remove the parameter drop_ahi.
dict_index_remove_from_cache_low(): Actively drop the adaptive hash index
if entries exist. This should prevent InnoDB hangs on DROP TABLE or
DROP INDEX.
row_import_for_mysql(): Drop any adaptive hash index entries for the table.
row_drop_table_for_mysql(): Drop any adaptive hash index for the table,
except if the table resides in the system tablespace. (DISCARD TABLESPACE
does not apply to the system tablespace, and we do no want to drop the
adaptive hash index for other tables than the one that is being dropped.)
row_truncate_table_for_mysql(): Drop any adaptive hash index entries for
the table, except if the table resides in the system tablespace.
2018-05-29 12:52:43 +02:00
|
|
|
drop_ahi:
|
|
|
|
ulint id = table->space;
|
|
|
|
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
|
|
|
|
buf_LRU_drop_page_hash_for_tablespace(buf_pool_from_array(i),
|
|
|
|
id);
|
|
|
|
}
|
2018-06-26 10:34:51 +02:00
|
|
|
|
|
|
|
return true;
|
MDEV-16283 ALTER TABLE...DISCARD TABLESPACE still takes long on a large buffer pool
Also fixes MDEV-14727, MDEV-14491
InnoDB: Error: Waited for 5 secs for hash index ref_count (1) to drop to 0
by replacing the flawed wait logic in dict_index_remove_from_cache_low().
On DISCARD TABLESPACE, there is no need to drop the adaptive hash index.
We must drop it on IMPORT TABLESPACE, and eventually on DROP TABLE or
DROP INDEX. As long as the dict_index_t object remains in the cache
and the table remains inaccessible, the adaptive hash index entries
to orphaned pages would not do any harm. They would be dropped when
buffer pool pages are reused for something else.
btr_search_drop_page_hash_when_freed(), buf_LRU_drop_page_hash_batch():
Remove the parameter zip_size, and pass 0 to buf_page_get_gen().
buf_page_get_gen(): Ignore zip_size if mode==BUF_PEEK_IF_IN_POOL.
buf_LRU_drop_page_hash_for_tablespace(): Drop the adaptive hash index
even if the tablespace is inaccessible.
buf_LRU_drop_page_hash_for_tablespace(): New global function, to drop
the adaptive hash index.
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Remove the parameter drop_ahi.
dict_index_remove_from_cache_low(): Actively drop the adaptive hash index
if entries exist. This should prevent InnoDB hangs on DROP TABLE or
DROP INDEX.
row_import_for_mysql(): Drop any adaptive hash index entries for the table.
row_drop_table_for_mysql(): Drop any adaptive hash index for the table,
except if the table resides in the system tablespace. (DISCARD TABLESPACE
does not apply to the system tablespace, and we do no want to drop the
adaptive hash index for other tables than the one that is being dropped.)
row_truncate_table_for_mysql(): Drop any adaptive hash index entries for
the table, except if the table resides in the system tablespace.
2018-05-29 12:52:43 +02:00
|
|
|
}
|
2017-02-23 22:05:12 +01:00
|
|
|
#endif /* BTR_CUR_HASH_ADAPT */
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
While flushing (or removing dirty) pages from a tablespace we don't
|
|
|
|
want to hog the CPU and resources. Release the buffer pool and block
|
|
|
|
mutex and try to force a context switch. Then reacquire the same mutexes.
|
|
|
|
The current page is "fixed" before the release of the mutexes and then
|
|
|
|
"unfixed" again once we have reacquired the mutexes. */
|
2016-09-06 08:43:16 +02:00
|
|
|
static
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
|
|
|
buf_flush_yield(
|
|
|
|
/*============*/
|
|
|
|
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
|
|
|
|
buf_page_t* bpage) /*!< in/out: current page */
|
|
|
|
{
|
2016-08-12 10:17:45 +02:00
|
|
|
BPageMutex* block_mutex;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_ad(buf_page_in_file(bpage));
|
|
|
|
|
|
|
|
block_mutex = buf_page_get_mutex(bpage);
|
|
|
|
|
|
|
|
mutex_enter(block_mutex);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* "Fix" the block so that the position cannot be
|
|
|
|
changed after we release the buffer pool and
|
|
|
|
block mutexes. */
|
|
|
|
buf_page_set_sticky(bpage);
|
|
|
|
|
|
|
|
/* Now it is safe to release the buf_pool->mutex. */
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
|
|
|
|
mutex_exit(block_mutex);
|
|
|
|
/* Try and force a context switch. */
|
|
|
|
os_thread_yield();
|
|
|
|
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
mutex_enter(block_mutex);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* "Unfix" the block now that we have both the
|
|
|
|
buffer pool and block mutex again. */
|
|
|
|
buf_page_unset_sticky(bpage);
|
|
|
|
mutex_exit(block_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
If we have hogged the resources for too long then release the buffer
|
|
|
|
pool and flush list mutex and do a thread yield. Set the current page
|
|
|
|
to "sticky" so that it is not relocated during the yield.
|
|
|
|
@return true if yielded */
|
2016-09-06 08:43:16 +02:00
|
|
|
static MY_ATTRIBUTE((warn_unused_result))
|
2014-02-26 19:11:54 +01:00
|
|
|
bool
|
|
|
|
buf_flush_try_yield(
|
|
|
|
/*================*/
|
|
|
|
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
|
|
|
|
buf_page_t* bpage, /*!< in/out: bpage to remove */
|
|
|
|
ulint processed) /*!< in: number of pages processed */
|
|
|
|
{
|
|
|
|
/* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
|
|
|
|
loop we release buf_pool->mutex to let other threads
|
|
|
|
do their job but only if the block is not IO fixed. This
|
|
|
|
ensures that the block stays in its position in the
|
|
|
|
flush_list. */
|
|
|
|
|
|
|
|
if (bpage != NULL
|
|
|
|
&& processed >= BUF_LRU_DROP_SEARCH_SIZE
|
|
|
|
&& buf_page_get_io_fix(bpage) == BUF_IO_NONE) {
|
|
|
|
|
|
|
|
buf_flush_list_mutex_exit(buf_pool);
|
|
|
|
|
|
|
|
/* Release the buffer pool and block mutex
|
|
|
|
to give the other threads a go. */
|
|
|
|
|
|
|
|
buf_flush_yield(buf_pool, bpage);
|
|
|
|
|
|
|
|
buf_flush_list_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
/* Should not have been removed from the flush
|
|
|
|
list during the yield. However, this check is
|
|
|
|
not sufficient to catch a remove -> add. */
|
|
|
|
|
|
|
|
ut_ad(bpage->in_flush_list);
|
|
|
|
|
|
|
|
return(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Removes a single page from a given tablespace inside a specific
|
|
|
|
buffer pool instance.
|
|
|
|
@return true if page was removed. */
|
2016-09-06 08:43:16 +02:00
|
|
|
static MY_ATTRIBUTE((warn_unused_result))
|
2014-02-26 19:11:54 +01:00
|
|
|
bool
|
|
|
|
buf_flush_or_remove_page(
|
|
|
|
/*=====================*/
|
|
|
|
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
|
|
|
|
buf_page_t* bpage, /*!< in/out: bpage to remove */
|
|
|
|
bool flush) /*!< in: flush to disk if true but
|
|
|
|
don't remove else remove without
|
|
|
|
flushing to disk */
|
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_ad(buf_flush_list_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
/* bpage->space and bpage->io_fix are protected by
|
|
|
|
buf_pool->mutex and block_mutex. It is safe to check
|
|
|
|
them while holding buf_pool->mutex only. */
|
|
|
|
|
|
|
|
if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
|
|
|
|
|
|
|
|
/* We cannot remove this page during this scan
|
|
|
|
yet; maybe the system is currently reading it
|
|
|
|
in, or flushing the modifications to the file */
|
|
|
|
return(false);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
BPageMutex* block_mutex;
|
|
|
|
bool processed = false;
|
|
|
|
|
|
|
|
block_mutex = buf_page_get_mutex(bpage);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* We have to release the flush_list_mutex to obey the
|
|
|
|
latching order. We are however guaranteed that the page
|
|
|
|
will stay in the flush_list and won't be relocated because
|
|
|
|
buf_flush_remove() and buf_flush_relocate_on_flush_list()
|
|
|
|
need buf_pool->mutex as well. */
|
|
|
|
|
|
|
|
buf_flush_list_mutex_exit(buf_pool);
|
|
|
|
|
|
|
|
mutex_enter(block_mutex);
|
|
|
|
|
|
|
|
ut_ad(bpage->oldest_modification != 0);
|
|
|
|
|
|
|
|
if (!flush) {
|
|
|
|
|
|
|
|
buf_flush_remove(bpage);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
mutex_exit(block_mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
processed = true;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
} else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
|
2014-05-05 18:20:28 +02:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/* The following call will release the buffer pool
|
|
|
|
and block mutex. */
|
|
|
|
processed = buf_flush_page(
|
|
|
|
buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false);
|
2014-05-05 18:20:28 +02:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (processed) {
|
|
|
|
/* Wake possible simulated aio thread to actually
|
|
|
|
post the writes to the operating system */
|
|
|
|
os_aio_simulated_wake_handler_threads();
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
} else {
|
|
|
|
mutex_exit(block_mutex);
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
} else {
|
2016-08-12 10:17:45 +02:00
|
|
|
mutex_exit(block_mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
buf_flush_list_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
ut_ad(!mutex_own(block_mutex));
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
return(processed);
|
|
|
|
}
|
|
|
|
|
2017-11-20 08:49:21 +01:00
|
|
|
/** Remove all dirty pages belonging to a given tablespace inside a specific
|
2014-02-26 19:11:54 +01:00
|
|
|
buffer pool instance when we are deleting the data file(s) of that
|
|
|
|
tablespace. The pages still remain a part of LRU and are evicted from
|
|
|
|
the list as they age towards the tail of the LRU.
|
2017-11-20 08:49:21 +01:00
|
|
|
@param[in,out] buf_pool buffer pool
|
|
|
|
@param[in] id tablespace identifier
|
|
|
|
@param[in] observer flush observer (to check for interrupt),
|
|
|
|
or NULL if the files should not be written to
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
@param[in] first first page to be flushed or evicted
|
|
|
|
@return whether all matching dirty pages were removed */
|
2016-09-06 08:43:16 +02:00
|
|
|
static MY_ATTRIBUTE((warn_unused_result))
|
2017-11-20 08:49:21 +01:00
|
|
|
bool
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_flush_or_remove_pages(
|
2017-11-20 08:49:21 +01:00
|
|
|
buf_pool_t* buf_pool,
|
|
|
|
ulint id,
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
FlushObserver* observer,
|
|
|
|
ulint first)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
buf_page_t* prev;
|
|
|
|
buf_page_t* bpage;
|
|
|
|
ulint processed = 0;
|
|
|
|
|
|
|
|
buf_flush_list_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
rescan:
|
|
|
|
bool all_freed = true;
|
|
|
|
|
|
|
|
for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
|
|
|
|
bpage != NULL;
|
|
|
|
bpage = prev) {
|
|
|
|
|
|
|
|
ut_a(buf_page_in_file(bpage));
|
|
|
|
|
|
|
|
/* Save the previous link because once we free the
|
|
|
|
page we can't rely on the links. */
|
|
|
|
|
|
|
|
prev = UT_LIST_GET_PREV(list, bpage);
|
|
|
|
|
2017-11-20 08:49:21 +01:00
|
|
|
/* Flush the pages matching space id,
|
|
|
|
or pages matching the flush observer. */
|
|
|
|
if (observer && observer->is_partial_flush()) {
|
|
|
|
if (observer != bpage->flush_observer) {
|
|
|
|
/* Skip this block. */
|
|
|
|
} else if (!buf_flush_or_remove_page(
|
|
|
|
buf_pool, bpage,
|
|
|
|
!observer->is_interrupted())) {
|
|
|
|
all_freed = false;
|
|
|
|
} else if (!observer->is_interrupted()) {
|
|
|
|
/* The processing was successful. And during the
|
|
|
|
processing we have released the buf_pool mutex
|
|
|
|
when calling buf_page_flush(). We cannot trust
|
|
|
|
prev pointer. */
|
|
|
|
goto rescan;
|
|
|
|
}
|
|
|
|
} else if (id != bpage->id.space()) {
|
|
|
|
/* Skip this block, because it is for a
|
|
|
|
different tablespace. */
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
} else if (bpage->id.page_no() < first) {
|
|
|
|
/* Skip this block, because it is below the limit. */
|
2017-11-20 08:49:21 +01:00
|
|
|
} else if (!buf_flush_or_remove_page(
|
|
|
|
buf_pool, bpage, observer != NULL)) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Remove was unsuccessful, we have to try again
|
|
|
|
by scanning the entire list from the end.
|
|
|
|
This also means that we never released the
|
|
|
|
buf_pool mutex. Therefore we can trust the prev
|
|
|
|
pointer.
|
|
|
|
buf_flush_or_remove_page() released the
|
|
|
|
flush list mutex but not the buf_pool mutex.
|
|
|
|
Therefore it is possible that a new page was
|
|
|
|
added to the flush list. For example, in case
|
|
|
|
where we are at the head of the flush list and
|
|
|
|
prev == NULL. That is OK because we have the
|
|
|
|
tablespace quiesced and no new pages for this
|
|
|
|
space-id should enter flush_list. This is
|
|
|
|
because the only callers of this function are
|
|
|
|
DROP TABLE and FLUSH TABLE FOR EXPORT.
|
|
|
|
We know that we'll have to do at least one more
|
|
|
|
scan but we don't break out of loop here and
|
|
|
|
try to do as much work as we can in this
|
|
|
|
iteration. */
|
|
|
|
|
|
|
|
all_freed = false;
|
2017-11-20 08:49:21 +01:00
|
|
|
} else if (observer) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* The processing was successful. And during the
|
|
|
|
processing we have released the buf_pool mutex
|
|
|
|
when calling buf_page_flush(). We cannot trust
|
|
|
|
prev pointer. */
|
|
|
|
goto rescan;
|
|
|
|
}
|
|
|
|
|
|
|
|
++processed;
|
|
|
|
|
|
|
|
/* Yield if we have hogged the CPU and mutexes for too long. */
|
|
|
|
if (buf_flush_try_yield(buf_pool, prev, processed)) {
|
|
|
|
|
|
|
|
/* Reset the batch size counter if we had to yield. */
|
|
|
|
|
|
|
|
processed = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The check for trx is interrupted is expensive, we want
|
|
|
|
to check every N iterations. */
|
2017-11-20 08:49:21 +01:00
|
|
|
if (!processed && observer) {
|
|
|
|
observer->check_interrupted();
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_flush_list_mutex_exit(buf_pool);
|
|
|
|
|
2017-11-20 08:49:21 +01:00
|
|
|
return(all_freed);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2017-11-02 21:51:34 +01:00
|
|
|
/** Remove or flush all the dirty pages that belong to a given tablespace
|
2014-02-26 19:11:54 +01:00
|
|
|
inside a specific buffer pool instance. The pages will remain in the LRU
|
|
|
|
list and will be evicted from the LRU list as they age and move towards
|
2017-11-02 21:51:34 +01:00
|
|
|
the tail of the LRU list.
|
|
|
|
@param[in,out] buf_pool buffer pool
|
|
|
|
@param[in] id tablespace identifier
|
2017-11-07 22:02:39 +01:00
|
|
|
@param[in] observer flush observer,
|
|
|
|
or NULL if the files should not be written to
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
@param[in] first first page to be flushed or evicted */
|
2016-09-06 08:43:16 +02:00
|
|
|
static
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
|
|
|
buf_flush_dirty_pages(
|
2017-11-07 22:02:39 +01:00
|
|
|
buf_pool_t* buf_pool,
|
|
|
|
ulint id,
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
FlushObserver* observer,
|
|
|
|
ulint first)
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
2017-11-20 08:49:21 +01:00
|
|
|
for (;;) {
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
bool freed = buf_flush_or_remove_pages(buf_pool, id, observer,
|
|
|
|
first);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
|
|
|
|
ut_ad(buf_flush_validate(buf_pool));
|
|
|
|
|
2017-11-20 08:49:21 +01:00
|
|
|
if (freed) {
|
|
|
|
break;
|
2016-08-12 10:17:45 +02:00
|
|
|
}
|
|
|
|
|
2017-11-20 08:49:21 +01:00
|
|
|
os_thread_sleep(2000);
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_ad(buf_flush_validate(buf_pool));
|
2017-11-20 08:49:21 +01:00
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-11-20 08:49:21 +01:00
|
|
|
ut_ad((observer && observer->is_interrupted())
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
|| first
|
2016-08-12 10:17:45 +02:00
|
|
|
|| buf_pool_get_dirty_pages_count(buf_pool, id, observer) == 0);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
MDEV-13328 ALTER TABLE…DISCARD TABLESPACE takes a lot of time
With a big buffer pool that contains many data pages,
DISCARD TABLESPACE took a long time, because it would scan the
entire buffer pool to remove any pages that belong to the tablespace.
With a large buffer pool, this would take a lot of time, especially
when the table-to-discard is empty.
The minimum amount of work that DISCARD TABLESPACE must do is to
remove the pages of the to-be-discarded table from the
buf_pool->flush_list because any writes to the data file must be
prevented before the file is deleted.
If DISCARD TABLESPACE does not evict the pages from the buffer pool,
then IMPORT TABLESPACE must do it, because we must prevent pre-DISCARD,
not-yet-evicted pages from being mistaken for pages of the imported
tablespace.
It would not be a useful fix to simply move the buffer pool scan to
the IMPORT TABLESPACE step. What we can do is to actively evict those
pages that could be mistaken for imported pages. In this way, when
importing a small table into a big buffer pool, the import should
still run relatively fast.
Import is bypassing the buffer pool when reading pages for the
adjustment phase. In the adjustment phase, if a page exists in
the buffer pool, we could replace it with the page from the imported
file. Unfortunately I did not get this to work properly, so instead
we will simply evict any matching page from the buffer pool.
buf_page_get_gen(): Implement BUF_EVICT_IF_IN_POOL, a new mode
where the requested page will be evicted if it is found. There
must be no unwritten changes for the page.
buf_remove_t: Remove. Instead, use trx!=NULL to signify that a write
to file is desired, and use a separate parameter bool drop_ahi.
buf_LRU_flush_or_remove_pages(), fil_delete_tablespace():
Replace buf_remove_t.
buf_LRU_remove_pages(), buf_LRU_remove_all_pages(): Remove.
PageConverter::m_mtr: A dummy mini-transaction buffer
PageConverter::PageConverter(): Complete the member initialization list.
PageConverter::operator()(): Evict any 'shadow' pages from the
buffer pool so that pre-existing (garbage) pages cannot be mistaken
for pages that exist in the being-imported file.
row_discard_tablespace(): Remove a bogus comment that seems to
refer to IMPORT TABLESPACE, not DISCARD TABLESPACE.
2017-11-02 21:38:37 +01:00
|
|
|
/** Empty the flush list for all pages belonging to a tablespace.
|
|
|
|
@param[in] id tablespace identifier
|
2017-11-20 08:49:21 +01:00
|
|
|
@param[in] observer flush observer,
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
or NULL if nothing is to be written
|
|
|
|
@param[in] first first page to be flushed or evicted */
|
|
|
|
void buf_LRU_flush_or_remove_pages(ulint id, FlushObserver* observer,
				   ulint first)
{
	/* Pages in the system tablespace must never be discarded:
	a zero id is only acceptable together with an observer. */
	ut_ad(id || observer);

	/* Hand the request to every buffer pool instance in turn. */
	for (ulint n = 0; n < srv_buf_pool_instances; ++n) {
		buf_pool_t*	pool = buf_pool_from_array(n);

		buf_flush_dirty_pages(pool, id, observer, first);
	}

	/* Without an observer, or after an interrupted one, there is
	nothing further to wait for. */
	if (!observer || observer->is_interrupted()) {
		return;
	}

	/* Ensure that all asynchronous IO is completed. */
	os_aio_wait_until_no_pending_writes();
	fil_flush(id);
}
|
|
|
|
|
|
|
|
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order.
The caller must hold the buffer pool mutex, and bpage must be in the
state BUF_BLOCK_ZIP_PAGE. */
void
buf_LRU_insert_zip_clean(
/*=====================*/
	buf_page_t*	bpage)	/*!< in: pointer to the block in question */
{
	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);

	ut_ad(buf_pool_mutex_own(buf_pool));
	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);

	/* Walk forward in the LRU list from bpage until the first
	successor that is itself a BUF_BLOCK_ZIP_PAGE, that is, a
	member of the zip_clean list. */
	buf_page_t*	succ = UT_LIST_GET_NEXT(LRU, bpage);

	while (succ != NULL
	       && buf_page_get_state(succ) != BUF_BLOCK_ZIP_PAGE) {
		succ = UT_LIST_GET_NEXT(LRU, succ);
	}

	/* bpage belongs right before that successor, i.e., right after
	the successor's predecessor in zip_clean. */
	buf_page_t*	pred = (succ != NULL)
		? UT_LIST_GET_PREV(list, succ)
		: NULL;

	if (pred == NULL) {
		/* No successor was found, or it has no predecessor:
		bpage becomes the first element. */
		UT_LIST_ADD_FIRST(buf_pool->zip_clean, bpage);
	} else {
		UT_LIST_INSERT_AFTER(buf_pool->zip_clean, pred, bpage);
	}
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Try to free an uncompressed page of a compressed block from the unzip
|
|
|
|
LRU list. The compressed page is preserved, and it need not be clean.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return true if freed */
|
|
|
|
static
|
|
|
|
bool
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_LRU_free_from_unzip_LRU_list(
|
|
|
|
/*=============================*/
|
|
|
|
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
|
2016-08-12 10:17:45 +02:00
|
|
|
bool scan_all) /*!< in: scan whole LRU list
|
|
|
|
if true, otherwise scan only
|
2014-02-26 19:11:54 +01:00
|
|
|
srv_LRU_scan_depth / 2 blocks. */
|
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) {
|
2016-08-12 10:17:45 +02:00
|
|
|
return(false);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint scanned = 0;
|
|
|
|
bool freed = false;
|
|
|
|
|
|
|
|
for (buf_block_t* block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
|
|
|
|
block != NULL
|
|
|
|
&& !freed
|
2014-02-26 19:11:54 +01:00
|
|
|
&& (scan_all || scanned < srv_LRU_scan_depth);
|
|
|
|
++scanned) {
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_block_t* prev_block;
|
|
|
|
|
|
|
|
prev_block = UT_LIST_GET_PREV(unzip_LRU, block);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
|
|
|
|
ut_ad(block->in_unzip_LRU_list);
|
|
|
|
ut_ad(block->page.in_LRU_list);
|
|
|
|
|
|
|
|
freed = buf_LRU_free_page(&block->page, false);
|
|
|
|
|
|
|
|
block = prev_block;
|
|
|
|
}
|
|
|
|
|
2014-11-06 12:17:11 +01:00
|
|
|
if (scanned) {
|
|
|
|
MONITOR_INC_VALUE_CUMULATIVE(
|
|
|
|
MONITOR_LRU_UNZIP_SEARCH_SCANNED,
|
|
|
|
MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
|
|
|
|
MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
|
|
|
|
scanned);
|
|
|
|
}
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
return(freed);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Try to free a clean page from the common LRU list.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return true if freed */
|
|
|
|
static
|
|
|
|
bool
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_LRU_free_from_common_LRU_list(
|
|
|
|
/*==============================*/
|
|
|
|
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
|
2016-08-12 10:17:45 +02:00
|
|
|
bool scan_all) /*!< in: scan whole LRU list
|
|
|
|
if true, otherwise scan only
|
|
|
|
up to BUF_LRU_SEARCH_SCAN_THRESHOLD */
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint scanned = 0;
|
|
|
|
bool freed = false;
|
|
|
|
|
|
|
|
for (buf_page_t* bpage = buf_pool->lru_scan_itr.start();
|
|
|
|
bpage != NULL
|
|
|
|
&& !freed
|
2014-11-06 12:17:11 +01:00
|
|
|
&& (scan_all || scanned < BUF_LRU_SEARCH_SCAN_THRESHOLD);
|
|
|
|
++scanned, bpage = buf_pool->lru_scan_itr.get()) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
|
|
|
|
BPageMutex* mutex = buf_page_get_mutex(bpage);
|
|
|
|
|
2014-11-06 12:17:11 +01:00
|
|
|
buf_pool->lru_scan_itr.set(prev);
|
|
|
|
|
|
|
|
mutex_enter(mutex);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(buf_page_in_file(bpage));
|
|
|
|
ut_ad(bpage->in_LRU_list);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
unsigned accessed = buf_page_is_accessed(bpage);
|
2014-11-06 12:17:11 +01:00
|
|
|
|
|
|
|
if (buf_flush_ready_for_replace(bpage)) {
|
|
|
|
mutex_exit(mutex);
|
|
|
|
freed = buf_LRU_free_page(bpage, true);
|
|
|
|
} else {
|
|
|
|
mutex_exit(mutex);
|
|
|
|
}
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
if (freed && !accessed) {
|
|
|
|
/* Keep track of pages that are evicted without
|
|
|
|
ever being accessed. This gives us a measure of
|
|
|
|
the effectiveness of readahead */
|
|
|
|
++buf_pool->stat.n_ra_pages_evicted;
|
|
|
|
}
|
|
|
|
|
2014-11-06 12:17:11 +01:00
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_ad(!mutex_own(mutex));
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2014-11-06 12:17:11 +01:00
|
|
|
if (scanned) {
|
|
|
|
MONITOR_INC_VALUE_CUMULATIVE(
|
|
|
|
MONITOR_LRU_SEARCH_SCANNED,
|
|
|
|
MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
|
|
|
|
MONITOR_LRU_SEARCH_SCANNED_PER_CALL,
|
|
|
|
scanned);
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
return(freed);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Try to free a replaceable block.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return true if found and freed */
|
|
|
|
bool
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_LRU_scan_and_free_block(
|
|
|
|
/*========================*/
|
|
|
|
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
|
2016-08-12 10:17:45 +02:00
|
|
|
bool scan_all) /*!< in: scan whole LRU list
|
|
|
|
if true, otherwise scan only
|
|
|
|
BUF_LRU_SEARCH_SCAN_THRESHOLD
|
|
|
|
blocks. */
|
2014-02-26 19:11:54 +01:00
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
return(buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all)
|
2016-08-12 10:17:45 +02:00
|
|
|
|| buf_LRU_free_from_common_LRU_list(buf_pool, scan_all));
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Returns TRUE if less than 25 % of the buffer pool in any instance is
|
|
|
|
available. This can be used in heuristics to prevent huge transactions
|
|
|
|
eating up the whole buffer pool for their locks.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE if less than 25 % of buffer pool left */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool
|
|
|
|
buf_LRU_buf_pool_running_out(void)
|
|
|
|
/*==============================*/
|
|
|
|
{
|
|
|
|
ibool ret = FALSE;
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (ulint i = 0; i < srv_buf_pool_instances && !ret; i++) {
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_pool_t* buf_pool;
|
|
|
|
|
|
|
|
buf_pool = buf_pool_from_array(i);
|
|
|
|
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (!recv_recovery_is_on()
|
2014-02-26 19:11:54 +01:00
|
|
|
&& UT_LIST_GET_LEN(buf_pool->free)
|
|
|
|
+ UT_LIST_GET_LEN(buf_pool->LRU)
|
2016-08-12 10:17:45 +02:00
|
|
|
< ut_min(buf_pool->curr_size,
|
|
|
|
buf_pool->old_size) / 4) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ret = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Returns a free block from the buf_pool. The block is taken off the
|
|
|
|
free list. If it is empty, returns NULL.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return a free control block, or NULL if the buf_block->free list is empty */
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_block_t*
|
|
|
|
buf_LRU_get_free_only(
|
|
|
|
/*==================*/
|
|
|
|
buf_pool_t* buf_pool)
|
|
|
|
{
|
|
|
|
buf_block_t* block;
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
block = reinterpret_cast<buf_block_t*>(
|
|
|
|
UT_LIST_GET_FIRST(buf_pool->free));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
while (block != NULL) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(block->page.in_free_list);
|
|
|
|
ut_d(block->page.in_free_list = FALSE);
|
|
|
|
ut_ad(!block->page.in_flush_list);
|
|
|
|
ut_ad(!block->page.in_LRU_list);
|
|
|
|
ut_a(!buf_page_in_file(&block->page));
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_REMOVE(buf_pool->free, &block->page);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (buf_pool->curr_size >= buf_pool->old_size
|
|
|
|
|| UT_LIST_GET_LEN(buf_pool->withdraw)
|
|
|
|
>= buf_pool->withdraw_target
|
|
|
|
|| !buf_block_will_withdrawn(buf_pool, block)) {
|
|
|
|
/* found valid free block */
|
|
|
|
buf_page_mutex_enter(block);
|
2016-09-02 16:28:54 +02:00
|
|
|
/* No adaptive hash index entries may point to
|
|
|
|
a free block. */
|
|
|
|
assert_block_ahi_empty(block);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
|
|
|
|
UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_ad(buf_pool_from_block(block) == buf_pool);
|
|
|
|
|
|
|
|
buf_page_mutex_exit(block);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This should be withdrawn */
|
|
|
|
UT_LIST_ADD_LAST(
|
|
|
|
buf_pool->withdraw,
|
|
|
|
&block->page);
|
|
|
|
ut_d(block->in_withdraw_list = TRUE);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
block = reinterpret_cast<buf_block_t*>(
|
|
|
|
UT_LIST_GET_FIRST(buf_pool->free));
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return(block);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Checks how much of buf_pool is occupied by non-data objects like
|
|
|
|
AHI, lock heaps etc. Depending on the size of non-data objects this
|
|
|
|
function will either assert or issue a warning and switch on the
|
|
|
|
status monitor. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
buf_LRU_check_size_of_non_data_objects(
|
|
|
|
/*===================================*/
|
|
|
|
const buf_pool_t* buf_pool) /*!< in: buffer pool instance */
|
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (!recv_recovery_is_on()
|
|
|
|
&& buf_pool->curr_size == buf_pool->old_size
|
|
|
|
&& UT_LIST_GET_LEN(buf_pool->free)
|
2014-02-26 19:11:54 +01:00
|
|
|
+ UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ib::fatal() << "Over 95 percent of the buffer pool is"
|
2017-02-23 22:05:12 +01:00
|
|
|
" occupied by lock heaps"
|
|
|
|
#ifdef BTR_CUR_HASH_ADAPT
|
|
|
|
" or the adaptive hash index!"
|
|
|
|
#endif /* BTR_CUR_HASH_ADAPT */
|
2016-08-12 10:17:45 +02:00
|
|
|
" Check that your transactions do not set too many"
|
2017-02-23 22:05:12 +01:00
|
|
|
" row locks, or review if"
|
|
|
|
" innodb_buffer_pool_size="
|
|
|
|
<< (buf_pool->curr_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
|
|
|
|
<< "M could be bigger.";
|
2016-08-12 10:17:45 +02:00
|
|
|
} else if (!recv_recovery_is_on()
|
|
|
|
&& buf_pool->curr_size == buf_pool->old_size
|
2014-02-26 19:11:54 +01:00
|
|
|
&& (UT_LIST_GET_LEN(buf_pool->free)
|
|
|
|
+ UT_LIST_GET_LEN(buf_pool->LRU))
|
|
|
|
< buf_pool->curr_size / 3) {
|
|
|
|
|
|
|
|
if (!buf_lru_switched_on_innodb_mon) {
|
|
|
|
|
|
|
|
/* Over 67 % of the buffer pool is occupied by lock
|
|
|
|
heaps or the adaptive hash index. This may be a memory
|
|
|
|
leak! */
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ib::warn() << "Over 67 percent of the buffer pool is"
|
2017-02-23 22:05:12 +01:00
|
|
|
" occupied by lock heaps"
|
|
|
|
#ifdef BTR_CUR_HASH_ADAPT
|
|
|
|
" or the adaptive hash index!"
|
|
|
|
#endif /* BTR_CUR_HASH_ADAPT */
|
|
|
|
" Check that your transactions do not"
|
|
|
|
" set too many row locks."
|
|
|
|
" innodb_buffer_pool_size="
|
|
|
|
<< (buf_pool->curr_size >>
|
|
|
|
(20 - UNIV_PAGE_SIZE_SHIFT)) << "M."
|
|
|
|
" Starting the InnoDB Monitor to print"
|
|
|
|
" diagnostics.";
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
buf_lru_switched_on_innodb_mon = true;
|
2014-02-26 19:11:54 +01:00
|
|
|
srv_print_innodb_monitor = TRUE;
|
2014-10-24 16:56:04 +02:00
|
|
|
os_event_set(srv_monitor_event);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
} else if (buf_lru_switched_on_innodb_mon) {
|
|
|
|
|
|
|
|
/* Switch off the InnoDB Monitor; this is a simple way
|
|
|
|
to stop the monitor if the situation becomes less urgent,
|
|
|
|
but may also surprise users if the user also switched on the
|
|
|
|
monitor! */
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_lru_switched_on_innodb_mon = false;
|
2014-02-26 19:11:54 +01:00
|
|
|
srv_print_innodb_monitor = FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Returns a free block from the buf_pool. The block is taken off the
|
|
|
|
free list. If free list is empty, blocks are moved from the end of the
|
|
|
|
LRU list to the free list.
|
|
|
|
This function is called from a user thread when it needs a clean
|
|
|
|
block to read in a page. Note that we only ever get a block from
|
|
|
|
the free list. Even when we flush a page or find a page in LRU scan
|
|
|
|
we put it to free list to be used.
|
|
|
|
* iteration 0:
|
|
|
|
* get a block from free list, success:done
|
|
|
|
* if buf_pool->try_LRU_scan is set
|
|
|
|
* scan LRU up to srv_LRU_scan_depth to find a clean block
|
|
|
|
* the above will put the block on free list
|
|
|
|
* success:retry the free list
|
|
|
|
* flush one dirty page from tail of LRU to disk
|
|
|
|
* the above will put the block on free list
|
|
|
|
* success: retry the free list
|
|
|
|
* iteration 1:
|
|
|
|
* same as iteration 0 except:
|
|
|
|
* scan whole LRU list
|
|
|
|
* scan LRU list even if buf_pool->try_LRU_scan is not set
|
|
|
|
* iteration > 1:
|
2014-11-06 12:17:11 +01:00
|
|
|
* same as iteration 1 but sleep 10ms
|
2016-08-12 10:17:45 +02:00
|
|
|
@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_block_t*
|
|
|
|
buf_LRU_get_free_block(
|
|
|
|
/*===================*/
|
|
|
|
buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */
|
|
|
|
{
|
|
|
|
buf_block_t* block = NULL;
|
2016-08-12 10:17:45 +02:00
|
|
|
bool freed = false;
|
2014-02-26 19:11:54 +01:00
|
|
|
ulint n_iterations = 0;
|
|
|
|
ulint flush_failures = 0;
|
|
|
|
|
|
|
|
MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
|
|
|
|
loop:
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
buf_LRU_check_size_of_non_data_objects(buf_pool);
|
|
|
|
|
2018-01-09 11:37:58 +01:00
|
|
|
DBUG_EXECUTE_IF("ib_lru_force_no_free_page",
|
|
|
|
if (!buf_lru_free_blocks_error_printed) {
|
|
|
|
n_iterations = 21;
|
|
|
|
goto not_found;});
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* If there is a block in the free list, take it */
|
|
|
|
block = buf_LRU_get_free_only(buf_pool);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (block != NULL) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
ut_ad(buf_pool_from_block(block) == buf_pool);
|
|
|
|
memset(&block->page.zip, 0, sizeof block->page.zip);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
block->skip_flush_check = false;
|
|
|
|
block->page.flush_observer = NULL;
|
2014-02-26 19:11:54 +01:00
|
|
|
return(block);
|
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
MONITOR_INC( MONITOR_LRU_GET_FREE_LOOPS );
|
|
|
|
freed = false;
|
2014-02-26 19:11:54 +01:00
|
|
|
if (buf_pool->try_LRU_scan || n_iterations > 0) {
|
|
|
|
/* If no block was in the free list, search from the
|
|
|
|
end of the LRU list and try to free a block there.
|
|
|
|
If we are doing for the first time we'll scan only
|
|
|
|
tail of the LRU list otherwise we scan the whole LRU
|
|
|
|
list. */
|
2016-08-12 10:17:45 +02:00
|
|
|
freed = buf_LRU_scan_and_free_block(
|
|
|
|
buf_pool, n_iterations > 0);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (!freed && n_iterations == 0) {
|
|
|
|
/* Tell other threads that there is no point
|
|
|
|
in scanning the LRU list. This flag is set to
|
|
|
|
TRUE again when we flush a batch from this
|
|
|
|
buffer pool. */
|
|
|
|
buf_pool->try_LRU_scan = FALSE;
|
2014-11-06 12:17:11 +01:00
|
|
|
|
|
|
|
/* Also tell the page_cleaner thread that
|
|
|
|
there is work for it to do. */
|
|
|
|
os_event_set(buf_flush_event);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-01-09 11:37:58 +01:00
|
|
|
#ifndef DBUG_OFF
|
|
|
|
not_found:
|
|
|
|
#endif
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
|
|
|
|
if (freed) {
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
|
2018-01-11 11:03:23 +01:00
|
|
|
if (n_iterations > 20 && !buf_lru_free_blocks_error_printed
|
2016-08-12 10:17:45 +02:00
|
|
|
&& srv_buf_pool_old_size == srv_buf_pool_size) {
|
|
|
|
|
|
|
|
ib::warn() << "Difficult to find free blocks in the buffer pool"
|
|
|
|
" (" << n_iterations << " search iterations)! "
|
|
|
|
<< flush_failures << " failed attempts to"
|
2018-01-11 11:03:23 +01:00
|
|
|
" flush a page!"
|
|
|
|
" Consider increasing innodb_buffer_pool_size."
|
2016-08-12 10:17:45 +02:00
|
|
|
" Pending flushes (fsync) log: "
|
|
|
|
<< fil_n_pending_log_flushes
|
|
|
|
<< "; buffer pool: "
|
|
|
|
<< fil_n_pending_tablespace_flushes
|
|
|
|
<< ". " << os_n_file_reads << " OS file reads, "
|
|
|
|
<< os_n_file_writes << " OS file writes, "
|
|
|
|
<< os_n_fsyncs
|
2018-01-11 11:03:23 +01:00
|
|
|
<< " OS fsyncs.";
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-01-09 11:37:58 +01:00
|
|
|
buf_lru_free_blocks_error_printed = true;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* If we have scanned the whole LRU and still are unable to
|
|
|
|
find a free block then we should sleep here to let the
|
2014-11-06 12:17:11 +01:00
|
|
|
page_cleaner do an LRU batch for us. */
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (!srv_read_only_mode) {
|
|
|
|
os_event_set(buf_flush_event);
|
|
|
|
}
|
|
|
|
|
2014-11-06 12:17:11 +01:00
|
|
|
if (n_iterations > 1) {
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
MONITOR_INC( MONITOR_LRU_GET_FREE_WAITS );
|
2014-11-06 12:17:11 +01:00
|
|
|
os_thread_sleep(10000);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* No free block was found: try to flush the LRU list.
|
|
|
|
This call will flush one page from the LRU and put it on the
|
|
|
|
free list. That means that the free block is up for grabs for
|
|
|
|
all user threads.
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
TODO: A more elegant way would have been to return the freed
|
|
|
|
up block to the caller here but the code that deals with
|
|
|
|
removing the block from page_hash and LRU_list is fairly
|
|
|
|
involved (particularly in case of compressed pages). We
|
|
|
|
can do that in a separate patch sometime in future. */
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
if (!buf_flush_single_page_from_LRU(buf_pool)) {
|
|
|
|
MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
|
|
|
|
++flush_failures;
|
|
|
|
}
|
|
|
|
|
MDEV-12674 Innodb_row_lock_current_waits has overflow
There is a race condition related to the variable
srv_stats.n_lock_wait_current_count, which is only
incremented and decremented by the function lock_wait_suspend_thread(),
The incrementing is protected by lock_sys->wait_mutex, but the
decrementing does not appear to be protected by anything.
This mismatch could allow the counter to be corrupted when a
transactional InnoDB table or record lock wait is terminating
roughly at the same time with the start of a wait on a
(possibly different) lock.
ib_counter_t: Remove some unused methods. Prevent instantiation for N=1.
Add an inc() method that takes a slot index as a parameter.
single_indexer_t: Remove.
simple_counter<typename Type, bool atomic=false>: A new counter wrapper.
Optionally use atomic memory operations for modifying the counter.
Aligned to the cache line size.
lsn_ctr_1_t, ulint_ctr_1_t, int64_ctr_1_t: Define as simple_counter<Type>.
These counters are either only incremented (and we do not care about
losing some increment operations), or the increment/decrement operations
are protected by some mutex.
srv_stats_t::os_log_pending_writes: Document that the number is protected
by log_sys->mutex.
srv_stats_t::n_lock_wait_current_count: Use simple_counter<ulint, true>,
that is, atomic inc() and dec() operations.
lock_wait_suspend_thread(): Release the mutexes before incrementing
the counters. Avoid acquiring the lock mutex if the lock wait has
already been resolved. Atomically increment and decrement
srv_stats.n_lock_wait_current_count.
row_insert_for_mysql(), row_update_for_mysql(),
row_update_cascade_for_mysql(): Use the inc() method with the trx->id
as the slot index. This is a non-functional change, just using
inc() instead of add(1).
buf_LRU_get_free_block(): Replace the method add(index, n) with inc().
There is no slot index in the simple_counter.
2017-05-11 20:12:37 +02:00
|
|
|
srv_stats.buf_pool_wait_free.inc();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
n_iterations++;
|
|
|
|
|
|
|
|
goto loop;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Moves the LRU_old pointer so that the length of the old blocks list
|
|
|
|
is inside the allowed limits. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
buf_LRU_old_adjust_len(
|
|
|
|
/*===================*/
|
|
|
|
buf_pool_t* buf_pool) /*!< in: buffer pool instance */
|
|
|
|
{
|
|
|
|
ulint old_len;
|
|
|
|
ulint new_len;
|
|
|
|
|
|
|
|
ut_a(buf_pool->LRU_old);
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
|
|
|
|
ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
|
|
|
|
#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
|
|
|
|
# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
|
|
|
|
#endif
|
|
|
|
#ifdef UNIV_LRU_DEBUG
|
|
|
|
/* buf_pool->LRU_old must be the first item in the LRU list
|
|
|
|
whose "old" flag is set. */
|
|
|
|
ut_a(buf_pool->LRU_old->old);
|
|
|
|
ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
|
|
|
|
|| !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
|
|
|
|
ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
|
|
|
|
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
|
|
|
|
#endif /* UNIV_LRU_DEBUG */
|
|
|
|
|
|
|
|
old_len = buf_pool->LRU_old_len;
|
|
|
|
new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
|
|
|
|
* buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
|
|
|
|
UT_LIST_GET_LEN(buf_pool->LRU)
|
|
|
|
- (BUF_LRU_OLD_TOLERANCE
|
|
|
|
+ BUF_LRU_NON_OLD_MIN_LEN));
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
buf_page_t* LRU_old = buf_pool->LRU_old;
|
|
|
|
|
|
|
|
ut_a(LRU_old);
|
|
|
|
ut_ad(LRU_old->in_LRU_list);
|
|
|
|
#ifdef UNIV_LRU_DEBUG
|
|
|
|
ut_a(LRU_old->old);
|
|
|
|
#endif /* UNIV_LRU_DEBUG */
|
|
|
|
|
|
|
|
/* Update the LRU_old pointer if necessary */
|
|
|
|
|
|
|
|
if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
|
|
|
|
|
|
|
|
buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
|
|
|
|
LRU, LRU_old);
|
|
|
|
#ifdef UNIV_LRU_DEBUG
|
|
|
|
ut_a(!LRU_old->old);
|
|
|
|
#endif /* UNIV_LRU_DEBUG */
|
|
|
|
old_len = ++buf_pool->LRU_old_len;
|
|
|
|
buf_page_set_old(LRU_old, TRUE);
|
|
|
|
|
|
|
|
} else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
|
|
|
|
|
|
|
|
buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
|
|
|
|
old_len = --buf_pool->LRU_old_len;
|
|
|
|
buf_page_set_old(LRU_old, FALSE);
|
|
|
|
} else {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*******************************************************************//**
|
|
|
|
Initializes the old blocks pointer in the LRU list. This function should be
|
|
|
|
called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
buf_LRU_old_init(
|
|
|
|
/*=============*/
|
|
|
|
buf_pool_t* buf_pool)
|
|
|
|
{
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
|
|
|
|
|
|
|
|
/* We first initialize all blocks in the LRU list as old and then use
|
|
|
|
the adjust function to move the LRU_old pointer to the right
|
|
|
|
position */
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU);
|
|
|
|
bpage != NULL;
|
2014-02-26 19:11:54 +01:00
|
|
|
bpage = UT_LIST_GET_PREV(LRU, bpage)) {
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_ad(bpage->in_LRU_list);
|
|
|
|
ut_ad(buf_page_in_file(bpage));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* This loop temporarily violates the
|
|
|
|
assertions of buf_page_set_old(). */
|
|
|
|
bpage->old = TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
|
|
|
|
buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
|
|
|
|
|
|
|
|
buf_LRU_old_adjust_len(buf_pool);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Remove a block from the unzip_LRU list if it belonged to the list. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
buf_unzip_LRU_remove_block_if_needed(
|
|
|
|
/*=================================*/
|
|
|
|
buf_page_t* bpage) /*!< in/out: control block */
|
|
|
|
{
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
|
|
|
|
|
|
|
ut_ad(buf_page_in_file(bpage));
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
if (buf_page_belongs_to_unzip_LRU(bpage)) {
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_block_t* block = reinterpret_cast<buf_block_t*>(bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(block->in_unzip_LRU_list);
|
|
|
|
ut_d(block->in_unzip_LRU_list = FALSE);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_REMOVE(buf_pool->unzip_LRU, block);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-11-06 12:17:11 +01:00
|
|
|
/******************************************************************//**
|
|
|
|
Adjust LRU hazard pointers if needed. */
|
|
|
|
void
|
|
|
|
buf_LRU_adjust_hp(
|
|
|
|
/*==============*/
|
|
|
|
buf_pool_t* buf_pool,/*!< in: buffer pool instance */
|
|
|
|
const buf_page_t* bpage) /*!< in: control block */
|
|
|
|
{
|
|
|
|
buf_pool->lru_hp.adjust(bpage);
|
|
|
|
buf_pool->lru_scan_itr.adjust(bpage);
|
|
|
|
buf_pool->single_scan_itr.adjust(bpage);
|
|
|
|
}
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/******************************************************************//**
|
|
|
|
Removes a block from the LRU list. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
buf_LRU_remove_block(
|
|
|
|
/*=================*/
|
|
|
|
buf_page_t* bpage) /*!< in: control block */
|
|
|
|
{
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
ut_a(buf_page_in_file(bpage));
|
|
|
|
|
|
|
|
ut_ad(bpage->in_LRU_list);
|
|
|
|
|
2014-11-06 12:17:11 +01:00
|
|
|
/* Important that we adjust the hazard pointers before removing
|
|
|
|
bpage from the LRU list. */
|
|
|
|
buf_LRU_adjust_hp(buf_pool, bpage);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* If the LRU_old pointer is defined and points to just this block,
|
|
|
|
move it backward one step */
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (bpage == buf_pool->LRU_old) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Below: the previous block is guaranteed to exist,
|
|
|
|
because the LRU_old pointer is only allowed to differ
|
|
|
|
by BUF_LRU_OLD_TOLERANCE from strict
|
|
|
|
buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
|
|
|
|
list length. */
|
|
|
|
buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
|
|
|
|
|
|
|
|
ut_a(prev_bpage);
|
|
|
|
#ifdef UNIV_LRU_DEBUG
|
|
|
|
ut_a(!prev_bpage->old);
|
|
|
|
#endif /* UNIV_LRU_DEBUG */
|
|
|
|
buf_pool->LRU_old = prev_bpage;
|
|
|
|
buf_page_set_old(prev_bpage, TRUE);
|
|
|
|
|
|
|
|
buf_pool->LRU_old_len++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Remove the block from the LRU list */
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_REMOVE(buf_pool->LRU, bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_d(bpage->in_LRU_list = FALSE);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_pool->stat.LRU_bytes -= bpage->size.physical();
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_unzip_LRU_remove_block_if_needed(bpage);
|
|
|
|
|
|
|
|
/* If the LRU list is so short that LRU_old is not defined,
|
|
|
|
clear the "old" flags and return */
|
|
|
|
if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
|
|
|
|
bpage != NULL;
|
2014-02-26 19:11:54 +01:00
|
|
|
bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* This loop temporarily violates the
|
|
|
|
assertions of buf_page_set_old(). */
|
|
|
|
bpage->old = FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool->LRU_old = NULL;
|
|
|
|
buf_pool->LRU_old_len = 0;
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_ad(buf_pool->LRU_old);
|
|
|
|
|
|
|
|
/* Update the LRU_old_len field if necessary */
|
|
|
|
if (buf_page_is_old(bpage)) {
|
|
|
|
|
|
|
|
buf_pool->LRU_old_len--;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Adjust the length of the old block list if necessary */
|
|
|
|
buf_LRU_old_adjust_len(buf_pool);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Adds a block to the LRU list of decompressed zip pages. */
|
|
|
|
void
|
|
|
|
buf_unzip_LRU_add_block(
|
|
|
|
/*====================*/
|
|
|
|
buf_block_t* block, /*!< in: control block */
|
|
|
|
ibool old) /*!< in: TRUE if should be put to the end
|
|
|
|
of the list, else put to the start */
|
|
|
|
{
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_block(block);
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
|
|
|
|
|
|
|
|
ut_ad(!block->in_unzip_LRU_list);
|
|
|
|
ut_d(block->in_unzip_LRU_list = TRUE);
|
|
|
|
|
|
|
|
if (old) {
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_ADD_LAST(buf_pool->unzip_LRU, block);
|
2014-02-26 19:11:54 +01:00
|
|
|
} else {
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_ADD_FIRST(buf_pool->unzip_LRU, block);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
2016-08-12 10:17:45 +02:00
|
|
|
Adds a block to the LRU list. Please make sure that the page_size is
|
|
|
|
already set when invoking the function, so that we can get correct
|
|
|
|
page_size from the buffer page when adding a block into LRU */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
buf_LRU_add_block_low(
|
|
|
|
/*==================*/
|
|
|
|
buf_page_t* bpage, /*!< in: control block */
|
|
|
|
ibool old) /*!< in: TRUE if should be put to the old blocks
|
|
|
|
in the LRU list, else put to the start; if the
|
|
|
|
LRU list is very short, the block is added to
|
|
|
|
the start, regardless of this parameter */
|
|
|
|
{
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
ut_a(buf_page_in_file(bpage));
|
|
|
|
ut_ad(!bpage->in_LRU_list);
|
|
|
|
|
|
|
|
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_ADD_FIRST(buf_pool->LRU, bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
bpage->freed_page_clock = buf_pool->freed_page_clock;
|
|
|
|
} else {
|
|
|
|
#ifdef UNIV_LRU_DEBUG
|
|
|
|
/* buf_pool->LRU_old must be the first item in the LRU list
|
|
|
|
whose "old" flag is set. */
|
|
|
|
ut_a(buf_pool->LRU_old->old);
|
|
|
|
ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
|
|
|
|
|| !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
|
|
|
|
ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
|
|
|
|
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
|
|
|
|
#endif /* UNIV_LRU_DEBUG */
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_INSERT_AFTER(buf_pool->LRU, buf_pool->LRU_old,
|
|
|
|
bpage);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_pool->LRU_old_len++;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_d(bpage->in_LRU_list = TRUE);
|
|
|
|
|
|
|
|
incr_LRU_size_in_bytes(bpage, buf_pool);
|
|
|
|
|
|
|
|
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
|
|
|
|
|
|
|
|
ut_ad(buf_pool->LRU_old);
|
|
|
|
|
|
|
|
/* Adjust the length of the old block list if necessary */
|
|
|
|
|
|
|
|
buf_page_set_old(bpage, old);
|
|
|
|
buf_LRU_old_adjust_len(buf_pool);
|
|
|
|
|
|
|
|
} else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
|
|
|
|
|
|
|
|
/* The LRU list is now long enough for LRU_old to become
|
|
|
|
defined: init it */
|
|
|
|
|
|
|
|
buf_LRU_old_init(buf_pool);
|
|
|
|
} else {
|
|
|
|
buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If this is a zipped block with decompressed frame as well
|
|
|
|
then put it on the unzip_LRU list */
|
|
|
|
if (buf_page_belongs_to_unzip_LRU(bpage)) {
|
|
|
|
buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
2016-08-12 10:17:45 +02:00
|
|
|
Adds a block to the LRU list. Please make sure that the page_size is
|
|
|
|
already set when invoking the function, so that we can get correct
|
|
|
|
page_size from the buffer page when adding a block into LRU */
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
|
|
|
buf_LRU_add_block(
|
|
|
|
/*==============*/
|
|
|
|
buf_page_t* bpage, /*!< in: control block */
|
|
|
|
ibool old) /*!< in: TRUE if should be put to the old
|
|
|
|
blocks in the LRU list, else put to the start;
|
|
|
|
if the LRU list is very short, the block is
|
|
|
|
added to the start, regardless of this
|
|
|
|
parameter */
|
|
|
|
{
|
|
|
|
buf_LRU_add_block_low(bpage, old);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Moves a block to the start of the LRU list. */
|
|
|
|
void
|
|
|
|
buf_LRU_make_block_young(
|
|
|
|
/*=====================*/
|
|
|
|
buf_page_t* bpage) /*!< in: control block */
|
|
|
|
{
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
if (bpage->old) {
|
|
|
|
buf_pool->stat.n_pages_made_young++;
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_LRU_remove_block(bpage);
|
|
|
|
buf_LRU_add_block_low(bpage, FALSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Try to free a block. If bpage is a descriptor of a compressed-only
|
|
|
|
page, the descriptor object will be freed as well.
|
|
|
|
|
|
|
|
NOTE: If this function returns true, it will temporarily
|
|
|
|
release buf_pool->mutex. Furthermore, the page frame will no longer be
|
|
|
|
accessible via bpage.
|
|
|
|
|
|
|
|
The caller must hold buf_pool->mutex and must not hold any
|
|
|
|
buf_page_get_mutex() when calling this function.
|
|
|
|
@return true if freed, false otherwise. */
|
|
|
|
bool
|
|
|
|
buf_LRU_free_page(
|
|
|
|
/*===============*/
|
|
|
|
buf_page_t* bpage, /*!< in: block to be freed */
|
|
|
|
bool zip) /*!< in: true if should remove also the
|
|
|
|
compressed page of an uncompressed page */
|
|
|
|
{
|
|
|
|
buf_page_t* b = NULL;
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id);
|
|
|
|
|
|
|
|
BPageMutex* block_mutex = buf_page_get_mutex(bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_ad(buf_page_in_file(bpage));
|
|
|
|
ut_ad(bpage->in_LRU_list);
|
|
|
|
|
|
|
|
rw_lock_x_lock(hash_lock);
|
|
|
|
mutex_enter(block_mutex);
|
|
|
|
|
|
|
|
if (!buf_page_can_relocate(bpage)) {
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/* Do not free buffer fixed and I/O-fixed blocks. */
|
2014-02-26 19:11:54 +01:00
|
|
|
goto func_exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef UNIV_IBUF_COUNT_DEBUG
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_a(ibuf_count_get(bpage->id) == 0);
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif /* UNIV_IBUF_COUNT_DEBUG */
|
|
|
|
|
|
|
|
if (zip || !bpage->zip.data) {
|
|
|
|
/* This would completely free the block. */
|
|
|
|
/* Do not completely free dirty blocks. */
|
|
|
|
|
|
|
|
if (bpage->oldest_modification) {
|
|
|
|
goto func_exit;
|
|
|
|
}
|
2014-05-05 18:20:28 +02:00
|
|
|
} else if (bpage->oldest_modification > 0
|
|
|
|
&& buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2014-05-05 18:20:28 +02:00
|
|
|
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
func_exit:
|
|
|
|
rw_lock_x_unlock(hash_lock);
|
|
|
|
mutex_exit(block_mutex);
|
|
|
|
return(false);
|
|
|
|
|
|
|
|
} else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
|
|
|
|
b = buf_page_alloc_descriptor();
|
|
|
|
ut_a(b);
|
2018-08-03 10:22:20 +02:00
|
|
|
new (b) buf_page_t(*bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_ad(buf_page_in_file(bpage));
|
|
|
|
ut_ad(bpage->in_LRU_list);
|
|
|
|
ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
DBUG_PRINT("ib_buf", ("free page %u:%u",
|
|
|
|
bpage->id.space(), bpage->id.page_no()));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-09-06 08:43:16 +02:00
|
|
|
ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_ad(buf_page_can_relocate(bpage));
|
|
|
|
|
|
|
|
if (!buf_LRU_block_remove_hashed(bpage, zip)) {
|
|
|
|
return(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* buf_LRU_block_remove_hashed() releases the hash_lock */
|
2018-07-23 12:31:10 +02:00
|
|
|
ut_ad(!rw_lock_own_flagged(hash_lock,
|
|
|
|
RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
|
|
|
|
then it was a compressed page with an uncompressed frame and
|
|
|
|
we are interested in freeing only the uncompressed frame.
|
|
|
|
Therefore we have to reinsert the compressed page descriptor
|
|
|
|
into the LRU and page_hash (and possibly flush_list).
|
|
|
|
if b == NULL then it was a regular page that has been freed */
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (b != NULL) {
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
|
|
|
|
|
|
|
|
rw_lock_x_lock(hash_lock);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
mutex_enter(block_mutex);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_a(!buf_page_hash_get_low(buf_pool, b->id));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
b->state = b->oldest_modification
|
|
|
|
? BUF_BLOCK_ZIP_DIRTY
|
|
|
|
: BUF_BLOCK_ZIP_PAGE;
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
ut_ad(b->size.is_compressed());
|
|
|
|
|
|
|
|
UNIV_MEM_DESC(b->zip.data, b->size.physical());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* The fields in_page_hash and in_LRU_list of
|
|
|
|
the to-be-freed block descriptor should have
|
|
|
|
been cleared in
|
|
|
|
buf_LRU_block_remove_hashed(), which
|
|
|
|
invokes buf_LRU_remove_block(). */
|
|
|
|
ut_ad(!bpage->in_page_hash);
|
|
|
|
ut_ad(!bpage->in_LRU_list);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* bpage->state was BUF_BLOCK_FILE_PAGE because
|
|
|
|
b != NULL. The type cast below is thus valid. */
|
|
|
|
ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
|
|
|
|
|
|
|
|
/* The fields of bpage were copied to b before
|
|
|
|
buf_LRU_block_remove_hashed() was invoked. */
|
|
|
|
ut_ad(!b->in_zip_hash);
|
|
|
|
ut_ad(b->in_page_hash);
|
|
|
|
ut_ad(b->in_LRU_list);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
|
|
|
|
b->id.fold(), b);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/* Insert b where bpage was in the LRU list. */
|
2016-08-12 10:17:45 +02:00
|
|
|
if (prev_b != NULL) {
|
2014-02-26 19:11:54 +01:00
|
|
|
ulint lru_len;
|
|
|
|
|
|
|
|
ut_ad(prev_b->in_LRU_list);
|
|
|
|
ut_ad(buf_page_in_file(prev_b));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
UT_LIST_INSERT_AFTER(buf_pool->LRU, prev_b, b);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
incr_LRU_size_in_bytes(b, buf_pool);
|
|
|
|
|
|
|
|
if (buf_page_is_old(b)) {
|
|
|
|
buf_pool->LRU_old_len++;
|
2016-08-12 10:17:45 +02:00
|
|
|
if (buf_pool->LRU_old
|
|
|
|
== UT_LIST_GET_NEXT(LRU, b)) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool->LRU_old = b;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
|
|
|
|
|
|
|
|
if (lru_len > BUF_LRU_OLD_MIN_LEN) {
|
|
|
|
ut_ad(buf_pool->LRU_old);
|
|
|
|
/* Adjust the length of the
|
|
|
|
old block list if necessary */
|
|
|
|
buf_LRU_old_adjust_len(buf_pool);
|
|
|
|
} else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
|
|
|
|
/* The LRU list is now long
|
|
|
|
enough for LRU_old to become
|
|
|
|
defined: init it */
|
|
|
|
buf_LRU_old_init(buf_pool);
|
|
|
|
}
|
|
|
|
#ifdef UNIV_LRU_DEBUG
|
|
|
|
/* Check that the "old" flag is consistent
|
|
|
|
in the block and its neighbours. */
|
|
|
|
buf_page_set_old(b, buf_page_is_old(b));
|
|
|
|
#endif /* UNIV_LRU_DEBUG */
|
|
|
|
} else {
|
|
|
|
ut_d(b->in_LRU_list = FALSE);
|
|
|
|
buf_LRU_add_block_low(b, buf_page_is_old(b));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (b->state == BUF_BLOCK_ZIP_PAGE) {
|
|
|
|
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
|
|
|
|
buf_LRU_insert_zip_clean(b);
|
|
|
|
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
|
|
|
} else {
|
|
|
|
/* Relocate on buf_pool->flush_list. */
|
|
|
|
buf_flush_relocate_on_flush_list(bpage, b);
|
|
|
|
}
|
|
|
|
|
|
|
|
bpage->zip.data = NULL;
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
page_zip_set_size(&bpage->zip, 0);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
bpage->size.copy_from(page_size_t(bpage->size.logical(),
|
|
|
|
bpage->size.logical(),
|
|
|
|
false));
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
mutex_exit(block_mutex);
|
|
|
|
|
|
|
|
/* Prevent buf_page_get_gen() from
|
|
|
|
decompressing the block while we release
|
|
|
|
buf_pool->mutex and block_mutex. */
|
|
|
|
block_mutex = buf_page_get_mutex(b);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
mutex_enter(block_mutex);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_page_set_sticky(b);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
mutex_exit(block_mutex);
|
|
|
|
|
|
|
|
rw_lock_x_unlock(hash_lock);
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
|
|
|
|
/* Remove possible adaptive hash index on the page.
|
|
|
|
The page was declared uninitialized by
|
|
|
|
buf_LRU_block_remove_hashed(). We need to flag
|
|
|
|
the contents of the page valid (which it still is) in
|
|
|
|
order to avoid bogus Valgrind warnings.*/
|
|
|
|
|
|
|
|
UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
|
|
|
|
UNIV_PAGE_SIZE);
|
|
|
|
btr_search_drop_page_hash_index((buf_block_t*) bpage);
|
|
|
|
UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
|
|
|
|
UNIV_PAGE_SIZE);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (b != NULL) {
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/* Compute and stamp the compressed page
|
|
|
|
checksum while not holding any mutex. The
|
|
|
|
block is already half-freed
|
|
|
|
(BUF_BLOCK_REMOVE_HASH) and removed from
|
|
|
|
buf_pool->page_hash, thus inaccessible by any
|
|
|
|
other thread. */
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_ad(b->size.is_compressed());
|
|
|
|
|
|
|
|
const uint32_t checksum = page_zip_calc_checksum(
|
|
|
|
b->zip.data,
|
|
|
|
b->size.physical(),
|
|
|
|
static_cast<srv_checksum_algorithm_t>(
|
|
|
|
srv_checksum_algorithm));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
mach_write_to_4(b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
|
|
|
|
checksum);
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (b != NULL) {
|
|
|
|
mutex_enter(block_mutex);
|
|
|
|
|
|
|
|
buf_page_unset_sticky(b);
|
|
|
|
|
|
|
|
mutex_exit(block_mutex);
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
return(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Puts a block back to the free list. */
|
|
|
|
void
|
|
|
|
buf_LRU_block_free_non_file_page(
|
|
|
|
/*=============================*/
|
|
|
|
buf_block_t* block) /*!< in: block, must not contain a file page */
|
|
|
|
{
|
|
|
|
void* data;
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_block(block);
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_ad(buf_page_mutex_own(block));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
switch (buf_block_get_state(block)) {
|
|
|
|
case BUF_BLOCK_MEMORY:
|
|
|
|
case BUF_BLOCK_READY_FOR_USE:
|
|
|
|
break;
|
|
|
|
default:
|
2016-09-02 16:28:54 +02:00
|
|
|
ut_error;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2016-09-02 16:28:54 +02:00
|
|
|
assert_block_ahi_empty(block);
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_ad(!block->page.in_free_list);
|
|
|
|
ut_ad(!block->page.in_flush_list);
|
|
|
|
ut_ad(!block->page.in_LRU_list);
|
|
|
|
|
|
|
|
buf_block_set_state(block, BUF_BLOCK_NOT_USED);
|
|
|
|
|
|
|
|
UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/* Wipe contents of page to reveal possible stale pointers to it */
|
|
|
|
memset(block->frame, '\0', UNIV_PAGE_SIZE);
|
|
|
|
#else
|
|
|
|
/* Wipe page_no and space_id */
|
|
|
|
memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
|
|
|
|
memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
|
2016-08-12 10:17:45 +02:00
|
|
|
#endif /* UNIV_DEBUG */
|
2014-02-26 19:11:54 +01:00
|
|
|
data = block->page.zip.data;
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (data != NULL) {
|
2014-02-26 19:11:54 +01:00
|
|
|
block->page.zip.data = NULL;
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_page_mutex_exit(block);
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_pool_mutex_exit_forbid(buf_pool);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_ad(block->page.size.is_compressed());
|
|
|
|
|
|
|
|
buf_buddy_free(buf_pool, data, block->page.size.physical());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool_mutex_exit_allow(buf_pool);
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_page_mutex_enter(block);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
page_zip_set_size(&block->page.zip, 0);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
block->page.size.copy_from(
|
|
|
|
page_size_t(block->page.size.logical(),
|
|
|
|
block->page.size.logical(),
|
|
|
|
false));
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
if (buf_pool->curr_size < buf_pool->old_size
|
|
|
|
&& UT_LIST_GET_LEN(buf_pool->withdraw) < buf_pool->withdraw_target
|
|
|
|
&& buf_block_will_withdrawn(buf_pool, block)) {
|
|
|
|
/* This should be withdrawn */
|
|
|
|
UT_LIST_ADD_LAST(
|
|
|
|
buf_pool->withdraw,
|
|
|
|
&block->page);
|
|
|
|
ut_d(block->in_withdraw_list = TRUE);
|
|
|
|
} else {
|
|
|
|
UT_LIST_ADD_FIRST(buf_pool->free, &block->page);
|
|
|
|
ut_d(block->page.in_free_list = TRUE);
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2018-01-23 18:29:12 +01:00
|
|
|
UNIV_MEM_FREE(block->frame, UNIV_PAGE_SIZE);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Takes a block out of the LRU list and page hash table.
|
|
|
|
If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
|
|
|
|
the object will be freed.
|
|
|
|
|
|
|
|
The caller must hold buf_pool->mutex, the buf_page_get_mutex() mutex
|
|
|
|
and the appropriate hash_lock. This function will release the
|
|
|
|
buf_page_get_mutex() and the hash_lock.
|
|
|
|
|
|
|
|
If a compressed page is freed other compressed pages may be relocated.
|
|
|
|
@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
|
|
|
|
caller needs to free the page to the free list
|
|
|
|
@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
|
|
|
|
this case the block is already returned to the buddy allocator. */
|
|
|
|
static
|
|
|
|
bool
|
|
|
|
buf_LRU_block_remove_hashed(
|
|
|
|
/*========================*/
|
|
|
|
buf_page_t* bpage, /*!< in: block, must contain a file page and
|
|
|
|
be in a state where it can be freed; there
|
|
|
|
may or may not be a hash index to the page */
|
|
|
|
bool zip) /*!< in: true if should remove also the
|
|
|
|
compressed page of an uncompressed page */
|
|
|
|
{
|
|
|
|
const buf_page_t* hashed_bpage;
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
|
|
|
rw_lock_t* hash_lock;
|
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id);
|
|
|
|
|
|
|
|
ut_ad(rw_lock_own(hash_lock, RW_LOCK_X));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
|
|
|
|
ut_a(bpage->buf_fix_count == 0);
|
|
|
|
|
|
|
|
buf_LRU_remove_block(bpage);
|
|
|
|
|
|
|
|
buf_pool->freed_page_clock += 1;
|
|
|
|
|
|
|
|
switch (buf_page_get_state(bpage)) {
|
|
|
|
case BUF_BLOCK_FILE_PAGE:
|
|
|
|
UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
|
|
|
|
UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
|
|
|
|
UNIV_PAGE_SIZE);
|
|
|
|
buf_block_modify_clock_inc((buf_block_t*) bpage);
|
|
|
|
if (bpage->zip.data) {
|
|
|
|
const page_t* page = ((buf_block_t*) bpage)->frame;
|
|
|
|
|
|
|
|
ut_a(!zip || bpage->oldest_modification == 0);
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_ad(bpage->size.is_compressed());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
switch (fil_page_get_type(page)) {
|
2014-02-26 19:11:54 +01:00
|
|
|
case FIL_PAGE_TYPE_ALLOCATED:
|
|
|
|
case FIL_PAGE_INODE:
|
|
|
|
case FIL_PAGE_IBUF_BITMAP:
|
|
|
|
case FIL_PAGE_TYPE_FSP_HDR:
|
|
|
|
case FIL_PAGE_TYPE_XDES:
|
|
|
|
/* These are essentially uncompressed pages. */
|
|
|
|
if (!zip) {
|
|
|
|
/* InnoDB writes the data to the
|
|
|
|
uncompressed page frame. Copy it
|
|
|
|
to the compressed page, which will
|
|
|
|
be preserved. */
|
|
|
|
memcpy(bpage->zip.data, page,
|
2016-08-12 10:17:45 +02:00
|
|
|
bpage->size.physical());
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
case FIL_PAGE_TYPE_ZBLOB:
|
|
|
|
case FIL_PAGE_TYPE_ZBLOB2:
|
|
|
|
break;
|
|
|
|
case FIL_PAGE_INDEX:
|
2016-08-12 10:17:45 +02:00
|
|
|
case FIL_PAGE_RTREE:
|
2014-02-26 19:11:54 +01:00
|
|
|
#ifdef UNIV_ZIP_DEBUG
|
|
|
|
ut_a(page_zip_validate(
|
|
|
|
&bpage->zip, page,
|
|
|
|
((buf_block_t*) bpage)->index));
|
|
|
|
#endif /* UNIV_ZIP_DEBUG */
|
|
|
|
break;
|
|
|
|
default:
|
2016-08-12 10:17:45 +02:00
|
|
|
ib::error() << "The compressed page to be"
|
|
|
|
" evicted seems corrupt:";
|
|
|
|
ut_print_buf(stderr, page,
|
|
|
|
bpage->size.logical());
|
|
|
|
|
|
|
|
ib::error() << "Possibly older version of"
|
|
|
|
" the page:";
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_print_buf(stderr, bpage->zip.data,
|
2016-08-12 10:17:45 +02:00
|
|
|
bpage->size.physical());
|
2014-02-26 19:11:54 +01:00
|
|
|
putc('\n', stderr);
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* fall through */
|
|
|
|
case BUF_BLOCK_ZIP_PAGE:
|
|
|
|
ut_a(bpage->oldest_modification == 0);
|
2016-08-12 10:17:45 +02:00
|
|
|
if (bpage->size.is_compressed()) {
|
|
|
|
UNIV_MEM_ASSERT_W(bpage->zip.data,
|
|
|
|
bpage->size.physical());
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
break;
|
|
|
|
case BUF_BLOCK_POOL_WATCH:
|
|
|
|
case BUF_BLOCK_ZIP_DIRTY:
|
|
|
|
case BUF_BLOCK_NOT_USED:
|
|
|
|
case BUF_BLOCK_READY_FOR_USE:
|
|
|
|
case BUF_BLOCK_MEMORY:
|
|
|
|
case BUF_BLOCK_REMOVE_HASH:
|
|
|
|
ut_error;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->id);
|
|
|
|
if (bpage != hashed_bpage) {
|
|
|
|
ib::error() << "Page " << bpage->id
|
|
|
|
<< " not found in the hash table";
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2014-07-25 09:30:16 +02:00
|
|
|
#ifdef UNIV_DEBUG
|
2016-09-06 08:43:16 +02:00
|
|
|
|
|
|
|
|
|
|
|
ib::error()
|
|
|
|
<< "in_page_hash:" << bpage->in_page_hash
|
|
|
|
<< " in_zip_hash:" << bpage->in_zip_hash
|
|
|
|
// << " in_free_list:"<< bpage->in_fee_list
|
|
|
|
<< " in_flush_list:" << bpage->in_flush_list
|
|
|
|
<< " in_LRU_list:" << bpage->in_LRU_list
|
|
|
|
<< " zip.data:" << bpage->zip.data
|
|
|
|
<< " zip_size:" << bpage->size.logical()
|
|
|
|
<< " page_state:" << buf_page_get_state(bpage);
|
2014-07-25 09:30:16 +02:00
|
|
|
#else
|
2016-09-06 08:43:16 +02:00
|
|
|
ib::error()
|
|
|
|
<< " zip.data:" << bpage->zip.data
|
|
|
|
<< " zip_size:" << bpage->size.logical()
|
|
|
|
<< " page_state:" << buf_page_get_state(bpage);
|
2014-07-25 09:30:16 +02:00
|
|
|
#endif
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
if (hashed_bpage) {
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
ib::error() << "In hash table we find block "
|
|
|
|
<< hashed_bpage << " of " << hashed_bpage->id
|
|
|
|
<< " which is not " << bpage;
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
|
|
|
|
mutex_exit(buf_page_get_mutex(bpage));
|
|
|
|
rw_lock_x_unlock(hash_lock);
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
buf_print();
|
|
|
|
buf_LRU_print();
|
|
|
|
buf_validate();
|
|
|
|
buf_LRU_validate();
|
|
|
|
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
|
|
|
ut_error;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_ad(!bpage->in_zip_hash);
|
|
|
|
ut_ad(bpage->in_page_hash);
|
|
|
|
ut_d(bpage->in_page_hash = FALSE);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, bpage->id.fold(),
|
|
|
|
bpage);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
switch (buf_page_get_state(bpage)) {
|
|
|
|
case BUF_BLOCK_ZIP_PAGE:
|
|
|
|
ut_ad(!bpage->in_free_list);
|
|
|
|
ut_ad(!bpage->in_flush_list);
|
|
|
|
ut_ad(!bpage->in_LRU_list);
|
|
|
|
ut_a(bpage->zip.data);
|
2016-08-12 10:17:45 +02:00
|
|
|
ut_a(bpage->size.is_compressed());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
|
2016-08-12 10:17:45 +02:00
|
|
|
UT_LIST_REMOVE(buf_pool->zip_clean, bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
|
|
|
|
|
|
|
mutex_exit(&buf_pool->zip_mutex);
|
|
|
|
rw_lock_x_unlock(hash_lock);
|
|
|
|
buf_pool_mutex_exit_forbid(buf_pool);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_buddy_free(buf_pool, bpage->zip.data,
|
|
|
|
bpage->size.physical());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool_mutex_exit_allow(buf_pool);
|
|
|
|
buf_page_free_descriptor(bpage);
|
|
|
|
return(false);
|
|
|
|
|
|
|
|
case BUF_BLOCK_FILE_PAGE:
|
|
|
|
memset(((buf_block_t*) bpage)->frame
|
|
|
|
+ FIL_PAGE_OFFSET, 0xff, 4);
|
|
|
|
memset(((buf_block_t*) bpage)->frame
|
|
|
|
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
|
|
|
|
UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
|
|
|
|
UNIV_PAGE_SIZE);
|
|
|
|
buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
|
|
|
|
|
|
|
|
/* Question: If we release bpage and hash mutex here
|
|
|
|
then what protects us against:
|
|
|
|
1) Some other thread buffer fixing this page
|
|
|
|
2) Some other thread trying to read this page and
|
|
|
|
not finding it in buffer pool attempting to read it
|
|
|
|
from the disk.
|
|
|
|
Answer:
|
|
|
|
1) Cannot happen because the page is no longer in the
|
|
|
|
page_hash. Only possibility is when while invalidating
|
|
|
|
a tablespace we buffer fix the prev_page in LRU to
|
|
|
|
avoid relocation during the scan. But that is not
|
|
|
|
possible because we are holding buf_pool mutex.
|
|
|
|
|
|
|
|
2) Not possible because in buf_page_init_for_read()
|
|
|
|
we do a look up of page_hash while holding buf_pool
|
|
|
|
mutex and since we are holding buf_pool mutex here
|
|
|
|
and by the time we'll release it in the caller we'd
|
|
|
|
have inserted the compressed only descriptor in the
|
|
|
|
page_hash. */
|
|
|
|
rw_lock_x_unlock(hash_lock);
|
|
|
|
mutex_exit(&((buf_block_t*) bpage)->mutex);
|
|
|
|
|
|
|
|
if (zip && bpage->zip.data) {
|
|
|
|
/* Free the compressed page. */
|
|
|
|
void* data = bpage->zip.data;
|
|
|
|
bpage->zip.data = NULL;
|
|
|
|
|
|
|
|
ut_ad(!bpage->in_free_list);
|
|
|
|
ut_ad(!bpage->in_flush_list);
|
|
|
|
ut_ad(!bpage->in_LRU_list);
|
|
|
|
buf_pool_mutex_exit_forbid(buf_pool);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_buddy_free(buf_pool, data, bpage->size.physical());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool_mutex_exit_allow(buf_pool);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
page_zip_set_size(&bpage->zip, 0);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
bpage->size.copy_from(
|
|
|
|
page_size_t(bpage->size.logical(),
|
|
|
|
bpage->size.logical(),
|
|
|
|
false));
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return(true);
|
|
|
|
|
|
|
|
case BUF_BLOCK_POOL_WATCH:
|
|
|
|
case BUF_BLOCK_ZIP_DIRTY:
|
|
|
|
case BUF_BLOCK_NOT_USED:
|
|
|
|
case BUF_BLOCK_READY_FOR_USE:
|
|
|
|
case BUF_BLOCK_MEMORY:
|
|
|
|
case BUF_BLOCK_REMOVE_HASH:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_error;
|
|
|
|
return(false);
|
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Puts a file page whose has no hash index to the free list. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
buf_LRU_block_free_hashed_page(
|
|
|
|
/*===========================*/
|
|
|
|
buf_block_t* block) /*!< in: block, must contain a file page and
|
|
|
|
be in a state where it can be freed */
|
|
|
|
{
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_block(block);
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_page_mutex_enter(block);
|
|
|
|
|
|
|
|
if (buf_pool->flush_rbt == NULL) {
|
2018-10-18 17:23:12 +02:00
|
|
|
block->page.id
|
|
|
|
= page_id_t(ULINT32_UNDEFINED, ULINT32_UNDEFINED);
|
2016-08-12 10:17:45 +02:00
|
|
|
}
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_block_set_state(block, BUF_BLOCK_MEMORY);
|
|
|
|
|
|
|
|
buf_LRU_block_free_non_file_page(block);
|
2016-08-12 10:17:45 +02:00
|
|
|
buf_page_mutex_exit(block);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************//**
|
|
|
|
Remove one page from LRU list and put it to free list */
|
|
|
|
void
|
|
|
|
buf_LRU_free_one_page(
|
|
|
|
/*==================*/
|
|
|
|
buf_page_t* bpage) /*!< in/out: block, must contain a file page and
|
|
|
|
be in a state where it can be freed; there
|
|
|
|
may or may not be a hash index to the page */
|
|
|
|
{
|
|
|
|
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, bpage->id);
|
|
|
|
BPageMutex* block_mutex = buf_page_get_mutex(bpage);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
ut_ad(buf_pool_mutex_own(buf_pool));
|
|
|
|
|
|
|
|
rw_lock_x_lock(hash_lock);
|
|
|
|
mutex_enter(block_mutex);
|
|
|
|
|
|
|
|
if (buf_LRU_block_remove_hashed(bpage, true)) {
|
|
|
|
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */
|
2018-07-23 12:31:10 +02:00
|
|
|
ut_ad(!rw_lock_own_flagged(hash_lock,
|
|
|
|
RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_ad(!mutex_own(block_mutex));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
Updates buf_pool->LRU_old_ratio for one buffer pool instance.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return updated old_pct */
|
2014-02-26 19:11:54 +01:00
|
|
|
static
|
|
|
|
uint
|
|
|
|
buf_LRU_old_ratio_update_instance(
|
|
|
|
/*==============================*/
|
|
|
|
buf_pool_t* buf_pool,/*!< in: buffer pool instance */
|
|
|
|
uint old_pct,/*!< in: Reserve this percentage of
|
|
|
|
the buffer pool for "old" blocks. */
|
|
|
|
ibool adjust) /*!< in: TRUE=adjust the LRU list;
|
|
|
|
FALSE=just assign buf_pool->LRU_old_ratio
|
|
|
|
during the initialization of InnoDB */
|
|
|
|
{
|
|
|
|
uint ratio;
|
|
|
|
|
|
|
|
ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
|
|
|
|
if (ratio < BUF_LRU_OLD_RATIO_MIN) {
|
|
|
|
ratio = BUF_LRU_OLD_RATIO_MIN;
|
|
|
|
} else if (ratio > BUF_LRU_OLD_RATIO_MAX) {
|
|
|
|
ratio = BUF_LRU_OLD_RATIO_MAX;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (adjust) {
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
if (ratio != buf_pool->LRU_old_ratio) {
|
|
|
|
buf_pool->LRU_old_ratio = ratio;
|
|
|
|
|
|
|
|
if (UT_LIST_GET_LEN(buf_pool->LRU)
|
2016-08-12 10:17:45 +02:00
|
|
|
>= BUF_LRU_OLD_MIN_LEN) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_LRU_old_adjust_len(buf_pool);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
} else {
|
|
|
|
buf_pool->LRU_old_ratio = ratio;
|
|
|
|
}
|
|
|
|
/* the reverse of
|
|
|
|
ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
|
|
|
|
return((uint) (ratio * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
Updates buf_pool->LRU_old_ratio.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return updated old_pct */
|
|
|
|
uint
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_LRU_old_ratio_update(
|
|
|
|
/*=====================*/
|
|
|
|
uint old_pct,/*!< in: Reserve this percentage of
|
|
|
|
the buffer pool for "old" blocks. */
|
|
|
|
ibool adjust) /*!< in: TRUE=adjust the LRU list;
|
|
|
|
FALSE=just assign buf_pool->LRU_old_ratio
|
|
|
|
during the initialization of InnoDB */
|
|
|
|
{
|
2016-08-12 10:17:45 +02:00
|
|
|
uint new_ratio = 0;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_pool_t* buf_pool;
|
|
|
|
|
|
|
|
buf_pool = buf_pool_from_array(i);
|
|
|
|
|
|
|
|
new_ratio = buf_LRU_old_ratio_update_instance(
|
|
|
|
buf_pool, old_pct, adjust);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(new_ratio);
|
|
|
|
}
|
|
|
|
|
|
|
|
/********************************************************************//**
|
|
|
|
Update the historical stats that we are collecting for LRU eviction
|
|
|
|
policy at the end of each interval. */
|
|
|
|
void
|
|
|
|
buf_LRU_stat_update(void)
|
|
|
|
/*=====================*/
|
|
|
|
{
|
|
|
|
buf_LRU_stat_t* item;
|
|
|
|
buf_pool_t* buf_pool;
|
2016-08-12 10:17:45 +02:00
|
|
|
bool evict_started = FALSE;
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_LRU_stat_t cur_stat;
|
|
|
|
|
|
|
|
/* If we haven't started eviction yet then don't update stats. */
|
2016-08-12 10:17:45 +02:00
|
|
|
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool = buf_pool_from_array(i);
|
|
|
|
|
|
|
|
if (buf_pool->freed_page_clock != 0) {
|
2016-08-12 10:17:45 +02:00
|
|
|
evict_started = true;
|
2014-02-26 19:11:54 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!evict_started) {
|
|
|
|
goto func_exit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Update the index. */
|
|
|
|
item = &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
|
|
|
|
buf_LRU_stat_arr_ind++;
|
|
|
|
buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
|
|
|
|
|
|
|
|
/* Add the current value and subtract the obsolete entry.
|
|
|
|
Since buf_LRU_stat_cur is not protected by any mutex,
|
|
|
|
it can be changing between adding to buf_LRU_stat_sum
|
|
|
|
and copying to item. Assign it to local variables to make
|
|
|
|
sure the same value assign to the buf_LRU_stat_sum
|
|
|
|
and item */
|
|
|
|
cur_stat = buf_LRU_stat_cur;
|
|
|
|
|
|
|
|
buf_LRU_stat_sum.io += cur_stat.io - item->io;
|
|
|
|
buf_LRU_stat_sum.unzip += cur_stat.unzip - item->unzip;
|
|
|
|
|
|
|
|
/* Put current entry in the array. */
|
|
|
|
memcpy(item, &cur_stat, sizeof *item);
|
|
|
|
|
|
|
|
func_exit:
|
|
|
|
/* Clear the current entry. */
|
|
|
|
memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
|
|
|
|
}
|
|
|
|
|
|
|
|
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
|
|
|
|
/**********************************************************************//**
|
|
|
|
Validates the LRU list for one buffer pool instance. */
|
|
|
|
static
|
|
|
|
void
|
|
|
|
buf_LRU_validate_instance(
|
|
|
|
/*======================*/
|
|
|
|
buf_pool_t* buf_pool)
|
|
|
|
{
|
|
|
|
ulint old_len;
|
|
|
|
ulint new_len;
|
|
|
|
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
|
|
|
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
|
|
|
|
|
|
|
|
ut_a(buf_pool->LRU_old);
|
|
|
|
old_len = buf_pool->LRU_old_len;
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
|
|
|
|
* buf_pool->LRU_old_ratio
|
|
|
|
/ BUF_LRU_OLD_RATIO_DIV,
|
|
|
|
UT_LIST_GET_LEN(buf_pool->LRU)
|
|
|
|
- (BUF_LRU_OLD_TOLERANCE
|
|
|
|
+ BUF_LRU_NON_OLD_MIN_LEN));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
|
|
|
|
ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
|
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
CheckInLRUList::validate(buf_pool);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
old_len = 0;
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
|
2014-02-26 19:11:54 +01:00
|
|
|
bpage != NULL;
|
|
|
|
bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
|
|
|
|
|
|
|
|
switch (buf_page_get_state(bpage)) {
|
|
|
|
case BUF_BLOCK_POOL_WATCH:
|
|
|
|
case BUF_BLOCK_NOT_USED:
|
|
|
|
case BUF_BLOCK_READY_FOR_USE:
|
|
|
|
case BUF_BLOCK_MEMORY:
|
|
|
|
case BUF_BLOCK_REMOVE_HASH:
|
|
|
|
ut_error;
|
|
|
|
break;
|
|
|
|
case BUF_BLOCK_FILE_PAGE:
|
|
|
|
ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
|
|
|
|
== buf_page_belongs_to_unzip_LRU(bpage));
|
|
|
|
case BUF_BLOCK_ZIP_PAGE:
|
|
|
|
case BUF_BLOCK_ZIP_DIRTY:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (buf_page_is_old(bpage)) {
|
|
|
|
const buf_page_t* prev
|
|
|
|
= UT_LIST_GET_PREV(LRU, bpage);
|
|
|
|
const buf_page_t* next
|
|
|
|
= UT_LIST_GET_NEXT(LRU, bpage);
|
|
|
|
|
|
|
|
if (!old_len++) {
|
|
|
|
ut_a(buf_pool->LRU_old == bpage);
|
|
|
|
} else {
|
|
|
|
ut_a(!prev || buf_page_is_old(prev));
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_a(!next || buf_page_is_old(next));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
ut_a(buf_pool->LRU_old_len == old_len);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
CheckInFreeList::validate(buf_pool);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->free);
|
2014-02-26 19:11:54 +01:00
|
|
|
bpage != NULL;
|
|
|
|
bpage = UT_LIST_GET_NEXT(list, bpage)) {
|
|
|
|
|
|
|
|
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
|
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
CheckUnzipLRUAndLRUList::validate(buf_pool);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (buf_block_t* block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
|
|
|
|
block != NULL;
|
2014-02-26 19:11:54 +01:00
|
|
|
block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
|
|
|
|
|
|
|
|
ut_ad(block->in_unzip_LRU_list);
|
|
|
|
ut_ad(block->page.in_LRU_list);
|
|
|
|
ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
Validates the LRU list.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool
|
|
|
|
buf_LRU_validate(void)
|
|
|
|
/*==================*/
|
|
|
|
{
|
2016-08-12 10:17:45 +02:00
|
|
|
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
|
2014-02-26 19:11:54 +01:00
|
|
|
buf_pool_t* buf_pool;
|
|
|
|
|
|
|
|
buf_pool = buf_pool_from_array(i);
|
|
|
|
buf_LRU_validate_instance(buf_pool);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(TRUE);
|
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
|
|
|
|
|
|
|
|
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
|
|
|
|
/**********************************************************************//**
|
|
|
|
Prints the LRU list for one buffer pool instance. */
|
2015-05-26 09:01:12 +02:00
|
|
|
static
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
|
|
|
buf_LRU_print_instance(
|
|
|
|
/*===================*/
|
|
|
|
buf_pool_t* buf_pool)
|
|
|
|
{
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
for (const buf_page_t* bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
|
|
|
|
bpage != NULL;
|
|
|
|
bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
mutex_enter(buf_page_get_mutex(bpage));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
fprintf(stderr, "BLOCK space %u page %u ",
|
|
|
|
bpage->id.space(), bpage->id.page_no());
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (buf_page_is_old(bpage)) {
|
|
|
|
fputs("old ", stderr);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bpage->buf_fix_count) {
|
2017-04-21 04:51:27 +02:00
|
|
|
fprintf(stderr, "buffix count %u ",
|
|
|
|
bpage->buf_fix_count);
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (buf_page_get_io_fix(bpage)) {
|
2017-04-21 04:51:27 +02:00
|
|
|
fprintf(stderr, "io_fix %d ",
|
|
|
|
buf_page_get_io_fix(bpage));
|
2014-02-26 19:11:54 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
if (bpage->oldest_modification) {
|
|
|
|
fputs("modif. ", stderr);
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (buf_page_get_state(bpage)) {
|
|
|
|
const byte* frame;
|
|
|
|
case BUF_BLOCK_FILE_PAGE:
|
|
|
|
frame = buf_block_get_frame((buf_block_t*) bpage);
|
2017-09-13 08:27:15 +02:00
|
|
|
fprintf(stderr, "\ntype %u index id " IB_ID_FMT "\n",
|
2017-04-21 04:51:27 +02:00
|
|
|
fil_page_get_type(frame),
|
2016-08-12 10:17:45 +02:00
|
|
|
btr_page_get_index_id(frame));
|
2014-02-26 19:11:54 +01:00
|
|
|
break;
|
|
|
|
case BUF_BLOCK_ZIP_PAGE:
|
|
|
|
frame = bpage->zip.data;
|
2017-09-13 08:27:15 +02:00
|
|
|
fprintf(stderr, "\ntype %u size " ULINTPF
|
2016-09-06 08:43:16 +02:00
|
|
|
" index id " IB_ID_FMT "\n",
|
2017-04-21 04:51:27 +02:00
|
|
|
fil_page_get_type(frame),
|
|
|
|
bpage->size.physical(),
|
2016-08-12 10:17:45 +02:00
|
|
|
btr_page_get_index_id(frame));
|
2014-02-26 19:11:54 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2017-04-21 04:51:27 +02:00
|
|
|
fprintf(stderr, "\n!state %d!\n",
|
|
|
|
buf_page_get_state(bpage));
|
2014-02-26 19:11:54 +01:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_exit(buf_page_get_mutex(bpage));
|
|
|
|
}
|
|
|
|
|
|
|
|
buf_pool_mutex_exit(buf_pool);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
Prints the LRU list. */
|
|
|
|
void
|
|
|
|
buf_LRU_print(void)
|
|
|
|
/*===============*/
|
|
|
|
{
|
2016-08-12 10:17:45 +02:00
|
|
|
for (ulint i = 0; i < srv_buf_pool_instances; i++) {
|
|
|
|
buf_pool_t* buf_pool;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
buf_pool = buf_pool_from_array(i);
|
|
|
|
buf_LRU_print_instance(buf_pool);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
|