mirror of
https://github.com/MariaDB/server.git
synced 2025-01-30 18:41:56 +01:00
b1ab211dee
User-visible changes: The INFORMATION_SCHEMA views INNODB_BUFFER_PAGE and INNODB_BUFFER_PAGE_LRU will report a dummy value FLUSH_TYPE=0 and will no longer report the PAGE_STATE value READY_FOR_USE. We will remove some fields from buf_page_t and move much code to member functions of buf_pool_t and buf_page_t, so that the access rules of data members can be enforced consistently. Evicting or adding pages in buf_pool.LRU will remain covered by buf_pool.mutex. Evicting or adding pages in buf_pool.page_hash will remain covered by both buf_pool.mutex and the buf_pool.page_hash X-latch. After this fix, buf_pool.page_hash lookups can entirely avoid acquiring buf_pool.mutex, only relying on buf_pool.hash_lock_get() S-latch. Similarly, buf_flush_check_neighbors() will rely solely on buf_pool.mutex, no buf_pool.page_hash latch at all. The buf_pool.mutex is rather contended in I/O heavy benchmarks, especially when the workload does not fit in the buffer pool. The first attempt to alleviate the contention was the buf_pool_t::mutex split in commit 4ed7082eef
which introduced buf_block_t::mutex, which we are now removing. Later, multiple instances of buf_pool_t were introduced in commit c18084f71b
and recently removed by us in commit 1a6f708ec5
(MDEV-15058). UNIV_BUF_DEBUG: Remove. This option to enable some buffer pool related debugging in otherwise non-debug builds has not been used for years. Instead, we have been using UNIV_DEBUG, which is enabled in CMAKE_BUILD_TYPE=Debug. buf_block_t::mutex, buf_pool_t::zip_mutex: Remove. We can mainly rely on std::atomic and the buf_pool.page_hash latches, and in some cases depend on buf_pool.mutex or buf_pool.flush_list_mutex just like before. We must always release buf_block_t::lock before invoking unfix() or io_unfix(), to prevent a glitch where a block that was added to the buf_pool.free list would appear X-latched. See commit c5883debd6
how this glitch was finally caught in a debug environment. We move some buf_pool_t::page_hash specific code from the ha and hash modules to buf_pool, for improved readability. buf_pool_t::close(): Assert that all blocks are clean, except on aborted startup or crash-like shutdown. buf_pool_t::validate(): No longer attempt to validate n_flush[] against the number of BUF_IO_WRITE fixed blocks, because buf_page_t::flush_type no longer exists. buf_pool_t::watch_set(): Replaces buf_pool_watch_set(). Reduce mutex contention by separating the buf_pool.watch[] allocation and the insert into buf_pool.page_hash. buf_pool_t::page_hash_lock<bool exclusive>(): Acquire a buf_pool.page_hash latch. Replaces and extends buf_page_hash_lock_s_confirm() and buf_page_hash_lock_x_confirm(). buf_pool_t::READ_AHEAD_PAGES: Renamed from BUF_READ_AHEAD_PAGES. buf_pool_t::curr_size, old_size, read_ahead_area, n_pend_reads: Use Atomic_counter. buf_pool_t::running_out(): Replaces buf_LRU_buf_pool_running_out(). buf_pool_t::LRU_remove(): Remove a block from the LRU list and return its predecessor. Incorporates buf_LRU_adjust_hp(), which was removed. buf_page_get_gen(): Remove a redundant call of fsp_is_system_temporary(), for mode == BUF_GET_IF_IN_POOL_OR_WATCH, which is only used by BTR_DELETE_OP (purge), which is never invoked on temporary tables. buf_free_from_unzip_LRU_list_batch(): Avoid redundant assignments. buf_LRU_free_from_unzip_LRU_list(): Simplify the loop condition. buf_LRU_free_page(): Clarify the function comment. buf_flush_check_neighbor(), buf_flush_check_neighbors(): Rewrite the construction of the page hash range. We will hold the buf_pool.mutex for up to buf_pool.read_ahead_area (at most 64) consecutive lookups of buf_pool.page_hash. buf_flush_page_and_try_neighbors(): Remove. Merge to its only callers, and remove redundant operations in buf_flush_LRU_list_batch(). buf_read_ahead_random(), buf_read_ahead_linear(): Rewrite. 
Do not acquire buf_pool.mutex, and iterate directly with page_id_t. ut_2_power_up(): Remove. my_round_up_to_next_power() is inlined and avoids any loops. fil_page_get_prev(), fil_page_get_next(), fil_addr_is_null(): Remove. buf_flush_page(): Add a fil_space_t* parameter. Minimize the buf_pool.mutex hold time. buf_pool.n_flush[] is no longer updated atomically with the io_fix, and we will protect most buf_block_t fields with buf_block_t::lock. The function buf_flush_write_block_low() is removed and merged here. buf_page_init_for_read(): Use static linkage. Initialize the newly allocated block and acquire the exclusive buf_block_t::lock while not holding any mutex. IORequest::IORequest(): Remove the body. We only need to invoke set_punch_hole() in buf_flush_page() and nowhere else. buf_page_t::flush_type: Remove. Replaced by IORequest::flush_type. This field is only used during a fil_io() call. That function already takes IORequest as a parameter, so we had better introduce for the rarely changing field. buf_block_t::init(): Replaces buf_page_init(). buf_page_t::init(): Replaces buf_page_init_low(). buf_block_t::initialise(): Initialise many fields, but keep the buf_page_t::state(). Both buf_pool_t::validate() and buf_page_optimistic_get() requires that buf_page_t::in_file() be protected atomically with buf_page_t::in_page_hash and buf_page_t::in_LRU_list. buf_page_optimistic_get(): Now that buf_block_t::mutex no longer exists, we must check buf_page_t::io_fix() after acquiring the buf_pool.page_hash lock, to detect whether buf_page_init_for_read() has been initiated. We will also check the io_fix() before acquiring hash_lock in order to avoid unnecessary computation. The field buf_block_t::modify_clock (protected by buf_block_t::lock) allows buf_page_optimistic_get() to validate the block. buf_page_t::real_size: Remove. It was only used while flushing pages of page_compressed tables. 
buf_page_encrypt(): Add an output parameter that allows us ot eliminate buf_page_t::real_size. Replace a condition with debug assertion. buf_page_should_punch_hole(): Remove. buf_dblwr_t::add_to_batch(): Replaces buf_dblwr_add_to_batch(). Add the parameter size (to replace buf_page_t::real_size). buf_dblwr_t::write_single_page(): Replaces buf_dblwr_write_single_page(). Add the parameter size (to replace buf_page_t::real_size). fil_system_t::detach(): Replaces fil_space_detach(). Ensure that fil_validate() will not be violated even if fil_system.mutex is released and reacquired. fil_node_t::complete_io(): Renamed from fil_node_complete_io(). fil_node_t::close_to_free(): Replaces fil_node_close_to_free(). Avoid invoking fil_node_t::close() because fil_system.n_open has already been decremented in fil_space_t::detach(). BUF_BLOCK_READY_FOR_USE: Remove. Directly use BUF_BLOCK_MEMORY. BUF_BLOCK_ZIP_DIRTY: Remove. Directly use BUF_BLOCK_ZIP_PAGE, and distinguish dirty pages by buf_page_t::oldest_modification(). BUF_BLOCK_POOL_WATCH: Remove. Use BUF_BLOCK_NOT_USED instead. This state was only being used for buf_page_t that are in buf_pool.watch. buf_pool_t::watch[]: Remove pointer indirection. buf_page_t::in_flush_list: Remove. It was set if and only if buf_page_t::oldest_modification() is nonzero. buf_page_decrypt_after_read(), buf_corrupt_page_release(), buf_page_check_corrupt(): Change the const fil_space_t* parameter to const fil_node_t& so that we can report the correct file name. buf_page_monitor(): Declare as an ATTRIBUTE_COLD global function. buf_page_io_complete(): Split to buf_page_read_complete() and buf_page_write_complete(). buf_dblwr_t::in_use: Remove. buf_dblwr_t::buf_block_array: Add IORequest::flush_t. buf_dblwr_sync_datafiles(): Remove. It was a useless wrapper of os_aio_wait_until_no_pending_writes(). buf_flush_write_complete(): Declare static, not global. Add the parameter IORequest::flush_t. buf_flush_freed_page(): Simplify the code. 
recv_sys_t::flush_lru: Renamed from flush_type and changed to bool. fil_read(), fil_write(): Replaced with direct use of fil_io(). fil_buffering_disabled(): Remove. Check srv_file_flush_method directly. fil_mutex_enter_and_prepare_for_io(): Return the resolved fil_space_t* to avoid a duplicated lookup in the caller. fil_report_invalid_page_access(): Clean up the parameters. fil_io(): Return fil_io_t, which comprises fil_node_t and error code. Always invoke fil_space_t::acquire_for_io() and let either the sync=true caller or fil_aio_callback() invoke fil_space_t::release_for_io(). fil_aio_callback(): Rewrite to replace buf_page_io_complete(). fil_check_pending_operations(): Remove a parameter, and remove some redundant lookups. fil_node_close_to_free(): Wait for n_pending==0. Because we no longer do an extra lookup of the tablespace between fil_io() and the completion of the operation, we must give fil_node_t::complete_io() a chance to decrement the counter. fil_close_tablespace(): Remove unused parameter trx, and document that this is only invoked during the error handling of IMPORT TABLESPACE. row_import_discard_changes(): Merged with the only caller, row_import_cleanup(). Do not lock up the data dictionary while invoking fil_close_tablespace(). logs_empty_and_mark_files_at_shutdown(): Do not invoke fil_close_all_files(), to avoid a !needs_flush assertion failure on fil_node_t::close(). innodb_shutdown(): Invoke os_aio_free() before fil_close_all_files(). fil_close_all_files(): Invoke fil_flush_file_spaces() to ensure proper durability. thread_pool::unbind(): Fix a crash that would occur on Windows after srv_thread_pool->disable_aio() and os_file_close(). This fix was submitted by Vladislav Vaintroub. Thanks to Matthias Leich and Axel Schwenke for extensive testing, Vladislav Vaintroub for helpful comments, and Eugene Kosov for a review.
813 lines
21 KiB
C++
813 lines
21 KiB
C++
/*****************************************************************************
|
|
|
|
Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved.
|
|
Copyright (c) 2017, 2020, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
|
|
|
*****************************************************************************/
|
|
|
|
/**************************************************//**
|
|
@file buf/buf0dump.cc
|
|
Implements a buffer pool dump/load.
|
|
|
|
Created April 08, 2011 Vasil Dimov
|
|
*******************************************************/
|
|
|
|
#include "my_global.h"
#include "mysqld.h"
#include "my_sys.h"

#include "mysql/psi/mysql_stage.h"
#include "mysql/psi/psi.h"

#include "buf0buf.h"
#include "buf0dump.h"
#include "dict0dict.h"
#include "os0file.h"
#include "os0thread.h"
#include "srv0srv.h"
#include "srv0start.h"
#include "sync0rw.h"
#include "ut0byte.h"

#include <algorithm>
#include <atomic>

#include "mysql/service_wsrep.h" /* wsrep_recovery */
#include <my_service_manager.h>
|
|
|
|
/** Submit the buffer pool dump/load task to the thread pool, unless
dump/load has not been enabled yet or the task is already running.
Declared here because buf_dump_start() and buf_load_start() call it
before its definition further down in this file. */
static void buf_do_load_dump();
|
|
|
|
/** Severity of a dump/load status message. It selects the log level
that buf_dump_status() and buf_load_status() use when echoing the
message to the server log. */
enum status_severity {
	STATUS_INFO,	/*!< message is logged through ib::info() */
	STATUS_ERR	/*!< message is logged through ib::error() */
};
|
|
|
|
/** Evaluates to true as soon as srv_shutdown_state is anything other
than SRV_SHUTDOWN_NONE. The long-running loops in this file poll it so
that they can stop early. */
#define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE)
|
|
|
|
/* Flags that tell the buffer pool dump/load task which action it should
take the next time it runs. buf_dump_start() and buf_load_start() set
them; buf_dump_load_func(), which runs as a thread pool task, reads and
clears them. Because the writer and the reader may be different threads,
the flags are std::atomic: volatile alone gives no inter-thread
synchronization guarantee. */
static std::atomic<bool>	buf_dump_should_start{false};
static std::atomic<bool>	buf_load_should_start{false};

/* Set by buf_load_abort() to ask a running buf_load() to stop.
buf_load() polls it after every page read request and clears it both
when it starts and when it honours the request. */
static std::atomic<bool>	buf_load_abort_flag{false};
|
|
|
|
/** Request a buffer pool dump.
Raises buf_dump_should_start and then asks buf_do_load_dump() to schedule
the dump/load task. The flag is set first so that the task sees the
request when it runs. */
void buf_dump_start()
{
  buf_dump_should_start= true;
  buf_do_load_dump();
}
|
|
|
|
/** Request a buffer pool load.
Raises buf_load_should_start and then asks buf_do_load_dump() to schedule
the dump/load task. The flag is set first so that the task sees the
request when it runs. */
void buf_load_start()
{
  buf_load_should_start= true;
  buf_do_load_dump();
}
|
|
|
|
/*****************************************************************//**
|
|
Sets the global variable that feeds MySQL's innodb_buffer_pool_dump_status
|
|
to the specified string. The format and the following parameters are the
|
|
same as the ones used for printf(3). The value of this variable can be
|
|
retrieved by:
|
|
SELECT variable_value FROM information_schema.global_status WHERE
|
|
variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS';
|
|
or by:
|
|
SHOW STATUS LIKE 'innodb_buffer_pool_dump_status'; */
|
|
static MY_ATTRIBUTE((nonnull, format(printf, 2, 3)))
|
|
void
|
|
buf_dump_status(
|
|
/*============*/
|
|
enum status_severity severity,/*!< in: status severity */
|
|
const char* fmt, /*!< in: format */
|
|
...) /*!< in: extra parameters according
|
|
to fmt */
|
|
{
|
|
va_list ap;
|
|
|
|
va_start(ap, fmt);
|
|
|
|
vsnprintf(
|
|
export_vars.innodb_buffer_pool_dump_status,
|
|
sizeof(export_vars.innodb_buffer_pool_dump_status),
|
|
fmt, ap);
|
|
|
|
switch (severity) {
|
|
case STATUS_INFO:
|
|
ib::info() << export_vars.innodb_buffer_pool_dump_status;
|
|
break;
|
|
|
|
case STATUS_ERR:
|
|
ib::error() << export_vars.innodb_buffer_pool_dump_status;
|
|
break;
|
|
}
|
|
|
|
va_end(ap);
|
|
}
|
|
|
|
/*****************************************************************//**
|
|
Sets the global variable that feeds MySQL's innodb_buffer_pool_load_status
|
|
to the specified string. The format and the following parameters are the
|
|
same as the ones used for printf(3). The value of this variable can be
|
|
retrieved by:
|
|
SELECT variable_value FROM information_schema.global_status WHERE
|
|
variable_name = 'INNODB_BUFFER_POOL_LOAD_STATUS';
|
|
or by:
|
|
SHOW STATUS LIKE 'innodb_buffer_pool_load_status'; */
|
|
static MY_ATTRIBUTE((nonnull, format(printf, 2, 3)))
|
|
void
|
|
buf_load_status(
|
|
/*============*/
|
|
enum status_severity severity,/*!< in: status severity */
|
|
const char* fmt, /*!< in: format */
|
|
...) /*!< in: extra parameters according to fmt */
|
|
{
|
|
va_list ap;
|
|
|
|
va_start(ap, fmt);
|
|
|
|
vsnprintf(
|
|
export_vars.innodb_buffer_pool_load_status,
|
|
sizeof(export_vars.innodb_buffer_pool_load_status),
|
|
fmt, ap);
|
|
|
|
switch (severity) {
|
|
case STATUS_INFO:
|
|
ib::info() << export_vars.innodb_buffer_pool_load_status;
|
|
break;
|
|
|
|
case STATUS_ERR:
|
|
ib::error() << export_vars.innodb_buffer_pool_load_status;
|
|
break;
|
|
}
|
|
|
|
va_end(ap);
|
|
}
|
|
|
|
/** Returns the directory path where the buffer pool dump file will be created.
|
|
@return directory path */
|
|
static
|
|
const char*
|
|
get_buf_dump_dir()
|
|
{
|
|
const char* dump_dir;
|
|
|
|
/* The dump file should be created in the default data directory if
|
|
innodb_data_home_dir is set as an empty string. */
|
|
if (!*srv_data_home) {
|
|
dump_dir = fil_path_to_mysql_datadir;
|
|
} else {
|
|
dump_dir = srv_data_home;
|
|
}
|
|
|
|
return(dump_dir);
|
|
}
|
|
|
|
/** Generate the path to the buffer pool dump/load file.
|
|
@param[out] path generated path
|
|
@param[in] path_size size of 'path', used as in snprintf(3). */
|
|
static void buf_dump_generate_path(char *path, size_t path_size)
|
|
{
|
|
char buf[FN_REFLEN];
|
|
|
|
mysql_mutex_lock(&LOCK_global_system_variables);
|
|
snprintf(buf, sizeof(buf), "%s%c%s", get_buf_dump_dir(),
|
|
OS_PATH_SEPARATOR, srv_buf_dump_filename);
|
|
mysql_mutex_unlock(&LOCK_global_system_variables);
|
|
|
|
os_file_type_t type;
|
|
bool exists = false;
|
|
bool ret;
|
|
|
|
ret = os_file_status(buf, &exists, &type);
|
|
|
|
/* For realpath() to succeed the file must exist. */
|
|
|
|
if (ret && exists) {
|
|
/* my_realpath() assumes the destination buffer is big enough
|
|
to hold FN_REFLEN bytes. */
|
|
ut_a(path_size >= FN_REFLEN);
|
|
|
|
my_realpath(path, buf, 0);
|
|
} else {
|
|
/* If it does not exist, then resolve only srv_data_home
|
|
and append srv_buf_dump_filename to it. */
|
|
char srv_data_home_full[FN_REFLEN];
|
|
|
|
my_realpath(srv_data_home_full, get_buf_dump_dir(), 0);
|
|
|
|
if (srv_data_home_full[strlen(srv_data_home_full) - 1]
|
|
== OS_PATH_SEPARATOR) {
|
|
|
|
snprintf(path, path_size, "%s%s",
|
|
srv_data_home_full,
|
|
srv_buf_dump_filename);
|
|
} else {
|
|
snprintf(path, path_size, "%s%c%s",
|
|
srv_data_home_full,
|
|
OS_PATH_SEPARATOR,
|
|
srv_buf_dump_filename);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************//**
|
|
Perform a buffer pool dump into the file specified by
|
|
innodb_buffer_pool_filename. If any errors occur then the value of
|
|
innodb_buffer_pool_dump_status will be set accordingly, see buf_dump_status().
|
|
The dump filename can be specified by (relative to srv_data_home):
|
|
SET GLOBAL innodb_buffer_pool_filename='filename'; */
|
|
static
|
|
void
|
|
buf_dump(
|
|
/*=====*/
|
|
ibool obey_shutdown) /*!< in: quit if we are in a shutting down
|
|
state */
|
|
{
|
|
#define SHOULD_QUIT() (SHUTTING_DOWN() && obey_shutdown)
|
|
|
|
char full_filename[OS_FILE_MAX_PATH];
|
|
char tmp_filename[OS_FILE_MAX_PATH + sizeof "incomplete"];
|
|
char now[32];
|
|
FILE* f;
|
|
int ret;
|
|
|
|
buf_dump_generate_path(full_filename, sizeof(full_filename));
|
|
|
|
snprintf(tmp_filename, sizeof(tmp_filename),
|
|
"%s.incomplete", full_filename);
|
|
|
|
buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) to %s",
|
|
full_filename);
|
|
|
|
#if defined(__GLIBC__) || defined(__WIN__) || O_CLOEXEC == 0
|
|
f = fopen(tmp_filename, "w" STR_O_CLOEXEC);
|
|
#else
|
|
{
|
|
int fd;
|
|
fd = open(tmp_filename, O_CREAT | O_TRUNC | O_CLOEXEC | O_WRONLY, 0640);
|
|
if (fd >= 0) {
|
|
f = fdopen(fd, "w");
|
|
}
|
|
else {
|
|
f = NULL;
|
|
}
|
|
}
|
|
#endif
|
|
if (f == NULL) {
|
|
buf_dump_status(STATUS_ERR,
|
|
"Cannot open '%s' for writing: %s",
|
|
tmp_filename, strerror(errno));
|
|
return;
|
|
}
|
|
const buf_page_t* bpage;
|
|
page_id_t* dump;
|
|
ulint n_pages;
|
|
ulint j;
|
|
|
|
mutex_enter(&buf_pool.mutex);
|
|
|
|
n_pages = UT_LIST_GET_LEN(buf_pool.LRU);
|
|
|
|
/* skip empty buffer pools */
|
|
if (n_pages == 0) {
|
|
mutex_exit(&buf_pool.mutex);
|
|
goto done;
|
|
}
|
|
|
|
if (srv_buf_pool_dump_pct != 100) {
|
|
ulint t_pages;
|
|
|
|
/* limit the number of total pages dumped to X% of the
|
|
total number of pages */
|
|
t_pages = buf_pool.curr_size * srv_buf_pool_dump_pct / 100;
|
|
if (n_pages > t_pages) {
|
|
buf_dump_status(STATUS_INFO,
|
|
"Restricted to " ULINTPF
|
|
" pages due to "
|
|
"innodb_buf_pool_dump_pct=%lu",
|
|
t_pages, srv_buf_pool_dump_pct);
|
|
n_pages = t_pages;
|
|
}
|
|
|
|
if (n_pages == 0) {
|
|
n_pages = 1;
|
|
}
|
|
}
|
|
|
|
dump = static_cast<page_id_t*>(ut_malloc_nokey(
|
|
n_pages * sizeof(*dump)));
|
|
|
|
if (dump == NULL) {
|
|
mutex_exit(&buf_pool.mutex);
|
|
fclose(f);
|
|
buf_dump_status(STATUS_ERR,
|
|
"Cannot allocate " ULINTPF " bytes: %s",
|
|
(ulint) (n_pages * sizeof(*dump)),
|
|
strerror(errno));
|
|
/* leave tmp_filename to exist */
|
|
return;
|
|
}
|
|
|
|
for (bpage = UT_LIST_GET_FIRST(buf_pool.LRU), j = 0;
|
|
bpage != NULL && j < n_pages;
|
|
bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
|
|
|
|
ut_a(bpage->in_file());
|
|
const page_id_t id(bpage->id());
|
|
|
|
if (id.space() == SRV_TMP_SPACE_ID) {
|
|
/* Ignore the innodb_temporary tablespace. */
|
|
continue;
|
|
}
|
|
|
|
dump[j++] = id;
|
|
}
|
|
|
|
mutex_exit(&buf_pool.mutex);
|
|
|
|
ut_a(j <= n_pages);
|
|
n_pages = j;
|
|
|
|
for (j = 0; j < n_pages && !SHOULD_QUIT(); j++) {
|
|
ret = fprintf(f, "%u,%u\n",
|
|
dump[j].space(), dump[j].page_no());
|
|
if (ret < 0) {
|
|
ut_free(dump);
|
|
fclose(f);
|
|
buf_dump_status(STATUS_ERR,
|
|
"Cannot write to '%s': %s",
|
|
tmp_filename, strerror(errno));
|
|
/* leave tmp_filename to exist */
|
|
return;
|
|
}
|
|
if (SHUTTING_DOWN() && !(j & 1023)) {
|
|
service_manager_extend_timeout(
|
|
INNODB_EXTEND_TIMEOUT_INTERVAL,
|
|
"Dumping buffer pool page "
|
|
ULINTPF "/" ULINTPF, j + 1, n_pages);
|
|
}
|
|
}
|
|
|
|
ut_free(dump);
|
|
|
|
done:
|
|
ret = fclose(f);
|
|
if (ret != 0) {
|
|
buf_dump_status(STATUS_ERR,
|
|
"Cannot close '%s': %s",
|
|
tmp_filename, strerror(errno));
|
|
return;
|
|
}
|
|
/* else */
|
|
|
|
ret = unlink(full_filename);
|
|
if (ret != 0 && errno != ENOENT) {
|
|
buf_dump_status(STATUS_ERR,
|
|
"Cannot delete '%s': %s",
|
|
full_filename, strerror(errno));
|
|
/* leave tmp_filename to exist */
|
|
return;
|
|
}
|
|
/* else */
|
|
|
|
ret = rename(tmp_filename, full_filename);
|
|
if (ret != 0) {
|
|
buf_dump_status(STATUS_ERR,
|
|
"Cannot rename '%s' to '%s': %s",
|
|
tmp_filename, full_filename,
|
|
strerror(errno));
|
|
/* leave tmp_filename to exist */
|
|
return;
|
|
}
|
|
/* else */
|
|
|
|
/* success */
|
|
|
|
ut_sprintf_timestamp(now);
|
|
|
|
buf_dump_status(STATUS_INFO,
|
|
"Buffer pool(s) dump completed at %s", now);
|
|
|
|
/* Though dumping doesn't related to an incomplete load,
|
|
we reset this to 0 here to indicate that a shutdown can also perform
|
|
a dump */
|
|
export_vars.innodb_buffer_pool_load_incomplete = 0;
|
|
}
|
|
|
|
/*****************************************************************//**
|
|
Artificially delay the buffer pool loading if necessary. The idea of
|
|
this function is to prevent hogging the server with IO and slowing down
|
|
too much normal client queries. */
|
|
UNIV_INLINE
|
|
void
|
|
buf_load_throttle_if_needed(
|
|
/*========================*/
|
|
ulint* last_check_time, /*!< in/out: milliseconds since epoch
|
|
of the last time we did check if
|
|
throttling is needed, we do the check
|
|
every srv_io_capacity IO ops. */
|
|
ulint* last_activity_count,
|
|
ulint n_io) /*!< in: number of IO ops done since
|
|
buffer pool load has started */
|
|
{
|
|
if (n_io % srv_io_capacity < srv_io_capacity - 1) {
|
|
return;
|
|
}
|
|
|
|
if (*last_check_time == 0 || *last_activity_count == 0) {
|
|
*last_check_time = ut_time_ms();
|
|
*last_activity_count = srv_get_activity_count();
|
|
return;
|
|
}
|
|
|
|
/* srv_io_capacity IO operations have been performed by buffer pool
|
|
load since the last time we were here. */
|
|
|
|
/* If no other activity, then keep going without any delay. */
|
|
if (srv_get_activity_count() == *last_activity_count) {
|
|
return;
|
|
}
|
|
|
|
/* There has been other activity, throttle. */
|
|
|
|
ulint now = ut_time_ms();
|
|
ulint elapsed_time = now - *last_check_time;
|
|
|
|
/* Notice that elapsed_time is not the time for the last
|
|
srv_io_capacity IO operations performed by BP load. It is the
|
|
time elapsed since the last time we detected that there has been
|
|
other activity. This has a small and acceptable deficiency, e.g.:
|
|
1. BP load runs and there is no other activity.
|
|
2. Other activity occurs, we run N IO operations after that and
|
|
enter here (where 0 <= N < srv_io_capacity).
|
|
3. last_check_time is very old and we do not sleep at this time, but
|
|
only update last_check_time and last_activity_count.
|
|
4. We run srv_io_capacity more IO operations and call this function
|
|
again.
|
|
5. There has been more other activity and thus we enter here.
|
|
6. Now last_check_time is recent and we sleep if necessary to prevent
|
|
more than srv_io_capacity IO operations per second.
|
|
The deficiency is that we could have slept at 3., but for this we
|
|
would have to update last_check_time before the
|
|
"cur_activity_count == *last_activity_count" check and calling
|
|
ut_time_ms() that often may turn out to be too expensive. */
|
|
|
|
if (elapsed_time < 1000 /* 1 sec (1000 milli secs) */) {
|
|
os_thread_sleep((1000 - elapsed_time) * 1000 /* micro secs */);
|
|
}
|
|
|
|
*last_check_time = ut_time_ms();
|
|
*last_activity_count = srv_get_activity_count();
|
|
}
|
|
|
|
/*****************************************************************//**
|
|
Perform a buffer pool load from the file specified by
|
|
innodb_buffer_pool_filename. If any errors occur then the value of
|
|
innodb_buffer_pool_load_status will be set accordingly, see buf_load_status().
|
|
The dump filename can be specified by (relative to srv_data_home):
|
|
SET GLOBAL innodb_buffer_pool_filename='filename'; */
|
|
static
|
|
void
|
|
buf_load()
|
|
/*======*/
|
|
{
|
|
char full_filename[OS_FILE_MAX_PATH];
|
|
char now[32];
|
|
FILE* f;
|
|
page_id_t* dump;
|
|
ulint dump_n;
|
|
ulint i;
|
|
ulint space_id;
|
|
ulint page_no;
|
|
int fscanf_ret;
|
|
|
|
/* Ignore any leftovers from before */
|
|
buf_load_abort_flag = false;
|
|
|
|
buf_dump_generate_path(full_filename, sizeof(full_filename));
|
|
|
|
buf_load_status(STATUS_INFO,
|
|
"Loading buffer pool(s) from %s", full_filename);
|
|
|
|
f = fopen(full_filename, "r" STR_O_CLOEXEC);
|
|
if (f == NULL) {
|
|
buf_load_status(STATUS_INFO,
|
|
"Cannot open '%s' for reading: %s",
|
|
full_filename, strerror(errno));
|
|
return;
|
|
}
|
|
/* else */
|
|
|
|
/* First scan the file to estimate how many entries are in it.
|
|
This file is tiny (approx 500KB per 1GB buffer pool), reading it
|
|
two times is fine. */
|
|
dump_n = 0;
|
|
while (fscanf(f, ULINTPF "," ULINTPF, &space_id, &page_no) == 2
|
|
&& !SHUTTING_DOWN()) {
|
|
dump_n++;
|
|
}
|
|
|
|
if (!SHUTTING_DOWN() && !feof(f)) {
|
|
/* fscanf() returned != 2 */
|
|
const char* what;
|
|
if (ferror(f)) {
|
|
what = "reading";
|
|
} else {
|
|
what = "parsing";
|
|
}
|
|
fclose(f);
|
|
buf_load_status(STATUS_ERR, "Error %s '%s',"
|
|
" unable to load buffer pool (stage 1)",
|
|
what, full_filename);
|
|
return;
|
|
}
|
|
|
|
/* If dump is larger than the buffer pool(s), then we ignore the
|
|
extra trailing. This could happen if a dump is made, then buffer
|
|
pool is shrunk and then load is attempted. */
|
|
dump_n = std::min(dump_n, buf_pool.get_n_pages());
|
|
|
|
if (dump_n != 0) {
|
|
dump = static_cast<page_id_t*>(ut_malloc_nokey(
|
|
dump_n * sizeof(*dump)));
|
|
} else {
|
|
fclose(f);
|
|
ut_sprintf_timestamp(now);
|
|
buf_load_status(STATUS_INFO,
|
|
"Buffer pool(s) load completed at %s"
|
|
" (%s was empty)", now, full_filename);
|
|
return;
|
|
}
|
|
|
|
if (dump == NULL) {
|
|
fclose(f);
|
|
buf_load_status(STATUS_ERR,
|
|
"Cannot allocate " ULINTPF " bytes: %s",
|
|
dump_n * sizeof(*dump),
|
|
strerror(errno));
|
|
return;
|
|
}
|
|
|
|
rewind(f);
|
|
|
|
export_vars.innodb_buffer_pool_load_incomplete = 1;
|
|
|
|
for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
|
|
fscanf_ret = fscanf(f, ULINTPF "," ULINTPF,
|
|
&space_id, &page_no);
|
|
|
|
if (fscanf_ret != 2) {
|
|
if (feof(f)) {
|
|
break;
|
|
}
|
|
/* else */
|
|
|
|
ut_free(dump);
|
|
fclose(f);
|
|
buf_load_status(STATUS_ERR,
|
|
"Error parsing '%s', unable"
|
|
" to load buffer pool (stage 2)",
|
|
full_filename);
|
|
return;
|
|
}
|
|
|
|
if (space_id > ULINT32_MASK || page_no > ULINT32_MASK) {
|
|
ut_free(dump);
|
|
fclose(f);
|
|
buf_load_status(STATUS_ERR,
|
|
"Error parsing '%s': bogus"
|
|
" space,page " ULINTPF "," ULINTPF
|
|
" at line " ULINTPF ","
|
|
" unable to load buffer pool",
|
|
full_filename,
|
|
space_id, page_no,
|
|
i);
|
|
return;
|
|
}
|
|
|
|
dump[i] = page_id_t(space_id, page_no);
|
|
}
|
|
|
|
/* Set dump_n to the actual number of initialized elements,
|
|
i could be smaller than dump_n here if the file got truncated after
|
|
we read it the first time. */
|
|
dump_n = i;
|
|
|
|
fclose(f);
|
|
|
|
if (dump_n == 0) {
|
|
ut_free(dump);
|
|
ut_sprintf_timestamp(now);
|
|
buf_load_status(STATUS_INFO,
|
|
"Buffer pool(s) load completed at %s"
|
|
" (%s was empty or had errors)", now, full_filename);
|
|
return;
|
|
}
|
|
|
|
if (!SHUTTING_DOWN()) {
|
|
std::sort(dump, dump + dump_n);
|
|
}
|
|
|
|
ulint last_check_time = 0;
|
|
ulint last_activity_cnt = 0;
|
|
|
|
/* Avoid calling the expensive fil_space_acquire_silent() for each
|
|
page within the same tablespace. dump[] is sorted by (space, page),
|
|
so all pages from a given tablespace are consecutive. */
|
|
ulint cur_space_id = dump[0].space();
|
|
fil_space_t* space = fil_space_acquire_silent(cur_space_id);
|
|
ulint zip_size = space ? space->zip_size() : 0;
|
|
|
|
PSI_stage_progress* pfs_stage_progress __attribute__((unused))
|
|
= mysql_set_stage(srv_stage_buffer_pool_load.m_key);
|
|
mysql_stage_set_work_estimated(pfs_stage_progress, dump_n);
|
|
mysql_stage_set_work_completed(pfs_stage_progress, 0);
|
|
|
|
for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
|
|
|
|
/* space_id for this iteration of the loop */
|
|
const ulint this_space_id = dump[i].space();
|
|
|
|
if (this_space_id == SRV_TMP_SPACE_ID) {
|
|
/* Ignore the innodb_temporary tablespace. */
|
|
continue;
|
|
}
|
|
|
|
if (this_space_id != cur_space_id) {
|
|
if (space != NULL) {
|
|
space->release();
|
|
}
|
|
|
|
cur_space_id = this_space_id;
|
|
space = fil_space_acquire_silent(cur_space_id);
|
|
|
|
if (space != NULL) {
|
|
zip_size = space->zip_size();
|
|
}
|
|
}
|
|
|
|
/* JAN: TODO: As we use background page read below,
|
|
if tablespace is encrypted we cant use it. */
|
|
if (space == NULL ||
|
|
(space && space->crypt_data &&
|
|
space->crypt_data->encryption != FIL_ENCRYPTION_OFF &&
|
|
space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED)) {
|
|
continue;
|
|
}
|
|
|
|
buf_read_page_background(dump[i], zip_size, true);
|
|
|
|
if (buf_load_abort_flag) {
|
|
if (space != NULL) {
|
|
space->release();
|
|
}
|
|
buf_load_abort_flag = false;
|
|
ut_free(dump);
|
|
buf_load_status(
|
|
STATUS_INFO,
|
|
"Buffer pool(s) load aborted on request");
|
|
/* Premature end, set estimated = completed = i and
|
|
end the current stage event. */
|
|
|
|
mysql_stage_set_work_estimated(pfs_stage_progress, i);
|
|
mysql_stage_set_work_completed(pfs_stage_progress, i);
|
|
|
|
mysql_end_stage();
|
|
return;
|
|
}
|
|
|
|
buf_load_throttle_if_needed(
|
|
&last_check_time, &last_activity_cnt, i);
|
|
|
|
#ifdef UNIV_DEBUG
|
|
if ((i+1) >= srv_buf_pool_load_pages_abort) {
|
|
buf_load_abort_flag = true;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
if (space != NULL) {
|
|
space->release();
|
|
}
|
|
|
|
ut_free(dump);
|
|
|
|
ut_sprintf_timestamp(now);
|
|
|
|
if (i == dump_n) {
|
|
buf_load_status(STATUS_INFO,
|
|
"Buffer pool(s) load completed at %s", now);
|
|
export_vars.innodb_buffer_pool_load_incomplete = 0;
|
|
} else if (!buf_load_abort_flag) {
|
|
buf_load_status(STATUS_INFO,
|
|
"Buffer pool(s) load aborted due to user instigated abort at %s",
|
|
now);
|
|
/* intentionally don't reset innodb_buffer_pool_load_incomplete
|
|
as we don't want a shutdown to save the buffer pool */
|
|
} else {
|
|
buf_load_status(STATUS_INFO,
|
|
"Buffer pool(s) load aborted due to shutdown at %s",
|
|
now);
|
|
/* intentionally don't reset innodb_buffer_pool_load_incomplete
|
|
as we want to abort without saving the buffer pool */
|
|
}
|
|
|
|
/* Make sure that estimated = completed when we end. */
|
|
mysql_stage_set_work_completed(pfs_stage_progress, dump_n);
|
|
/* End the stage progress event. */
|
|
mysql_end_stage();
|
|
}
|
|
|
|
/** Abort a currently running buffer pool load.
This only raises buf_load_abort_flag. buf_load() polls the flag after
each page read request and stops once it sees it set. */
void buf_load_abort()
{
  buf_load_abort_flag= true;
}
|
|
|
|
/*****************************************************************//**
This is the main task for buffer pool dump/load. When scheduled, it
performs a dump or a load depending on the server state and on the
request flags:
1. On its first execution, and if srv_buffer_pool_load_at_startup is set,
   it loads the buffer pool (skipped during wsrep recovery when built
   WITH_WSREP).
2. While the server is not shutting down, it serves pending dump and load
   requests and returns as soon as none is pending.
3. If a shutdown is observed instead, it performs the final dump when
   srv_buffer_pool_dump_at_shutdown is set and srv_fast_shutdown is not 2,
   unless a load was left incomplete (or, WITH_WSREP, wsrep recovery is
   in progress). */
static void buf_dump_load_func(void *)
{
	ut_ad(!srv_read_only_mode);
	/* True only during the first execution of this task. */
	static bool first_time = true;
	if (first_time && srv_buffer_pool_load_at_startup) {

#ifdef WITH_WSREP
		if (!get_wsrep_recovery()) {
#endif /* WITH_WSREP */
			buf_load();
#ifdef WITH_WSREP
		}
#endif /* WITH_WSREP */
	}
	first_time = false;

	while (!SHUTTING_DOWN()) {
		/* Each flag is cleared before the work starts, so a
		request raised during the work is seen by the check at
		the end of this iteration. */
		if (buf_dump_should_start) {
			buf_dump_should_start = false;
			buf_dump(true);
		}
		if (buf_load_should_start) {
			buf_load_should_start = false;
			buf_load();
		}

		/* Nothing more was requested meanwhile: the task is done. */
		if (!buf_dump_should_start && !buf_load_should_start) {
			return;
		}
	}

	/* In shutdown */
	if (srv_buffer_pool_dump_at_shutdown && srv_fast_shutdown != 2) {
		if (export_vars.innodb_buffer_pool_load_incomplete) {
			buf_dump_status(STATUS_INFO,
				"Dumping of buffer pool not started"
				" as load was incomplete");
#ifdef WITH_WSREP
		} else if (get_wsrep_recovery()) {
			/* Deliberately empty: no dump during wsrep recovery. */
#endif /* WITH_WSREP */
		} else {
			buf_dump(false/* do complete dump at shutdown */);
		}
	}
}
|
|
|
|
|
|
/* Execute task with max.concurrency */
/* NOTE(review): the argument 1 is presumably the concurrency limit of the
group, i.e. at most one instance of the task runs at a time; confirm
against tpool::task_group. */
static tpool::task_group tpool_group(1);
/* The dump/load task itself: runs buf_dump_load_func() within tpool_group.
buf_load_dump_end() waits on it. */
static tpool::waitable_task buf_dump_load_task(buf_dump_load_func, &tpool_group);
/* Set to true by buf_load_at_startup(); until then buf_do_load_dump()
does not submit the task. */
static bool load_dump_enabled;
|
|
|
|
/** Enable the dump/load task and, if srv_buffer_pool_load_at_startup is
set, schedule it so that the buffer pool is loaded asynchronously.
Setting load_dump_enabled here is what allows buf_do_load_dump() to
submit the task from now on. */
void buf_load_at_startup()
{
  load_dump_enabled= true;
  if (srv_buffer_pool_load_at_startup)
    buf_do_load_dump();
}
|
|
|
|
static void buf_do_load_dump()
|
|
{
|
|
if (load_dump_enabled && !buf_dump_load_task.is_running())
|
|
srv_thread_pool->submit_task(&buf_dump_load_task);
|
|
}
|
|
|
|
/** Wait for a currently running dump/load task to finish.
Must only be called once shutdown has started, as the debug assertion
checks. */
void buf_load_dump_end()
{
  ut_ad(SHUTTING_DOWN());
  buf_dump_load_task.wait();
}
|