mirror of
https://github.com/MariaDB/server.git
synced 2026-05-16 20:07:13 +02:00
MDEV-27416 InnoDB hang in buf_flush_wait_flushed(), on log checkpoint
InnoDB could sometimes hang when triggering a log checkpoint. This is
due to commit 7b1252c03d (MDEV-24278),
which introduced an untimed wait to buf_flush_page_cleaner().
The hang was noticed by occasional failures of IMPORT TABLESPACE tests,
such as innodb.innodb-wl5522, which would (unnecessarily) invoke
log_make_checkpoint() from row_import_cleanup().
The reason of the hang was that buf_flush_page_cleaner() would enter
untimed sleep despite buf_flush_sync_lsn being set. The exact failure
scenario is unclear, because buf_flush_sync_lsn should actually be
protected by buf_pool.flush_list_mutex. We prevent the hang by
invoking buf_pool.page_cleaner_set_idle(false) whenever we are
setting buf_flush_sync_lsn and signaling buf_pool.do_flush_list.
The bulk of these changes was originally developed as a preparation
for MDEV-26827, to invoke buf_flush_list() from fewer threads,
and tested on 10.6 by Matthias Leich.
This fix was tested by running 100 repetitions of 100 concurrent instances
of the test innodb.innodb-wl5522 on a RelWithDebInfo build, using ext4fs
and innodb_flush_method=O_DIRECT on a SATA SSD with 4096-byte block size.
During the test, the call to log_make_checkpoint() in row_import_cleanup()
was present.
buf_flush_list(): Make static.
buf_flush_wait(): Wait for buf_pool.get_oldest_modification()
to reach a target, by work done in the buf_flush_page_cleaner.
If buf_flush_sync_lsn is going to be set, we will invoke
buf_pool.page_cleaner_set_idle(false).
buf_flush_ahead(): If buf_flush_sync_lsn or buf_flush_async_lsn
is going to be set and the page cleaner woken up, we will invoke
buf_pool.page_cleaner_set_idle(false).
buf_flush_wait_flushed(): Invoke buf_flush_wait().
buf_flush_sync(): Invoke recv_sys.apply() at the start in case
crash recovery is active. Invoke buf_flush_wait().
buf_flush_sync_batch(): A lower-level variant of buf_flush_sync()
that is only called by recv_sys_t::apply().
buf_flush_sync_for_checkpoint(): Do not trigger log apply
or checkpoint during recovery.
buf_dblwr_t::create(): Only initiate a buffer pool flush, not
a checkpoint.
row_import_cleanup(): Do not unnecessarily invoke log_make_checkpoint().
Invoking buf_flush_list_space() before starting to generate redo log
for the imported tablespace should suffice.
srv_prepare_to_delete_redo_log_file():
Set recv_sys.recovery_on in order to prevent
buf_flush_sync_for_checkpoint() from initiating a checkpoint
while the log is inaccessible. Remove a wait loop that is already
part of buf_flush_sync().
Do not invoke fil_names_clear() if the log is being upgraded,
because the FILE_MODIFY record is specific to the latest format.
create_log_file(): Clear recv_sys.recovery_on only after calling
log_make_checkpoint(), to prevent buf_flush_page_cleaner from
invoking a checkpoint.
innodb_shutdown(): Simplify the logic in mariadb-backup --prepare.
os_aio_wait_until_no_pending_writes(): Update the function comment.
Apart from row_quiesce_table_start() during FLUSH TABLES...FOR EXPORT,
this is being called by buf_flush_list_space(), which is invoked
by ALTER TABLE...IMPORT TABLESPACE as well as some encryption operations.
This commit is contained in:
parent
eab89f14ab
commit
4c3ad24413
11 changed files with 155 additions and 139 deletions
|
|
@ -329,6 +329,13 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
|
|||
|
||||
log_sys.log.write_header_durable(lsn);
|
||||
|
||||
ut_ad(srv_startup_is_before_trx_rollback_phase);
|
||||
if (create_new_db) {
|
||||
srv_startup_is_before_trx_rollback_phase = false;
|
||||
}
|
||||
|
||||
/* Enable checkpoints in buf_flush_page_cleaner(). */
|
||||
recv_sys.recovery_on = false;
|
||||
mysql_mutex_unlock(&log_sys.mutex);
|
||||
|
||||
log_make_checkpoint();
|
||||
|
|
@ -895,91 +902,74 @@ buffer pools. Flush the redo log buffer to the redo log file.
|
|||
@return lsn upto which data pages have been flushed. */
|
||||
static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists)
|
||||
{
|
||||
DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
|
||||
DBUG_ENTER("srv_prepare_to_delete_redo_log_file");
|
||||
|
||||
lsn_t flushed_lsn;
|
||||
ulint count = 0;
|
||||
/* Disable checkpoints in the page cleaner. */
|
||||
ut_ad(!recv_sys.recovery_on);
|
||||
recv_sys.recovery_on= true;
|
||||
|
||||
if (log_sys.log.subformat != 2) {
|
||||
srv_log_file_size = 0;
|
||||
}
|
||||
/* Clean the buffer pool. */
|
||||
buf_flush_sync();
|
||||
|
||||
for (;;) {
|
||||
/* Clean the buffer pool. */
|
||||
buf_flush_sync();
|
||||
if (log_sys.log.subformat != 2)
|
||||
srv_log_file_size= 0;
|
||||
|
||||
DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0););
|
||||
DBUG_PRINT("ib_log", ("After innodb_log_abort_1"));
|
||||
DBUG_EXECUTE_IF("innodb_log_abort_1", DBUG_RETURN(0););
|
||||
DBUG_PRINT("ib_log", ("After innodb_log_abort_1"));
|
||||
|
||||
mysql_mutex_lock(&log_sys.mutex);
|
||||
mysql_mutex_lock(&log_sys.mutex);
|
||||
const bool latest_format= (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED) ==
|
||||
log_t::FORMAT_10_5;
|
||||
lsn_t flushed_lsn= log_sys.get_lsn();
|
||||
|
||||
fil_names_clear(log_sys.get_lsn(), false);
|
||||
if (latest_format)
|
||||
{
|
||||
fil_names_clear(flushed_lsn, false);
|
||||
flushed_lsn= log_sys.get_lsn();
|
||||
}
|
||||
|
||||
flushed_lsn = log_sys.get_lsn();
|
||||
{
|
||||
const char *msg;
|
||||
if (!latest_format || srv_log_file_size == 0)
|
||||
{
|
||||
msg= "Upgrading redo log: ";
|
||||
same_size:
|
||||
ib::info() << msg << srv_log_file_size_requested << " bytes; LSN="
|
||||
<< flushed_lsn;
|
||||
}
|
||||
else if (old_exists && srv_log_file_size == srv_log_file_size_requested)
|
||||
{
|
||||
msg= srv_encrypt_log
|
||||
? "Encrypting redo log: " : "Removing redo log encryption: ";
|
||||
goto same_size;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (srv_encrypt_log == (my_bool)log_sys.is_encrypted())
|
||||
msg= srv_encrypt_log ? "Resizing encrypted" : "Resizing";
|
||||
else
|
||||
msg= srv_encrypt_log
|
||||
? "Encrypting and resizing"
|
||||
: "Removing encryption and resizing";
|
||||
|
||||
{
|
||||
ib::info info;
|
||||
if (srv_log_file_size == 0
|
||||
|| (log_sys.log.format & ~log_t::FORMAT_ENCRYPTED)
|
||||
!= log_t::FORMAT_10_5) {
|
||||
info << "Upgrading redo log: ";
|
||||
} else if (!old_exists
|
||||
|| srv_log_file_size
|
||||
!= srv_log_file_size_requested) {
|
||||
if (srv_encrypt_log
|
||||
== (my_bool)log_sys.is_encrypted()) {
|
||||
info << (srv_encrypt_log
|
||||
? "Resizing encrypted"
|
||||
: "Resizing");
|
||||
} else if (srv_encrypt_log) {
|
||||
info << "Encrypting and resizing";
|
||||
} else {
|
||||
info << "Removing encryption"
|
||||
" and resizing";
|
||||
}
|
||||
ib::info() << msg << " redo log from " << srv_log_file_size << " to "
|
||||
<< srv_log_file_size_requested
|
||||
<< " bytes; LSN=" << flushed_lsn;
|
||||
}
|
||||
}
|
||||
|
||||
info << " redo log from " << srv_log_file_size
|
||||
<< " to ";
|
||||
} else if (srv_encrypt_log) {
|
||||
info << "Encrypting redo log: ";
|
||||
} else {
|
||||
info << "Removing redo log encryption: ";
|
||||
}
|
||||
mysql_mutex_unlock(&log_sys.mutex);
|
||||
|
||||
info << srv_log_file_size_requested
|
||||
<< " bytes; LSN=" << flushed_lsn;
|
||||
}
|
||||
if (flushed_lsn != log_sys.get_flushed_lsn())
|
||||
{
|
||||
log_write_up_to(flushed_lsn, false);
|
||||
log_sys.log.flush();
|
||||
}
|
||||
|
||||
mysql_mutex_unlock(&log_sys.mutex);
|
||||
ut_ad(flushed_lsn == log_sys.get_lsn());
|
||||
ut_ad(!buf_pool.any_io_pending());
|
||||
|
||||
if (flushed_lsn != log_sys.get_flushed_lsn()) {
|
||||
log_write_up_to(flushed_lsn, false);
|
||||
log_sys.log.flush();
|
||||
}
|
||||
|
||||
ut_ad(flushed_lsn == log_sys.get_lsn());
|
||||
|
||||
/* Check if the buffer pools are clean. If not
|
||||
retry till it is clean. */
|
||||
if (ulint pending_io = buf_pool.io_pending()) {
|
||||
count++;
|
||||
/* Print a message every 60 seconds if we
|
||||
are waiting to clean the buffer pools */
|
||||
if (srv_print_verbose_log && count > 600) {
|
||||
ib::info() << "Waiting for "
|
||||
<< pending_io << " buffer "
|
||||
<< "page I/Os to complete";
|
||||
count = 0;
|
||||
}
|
||||
|
||||
os_thread_sleep(100000);
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
DBUG_RETURN(flushed_lsn);
|
||||
DBUG_RETURN(flushed_lsn);
|
||||
}
|
||||
|
||||
/** Tries to locate LOG_FILE_NAME and check it's size, etc
|
||||
|
|
@ -1259,7 +1249,7 @@ dberr_t srv_start(bool create_new_db)
|
|||
ut_ad(buf_page_cleaner_is_active);
|
||||
}
|
||||
|
||||
srv_startup_is_before_trx_rollback_phase = !create_new_db;
|
||||
srv_startup_is_before_trx_rollback_phase = true;
|
||||
|
||||
/* Check if undo tablespaces and redo log files exist before creating
|
||||
a new system tablespace */
|
||||
|
|
@ -1308,7 +1298,6 @@ dberr_t srv_start(bool create_new_db)
|
|||
if (create_new_db) {
|
||||
flushed_lsn = log_sys.get_lsn();
|
||||
log_sys.set_flushed_lsn(flushed_lsn);
|
||||
buf_flush_sync();
|
||||
|
||||
err = create_log_file(true, flushed_lsn, logfile0);
|
||||
|
||||
|
|
@ -1371,6 +1360,9 @@ dberr_t srv_start(bool create_new_db)
|
|||
if (!log_set_capacity(srv_log_file_size_requested)) {
|
||||
return(srv_init_abort(DB_ERROR));
|
||||
}
|
||||
|
||||
/* Enable checkpoints in the page cleaner. */
|
||||
recv_sys.recovery_on = false;
|
||||
}
|
||||
|
||||
file_checked:
|
||||
|
|
@ -2024,11 +2016,8 @@ void innodb_shutdown()
|
|||
break;
|
||||
case SRV_OPERATION_RESTORE:
|
||||
case SRV_OPERATION_RESTORE_EXPORT:
|
||||
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
|
||||
if (!buf_page_cleaner_is_active) {
|
||||
break;
|
||||
}
|
||||
mysql_mutex_lock(&buf_pool.flush_list_mutex);
|
||||
srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
|
||||
while (buf_page_cleaner_is_active) {
|
||||
pthread_cond_signal(&buf_pool.do_flush_list);
|
||||
my_cond_wait(&buf_pool.done_flush_list,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue