MDEV-34483 Backup may copy unnecessarily much log

In mariadb-backup --backup there are multiple mechanisms for ensuring that
a sufficient amount of the InnoDB write-ahead log (ib_logfile0) is being
copied at the end of the backup. The backup needs to include the latest
committed transaction. While further transaction commits are blocked by
BACKUP STAGE BLOCK_COMMIT, ongoing transactions may modify the database
contents and write log records. We were unnecessarily copying such log,
which would also cause further effort of rolling back incomplete
transactions after the backup is restored.

backup_wait_for_lsn(): Declare as static, and refactor some code
to separate functions backup_wait_for_lsn_low() and
backup_wait_timeout().

backup_wait_for_commit_lsn(): A new function to determine the current
LSN (within BACKUP STAGE BLOCK_COMMIT) and to wait for the log to be
copied until that. Invoked by BackupStages::stage_block_commit().

xtrabackup_backup_func(): Remove a condition that had already been
checked by a caller of backup_wait_timeout().

server_lsn_after_lock: Declare as a local variable in
BackupStages::stage_block_ddl().

log_copying_thread(), io_watching_thread(): Use metadata_last_lsn
instead of metadata_to_lsn as the stop condition.

BackupStages::stage_block_commit(): Ensure that the log tables
(in particular, mysql.general_log) will have been copied before
the BACKUP STAGE BLOCK_COMMIT is being followed by any further
SQL statements.

Reviewed by: Debarun Banerjee
Tested by: Matthias Leich
This commit is contained in:
Marko Mäkelä 2024-09-09 16:47:35 +03:00
parent d002b1f503
commit 852d42e993
4 changed files with 140 additions and 81 deletions

View file

@ -1353,9 +1353,6 @@ out:
return(ret);
}
lsn_t server_lsn_after_lock;
extern void backup_wait_for_lsn(lsn_t lsn);
/** Release resources after backup_files() */
void backup_release()
{

View file

@ -3482,35 +3482,70 @@ static bool xtrabackup_copy_logfile()
return false;
}
/**
Wait until redo log copying thread processes given lsn
*/
void backup_wait_for_lsn(lsn_t lsn)
static bool backup_wait_timeout(lsn_t lsn, lsn_t last_lsn)
{
mysql_mutex_lock(&recv_sys.mutex);
for (lsn_t last_lsn{recv_sys.lsn}; last_lsn < lsn; )
if (last_lsn >= lsn)
return true;
msg("Was only able to copy log from " LSN_PF " to " LSN_PF
", not " LSN_PF "; try increasing innodb_log_file_size",
log_sys.next_checkpoint_lsn, last_lsn, lsn);
return false;
}
/**
Wait for enough log to be copied.
@param lsn log sequence number target
@return the reached log sequence number (may be less or more than lsn)
*/
static lsn_t backup_wait_for_lsn_low(lsn_t lsn)
{
mysql_mutex_assert_owner(&recv_sys.mutex);
lsn_t last_lsn{recv_sys.lsn};
if (last_lsn >= lsn)
return last_lsn;
msg("Waiting for log copy thread to read lsn " LSN_PF, lsn);
while (log_copying_running && last_lsn < lsn)
{
timespec abstime;
set_timespec(abstime, 5);
if (my_cond_timedwait(&scanned_lsn_cond, &recv_sys.mutex.m_mutex,
&abstime) &&
last_lsn == recv_sys.lsn)
die("Was only able to copy log from " LSN_PF " to " LSN_PF
", not " LSN_PF "; try increasing innodb_log_file_size",
log_sys.next_checkpoint_lsn, last_lsn, lsn);
break;
last_lsn= recv_sys.lsn;
}
mysql_mutex_unlock(&recv_sys.mutex);
return last_lsn;
}
/**
Wait for enough log to be copied after BACKUP STAGE BLOCK_DDL.
@param lsn log sequence number target
@return whether log_copying_thread() copied everything until the target lsn
*/
static bool backup_wait_for_lsn(lsn_t lsn)
{
if (!lsn)
return true;
mysql_mutex_lock(&recv_sys.mutex);
ut_ad(!metadata_to_lsn);
ut_ad(!metadata_last_lsn);
lsn_t last_lsn{backup_wait_for_lsn_low(lsn)};
mysql_mutex_unlock(&recv_sys.mutex);
return backup_wait_timeout(lsn, last_lsn);
}
extern lsn_t server_lsn_after_lock;
static void log_copying_thread()
{
my_thread_init();
mysql_mutex_lock(&recv_sys.mutex);
while (!xtrabackup_copy_logfile() &&
(!metadata_to_lsn || metadata_to_lsn > recv_sys.lsn))
(!metadata_last_lsn || metadata_last_lsn > recv_sys.lsn))
{
timespec abstime;
set_timespec_nsec(abstime, 1000000ULL * xtrabackup_log_copy_interval);
@ -3534,7 +3569,7 @@ static void io_watching_thread()
mysql_mutex_lock(&recv_sys.mutex);
ut_ad(have_io_watching_thread);
while (log_copying_running && !metadata_to_lsn)
while (log_copying_running && !metadata_last_lsn)
{
timespec abstime;
set_timespec(abstime, 1);
@ -4685,6 +4720,7 @@ end:
static void stop_backup_threads()
{
ut_ad(metadata_last_lsn);
mysql_cond_broadcast(&log_copying_stop);
if (log_copying_running || have_io_watching_thread)
@ -4708,38 +4744,58 @@ static void stop_backup_threads()
mysql_cond_destroy(&log_copying_stop);
}
/**
Wait for enough log to be copied.
@return whether log_copying_thread() copied everything until the target lsn
*/
static bool backup_wait_for_commit_lsn()
{
lsn_t lsn= get_current_lsn(mysql_connection);
mysql_mutex_lock(&recv_sys.mutex);
ut_ad(!metadata_to_lsn);
ut_ad(!metadata_last_lsn);
lsn_t last_lsn= recv_sys.lsn;
/* read the latest checkpoint lsn */
if (recv_sys.find_checkpoint() == DB_SUCCESS && log_sys.is_latest())
{
if (log_sys.next_checkpoint_lsn > lsn)
lsn= log_sys.next_checkpoint_lsn;
metadata_to_lsn= log_sys.next_checkpoint_lsn;
msg("mariabackup: The latest check point (for incremental): '"
LSN_PF "'", metadata_to_lsn);
}
else
{
msg("Error: recv_sys.find_checkpoint() failed.");
metadata_last_lsn= 1;
stop_backup_threads();
mysql_mutex_unlock(&recv_sys.mutex);
return false;
}
recv_sys.lsn= last_lsn;
ut_ad(metadata_to_lsn);
metadata_last_lsn= lsn;
last_lsn= backup_wait_for_lsn_low(LSN_MAX);
metadata_last_lsn= last_lsn;
stop_backup_threads();
mysql_mutex_unlock(&recv_sys.mutex);
return backup_wait_timeout(lsn, last_lsn);
}
/** Implement the core of --backup
@return whether the operation succeeded */
bool Backup_datasinks::backup_low()
{
mysql_mutex_lock(&recv_sys.mutex);
ut_ad(!metadata_to_lsn);
/* read the latest checkpoint lsn */
{
const lsn_t lsn = recv_sys.lsn;
if (recv_sys.find_checkpoint() == DB_SUCCESS
&& log_sys.is_latest()) {
metadata_to_lsn = log_sys.next_checkpoint_lsn;
msg("mariabackup: The latest check point"
" (for incremental): '" LSN_PF "'",
metadata_to_lsn);
} else {
msg("Error: recv_sys.find_checkpoint() failed.");
}
recv_sys.lsn = lsn;
stop_backup_threads();
}
if (metadata_to_lsn && xtrabackup_copy_logfile()) {
mysql_mutex_unlock(&recv_sys.mutex);
ds_close(dst_log_file);
dst_log_file = NULL;
return false;
}
mysql_mutex_unlock(&recv_sys.mutex);
ut_d(mysql_mutex_lock(&recv_sys.mutex));
ut_ad(metadata_last_lsn);
ut_ad(metadata_to_lsn);
ut_ad(!log_copying_running);
ut_d(mysql_mutex_unlock(&recv_sys.mutex));
if (ds_close(dst_log_file) || !metadata_to_lsn) {
dst_log_file = NULL;
@ -4756,7 +4812,7 @@ bool Backup_datasinks::backup_low()
for (uint32_t id : failed_ids) {
msg("mariabackup: Failed to read undo log "
"tablespace space id %d and there is no undo "
"tablespace space id " UINT32PF " and there is no undo "
"tablespace truncation redo record.",
id);
}
@ -4843,17 +4899,17 @@ private:
// TODO: this came from the old code, where it was not thread-safe
// too, use separate mysql connection per thread here
DBUG_MARIABACKUP_EVENT("before_copy", node->space->name());
DBUG_EXECUTE_FOR_KEY("wait_innodb_redo_before_copy",
node->space->name(),
backup_wait_for_lsn(
get_current_lsn(mysql_connection)););
DBUG_EXECUTE_FOR_KEY("wait_innodb_redo_before_copy",
node->space->name(),
backup_wait_for_lsn(
get_current_lsn(mysql_connection)););
/* copy the datafile */
if(xtrabackup_copy_datafile(m_backup_datasinks.m_data,
m_backup_datasinks.m_meta,
node, thread_num, NULL,
xtrabackup_incremental
? wf_incremental : wf_write_through,
m_corrupted_pages))
m_backup_datasinks.m_meta,
node, thread_num, NULL,
xtrabackup_incremental
? wf_incremental : wf_write_through,
m_corrupted_pages))
die("mariabackup: Error: failed to copy datafile.");
// TODO: this came from the old code, where it was not thread-safe
// too, use separate mysql connection per thread here
@ -4861,8 +4917,8 @@ private:
m_tasks.finish_task(1);
}
Backup_datasinks &m_backup_datasinks;
CorruptedPages &m_corrupted_pages;
Backup_datasinks &m_backup_datasinks;
CorruptedPages &m_corrupted_pages;
TasksGroup m_tasks;
};
@ -5063,9 +5119,9 @@ class BackupStages {
return false;
}
msg("Waiting for log copy thread to read lsn %llu",
server_lsn_after_lock);
backup_wait_for_lsn(server_lsn_after_lock);
if (!backup_wait_for_lsn(server_lsn_after_lock)) {
return false;
}
corrupted_pages.backup_fix_ddl(backup_datasinks.m_data,
backup_datasinks.m_meta);
@ -5101,11 +5157,28 @@ class BackupStages {
}
// Copy log tables tail
if (!m_common_backup.copy_log_tables(true)) {
if (!m_common_backup.copy_log_tables(true) ||
!m_common_backup.close_log_tables()) {
msg("Error on copy log tables");
return false;
}
// Copy just enough log to cover the latest commit.
// Meanwhile, there could be some active transactions
// that are modifying the database and writing more
// log. Not copying log for them will save busy work
// and avoid some rollback of the incomplete
// transactions after the backup has been restored.
//
// We find the current InnoDB LSN by executing
// SHOW ENGINE INNODB STATUS, which in the case of
// general_log=1, log_output='TABLES'
// would be written to the table mysql.general_log
// that we just finished copying above.
if (!backup_wait_for_commit_lsn()) {
return false;
}
// Copy stats tables
if (!m_common_backup.copy_stats_tables()) {
msg("Error on copy stats tables");
@ -5123,11 +5196,6 @@ class BackupStages {
return false;
}
if (!m_common_backup.close_log_tables()) {
msg("Error on close log tables");
return false;
}
if (!backup_files_from_datadir(backup_datasinks.m_data,
fil_path_to_mysql_datadir,
"aws-kms-key")) {
@ -5269,13 +5337,14 @@ static bool xtrabackup_backup_func()
undo_space_trunc = backup_undo_trunc;
first_page_init = backup_first_page_op;
metadata_to_lsn = 0;
metadata_last_lsn = 0;
/* initialize components */
if(innodb_init_param()) {
fail:
if (log_copying_running) {
mysql_mutex_lock(&recv_sys.mutex);
metadata_to_lsn = 1;
metadata_last_lsn = 1;
stop_backup_threads();
mysql_mutex_unlock(&recv_sys.mutex);
}
@ -5460,14 +5529,9 @@ fail:
xb_data_files_close();
/* Make sure that the latest checkpoint was included */
if (metadata_to_lsn > recv_sys.lsn) {
msg("Error: failed to copy enough redo log ("
"LSN=" LSN_PF "; checkpoint LSN=" LSN_PF ").",
recv_sys.lsn, metadata_to_lsn);
goto fail;
}
ut_ad(!log_copying_running);
ut_ad(metadata_to_lsn <= recv_sys.lsn);
ut_ad(metadata_last_lsn == recv_sys.lsn);
innodb_shutdown();
log_file_op = NULL;
undo_space_trunc = NULL;

View file

@ -1,5 +1,7 @@
CREATE TABLE t(i INT)
ENGINE ARIA TRANSACTIONAL=1 ROW_FORMAT=PAGE PAGE_CHECKSUM=1;
SET GLOBAL general_log = 0;
TRUNCATE mysql.general_log;
SET GLOBAL general_log = 1;
SET GLOBAL log_output = 'TABLE';
INSERT INTO t VALUES (1);

View file

@ -7,9 +7,9 @@
CREATE TABLE t(i INT)
ENGINE ARIA TRANSACTIONAL=1 ROW_FORMAT=PAGE PAGE_CHECKSUM=1;
--let $general_log_old = `SELECT @@global.general_log`
--let $log_output_old = `SELECT @@global.log_output`
# Truncate the log in order to make the test ./mtr --repeat proof
SET GLOBAL general_log = 0;
TRUNCATE mysql.general_log;
SET GLOBAL general_log = 1;
SET GLOBAL log_output = 'TABLE';
@ -43,7 +43,3 @@ SELECT * FROM mysql.general_log
--rmdir $targetdir
DROP TABLE t;
--disable_query_log
--eval SET GLOBAL general_log = $general_log_old
--eval SET GLOBAL log_output = $log_output_old
--enable_query_log