MDEV-34850: Busy work while parsing FILE_ records

In mariadb-backup --backup, we only have to invoke the undo_space_trunc
and log_file_op callbacks as well as validate the mini-transaction
checksums. There is absolutely no need to access recv_sys.pages or
recv_spaces, or to allocate a decrypt_buf in case of innodb_encrypt_log=ON.
This is what the new mode recv_sys_t::store::BACKUP will do.

In the skip_the_rest: loop, the main thing is to process all FILE_ records
until the end of the log is reached.  Additionally, we must process
INIT_PAGE and FREE_PAGE records in the same way as they would be
during storing == YES.

This was measured to reduce the CPU time between the messages
"InnoDB: Multi-batch recovery needed at LSN" and
"InnoDB: End of log at LSN"
by some 20%.

recv_sys_t::store: A ternary enumeration that specifies how records
should be stored: NO, BACKUP, or YES.

recv_sys_t::parse(), recv_sys_t::parse_mtr(), recv_sys_t::parse_pmem():
Replace template<bool store> with template<store storing>.

store_freed_or_init_rec(): Simplify some logic. We can look up also
the system tablespace.

Reviewed by: Debarun Banerjee
This commit is contained in:
Marko Mäkelä 2024-10-04 13:38:21 +03:00
parent 590718cadc
commit b249a059da
3 changed files with 118 additions and 90 deletions

View file

@ -3405,7 +3405,8 @@ static bool xtrabackup_copy_mmap_logfile()
recv_sys_t::parse_mtr_result r;
const byte *start= &log_sys.buf[recv_sys.offset];
if (recv_sys.parse_mmap<false>(false) == recv_sys_t::OK)
if (recv_sys.parse_mmap<recv_sys_t::store::BACKUP>(false) ==
recv_sys_t::OK)
{
const byte *end;
@ -3425,7 +3426,8 @@ static bool xtrabackup_copy_mmap_logfile()
start = seq + 1;
}
}
while ((r= recv_sys.parse_mmap<false>(false)) == recv_sys_t::OK);
while ((r= recv_sys.parse_mmap<recv_sys_t::store::BACKUP>(false)) ==
recv_sys_t::OK);
end= &log_sys.buf[recv_sys.offset];
@ -3530,7 +3532,8 @@ static bool xtrabackup_copy_logfile()
if (log_sys.buf[recv_sys.offset] <= 1)
break;
if (recv_sys.parse_mtr<false>(false) == recv_sys_t::OK)
if (recv_sys.parse_mtr<recv_sys_t::store::BACKUP>(false) ==
recv_sys_t::OK)
{
do
{
@ -3540,7 +3543,8 @@ static bool xtrabackup_copy_logfile()
sequence_offset));
*seq= 1;
}
while ((r= recv_sys.parse_mtr<false>(false)) == recv_sys_t::OK);
while ((r= recv_sys.parse_mtr<recv_sys_t::store::BACKUP>(false)) ==
recv_sys_t::OK);
if (ds_write(dst_log_file, log_sys.buf + start_offset,
recv_sys.offset - start_offset))

View file

@ -390,12 +390,15 @@ public:
GOT_OOM
};
/** Whether to store parsed log records */
enum store{NO,BACKUP,YES};
private:
/** Parse and register one log_t::FORMAT_10_8 mini-transaction.
@tparam store whether to store the records
@tparam storing whether to store the records
@param l log data source
@param if_exists if store: whether to check if the tablespace exists */
template<typename source,bool store>
template<typename source,store storing>
inline parse_mtr_result parse(source &l, bool if_exists) noexcept;
/** Rewind a mini-transaction when parse() runs out of memory.
@ -409,20 +412,20 @@ private:
public:
/** Parse and register one log_t::FORMAT_10_8 mini-transaction,
without handling any log_sys.is_mmap() buffer wrap-around.
@tparam store whether to store the records
@param if_exists if store: whether to check if the tablespace exists */
template<bool store>
@tparam storing whether to store the records
@param if_exists storing=YES: whether to check if the tablespace exists */
template<store storing>
static parse_mtr_result parse_mtr(bool if_exists) noexcept;
/** Parse and register one log_t::FORMAT_10_8 mini-transaction,
handling log_sys.is_mmap() buffer wrap-around.
@tparam store whether to store the records
@param if_exists if store: whether to check if the tablespace exists */
template<bool store>
@tparam storing whether to store the records
@param if_exists storing=YES: whether to check if the tablespace exists */
template<store storing>
static parse_mtr_result parse_mmap(bool if_exists) noexcept
#ifdef HAVE_INNODB_MMAP
;
#else
{ return parse_mtr<store>(if_exists); }
{ return parse_mtr<storing>(if_exists); }
#endif
/** Erase log records for a page. */

View file

@ -2186,15 +2186,8 @@ static void store_freed_or_init_rec(page_id_t page_id, bool freed)
uint32_t page_no= page_id.page_no();
if (is_predefined_tablespace(space_id))
{
if (!srv_immediate_scrub_data_uncompressed)
return;
fil_space_t *space;
if (space_id == TRX_SYS_SPACE)
space= fil_system.sys_space;
else
space= fil_space_get(space_id);
space->free_page(page_no, freed);
if (srv_immediate_scrub_data_uncompressed)
fil_space_get(space_id)->free_page(page_no, freed);
return;
}
@ -2505,28 +2498,29 @@ void recv_sys_t::rewind(source &l, source &begin) noexcept
}
/** Parse and register one log_t::FORMAT_10_8 mini-transaction.
@tparam store whether to store the records
@tparam storing whether to store the records
@param l log data source
@param if_exists if store: whether to check if the tablespace exists */
template<typename source,bool store>
template<typename source,recv_sys_t::store storing>
inline
recv_sys_t::parse_mtr_result recv_sys_t::parse(source &l, bool if_exists)
noexcept
{
restart:
ut_ad(log_sys.latch_have_wr() ||
srv_operation == SRV_OPERATION_BACKUP ||
srv_operation == SRV_OPERATION_BACKUP_NO_DEFER);
ut_ad(storing == BACKUP || log_sys.latch_have_wr());
ut_ad(storing == BACKUP || !undo_space_trunc);
ut_ad(storing == BACKUP || !log_file_op);
ut_ad(storing == YES || !if_exists);
ut_ad((storing == BACKUP) ==
(srv_operation == SRV_OPERATION_BACKUP ||
srv_operation == SRV_OPERATION_BACKUP_NO_DEFER));
mysql_mutex_assert_owner(&mutex);
ut_ad(log_sys.next_checkpoint_lsn);
ut_ad(log_sys.is_latest());
ut_ad(store || !if_exists);
ut_ad(store ||
srv_operation != SRV_OPERATION_BACKUP ||
srv_operation != SRV_OPERATION_BACKUP_NO_DEFER);
alignas(8) byte iv[MY_AES_BLOCK_SIZE];
byte *decrypt_buf= static_cast<byte*>(alloca(srv_page_size));
byte *decrypt_buf= storing == YES
? static_cast<byte*>(alloca(srv_page_size)) : nullptr;
const lsn_t start_lsn{lsn};
@ -2582,15 +2576,16 @@ restart:
crc= my_crc32c(crc, iv, 8);
}
DBUG_EXECUTE_IF("log_intermittent_checksum_mismatch",
{
static int c;
if (!c++)
if (storing == BACKUP)
DBUG_EXECUTE_IF("log_intermittent_checksum_mismatch",
{
sql_print_information("Invalid log block checksum");
return GOT_EOF;
}
});
static int c;
if (!c++)
{
sql_print_information("Invalid log block checksum");
return GOT_EOF;
}
});
if (crc != (l + 1).read4())
return GOT_EOF;
@ -2668,7 +2663,8 @@ restart:
}
sql_print_warning("InnoDB: Ignoring malformed log record at LSN "
LSN_PF, lsn);
last_offset= 1; /* the next record must not be same_page */
/* the next record must not be same_page */
if (storing == YES) last_offset= 1;
continue;
}
if (srv_operation == SRV_OPERATION_BACKUP)
@ -2678,7 +2674,7 @@ restart:
lsn, b, l - recs + rlen, space_id, page_no));
goto same_page;
}
last_offset= 0;
if (storing == YES) last_offset= 0;
idlen= mlog_decode_varint_length(*l);
if (UNIV_UNLIKELY(idlen > 5 || idlen >= rlen))
{
@ -2709,17 +2705,21 @@ restart:
goto page_id_corrupted;
l+= idlen;
rlen-= idlen;
mach_write_to_4(iv + 8, space_id);
mach_write_to_4(iv + 12, page_no);
if (storing == YES)
{
mach_write_to_4(iv + 8, space_id);
mach_write_to_4(iv + 12, page_no);
}
got_page_op= !(b & 0x80);
if (!got_page_op);
else if (!store && srv_operation == SRV_OPERATION_BACKUP)
else if (storing == BACKUP && srv_operation == SRV_OPERATION_BACKUP)
{
if (page_no == 0 && first_page_init && (b & 0x10))
if (page_no == 0 && (b & 0xf0) == INIT_PAGE && first_page_init)
first_page_init(space_id);
continue;
}
else if (store && file_checkpoint && !is_predefined_tablespace(space_id))
else if (storing == YES && file_checkpoint &&
!is_predefined_tablespace(space_id))
{
recv_spaces_t::iterator i= recv_spaces.lower_bound(space_id);
if (i != recv_spaces.end() && i->first == space_id);
@ -2748,7 +2748,6 @@ restart:
if (got_page_op)
{
same_page:
const byte *cl= l.ptr;
if (!rlen);
else if (UNIV_UNLIKELY(l - recs + rlen > srv_page_size))
goto record_corrupted;
@ -2756,30 +2755,59 @@ restart:
ut_d(if ((b & 0x70) == INIT_PAGE || (b & 0x70) == OPTION)
freed.erase(id));
ut_ad(freed.find(id) == freed.end());
const byte *cl= storing == NO ? nullptr : l.ptr;
switch (b & 0x70) {
case FREE_PAGE:
ut_ad(freed.emplace(id).second);
last_offset= 1; /* the next record must not be same_page */
/* the next record must not be same_page */
if (storing == YES) last_offset= 1;
goto free_or_init_page;
case INIT_PAGE:
last_offset= FIL_PAGE_TYPE;
if (storing == YES) last_offset= FIL_PAGE_TYPE;
free_or_init_page:
store_freed_or_init_rec(id, (b & 0x70) == FREE_PAGE);
if (storing == BACKUP)
continue;
if (UNIV_UNLIKELY(rlen != 0))
goto record_corrupted;
store_freed_or_init_rec(id, (b & 0x70) == FREE_PAGE);
if (storing == NO)
{
/* We must update mlog_init for the correct operation of
multi-batch recovery, for example to avoid occasional
failures of the test innodb.recovery_memory.
For storing == YES, this will be invoked in recv_sys_t::add(). */
mlog_init.add(id, start_lsn);
/* recv_scan_log() may have stored some log for this page
before entering the skip_the_rest: loop. Such records must
be discarded, because reading an INIT_PAGE or FREE_PAGE
record implies that the page can be recovered based on log
records, without reading it from a data file. */
if (pages_it == pages.end() || pages_it->first != id)
{
pages_it= pages.find(id);
if (pages_it == pages.end())
continue;
}
map::iterator r= pages_it++;
ut_ad(!r->second.being_processed);
erase(r);
continue;
}
copy_if_needed:
cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen);
break;
case EXTENDED:
if (storing != YES)
continue;
if (UNIV_UNLIKELY(!rlen))
goto record_corrupted;
cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen);
if (rlen == 1 && *cl == TRIM_PAGES)
{
#if 0 /* For now, we can only truncate an undo log tablespace */
if (UNIV_UNLIKELY(!space_id || !page_no))
goto record_corrupted;
#else
if (!srv_is_undo_tablespace(space_id) ||
page_no != SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
goto record_corrupted;
@ -2790,16 +2818,16 @@ restart:
trim({space_id, 0}, start_lsn);
truncated_undo_spaces[space_id - srv_undo_space_id_start]=
{ start_lsn, page_no };
if (!store && undo_space_trunc)
if (storing == BACKUP && undo_space_trunc)
undo_space_trunc(space_id);
#endif
last_offset= 1; /* the next record must not be same_page */
/* the next record must not be same_page */
if (storing == YES) last_offset= 1;
continue;
}
last_offset= FIL_PAGE_TYPE;
if (storing == YES) last_offset= FIL_PAGE_TYPE;
break;
case OPTION:
if (rlen == 5 && *l == OPT_PAGE_CHECKSUM)
if (storing == YES && rlen == 5 && *l == OPT_PAGE_CHECKSUM)
goto copy_if_needed;
/* fall through */
case RESERVED:
@ -2807,6 +2835,8 @@ restart:
case WRITE:
case MEMMOVE:
case MEMSET:
if (storing != YES)
continue;
if (UNIV_UNLIKELY(rlen == 0 || last_offset == 1))
goto record_corrupted;
ut_d(const source payload{l});
@ -2847,7 +2877,7 @@ restart:
last_offset)
: file_name_t::initial_flags;
if (it == recv_spaces.end())
ut_ad(!store || space_id == TRX_SYS_SPACE ||
ut_ad(space_id == TRX_SYS_SPACE ||
srv_is_undo_tablespace(space_id));
else if (!it->second.space)
{
@ -2907,7 +2937,7 @@ restart:
ut_ad(modified.emplace(id).second || (b & 0x70) != INIT_PAGE);
}
#endif
if (store)
if (storing == YES)
{
if (if_exists)
{
@ -2947,18 +2977,6 @@ restart:
}
}
}
else if ((b & 0x70) <= INIT_PAGE)
{
mlog_init.add(id, start_lsn);
if (pages_it == pages.end() || pages_it->first != id)
{
pages_it= pages.find(id);
if (pages_it == pages.end())
continue;
}
map::iterator r= pages_it++;
erase(r);
}
}
else if (rlen)
{
@ -2970,7 +2988,7 @@ restart:
if (rlen < UNIV_PAGE_SIZE_MAX && !l.is_zero(rlen))
continue;
}
else if (store)
else if (storing == YES)
{
ut_ad(file_checkpoint);
continue;
@ -3056,9 +3074,7 @@ restart:
if (UNIV_UNLIKELY(!recv_needed_recovery && srv_read_only_mode))
continue;
if (!store &&
(srv_operation == SRV_OPERATION_BACKUP ||
srv_operation == SRV_OPERATION_BACKUP_NO_DEFER))
if (storing == BACKUP)
{
if ((b & 0xf0) < FILE_CHECKPOINT && log_file_op)
log_file_op(space_id, b & 0xf0,
@ -3100,22 +3116,23 @@ restart:
return OK;
}
template<bool store>
template<recv_sys_t::store storing>
recv_sys_t::parse_mtr_result recv_sys_t::parse_mtr(bool if_exists) noexcept
{
recv_buf s{&log_sys.buf[recv_sys.offset]};
return recv_sys.parse<recv_buf,store>(s, if_exists);
return recv_sys.parse<recv_buf,storing>(s, if_exists);
}
/** for mariadb-backup; @see xtrabackup_copy_logfile() */
template
recv_sys_t::parse_mtr_result recv_sys_t::parse_mtr<false>(bool) noexcept;
recv_sys_t::parse_mtr_result
recv_sys_t::parse_mtr<recv_sys_t::store::BACKUP>(bool) noexcept;
#ifdef HAVE_INNODB_MMAP
template<bool store>
template<recv_sys_t::store storing>
recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) noexcept
{
recv_sys_t::parse_mtr_result r{parse_mtr<store>(if_exists)};
recv_sys_t::parse_mtr_result r{parse_mtr<storing>(if_exists)};
if (UNIV_LIKELY(r != PREMATURE_EOF) || !log_sys.is_mmap())
return r;
ut_ad(recv_sys.len == log_sys.file_size);
@ -3125,12 +3142,13 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) noexcept
{recv_sys.offset == recv_sys.len
? &log_sys.buf[log_sys.START_OFFSET]
: &log_sys.buf[recv_sys.offset]};
return recv_sys.parse<recv_ring,store>(s, if_exists);
return recv_sys.parse<recv_ring,storing>(s, if_exists);
}
/** for mariadb-backup; @see xtrabackup_copy_mmap_logfile() */
template
recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap<false>(bool) noexcept;
recv_sys_t::parse_mtr_result
recv_sys_t::parse_mmap<recv_sys_t::store::BACKUP>(bool) noexcept;
#endif
/** Apply the hashed log records to the page, if the page lsn is less than the
@ -4097,7 +4115,7 @@ static bool recv_scan_log(bool last_phase)
for (;;)
{
const byte& b{log_sys.buf[recv_sys.offset]};
r= recv_sys.parse_mmap<false>(false);
r= recv_sys.parse_mmap<recv_sys_t::store::NO>(false);
switch (r) {
case recv_sys_t::PREMATURE_EOF:
goto read_more;
@ -4127,7 +4145,7 @@ static bool recv_scan_log(bool last_phase)
else
{
ut_ad(recv_sys.file_checkpoint != 0);
switch ((r= recv_sys.parse_mmap<true>(false))) {
switch ((r= recv_sys.parse_mmap<recv_sys_t::store::YES>(false))) {
case recv_sys_t::PREMATURE_EOF:
goto read_more;
case recv_sys_t::GOT_EOF:
@ -4149,11 +4167,13 @@ static bool recv_scan_log(bool last_phase)
if (!store)
skip_the_rest:
while ((r= recv_sys.parse_mmap<false>(false)) == recv_sys_t::OK);
while ((r= recv_sys.parse_mmap<recv_sys_t::store::NO>(false)) ==
recv_sys_t::OK);
else
{
uint16_t count= 0;
while ((r= recv_sys.parse_mmap<true>(last_phase)) == recv_sys_t::OK)
while ((r= recv_sys.parse_mmap<recv_sys_t::store::YES>(last_phase)) ==
recv_sys_t::OK)
if (!++count && recv_sys.report(time(nullptr)))
{
const size_t n= recv_sys.pages.size();
@ -4183,7 +4203,8 @@ static bool recv_scan_log(bool last_phase)
ut_ad(recv_sys.is_initialised());
if (recv_sys.scanned_lsn > 1)
{
ut_ad(recv_sys.scanned_lsn == recv_sys.lsn);
ut_ad(recv_sys.is_corrupt_fs() ||
recv_sys.scanned_lsn == recv_sys.lsn);
break;
}
recv_sys.scanned_lsn= recv_sys.lsn;