From b249a059da2ef03eda35872a763e5453dbc39339 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 4 Oct 2024 13:38:21 +0300 Subject: [PATCH] MDEV-34850: Busy work while parsing FILE_ records In mariadb-backup --backup, we only have to invoke the undo_space_trunc and log_file_op callbacks as well as validate the mini-transaction checksums. There is absolutely no need to access recv_sys.pages or recv_spaces, or to allocate a decrypt_buf in case of innodb_encrypt_log=ON. This is what the new mode recv_sys_t::store::BACKUP will do. In the skip_the_rest: loop, the main thing is to process all FILE_ records until the end of the log is reached. Additionally, we must process INIT_PAGE and FREE_PAGE records in the same way as they would be during storing == YES. This was measured to reduce the CPU time between the messages "InnoDB: Multi-batch recovery needed at LSN" and "InnoDB: End of log at LSN" by some 20%. recv_sys_t::store: A ternary enumeration that specifies how records should be stored: NO, BACKUP, or YES. recv_sys_t::parse(), recv_sys_t::parse_mtr(), recv_sys_t::parse_pmem(): Replace template with template. store_freed_or_init_rec(): Simplify some logic. We can look up also the system tablespace. Reviewed by: Debarun Banerjee --- extra/mariabackup/xtrabackup.cc | 12 +- storage/innobase/include/log0recv.h | 21 ++-- storage/innobase/log/log0recv.cc | 175 ++++++++++++++++------------ 3 files changed, 118 insertions(+), 90 deletions(-) diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index e5d05b230fd..411bb2479d1 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -3405,7 +3405,8 @@ static bool xtrabackup_copy_mmap_logfile() recv_sys_t::parse_mtr_result r; const byte *start= &log_sys.buf[recv_sys.offset]; - if (recv_sys.parse_mmap(false) == recv_sys_t::OK) + if (recv_sys.parse_mmap(false) == + recv_sys_t::OK) { const byte *end; @@ -3425,7 +3426,8 @@ static bool xtrabackup_copy_mmap_logfile() start = seq + 1; } } - while ((r= recv_sys.parse_mmap(false)) == recv_sys_t::OK); + while ((r= recv_sys.parse_mmap(false)) == + recv_sys_t::OK); end= &log_sys.buf[recv_sys.offset]; @@ -3530,7 +3532,8 @@ static bool xtrabackup_copy_logfile() if (log_sys.buf[recv_sys.offset] <= 1) break; - if (recv_sys.parse_mtr(false) == recv_sys_t::OK) + if (recv_sys.parse_mtr(false) == + recv_sys_t::OK) { do { @@ -3540,7 +3543,8 @@ static bool xtrabackup_copy_logfile() sequence_offset)); *seq= 1; } - while ((r= recv_sys.parse_mtr(false)) == recv_sys_t::OK); + while ((r= recv_sys.parse_mtr(false)) == + recv_sys_t::OK); if (ds_write(dst_log_file, log_sys.buf + start_offset, recv_sys.offset - start_offset)) diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 6cf79c857e4..731d7e3ff11 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -390,12 +390,15 @@ public: GOT_OOM }; + /** Whether to store parsed log records */ + enum store{NO,BACKUP,YES}; + private: /** Parse and register one log_t::FORMAT_10_8 mini-transaction. - @tparam store whether to store the records + @tparam storing whether to store the records @param l log data source @param if_exists if store: whether to check if the tablespace exists */ - template + template inline parse_mtr_result parse(source &l, bool if_exists) noexcept; /** Rewind a mini-transaction when parse() runs out of memory. @@ -409,20 +412,20 @@ private: public: /** Parse and register one log_t::FORMAT_10_8 mini-transaction, without handling any log_sys.is_mmap() buffer wrap-around. - @tparam store whether to store the records - @param if_exists if store: whether to check if the tablespace exists */ - template + @tparam storing whether to store the records + @param if_exists storing=YES: whether to check if the tablespace exists */ + template static parse_mtr_result parse_mtr(bool if_exists) noexcept; /** Parse and register one log_t::FORMAT_10_8 mini-transaction, handling log_sys.is_mmap() buffer wrap-around. - @tparam store whether to store the records - @param if_exists if store: whether to check if the tablespace exists */ - template + @tparam storing whether to store the records + @param if_exists storing=YES: whether to check if the tablespace exists */ + template static parse_mtr_result parse_mmap(bool if_exists) noexcept #ifdef HAVE_INNODB_MMAP ; #else - { return parse_mtr(if_exists); } + { return parse_mtr(if_exists); } #endif /** Erase log records for a page. */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 1a49df99f51..f33eb427b54 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2186,15 +2186,8 @@ static void store_freed_or_init_rec(page_id_t page_id, bool freed) uint32_t page_no= page_id.page_no(); if (is_predefined_tablespace(space_id)) { - if (!srv_immediate_scrub_data_uncompressed) - return; - fil_space_t *space; - if (space_id == TRX_SYS_SPACE) - space= fil_system.sys_space; - else - space= fil_space_get(space_id); - - space->free_page(page_no, freed); + if (srv_immediate_scrub_data_uncompressed) + fil_space_get(space_id)->free_page(page_no, freed); return; } @@ -2505,28 +2498,29 @@ void recv_sys_t::rewind(source &l, source &begin) noexcept } /** Parse and register one log_t::FORMAT_10_8 mini-transaction. -@tparam store whether to store the records +@tparam storing whether to store the records @param l log data source @param if_exists if store: whether to check if the tablespace exists */ -template +template inline recv_sys_t::parse_mtr_result recv_sys_t::parse(source &l, bool if_exists) noexcept { restart: - ut_ad(log_sys.latch_have_wr() || - srv_operation == SRV_OPERATION_BACKUP || - srv_operation == SRV_OPERATION_BACKUP_NO_DEFER); + ut_ad(storing == BACKUP || log_sys.latch_have_wr()); + ut_ad(storing == BACKUP || !undo_space_trunc); + ut_ad(storing == BACKUP || !log_file_op); + ut_ad(storing == YES || !if_exists); + ut_ad((storing == BACKUP) == + (srv_operation == SRV_OPERATION_BACKUP || + srv_operation == SRV_OPERATION_BACKUP_NO_DEFER)); mysql_mutex_assert_owner(&mutex); ut_ad(log_sys.next_checkpoint_lsn); ut_ad(log_sys.is_latest()); - ut_ad(store || !if_exists); - ut_ad(store || - srv_operation != SRV_OPERATION_BACKUP || - srv_operation != SRV_OPERATION_BACKUP_NO_DEFER); alignas(8) byte iv[MY_AES_BLOCK_SIZE]; - byte *decrypt_buf= static_cast(alloca(srv_page_size)); + byte *decrypt_buf= storing == YES + ? static_cast(alloca(srv_page_size)) : nullptr; const lsn_t start_lsn{lsn}; @@ -2582,15 +2576,16 @@ restart: crc= my_crc32c(crc, iv, 8); } - DBUG_EXECUTE_IF("log_intermittent_checksum_mismatch", - { - static int c; - if (!c++) + if (storing == BACKUP) + DBUG_EXECUTE_IF("log_intermittent_checksum_mismatch", { - sql_print_information("Invalid log block checksum"); - return GOT_EOF; - } - }); + static int c; + if (!c++) + { + sql_print_information("Invalid log block checksum"); + return GOT_EOF; + } + }); if (crc != (l + 1).read4()) return GOT_EOF; @@ -2668,7 +2663,8 @@ restart: } sql_print_warning("InnoDB: Ignoring malformed log record at LSN " LSN_PF, lsn); - last_offset= 1; /* the next record must not be same_page */ + /* the next record must not be same_page */ + if (storing == YES) last_offset= 1; continue; } if (srv_operation == SRV_OPERATION_BACKUP) @@ -2678,7 +2674,7 @@ restart: lsn, b, l - recs + rlen, space_id, page_no)); goto same_page; } - last_offset= 0; + if (storing == YES) last_offset= 0; idlen= mlog_decode_varint_length(*l); if (UNIV_UNLIKELY(idlen > 5 || idlen >= rlen)) { @@ -2709,17 +2705,21 @@ restart: goto page_id_corrupted; l+= idlen; rlen-= idlen; - mach_write_to_4(iv + 8, space_id); - mach_write_to_4(iv + 12, page_no); + if (storing == YES) + { + mach_write_to_4(iv + 8, space_id); + mach_write_to_4(iv + 12, page_no); + } got_page_op= !(b & 0x80); if (!got_page_op); - else if (!store && srv_operation == SRV_OPERATION_BACKUP) + else if (storing == BACKUP && srv_operation == SRV_OPERATION_BACKUP) { - if (page_no == 0 && first_page_init && (b & 0x10)) + if (page_no == 0 && (b & 0xf0) == INIT_PAGE && first_page_init) first_page_init(space_id); continue; } - else if (store && file_checkpoint && !is_predefined_tablespace(space_id)) + else if (storing == YES && file_checkpoint && + !is_predefined_tablespace(space_id)) { recv_spaces_t::iterator i= recv_spaces.lower_bound(space_id); if (i != recv_spaces.end() && i->first == space_id); @@ -2748,7 +2748,6 @@ restart: if (got_page_op) { same_page: - const byte *cl= l.ptr; if (!rlen); else if (UNIV_UNLIKELY(l - recs + rlen > srv_page_size)) goto record_corrupted; @@ -2756,30 +2755,59 @@ restart: ut_d(if ((b & 0x70) == INIT_PAGE || (b & 0x70) == OPTION) freed.erase(id)); ut_ad(freed.find(id) == freed.end()); + const byte *cl= storing == NO ? nullptr : l.ptr; switch (b & 0x70) { case FREE_PAGE: ut_ad(freed.emplace(id).second); - last_offset= 1; /* the next record must not be same_page */ + /* the next record must not be same_page */ + if (storing == YES) last_offset= 1; goto free_or_init_page; case INIT_PAGE: - last_offset= FIL_PAGE_TYPE; + if (storing == YES) last_offset= FIL_PAGE_TYPE; free_or_init_page: - store_freed_or_init_rec(id, (b & 0x70) == FREE_PAGE); + if (storing == BACKUP) + continue; if (UNIV_UNLIKELY(rlen != 0)) goto record_corrupted; + store_freed_or_init_rec(id, (b & 0x70) == FREE_PAGE); + + if (storing == NO) + { + /* We must update mlog_init for the correct operation of + multi-batch recovery, for example to avoid occasional + failures of the test innodb.recovery_memory. + + For storing == YES, this will be invoked in recv_sys_t::add(). */ + mlog_init.add(id, start_lsn); + + /* recv_scan_log() may have stored some log for this page + before entering the skip_the_rest: loop. Such records must + be discarded, because reading an INIT_PAGE or FREE_PAGE + record implies that the page can be recovered based on log + records, without reading it from a data file. */ + + if (pages_it == pages.end() || pages_it->first != id) + { + pages_it= pages.find(id); + if (pages_it == pages.end()) + continue; + } + map::iterator r= pages_it++; + ut_ad(!r->second.being_processed); + erase(r); + continue; + } copy_if_needed: cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); break; case EXTENDED: + if (storing != YES) + continue; if (UNIV_UNLIKELY(!rlen)) goto record_corrupted; cl= l.copy_if_needed(iv, decrypt_buf, recs, rlen); if (rlen == 1 && *cl == TRIM_PAGES) { -#if 0 /* For now, we can only truncate an undo log tablespace */ - if (UNIV_UNLIKELY(!space_id || !page_no)) - goto record_corrupted; -#else if (!srv_is_undo_tablespace(space_id) || page_no != SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) goto record_corrupted; @@ -2790,16 +2818,16 @@ restart: trim({space_id, 0}, start_lsn); truncated_undo_spaces[space_id - srv_undo_space_id_start]= { start_lsn, page_no }; - if (!store && undo_space_trunc) + if (storing == BACKUP && undo_space_trunc) undo_space_trunc(space_id); -#endif - last_offset= 1; /* the next record must not be same_page */ + /* the next record must not be same_page */ + if (storing == YES) last_offset= 1; continue; } - last_offset= FIL_PAGE_TYPE; + if (storing == YES) last_offset= FIL_PAGE_TYPE; break; case OPTION: - if (rlen == 5 && *l == OPT_PAGE_CHECKSUM) + if (storing == YES && rlen == 5 && *l == OPT_PAGE_CHECKSUM) goto copy_if_needed; /* fall through */ case RESERVED: @@ -2807,6 +2835,8 @@ restart: case WRITE: case MEMMOVE: case MEMSET: + if (storing != YES) + continue; if (UNIV_UNLIKELY(rlen == 0 || last_offset == 1)) goto record_corrupted; ut_d(const source payload{l}); @@ -2847,7 +2877,7 @@ restart: last_offset) : file_name_t::initial_flags; if (it == recv_spaces.end()) - ut_ad(!store || space_id == TRX_SYS_SPACE || + ut_ad(space_id == TRX_SYS_SPACE || srv_is_undo_tablespace(space_id)); else if (!it->second.space) { @@ -2907,7 +2937,7 @@ restart: ut_ad(modified.emplace(id).second || (b & 0x70) != INIT_PAGE); } #endif - if (store) + if (storing == YES) { if (if_exists) { @@ -2947,18 +2977,6 @@ restart: } } } - else if ((b & 0x70) <= INIT_PAGE) - { - mlog_init.add(id, start_lsn); - if (pages_it == pages.end() || pages_it->first != id) - { - pages_it= pages.find(id); - if (pages_it == pages.end()) - continue; - } - map::iterator r= pages_it++; - erase(r); - } } else if (rlen) { @@ -2970,7 +2988,7 @@ restart: if (rlen < UNIV_PAGE_SIZE_MAX && !l.is_zero(rlen)) continue; } - else if (store) + else if (storing == YES) { ut_ad(file_checkpoint); continue; @@ -3056,9 +3074,7 @@ restart: if (UNIV_UNLIKELY(!recv_needed_recovery && srv_read_only_mode)) continue; - if (!store && - (srv_operation == SRV_OPERATION_BACKUP || - srv_operation == SRV_OPERATION_BACKUP_NO_DEFER)) + if (storing == BACKUP) { if ((b & 0xf0) < FILE_CHECKPOINT && log_file_op) log_file_op(space_id, b & 0xf0, @@ -3100,22 +3116,23 @@ restart: return OK; } -template +template recv_sys_t::parse_mtr_result recv_sys_t::parse_mtr(bool if_exists) noexcept { recv_buf s{&log_sys.buf[recv_sys.offset]}; - return recv_sys.parse(s, if_exists); + return recv_sys.parse(s, if_exists); } /** for mariadb-backup; @see xtrabackup_copy_logfile() */ template -recv_sys_t::parse_mtr_result recv_sys_t::parse_mtr(bool) noexcept; +recv_sys_t::parse_mtr_result +recv_sys_t::parse_mtr(bool) noexcept; #ifdef HAVE_INNODB_MMAP -template +template recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) noexcept { - recv_sys_t::parse_mtr_result r{parse_mtr(if_exists)}; + recv_sys_t::parse_mtr_result r{parse_mtr(if_exists)}; if (UNIV_LIKELY(r != PREMATURE_EOF) || !log_sys.is_mmap()) return r; ut_ad(recv_sys.len == log_sys.file_size); @@ -3125,12 +3142,13 @@ recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool if_exists) noexcept {recv_sys.offset == recv_sys.len ? &log_sys.buf[log_sys.START_OFFSET] : &log_sys.buf[recv_sys.offset]}; - return recv_sys.parse(s, if_exists); + return recv_sys.parse(s, if_exists); } /** for mariadb-backup; @see xtrabackup_copy_mmap_logfile() */ template -recv_sys_t::parse_mtr_result recv_sys_t::parse_mmap(bool) noexcept; +recv_sys_t::parse_mtr_result +recv_sys_t::parse_mmap(bool) noexcept; #endif /** Apply the hashed log records to the page, if the page lsn is less than the @@ -4097,7 +4115,7 @@ static bool recv_scan_log(bool last_phase) for (;;) { const byte& b{log_sys.buf[recv_sys.offset]}; - r= recv_sys.parse_mmap(false); + r= recv_sys.parse_mmap(false); switch (r) { case recv_sys_t::PREMATURE_EOF: goto read_more; @@ -4127,7 +4145,7 @@ static bool recv_scan_log(bool last_phase) else { ut_ad(recv_sys.file_checkpoint != 0); - switch ((r= recv_sys.parse_mmap(false))) { + switch ((r= recv_sys.parse_mmap(false))) { case recv_sys_t::PREMATURE_EOF: goto read_more; case recv_sys_t::GOT_EOF: @@ -4149,11 +4167,13 @@ static bool recv_scan_log(bool last_phase) if (!store) skip_the_rest: - while ((r= recv_sys.parse_mmap(false)) == recv_sys_t::OK); + while ((r= recv_sys.parse_mmap(false)) == + recv_sys_t::OK); else { uint16_t count= 0; - while ((r= recv_sys.parse_mmap(last_phase)) == recv_sys_t::OK) + while ((r= recv_sys.parse_mmap(last_phase)) == + recv_sys_t::OK) if (!++count && recv_sys.report(time(nullptr))) { const size_t n= recv_sys.pages.size(); @@ -4183,7 +4203,8 @@ static bool recv_scan_log(bool last_phase) ut_ad(recv_sys.is_initialised()); if (recv_sys.scanned_lsn > 1) { - ut_ad(recv_sys.scanned_lsn == recv_sys.lsn); + ut_ad(recv_sys.is_corrupt_fs() || + recv_sys.scanned_lsn == recv_sys.lsn); break; } recv_sys.scanned_lsn= recv_sys.lsn;