MDEV-34898 Doublewrite recovery of innodb_checksum_algorithm=full_crc32 encrypted pages does not work

- InnoDB fails to recover the full crc32 encrypted page from
doublewrite buffer. The reason is that buf_dblwr_t::recover()
fails to identify the space id from the page because the page has
been encrypted from FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION bytes.

Fix:
===
buf_dblwr_t::recover(): preserve any pages whose space_id
does not match a known tablespace. These could be encrypted pages
of tablespaces that had been created with
innodb_checksum_algorithm=full_crc32.

buf_page_t::read_complete(): If the page looks corrupted and the
tablespace is encrypted and in full_crc32 format, try to
restore the page from doublewrite buffer.

recv_dblwr_t::recover_encrypted_page(): Find the page which
has the same page number and try to decrypt the page using
space->crypt_data. After decryption, compare the space id.
Write the recovered page back to the file.
This commit is contained in:
Thirunarayanan Balathandayuthapani 2025-01-07 18:39:46 +05:30
parent 6abbfdef7a
commit f8cf493290
11 changed files with 395 additions and 10 deletions

View file

@ -6185,7 +6185,8 @@ static bool xtrabackup_prepare_func(char** argv)
srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
}
if (innodb_init()) {
recv_sys.recovery_on = false;
if (innodb_init()) {
goto error_cleanup;
}

View file

@ -1,3 +1,4 @@
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file");
create table t1(a serial) engine=innoDB;
set global innodb_encrypt_tables=ON;
show variables like 'innodb_encrypt%';

View file

@ -0,0 +1,81 @@
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=3\\] in file .*test.t[12]\\.ibd looks corrupted");
call mtr.add_suppression("InnoDB: Unable to apply log to corrupted page ");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
start transaction;
insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12));
insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
commit work;
SET GLOBAL innodb_fast_shutdown = 0;
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));
# xtrabackup prepare
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_buf_flush_list_now = 1;
# Kill the server
# restart
FOUND 2 /InnoDB: Recovered page \[page id: space=[1-9]*, page number=3\]/ in mysqld.1.err
check table t1;
Table Op Msg_type Msg_text
test.t1 check status OK
check table t2;
Table Op Msg_type Msg_text
test.t2 check status OK
select f1, f2 from t1;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
select f1, f2 from t2;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
SET GLOBAL innodb_fast_shutdown = 0;
# shutdown server
# remove datadir
# xtrabackup move back
# restart: --debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
begin;
insert into t1 values (6, repeat('%', 400));
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_buf_flush_list_now = 1;
# Kill the server
# Corrupt the page 3 in t1.ibd file
# Assign the maximum value to lsn in doublewrite buffer page
# restart
FOUND 1 /InnoDB: Encrypted page \[page id: space=[1-9]*, page number=3\] in file .*test.t1.ibd looks corrupted/ in mysqld.1.err
select * from t1;
ERROR 42000: Unknown storage engine 'InnoDB'
# shutdown server
# remove datadir
# xtrabackup move back
# restart
select * from t1;
f1 f2
1 ############
2 ++++++++++++
3 ////////////
4 ------------
5 ............
drop table t2, t1;

View file

@ -3,6 +3,7 @@
-- source include/innodb_undo_tablespaces.inc
-- source include/not_embedded.inc
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=[0-9]*\\] in file");
if (`select count(*) = 0 from information_schema.plugins
where plugin_name = 'debug_key_management' and plugin_status='active'`)
{

View file

@ -0,0 +1,3 @@
--innodb-use-atomic-writes=0
--innodb-encrypt-tables=FORCE
--innodb_sys_tablespaces

View file

@ -0,0 +1,223 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/not_embedded.inc
--source include/have_example_key_management_plugin.inc
call mtr.add_suppression("InnoDB: Encrypted page \\[page id: space=[1-9][0-9]*, page number=3\\] in file .*test.t[12]\\.ibd looks corrupted");
call mtr.add_suppression("InnoDB: Unable to apply log to corrupted page ");
call mtr.add_suppression("InnoDB: Plugin initialization aborted");
call mtr.add_suppression("Plugin 'InnoDB' init function returned error");
call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed");
let INNODB_PAGE_SIZE=`select @@innodb_page_size`;
let MYSQLD_DATADIR=`select @@datadir`;
let ALGO=`select @@innodb_checksum_algorithm`;
create table t1 (f1 int primary key, f2 blob)page_compressed = 1 engine=innodb stats_persistent=0;
create table t2(f1 int primary key, f2 blob)engine=innodb stats_persistent=0;
start transaction;
insert into t1 values(1, repeat('#',12));
insert into t1 values(2, repeat('+',12));
insert into t1 values(3, repeat('/',12));
insert into t1 values(4, repeat('-',12));
insert into t1 values(5, repeat('.',12));
insert into t2 select * from t1;
commit work;
# Slow shutdown and restart to make sure ibuf merge is finished
SET GLOBAL innodb_fast_shutdown = 0;
let $shutdown_timeout=;
let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0;
--source include/restart_mysqld.inc
--source ../../suite/innodb/include/no_checkpoint_start.inc
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
select space into @t2_space_id from information_schema.innodb_sys_tablespaces where name='test/t2';
begin;
insert into t1 values (6, repeat('%', 400));
insert into t2 values (6, repeat('%', 400));
# Copy the t1.ibd, t2.ibd file
let $targetdir=$MYSQLTEST_VARDIR/tmp/backup_1;
--disable_result_log
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir;
--enable_result_log
echo # xtrabackup prepare;
--disable_result_log
exec $XTRABACKUP --prepare --target-dir=$targetdir;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t2_space_id;
set global innodb_buf_flush_list_now = 1;
--let CLEANUP_IF_CHECKPOINT=drop table t1, t2, unexpected_checkpoint;
--source ../../suite/innodb/include/no_checkpoint_end.inc
# Corrupt the page 3 in t1.ibd, t2.ibd file
perl;
use IO::Handle;
do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
my $polynomial = 0x82f63b78; # CRC-32C
my $algo = $ENV{ALGO};
die "Unsupported innodb_checksum_algorithm=$algo\n" unless $algo =~ /crc32/;
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t1.ibd";
my $page_size = $ENV{INNODB_PAGE_SIZE};
my $page;
do "$ENV{MTR_SUITE_DIR}/../innodb/include/crc32.pl";
open(FILE, "+<", $fname) or die;
sysseek(FILE, 3*$page_size, 0);
sysread(FILE, $page, $page_size)==$page_size||die "Unable to read $name\n";
sysseek(FILE, 3*$page_size, 0)||die "Unable to seek $fname\n";
my $corrupted = $page;
# Set FIL_PAGE_LSN to the maximum
substr($corrupted, 16, 8) = chr(255) x 8;
substr($corrupted, $page_size - 8, 8) = chr(255) x 8;
if ($algo =~ /full_crc32/)
{
my $ck = mycrc32(substr($corrupted, 0, $page_size - 4), 0, $polynomial);
substr($corrupted, $page_size - 4, 4) = pack("N", $ck);
}
else
{
# Replace the innodb_checksum_algorithm=crc32 checksum
my $ck= pack("N",
mycrc32(substr($corrupted, 4, 22), 0, $polynomial) ^
mycrc32(substr($corrupted_, 38, $page_size - 38 - 8), 0,
$polynomial));
substr ($corrupted, 0, 4) = $ck;
substr ($corrupted, $page_size - 8, 4) = $ck;
}
syswrite(FILE, $corrupted);
close FILE;
# Zero the complete page
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t2.ibd";
open(FILE, "+<", $fname) or die;
FILE->autoflush(1);
binmode FILE;
sysseek(FILE, 3*$page_size, 0);
print FILE chr(0) x ($ENV{'INNODB_PAGE_SIZE'});
close FILE;
EOF
# Successful recover from doublewrite buffer
let $restart_parameters=;
--source include/start_mysqld.inc
let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
let SEARCH_PATTERN=InnoDB: Recovered page \\[page id: space=[1-9]*, page number=3\\];
--source include/search_pattern_in_file.inc
check table t1;
check table t2;
select f1, f2 from t1;
select f1, f2 from t2;
SET GLOBAL innodb_fast_shutdown = 0;
let $shutdown_timeout=;
let $restart_parameters=--debug_dbug=+d,ib_log_checkpoint_avoid_hard --innodb_flush_sync=0;
--source ../../mariabackup/include/restart_and_restore.inc
--source ../../suite/innodb/include/no_checkpoint_start.inc
select space into @t1_space_id from information_schema.innodb_sys_tablespaces where name='test/t1';
begin;
insert into t1 values (6, repeat('%', 400));
set global innodb_saved_page_number_debug = 3;
set global innodb_fil_make_page_dirty_debug = @t1_space_id;
set global innodb_buf_flush_list_now = 1;
--let CLEANUP_IF_CHECKPOINT=drop table t1, unexpected_checkpoint;
--source ../../suite/innodb/include/no_checkpoint_end.inc
--echo # Corrupt the page 3 in t1.ibd file
--echo # Assign the maximum value to lsn in doublewrite buffer page
perl;
use IO::Handle;
do "$ENV{MTR_SUITE_DIR}/include/crc32.pl";
my $polynomial = 0x82f63b78; # CRC-32C
my $algo = $ENV{ALGO};
die "Unsupported innodb_checksum_algorithm=$algo\n" unless $algo =~ /crc32/;
my $fname= "$ENV{'MYSQLD_DATADIR'}test/t1.ibd";
my $page_size = $ENV{INNODB_PAGE_SIZE};
my $page;
do "$ENV{MTR_SUITE_DIR}/../innodb/include/crc32.pl";
open(FILE, "+<", $fname) or die;
sysseek(FILE, 3*$page_size, 0);
sysread(FILE, $page, $page_size)==$page_size||die "Unable to read $name\n";
sysseek(FILE, 3*$page_size, 0)||die "Unable to seek $fname\n";
my $corrupted = $page;
# Set FIL_PAGE_LSN to the maximum
substr($corrupted, 16, 8) = chr(255) x 8;
substr($corrupted, $page_size - 8, 8) = chr(255) x 8;
if ($algo =~ /full_crc32/)
{
my $ck = mycrc32(substr($corrupted, 0, $page_size - 4), 0, $polynomial);
substr($corrupted, $page_size - 4, 4) = pack("N", $ck);
}
else
{
# Replace the innodb_checksum_algorithm=crc32 checksum
my $ck= pack("N",
mycrc32(substr($corrupted, 4, 22), 0, $polynomial) ^
mycrc32(substr($corrupted_, 38, $page_size - 38 - 8), 0,
$polynomial));
substr ($corrupted, 0, 4) = $ck;
substr ($corrupted, $page_size - 8, 4) = $ck;
}
syswrite(FILE, $corrupted);
close FILE;
# Change the page lsn to maximum value
open(FILE, "+<", "$ENV{MYSQLD_DATADIR}ibdata1")||die "cannot open ibdata1\n";
sysseek(FILE, 6 * $page_size - 190, 0)||die "Unable to seek ibdata1\n";
sysread(FILE, $_, 12) == 12||die "Unable to read TRX_SYS\n";
my($magic,$d1,$d2)=unpack "NNN", $_;
die "magic=$magic, $d1, $d2\n" unless $magic == 536853855 && $d2 >= $d1 + 64;
sysseek(FILE, $d1 * $page_size, 0)||die "Unable to seek ibdata1\n";
# Find the page in the doublewrite buffer
for (my $d = $d1; $d < $d2 + 64; $d++)
{
sysread(FILE, $_, $page_size)==$page_size||die "Cannot read doublewrite\n";
next unless $_ eq $page;
sysseek(FILE, $d * $page_size, 0)||die "Unable to seek ibdata1\n";
substr($_, 16, 8) = chr(255) x 8;
if ($algo =~ /full_crc32/)
{
my $ck = mycrc32(substr($_, 0, $page_size - 4), 0, $polynomial);
substr($_, $page_size - 4, 4) = pack("N", $ck);
}
else
{
# Replace the innodb_checksum_algorithm=crc32 checksum
my $ck= pack("N",
mycrc32(substr($_, 4, 22), 0, $polynomial) ^
mycrc32(substr($_, 38, $page_size - 38 - 8), 0,
$polynomial));
substr ($_, 0, 4) = $ck;
substr ($_, $page_size - 8, 4) = $ck;
}
syswrite(FILE, $_, $page_size)==$page_size||die;
close(FILE);
exit 0;
}
die "Did not find the page in the doublewrite buffer ($d1,$d2)\n";
EOF
let $restart_parameters=;
--source include/start_mysqld.inc
let SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err;
let SEARCH_PATTERN=InnoDB: Encrypted page \\[page id: space=[1-9]*, page number=3\\] in file .*test.t1.ibd looks corrupted;
--source include/search_pattern_in_file.inc
--error ER_UNKNOWN_STORAGE_ENGINE
select * from t1;
--source ../../mariabackup/include/restart_and_restore.inc
select * from t1;
drop table t2, t1;

View file

@ -3767,6 +3767,16 @@ database_corrupted_compressed:
if (err == DB_PAGE_CORRUPTED || err == DB_DECRYPTION_FAILED)
{
release_page:
if (node.space->full_crc32() && node.space->crypt_data &&
recv_recovery_is_on() &&
recv_sys.dblwr.find_encrypted_page(node, id().page_no(),
const_cast<byte*>(read_frame)))
{
/* Recover from doublewrite buffer */
err= DB_SUCCESS;
goto success_page;
}
if (recv_sys.free_corrupted_page(expected_id, node));
else if (err == DB_FAIL)
err= DB_PAGE_CORRUPTED;
@ -3788,6 +3798,7 @@ release_page:
buf_pool.corrupted_evict(this, buf_page_t::READ_FIX);
return err;
}
success_page:
const bool recovery= recv_recovery_is_on();

View file

@ -370,6 +370,7 @@ void buf_dblwr_t::recover()
srv_page_size));
byte *const buf= read_buf + srv_page_size;
std::deque<byte*> encrypted_pages;
for (recv_dblwr_t::list::iterator i= recv_sys.dblwr.pages.begin();
i != recv_sys.dblwr.pages.end(); ++i, ++page_no_dblwr)
{
@ -385,8 +386,16 @@ void buf_dblwr_t::recover()
fil_space_t *space= fil_space_t::get(space_id);
if (!space)
/* The tablespace that this page once belonged to does not exist */
{
/* These pages does not appear to belong to any tablespace.
There is a possibility that this page could be
encrypted using full_crc32 format. If innodb encounters
any corrupted encrypted page during recovery then
InnoDB should use this page to find the valid page.
See find_encrypted_page() */
encrypted_pages.push_back(*i);
continue;
}
if (UNIV_UNLIKELY(page_no >= space->get_size()))
{
@ -465,6 +474,8 @@ next_page:
}
recv_sys.dblwr.pages.clear();
for (byte *page : encrypted_pages)
recv_sys.dblwr.pages.push_back(page);
fil_flush_file_spaces();
aligned_free(read_buf);
}

View file

@ -150,6 +150,16 @@ struct recv_dblwr_t
const fil_space_t *space= nullptr,
byte *tmp_buf= nullptr) const noexcept;
/** Find the doublewrite copy of an encrypted page with the
smallest FIL_PAGE_LSN that is large enough for recovery.
@param space tablespace object
@param page_no page number to find
@param buf buffer for unencrypted page
@return buf
@retval nullptr if the page was not found in doublewrite buffer */
byte *find_encrypted_page(const fil_node_t &space, uint32_t page_no,
byte *buf) noexcept;
/** Restore the first page of the given tablespace from
doublewrite buffer.
1) Find the page which has page_no as 0
@ -257,8 +267,9 @@ private:
during log scan or apply */
bool found_corrupt_fs;
public:
/** whether we are applying redo log records during crash recovery */
bool recovery_on;
/** whether we are applying redo log records during crash recovery.
This is protected by recv_sys.mutex */
Atomic_relaxed<bool> recovery_on= false;
/** whether recv_recover_page(), invoked from buf_page_t::read_complete(),
should apply log records*/
bool apply_log_recs;

View file

@ -3785,6 +3785,7 @@ void recv_sys_t::apply(bool last_batch)
/* We skipped this in buf_page_create(). */
mlog_init.mark_ibuf_exist();
mlog_init.clear();
dblwr.pages.clear();
}
else
{
@ -4841,6 +4842,48 @@ bool recv_dblwr_t::validate_page(const page_id_t page_id, lsn_t max_lsn,
goto check_if_corrupted;
}
byte *recv_dblwr_t::find_encrypted_page(const fil_node_t &node,
uint32_t page_no,
byte *buf) noexcept
{
ut_ad(node.space->crypt_data);
ut_ad(node.space->full_crc32());
mysql_mutex_lock(&recv_sys.mutex);
byte *result_page= nullptr;
for (list::iterator page_it= pages.begin(); page_it != pages.end();
page_it++)
{
if (page_get_page_no(*page_it) != page_no ||
buf_page_is_corrupted(true, *page_it, node.space->flags))
continue;
memcpy(buf, *page_it, node.space->physical_size());
buf_tmp_buffer_t *slot= buf_pool.io_buf_reserve(false);
ut_a(slot);
slot->allocate();
bool invalidate=
!fil_space_decrypt(node.space, slot->crypt_buf, buf) ||
(node.space->is_compressed() &&
!fil_page_decompress(slot->crypt_buf, buf, node.space->flags));
slot->release();
if (invalidate ||
mach_read_from_4(buf + FIL_PAGE_SPACE_ID) != node.space->id)
continue;
result_page= *page_it;
pages.erase(page_it);
break;
}
mysql_mutex_unlock(&recv_sys.mutex);
if (result_page)
sql_print_information("InnoDB: Recovered page [page id: space="
UINT32PF ", page number=" UINT32PF "] "
"to '%s' from the doublewrite buffer.",
uint32_t(node.space->id), page_no,
node.name);
return result_page;
}
const byte *recv_dblwr_t::find_page(const page_id_t page_id, lsn_t max_lsn,
const fil_space_t *space, byte *tmp_buf)
const noexcept

View file

@ -324,8 +324,11 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn,
srv_startup_is_before_trx_rollback_phase = false;
}
/* Enable checkpoints in buf_flush_page_cleaner(). */
/* After disabling recv_no_log_write, enable checkpoints
in buf_flush_page_cleaner(). This could help to avoid
crash during log file resizing */
recv_sys.recovery_on = false;
mysql_mutex_unlock(&log_sys.mutex);
log_make_checkpoint();
@ -1351,8 +1354,7 @@ dberr_t srv_start(bool create_new_db)
return(srv_init_abort(DB_ERROR));
}
/* Enable checkpoints in the page cleaner. */
recv_sys.recovery_on = false;
ut_ad(!recv_sys.recovery_on);
err= recv_recovery_read_max_checkpoint();
@ -1507,8 +1509,6 @@ dberr_t srv_start(bool create_new_db)
: recv_recovery_from_checkpoint_start(flushed_lsn);
recv_sys.close_files();
recv_sys.dblwr.pages.clear();
if (err != DB_SUCCESS) {
return(srv_init_abort(err));
}
@ -1667,7 +1667,6 @@ dberr_t srv_start(bool create_new_db)
<< "Starting to delete and rewrite log file.";
srv_log_file_size = srv_log_file_size_requested;
err = create_log_file(false, flushed_lsn, logfile0);
if (err == DB_SUCCESS) {