MDEV-34705: Binlog-in-engine: Implement page checksum

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Kristian Nielsen 2025-03-21 20:30:37 +01:00
parent 734024d86f
commit c5159124d6
5 changed files with 92 additions and 13 deletions
mysql-test/suite/binlog_in_engine
storage/innobase

View file

@ -21,6 +21,12 @@ Log_name File_size
binlog-000000.ibb 49152
binlog-000001.ibb 262144
binlog-000002.ibb 262144
SET STATEMENT sql_log_bin=0 FOR
CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 0");
FLUSH BINARY LOGS;
FLUSH BINARY LOGS;
SHOW BINLOG EVENTS IN 'binlog-000000.ibb' LIMIT 1;
ERROR HY000: Error when executing command SHOW BINLOG EVENTS: error reading event data
RESET MASTER;
SHOW BINARY LOGS;
Log_name File_size

View file

@ -26,6 +26,30 @@ FLUSH BINARY LOGS;
--source include/wait_for_engine_binlog.inc
SHOW BINARY LOGS;
# Flush a couple of logs so we are sure the first file is on disk.
# Then corrupt one bit in the first page of the first file to test that a
# crc32 mismatch is caught.
SET STATEMENT sql_log_bin=0 FOR
CALL mtr.add_suppression("InnoDB: Page corruption in binlog tablespace file page number 0");
FLUSH BINARY LOGS;
FLUSH BINARY LOGS;
--let $file= binlog-000000.ibb
--let $datadir= `SELECT @@datadir`
--let BINLOG_FILE= $datadir/$file
perl;
# Flip a single bit in the binlog file named by the BINLOG_FILE environment
# variable, so that a later read of the containing page fails its checksum.
# Open the existing file for both reading and writing ('+<').
open F, '+<', $ENV{BINLOG_FILE} or die $!;
# Position at byte offset 50, counted from the start of the file (whence 0).
sysseek F, 50, 0 or die $!;
my $x;
# Fetch the one byte that is going to be damaged.
sysread F, $x, 1 or die $!;
# Toggle bit 3 of that byte (XOR with 1 << 3).
$x= chr(ord($x) ^ (1 <<3));
# Seek back to the same offset, since the read advanced the file position.
sysseek F, 50, 0 or die $!;
# Overwrite the original byte with the corrupted one.
syswrite F, $x, 1 or die $!;
EOF
--error ER_ERROR_WHEN_EXECUTING_COMMAND
eval SHOW BINLOG EVENTS IN '$file' LIMIT 1;
RESET MASTER;
--let $binlog_name= binlog-000001.ibb
--let $binlog_size= 262144

View file

@ -253,9 +253,7 @@ fsp_binlog_page_fifo::flush_one_page(uint64_t file_no, bool force)
{
File fh= get_fh(file_no);
ut_a(pl->fh >= (File)0);
size_t res= my_pwrite(fh, e->page_buf, srv_page_size,
(uint64_t)page_no << srv_page_size_shift,
MYF(MY_WME));
size_t res= crc32_pwrite_page(fh, e->page_buf, page_no, MYF(MY_WME));
ut_a(res == srv_page_size);
e->flushed_clean= true;
}
@ -576,6 +574,42 @@ fsp_binlog_page_fifo::flush_thread_run()
mysql_mutex_unlock(&m_mutex);
}
/**
  Stamp a binlog page with its checksum and write it to a file.

  The checksum is my_crc32c() over all bytes of the page except the last
  BINLOG_PAGE_CHECKSUM bytes; it is stored in those trailing bytes of buf
  (so buf is modified) before the complete page is written.

  @param fd        File handle to write to
  @param buf       Page buffer of srv_page_size bytes; the trailing checksum
                   field is overwritten
  @param page_no   Page number; the write goes to file offset
                   page_no << srv_page_size_shift
  @param MyFlags   Flags passed unchanged to my_pwrite()
  @return          The return value of my_pwrite()
*/
size_t
crc32_pwrite_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept
{
  /* Number of bytes covered by the checksum; also the checksum's offset. */
  const uint32_t data_len= (uint32_t)srv_page_size - BINLOG_PAGE_CHECKSUM;
  const my_off_t file_pos= (my_off_t)page_no << srv_page_size_shift;
  byte *const trailer= buf + data_len;

  mach_write_to_4(trailer, my_crc32c(0, buf, data_len));
  return my_pwrite(fd, (const uchar *)buf, srv_page_size, file_pos, MyFlags);
}
/**
  Read one binlog page from a file and verify its checksum.

  The value stored in the last BINLOG_PAGE_CHECKSUM bytes of the page is
  compared against my_crc32c() of the preceding bytes. A page consisting
  solely of zero bytes is accepted even though its checksum does not match.

  @param fd        File handle to read from
  @param buf       Destination buffer, at least srv_page_size bytes
  @param page_no   Page number; the read is from file offset
                   page_no << srv_page_size_shift
  @param MyFlags   Flags passed to my_pread(); if MY_WME is set, a checksum
                   failure is also reported with sql_print_error()
  @return          The return value of my_pread(), or (size_t)-1 with
                   my_errno set to EIO when a full page was read but its
                   checksum is invalid
*/
size_t
crc32_pread_page(File fd, byte *buf, uint32_t page_no, myf MyFlags) noexcept
{
  const my_off_t file_pos= (my_off_t)page_no << srv_page_size_shift;
  const size_t n_read= my_pread(fd, buf, srv_page_size, file_pos, MyFlags);

  /* Anything other than a full page is passed straight back to the caller. */
  if (UNIV_UNLIKELY(n_read != srv_page_size))
    return n_read;

  const uint32_t data_len= (uint32_t)srv_page_size - BINLOG_PAGE_CHECKSUM;
  const uint32_t stored_crc= mach_read_from_4(buf + data_len);
  if (UNIV_LIKELY(stored_crc == my_crc32c(0, buf, data_len)))
    return n_read;

  /*
    Checksum mismatch. A completely zero (empty) page is still allowed:
    first byte zero and every byte equal to its successor.
  */
  if (buf[0] == 0 && 0 == memcmp(buf, buf + 1, srv_page_size - 1))
    return n_read;

  my_errno= EIO;
  if (MyFlags & MY_WME)
    sql_print_error("InnoDB: Page corruption in binlog tablespace file "
                    "page number %u (invalid crc32 checksum 0x%08X)",
                    page_no, stored_crc);
  return (size_t)-1;
}
void
binlog_write_up_to_now() noexcept
{
@ -1208,8 +1242,8 @@ binlog_chunk_reader::fetch_current_page()
continue;
}
size_t res= my_pread(cur_file_handle, page_buffer, srv_page_size,
s.page_no << srv_page_size_shift, MYF(MY_WME));
size_t res= crc32_pread_page(cur_file_handle, page_buffer, s.page_no,
MYF(MY_WME));
if (res == (size_t)-1)
return CHUNK_READER_ERROR;
if (res == 0 && my_errno == HA_ERR_FILE_TOO_SHORT)

View file

@ -518,7 +518,7 @@ static int read_gtid_state_from_page(rpl_binlog_state_base *state,
Returns:
-1 error
0 File is missing (ENOENT)
0 File is missing (ENOENT) or has bad checksum on first page.
1 File found (but may be empty according to out_empty).
*/
int
@ -542,6 +542,15 @@ binlog_recovery::get_header(uint64_t file_no, lsn_t &out_lsn, bool &out_empty)
return -1;
if (read == 0)
return 0;
/*
If the crc32 does not match, the page was not written properly, so treat
it as an empty file.
*/
const uint32_t payload= (uint32_t)srv_page_size - BINLOG_PAGE_CHECKSUM;
uint32_t crc32= mach_read_from_4(page_buf + payload);
if (UNIV_UNLIKELY(crc32 != my_crc32c(0, page_buf, payload)))
return 0;
dummy_state.init();
int res= read_gtid_state_from_page(&dummy_state, page_buf, 0, &header);
if (res <= 0)
@ -826,9 +835,8 @@ binlog_recovery::flush_page() noexcept
if (cur_file_fh < (File)0 &&
open_cur_file())
return true;
size_t res= my_pwrite(cur_file_fh, page_buf, srv_page_size,
(uint64_t)cur_page_no << srv_page_size_shift,
MYF(MY_WME));
size_t res=
crc32_pwrite_page(cur_file_fh, page_buf, cur_page_no, MYF(MY_WME));
if (res != srv_page_size)
return true;
cur_page_offset= 0;
@ -844,7 +852,7 @@ binlog_recovery::zero_out_cur_file()
return;
/* Recover the original size from the current file. */
size_t read= my_pread(cur_file_fh, page_buf, srv_page_size, 0, MYF(0));
size_t read= crc32_pread_page(cur_file_fh, page_buf, 0, MYF(0));
if (read != (size_t)srv_page_size)
{
sql_print_warning("InnoDB: Could not read last binlog file during recovery");
@ -1900,9 +1908,8 @@ read_gtid_state(rpl_binlog_state_base *state, File file, uint32_t page_no,
if (UNIV_UNLIKELY(!page_buf))
return -1;
/* ToDo: Verify checksum, and handle encryption. */
size_t res= my_pread(file, page_buf.get(), srv_page_size,
(uint64_t)page_no << srv_page_size_shift, MYF(MY_WME));
/* ToDo: Handle encryption. */
size_t res= crc32_pread_page(file, page_buf.get(), page_no, MYF(MY_WME));
if (UNIV_UNLIKELY(res == (size_t)-1))
return -1;

View file

@ -34,6 +34,10 @@ InnoDB implementation of binlog.
struct chunk_data_base;
/** Store crc32 checksum at the end of the page */
#define BINLOG_PAGE_CHECKSUM 4
enum fsp_binlog_chunk_types {
/* Zero means no data, effectively EOF. */
FSP_BINLOG_TYPE_EMPTY= 0,
@ -302,6 +306,10 @@ fsp_binlog_release(fsp_binlog_page_entry *page)
binlog_page_fifo->release_page(page);
}
/*
  Page I/O helpers that maintain the crc32 stored in the last
  BINLOG_PAGE_CHECKSUM bytes of each page.

  crc32_pwrite_page() stores the checksum into buf and writes the page at
  page number page_no; it returns the result of the underlying my_pwrite().

  crc32_pread_page() reads the page at page number page_no and checks the
  stored checksum; on a mismatch (other than an all-zero page) it returns
  (size_t)-1 with my_errno set to EIO.
*/
extern size_t crc32_pwrite_page(File fd, byte *buf, uint32_t page_no,
myf MyFlags) noexcept;
extern size_t crc32_pread_page(File fd, byte *buf, uint32_t page_no,
myf MyFlags) noexcept;
extern void binlog_write_up_to_now() noexcept;
extern void fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page,
uint32_t page_offset, uint32_t len);