mirror of
https://github.com/MariaDB/server.git
synced 2026-05-16 20:07:13 +02:00
MDEV-11799 Doublewrite recovery can corrupt data pages
The purpose of the InnoDB doublewrite buffer is to make InnoDB tolerant against cases where the server was killed in the middle of a page write. (In Linux, killing a process may interrupt a write system call, typically on a 4096-byte boundary.) There may exist multiple copies of a page number in the doublewrite buffer. Recovery should choose the latest valid copy of the page. By design, the FIL_PAGE_LSN must not precede the latest checkpoint LSN nor be later than the end of the recovered log. For page_compressed and encrypted pages, we were missing proper consistency checks. In the 10.4 data set generated for MDEV-23231, the data file contained a valid page_compressed page, and an identical copy of that page was also present in the doublewrite buffer. But, recovery would incorrectly consider the page invalid and restore an uncompressed copy of the same page that had been written before the log checkpoint. (In fact, no redo log was to be applied to that page.) buf_dblwr_process(): Validate the FIL_PAGE_LSN in the doublewrite buffer pages, and always skip page 0, because those pages should have been recovered by Datafile::restore_from_doublewrite() if necessary. Datafile::restore_from_doublewrite(): Choose the latest applicable page from the doublewrite buffer. recv_dblwr_t::find_page(): Also validate encrypted or page_compressed pages. recv_dblwr_t::validate_page(): New function to validate a page, either a copy in a data file or in the doublewrite buffer. Also validate encrypted or page_compressed pages. This is joint work with Thirunarayanan Balathandayuthapani.
This commit is contained in:
parent
f35d172103
commit
879ba1979b
4 changed files with 164 additions and 114 deletions
|
|
@ -57,6 +57,7 @@ Created 9/20/1997 Heikki Tuuri
|
|||
#include "srv0start.h"
|
||||
#include "trx0roll.h"
|
||||
#include "row0merge.h"
|
||||
#include "fil0pagecompress.h"
|
||||
|
||||
/** Log records are stored in the hash table in chunks at most of this size;
|
||||
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
|
||||
|
|
@ -3910,6 +3911,8 @@ skip_apply:
|
|||
rescan = true;
|
||||
}
|
||||
|
||||
recv_sys->parse_start_lsn = checkpoint_lsn;
|
||||
|
||||
if (srv_operation == SRV_OPERATION_NORMAL) {
|
||||
buf_dblwr_process();
|
||||
}
|
||||
|
|
@ -4084,26 +4087,91 @@ recv_recovery_rollback_active(void)
|
|||
}
|
||||
}
|
||||
|
||||
/** Find a doublewrite copy of a page.
|
||||
@param[in] space_id tablespace identifier
|
||||
@param[in] page_no page number
|
||||
@return page frame
|
||||
@retval NULL if no page was found */
|
||||
const byte*
|
||||
recv_dblwr_t::find_page(ulint space_id, ulint page_no)
|
||||
bool recv_dblwr_t::validate_page(const page_id_t page_id,
|
||||
const byte *page,
|
||||
const fil_space_t *space,
|
||||
byte *tmp_buf)
|
||||
{
|
||||
const byte *result= NULL;
|
||||
if (page_id.page_no() == 0)
|
||||
{
|
||||
ulint flags= fsp_header_get_flags(page);
|
||||
if (!fsp_flags_is_valid(flags, page_id.space()))
|
||||
{
|
||||
ulint cflags= fsp_flags_convert_from_101(flags);
|
||||
if (cflags == ULINT_UNDEFINED)
|
||||
{
|
||||
ib::warn() << "Ignoring a doublewrite copy of page " << page_id
|
||||
<< "due to invalid flags " << ib::hex(flags);
|
||||
return false;
|
||||
}
|
||||
|
||||
flags= cflags;
|
||||
}
|
||||
|
||||
/* Page 0 is never page_compressed or encrypted. */
|
||||
return !buf_page_is_corrupted(true, page, page_size_t(flags));
|
||||
}
|
||||
|
||||
ut_ad(tmp_buf);
|
||||
byte *tmp_frame= tmp_buf;
|
||||
byte *tmp_page= tmp_buf + srv_page_size;
|
||||
const uint16_t page_type= mach_read_from_2(page + FIL_PAGE_TYPE);
|
||||
const page_size_t page_size(space->flags);
|
||||
const bool expect_encrypted= space->crypt_data &&
|
||||
space->crypt_data->type != CRYPT_SCHEME_UNENCRYPTED;
|
||||
|
||||
if (expect_encrypted &&
|
||||
mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION))
|
||||
{
|
||||
if (!fil_space_verify_crypt_checksum(page, page_size))
|
||||
return false;
|
||||
if (page_type != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)
|
||||
return true;
|
||||
if (page_size.is_compressed())
|
||||
return false;
|
||||
memcpy(tmp_page, page, page_size.physical());
|
||||
if (!fil_space_decrypt(space, tmp_frame, tmp_page))
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (page_type) {
|
||||
case FIL_PAGE_PAGE_COMPRESSED:
|
||||
memcpy(tmp_page, page, page_size.physical());
|
||||
/* fall through */
|
||||
case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
|
||||
if (page_size.is_compressed())
|
||||
return false; /* ROW_FORMAT=COMPRESSED cannot be page_compressed */
|
||||
ulint decomp= fil_page_decompress(tmp_frame, tmp_page);
|
||||
if (!decomp)
|
||||
return false; /* decompression failed */
|
||||
if (decomp == srv_page_size)
|
||||
return false; /* the page was not compressed (invalid page type) */
|
||||
return !buf_page_is_corrupted(true, tmp_page, page_size, space);
|
||||
}
|
||||
|
||||
return !buf_page_is_corrupted(true, page, page_size, space);
|
||||
}
|
||||
|
||||
byte *recv_dblwr_t::find_page(const page_id_t page_id,
|
||||
const fil_space_t *space, byte *tmp_buf)
|
||||
{
|
||||
byte *result= NULL;
|
||||
lsn_t max_lsn= 0;
|
||||
|
||||
for (list::const_iterator i = pages.begin(); i != pages.end(); ++i)
|
||||
{
|
||||
const byte *page= *i;
|
||||
if (page_get_page_no(page) != page_no ||
|
||||
page_get_space_id(page) != space_id)
|
||||
byte *page= *i;
|
||||
if (page_get_page_no(page) != page_id.page_no() ||
|
||||
page_get_space_id(page) != page_id.space())
|
||||
continue;
|
||||
const lsn_t lsn= mach_read_from_8(page + FIL_PAGE_LSN);
|
||||
if (lsn <= max_lsn)
|
||||
if (lsn <= max_lsn ||
|
||||
!validate_page(page_id, page, space, tmp_buf))
|
||||
{
|
||||
/* Mark processed for subsequent iterations in buf_dblwr_process() */
|
||||
memset(page + FIL_PAGE_LSN, 0, 8);
|
||||
continue;
|
||||
}
|
||||
max_lsn= lsn;
|
||||
result= page;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue