MDEV-30179 mariabackup --backup fails with FATAL ERROR: ... failed

to copy datafile

- Mariabackup fails to copy an undo log tablespace while it is being
truncated. Mariabackup should therefore detect the redo log record
that performs undo tablespace truncation; when reading the tablespace
fails, backup should read only the minimum file size of the
tablespace and ignore the read error.

- Throw an error when reading an InnoDB undo tablespace failed but
backup did not find a redo log record for truncation of that
undo tablespace.
This commit is contained in:
Thirunarayanan Balathandayuthapani 2023-01-02 13:13:59 +05:30
parent cad33ded19
commit 17858e03a7
5 changed files with 83 additions and 8 deletions

View file

@ -130,6 +130,7 @@ xb_fil_cur_open(
in case of error */
cursor->buf = NULL;
cursor->node = NULL;
cursor->n_process_batch = 0;
cursor->space_id = node->space->id;
@ -374,6 +375,8 @@ xb_fil_cur_result_t xb_fil_cur_read(xb_fil_cur_t* cursor,
return(XB_FIL_CUR_EOF);
}
reinit_buf:
cursor->n_process_batch++;
if (to_read > (ib_int64_t) cursor->buf_size) {
to_read = (ib_int64_t) cursor->buf_size;
}
@ -415,9 +418,27 @@ read_retry:
cursor->buf_page_no = static_cast<unsigned>(offset / page_size);
if (os_file_read(IORequestRead, cursor->file, cursor->buf, offset,
(ulint) to_read) != DB_SUCCESS) {
ret = XB_FIL_CUR_ERROR;
goto func_exit;
(ulint) to_read) != DB_SUCCESS) {
if (!srv_is_undo_tablespace(cursor->space_id)) {
ret = XB_FIL_CUR_ERROR;
goto func_exit;
}
if (cursor->buf_page_no
>= SRV_UNDO_TABLESPACE_SIZE_IN_PAGES) {
ret = XB_FIL_CUR_SKIP;
goto func_exit;
}
to_read = SRV_UNDO_TABLESPACE_SIZE_IN_PAGES * page_size;
if (cursor->n_process_batch > 1) {
ret = XB_FIL_CUR_ERROR;
goto func_exit;
}
space->release();
goto reinit_buf;
}
/* check pages for corruption and re-read if necessary. i.e. in case of
partially written pages */

View file

@ -58,6 +58,7 @@ struct xb_fil_cur_t {
uint thread_n; /*!< thread number for diagnostics */
ulint space_id; /*!< ID of tablespace */
ulint space_size; /*!< space size in pages */
uint32_t n_process_batch;/*!< Number of batch processed */
/** @return whether this is not a file-per-table tablespace */
bool is_system() const

View file

@ -247,6 +247,10 @@ long innobase_file_io_threads = 4;
ulong innobase_read_io_threads = 4;
ulong innobase_write_io_threads = 4;
/** Ids of the undo tablespaces whose read failed. Protected by
backup_mutex */
static std::set<uint32_t> fail_undo_ids;
longlong innobase_page_size = (1LL << 14); /* 16KB */
char* innobase_buffer_pool_filename = NULL;
@ -366,6 +370,10 @@ struct ddl_tracker_t {
static ddl_tracker_t ddl_tracker;
/** Store the space ids of truncated undo log tablespaces. Protected
by recv_sys.mutex */
static std::set<uint32_t> undo_trunc_ids;
// Convert non-null terminated filename to space name
std::string filename_to_spacename(const byte *filename, size_t len);
@ -874,6 +882,10 @@ static void backup_file_op_fail(ulint space_id, bool create,
}
}
/** Remember that an undo tablespace was truncated.
Installed as the undo_space_trunc callback while a backup is running;
the recorded ids are later compared against the undo tablespaces whose
read failed.
@param space_id	undo tablespace identifier */
static void backup_undo_trunc(uint32_t space_id)
{
	/* std::set ignores duplicates, so repeated reports are harmless */
	undo_trunc_ids.emplace(space_id);
}
/*
Retrieve default data directory, to be used with --copy-back.
@ -2780,15 +2792,27 @@ static my_bool xtrabackup_copy_datafile(fil_node_t *node, uint thread_n,
}
/* The main copy loop */
while ((res = xb_fil_cur_read(&cursor, corrupted_pages)) ==
XB_FIL_CUR_SUCCESS) {
while (1) {
res = xb_fil_cur_read(&cursor, corrupted_pages);
if (res == XB_FIL_CUR_ERROR) {
goto error;
}
if (res == XB_FIL_CUR_EOF) {
break;
}
if (!write_filter.process(&write_filt_ctxt, dstfile)) {
goto error;
}
}
if (res == XB_FIL_CUR_ERROR) {
goto error;
if (res == XB_FIL_CUR_SKIP) {
pthread_mutex_lock(&backup_mutex);
fail_undo_ids.insert(
static_cast<uint32_t>(cursor.space_id));
pthread_mutex_unlock(&backup_mutex);
break;
}
}
if (write_filter.finalize
@ -4368,6 +4392,23 @@ static bool xtrabackup_backup_low()
dst_log_file = NULL;
std::vector<uint32_t> failed_ids;
std::set_difference(
fail_undo_ids.begin(), fail_undo_ids.end(),
undo_trunc_ids.begin(), undo_trunc_ids.end(),
std::inserter(failed_ids, failed_ids.begin()));
for (uint32_t id : failed_ids) {
msg("mariabackup: Failed to read undo log "
"tablespace space id %d and there is no undo "
"tablespace truncation redo record.",
id);
}
if (failed_ids.size() > 0) {
return false;
}
if(!xtrabackup_incremental) {
strcpy(metadata_type, "full-backuped");
metadata_from_lsn = 0;
@ -4442,6 +4483,7 @@ static bool xtrabackup_backup_func()
srv_operation = SRV_OPERATION_BACKUP;
log_file_op = backup_file_op;
undo_space_trunc = backup_undo_trunc;
metadata_to_lsn = 0;
/* initialize components */
@ -4450,6 +4492,7 @@ fail:
metadata_to_lsn = log_copying_running;
stop_backup_threads();
log_file_op = NULL;
undo_space_trunc = NULL;
if (dst_log_file) {
ds_close(dst_log_file);
dst_log_file = NULL;
@ -4741,6 +4784,7 @@ fail_before_log_copying_thread_start:
innodb_shutdown();
log_file_op = NULL;
undo_space_trunc = NULL;
pthread_mutex_destroy(&backup_mutex);
pthread_cond_destroy(&scanned_lsn_cond);
if (!corrupted_pages.empty()) {

View file

@ -90,6 +90,11 @@ extern void (*log_file_op)(ulint space_id, bool create,
const byte* name, ulint len,
const byte* new_name, ulint new_len);
/** Report an operation which does undo log tablespace truncation
during backup
@param space_id undo tablespace identifier */
extern void (*undo_space_trunc)(uint32_t space_id);
/** Stored redo log record */
struct log_rec_t
{

View file

@ -596,6 +596,8 @@ void (*log_file_op)(ulint space_id, bool create,
const byte* name, ulint len,
const byte* new_name, ulint new_len);
void (*undo_space_trunc)(uint32_t space_id);
/** Information about initializing page contents during redo log processing.
FIXME: Rely on recv_sys.pages! */
class mlog_init_t
@ -1949,6 +1951,8 @@ same_page:
TRX_SYS_MAX_UNDO_SPACES, "compatibility");
truncated_undo_spaces[space_id - srv_undo_space_id_start]=
{ recovered_lsn, page_no };
if (undo_space_trunc)
undo_space_trunc(space_id);
#endif
last_offset= 1; /* the next record must not be same_page */
continue;