2014-02-26 19:11:54 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2016-06-21 14:21:03 +02:00
|
|
|
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
2019-04-04 20:22:43 +02:00
|
|
|
Copyright (c) 2017, 2019, MariaDB Corporation.
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
2019-05-11 18:25:02 +02:00
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/**************************************************//**
|
|
|
|
@file include/log0recv.h
|
|
|
|
Recovery
|
|
|
|
|
|
|
|
Created 9/20/1997 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
|
|
|
#ifndef log0recv_h
|
|
|
|
#define log0recv_h
|
|
|
|
|
|
|
|
#include "ut0byte.h"
|
|
|
|
#include "buf0types.h"
|
|
|
|
#include "hash0hash.h"
|
|
|
|
#include "log0log.h"
|
2016-08-12 10:17:45 +02:00
|
|
|
#include "mtr0types.h"
|
|
|
|
|
2014-05-05 18:20:28 +02:00
|
|
|
#include <list>
|
2016-09-06 08:43:16 +02:00
|
|
|
#include <vector>
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-07-05 21:09:28 +02:00
|
|
|
/** Is recv_writer_thread active? */
|
|
|
|
extern bool recv_writer_thread_active;
|
|
|
|
|
2016-12-30 14:04:10 +01:00
|
|
|
/** @return whether recovery is currently running. */
|
2019-04-04 20:22:43 +02:00
|
|
|
#define recv_recovery_is_on() UNIV_UNLIKELY(recv_recovery_on)
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-05-26 13:04:19 +02:00
|
|
|
/** Find the latest checkpoint in the log header.
|
|
|
|
@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
|
|
|
|
@return error code or DB_SUCCESS */
|
|
|
|
dberr_t
|
|
|
|
recv_find_max_checkpoint(ulint* max_field)
|
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
|
|
|
|
2019-05-23 10:55:54 +02:00
|
|
|
/** Reduces recv_sys->n_addrs for the corrupted page.
|
|
|
|
This function should be called when srv_force_recovery > 0.
|
2019-06-05 13:06:51 +02:00
|
|
|
@param[in] page_id page id of the corrupted page */
|
|
|
|
void recv_recover_corrupt_page(page_id_t page_id);
|
2019-05-23 10:55:54 +02:00
|
|
|
|
2019-04-06 20:23:01 +02:00
|
|
|
/** Apply any buffered redo log to a page that was just read from a data file.
|
|
|
|
@param[in,out] bpage buffer pool page */
|
|
|
|
ATTRIBUTE_COLD void recv_recover_page(buf_page_t* bpage);
|
2016-12-30 14:04:10 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Start recovering from a redo log checkpoint.
|
|
|
|
@see recv_recovery_from_checkpoint_finish
|
|
|
|
@param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN
|
|
|
|
of first system tablespace page
|
|
|
|
@return error code or DB_SUCCESS */
|
2014-02-26 19:11:54 +01:00
|
|
|
dberr_t
|
2016-08-12 10:17:45 +02:00
|
|
|
recv_recovery_from_checkpoint_start(
|
|
|
|
lsn_t flush_lsn);
|
|
|
|
/** Complete recovery from a checkpoint. */
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
|
|
|
recv_recovery_from_checkpoint_finish(void);
|
|
|
|
/********************************************************//**
|
|
|
|
Initiates the rollback of active transactions. */
|
|
|
|
void
|
|
|
|
recv_recovery_rollback_active(void);
|
|
|
|
/*===============================*/
|
2017-06-29 21:24:48 +02:00
|
|
|
/** Clean up after recv_sys_init() */
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
2017-06-29 21:24:48 +02:00
|
|
|
recv_sys_close();
|
|
|
|
/** Initialize the redo log recovery subsystem. */
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
2017-06-29 21:24:48 +02:00
|
|
|
recv_sys_init();
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************//**
|
2016-08-12 10:17:45 +02:00
|
|
|
Frees the recovery system. */
|
|
|
|
void
|
|
|
|
recv_sys_debug_free(void);
|
|
|
|
/*=====================*/
|
MDEV-12548 Initial implementation of Mariabackup for MariaDB 10.2
InnoDB I/O and buffer pool interfaces and the redo log format
have been changed between MariaDB 10.1 and 10.2, and the backup
code has to be adjusted accordingly.
The code has been simplified, and many memory leaks have been fixed.
Instead of the file name xtrabackup_logfile, the file name ib_logfile0
is being used for the copy of the redo log. Unnecessary InnoDB startup and
shutdown and some unnecessary threads have been removed.
Some help was provided by Vladislav Vaintroub.
Parameters have been cleaned up and aligned with those of MariaDB 10.2.
The --dbug option has been added, so that in debug builds,
--dbug=d,ib_log can be specified to enable diagnostic messages
for processing redo log entries.
By default, innodb_doublewrite=OFF, so that --prepare works faster.
If more crash-safety for --prepare is needed, double buffering
can be enabled.
The parameter innodb_log_checksums=OFF can be used to ignore redo log
checksums in --backup.
Some messages have been cleaned up.
Unless --export is specified, Mariabackup will not deal with undo log.
The InnoDB mini-transaction redo log is not only about user-level
transactions; it is actually about mini-transactions. To avoid confusion,
call it the redo log, not transaction log.
We disable any undo log processing in --prepare.
Because MariaDB 10.2 supports indexed virtual columns, the
undo log processing would need to be able to evaluate virtual column
expressions. To reduce the amount of code dependencies, we will not
process any undo log in prepare.
This means that the --export option must be disabled for now.
This also means that the following options are redundant
and have been removed:
xtrabackup --apply-log-only
innobackupex --redo-only
In addition to disabling any undo log processing, we will disable any
further changes to data pages during --prepare, including the change
buffer merge. This means that restoring incremental backups should
reliably work even when change buffering is being used on the server.
Because of this, preparing a backup will not generate any further
redo log, and the redo log file can be safely deleted. (If the
--export option is enabled in the future, it must generate redo log
when processing undo logs and buffered changes.)
In --prepare, we cannot easily know if a partial backup was used,
especially when restoring a series of incremental backups. So, we
simply warn about any missing files, and ignore the redo log for them.
FIXME: Enable the --export option.
FIXME: Improve the handling of the MLOG_INDEX_LOAD record, and write
a test that initiates a backup while an ALGORITHM=INPLACE operation
is creating indexes or rebuilding a table. An error should be detected
when preparing the backup.
FIXME: In --incremental --prepare, xtrabackup_apply_delta() should
ensure that if FSP_SIZE is modified, the file size will be adjusted
accordingly.
2017-06-30 09:49:37 +02:00
|
|
|
|
|
|
|
/** Read a log segment to a buffer.
|
|
|
|
@param[out] buf buffer
|
|
|
|
@param[in] group redo log files
|
2017-11-29 23:56:23 +01:00
|
|
|
@param[in, out] start_lsn in : read area start, out: the last read valid lsn
|
MDEV-12548 Initial implementation of Mariabackup for MariaDB 10.2
InnoDB I/O and buffer pool interfaces and the redo log format
have been changed between MariaDB 10.1 and 10.2, and the backup
code has to be adjusted accordingly.
The code has been simplified, and many memory leaks have been fixed.
Instead of the file name xtrabackup_logfile, the file name ib_logfile0
is being used for the copy of the redo log. Unnecessary InnoDB startup and
shutdown and some unnecessary threads have been removed.
Some help was provided by Vladislav Vaintroub.
Parameters have been cleaned up and aligned with those of MariaDB 10.2.
The --dbug option has been added, so that in debug builds,
--dbug=d,ib_log can be specified to enable diagnostic messages
for processing redo log entries.
By default, innodb_doublewrite=OFF, so that --prepare works faster.
If more crash-safety for --prepare is needed, double buffering
can be enabled.
The parameter innodb_log_checksums=OFF can be used to ignore redo log
checksums in --backup.
Some messages have been cleaned up.
Unless --export is specified, Mariabackup will not deal with undo log.
The InnoDB mini-transaction redo log is not only about user-level
transactions; it is actually about mini-transactions. To avoid confusion,
call it the redo log, not transaction log.
We disable any undo log processing in --prepare.
Because MariaDB 10.2 supports indexed virtual columns, the
undo log processing would need to be able to evaluate virtual column
expressions. To reduce the amount of code dependencies, we will not
process any undo log in prepare.
This means that the --export option must be disabled for now.
This also means that the following options are redundant
and have been removed:
xtrabackup --apply-log-only
innobackupex --redo-only
In addition to disabling any undo log processing, we will disable any
further changes to data pages during --prepare, including the change
buffer merge. This means that restoring incremental backups should
reliably work even when change buffering is being used on the server.
Because of this, preparing a backup will not generate any further
redo log, and the redo log file can be safely deleted. (If the
--export option is enabled in the future, it must generate redo log
when processing undo logs and buffered changes.)
In --prepare, we cannot easily know if a partial backup was used,
especially when restoring a series of incremental backups. So, we
simply warn about any missing files, and ignore the redo log for them.
FIXME: Enable the --export option.
FIXME: Improve the handling of the MLOG_INDEX_LOAD record, and write
a test that initiates a backup while an ALGORITHM=INPLACE operation
is creating indexes or rebuilding a table. An error should be detected
when preparing the backup.
FIXME: In --incremental --prepare, xtrabackup_apply_delta() should
ensure that if FSP_SIZE is modified, the file size will be adjusted
accordingly.
2017-06-30 09:49:37 +02:00
|
|
|
@param[in] end_lsn read area end
|
2017-11-29 23:56:23 +01:00
|
|
|
@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
|
|
|
|
@return false, if invalid block encountered (e.g. checksum mismatch), true otherwise */
|
|
|
|
bool
|
MDEV-12548 Initial implementation of Mariabackup for MariaDB 10.2
InnoDB I/O and buffer pool interfaces and the redo log format
have been changed between MariaDB 10.1 and 10.2, and the backup
code has to be adjusted accordingly.
The code has been simplified, and many memory leaks have been fixed.
Instead of the file name xtrabackup_logfile, the file name ib_logfile0
is being used for the copy of the redo log. Unnecessary InnoDB startup and
shutdown and some unnecessary threads have been removed.
Some help was provided by Vladislav Vaintroub.
Parameters have been cleaned up and aligned with those of MariaDB 10.2.
The --dbug option has been added, so that in debug builds,
--dbug=d,ib_log can be specified to enable diagnostic messages
for processing redo log entries.
By default, innodb_doublewrite=OFF, so that --prepare works faster.
If more crash-safety for --prepare is needed, double buffering
can be enabled.
The parameter innodb_log_checksums=OFF can be used to ignore redo log
checksums in --backup.
Some messages have been cleaned up.
Unless --export is specified, Mariabackup will not deal with undo log.
The InnoDB mini-transaction redo log is not only about user-level
transactions; it is actually about mini-transactions. To avoid confusion,
call it the redo log, not transaction log.
We disable any undo log processing in --prepare.
Because MariaDB 10.2 supports indexed virtual columns, the
undo log processing would need to be able to evaluate virtual column
expressions. To reduce the amount of code dependencies, we will not
process any undo log in prepare.
This means that the --export option must be disabled for now.
This also means that the following options are redundant
and have been removed:
xtrabackup --apply-log-only
innobackupex --redo-only
In addition to disabling any undo log processing, we will disable any
further changes to data pages during --prepare, including the change
buffer merge. This means that restoring incremental backups should
reliably work even when change buffering is being used on the server.
Because of this, preparing a backup will not generate any further
redo log, and the redo log file can be safely deleted. (If the
--export option is enabled in the future, it must generate redo log
when processing undo logs and buffered changes.)
In --prepare, we cannot easily know if a partial backup was used,
especially when restoring a series of incremental backups. So, we
simply warn about any missing files, and ignore the redo log for them.
FIXME: Enable the --export option.
FIXME: Improve the handling of the MLOG_INDEX_LOAD record, and write
a test that initiates a backup while an ALGORITHM=INPLACE operation
is creating indexes or rebuilding a table. An error should be detected
when preparing the backup.
FIXME: In --incremental --prepare, xtrabackup_apply_delta() should
ensure that if FSP_SIZE is modified, the file size will be adjusted
accordingly.
2017-06-30 09:49:37 +02:00
|
|
|
log_group_read_log_seg(
|
|
|
|
byte* buf,
|
|
|
|
const log_group_t* group,
|
2017-11-29 23:56:23 +01:00
|
|
|
lsn_t* start_lsn,
|
MDEV-12548 Initial implementation of Mariabackup for MariaDB 10.2
InnoDB I/O and buffer pool interfaces and the redo log format
have been changed between MariaDB 10.1 and 10.2, and the backup
code has to be adjusted accordingly.
The code has been simplified, and many memory leaks have been fixed.
Instead of the file name xtrabackup_logfile, the file name ib_logfile0
is being used for the copy of the redo log. Unnecessary InnoDB startup and
shutdown and some unnecessary threads have been removed.
Some help was provided by Vladislav Vaintroub.
Parameters have been cleaned up and aligned with those of MariaDB 10.2.
The --dbug option has been added, so that in debug builds,
--dbug=d,ib_log can be specified to enable diagnostic messages
for processing redo log entries.
By default, innodb_doublewrite=OFF, so that --prepare works faster.
If more crash-safety for --prepare is needed, double buffering
can be enabled.
The parameter innodb_log_checksums=OFF can be used to ignore redo log
checksums in --backup.
Some messages have been cleaned up.
Unless --export is specified, Mariabackup will not deal with undo log.
The InnoDB mini-transaction redo log is not only about user-level
transactions; it is actually about mini-transactions. To avoid confusion,
call it the redo log, not transaction log.
We disable any undo log processing in --prepare.
Because MariaDB 10.2 supports indexed virtual columns, the
undo log processing would need to be able to evaluate virtual column
expressions. To reduce the amount of code dependencies, we will not
process any undo log in prepare.
This means that the --export option must be disabled for now.
This also means that the following options are redundant
and have been removed:
xtrabackup --apply-log-only
innobackupex --redo-only
In addition to disabling any undo log processing, we will disable any
further changes to data pages during --prepare, including the change
buffer merge. This means that restoring incremental backups should
reliably work even when change buffering is being used on the server.
Because of this, preparing a backup will not generate any further
redo log, and the redo log file can be safely deleted. (If the
--export option is enabled in the future, it must generate redo log
when processing undo logs and buffered changes.)
In --prepare, we cannot easily know if a partial backup was used,
especially when restoring a series of incremental backups. So, we
simply warn about any missing files, and ignore the redo log for them.
FIXME: Enable the --export option.
FIXME: Improve the handling of the MLOG_INDEX_LOAD record, and write
a test that initiates a backup while an ALGORITHM=INPLACE operation
is creating indexes or rebuilding a table. An error should be detected
when preparing the backup.
FIXME: In --incremental --prepare, xtrabackup_apply_delta() should
ensure that if FSP_SIZE is modified, the file size will be adjusted
accordingly.
2017-06-30 09:49:37 +02:00
|
|
|
lsn_t end_lsn);
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/********************************************************//**
|
2014-02-26 19:11:54 +01:00
|
|
|
Reset the state of the recovery system variables. */
|
|
|
|
void
|
|
|
|
recv_sys_var_init(void);
|
|
|
|
/*===================*/
|
2017-03-07 16:16:49 +01:00
|
|
|
|
|
|
|
/** Apply the hash table of stored log records to persistent data pages.
|
|
|
|
@param[in] last_batch whether the change buffer merge will be
|
|
|
|
performed as part of the operation */
|
|
|
|
void
|
|
|
|
recv_apply_hashed_log_recs(bool last_batch);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2018-03-13 10:49:30 +01:00
|
|
|
/** Whether to store redo log records to the hash table.
The enumerators keep their implicit values 0, 1, 2 in declaration order. */
enum store_t {
	/** Do not store redo log records. */
	STORE_NO,
	/** Store redo log records. */
	STORE_YES,
	/** Store redo log records if the tablespace exists. */
	STORE_IF_EXISTS
};
|
|
|
|
|
|
|
|
|
|
|
|
/** Adds data from a new log block to the parsing buffer of recv_sys if
|
|
|
|
recv_sys->parse_start_lsn is non-zero.
|
|
|
|
@param[in] log_block log block to add
|
|
|
|
@param[in] scanned_lsn lsn of how far we were able to find
|
|
|
|
data in this log block
|
|
|
|
@return true if more data added */
|
|
|
|
bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
|
|
|
|
|
|
|
|
/** Parse log records from a buffer and optionally store them to a
|
|
|
|
hash table to wait for merging to file pages.
|
|
|
|
@param[in] checkpoint_lsn the LSN of the latest checkpoint
|
|
|
|
@param[in] store whether to store page operations
|
|
|
|
@param[in] apply whether to apply the records
|
|
|
|
@return whether MLOG_CHECKPOINT record was seen the first time,
|
|
|
|
or corruption was noticed */
|
|
|
|
bool recv_parse_log_recs(lsn_t checkpoint_lsn, store_t store, bool apply);
|
|
|
|
|
|
|
|
/** Moves the parsing buffer data left to the buffer start. */
|
|
|
|
void recv_sys_justify_left_parsing_buf();
|
|
|
|
|
2018-08-16 15:10:18 +02:00
|
|
|
/** Report optimized DDL operation (without redo log),
|
|
|
|
corresponding to MLOG_INDEX_LOAD.
|
2018-08-09 16:06:52 +02:00
|
|
|
@param[in] space_id tablespace identifier
|
|
|
|
*/
|
2018-08-16 15:10:18 +02:00
|
|
|
extern void (*log_optimized_ddl_op)(ulint space_id);
|
|
|
|
|
|
|
|
/** Report backup-unfriendly TRUNCATE operation (with separate log file),
|
|
|
|
corresponding to MLOG_TRUNCATE. */
|
|
|
|
extern void (*log_truncate)();
|
2018-08-09 16:06:52 +02:00
|
|
|
|
|
|
|
/** Report an operation to create, delete, or rename a file during backup.
|
|
|
|
@param[in] space_id tablespace identifier
|
|
|
|
@param[in] flags tablespace flags (NULL if not create)
|
|
|
|
@param[in] name file name (not NUL-terminated)
|
|
|
|
@param[in] len length of name, in bytes
|
|
|
|
@param[in] new_name new file name (NULL if not rename)
|
|
|
|
@param[in] new_len length of new_name, in bytes (0 if NULL) */
|
|
|
|
extern void (*log_file_op)(ulint space_id, const byte* flags,
|
|
|
|
const byte* name, ulint len,
|
|
|
|
const byte* new_name, ulint new_len);
|
2018-03-13 10:49:30 +01:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/** Block of log record data.
The log record data itself is stored physically immediately after this
struct, at most RECV_DATA_BLOCK_SIZE bytes of it per block. */
struct recv_data_t{
	recv_data_t*	next;	/*!< pointer to the next block or NULL */
};
|
|
|
|
|
|
|
|
/** Stored log record struct */
|
|
|
|
struct recv_t{
|
2016-08-12 10:17:45 +02:00
|
|
|
mlog_id_t type; /*!< log record type */
|
2014-02-26 19:11:54 +01:00
|
|
|
ulint len; /*!< log record body length in bytes */
|
|
|
|
recv_data_t* data; /*!< chain of blocks containing the log record
|
|
|
|
body */
|
|
|
|
lsn_t start_lsn;/*!< start lsn of the log segment written by
|
|
|
|
the mtr which generated this log record: NOTE
|
|
|
|
that this is not necessarily the start lsn of
|
|
|
|
this log record */
|
|
|
|
lsn_t end_lsn;/*!< end lsn of the log segment written by
|
|
|
|
the mtr which generated this log record: NOTE
|
|
|
|
that this is not necessarily the end lsn of
|
|
|
|
this log record */
|
|
|
|
UT_LIST_NODE_T(recv_t)
|
|
|
|
rec_list;/*!< list of log records for this page */
|
|
|
|
};
|
|
|
|
|
2014-05-05 18:20:28 +02:00
|
|
|
struct recv_dblwr_t {
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Add a page frame to the doublewrite recovery buffer. */
|
2017-01-19 11:06:13 +01:00
|
|
|
void add(byte* page) {
|
2016-08-12 10:17:45 +02:00
|
|
|
pages.push_back(page);
|
|
|
|
}
|
2014-05-05 18:20:28 +02:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Find a doublewrite copy of a page.
|
|
|
|
@param[in] space_id tablespace identifier
|
|
|
|
@param[in] page_no page number
|
|
|
|
@return page frame
|
|
|
|
@retval NULL if no page was found */
|
|
|
|
const byte* find_page(ulint space_id, ulint page_no);
|
2014-05-05 18:20:28 +02:00
|
|
|
|
2017-01-19 11:06:13 +01:00
|
|
|
typedef std::list<byte*, ut_allocator<byte*> > list;
|
2014-05-06 21:13:16 +02:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Recovered doublewrite buffer page frames */
|
|
|
|
list pages;
|
2014-05-05 18:20:28 +02:00
|
|
|
};
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/** Recovery system data structure */
|
|
|
|
struct recv_sys_t{
|
|
|
|
ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
|
|
|
|
n_addrs, and the state field in each recv_addr
|
|
|
|
struct */
|
|
|
|
ib_mutex_t writer_mutex;/*!< mutex coordinating
|
|
|
|
flushing between recv_writer_thread and
|
|
|
|
the recovery thread. */
|
2016-08-12 10:17:45 +02:00
|
|
|
os_event_t flush_start;/*!< event to acticate
|
|
|
|
page cleaner threads */
|
|
|
|
os_event_t flush_end;/*!< event to signal that the page
|
|
|
|
cleaner has finished the request */
|
|
|
|
buf_flush_t flush_type;/*!< type of the flush request.
|
|
|
|
BUF_FLUSH_LRU: flush end of LRU, keeping free blocks.
|
|
|
|
BUF_FLUSH_LIST: flush all of blocks. */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool apply_log_recs;
|
|
|
|
/*!< this is TRUE when log rec application to
|
|
|
|
pages is allowed; this flag tells the
|
|
|
|
i/o-handler if it should do log record
|
|
|
|
application */
|
|
|
|
ibool apply_batch_on;
|
|
|
|
/*!< this is TRUE when a log rec application
|
|
|
|
batch is running */
|
|
|
|
byte* buf; /*!< buffer for parsing log records */
|
|
|
|
ulint len; /*!< amount of data in buf */
|
|
|
|
lsn_t parse_start_lsn;
|
|
|
|
/*!< this is the lsn from which we were able to
|
|
|
|
start parsing log records and adding them to
|
|
|
|
the hash table; zero if a suitable
|
|
|
|
start point not found yet */
|
|
|
|
lsn_t scanned_lsn;
|
|
|
|
/*!< the log data has been scanned up to this
|
|
|
|
lsn */
|
|
|
|
ulint scanned_checkpoint_no;
|
|
|
|
/*!< the log data has been scanned up to this
|
|
|
|
checkpoint number (lowest 4 bytes) */
|
|
|
|
ulint recovered_offset;
|
|
|
|
/*!< start offset of non-parsed log records in
|
|
|
|
buf */
|
|
|
|
lsn_t recovered_lsn;
|
|
|
|
/*!< the log records have been parsed up to
|
|
|
|
this lsn */
|
2016-08-12 10:17:45 +02:00
|
|
|
bool found_corrupt_log;
|
|
|
|
/*!< set when finding a corrupt log
|
|
|
|
block or record, or there is a log
|
|
|
|
parsing buffer overflow */
|
|
|
|
bool found_corrupt_fs;
|
|
|
|
/*!< set when an inconsistency with
|
|
|
|
the file system contents is detected
|
|
|
|
during log scan or apply */
|
|
|
|
lsn_t mlog_checkpoint_lsn;
|
|
|
|
/*!< the LSN of a MLOG_CHECKPOINT
|
|
|
|
record, or 0 if none was parsed */
|
2017-03-07 16:16:49 +01:00
|
|
|
/** the time when progress was last reported */
|
2019-07-24 20:43:19 +02:00
|
|
|
time_t progress_time;
|
2014-02-26 19:11:54 +01:00
|
|
|
mem_heap_t* heap; /*!< memory heap of log records and file
|
|
|
|
addresses*/
|
|
|
|
hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
|
|
|
|
ulint n_addrs;/*!< number of not processed hashed file
|
|
|
|
addresses in the hash table */
|
2014-05-05 18:20:28 +02:00
|
|
|
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 12:43:06 +02:00
|
|
|
/** Undo tablespaces for which truncate has been logged
|
|
|
|
(indexed by id - srv_undo_space_id_start) */
|
|
|
|
struct trunc {
|
|
|
|
/** log sequence number of MLOG_FILE_CREATE2, or 0 if none */
|
|
|
|
lsn_t lsn;
|
|
|
|
/** truncated size of the tablespace, or 0 if not truncated */
|
|
|
|
unsigned pages;
|
|
|
|
} truncated_undo_spaces[127];
|
|
|
|
|
2014-05-05 18:20:28 +02:00
|
|
|
recv_dblwr_t dblwr;
|
2017-03-07 16:16:49 +01:00
|
|
|
|
2018-03-27 10:17:56 +02:00
|
|
|
/** Lastly added LSN to the hash table of log records. */
|
|
|
|
lsn_t last_stored_lsn;
|
|
|
|
|
2017-03-07 16:16:49 +01:00
|
|
|
/** Determine whether redo log recovery progress should be reported.
|
|
|
|
@param[in] time the current time
|
|
|
|
@return whether progress should be reported
|
|
|
|
(the last report was at least 15 seconds ago) */
|
2019-07-24 20:43:19 +02:00
|
|
|
bool report(time_t time)
|
2017-03-07 16:16:49 +01:00
|
|
|
{
|
|
|
|
if (time - progress_time < 15) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
progress_time = time;
|
|
|
|
return true;
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
/** The recovery system */
|
|
|
|
extern recv_sys_t* recv_sys;
|
|
|
|
|
|
|
|
/** TRUE when applying redo log records during crash recovery; FALSE
|
|
|
|
otherwise. Note that this is FALSE while a background thread is
|
|
|
|
rolling back incomplete transactions. */
|
2016-08-12 10:17:45 +02:00
|
|
|
extern volatile bool recv_recovery_on;
|
2014-02-26 19:11:54 +01:00
|
|
|
/** If the following is TRUE, the buffer pool file pages must be invalidated
|
|
|
|
after recovery and no ibuf operations are allowed; this becomes TRUE if
|
|
|
|
the log record hash table becomes too full, and log records must be merged
|
|
|
|
to file pages already before the recovery is finished: in this case no
|
|
|
|
ibuf operations are allowed, as they could modify the pages read in the
|
|
|
|
buffer pool before the pages have been recovered to the up-to-date state.
|
|
|
|
|
|
|
|
TRUE means that recovery is running and no operations on the log files
|
|
|
|
are allowed yet: the variable name is misleading. */
|
2016-08-12 10:17:45 +02:00
|
|
|
extern bool recv_no_ibuf_operations;
|
2014-02-26 19:11:54 +01:00
|
|
|
/** TRUE when recv_init_crash_recovery() has been called. */
|
2016-08-12 10:17:45 +02:00
|
|
|
extern bool recv_needed_recovery;
|
2014-02-26 19:11:54 +01:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
|
|
|
|
Protected by log_sys->mutex. */
|
2016-08-12 10:17:45 +02:00
|
|
|
extern bool recv_no_log_write;
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
|
|
|
|
/** TRUE if buf_page_is_corrupted() should check if the log sequence
|
|
|
|
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
|
2016-08-12 10:17:45 +02:00
|
|
|
recv_recovery_from_checkpoint_start(). */
|
|
|
|
extern bool recv_lsn_checks_on;
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
|
|
|
|
times! */
|
|
|
|
#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
|
|
|
|
|
|
|
|
/** Size of block reads when the log groups are scanned forward to do a
|
|
|
|
roll-forward */
|
|
|
|
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
|
|
|
|
|
|
|
|
/** This many frames must be left free in the buffer pool when we scan
|
|
|
|
the log and store the scanned log records in the buffer pool: we will
|
|
|
|
use these free frames to read in pages when we start applying the
|
|
|
|
log records to the database. */
|
|
|
|
extern ulint recv_n_pool_free_frames;
|
|
|
|
|
|
|
|
#endif
|