MDEV-16045: Replace log_group_t with log_t::files

There is only one log_sys and only one log_sys.log.

log_t::files::create(): Replaces log_init().

log_t::files::close(): Replaces log_group_close(), log_group_close_all().

fil_close_log_files(): if (free) log_sys.log.close();
The callers that passed free=true used to call log_group_close_all().

log_header_read(): Replaces log_group_header_read().

log_t::files::file_header_bufs_ptr: Use a single allocation.

log_t::files::file_header_bufs[]: Statically allocate the pointers.

log_t::files::set_fields(): Replaces log_group_set_fields().

log_t::files::calc_lsn_offset(): Replaces log_group_calc_lsn_offset().
Simplify the computation by using fewer variables.

log_t::files::read_log_seg(): Replaces log_group_read_log_seg().

log_t::complete_checkpoint(): Replaces log_io_complete_checkpoint().

fil_aio_wait(): Move the logic from log_io_complete().
This commit is contained in:
Marko Mäkelä 2018-04-27 13:17:18 +03:00
parent d73a898d64
commit baa5a43d8c
8 changed files with 233 additions and 480 deletions

View file

@ -2544,8 +2544,7 @@ xtrabackup_copy_logfile(copy_logfile copy)
lsn_t lsn= start_lsn;
for(int retries= 0; retries < 100; retries++) {
if (log_group_read_log_seg(log_sys.buf, &log_sys.log,
&lsn, end_lsn)){
if (log_sys.log.read_log_seg(&lsn, end_lsn)) {
break;
}
msg("Retrying read of a redo log block");
@ -3819,7 +3818,7 @@ fail:
SRV_MAX_N_PENDING_SYNC_IOS);
log_sys.create();
log_init(srv_n_log_files);
log_sys.log.create(srv_n_log_files);
fil_space_t* space = fil_space_create(
"innodb_redo_log", SRV_LOG_SPACE_FIRST_ID, 0,
FIL_TYPE_LOG, NULL);
@ -3924,7 +3923,7 @@ reread_log_header:
ut_ad(!((log_sys.log.format ^ LOG_HEADER_FORMAT_CURRENT)
& ~LOG_HEADER_FORMAT_ENCRYPTED));
log_group_header_read(&log_sys.log, max_cp_field);
log_header_read(max_cp_field);
if (checkpoint_no_start != mach_read_from_8(buf + LOG_CHECKPOINT_NO)) {
goto reread_log_header;

View file

@ -2007,6 +2007,10 @@ fil_close_log_files(
}
mutex_exit(&fil_system.mutex);
if (free) {
log_sys.log.close();
}
}
/*******************************************************************//**
@ -4685,7 +4689,26 @@ fil_aio_wait(
switch (purpose) {
case FIL_TYPE_LOG:
srv_set_io_thread_op_info(segment, "complete io for log");
log_io_complete(static_cast<log_group_t*>(message));
/* We use synchronous writing of the logs
and can only end up here when writing a log checkpoint! */
ut_a(ptrdiff_t(message) == 1);
/* It was a checkpoint write */
switch (srv_file_flush_method) {
case SRV_O_DSYNC:
case SRV_NOSYNC:
break;
case SRV_FSYNC:
case SRV_LITTLESYNC:
case SRV_O_DIRECT:
case SRV_O_DIRECT_NO_FSYNC:
#ifdef _WIN32
case SRV_ALL_O_DIRECT_FSYNC:
#endif
fil_flush(SRV_LOG_SPACE_FIRST_ID);
}
DBUG_PRINT("ib_log", ("checkpoint info written"));
log_sys.complete_checkpoint();
return;
case FIL_TYPE_TABLESPACE:
case FIL_TYPE_TEMPORARY:

View file

@ -41,8 +41,8 @@ Created 12/9/1995 Heikki Tuuri
#include "os0event.h"
#include "os0file.h"
/** Redo log group */
struct log_group_t;
/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
#define SRV_N_LOG_FILES_MAX 100
/** Magic value to use instead of log checksums when they are disabled */
#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
@ -151,10 +151,6 @@ lsn_t
log_get_max_modified_age_async(void);
/*================================*/
/** Initialize the redo log.
@param[in] n_files number of files */
void
log_init(ulint n_files);
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@ -166,12 +162,6 @@ log_set_capacity(ulonglong file_size)
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
Completes an i/o to a log file. */
void
log_io_complete(
/*============*/
log_group_t* group); /*!< in: log group */
/******************************************************//**
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been written to the log file up to the last log entry written
by the transaction. If there is a flush running, it waits and checks if the
@ -231,12 +221,8 @@ void
logs_empty_and_mark_files_at_shutdown(void);
/*=======================================*/
/** Read a log group header page to log_sys.checkpoint_buf.
@param[in] group log group
@param[in] header 0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
void
log_group_header_read(
const log_group_t* group,
ulint header);
@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
void log_header_read(ulint header);
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in] sync whether to wait for the write to complete
@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
@ -257,16 +243,6 @@ objects! */
void
log_check_margins(void);
/********************************************************//**
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
some lsn, for instance, a checkpoint lsn. */
void
log_group_set_fields(
/*=================*/
log_group_t* group, /*!< in/out: group */
lsn_t lsn); /*!< in: lsn for which the values should be
set */
/************************************************************//**
Gets a log block flush bit.
@return TRUE if this block was the first to be written in a log flush */
@ -398,11 +374,6 @@ Refreshes the statistics used to print per-second averages. */
void
log_refresh_stats(void);
/*===================*/
/********************************************************//**
Closes all log groups. */
void
log_group_close_all(void);
/*=====================*/
/** Whether to generate and require checksums on the redo log pages */
extern my_bool innodb_log_checksums;
@ -536,50 +507,6 @@ enum log_group_state_t {
typedef ib_mutex_t LogSysMutex;
typedef ib_mutex_t FlushOrderMutex;
/** Log group consists of a number of log files, each of the same size; a log
group is implemented as a space in the sense of the module fil0fil.
Currently, this is only protected by log_sys.mutex. However, in the case
of log_write_up_to(), we will access some members only with the protection
of log_sys.write_mutex, which should affect nothing for now. */
struct log_group_t{
/** number of files in the group */
ulint n_files;
/** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
ulint format;
/** individual log file size in bytes, including the header */
lsn_t file_size;
/** corruption status */
log_group_state_t state;
/** lsn used to fix coordinates within the log group */
lsn_t lsn;
/** the byte offset of the above lsn */
lsn_t lsn_offset;
/** unaligned buffers */
byte** file_header_bufs_ptr;
/** buffers for each file header in the group */
byte** file_header_bufs;
/** used only in recovery: recovery scan succeeded up to this
lsn in this log group */
lsn_t scanned_lsn;
/** unaligned checkpoint header */
byte* checkpoint_buf_ptr;
/** buffer for writing a checkpoint header */
byte* checkpoint_buf;
/** @return whether the redo log is encrypted */
bool is_encrypted() const
{
return((format & LOG_HEADER_FORMAT_ENCRYPTED) != 0);
}
/** @return capacity in bytes */
inline lsn_t capacity() const
{
return((file_size - LOG_FILE_HDR_SIZE) * n_files);
}
};
/** Redo log buffer */
struct log_t{
MY_ALIGNED(CACHE_LINE_SIZE)
@ -590,8 +517,7 @@ struct log_t{
MY_ALIGNED(CACHE_LINE_SIZE)
LogSysMutex mutex; /*!< mutex protecting the log */
MY_ALIGNED(CACHE_LINE_SIZE)
LogSysMutex write_mutex; /*!< mutex protecting writing to log
file and accessing to log_group_t */
LogSysMutex write_mutex; /*!< mutex protecting writing to log */
MY_ALIGNED(CACHE_LINE_SIZE)
FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
@ -626,8 +552,67 @@ struct log_t{
max_checkpoint_age; this flag is
peeked at by log_free_check(), which
does not reserve the log mutex */
/** the redo log */
log_group_t log;
/** Log files. Protected by mutex or write_mutex. */
struct files {
/** number of files */
ulint n_files;
/** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
ulint format;
/** individual log file size in bytes, including the header */
lsn_t file_size;
/** corruption status */
log_group_state_t state;
/** lsn used to fix coordinates within the log group */
lsn_t lsn;
/** the byte offset of the above lsn */
lsn_t lsn_offset;
/** unaligned buffers */
byte* file_header_bufs_ptr;
/** buffers for each file header in the group */
byte* file_header_bufs[SRV_N_LOG_FILES_MAX];
/** used only in recovery: recovery scan succeeded up to this
lsn in this log group */
lsn_t scanned_lsn;
/** @return whether the redo log is encrypted */
bool is_encrypted() const { return format & LOG_HEADER_FORMAT_ENCRYPTED; }
/** @return capacity in bytes */
lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; }
/** Calculate the offset of a log sequence number.
@param[in] lsn log sequence number
@return offset within the log */
inline lsn_t calc_lsn_offset(lsn_t lsn) const;
/** Set the field values to correspond to a given lsn. */
void set_fields(lsn_t lsn)
{
lsn_offset = calc_lsn_offset(lsn);
this->lsn = lsn;
}
/** Read a log segment to log_sys.buf.
@param[in,out] start_lsn in: read area start,
out: the last read valid lsn
@param[in] end_lsn read area end
@return whether no invalid blocks (e.g checksum mismatch) were found */
bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn);
/** Initialize the redo log buffer.
@param[in] n_files number of files */
void create(ulint n_files);
/** Close the redo log buffer. */
void close()
{
ut_free(file_header_bufs_ptr);
n_files = 0;
file_header_bufs_ptr = NULL;
memset(file_header_bufs, 0, sizeof file_header_bufs);
}
} log;
/** The fields involved in the log buffer flush @{ */
@ -730,6 +715,9 @@ public:
bool is_initialised() { return m_initialised; }
/** Complete an asynchronous checkpoint write. */
void complete_checkpoint();
/** Initialise the redo log subsystem. */
void create();
@ -740,6 +728,27 @@ public:
/** Redo log system */
extern log_t log_sys;
/** Map a log sequence number to a byte offset within the log files.
The result is relative to the anchor pair (this->lsn, lsn_offset)
and wraps around at capacity().
@param[in]	lsn	log sequence number
@return offset within the log (file headers included) */
inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const
{
  ut_ad(this == &log_sys.log);
  /* The lsn parameters are updated while holding both the mutexes
  and it is ok to have either of them while reading */
  ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());

  const lsn_t payload_size= capacity();

  /* Distance from the anchor lsn; if the requested lsn is before
  the anchor, convert it to the equivalent forward distance. */
  lsn_t distance= lsn - this->lsn;
  if (longlong(distance) < 0) {
    distance= payload_size - lsn_t(-longlong(distance)) % payload_size;
  }

  /* The anchor offset with the file headers subtracted. */
  const lsn_t anchor_payload_offset= lsn_offset
    - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size);

  const lsn_t payload_offset= (distance + anchor_payload_offset)
    % payload_size;

  /* Add back the header of every file up to and including the one
  that contains the offset. */
  return payload_offset + LOG_FILE_HDR_SIZE
    * (1 + payload_offset / (file_size - LOG_FILE_HDR_SIZE));
}
/** Test if flush order mutex is owned. */
#define log_flush_order_mutex_own() \
mutex_own(&log_sys.log_flush_order_mutex)
@ -783,15 +792,6 @@ extern log_t log_sys;
mutex_exit(&log_sys.write_mutex); \
} while (0)
/** Calculate the offset of an lsn within a log group.
@param[in] lsn log sequence number
@param[in] group log group
@return offset within the log group */
lsn_t
log_group_calc_lsn_offset(
lsn_t lsn,
const log_group_t* group);
/* log scrubbing speed, in bytes/sec */
extern ulonglong innodb_scrub_log_speed;

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -96,20 +96,6 @@ void
recv_sys_debug_free(void);
/*=====================*/
/** Read a log segment to a buffer.
@param[out] buf buffer
@param[in] group redo log files
@param[in, out] start_lsn in : read area start, out: the last read valid lsn
@param[in] end_lsn read area end
@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
bool
log_group_read_log_seg(
byte* buf,
const log_group_t* group,
lsn_t* start_lsn,
lsn_t end_lsn);
/********************************************************//**
Reset the state of the recovery system variables. */
void

View file

@ -339,8 +339,6 @@ extern const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
extern char* srv_log_group_home_dir;
/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
#define SRV_N_LOG_FILES_MAX 100
extern ulong srv_n_log_files;
/** The InnoDB redo log file size, or 0 when changing the redo log format
at startup (while disallowing writes to the redo log). */

View file

@ -134,13 +134,6 @@ extern "C" UNIV_INTERN
os_thread_ret_t
DECLARE_THREAD(log_scrub_thread)(void*);
/******************************************************//**
Completes a checkpoint write i/o to a log file. */
static
void
log_io_complete_checkpoint(void);
/*============================*/
/****************************************************************//**
Returns the oldest modified block lsn in the pool, or log_sys.lsn if none
exists.
@ -541,108 +534,6 @@ function_exit:
return(lsn);
}
/******************************************************//**
Calculates the offset within a log group, when the log file headers are not
included.
@return size offset (<= offset) */
UNIV_INLINE
lsn_t
log_group_calc_size_offset(
/*=======================*/
lsn_t offset, /*!< in: real offset within the
log group */
const log_group_t* group) /*!< in: log group */
{
/* The lsn parameters are updated while holding both the mutexes
and it is ok to have either of them while reading */
ut_ad(log_mutex_own() || log_write_mutex_own());
return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
}
/******************************************************//**
Calculates the offset within a log group, when the log file headers are
included.
@return real offset (>= offset) */
UNIV_INLINE
lsn_t
log_group_calc_real_offset(
/*=======================*/
lsn_t offset, /*!< in: size offset within the
log group */
const log_group_t* group) /*!< in: log group */
{
/* The lsn parameters are updated while holding both the mutexes
and it is ok to have either of them while reading */
ut_ad(log_mutex_own() || log_write_mutex_own());
return(offset + LOG_FILE_HDR_SIZE
* (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
}
/** Calculate the offset of an lsn within a log group.
@param[in] lsn log sequence number
@param[in] group log group
@return offset within the log group */
lsn_t
log_group_calc_lsn_offset(
lsn_t lsn,
const log_group_t* group)
{
lsn_t gr_lsn;
lsn_t gr_lsn_size_offset;
lsn_t difference;
lsn_t group_size;
lsn_t offset;
/* The lsn parameters are updated while holding both the mutexes
and it is ok to have either of them while reading */
ut_ad(log_mutex_own() || log_write_mutex_own());
gr_lsn = group->lsn;
gr_lsn_size_offset = log_group_calc_size_offset(
group->lsn_offset, group);
group_size = group->capacity();
if (lsn >= gr_lsn) {
difference = lsn - gr_lsn;
} else {
difference = gr_lsn - lsn;
difference = difference % group_size;
difference = group_size - difference;
}
offset = (gr_lsn_size_offset + difference) % group_size;
/* fprintf(stderr,
"Offset is " LSN_PF " gr_lsn_offset is " LSN_PF
" difference is " LSN_PF "\n",
offset, gr_lsn_size_offset, difference);
*/
return(log_group_calc_real_offset(offset, group));
}
/********************************************************//**
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
some lsn, for instance, a checkpoint lsn. */
void
log_group_set_fields(
/*=================*/
log_group_t* group, /*!< in/out: group */
lsn_t lsn) /*!< in: lsn for which the values should be
set */
{
group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
group->lsn = lsn;
}
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@ -767,87 +658,38 @@ void log_t::create()
/** Initialize the redo log.
@param[in] n_files number of files */
void
log_init(ulint n_files)
void log_t::files::create(ulint n_files)
{
ulint i;
log_group_t* group = &log_sys.log;
ut_ad(n_files <= SRV_N_LOG_FILES_MAX);
ut_ad(this == &log_sys.log);
ut_ad(log_sys.is_initialised());
group->n_files = n_files;
group->format = srv_encrypt_log
? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED
: LOG_HEADER_FORMAT_CURRENT;
group->file_size = srv_log_file_size;
group->state = LOG_GROUP_OK;
group->lsn = LOG_START_LSN;
group->lsn_offset = LOG_FILE_HDR_SIZE;
this->n_files= n_files;
format= srv_encrypt_log
? LOG_HEADER_FORMAT_CURRENT | LOG_HEADER_FORMAT_ENCRYPTED
: LOG_HEADER_FORMAT_CURRENT;
file_size= srv_log_file_size;
state= LOG_GROUP_OK;
lsn= LOG_START_LSN;
lsn_offset= LOG_FILE_HDR_SIZE;
group->file_header_bufs_ptr = static_cast<byte**>(
ut_zalloc_nokey(sizeof(byte*) * n_files));
byte* ptr= static_cast<byte*>(ut_zalloc_nokey(LOG_FILE_HDR_SIZE
+ OS_FILE_LOG_BLOCK_SIZE
* n_files));
file_header_bufs_ptr= ptr;
ptr= static_cast<byte*>(ut_align(ptr, OS_FILE_LOG_BLOCK_SIZE));
group->file_header_bufs = static_cast<byte**>(
ut_zalloc_nokey(sizeof(byte**) * n_files));
memset(file_header_bufs, 0, sizeof file_header_bufs);
for (i = 0; i < n_files; i++) {
group->file_header_bufs_ptr[i] = static_cast<byte*>(
ut_zalloc_nokey(LOG_FILE_HDR_SIZE
+ OS_FILE_LOG_BLOCK_SIZE));
group->file_header_bufs[i] = static_cast<byte*>(
ut_align(group->file_header_bufs_ptr[i],
OS_FILE_LOG_BLOCK_SIZE));
}
group->checkpoint_buf_ptr = static_cast<byte*>(
ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
group->checkpoint_buf = static_cast<byte*>(
ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE));
}
/******************************************************//**
Completes an i/o to a log file. */
void
log_io_complete(
/*============*/
log_group_t* group) /*!< in: log group or a dummy pointer */
{
if ((ulint) group & 0x1UL) {
/* It was a checkpoint write */
group = (log_group_t*)((ulint) group - 1);
switch (srv_file_flush_method) {
case SRV_O_DSYNC:
case SRV_NOSYNC:
break;
case SRV_FSYNC:
case SRV_LITTLESYNC:
case SRV_O_DIRECT:
case SRV_O_DIRECT_NO_FSYNC:
#ifdef _WIN32
case SRV_ALL_O_DIRECT_FSYNC:
#endif
fil_flush(SRV_LOG_SPACE_FIRST_ID);
}
DBUG_PRINT("ib_log", ("checkpoint info written"));
log_io_complete_checkpoint();
return;
}
ut_error; /*!< We currently use synchronous writing of the
logs and cannot end up here! */
for (ulint i = 0; i < n_files; i++, ptr += LOG_FILE_HDR_SIZE)
file_header_bufs[i] = ptr;
}
/******************************************************//**
Writes a log file header to a log file space. */
static
void
log_group_file_header_flush(
/*========================*/
log_group_t* group, /*!< in: log group */
log_file_header_flush(
ulint nth_file, /*!< in: header to the nth file in the
log file space */
lsn_t start_lsn) /*!< in: log file data starts at this
@ -858,14 +700,14 @@ log_group_file_header_flush(
ut_ad(log_write_mutex_own());
ut_ad(!recv_no_log_write);
ut_a(nth_file < group->n_files);
ut_ad((group->format & ~LOG_HEADER_FORMAT_ENCRYPTED)
ut_a(nth_file < log_sys.log.n_files);
ut_ad((log_sys.log.format & ~LOG_HEADER_FORMAT_ENCRYPTED)
== LOG_HEADER_FORMAT_CURRENT);
buf = *(group->file_header_bufs + nth_file);
buf = log_sys.log.file_header_bufs[nth_file];
memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
mach_write_to_4(buf + LOG_HEADER_FORMAT, group->format);
mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn);
strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
LOG_HEADER_CREATOR_CURRENT);
@ -873,7 +715,7 @@ log_group_file_header_flush(
>= sizeof LOG_HEADER_CREATOR_CURRENT);
log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
dest_offset = nth_file * group->file_size;
dest_offset = nth_file * log_sys.log.file_size;
DBUG_PRINT("ib_log", ("write " LSN_PF
" file " ULINTPF " header",
@ -891,7 +733,7 @@ log_group_file_header_flush(
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
ulint(dest_offset & (srv_page_size - 1)),
OS_FILE_LOG_BLOCK_SIZE, buf, group);
OS_FILE_LOG_BLOCK_SIZE, buf, NULL);
srv_stats.os_log_pending_writes.dec();
}
@ -910,12 +752,10 @@ log_block_store_checksum(
}
/******************************************************//**
Writes a buffer to a log file group. */
Writes a buffer to a log file. */
static
void
log_group_write_buf(
/*================*/
log_group_t* group, /*!< in: log group */
log_write_buf(
byte* buf, /*!< in: buffer */
ulint len, /*!< in: buffer len; must be divisible
by OS_FILE_LOG_BLOCK_SIZE */
@ -946,28 +786,27 @@ loop:
return;
}
next_offset = log_group_calc_lsn_offset(start_lsn, group);
next_offset = log_sys.log.calc_lsn_offset(start_lsn);
if (write_header
&& next_offset % group->file_size == LOG_FILE_HDR_SIZE) {
&& next_offset % log_sys.log.file_size == LOG_FILE_HDR_SIZE) {
/* We start to write a new log file instance in the group */
ut_a(next_offset / group->file_size <= ULINT_MAX);
ut_a(next_offset / log_sys.log.file_size <= ULINT_MAX);
log_group_file_header_flush(group, (ulint)
(next_offset / group->file_size),
start_lsn);
log_file_header_flush(
ulint(next_offset / log_sys.log.file_size), start_lsn);
srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
srv_stats.log_writes.inc();
}
if ((next_offset % group->file_size) + len > group->file_size) {
if ((next_offset % log_sys.log.file_size) + len
> log_sys.log.file_size) {
/* if the above condition holds, then the below expression
is < len which is ulint, so the typecast is ok */
write_len = (ulint)
(group->file_size - (next_offset % group->file_size));
write_len = ulint(log_sys.log.file_size
- (next_offset % log_sys.log.file_size));
} else {
write_len = len;
}
@ -1012,8 +851,7 @@ loop:
fil_io(IORequestLogWrite, true,
page_id_t(SRV_LOG_SPACE_FIRST_ID, page_no),
univ_page_size,
ulint(next_offset & (srv_page_size - 1)), write_len, buf,
group);
ulint(next_offset & (srv_page_size - 1)), write_len, buf, NULL);
srv_stats.os_log_pending_writes.dec();
@ -1227,7 +1065,7 @@ loop:
log_buffer_switch();
log_group_set_fields(&log_sys.log, log_sys.write_lsn);
log_sys.log.set_fields(log_sys.write_lsn);
log_mutex_exit();
/* Erase the end of the last log block. */
@ -1236,12 +1074,9 @@ loop:
/* Calculate pad_size if needed. */
pad_size = 0;
if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
lsn_t end_offset;
ulint end_offset_in_unit;
end_offset = log_group_calc_lsn_offset(
ut_uint64_align_up(write_lsn,
OS_FILE_LOG_BLOCK_SIZE),
&log_sys.log);
lsn_t end_offset = log_sys.log.calc_lsn_offset(
ut_uint64_align_up(write_lsn, OS_FILE_LOG_BLOCK_SIZE));
end_offset_in_unit = (ulint) (end_offset % write_ahead_size);
if (end_offset_in_unit > 0
@ -1262,9 +1097,8 @@ loop:
}
/* Do the write to the log files */
log_group_write_buf(
&log_sys.log, write_buf + area_start,
area_end - area_start + pad_size,
log_write_buf(
write_buf + area_start, area_end - area_start + pad_size,
#ifdef UNIV_DEBUG
pad_size,
#endif /* UNIV_DEBUG */
@ -1444,20 +1278,17 @@ log_complete_checkpoint(void)
rw_lock_x_unlock_gen(&(log_sys.checkpoint_lock), LOG_CHECKPOINT);
}
/******************************************************//**
Completes an asynchronous checkpoint info write i/o to a log file. */
static
void
log_io_complete_checkpoint(void)
/*============================*/
/** Complete an asynchronous checkpoint write. */
void log_t::complete_checkpoint()
{
ut_ad(this == &log_sys);
MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
log_mutex_enter();
ut_ad(log_sys.n_pending_checkpoint_writes > 0);
ut_ad(n_pending_checkpoint_writes > 0);
if (--log_sys.n_pending_checkpoint_writes == 0) {
if (!--n_pending_checkpoint_writes) {
log_complete_checkpoint();
}
@ -1471,7 +1302,6 @@ void
log_group_checkpoint(lsn_t end_lsn)
{
lsn_t lsn_offset;
byte* buf;
ut_ad(!srv_read_only_mode);
ut_ad(log_mutex_own());
@ -1485,9 +1315,7 @@ log_group_checkpoint(lsn_t end_lsn)
log_sys.next_checkpoint_no,
log_sys.next_checkpoint_lsn));
log_group_t* group = &log_sys.log;
buf = group->checkpoint_buf;
byte* buf = log_sys.checkpoint_buf;
memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
@ -1497,8 +1325,7 @@ log_group_checkpoint(lsn_t end_lsn)
log_crypt_write_checkpoint_buf(buf);
}
lsn_offset = log_group_calc_lsn_offset(log_sys.next_checkpoint_lsn,
group);
lsn_offset = log_sys.log.calc_lsn_offset(log_sys.next_checkpoint_lsn);
mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE,
srv_log_buffer_size);
@ -1523,28 +1350,18 @@ log_group_checkpoint(lsn_t end_lsn)
/* Note: We alternate the physical place of the checkpoint info.
See the (next_checkpoint_no & 1) below. */
/* We send as the last parameter the group machine address
added with 1, as we want to distinguish between a normal log
file write and a checkpoint field write */
fil_io(IORequestLogWrite, false,
page_id_t(SRV_LOG_SPACE_FIRST_ID, 0),
univ_page_size,
(log_sys.next_checkpoint_no & 1)
? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
OS_FILE_LOG_BLOCK_SIZE,
buf, (byte*) group + 1);
ut_ad(((ulint) group & 0x1UL) == 0);
buf, reinterpret_cast<void*>(1) /* checkpoint write */);
}
/** Read a log group header page to log_sys.checkpoint_buf.
@param[in] group log group
@param[in] header 0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
void
log_group_header_read(
const log_group_t* group,
ulint header)
@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
void log_header_read(ulint header)
{
ut_ad(log_mutex_own());
@ -2212,43 +2029,13 @@ log_refresh_stats(void)
log_sys.last_printout_time = time(NULL);
}
/** Close a log group.
@param[in,out] group log group to close */
static
void
log_group_close(log_group_t* group)
{
ulint i;
for (i = 0; i < group->n_files; i++) {
ut_free(group->file_header_bufs_ptr[i]);
}
ut_free(group->file_header_bufs_ptr);
ut_free(group->file_header_bufs);
ut_free(group->checkpoint_buf_ptr);
group->n_files = 0;
group->file_header_bufs_ptr = NULL;
group->file_header_bufs = NULL;
group->checkpoint_buf_ptr = NULL;
}
/********************************************************//**
Closes all log groups. */
void
log_group_close_all(void)
/*=====================*/
{
log_group_close(&log_sys.log);
}
/** Shut down the redo log subsystem. */
void log_t::close()
{
ut_ad(this == &log_sys);
if (!is_initialised()) return;
m_initialised = false;
log_group_close_all();
log.close();
if (!first_in_use)
buf -= srv_log_buffer_size;

View file

@ -631,42 +631,32 @@ recv_sys_debug_free(void)
mutex_exit(&(recv_sys->mutex));
}
/** Read a log segment to a buffer.
@param[out] buf buffer
@param[in] group redo log files
@param[in, out] start_lsn in : read area start, out: the last read valid lsn
/** Read a log segment to log_sys.buf.
@param[in,out] start_lsn in: read area start,
out: the last read valid lsn
@param[in] end_lsn read area end
@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
bool
log_group_read_log_seg(
byte* buf,
const log_group_t* group,
lsn_t *start_lsn,
lsn_t end_lsn)
@return whether no invalid blocks (e.g checksum mismatch) were found */
bool log_t::files::read_log_seg(lsn_t* start_lsn, lsn_t end_lsn)
{
ulint len;
lsn_t source_offset;
bool success = true;
ut_ad(log_mutex_own());
ut_ad(log_sys.mutex.is_owned());
ut_ad(!(*start_lsn % OS_FILE_LOG_BLOCK_SIZE));
ut_ad(!(end_lsn % OS_FILE_LOG_BLOCK_SIZE));
byte* buf = log_sys.buf;
loop:
source_offset = log_group_calc_lsn_offset(*start_lsn, group);
lsn_t source_offset = calc_lsn_offset(*start_lsn);
ut_a(end_lsn - *start_lsn <= ULINT_MAX);
len = (ulint) (end_lsn - *start_lsn);
ut_ad(len != 0);
const bool at_eof = (source_offset % group->file_size) + len
> group->file_size;
const bool at_eof = (source_offset % file_size) + len > file_size;
if (at_eof) {
/* If the above condition is true then len (which is ulint)
is > the expression below, so the typecast is ok */
len = (ulint) (group->file_size -
(source_offset % group->file_size));
len = ulint(file_size - (source_offset % file_size));
}
log_sys.n_log_ios++;
@ -698,7 +688,7 @@ loop:
break;
}
if (innodb_log_checksums || group->is_encrypted()) {
if (innodb_log_checksums || is_encrypted()) {
ulint crc = log_block_calc_checksum_crc32(buf);
ulint cksum = log_block_get_checksum(buf);
@ -721,7 +711,7 @@ loop:
break;
}
if (group->is_encrypted()) {
if (is_encrypted()) {
log_crypt(buf, *start_lsn,
OS_FILE_LOG_BLOCK_SIZE, true);
}
@ -759,14 +749,10 @@ recv_synchronize_groups()
the block is always incomplete */
lsn_t start_lsn = ut_uint64_align_down(recovered_lsn,
OS_FILE_LOG_BLOCK_SIZE);
log_group_read_log_seg(log_sys.buf, &log_sys.log,
&start_lsn, start_lsn + OS_FILE_LOG_BLOCK_SIZE);
/* Update the fields in the group struct to correspond to
recovered_lsn */
log_group_set_fields(&log_sys.log, recovered_lsn);
OS_FILE_LOG_BLOCK_SIZE);
log_sys.log.read_log_seg(&start_lsn,
start_lsn + OS_FILE_LOG_BLOCK_SIZE);
log_sys.log.set_fields(recovered_lsn);
/* Copy the checkpoint info to the log; remember that we have
incremented checkpoint_no by one, and the info will not be written
@ -792,19 +778,17 @@ recv_check_log_header_checksum(
}
/** Find the latest checkpoint in the format-0 log header.
@param[out] max_group log group, or NULL
@param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((warn_unused_result))
dberr_t
recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
recv_find_max_checkpoint_0(ulint* max_field)
{
log_group_t* group = &log_sys.log;
ib_uint64_t max_no = 0;
ib_uint64_t checkpoint_no;
byte* buf = log_sys.checkpoint_buf;
ut_ad(group->format == 0);
ut_ad(log_sys.log.format == 0);
/** Offset of the first checkpoint checksum */
static const uint CHECKSUM_1 = 288;
@ -815,11 +799,11 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
/** Least significant bits of the checkpoint offset */
static const uint OFFSET_LOW32 = 16;
*max_group = NULL;
bool found = false;
for (ulint field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
log_group_header_read(group, field);
log_header_read(field);
if (static_cast<uint32_t>(ut_fold_binary(buf, CHECKSUM_1))
!= mach_read_from_4(buf + CHECKSUM_1)
@ -846,21 +830,21 @@ recv_find_max_checkpoint_0(log_group_t** max_group, ulint* max_field)
mach_read_from_8(buf + LOG_CHECKPOINT_LSN)));
if (checkpoint_no >= max_no) {
*max_group = group;
found = true;
*max_field = field;
max_no = checkpoint_no;
group->state = LOG_GROUP_OK;
log_sys.log.state = LOG_GROUP_OK;
group->lsn = mach_read_from_8(
log_sys.log.lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_LSN);
group->lsn_offset = static_cast<ib_uint64_t>(
log_sys.log.lsn_offset = static_cast<ib_uint64_t>(
mach_read_from_4(buf + OFFSET_HIGH32)) << 32
| mach_read_from_4(buf + OFFSET_LOW32);
}
}
if (*max_group != NULL) {
if (found) {
return(DB_SUCCESS);
}
@ -882,9 +866,7 @@ dberr_t
recv_log_format_0_recover(lsn_t lsn)
{
log_mutex_enter();
log_group_t* group = &log_sys.log;
const lsn_t source_offset
= log_group_calc_lsn_offset(lsn, group);
const lsn_t source_offset = log_sys.log.calc_lsn_offset(lsn);
log_mutex_exit();
const ulint page_no = ulint(source_offset >> srv_page_size_shift);
byte* buf = log_sys.buf;
@ -933,26 +915,23 @@ recv_log_format_0_recover(lsn_t lsn)
dberr_t
recv_find_max_checkpoint(ulint* max_field)
{
log_group_t* group;
ib_uint64_t max_no;
ib_uint64_t checkpoint_no;
ulint field;
byte* buf;
group = &log_sys.log;
max_no = 0;
*max_field = 0;
buf = log_sys.checkpoint_buf;
group->state = LOG_GROUP_CORRUPTED;
log_sys.log.state = LOG_GROUP_CORRUPTED;
log_group_header_read(group, 0);
log_header_read(0);
/* Check the header page checksum. There was no
checksum in the first redo log format (version 0). */
group->format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
if (group->format != LOG_HEADER_FORMAT_3_23
log_sys.log.format = mach_read_from_4(buf + LOG_HEADER_FORMAT);
if (log_sys.log.format != LOG_HEADER_FORMAT_3_23
&& !recv_check_log_header_checksum(buf)) {
ib::error() << "Invalid redo log header checksum.";
return(DB_CORRUPTION);
@ -964,9 +943,9 @@ recv_find_max_checkpoint(ulint* max_field)
/* Ensure that the string is NUL-terminated. */
creator[LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR] = 0;
switch (group->format) {
switch (log_sys.log.format) {
case LOG_HEADER_FORMAT_3_23:
return(recv_find_max_checkpoint_0(&group, max_field));
return(recv_find_max_checkpoint_0(max_field));
case LOG_HEADER_FORMAT_10_2:
case LOG_HEADER_FORMAT_10_2 | LOG_HEADER_FORMAT_ENCRYPTED:
case LOG_HEADER_FORMAT_CURRENT:
@ -981,7 +960,7 @@ recv_find_max_checkpoint(ulint* max_field)
for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
log_group_header_read(group, field);
log_header_read(field);
const ulint crc32 = log_block_calc_checksum_crc32(buf);
const ulint cksum = log_block_get_checksum(buf);
@ -996,7 +975,7 @@ recv_find_max_checkpoint(ulint* max_field)
continue;
}
if (group->is_encrypted()
if (log_sys.is_encrypted()
&& !log_crypt_read_checkpoint_buf(buf)) {
ib::error() << "Reading checkpoint"
" encryption info failed.";
@ -1014,10 +993,10 @@ recv_find_max_checkpoint(ulint* max_field)
if (checkpoint_no >= max_no) {
*max_field = field;
max_no = checkpoint_no;
group->state = LOG_GROUP_OK;
group->lsn = mach_read_from_8(
log_sys.log.state = LOG_GROUP_OK;
log_sys.log.lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_LSN);
group->lsn_offset = mach_read_from_8(
log_sys.log.lsn_offset = mach_read_from_8(
buf + LOG_CHECKPOINT_OFFSET);
log_sys.next_checkpoint_no = checkpoint_no;
}
@ -2867,7 +2846,6 @@ recv_scan_log_recs(
/** Scans the log from a buffer and stores new log data to the parsing buffer.
Parses and hashes the log records if new data is found.
@param[in,out] group log group
@param[in] checkpoint_lsn latest checkpoint log sequence number
@param[in,out] contiguous_lsn log sequence number
until which all redo log has been scanned
@ -2877,7 +2855,6 @@ can be applied to the tablespaces
static
bool
recv_group_scan_log_recs(
log_group_t* group,
lsn_t checkpoint_lsn,
lsn_t* contiguous_lsn,
bool last_phase)
@ -2910,8 +2887,8 @@ recv_group_scan_log_recs(
* (buf_pool_get_n_pages()
- (recv_n_pool_free_frames * srv_buf_pool_instances));
group->scanned_lsn = end_lsn = *contiguous_lsn = ut_uint64_align_down(
*contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
log_sys.log.scanned_lsn = end_lsn = *contiguous_lsn =
ut_uint64_align_down(*contiguous_lsn, OS_FILE_LOG_BLOCK_SIZE);
do {
if (last_phase && store_to_hash == STORE_NO) {
@ -2926,15 +2903,13 @@ recv_group_scan_log_recs(
start_lsn = ut_uint64_align_down(end_lsn,
OS_FILE_LOG_BLOCK_SIZE);
end_lsn = start_lsn;
log_group_read_log_seg(
log_sys.buf, group, &end_lsn,
start_lsn + RECV_SCAN_SIZE);
log_sys.log.read_log_seg(&end_lsn, start_lsn + RECV_SCAN_SIZE);
} while (end_lsn != start_lsn
&& !recv_scan_log_recs(
available_mem, &store_to_hash, log_sys.buf,
checkpoint_lsn,
start_lsn, end_lsn,
contiguous_lsn, &group->scanned_lsn));
contiguous_lsn, &log_sys.log.scanned_lsn));
if (recv_sys->found_corrupt_log || recv_sys->found_corrupt_fs) {
DBUG_RETURN(false);
@ -2942,7 +2917,7 @@ recv_group_scan_log_recs(
DBUG_PRINT("ib_log", ("%s " LSN_PF " completed",
last_phase ? "rescan" : "scan",
group->scanned_lsn));
log_sys.log.scanned_lsn));
DBUG_RETURN(store_to_hash == STORE_NO);
}
@ -3123,7 +3098,6 @@ of first system tablespace page
dberr_t
recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
{
log_group_t* group;
ulint max_cp_field;
lsn_t checkpoint_lsn;
bool rescan;
@ -3151,8 +3125,6 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
log_mutex_enter();
/* Look for the latest checkpoint from any of the log groups */
err = recv_find_max_checkpoint(&max_cp_field);
if (err != DB_SUCCESS) {
@ -3162,28 +3134,26 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
return(err);
}
log_group_header_read(&log_sys.log, max_cp_field);
log_header_read(max_cp_field);
buf = log_sys.checkpoint_buf;
checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
/* Start reading the log groups from the checkpoint lsn up. The
variable contiguous_lsn contains an lsn up to which the log is
known to be contiguously written to all log groups. */
/* Start reading the log from the checkpoint lsn. The variable
contiguous_lsn contains an lsn up to which the log is known to
be contiguously written. */
recv_sys->mlog_checkpoint_lsn = 0;
ut_ad(RECV_SCAN_SIZE <= srv_log_buffer_size);
group = &log_sys.log;
const lsn_t end_lsn = mach_read_from_8(
buf + LOG_CHECKPOINT_END_LSN);
ut_ad(recv_sys->n_addrs == 0);
contiguous_lsn = checkpoint_lsn;
switch (group->format) {
switch (log_sys.log.format) {
case 0:
log_mutex_exit();
return(recv_log_format_0_recover(checkpoint_lsn));
@ -3201,8 +3171,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
}
/* Look for MLOG_CHECKPOINT. */
recv_group_scan_log_recs(group, checkpoint_lsn, &contiguous_lsn,
false);
recv_group_scan_log_recs(checkpoint_lsn, &contiguous_lsn, false);
/* The first scan should not have stored or applied any records. */
ut_ad(recv_sys->n_addrs == 0);
ut_ad(!recv_sys->found_corrupt_fs);
@ -3219,7 +3188,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
}
if (recv_sys->mlog_checkpoint_lsn == 0) {
lsn_t scan_lsn = group->scanned_lsn;
lsn_t scan_lsn = log_sys.log.scanned_lsn;
if (!srv_read_only_mode && scan_lsn != checkpoint_lsn) {
log_mutex_exit();
ib::error err;
@ -3232,12 +3201,12 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
return(DB_ERROR);
}
group->scanned_lsn = checkpoint_lsn;
log_sys.log.scanned_lsn = checkpoint_lsn;
rescan = false;
} else {
contiguous_lsn = checkpoint_lsn;
rescan = recv_group_scan_log_recs(
group, checkpoint_lsn, &contiguous_lsn, false);
checkpoint_lsn, &contiguous_lsn, false);
if ((recv_sys->found_corrupt_log && !srv_force_recovery)
|| recv_sys->found_corrupt_fs) {
@ -3308,8 +3277,7 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
lsn_t recent_stored_lsn = recv_sys->last_stored_lsn;
rescan = recv_group_scan_log_recs(
group, checkpoint_lsn,
&recent_stored_lsn, false);
checkpoint_lsn, &recent_stored_lsn, false);
ut_ad(!recv_sys->found_corrupt_fs);
@ -3340,8 +3308,8 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
if (rescan) {
contiguous_lsn = checkpoint_lsn;
recv_group_scan_log_recs(group, checkpoint_lsn,
&contiguous_lsn, true);
recv_group_scan_log_recs(
checkpoint_lsn, &contiguous_lsn, true);
if ((recv_sys->found_corrupt_log
&& !srv_force_recovery)
@ -3354,12 +3322,11 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
ut_ad(!rescan || recv_sys->n_addrs == 0);
}
/* We currently have only one log group */
if (log_sys.log.scanned_lsn < checkpoint_lsn
|| log_sys.log.scanned_lsn < recv_max_page_lsn) {
if (group->scanned_lsn < checkpoint_lsn
|| group->scanned_lsn < recv_max_page_lsn) {
ib::error() << "We scanned the log up to " << group->scanned_lsn
ib::error() << "We scanned the log up to "
<< log_sys.log.scanned_lsn
<< ". A checkpoint was at " << checkpoint_lsn << " and"
" the maximum LSN on a database page was "
<< recv_max_page_lsn << ". It is possible that the"
@ -3375,9 +3342,6 @@ recv_recovery_from_checkpoint_start(lsn_t flush_lsn)
return(DB_ERROR);
}
/* Synchronize the uncorrupted log groups to the most up-to-date log
group; we also copy checkpoint info to groups */
log_sys.next_checkpoint_lsn = checkpoint_lsn;
log_sys.next_checkpoint_no = checkpoint_no + 1;

View file

@ -487,7 +487,7 @@ create_log_files(
}
}
log_init(srv_n_log_files);
log_sys.log.create(srv_n_log_files);
if (!log_set_capacity(srv_log_file_size_requested)) {
return(DB_ERROR);
}
@ -1926,7 +1926,7 @@ dberr_t srv_start(bool create_new_db)
}
}
log_init(srv_n_log_files_found);
log_sys.log.create(srv_n_log_files_found);
if (!log_set_capacity(srv_log_file_size_requested)) {
return(srv_init_abort(DB_ERROR));
@ -2176,7 +2176,6 @@ files_checked:
err = fil_write_flushed_lsn(log_get_lsn());
ut_ad(!buf_pool_check_no_pending_io());
fil_close_log_files(true);
log_group_close_all();
if (err == DB_SUCCESS) {
bool trunc = srv_operation
== SRV_OPERATION_RESTORE;
@ -2244,9 +2243,6 @@ files_checked:
return(srv_init_abort(DB_ERROR)););
DBUG_PRINT("ib_log", ("After innodb_log_abort_5"));
/* Free the old log file space. */
log_group_close_all();
ib::info() << "Starting to delete and rewrite log"
" files.";