mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-33894: Resurrect innodb_log_write_ahead_size
As part of commit 685d958e38 (MDEV-14425),
the parameter innodb_log_write_ahead_size was removed, because it was
thought that determining the physical block size would be a sufficient
replacement.
However, we can only determine the physical block size on Linux or
Microsoft Windows. On some file systems, the physical block size
is not relevant. For example, XFS uses a block size of 4096 bytes
even if the underlying block size may be smaller.
On Linux, we failed to determine the physical block size whenever
innodb_log_file_buffering=OFF was not requested or was not possible.
This will be fixed.
log_sys.write_size: The value of the reintroduced parameter
innodb_log_write_ahead_size. To keep it simple, this is read-only
and a power of two between 512 and 4096 bytes, so that the previous
alignment guarantees are fulfilled. This will replace the previous
log_sys.get_block_size().
log_sys.block_size, log_t::get_block_size(): Remove.
log_t::set_block_size(): Ensure that write_size will not be less
than the physical block size. There is no point in invoking this
function with 512 or less, because that is the minimum value of
write_size.
innodb_params_adjust(): Add some disabled code for adjusting
the minimum value and default value of innodb_log_write_ahead_size
to reflect the log_sys.write_size.
log_t::set_recovered(): Mark the recovery completed. This is the
place to adjust some things if we want to allow write_size>4096.
log_t::resize_write_buf(): Refer to write_size.
log_t::resize_start(): Refer to write_size instead of get_block_size().
log_write_buf(): Simplify some arithmetic and remove a goto.
log_t::write_buf(): Refer to write_size. If we are writing less than
that, do not switch buffers, but keep writing to the same buffer.
Move some code to improve the locality of reference.
recv_scan_log(): Refer to write_size instead of get_block_size().
os_file_create_func(): For type==OS_LOG_FILE on Linux, always invoke
os_file_log_maybe_unbuffered(), so that log_sys.set_block_size() will
be invoked even if we are not attempting to use O_DIRECT.
recv_sys_t::find_checkpoint(): Read the entire log header
in a single 12 KiB request into log_sys.buf.
Tested with:
./mtr --loose-innodb-log-write-ahead-size=4096
./mtr --loose-innodb-log-write-ahead-size=2048
This commit is contained in:
parent
27a3366663
commit
4ca355d863
13 changed files with 232 additions and 141 deletions
|
@ -1333,7 +1333,7 @@ enum options_xtrabackup
|
|||
OPT_INNODB_LOG_FILE_BUFFERING,
|
||||
#endif
|
||||
OPT_INNODB_LOG_FILE_SIZE,
|
||||
OPT_INNODB_LOG_FILES_IN_GROUP,
|
||||
OPT_INNODB_LOG_WRITE_AHEAD_SIZE,
|
||||
OPT_INNODB_OPEN_FILES,
|
||||
OPT_XTRA_DEBUG_SYNC,
|
||||
OPT_INNODB_CHECKSUM_ALGORITHM,
|
||||
|
@ -1905,6 +1905,10 @@ struct my_option xb_server_options[] =
|
|||
{"innodb_log_group_home_dir", OPT_INNODB_LOG_GROUP_HOME_DIR,
|
||||
"Path to InnoDB log files.", &srv_log_group_home_dir,
|
||||
&srv_log_group_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
|
||||
{"innodb_log_write_ahead_size", OPT_INNODB_LOG_WRITE_AHEAD_SIZE,
|
||||
"ib_logfile0 write size",
|
||||
(G_PTR*) &log_sys.write_size, (G_PTR*) &srv_log_file_size, 0,
|
||||
GET_UINT, REQUIRED_ARG, 512, 512, 4096, 0, 1, 0},
|
||||
{"innodb_max_dirty_pages_pct", OPT_INNODB_MAX_DIRTY_PAGES_PCT,
|
||||
"Percentage of dirty pages allowed in bufferpool.",
|
||||
(G_PTR*) &srv_max_buf_pool_modified_pct,
|
||||
|
@ -2233,7 +2237,6 @@ xb_get_one_option(const struct my_option *opt,
|
|||
ADD_PRINT_PARAM_OPT(srv_log_group_home_dir);
|
||||
break;
|
||||
|
||||
case OPT_INNODB_LOG_FILES_IN_GROUP:
|
||||
case OPT_INNODB_LOG_FILE_SIZE:
|
||||
break;
|
||||
|
||||
|
@ -2374,6 +2377,11 @@ xb_get_one_option(const struct my_option *opt,
|
|||
|
||||
static bool innodb_init_param()
|
||||
{
|
||||
if (!ut_is_2pow(log_sys.write_size)) {
|
||||
msg("InnoDB: innodb_log_write_ahead_size=%u"
|
||||
" is not a power of two", log_sys.write_size);
|
||||
return true;
|
||||
}
|
||||
srv_is_being_started = TRUE;
|
||||
/* === some variables from mysqld === */
|
||||
memset((G_PTR) &mysql_tmpdir_list, 0, sizeof(mysql_tmpdir_list));
|
||||
|
@ -3370,7 +3378,7 @@ static bool xtrabackup_copy_logfile()
|
|||
ut_a(dst_log_file);
|
||||
ut_ad(recv_sys.is_initialised());
|
||||
const size_t sequence_offset{log_sys.is_encrypted() ? 8U + 5U : 5U};
|
||||
const size_t block_size_1{log_sys.get_block_size() - 1};
|
||||
const size_t block_size_1{log_sys.write_size - 1};
|
||||
|
||||
ut_ad(!log_sys.is_pmem());
|
||||
|
||||
|
@ -3445,7 +3453,7 @@ static bool xtrabackup_copy_logfile()
|
|||
if (r == recv_sys_t::GOT_EOF)
|
||||
break;
|
||||
|
||||
if (recv_sys.offset < log_sys.get_block_size())
|
||||
if (recv_sys.offset < log_sys.write_size)
|
||||
break;
|
||||
|
||||
if (xtrabackup_throttle && io_ticket-- < 0)
|
||||
|
|
|
@ -287,7 +287,20 @@ WHERE engine='innodb'
|
|||
AND support IN ('YES', 'DEFAULT', 'ENABLED');
|
||||
1
|
||||
1
|
||||
# restart
|
||||
# restart: --innodb-log-write-ahead-size=513
|
||||
SELECT * FROM INFORMATION_SCHEMA.ENGINES
|
||||
WHERE engine = 'innodb'
|
||||
AND support IN ('YES', 'DEFAULT', 'ENABLED');
|
||||
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
|
||||
# restart: --innodb-log-write-ahead-size=4095
|
||||
SELECT * FROM INFORMATION_SCHEMA.ENGINES
|
||||
WHERE engine = 'innodb'
|
||||
AND support IN ('YES', 'DEFAULT', 'ENABLED');
|
||||
ENGINE SUPPORT COMMENT TRANSACTIONS XA SAVEPOINTS
|
||||
# restart: --innodb-log-write-ahead-size=10000
|
||||
SELECT @@innodb_log_write_ahead_size;
|
||||
@@innodb_log_write_ahead_size
|
||||
4096
|
||||
# Cleanup
|
||||
bak_ib_logfile0
|
||||
bak_ibdata1
|
||||
|
|
|
@ -210,8 +210,20 @@ eval $check_no_innodb;
|
|||
eval $check_yes_innodb;
|
||||
--source include/shutdown_mysqld.inc
|
||||
|
||||
--let $restart_parameters=
|
||||
--let $restart_parameters=--innodb-log-write-ahead-size=513
|
||||
--source include/start_mysqld.inc
|
||||
eval $check_no_innodb;
|
||||
--source include/shutdown_mysqld.inc
|
||||
|
||||
--let $restart_parameters=--innodb-log-write-ahead-size=4095
|
||||
--source include/start_mysqld.inc
|
||||
eval $check_no_innodb;
|
||||
--source include/shutdown_mysqld.inc
|
||||
|
||||
# this will be silently truncated to the maximum
|
||||
--let $restart_parameters=--innodb-log-write-ahead-size=10000
|
||||
--source include/start_mysqld.inc
|
||||
SELECT @@innodb_log_write_ahead_size;
|
||||
|
||||
--echo # Cleanup
|
||||
--list_files $bugdir
|
||||
|
|
|
@ -7,7 +7,9 @@ let $targetdir=$MYSQLTEST_VARDIR/tmp/backup;
|
|||
--let $backup_log=$MYSQLTEST_VARDIR/tmp/backup.log
|
||||
|
||||
--disable_result_log
|
||||
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --parallel=10 > $backup_log 2>&1;
|
||||
--error 1
|
||||
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --parallel=10 --innodb-log-write-ahead-size=4095 > $backup_log 2>&1;
|
||||
exec $XTRABACKUP --defaults-file=$MYSQLTEST_VARDIR/my.cnf --backup --target-dir=$targetdir --parallel=10 --innodb-log-write-ahead-size=10000 > $backup_log 2>&1;
|
||||
--enable_result_log
|
||||
|
||||
# The following warning must not appear after MDEV-27343 fix
|
||||
|
|
|
@ -1039,6 +1039,18 @@ NUMERIC_BLOCK_SIZE 0
|
|||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT OPTIONAL
|
||||
VARIABLE_NAME INNODB_LOG_WRITE_AHEAD_SIZE
|
||||
SESSION_VALUE NULL
|
||||
DEFAULT_VALUE 512
|
||||
VARIABLE_SCOPE GLOBAL
|
||||
VARIABLE_TYPE INT UNSIGNED
|
||||
VARIABLE_COMMENT Redo log write size to avoid read-on-write; must be a power of two
|
||||
NUMERIC_MIN_VALUE 512
|
||||
NUMERIC_MAX_VALUE 4096
|
||||
NUMERIC_BLOCK_SIZE 1
|
||||
ENUM_VALUE_LIST NULL
|
||||
READ_ONLY YES
|
||||
COMMAND_LINE_ARGUMENT REQUIRED
|
||||
VARIABLE_NAME INNODB_LRU_FLUSH_SIZE
|
||||
SESSION_VALUE NULL
|
||||
DEFAULT_VALUE 32
|
||||
|
|
|
@ -5355,7 +5355,6 @@ static int init_server_components()
|
|||
MARIADB_REMOVED_OPTION("innodb-log-compressed-pages"),
|
||||
MARIADB_REMOVED_OPTION("innodb-log-files-in-group"),
|
||||
MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"),
|
||||
MARIADB_REMOVED_OPTION("innodb-log-write-ahead-size"),
|
||||
MARIADB_REMOVED_OPTION("innodb-page-cleaners"),
|
||||
MARIADB_REMOVED_OPTION("innodb-replication-delay"),
|
||||
MARIADB_REMOVED_OPTION("innodb-scrub-log"),
|
||||
|
|
|
@ -1796,15 +1796,18 @@ inline void log_t::write_checkpoint(lsn_t end_lsn) noexcept
|
|||
log_write_and_flush_prepare();
|
||||
resizing= resize_lsn.load(std::memory_order_relaxed);
|
||||
/* FIXME: issue an asynchronous write */
|
||||
log.write(offset, {c, get_block_size()});
|
||||
ut_ad(ut_is_2pow(write_size));
|
||||
ut_ad(write_size >= 512);
|
||||
ut_ad(write_size <= 4096);
|
||||
log.write(offset, {c, write_size});
|
||||
if (resizing > 1 && resizing <= next_checkpoint_lsn)
|
||||
{
|
||||
resize_log.write(CHECKPOINT_1, {c, write_size});
|
||||
byte *buf= static_cast<byte*>(aligned_malloc(4096, 4096));
|
||||
memset_aligned<4096>(buf, 0, 4096);
|
||||
header_write(buf, resizing, is_encrypted());
|
||||
resize_log.write(0, {buf, 4096});
|
||||
aligned_free(buf);
|
||||
resize_log.write(CHECKPOINT_1, {c, get_block_size()});
|
||||
}
|
||||
|
||||
if (srv_file_flush_method != SRV_O_DSYNC)
|
||||
|
|
|
@ -1214,11 +1214,8 @@ struct
|
|||
}
|
||||
log_requests;
|
||||
|
||||
/** @brief Adjust some InnoDB startup parameters based on file contents
|
||||
or innodb_page_size. */
|
||||
static
|
||||
void
|
||||
innodb_params_adjust();
|
||||
/** Adjust some InnoDB startup parameters based on the data directory */
|
||||
static void innodb_params_adjust();
|
||||
|
||||
/*******************************************************************//**
|
||||
This function is used to prepare an X/Open XA distributed transaction.
|
||||
|
@ -3688,6 +3685,11 @@ static MYSQL_SYSVAR_ULONGLONG(buffer_pool_size, innobase_buffer_pool_size,
|
|||
2ULL << 20,
|
||||
LLONG_MAX, 1024*1024L);
|
||||
|
||||
static MYSQL_SYSVAR_UINT(log_write_ahead_size, log_sys.write_size,
|
||||
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
|
||||
"Redo log write size to avoid read-on-write; must be a power of two",
|
||||
nullptr, nullptr, 512, 512, 4096, 1);
|
||||
|
||||
/****************************************************************//**
|
||||
Gives the file extension of an InnoDB single-table tablespace. */
|
||||
static const char* ha_innobase_exts[] = {
|
||||
|
@ -3809,6 +3811,13 @@ static int innodb_init_params()
|
|||
DBUG_RETURN(HA_ERR_INITIALIZATION);
|
||||
}
|
||||
|
||||
if (!ut_is_2pow(log_sys.write_size)) {
|
||||
sql_print_error("InnoDB: innodb_log_write_ahead_size=%u"
|
||||
" is not a power of two",
|
||||
log_sys.write_size);
|
||||
DBUG_RETURN(HA_ERR_INITIALIZATION);
|
||||
}
|
||||
|
||||
if (compression_algorithm_is_not_loaded(innodb_compression_algorithm, ME_ERROR_LOG))
|
||||
DBUG_RETURN(HA_ERR_INITIALIZATION);
|
||||
|
||||
|
@ -19850,6 +19859,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
|
|||
MYSQL_SYSVAR(log_file_buffering),
|
||||
#endif
|
||||
MYSQL_SYSVAR(log_file_size),
|
||||
MYSQL_SYSVAR(log_write_ahead_size),
|
||||
MYSQL_SYSVAR(log_spin_wait_delay),
|
||||
MYSQL_SYSVAR(log_group_home_dir),
|
||||
MYSQL_SYSVAR(max_dirty_pages_pct),
|
||||
|
@ -20010,20 +20020,32 @@ i_s_innodb_sys_virtual,
|
|||
i_s_innodb_tablespaces_encryption
|
||||
maria_declare_plugin_end;
|
||||
|
||||
/** @brief Adjust some InnoDB startup parameters based on file contents
|
||||
or innodb_page_size. */
|
||||
static
|
||||
void
|
||||
innodb_params_adjust()
|
||||
/** Adjust some InnoDB startup parameters based on the data directory */
|
||||
static void innodb_params_adjust()
|
||||
{
|
||||
MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
|
||||
= 1ULL << (32U + srv_page_size_shift);
|
||||
MYSQL_SYSVAR_NAME(max_undo_log_size).min_val
|
||||
= MYSQL_SYSVAR_NAME(max_undo_log_size).def_val
|
||||
= ulonglong(SRV_UNDO_TABLESPACE_SIZE_IN_PAGES)
|
||||
<< srv_page_size_shift;
|
||||
MYSQL_SYSVAR_NAME(max_undo_log_size).max_val
|
||||
= 1ULL << (32U + srv_page_size_shift);
|
||||
MYSQL_SYSVAR_NAME(max_undo_log_size).max_val=
|
||||
1ULL << (32U + srv_page_size_shift);
|
||||
MYSQL_SYSVAR_NAME(max_undo_log_size).min_val=
|
||||
MYSQL_SYSVAR_NAME(max_undo_log_size).def_val=
|
||||
ulonglong{SRV_UNDO_TABLESPACE_SIZE_IN_PAGES} << srv_page_size_shift;
|
||||
MYSQL_SYSVAR_NAME(max_undo_log_size).max_val=
|
||||
1ULL << (32U + srv_page_size_shift);
|
||||
#if 0 /* FIXME: INFORMATION_SCHEMA.SYSTEM_VARIABLES won't reflect this. */
|
||||
/* plugin_opt_set_limits() would have copied all MYSQL_SYSVAR
|
||||
before innodb_init() was invoked. Therefore, changing the
|
||||
min_val, def_val, max_val will have no observable effect. */
|
||||
# if defined __linux__ || defined _WIN32
|
||||
uint &min_val= MYSQL_SYSVAR_NAME(log_write_ahead_size).min_val;
|
||||
if (min_val < log_sys.write_size)
|
||||
{
|
||||
min_val= log_sys.write_size;
|
||||
MYSQL_SYSVAR_NAME(log_write_ahead_size).def_val= log_sys.write_size;
|
||||
}
|
||||
# endif
|
||||
ut_ad(MYSQL_SYSVAR_NAME(log_write_ahead_size).min_val <=
|
||||
log_sys.write_size);
|
||||
#endif
|
||||
ut_ad(MYSQL_SYSVAR_NAME(log_write_ahead_size).max_val == 4096);
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
|
|
|
@ -274,11 +274,9 @@ private:
|
|||
std::atomic<lsn_t> resize_lsn;
|
||||
/** the log sequence number at the start of the log file */
|
||||
lsn_t first_lsn;
|
||||
#if defined __linux__ || defined _WIN32
|
||||
/** The physical block size of the storage */
|
||||
uint32_t block_size;
|
||||
#endif
|
||||
public:
|
||||
/** current innodb_log_write_ahead_size */
|
||||
uint write_size;
|
||||
/** format of the redo log: e.g., FORMAT_10_8 */
|
||||
uint32_t format;
|
||||
#if defined __linux__ || defined _WIN32
|
||||
|
@ -328,6 +326,8 @@ public:
|
|||
max_buf_free;
|
||||
}
|
||||
|
||||
inline void set_recovered() noexcept;
|
||||
|
||||
void set_buf_free(size_t f) noexcept
|
||||
{ ut_ad(f < buf_free_LOCK); buf_free.store(f, std::memory_order_relaxed); }
|
||||
|
||||
|
@ -368,9 +368,12 @@ public:
|
|||
inline void resize_write(lsn_t lsn, const byte *end,
|
||||
size_t len, size_t seq) noexcept;
|
||||
|
||||
private:
|
||||
/** Write resize_buf to resize_log.
|
||||
@param length the used length of resize_buf */
|
||||
ATTRIBUTE_COLD void resize_write_buf(size_t length) noexcept;
|
||||
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
|
||||
void resize_write_buf(size_t length) noexcept;
|
||||
public:
|
||||
|
||||
/** Rename a log file after resizing.
|
||||
@return whether an error occurred */
|
||||
|
@ -467,14 +470,12 @@ public:
|
|||
void close();
|
||||
|
||||
#if defined __linux__ || defined _WIN32
|
||||
/** @return the physical block size of the storage */
|
||||
size_t get_block_size() const noexcept
|
||||
{ ut_ad(block_size); return block_size; }
|
||||
/** Set the log block size for file I/O. */
|
||||
void set_block_size(uint32_t size) noexcept { block_size= size; }
|
||||
#else
|
||||
/** @return the physical block size of the storage */
|
||||
static size_t get_block_size() { return 512; }
|
||||
void set_block_size(uint32 size) noexcept
|
||||
{
|
||||
if (write_size < size)
|
||||
write_size= size;
|
||||
}
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
|
|
@ -236,9 +236,6 @@ void log_t::attach_low(log_file_t file, os_offset_t size)
|
|||
mprotect(ptr, size_t(size), PROT_READ);
|
||||
buf= static_cast<byte*>(ptr);
|
||||
max_buf_free= 1;
|
||||
# if defined __linux__ || defined _WIN32
|
||||
set_block_size(CPU_LEVEL1_DCACHE_LINESIZE);
|
||||
# endif
|
||||
log_maybe_unbuffered= true;
|
||||
log_buffered= false;
|
||||
mtr_t::finisher_update();
|
||||
|
@ -273,13 +270,16 @@ void log_t::attach_low(log_file_t file, os_offset_t size)
|
|||
log_buffered
|
||||
? "Buffered log writes"
|
||||
: "File system buffers for log disabled",
|
||||
block_size);
|
||||
write_size);
|
||||
#endif
|
||||
|
||||
mtr_t::finisher_update();
|
||||
#ifdef HAVE_PMEM
|
||||
checkpoint_buf= static_cast<byte*>(aligned_malloc(block_size, block_size));
|
||||
memset_aligned<64>(checkpoint_buf, 0, block_size);
|
||||
ut_ad(ut_is_2pow(write_size));
|
||||
ut_ad(write_size >= 512);
|
||||
ut_ad(write_size <= 4096);
|
||||
checkpoint_buf= static_cast<byte*>(aligned_malloc(write_size, write_size));
|
||||
memset_aligned<512>(checkpoint_buf, 0, write_size);
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
@ -430,7 +430,7 @@ void log_t::set_buffered(bool buffered)
|
|||
log_buffered
|
||||
? "Buffered log writes"
|
||||
: "File system buffers for log disabled",
|
||||
block_size);
|
||||
write_size);
|
||||
}
|
||||
log_resize_release();
|
||||
}
|
||||
|
@ -467,6 +467,8 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
|
|||
OS_FILE_NORMAL, OS_LOG_FILE, false, &success);
|
||||
if (success)
|
||||
{
|
||||
ut_ad(!(size_t(file_size) & (write_size - 1)));
|
||||
ut_ad(!(size_t(size) & (write_size - 1)));
|
||||
log_resize_release();
|
||||
|
||||
void *ptr= nullptr, *ptr2= nullptr;
|
||||
|
@ -522,7 +524,7 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
|
|||
{
|
||||
memcpy_aligned<16>(resize_buf, buf, (buf_free + 15) & ~15);
|
||||
start_lsn= first_lsn +
|
||||
(~lsn_t{get_block_size() - 1} & (write_lsn - first_lsn));
|
||||
(~lsn_t{write_size - 1} & (write_lsn - first_lsn));
|
||||
}
|
||||
}
|
||||
resize_lsn.store(start_lsn, std::memory_order_relaxed);
|
||||
|
@ -578,32 +580,30 @@ void log_t::resize_abort() noexcept
|
|||
|
||||
/** Write an aligned buffer to ib_logfile0.
|
||||
@param buf buffer to be written
|
||||
@param len length of data to be written
|
||||
@param length length of data to be written
|
||||
@param offset log file offset */
|
||||
static void log_write_buf(const byte *buf, size_t len, lsn_t offset)
|
||||
static void log_write_buf(const byte *buf, size_t length, lsn_t offset)
|
||||
{
|
||||
ut_ad(write_lock.is_owner());
|
||||
ut_ad(!recv_no_log_write);
|
||||
ut_d(const size_t block_size_1= log_sys.get_block_size() - 1);
|
||||
ut_d(const size_t block_size_1= log_sys.write_size - 1);
|
||||
ut_ad(!(offset & block_size_1));
|
||||
ut_ad(!(len & block_size_1));
|
||||
ut_ad(!(length & block_size_1));
|
||||
ut_ad(!(size_t(buf) & block_size_1));
|
||||
ut_ad(len);
|
||||
ut_ad(length);
|
||||
|
||||
if (UNIV_LIKELY(offset + len <= log_sys.file_size))
|
||||
const lsn_t maximum_write_length{log_sys.file_size - offset};
|
||||
ut_ad(maximum_write_length <= log_sys.file_size - log_sys.START_OFFSET);
|
||||
|
||||
if (UNIV_UNLIKELY(length > maximum_write_length))
|
||||
{
|
||||
write:
|
||||
log_sys.log.write(offset, {buf, len});
|
||||
return;
|
||||
log_sys.log.write(offset, {buf, size_t(maximum_write_length)});
|
||||
length-= size_t(maximum_write_length);
|
||||
buf+= size_t(maximum_write_length);
|
||||
ut_ad(log_sys.START_OFFSET + length < offset);
|
||||
offset= log_sys.START_OFFSET;
|
||||
}
|
||||
|
||||
const size_t write_len= size_t(log_sys.file_size - offset);
|
||||
log_sys.log.write(offset, {buf, write_len});
|
||||
len-= write_len;
|
||||
buf+= write_len;
|
||||
ut_ad(log_sys.START_OFFSET + len < offset);
|
||||
offset= log_sys.START_OFFSET;
|
||||
goto write;
|
||||
log_sys.log.write(offset, {buf, length});
|
||||
}
|
||||
|
||||
/** Invoke commit_checkpoint_notify_ha() to notify that outstanding
|
||||
|
@ -778,11 +778,12 @@ inline void log_t::persist(lsn_t lsn) noexcept
|
|||
}
|
||||
#endif
|
||||
|
||||
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
|
||||
/** Write resize_buf to resize_log.
|
||||
@param length the used length of resize_buf */
|
||||
ATTRIBUTE_COLD void log_t::resize_write_buf(size_t length) noexcept
|
||||
void log_t::resize_write_buf(size_t length) noexcept
|
||||
{
|
||||
const size_t block_size_1= get_block_size() - 1;
|
||||
const size_t block_size_1= write_size - 1;
|
||||
ut_ad(!(resize_target & block_size_1));
|
||||
ut_ad(!(length & block_size_1));
|
||||
ut_ad(length > block_size_1);
|
||||
|
@ -802,7 +803,7 @@ ATTRIBUTE_COLD void log_t::resize_write_buf(size_t length) noexcept
|
|||
}
|
||||
|
||||
ut_a(os_file_write_func(IORequestWrite, "ib_logfile101", resize_log.m_file,
|
||||
resize_flush_buf, offset, length) == DB_SUCCESS);
|
||||
buf, offset, length) == DB_SUCCESS);
|
||||
}
|
||||
|
||||
/** Write buf to ib_logfile0.
|
||||
|
@ -824,64 +825,88 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
|
|||
}
|
||||
else
|
||||
{
|
||||
ut_ad(write_lock.is_owner());
|
||||
ut_ad(!recv_no_log_write);
|
||||
write_lock.set_pending(lsn);
|
||||
ut_ad(write_lsn >= get_flushed_lsn());
|
||||
const size_t block_size_1{get_block_size() - 1};
|
||||
lsn_t offset{calc_lsn_offset(write_lsn) & ~lsn_t{block_size_1}};
|
||||
const size_t write_size_1{write_size - 1};
|
||||
ut_ad(ut_is_2pow(write_size));
|
||||
size_t length{buf_free.load(std::memory_order_relaxed)};
|
||||
lsn_t offset{calc_lsn_offset(write_lsn)};
|
||||
ut_ad(length >= (offset & write_size_1));
|
||||
ut_ad(write_size_1 >= 511);
|
||||
|
||||
DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF " at " LSN_PF,
|
||||
write_lsn, lsn, offset));
|
||||
const byte *write_buf{buf};
|
||||
size_t length{buf_free};
|
||||
ut_ad(length >= (calc_lsn_offset(write_lsn) & block_size_1));
|
||||
const size_t new_buf_free{length & block_size_1};
|
||||
buf_free= new_buf_free;
|
||||
ut_ad(new_buf_free == ((lsn - first_lsn) & block_size_1));
|
||||
const byte *const write_buf{buf};
|
||||
offset&= ~lsn_t{write_size_1};
|
||||
|
||||
if (new_buf_free)
|
||||
if (length <= write_size_1)
|
||||
{
|
||||
ut_ad(!((length ^ (size_t(lsn) - size_t(first_lsn))) & write_size_1));
|
||||
/* Keep filling the same buffer until we have more than one block. */
|
||||
#if 0 /* TODO: Pad the last log block with dummy records. */
|
||||
buf_free= log_pad(lsn, get_block_size() - new_buf_free,
|
||||
buf + new_buf_free, flush_buf);
|
||||
buf_free= log_pad(lsn, (write_size_1 + 1) - length,
|
||||
buf + length, flush_buf);
|
||||
... /* TODO: Update the LSN and adjust other code. */
|
||||
#else
|
||||
/* The rest of the block will be written as garbage.
|
||||
(We want to avoid memset() while holding exclusive log_sys.latch)
|
||||
This block will be overwritten later, once records beyond
|
||||
the current LSN are generated. */
|
||||
# ifdef HAVE_valgrind
|
||||
MEM_MAKE_DEFINED(buf + length, get_block_size() - new_buf_free);
|
||||
if (UNIV_LIKELY_NULL(resize_flush_buf))
|
||||
MEM_MAKE_DEFINED(resize_buf + length, get_block_size() - new_buf_free);
|
||||
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - length);
|
||||
if (UNIV_LIKELY_NULL(resize_buf))
|
||||
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) - length);
|
||||
# endif
|
||||
buf[length]= 0; /* allow recovery to catch EOF faster */
|
||||
length&= ~block_size_1;
|
||||
memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15);
|
||||
if (UNIV_LIKELY_NULL(resize_flush_buf))
|
||||
memcpy_aligned<16>(resize_flush_buf, resize_buf + length,
|
||||
(new_buf_free + 15) & ~15);
|
||||
length+= get_block_size();
|
||||
#endif
|
||||
length= write_size_1 + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
const size_t new_buf_free{length & write_size_1};
|
||||
ut_ad(new_buf_free == ((lsn - first_lsn) & write_size_1));
|
||||
buf_free.store(new_buf_free, std::memory_order_relaxed);
|
||||
|
||||
if (new_buf_free)
|
||||
{
|
||||
/* The rest of the block will be written as garbage.
|
||||
(We want to avoid memset() while holding exclusive log_sys.latch)
|
||||
This block will be overwritten later, once records beyond
|
||||
the current LSN are generated. */
|
||||
#ifdef HAVE_valgrind
|
||||
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free);
|
||||
if (UNIV_LIKELY_NULL(resize_buf))
|
||||
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) -
|
||||
new_buf_free);
|
||||
#endif
|
||||
buf[length]= 0; /* allow recovery to catch EOF faster */
|
||||
length&= ~write_size_1;
|
||||
memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15);
|
||||
if (UNIV_LIKELY_NULL(resize_buf))
|
||||
memcpy_aligned<16>(resize_flush_buf, resize_buf + length,
|
||||
(new_buf_free + 15) & ~15);
|
||||
length+= write_size_1 + 1;
|
||||
}
|
||||
|
||||
std::swap(buf, flush_buf);
|
||||
std::swap(resize_buf, resize_flush_buf);
|
||||
}
|
||||
|
||||
std::swap(buf, flush_buf);
|
||||
std::swap(resize_buf, resize_flush_buf);
|
||||
write_to_log++;
|
||||
if (release_latch)
|
||||
latch.wr_unlock();
|
||||
|
||||
DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF " at " LSN_PF,
|
||||
write_lsn, lsn, offset));
|
||||
|
||||
/* Do the write to the log file */
|
||||
log_write_buf(write_buf, length, offset);
|
||||
|
||||
if (UNIV_LIKELY_NULL(resize_buf))
|
||||
resize_write_buf(length);
|
||||
write_lsn= lsn;
|
||||
|
||||
if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED))
|
||||
{
|
||||
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
|
||||
"InnoDB log write: " LSN_PF, write_lsn);
|
||||
}
|
||||
|
||||
/* Do the write to the log file */
|
||||
log_write_buf(write_buf, length, offset);
|
||||
if (UNIV_LIKELY_NULL(resize_buf))
|
||||
resize_write_buf(length);
|
||||
write_lsn= lsn;
|
||||
}
|
||||
|
||||
set_check_for_checkpoint(false);
|
||||
|
|
|
@ -1773,7 +1773,7 @@ dberr_t recv_sys_t::find_checkpoint()
|
|||
lsn= 0;
|
||||
buf= my_assume_aligned<4096>(log_sys.buf);
|
||||
if (!log_sys.is_pmem())
|
||||
if (dberr_t err= log_sys.log.read(0, {buf, 4096}))
|
||||
if (dberr_t err= log_sys.log.read(0, {buf, log_sys.START_OFFSET}))
|
||||
return err;
|
||||
/* Check the header page checksum. There was no
|
||||
checksum in the first redo log format (version 0). */
|
||||
|
@ -1842,12 +1842,7 @@ dberr_t recv_sys_t::find_checkpoint()
|
|||
for (size_t field= log_t::CHECKPOINT_1; field <= log_t::CHECKPOINT_2;
|
||||
field+= log_t::CHECKPOINT_2 - log_t::CHECKPOINT_1)
|
||||
{
|
||||
if (log_sys.is_pmem())
|
||||
buf= log_sys.buf + field;
|
||||
else
|
||||
if (dberr_t err= log_sys.log.read(field,
|
||||
{buf, log_sys.get_block_size()}))
|
||||
return err;
|
||||
buf= log_sys.buf + field;
|
||||
const lsn_t checkpoint_lsn{mach_read_from_8(buf)};
|
||||
const lsn_t end_lsn{mach_read_from_8(buf + 8)};
|
||||
if (checkpoint_lsn < first_lsn || end_lsn < checkpoint_lsn ||
|
||||
|
@ -4019,7 +4014,7 @@ static bool recv_scan_log(bool last_phase)
|
|||
DBUG_ENTER("recv_scan_log");
|
||||
|
||||
ut_ad(log_sys.is_latest());
|
||||
const size_t block_size_1{log_sys.get_block_size() - 1};
|
||||
const size_t block_size_1{log_sys.write_size - 1};
|
||||
|
||||
mysql_mutex_lock(&recv_sys.mutex);
|
||||
if (!last_phase)
|
||||
|
@ -4201,7 +4196,7 @@ static bool recv_scan_log(bool last_phase)
|
|||
if (recv_sys.is_corrupt_log())
|
||||
break;
|
||||
|
||||
if (recv_sys.offset < log_sys.get_block_size() &&
|
||||
if (recv_sys.offset < log_sys.write_size &&
|
||||
recv_sys.lsn == recv_sys.scanned_lsn)
|
||||
goto got_eof;
|
||||
|
||||
|
@ -4537,6 +4532,24 @@ dberr_t recv_recovery_read_checkpoint()
|
|||
return err;
|
||||
}
|
||||
|
||||
inline void log_t::set_recovered() noexcept
|
||||
{
|
||||
ut_ad(get_flushed_lsn() == get_lsn());
|
||||
ut_ad(recv_sys.lsn == get_lsn());
|
||||
size_t offset{recv_sys.offset};
|
||||
if (!is_pmem())
|
||||
{
|
||||
const size_t bs{log_sys.write_size}, bs_1{bs - 1};
|
||||
memmove_aligned<512>(buf, buf + (offset & ~bs_1), bs);
|
||||
offset&= bs_1;
|
||||
}
|
||||
#ifdef HAVE_PMEM
|
||||
else
|
||||
mprotect(buf, size_t(file_size), PROT_READ | PROT_WRITE);
|
||||
#endif
|
||||
set_buf_free(offset);
|
||||
}
|
||||
|
||||
/** Start recovering from a redo log checkpoint.
|
||||
of first system tablespace page
|
||||
@return error code or DB_SUCCESS */
|
||||
|
@ -4710,22 +4723,7 @@ err_exit:
|
|||
}
|
||||
|
||||
if (!srv_read_only_mode && log_sys.is_latest()) {
|
||||
ut_ad(log_sys.get_flushed_lsn() == log_sys.get_lsn());
|
||||
ut_ad(recv_sys.lsn == log_sys.get_lsn());
|
||||
if (!log_sys.is_pmem()) {
|
||||
const size_t bs_1{log_sys.get_block_size() - 1};
|
||||
const size_t ro{recv_sys.offset};
|
||||
recv_sys.offset &= bs_1;
|
||||
memmove_aligned<64>(log_sys.buf,
|
||||
log_sys.buf + (ro & ~bs_1),
|
||||
log_sys.get_block_size());
|
||||
#ifdef HAVE_PMEM
|
||||
} else {
|
||||
mprotect(log_sys.buf, size_t(log_sys.file_size),
|
||||
PROT_READ | PROT_WRITE);
|
||||
#endif
|
||||
}
|
||||
log_sys.set_buf_free(recv_sys.offset);
|
||||
log_sys.set_recovered();
|
||||
if (recv_needed_recovery
|
||||
&& srv_operation <= SRV_OPERATION_EXPORT_RESTORED
|
||||
&& recv_sys.lsn - log_sys.next_checkpoint_lsn
|
||||
|
|
|
@ -1094,7 +1094,6 @@ static ATTRIBUTE_COLD void os_file_log_buffered()
|
|||
{
|
||||
log_sys.log_maybe_unbuffered= false;
|
||||
log_sys.log_buffered= true;
|
||||
log_sys.set_block_size(512);
|
||||
}
|
||||
# endif
|
||||
|
||||
|
@ -1209,11 +1208,7 @@ os_file_create_func(
|
|||
break;
|
||||
}
|
||||
# ifdef __linux__
|
||||
} else if (type != OS_LOG_FILE) {
|
||||
} else if (log_sys.log_buffered) {
|
||||
skip_o_direct:
|
||||
os_file_log_buffered();
|
||||
} else if (create_mode != OS_FILE_CREATE
|
||||
} else if (type == OS_LOG_FILE && create_mode != OS_FILE_CREATE
|
||||
&& create_mode != OS_FILE_CREATE_SILENT
|
||||
&& !log_sys.is_opened()) {
|
||||
if (stat(name, &st)) {
|
||||
|
@ -1225,15 +1220,16 @@ os_file_create_func(
|
|||
"InnoDB: File %s was not found", name);
|
||||
goto not_found;
|
||||
}
|
||||
log_sys.set_block_size(512);
|
||||
goto skip_o_direct;
|
||||
} else if (!os_file_log_maybe_unbuffered(st)
|
||||
|| log_sys.log_buffered) {
|
||||
skip_o_direct:
|
||||
os_file_log_buffered();
|
||||
} else {
|
||||
direct_flag = O_DIRECT;
|
||||
log_sys.log_maybe_unbuffered = true;
|
||||
}
|
||||
|
||||
if (!os_file_log_maybe_unbuffered(st)) {
|
||||
goto skip_o_direct;
|
||||
}
|
||||
|
||||
direct_flag = O_DIRECT;
|
||||
log_sys.log_maybe_unbuffered= true;
|
||||
# endif
|
||||
}
|
||||
#else
|
||||
|
|
|
@ -175,7 +175,7 @@ static dberr_t create_log_file(bool create_new_db, lsn_t lsn)
|
|||
|
||||
/* We will retain ib_logfile0 until we have written a new logically
|
||||
empty log as ib_logfile101 and atomically renamed it to
|
||||
ib_logfile0 in log_t::rename_resized(). */
|
||||
ib_logfile0 in log_t::resize_rename(). */
|
||||
delete_log_files();
|
||||
|
||||
ut_ad(!os_aio_pending_reads());
|
||||
|
|
Loading…
Reference in a new issue