mirror of
https://github.com/MariaDB/server.git
synced 2025-01-26 08:44:33 +01:00
MDEV-33545: Improve innodb_doublewrite to cover NO_FSYNC
In commit 24648768b4
(MDEV-30136)
the parameter innodb_flush_method was deprecated, with no direct
replacement for innodb_flush_method=O_DIRECT_NO_FSYNC.
Let us change innodb_doublewrite from Boolean to ENUM that can
be changed while the server is running:
OFF: Assume that writes of innodb_page_size are atomic
ON: Prevent torn writes (the default)
fast: Like ON, but avoid synchronizing writes to data files
The deprecated start-up parameter innodb_flush_method=O_DIRECT_NO_FSYNC will cause
innodb_doublewrite=ON to be changed to innodb_doublewrite=fast,
which will prevent InnoDB from making any durable writes to data files.
Such durable writes (fsync() or fdatasync() on the data files) would normally be made right before the log checkpoint LSN is updated.
Depending on the file systems being used and their configuration,
this may or may not be safe.
The value innodb_doublewrite=fast differs from the previous combination of
innodb_doublewrite=ON and innodb_flush_method=O_DIRECT_NO_FSYNC by always
invoking os_file_flush() on the doublewrite buffer itself
in buf_dblwr_t::flush_buffered_writes_completed(). This should be safer
when there are multiple doublewrite batches between checkpoints.
Typically, once per second, buf_flush_page_cleaner() would write out
up to innodb_io_capacity pages and advance the log checkpoint.
Also typically, innodb_io_capacity>128, which is the size of the
doublewrite buffer in pages. Should os_file_flush_func() not be invoked
between doublewrite batches, writes could be reordered in an unsafe way.
The setting innodb_doublewrite=fast could be safe when the doublewrite
buffer (the first file of the system tablespace) and the data files
reside in the same file system.
This was tested by running "./mtr --rr innodb.alter_kill". On the first
server startup, with innodb_doublewrite=fast, os_file_flush_func()
would only be invoked on the ibdata1 file and possibly ib_logfile0.
On subsequent startups with innodb_doublewrite=OFF, os_file_flush_func()
will be invoked on the individual data files during log_checkpoint().
Note: The setting debug_no_sync (in the code, my_disable_sync) would
disable all durable writes to InnoDB files, which would be much less safe.
IORequest::Type: Introduce special values WRITE_DBL and PUNCH_DBL
for asynchronous writes that are submitted via the doublewrite buffer.
In this way, fil_space_t::use_doublewrite() or buf_dblwr.in_use()
will only be consulted during buf_page_t::flush() and the doublewrite
buffer can be enabled or disabled without any fear of inconsistency.
buf_dblwr_t::block_size: Replaces block_size().
buf_dblwr_t::flush_buffered_writes(): If !in_use() and the doublewrite
buffer is empty, just invoke fil_flush_file_spaces() and return. The
doublewrite buffer could have been disabled while a batch was in
progress.
innodb_init_params(): If innodb_flush_method=O_DIRECT_NO_FSYNC,
change an enabled innodb_doublewrite (ON) to innodb_doublewrite=fast; innodb_doublewrite=OFF is left unchanged.
Thanks to Mark Callaghan for reporting this, and Vladislav Vaintroub
for feedback.
This commit is contained in:
parent
fec2fd6add
commit
1122ac978e
18 changed files with 193 additions and 172 deletions
extra/mariabackup
mysql-test/suite
innodb
sys_vars
storage/innobase
|
@ -380,8 +380,8 @@ static my_bool opt_check_privileges;
|
|||
|
||||
extern const char *innodb_checksum_algorithm_names[];
|
||||
extern TYPELIB innodb_checksum_algorithm_typelib;
|
||||
extern const char *innodb_flush_method_names[];
|
||||
extern TYPELIB innodb_flush_method_typelib;
|
||||
extern TYPELIB innodb_doublewrite_typelib;
|
||||
/** Ignored option */
|
||||
static ulong innodb_flush_method;
|
||||
|
||||
|
@ -1859,8 +1859,8 @@ struct my_option xb_server_options[] =
|
|||
&innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
|
||||
{"innodb_doublewrite", OPT_INNODB_DOUBLEWRITE,
|
||||
"Enable InnoDB doublewrite buffer during --prepare.",
|
||||
(G_PTR*) &srv_use_doublewrite_buf,
|
||||
(G_PTR*) &srv_use_doublewrite_buf, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
|
||||
(G_PTR*) &buf_dblwr.use, (G_PTR*) &buf_dblwr.use,
|
||||
&innodb_doublewrite_typelib, GET_ENUM, OPT_ARG, 0, 0, 0, 0, 0, 0},
|
||||
{"innodb_io_capacity", OPT_INNODB_IO_CAPACITY,
|
||||
"Number of IOPs the server can do. Tunes the background IO rate",
|
||||
(G_PTR*) &srv_io_capacity, (G_PTR*) &srv_io_capacity,
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
SELECT @@innodb_doublewrite;
|
||||
@@innodb_doublewrite
|
||||
OFF
|
||||
SET GLOBAL innodb_doublewrite=fast;
|
||||
#
|
||||
# Bug#16720368 INNODB CRASHES ON BROKEN #SQL*.IBD FILE AT STARTUP
|
||||
#
|
||||
|
@ -12,7 +16,10 @@ connection default;
|
|||
disconnect con1;
|
||||
# Corrupt FIL_PAGE_TYPE in bug16720368.ibd,
|
||||
# and recompute innodb_checksum_algorithm=crc32
|
||||
# restart
|
||||
# restart: --innodb-flush-method=O_DIRECT
|
||||
SELECT @@innodb_doublewrite;
|
||||
@@innodb_doublewrite
|
||||
OFF
|
||||
SELECT COUNT(*) FROM bug16720368;
|
||||
ERROR HY000: Table `test`.`bug16720368` is corrupted. Please drop the table and recreate.
|
||||
INSERT INTO bug16720368 VALUES(1);
|
||||
|
|
|
@ -1 +1 @@
|
|||
--innodb-doublewrite=false
|
||||
--innodb-flush-method=O_DIRECT_NO_FSYNC --skip-innodb-doublewrite
|
||||
|
|
|
@ -7,6 +7,9 @@
|
|||
let MYSQLD_DATADIR=`select @@datadir`;
|
||||
let PAGE_SIZE=`select @@innodb_page_size`;
|
||||
|
||||
SELECT @@innodb_doublewrite;
|
||||
SET GLOBAL innodb_doublewrite=fast;
|
||||
|
||||
-- disable_query_log
|
||||
call mtr.add_suppression("InnoDB: innodb_force_recovery is on.");
|
||||
call mtr.add_suppression("InnoDB: Ignoring tablespace for.*bug16720368");
|
||||
|
@ -73,8 +76,11 @@ syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
|
|||
close(FILE) || die "Unable to close $file";
|
||||
EOF
|
||||
|
||||
-- let $restart_parameters=--innodb-flush-method=O_DIRECT
|
||||
-- source include/start_mysqld.inc
|
||||
-- let $restart_parameters=
|
||||
|
||||
SELECT @@innodb_doublewrite;
|
||||
--error ER_TABLE_CORRUPT
|
||||
SELECT COUNT(*) FROM bug16720368;
|
||||
--error ER_TABLE_CORRUPT
|
||||
|
|
|
@ -1,33 +1,25 @@
|
|||
'#---------------------BS_STVARS_026_01----------------------#'
|
||||
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
||||
COUNT(@@GLOBAL.innodb_doublewrite)
|
||||
1
|
||||
1 Expected
|
||||
'#---------------------BS_STVARS_026_02----------------------#'
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
@@GLOBAL.innodb_doublewrite
|
||||
ON
|
||||
SET @@GLOBAL.innodb_doublewrite=0;
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
@@GLOBAL.innodb_doublewrite
|
||||
OFF
|
||||
SET @@GLOBAL.innodb_doublewrite=2;
|
||||
SET @@GLOBAL.innodb_doublewrite=3;
|
||||
ERROR 42000: Variable 'innodb_doublewrite' can't be set to the value of '3'
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
@@GLOBAL.innodb_doublewrite
|
||||
fast
|
||||
SET @@GLOBAL.innodb_doublewrite=1;
|
||||
ERROR HY000: Variable 'innodb_doublewrite' is a read only variable
|
||||
Expected error 'Read only variable'
|
||||
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
||||
COUNT(@@GLOBAL.innodb_doublewrite)
|
||||
1
|
||||
1 Expected
|
||||
'#---------------------BS_STVARS_026_03----------------------#'
|
||||
SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
|
||||
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
|
||||
WHERE VARIABLE_NAME='innodb_doublewrite';
|
||||
IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
|
||||
1
|
||||
1 Expected
|
||||
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
||||
COUNT(@@GLOBAL.innodb_doublewrite)
|
||||
1
|
||||
1 Expected
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
@@GLOBAL.innodb_doublewrite
|
||||
ON
|
||||
SELECT COUNT(VARIABLE_VALUE)
|
||||
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
|
||||
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
|
||||
WHERE VARIABLE_NAME='innodb_doublewrite';
|
||||
COUNT(VARIABLE_VALUE)
|
||||
1
|
||||
1 Expected
|
||||
'#---------------------BS_STVARS_026_04----------------------#'
|
||||
SELECT @@innodb_doublewrite = @@GLOBAL.innodb_doublewrite;
|
||||
@@innodb_doublewrite = @@GLOBAL.innodb_doublewrite
|
||||
|
@ -48,6 +40,5 @@ SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
|||
COUNT(@@GLOBAL.innodb_doublewrite)
|
||||
1
|
||||
1 Expected
|
||||
SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite;
|
||||
ERROR 42S22: Unknown column 'innodb_doublewrite' in 'field list'
|
||||
Expected error 'Readonly variable'
|
||||
SELECT @@innodb_doublewrite = @@SESSION.innodb_doublewrite;
|
||||
ERROR HY000: Variable 'innodb_doublewrite' is a GLOBAL variable
|
||||
|
|
|
@ -503,14 +503,14 @@ VARIABLE_NAME INNODB_DOUBLEWRITE
|
|||
SESSION_VALUE NULL
|
||||
DEFAULT_VALUE ON
|
||||
VARIABLE_SCOPE GLOBAL
|
||||
VARIABLE_TYPE BOOLEAN
|
||||
VARIABLE_COMMENT Enable InnoDB doublewrite buffer (enabled by default). Disable with --skip-innodb-doublewrite.
|
||||
VARIABLE_TYPE ENUM
|
||||
VARIABLE_COMMENT Whether and how to use the doublewrite buffer. OFF=Assume that writes of innodb_page_size are atomic; ON=Prevent torn writes (the default); fast=Like ON, but do not synchronize writes to data files
|
||||
NUMERIC_MIN_VALUE NULL
|
||||
NUMERIC_MAX_VALUE NULL
|
||||
NUMERIC_BLOCK_SIZE NULL
|
||||
ENUM_VALUE_LIST OFF,ON
|
||||
READ_ONLY YES
|
||||
COMMAND_LINE_ARGUMENT NONE
|
||||
ENUM_VALUE_LIST OFF,ON,fast
|
||||
READ_ONLY NO
|
||||
COMMAND_LINE_ARGUMENT OPTIONAL
|
||||
VARIABLE_NAME INNODB_ENCRYPTION_ROTATE_KEY_AGE
|
||||
SESSION_VALUE NULL
|
||||
DEFAULT_VALUE 1
|
||||
|
|
1
mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.opt
Normal file
1
mysql-test/suite/sys_vars/t/innodb_doublewrite_basic.opt
Normal file
|
@ -0,0 +1 @@
|
|||
--innodb-doublewrite
|
|
@ -1,75 +1,20 @@
|
|||
|
||||
|
||||
################## mysql-test\t\innodb_doublewrite_basic.test #################
|
||||
# #
|
||||
# Variable Name: innodb_doublewrite #
|
||||
# Scope: Global #
|
||||
# Access Type: Static #
|
||||
# Data Type: boolean #
|
||||
# #
|
||||
# #
|
||||
# Creation Date: 2008-02-07 #
|
||||
# Author : Sharique Abdullah #
|
||||
# #
|
||||
# #
|
||||
# Description:Test Cases of Dynamic System Variable innodb_doublewrite #
|
||||
# that checks the behavior of this variable in the following ways #
|
||||
# * Value Check #
|
||||
# * Scope Check #
|
||||
# #
|
||||
# Reference: http://dev.mysql.com/doc/refman/5.1/en/ #
|
||||
# server-system-variables.html #
|
||||
# #
|
||||
###############################################################################
|
||||
|
||||
--source include/have_innodb.inc
|
||||
|
||||
--echo '#---------------------BS_STVARS_026_01----------------------#'
|
||||
####################################################################
|
||||
# Displaying default value #
|
||||
####################################################################
|
||||
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
||||
--echo 1 Expected
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
|
||||
SET @@GLOBAL.innodb_doublewrite=0;
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
|
||||
--echo '#---------------------BS_STVARS_026_02----------------------#'
|
||||
####################################################################
|
||||
# Check if Value can set #
|
||||
####################################################################
|
||||
|
||||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
|
||||
SET @@GLOBAL.innodb_doublewrite=2;
|
||||
--error ER_WRONG_VALUE_FOR_VAR
|
||||
SET @@GLOBAL.innodb_doublewrite=3;
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
SET @@GLOBAL.innodb_doublewrite=1;
|
||||
--echo Expected error 'Read only variable'
|
||||
SELECT @@GLOBAL.innodb_doublewrite;
|
||||
|
||||
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
||||
--echo 1 Expected
|
||||
|
||||
|
||||
|
||||
|
||||
--echo '#---------------------BS_STVARS_026_03----------------------#'
|
||||
#################################################################
|
||||
# Check if the value in GLOBAL Table matches value in variable #
|
||||
#################################################################
|
||||
|
||||
--disable_warnings
|
||||
SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
|
||||
SELECT COUNT(VARIABLE_VALUE)
|
||||
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
|
||||
WHERE VARIABLE_NAME='innodb_doublewrite';
|
||||
--enable_warnings
|
||||
--echo 1 Expected
|
||||
|
||||
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
||||
--echo 1 Expected
|
||||
|
||||
--disable_warnings
|
||||
SELECT COUNT(VARIABLE_VALUE)
|
||||
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
|
||||
WHERE VARIABLE_NAME='innodb_doublewrite';
|
||||
--enable_warnings
|
||||
--echo 1 Expected
|
||||
|
||||
|
||||
|
||||
--echo '#---------------------BS_STVARS_026_04----------------------#'
|
||||
################################################################################
|
||||
|
@ -78,8 +23,6 @@ WHERE VARIABLE_NAME='innodb_doublewrite';
|
|||
SELECT @@innodb_doublewrite = @@GLOBAL.innodb_doublewrite;
|
||||
--echo 1 Expected
|
||||
|
||||
|
||||
|
||||
--echo '#---------------------BS_STVARS_026_05----------------------#'
|
||||
################################################################################
|
||||
# Check if innodb_doublewrite can be accessed with and without @@ sign #
|
||||
|
@ -99,8 +42,5 @@ SELECT COUNT(@@SESSION.innodb_doublewrite);
|
|||
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
|
||||
--echo 1 Expected
|
||||
|
||||
--Error ER_BAD_FIELD_ERROR
|
||||
SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite;
|
||||
--echo Expected error 'Readonly variable'
|
||||
|
||||
|
||||
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
|
||||
SELECT @@innodb_doublewrite = @@SESSION.innodb_doublewrite;
|
||||
|
|
|
@ -53,6 +53,7 @@ void buf_dblwr_t::init()
|
|||
active_slot= &slots[0];
|
||||
mysql_mutex_init(buf_dblwr_mutex_key, &mutex, nullptr);
|
||||
pthread_cond_init(&cond, nullptr);
|
||||
block_size= FSP_EXTENT_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,7 +68,7 @@ inline void buf_dblwr_t::init(const byte *header)
|
|||
block1= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK1));
|
||||
block2= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK2));
|
||||
|
||||
const uint32_t buf_size= 2 * block_size();
|
||||
const uint32_t buf_size= 2 * block_size;
|
||||
for (int i= 0; i < 2; i++)
|
||||
{
|
||||
slots[i].write_buf= static_cast<byte*>
|
||||
|
@ -86,7 +87,7 @@ bool buf_dblwr_t::create()
|
|||
return true;
|
||||
|
||||
mtr_t mtr;
|
||||
const ulint size= block_size();
|
||||
const ulint size= block_size;
|
||||
|
||||
start_again:
|
||||
mtr.start();
|
||||
|
@ -251,7 +252,7 @@ loads the pages from double write buffer into memory.
|
|||
dberr_t buf_dblwr_t::init_or_load_pages(pfs_os_file_t file, const char *path)
|
||||
{
|
||||
ut_ad(this == &buf_dblwr);
|
||||
const uint32_t size= block_size();
|
||||
const uint32_t size= block_size;
|
||||
|
||||
/* We do the file i/o past the buffer pool */
|
||||
byte *read_buf= static_cast<byte*>(aligned_malloc(srv_page_size,
|
||||
|
@ -488,7 +489,6 @@ void buf_dblwr_t::write_completed()
|
|||
mysql_mutex_lock(&mutex);
|
||||
|
||||
ut_ad(is_created());
|
||||
ut_ad(srv_use_doublewrite_buf);
|
||||
ut_ad(batch_running);
|
||||
slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
|
||||
ut_ad(flush_slot->reserved);
|
||||
|
@ -574,7 +574,7 @@ static void buf_dblwr_check_block(const buf_page_t *bpage)
|
|||
bool buf_dblwr_t::flush_buffered_writes(const ulint size)
|
||||
{
|
||||
mysql_mutex_assert_owner(&mutex);
|
||||
ut_ad(size == block_size());
|
||||
ut_ad(size == block_size);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
|
@ -647,7 +647,6 @@ static void *get_frame(const IORequest &request)
|
|||
void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
|
||||
{
|
||||
ut_ad(this == &buf_dblwr);
|
||||
ut_ad(srv_use_doublewrite_buf);
|
||||
ut_ad(is_created());
|
||||
ut_ad(!srv_read_only_mode);
|
||||
ut_ad(!request.bpage);
|
||||
|
@ -670,8 +669,14 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
|
|||
pages_written+= flush_slot->first_free;
|
||||
mysql_mutex_unlock(&mutex);
|
||||
|
||||
/* Now flush the doublewrite buffer data to disk */
|
||||
fil_system.sys_space->flush<false>();
|
||||
/* Make the doublewrite durable. Note: The doublewrite buffer is
|
||||
always in the first file of the system tablespace. We will not
|
||||
bother about fil_system.unflushed_spaces, which can result in a
|
||||
redundant call during fil_flush_file_spaces() in
|
||||
log_checkpoint(). Writes to the system tablespace should be rare,
|
||||
except when executing DDL or using the non-default settings
|
||||
innodb_file_per_table=OFF or innodb_undo_tablespaces=0. */
|
||||
os_file_flush(request.node->handle);
|
||||
|
||||
/* The writes have been flushed to disk now and in recovery we will
|
||||
find them in the doublewrite buffer blocks. Next, write the data pages. */
|
||||
|
@ -714,17 +719,18 @@ posted, and also when we may have to wait for a page latch!
|
|||
Otherwise a deadlock of threads can occur. */
|
||||
void buf_dblwr_t::flush_buffered_writes()
|
||||
{
|
||||
if (!is_created() || !srv_use_doublewrite_buf)
|
||||
mysql_mutex_lock(&mutex);
|
||||
|
||||
if (!in_use() && active_slot->first_free == 0)
|
||||
{
|
||||
mysql_mutex_unlock(&mutex);
|
||||
fil_flush_file_spaces();
|
||||
return;
|
||||
}
|
||||
|
||||
ut_ad(!srv_read_only_mode);
|
||||
const ulint size= block_size();
|
||||
|
||||
mysql_mutex_lock(&mutex);
|
||||
if (!flush_buffered_writes(size))
|
||||
if (!flush_buffered_writes(block_size))
|
||||
mysql_mutex_unlock(&mutex);
|
||||
}
|
||||
|
||||
|
@ -734,8 +740,6 @@ flush_buffered_writes() will be invoked to make space.
|
|||
@param size payload size in bytes */
|
||||
void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
|
||||
{
|
||||
ut_ad(request.is_async());
|
||||
ut_ad(request.is_write());
|
||||
ut_ad(request.bpage);
|
||||
ut_ad(request.bpage->in_file());
|
||||
ut_ad(request.node);
|
||||
|
@ -744,7 +748,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
|
|||
ut_ad(request.node->space->referenced());
|
||||
ut_ad(!srv_read_only_mode);
|
||||
|
||||
const ulint buf_size= 2 * block_size();
|
||||
const ulint buf_size= 2 * block_size;
|
||||
|
||||
mysql_mutex_lock(&mutex);
|
||||
|
||||
|
@ -773,7 +777,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
|
|||
ut_ad(active_slot->reserved == active_slot->first_free);
|
||||
ut_ad(active_slot->reserved < buf_size);
|
||||
new (active_slot->buf_block_arr + active_slot->first_free++)
|
||||
element{request, size};
|
||||
element{request.doublewritten(), size};
|
||||
active_slot->reserved= active_slot->first_free;
|
||||
|
||||
if (active_slot->first_free != buf_size ||
|
||||
|
|
|
@ -350,9 +350,9 @@ void buf_page_write_complete(const IORequest &request, bool error)
|
|||
else
|
||||
{
|
||||
bpage->write_complete(persistent, error, state);
|
||||
if (state < buf_page_t::WRITE_FIX_REINIT &&
|
||||
request.node->space->use_doublewrite())
|
||||
if (request.is_doublewritten())
|
||||
{
|
||||
ut_ad(state < buf_page_t::WRITE_FIX_REINIT);
|
||||
ut_ad(persistent);
|
||||
buf_dblwr.write_completed();
|
||||
}
|
||||
|
|
|
@ -1231,9 +1231,6 @@ void fil_system_t::create(ulint hash_size)
|
|||
ut_ad(!is_initialised());
|
||||
ut_ad(!(srv_page_size % FSP_EXTENT_SIZE));
|
||||
ut_ad(srv_page_size);
|
||||
ut_ad(!spaces.array);
|
||||
|
||||
m_initialised = true;
|
||||
|
||||
compile_time_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX));
|
||||
compile_time_assert(!(UNIV_PAGE_SIZE_MIN % FSP_EXTENT_SIZE_MIN));
|
||||
|
@ -1244,6 +1241,8 @@ void fil_system_t::create(ulint hash_size)
|
|||
|
||||
spaces.create(hash_size);
|
||||
|
||||
need_unflushed_spaces = !write_through && buf_dblwr.need_fsync();
|
||||
|
||||
fil_space_crypt_init();
|
||||
#ifdef __linux__
|
||||
ssd.clear();
|
||||
|
@ -1317,13 +1316,12 @@ void fil_system_t::close()
|
|||
|
||||
if (is_initialised())
|
||||
{
|
||||
m_initialised= false;
|
||||
spaces.free();
|
||||
mysql_mutex_destroy(&mutex);
|
||||
fil_space_crypt_cleanup();
|
||||
}
|
||||
|
||||
ut_ad(!spaces.array);
|
||||
ut_ad(!is_initialised());
|
||||
|
||||
#ifdef __linux__
|
||||
ssd.clear();
|
||||
|
@ -1464,6 +1462,7 @@ void fil_system_t::set_write_through(bool write_through)
|
|||
{
|
||||
this->write_through= write_through;
|
||||
fil_space_t::reopen_all();
|
||||
need_unflushed_spaces = !write_through && buf_dblwr.need_fsync();
|
||||
}
|
||||
|
||||
mysql_mutex_unlock(&mutex);
|
||||
|
@ -2833,19 +2832,18 @@ static void fil_invalid_page_access_msg(const char *name,
|
|||
}
|
||||
|
||||
/** Update the data structures on write completion */
|
||||
inline void fil_node_t::complete_write()
|
||||
void fil_space_t::complete_write()
|
||||
{
|
||||
mysql_mutex_assert_not_owner(&fil_system.mutex);
|
||||
|
||||
if (space->purpose != FIL_TYPE_TEMPORARY &&
|
||||
(!fil_system.is_write_through() && !my_disable_sync) &&
|
||||
space->set_needs_flush())
|
||||
if (purpose != FIL_TYPE_TEMPORARY &&
|
||||
fil_system.use_unflushed_spaces() && set_needs_flush())
|
||||
{
|
||||
mysql_mutex_lock(&fil_system.mutex);
|
||||
if (!space->is_in_unflushed_spaces)
|
||||
if (!is_in_unflushed_spaces)
|
||||
{
|
||||
space->is_in_unflushed_spaces= true;
|
||||
fil_system.unflushed_spaces.push_front(*space);
|
||||
is_in_unflushed_spaces= true;
|
||||
fil_system.unflushed_spaces.push_front(*this);
|
||||
}
|
||||
mysql_mutex_unlock(&fil_system.mutex);
|
||||
}
|
||||
|
@ -2945,7 +2943,7 @@ io_error:
|
|||
if (!type.is_async()) {
|
||||
if (type.is_write()) {
|
||||
release_sync_write:
|
||||
node->complete_write();
|
||||
complete_write();
|
||||
release:
|
||||
release();
|
||||
goto func_exit;
|
||||
|
@ -2965,21 +2963,28 @@ void IORequest::write_complete(int io_error) const
|
|||
{
|
||||
ut_ad(fil_validate_skip());
|
||||
ut_ad(node);
|
||||
fil_space_t *space= node->space;
|
||||
ut_ad(is_write());
|
||||
node->complete_write();
|
||||
|
||||
if (!bpage)
|
||||
{
|
||||
ut_ad(!srv_read_only_mode);
|
||||
if (type == IORequest::DBLWR_BATCH)
|
||||
{
|
||||
buf_dblwr.flush_buffered_writes_completed(*this);
|
||||
/* Above, we already invoked os_file_flush() on the
|
||||
doublewrite buffer if needed. */
|
||||
goto func_exit;
|
||||
}
|
||||
else
|
||||
ut_ad(type == IORequest::WRITE_ASYNC);
|
||||
}
|
||||
else
|
||||
buf_page_write_complete(*this, io_error);
|
||||
|
||||
node->space->release();
|
||||
space->complete_write();
|
||||
func_exit:
|
||||
space->release();
|
||||
}
|
||||
|
||||
void IORequest::read_complete(int io_error) const
|
||||
|
|
|
@ -352,7 +352,7 @@ static TYPELIB innodb_default_row_format_typelib = {
|
|||
};
|
||||
|
||||
/** Names of allowed values of innodb_flush_method */
|
||||
const char* innodb_flush_method_names[] = {
|
||||
static const char* innodb_flush_method_names[] = {
|
||||
"fsync",
|
||||
"O_DSYNC",
|
||||
"littlesync",
|
||||
|
@ -380,6 +380,18 @@ TYPELIB innodb_flush_method_typelib = {
|
|||
/** Deprecated parameter */
|
||||
static ulong innodb_flush_method;
|
||||
|
||||
/** Names of allowed values of innodb_doublewrite */
|
||||
static const char *innodb_doublewrite_names[]=
|
||||
{"OFF", "ON", "fast", nullptr};
|
||||
|
||||
/** Enumeration of innodb_doublewrite */
|
||||
TYPELIB innodb_doublewrite_typelib= {
|
||||
array_elements(innodb_doublewrite_names) - 1,
|
||||
"innodb_doublewrite_typelib",
|
||||
innodb_doublewrite_names,
|
||||
nullptr
|
||||
};
|
||||
|
||||
/** Names of allowed values of innodb_deadlock_report */
|
||||
static const char *innodb_deadlock_report_names[]= {
|
||||
"off", /* Do not report any details of deadlocks */
|
||||
|
@ -3982,6 +3994,10 @@ static int innodb_init_params()
|
|||
} else if (innodb_flush_method >= 4 /* O_DIRECT */
|
||||
IF_WIN(&& innodb_flush_method < 8 /* normal */,)) {
|
||||
/* O_DIRECT and similar settings do nothing */
|
||||
if (innodb_flush_method == 5 /* O_DIRECT_NO_FSYNC */
|
||||
&& buf_dblwr.use) {
|
||||
buf_dblwr.use = buf_dblwr.USE_FAST;
|
||||
}
|
||||
#ifdef O_DIRECT
|
||||
} else if (srv_use_atomic_writes && my_may_have_atomic_write) {
|
||||
/* If atomic writes are enabled, do the same as with
|
||||
|
@ -18442,6 +18458,12 @@ static void innodb_data_file_write_through_update(THD *, st_mysql_sys_var*,
|
|||
mysql_mutex_lock(&LOCK_global_system_variables);
|
||||
}
|
||||
|
||||
static void innodb_doublewrite_update(THD *, st_mysql_sys_var*,
|
||||
void *, const void *save)
|
||||
{
|
||||
fil_system.set_use_doublewrite(*static_cast<const ulong*>(save));
|
||||
}
|
||||
|
||||
static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
|
||||
void *var, const void *save)
|
||||
{
|
||||
|
@ -18775,11 +18797,14 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
|
|||
"The common part for InnoDB table spaces.",
|
||||
NULL, NULL, NULL);
|
||||
|
||||
static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
|
||||
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
|
||||
"Enable InnoDB doublewrite buffer (enabled by default)."
|
||||
" Disable with --skip-innodb-doublewrite.",
|
||||
NULL, NULL, TRUE);
|
||||
static MYSQL_SYSVAR_ENUM(doublewrite, buf_dblwr.use,
|
||||
PLUGIN_VAR_OPCMDARG,
|
||||
"Whether and how to use the doublewrite buffer. "
|
||||
"OFF=Assume that writes of innodb_page_size are atomic; "
|
||||
"ON=Prevent torn writes (the default); "
|
||||
"fast=Like ON, but do not synchronize writes to data files",
|
||||
nullptr, innodb_doublewrite_update, true,
|
||||
&innodb_doublewrite_typelib);
|
||||
|
||||
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
|
||||
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
|
||||
|
|
|
@ -53,9 +53,9 @@ class buf_dblwr_t
|
|||
element* buf_block_arr;
|
||||
};
|
||||
|
||||
/** the page number of the first doublewrite block (block_size() pages) */
|
||||
/** the page number of the first doublewrite block (block_size pages) */
|
||||
page_id_t block1{0, 0};
|
||||
/** the page number of the second doublewrite block (block_size() pages) */
|
||||
/** the page number of the second doublewrite block (block_size pages) */
|
||||
page_id_t block2{0, 0};
|
||||
|
||||
/** mutex protecting the data members below */
|
||||
|
@ -74,6 +74,22 @@ class buf_dblwr_t
|
|||
slot slots[2];
|
||||
slot *active_slot;
|
||||
|
||||
/** Size of the doublewrite block in pages */
|
||||
uint32_t block_size;
|
||||
|
||||
public:
|
||||
/** Values of use */
|
||||
enum usage {
|
||||
/** Assume that writes are atomic */
|
||||
USE_NO= 0,
|
||||
/** Use the doublewrite buffer with full durability */
|
||||
USE_YES,
|
||||
/** Durable writes to the doublewrite buffer, not to data files */
|
||||
USE_FAST
|
||||
};
|
||||
/** The value of innodb_doublewrite */
|
||||
ulong use;
|
||||
private:
|
||||
/** Initialise the persistent storage of the doublewrite buffer.
|
||||
@param header doublewrite page header in the TRX_SYS page */
|
||||
inline void init(const byte *header);
|
||||
|
@ -126,9 +142,6 @@ public:
|
|||
@param request the completed batch write request */
|
||||
void flush_buffered_writes_completed(const IORequest &request);
|
||||
|
||||
/** Size of the doublewrite block in pages */
|
||||
uint32_t block_size() const { return FSP_EXTENT_SIZE; }
|
||||
|
||||
/** Schedule a page write. If the doublewrite memory buffer is full,
|
||||
flush_buffered_writes() will be invoked to make space.
|
||||
@param request asynchronous write request
|
||||
|
@ -139,6 +152,19 @@ public:
|
|||
bool is_created() const
|
||||
{ return UNIV_LIKELY(block1 != page_id_t(0, 0)); }
|
||||
|
||||
/** @return whether the doublewrite buffer is in use */
|
||||
bool in_use() const { return is_created() && use; }
|
||||
/** @return whether fsync() is needed on non-doublewrite pages */
|
||||
bool need_fsync() const { return use < USE_FAST; }
|
||||
|
||||
void set_use(ulong use)
|
||||
{
|
||||
ut_ad(use <= USE_FAST);
|
||||
mysql_mutex_lock(&mutex);
|
||||
this->use= use;
|
||||
mysql_mutex_unlock(&mutex);
|
||||
}
|
||||
|
||||
/** @return whether a page identifier is part of the doublewrite buffer */
|
||||
bool is_inside(const page_id_t id) const
|
||||
{
|
||||
|
@ -147,8 +173,8 @@ public:
|
|||
ut_ad(block1 < block2);
|
||||
if (id < block1)
|
||||
return false;
|
||||
const uint32_t size= block_size();
|
||||
return id < block1 + size || (id >= block2 && id < block2 + size);
|
||||
return id < block1 + block_size ||
|
||||
(id >= block2 && id < block2 + block_size);
|
||||
}
|
||||
|
||||
/** Wait for flush_buffered_writes() to be fully completed */
|
||||
|
|
|
@ -48,9 +48,6 @@ struct named_spaces_tag_t;
|
|||
|
||||
using space_list_t= ilist<fil_space_t, space_list_tag_t>;
|
||||
|
||||
// Forward declaration
|
||||
extern my_bool srv_use_doublewrite_buf;
|
||||
|
||||
/** Undo tablespaces starts with space_id. */
|
||||
extern uint32_t srv_undo_space_id_start;
|
||||
/** The number of UNDO tablespaces that are open and ready to use. */
|
||||
|
@ -1008,6 +1005,9 @@ public:
|
|||
/** @return the tablespace name (databasename/tablename) */
|
||||
name_type name() const;
|
||||
|
||||
/** Update the data structures on write completion */
|
||||
void complete_write();
|
||||
|
||||
private:
|
||||
/** @return whether the file is usable for io() */
|
||||
ATTRIBUTE_COLD bool prepare_acquired();
|
||||
|
@ -1080,9 +1080,6 @@ struct fil_node_t final
|
|||
@return detached handle or OS_FILE_CLOSED */
|
||||
inline pfs_os_file_t close_to_free(bool detach_handle= false);
|
||||
|
||||
/** Update the data structures on write completion */
|
||||
inline void complete_write();
|
||||
|
||||
private:
|
||||
/** Does stuff common for close() and detach() */
|
||||
void prepare_to_close_or_detach();
|
||||
|
@ -1090,8 +1087,7 @@ private:
|
|||
|
||||
inline bool fil_space_t::use_doublewrite() const
|
||||
{
|
||||
return !UT_LIST_GET_FIRST(chain)->atomic_write && srv_use_doublewrite_buf &&
|
||||
buf_dblwr.is_created();
|
||||
return !UT_LIST_GET_FIRST(chain)->atomic_write && buf_dblwr.in_use();
|
||||
}
|
||||
|
||||
inline void fil_space_t::set_imported()
|
||||
|
@ -1352,9 +1348,9 @@ struct fil_system_t
|
|||
Some members may require late initialisation, thus we just mark object as
|
||||
uninitialised. Real initialisation happens in create().
|
||||
*/
|
||||
fil_system_t() : m_initialised(false) {}
|
||||
fil_system_t() {}
|
||||
|
||||
bool is_initialised() const { return m_initialised; }
|
||||
bool is_initialised() const { return spaces.array; }
|
||||
|
||||
/**
|
||||
Create the file system interface at database start.
|
||||
|
@ -1367,8 +1363,6 @@ struct fil_system_t
|
|||
void close();
|
||||
|
||||
private:
|
||||
bool m_initialised;
|
||||
|
||||
/** Points to the last opened space in space_list. Protected with
|
||||
fil_system.mutex. */
|
||||
fil_space_t *space_list_last_opened= nullptr;
|
||||
|
@ -1404,19 +1398,32 @@ public:
|
|||
/** Map of fil_space_t::id to fil_space_t* */
|
||||
hash_table_t spaces;
|
||||
|
||||
/** whether each write to data files is durable (O_DSYNC) */
|
||||
/** false=invoke fsync() or fdatasync() on data files before checkpoint;
|
||||
true=each write is durable (O_DSYNC) */
|
||||
my_bool write_through;
|
||||
/** whether data files are buffered (not O_DIRECT) */
|
||||
my_bool buffered;
|
||||
/** whether fdatasync() is needed on data files */
|
||||
Atomic_relaxed<bool> need_unflushed_spaces;
|
||||
|
||||
/** Try to enable or disable write-through of data files */
|
||||
void set_write_through(bool write_through);
|
||||
/** Update innodb_doublewrite */
|
||||
void set_use_doublewrite(ulong use)
|
||||
{
|
||||
buf_dblwr.set_use(use);
|
||||
need_unflushed_spaces= !write_through && buf_dblwr.need_fsync();
|
||||
}
|
||||
|
||||
/** Try to enable or disable file system caching of data files */
|
||||
void set_buffered(bool buffered);
|
||||
|
||||
TPOOL_SUPPRESS_TSAN bool is_write_through() const { return write_through; }
|
||||
TPOOL_SUPPRESS_TSAN bool is_buffered() const { return buffered; }
|
||||
|
||||
/** @return whether to update unflushed_spaces */
|
||||
bool use_unflushed_spaces() const { return need_unflushed_spaces; }
|
||||
|
||||
/** tablespaces for which fil_space_t::needs_flush() holds */
|
||||
sized_ilist<fil_space_t, unflushed_spaces_tag_t> unflushed_spaces;
|
||||
/** number of currently open files; protected by mutex */
|
||||
|
|
|
@ -185,10 +185,14 @@ public:
|
|||
WRITE_SYNC= 16,
|
||||
/** Asynchronous write */
|
||||
WRITE_ASYNC= WRITE_SYNC | 1,
|
||||
/** Asynchronous doublewritten page */
|
||||
WRITE_DBL= WRITE_ASYNC | 4,
|
||||
/** A doublewrite batch */
|
||||
DBLWR_BATCH= WRITE_ASYNC | 8,
|
||||
/** Write data and punch hole for the rest */
|
||||
PUNCH= WRITE_ASYNC | 16,
|
||||
/** Write doublewritten data and punch hole for the rest */
|
||||
PUNCH_DBL= PUNCH | 4,
|
||||
/** Zero out a range of bytes in fil_space_t::io() */
|
||||
PUNCH_RANGE= WRITE_SYNC | 32,
|
||||
};
|
||||
|
@ -204,6 +208,14 @@ public:
|
|||
bool is_read() const { return (type & READ_SYNC) != 0; }
|
||||
bool is_write() const { return (type & WRITE_SYNC) != 0; }
|
||||
bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; }
|
||||
bool is_doublewritten() const { return (type & 4) != 0; }
|
||||
|
||||
/** Create a write request for the doublewrite buffer. */
|
||||
IORequest doublewritten() const
|
||||
{
|
||||
ut_ad(type == WRITE_ASYNC || type == PUNCH);
|
||||
return IORequest{bpage, slot, node, Type(type | 4)};
|
||||
}
|
||||
|
||||
void write_complete(int io_error) const;
|
||||
void read_complete(int io_error) const;
|
||||
|
|
|
@ -303,7 +303,6 @@ extern my_bool srv_stats_include_delete_marked;
|
|||
extern unsigned long long srv_stats_modified_counter;
|
||||
extern my_bool srv_stats_sample_traditional;
|
||||
|
||||
extern my_bool srv_use_doublewrite_buf;
|
||||
extern ulong srv_checksum_algorithm;
|
||||
|
||||
extern my_bool srv_force_primary_key;
|
||||
|
|
|
@ -305,8 +305,6 @@ unsigned long long srv_stats_modified_counter;
|
|||
based on number of configured pages */
|
||||
my_bool srv_stats_sample_traditional;
|
||||
|
||||
my_bool srv_use_doublewrite_buf;
|
||||
|
||||
/** innodb_sync_spin_loops */
|
||||
ulong srv_n_spin_wait_rounds;
|
||||
/** innodb_spin_wait_delay */
|
||||
|
|
|
@ -1193,7 +1193,7 @@ dberr_t srv_start(bool create_new_db)
|
|||
|
||||
if (srv_read_only_mode) {
|
||||
sql_print_information("InnoDB: Started in read only mode");
|
||||
srv_use_doublewrite_buf = false;
|
||||
buf_dblwr.use = buf_dblwr.USE_NO;
|
||||
}
|
||||
|
||||
high_level_read_only = srv_read_only_mode
|
||||
|
|
Loading…
Add table
Reference in a new issue