MDEV-33545: Improve innodb_doublewrite to cover NO_FSYNC

In commit 24648768b4 (MDEV-30136)
the parameter innodb_flush_method was deprecated, with no direct
replacement for innodb_flush_method=O_DIRECT_NO_FSYNC.

Let us change innodb_doublewrite from Boolean to ENUM that can
be changed while the server is running:

OFF: Assume that writes of innodb_page_size are atomic
ON: Prevent torn writes (the default)
fast: Like ON, but avoid synchronizing writes to data files

The deprecated start-up parameter innodb_flush_method=O_DIRECT_NO_FSYNC
will cause innodb_doublewrite=ON to be changed to innodb_doublewrite=fast,
which will prevent InnoDB from making any durable writes to data files.
Such durable writes would normally be made right before the log checkpoint
LSN is updated.
Depending on the file systems being used and their configuration,
this may or may not be safe.

The value innodb_doublewrite=fast differs from the previous combination of
innodb_doublewrite=ON and innodb_flush_method=O_DIRECT_NO_FSYNC by always
invoking os_file_flush() on the doublewrite buffer itself
in buf_dblwr_t::flush_buffered_writes_completed(). This should be safer
when there are multiple doublewrite batches between checkpoints.
Typically, once per second, buf_flush_page_cleaner() would write out
up to innodb_io_capacity pages and advance the log checkpoint.
Also typically, innodb_io_capacity>128, which is the size of the
doublewrite buffer in pages. Should os_file_flush_func() not be invoked
between doublewrite batches, writes could be reordered in an unsafe way.

The setting innodb_doublewrite=fast could be safe when the doublewrite
buffer (the first file of the system tablespace) and the data files
reside in the same file system.

This was tested by running "./mtr --rr innodb.alter_kill". On the first
server startup, with innodb_doublewrite=fast, os_file_flush_func()
would only be invoked on the ibdata1 file and possibly ib_logfile0.
On subsequent startups with innodb_doublewrite=OFF, os_file_flush_func()
will be invoked on the individual data files during log_checkpoint().

Note: The setting debug_no_sync (in the code, my_disable_sync) would
disable all durable writes to InnoDB files, which would be much less safe.

IORequest::Type: Introduce special values WRITE_DBL and PUNCH_DBL
for asynchronous writes that are submitted via the doublewrite buffer.
In this way, fil_space_t::use_doublewrite() or buf_dblwr.in_use()
will only be consulted during buf_page_t::flush() and the doublewrite
buffer can be enabled or disabled without any fear of inconsistency.

buf_dblwr_t::block_size: Replaces block_size().

buf_dblwr_t::flush_buffered_writes(): If !in_use() and the doublewrite
buffer is empty, just invoke fil_flush_file_spaces() and return. The
doublewrite buffer could have been disabled while a batch was in
progress.

innodb_init_params(): If innodb_flush_method=O_DIRECT_NO_FSYNC and
innodb_doublewrite is not OFF, set innodb_doublewrite=fast.

Thanks to Mark Callaghan for reporting this, and Vladislav Vaintroub
for feedback.
This commit is contained in:
Marko Mäkelä 2024-04-04 08:12:54 +03:00
parent fec2fd6add
commit 1122ac978e
18 changed files with 193 additions and 172 deletions

View file

@ -380,8 +380,8 @@ static my_bool opt_check_privileges;
extern const char *innodb_checksum_algorithm_names[];
extern TYPELIB innodb_checksum_algorithm_typelib;
extern const char *innodb_flush_method_names[];
extern TYPELIB innodb_flush_method_typelib;
extern TYPELIB innodb_doublewrite_typelib;
/** Ignored option */
static ulong innodb_flush_method;
@ -1859,8 +1859,8 @@ struct my_option xb_server_options[] =
&innobase_data_home_dir, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0},
{"innodb_doublewrite", OPT_INNODB_DOUBLEWRITE,
"Enable InnoDB doublewrite buffer during --prepare.",
(G_PTR*) &srv_use_doublewrite_buf,
(G_PTR*) &srv_use_doublewrite_buf, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
(G_PTR*) &buf_dblwr.use, (G_PTR*) &buf_dblwr.use,
&innodb_doublewrite_typelib, GET_ENUM, OPT_ARG, 0, 0, 0, 0, 0, 0},
{"innodb_io_capacity", OPT_INNODB_IO_CAPACITY,
"Number of IOPs the server can do. Tunes the background IO rate",
(G_PTR*) &srv_io_capacity, (G_PTR*) &srv_io_capacity,

View file

@ -1,3 +1,7 @@
SELECT @@innodb_doublewrite;
@@innodb_doublewrite
OFF
SET GLOBAL innodb_doublewrite=fast;
#
# Bug#16720368 INNODB CRASHES ON BROKEN #SQL*.IBD FILE AT STARTUP
#
@ -12,7 +16,10 @@ connection default;
disconnect con1;
# Corrupt FIL_PAGE_TYPE in bug16720368.ibd,
# and recompute innodb_checksum_algorithm=crc32
# restart
# restart: --innodb-flush-method=O_DIRECT
SELECT @@innodb_doublewrite;
@@innodb_doublewrite
OFF
SELECT COUNT(*) FROM bug16720368;
ERROR HY000: Table `test`.`bug16720368` is corrupted. Please drop the table and recreate.
INSERT INTO bug16720368 VALUES(1);

View file

@ -1 +1 @@
--innodb-doublewrite=false
--innodb-flush-method=O_DIRECT_NO_FSYNC --skip-innodb-doublewrite

View file

@ -7,6 +7,9 @@
let MYSQLD_DATADIR=`select @@datadir`;
let PAGE_SIZE=`select @@innodb_page_size`;
SELECT @@innodb_doublewrite;
SET GLOBAL innodb_doublewrite=fast;
-- disable_query_log
call mtr.add_suppression("InnoDB: innodb_force_recovery is on.");
call mtr.add_suppression("InnoDB: Ignoring tablespace for.*bug16720368");
@ -73,8 +76,11 @@ syswrite(FILE, $page, $ps)==$ps || die "Unable to write $file\n";
close(FILE) || die "Unable to close $file";
EOF
-- let $restart_parameters=--innodb-flush-method=O_DIRECT
-- source include/start_mysqld.inc
-- let $restart_parameters=
SELECT @@innodb_doublewrite;
--error ER_TABLE_CORRUPT
SELECT COUNT(*) FROM bug16720368;
--error ER_TABLE_CORRUPT

View file

@ -1,33 +1,25 @@
'#---------------------BS_STVARS_026_01----------------------#'
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
COUNT(@@GLOBAL.innodb_doublewrite)
1
1 Expected
'#---------------------BS_STVARS_026_02----------------------#'
SELECT @@GLOBAL.innodb_doublewrite;
@@GLOBAL.innodb_doublewrite
ON
SET @@GLOBAL.innodb_doublewrite=0;
SELECT @@GLOBAL.innodb_doublewrite;
@@GLOBAL.innodb_doublewrite
OFF
SET @@GLOBAL.innodb_doublewrite=2;
SET @@GLOBAL.innodb_doublewrite=3;
ERROR 42000: Variable 'innodb_doublewrite' can't be set to the value of '3'
SELECT @@GLOBAL.innodb_doublewrite;
@@GLOBAL.innodb_doublewrite
fast
SET @@GLOBAL.innodb_doublewrite=1;
ERROR HY000: Variable 'innodb_doublewrite' is a read only variable
Expected error 'Read only variable'
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
COUNT(@@GLOBAL.innodb_doublewrite)
1
1 Expected
'#---------------------BS_STVARS_026_03----------------------#'
SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_doublewrite';
IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
1
1 Expected
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
COUNT(@@GLOBAL.innodb_doublewrite)
1
1 Expected
SELECT @@GLOBAL.innodb_doublewrite;
@@GLOBAL.innodb_doublewrite
ON
SELECT COUNT(VARIABLE_VALUE)
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_doublewrite';
COUNT(VARIABLE_VALUE)
1
1 Expected
'#---------------------BS_STVARS_026_04----------------------#'
SELECT @@innodb_doublewrite = @@GLOBAL.innodb_doublewrite;
@@innodb_doublewrite = @@GLOBAL.innodb_doublewrite
@ -48,6 +40,5 @@ SELECT COUNT(@@GLOBAL.innodb_doublewrite);
COUNT(@@GLOBAL.innodb_doublewrite)
1
1 Expected
SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite;
ERROR 42S22: Unknown column 'innodb_doublewrite' in 'field list'
Expected error 'Readonly variable'
SELECT @@innodb_doublewrite = @@SESSION.innodb_doublewrite;
ERROR HY000: Variable 'innodb_doublewrite' is a GLOBAL variable

View file

@ -503,14 +503,14 @@ VARIABLE_NAME INNODB_DOUBLEWRITE
SESSION_VALUE NULL
DEFAULT_VALUE ON
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BOOLEAN
VARIABLE_COMMENT Enable InnoDB doublewrite buffer (enabled by default). Disable with --skip-innodb-doublewrite.
VARIABLE_TYPE ENUM
VARIABLE_COMMENT Whether and how to use the doublewrite buffer. OFF=Assume that writes of innodb_page_size are atomic; ON=Prevent torn writes (the default); fast=Like ON, but do not synchronize writes to data files
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST OFF,ON
READ_ONLY YES
COMMAND_LINE_ARGUMENT NONE
ENUM_VALUE_LIST OFF,ON,fast
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_ENCRYPTION_ROTATE_KEY_AGE
SESSION_VALUE NULL
DEFAULT_VALUE 1

View file

@ -0,0 +1 @@
--innodb-doublewrite

View file

@ -1,75 +1,20 @@
################## mysql-test\t\innodb_doublewrite_basic.test #################
# #
# Variable Name: innodb_doublewrite #
# Scope: Global #
# Access Type: Static #
# Data Type: boolean #
# #
# #
# Creation Date: 2008-02-07 #
# Author : Sharique Abdullah #
# #
# #
# Description:Test Cases of Dynamic System Variable innodb_doublewrite #
# that checks the behavior of this variable in the following ways #
# * Value Check #
# * Scope Check #
# #
# Reference: http://dev.mysql.com/doc/refman/5.1/en/ #
# server-system-variables.html #
# #
###############################################################################
--source include/have_innodb.inc
--echo '#---------------------BS_STVARS_026_01----------------------#'
####################################################################
# Displaying default value #
####################################################################
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
--echo 1 Expected
SELECT @@GLOBAL.innodb_doublewrite;
SET @@GLOBAL.innodb_doublewrite=0;
SELECT @@GLOBAL.innodb_doublewrite;
--echo '#---------------------BS_STVARS_026_02----------------------#'
####################################################################
# Check if Value can set #
####################################################################
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
SET @@GLOBAL.innodb_doublewrite=2;
--error ER_WRONG_VALUE_FOR_VAR
SET @@GLOBAL.innodb_doublewrite=3;
SELECT @@GLOBAL.innodb_doublewrite;
SET @@GLOBAL.innodb_doublewrite=1;
--echo Expected error 'Read only variable'
SELECT @@GLOBAL.innodb_doublewrite;
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
--echo 1 Expected
--echo '#---------------------BS_STVARS_026_03----------------------#'
#################################################################
# Check if the value in GLOBAL Table matches value in variable #
#################################################################
--disable_warnings
SELECT IF(@@GLOBAL.innodb_doublewrite, "ON", "OFF") = VARIABLE_VALUE
SELECT COUNT(VARIABLE_VALUE)
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_doublewrite';
--enable_warnings
--echo 1 Expected
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
--echo 1 Expected
--disable_warnings
SELECT COUNT(VARIABLE_VALUE)
FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
WHERE VARIABLE_NAME='innodb_doublewrite';
--enable_warnings
--echo 1 Expected
--echo '#---------------------BS_STVARS_026_04----------------------#'
################################################################################
@ -78,8 +23,6 @@ WHERE VARIABLE_NAME='innodb_doublewrite';
SELECT @@innodb_doublewrite = @@GLOBAL.innodb_doublewrite;
--echo 1 Expected
--echo '#---------------------BS_STVARS_026_05----------------------#'
################################################################################
# Check if innodb_doublewrite can be accessed with and without @@ sign #
@ -99,8 +42,5 @@ SELECT COUNT(@@SESSION.innodb_doublewrite);
SELECT COUNT(@@GLOBAL.innodb_doublewrite);
--echo 1 Expected
--Error ER_BAD_FIELD_ERROR
SELECT innodb_doublewrite = @@SESSION.innodb_doublewrite;
--echo Expected error 'Readonly variable'
--error ER_INCORRECT_GLOBAL_LOCAL_VAR
SELECT @@innodb_doublewrite = @@SESSION.innodb_doublewrite;

View file

@ -53,6 +53,7 @@ void buf_dblwr_t::init()
active_slot= &slots[0];
mysql_mutex_init(buf_dblwr_mutex_key, &mutex, nullptr);
pthread_cond_init(&cond, nullptr);
block_size= FSP_EXTENT_SIZE;
}
}
@ -67,7 +68,7 @@ inline void buf_dblwr_t::init(const byte *header)
block1= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK1));
block2= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK2));
const uint32_t buf_size= 2 * block_size();
const uint32_t buf_size= 2 * block_size;
for (int i= 0; i < 2; i++)
{
slots[i].write_buf= static_cast<byte*>
@ -86,7 +87,7 @@ bool buf_dblwr_t::create()
return true;
mtr_t mtr;
const ulint size= block_size();
const ulint size= block_size;
start_again:
mtr.start();
@ -251,7 +252,7 @@ loads the pages from double write buffer into memory.
dberr_t buf_dblwr_t::init_or_load_pages(pfs_os_file_t file, const char *path)
{
ut_ad(this == &buf_dblwr);
const uint32_t size= block_size();
const uint32_t size= block_size;
/* We do the file i/o past the buffer pool */
byte *read_buf= static_cast<byte*>(aligned_malloc(srv_page_size,
@ -488,7 +489,6 @@ void buf_dblwr_t::write_completed()
mysql_mutex_lock(&mutex);
ut_ad(is_created());
ut_ad(srv_use_doublewrite_buf);
ut_ad(batch_running);
slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
ut_ad(flush_slot->reserved);
@ -574,7 +574,7 @@ static void buf_dblwr_check_block(const buf_page_t *bpage)
bool buf_dblwr_t::flush_buffered_writes(const ulint size)
{
mysql_mutex_assert_owner(&mutex);
ut_ad(size == block_size());
ut_ad(size == block_size);
for (;;)
{
@ -647,7 +647,6 @@ static void *get_frame(const IORequest &request)
void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
{
ut_ad(this == &buf_dblwr);
ut_ad(srv_use_doublewrite_buf);
ut_ad(is_created());
ut_ad(!srv_read_only_mode);
ut_ad(!request.bpage);
@ -670,8 +669,14 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
pages_written+= flush_slot->first_free;
mysql_mutex_unlock(&mutex);
/* Now flush the doublewrite buffer data to disk */
fil_system.sys_space->flush<false>();
/* Make the doublewrite durable. Note: The doublewrite buffer is
always in the first file of the system tablespace. We will not
bother about fil_system.unflushed_spaces, which can result in a
redundant call during fil_flush_file_spaces() in
log_checkpoint(). Writes to the system tablespace should be rare,
except when executing DDL or using the non-default settings
innodb_file_per_table=OFF or innodb_undo_tablespaces=0. */
os_file_flush(request.node->handle);
/* The writes have been flushed to disk now and in recovery we will
find them in the doublewrite buffer blocks. Next, write the data pages. */
@ -714,17 +719,18 @@ posted, and also when we may have to wait for a page latch!
Otherwise a deadlock of threads can occur. */
void buf_dblwr_t::flush_buffered_writes()
{
if (!is_created() || !srv_use_doublewrite_buf)
mysql_mutex_lock(&mutex);
if (!in_use() && active_slot->first_free == 0)
{
mysql_mutex_unlock(&mutex);
fil_flush_file_spaces();
return;
}
ut_ad(!srv_read_only_mode);
const ulint size= block_size();
mysql_mutex_lock(&mutex);
if (!flush_buffered_writes(size))
if (!flush_buffered_writes(block_size))
mysql_mutex_unlock(&mutex);
}
@ -734,8 +740,6 @@ flush_buffered_writes() will be invoked to make space.
@param size payload size in bytes */
void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
{
ut_ad(request.is_async());
ut_ad(request.is_write());
ut_ad(request.bpage);
ut_ad(request.bpage->in_file());
ut_ad(request.node);
@ -744,7 +748,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
ut_ad(request.node->space->referenced());
ut_ad(!srv_read_only_mode);
const ulint buf_size= 2 * block_size();
const ulint buf_size= 2 * block_size;
mysql_mutex_lock(&mutex);
@ -773,7 +777,7 @@ void buf_dblwr_t::add_to_batch(const IORequest &request, size_t size)
ut_ad(active_slot->reserved == active_slot->first_free);
ut_ad(active_slot->reserved < buf_size);
new (active_slot->buf_block_arr + active_slot->first_free++)
element{request, size};
element{request.doublewritten(), size};
active_slot->reserved= active_slot->first_free;
if (active_slot->first_free != buf_size ||

View file

@ -350,9 +350,9 @@ void buf_page_write_complete(const IORequest &request, bool error)
else
{
bpage->write_complete(persistent, error, state);
if (state < buf_page_t::WRITE_FIX_REINIT &&
request.node->space->use_doublewrite())
if (request.is_doublewritten())
{
ut_ad(state < buf_page_t::WRITE_FIX_REINIT);
ut_ad(persistent);
buf_dblwr.write_completed();
}

View file

@ -1231,9 +1231,6 @@ void fil_system_t::create(ulint hash_size)
ut_ad(!is_initialised());
ut_ad(!(srv_page_size % FSP_EXTENT_SIZE));
ut_ad(srv_page_size);
ut_ad(!spaces.array);
m_initialised = true;
compile_time_assert(!(UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX));
compile_time_assert(!(UNIV_PAGE_SIZE_MIN % FSP_EXTENT_SIZE_MIN));
@ -1244,6 +1241,8 @@ void fil_system_t::create(ulint hash_size)
spaces.create(hash_size);
need_unflushed_spaces = !write_through && buf_dblwr.need_fsync();
fil_space_crypt_init();
#ifdef __linux__
ssd.clear();
@ -1317,13 +1316,12 @@ void fil_system_t::close()
if (is_initialised())
{
m_initialised= false;
spaces.free();
mysql_mutex_destroy(&mutex);
fil_space_crypt_cleanup();
}
ut_ad(!spaces.array);
ut_ad(!is_initialised());
#ifdef __linux__
ssd.clear();
@ -1464,6 +1462,7 @@ void fil_system_t::set_write_through(bool write_through)
{
this->write_through= write_through;
fil_space_t::reopen_all();
need_unflushed_spaces = !write_through && buf_dblwr.need_fsync();
}
mysql_mutex_unlock(&mutex);
@ -2833,19 +2832,18 @@ static void fil_invalid_page_access_msg(const char *name,
}
/** Update the data structures on write completion */
inline void fil_node_t::complete_write()
void fil_space_t::complete_write()
{
mysql_mutex_assert_not_owner(&fil_system.mutex);
if (space->purpose != FIL_TYPE_TEMPORARY &&
(!fil_system.is_write_through() && !my_disable_sync) &&
space->set_needs_flush())
if (purpose != FIL_TYPE_TEMPORARY &&
fil_system.use_unflushed_spaces() && set_needs_flush())
{
mysql_mutex_lock(&fil_system.mutex);
if (!space->is_in_unflushed_spaces)
if (!is_in_unflushed_spaces)
{
space->is_in_unflushed_spaces= true;
fil_system.unflushed_spaces.push_front(*space);
is_in_unflushed_spaces= true;
fil_system.unflushed_spaces.push_front(*this);
}
mysql_mutex_unlock(&fil_system.mutex);
}
@ -2945,7 +2943,7 @@ io_error:
if (!type.is_async()) {
if (type.is_write()) {
release_sync_write:
node->complete_write();
complete_write();
release:
release();
goto func_exit;
@ -2965,21 +2963,28 @@ void IORequest::write_complete(int io_error) const
{
ut_ad(fil_validate_skip());
ut_ad(node);
fil_space_t *space= node->space;
ut_ad(is_write());
node->complete_write();
if (!bpage)
{
ut_ad(!srv_read_only_mode);
if (type == IORequest::DBLWR_BATCH)
{
buf_dblwr.flush_buffered_writes_completed(*this);
/* Above, we already invoked os_file_flush() on the
doublewrite buffer if needed. */
goto func_exit;
}
else
ut_ad(type == IORequest::WRITE_ASYNC);
}
else
buf_page_write_complete(*this, io_error);
node->space->release();
space->complete_write();
func_exit:
space->release();
}
void IORequest::read_complete(int io_error) const

View file

@ -352,7 +352,7 @@ static TYPELIB innodb_default_row_format_typelib = {
};
/** Names of allowed values of innodb_flush_method */
const char* innodb_flush_method_names[] = {
static const char* innodb_flush_method_names[] = {
"fsync",
"O_DSYNC",
"littlesync",
@ -380,6 +380,18 @@ TYPELIB innodb_flush_method_typelib = {
/** Deprecated parameter */
static ulong innodb_flush_method;
/** Names of allowed values of innodb_doublewrite */
static const char *innodb_doublewrite_names[]=
{"OFF", "ON", "fast", nullptr};
/** Enumeration of innodb_doublewrite */
TYPELIB innodb_doublewrite_typelib= {
array_elements(innodb_doublewrite_names) - 1,
"innodb_doublewrite_typelib",
innodb_doublewrite_names,
nullptr
};
/** Names of allowed values of innodb_deadlock_report */
static const char *innodb_deadlock_report_names[]= {
"off", /* Do not report any details of deadlocks */
@ -3982,6 +3994,10 @@ static int innodb_init_params()
} else if (innodb_flush_method >= 4 /* O_DIRECT */
IF_WIN(&& innodb_flush_method < 8 /* normal */,)) {
/* O_DIRECT and similar settings do nothing */
if (innodb_flush_method == 5 /* O_DIRECT_NO_FSYNC */
&& buf_dblwr.use) {
buf_dblwr.use = buf_dblwr.USE_FAST;
}
#ifdef O_DIRECT
} else if (srv_use_atomic_writes && my_may_have_atomic_write) {
/* If atomic writes are enabled, do the same as with
@ -18442,6 +18458,12 @@ static void innodb_data_file_write_through_update(THD *, st_mysql_sys_var*,
mysql_mutex_lock(&LOCK_global_system_variables);
}
static void innodb_doublewrite_update(THD *, st_mysql_sys_var*,
void *, const void *save)
{
fil_system.set_use_doublewrite(*static_cast<const ulong*>(save));
}
static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
void *var, const void *save)
{
@ -18775,11 +18797,14 @@ static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
"The common part for InnoDB table spaces.",
NULL, NULL, NULL);
static MYSQL_SYSVAR_BOOL(doublewrite, srv_use_doublewrite_buf,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Enable InnoDB doublewrite buffer (enabled by default)."
" Disable with --skip-innodb-doublewrite.",
NULL, NULL, TRUE);
static MYSQL_SYSVAR_ENUM(doublewrite, buf_dblwr.use,
PLUGIN_VAR_OPCMDARG,
"Whether and how to use the doublewrite buffer. "
"OFF=Assume that writes of innodb_page_size are atomic; "
"ON=Prevent torn writes (the default); "
"fast=Like ON, but do not synchronize writes to data files",
nullptr, innodb_doublewrite_update, true,
&innodb_doublewrite_typelib);
static MYSQL_SYSVAR_BOOL(use_atomic_writes, srv_use_atomic_writes,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,

View file

@ -53,9 +53,9 @@ class buf_dblwr_t
element* buf_block_arr;
};
/** the page number of the first doublewrite block (block_size() pages) */
/** the page number of the first doublewrite block (block_size pages) */
page_id_t block1{0, 0};
/** the page number of the second doublewrite block (block_size() pages) */
/** the page number of the second doublewrite block (block_size pages) */
page_id_t block2{0, 0};
/** mutex protecting the data members below */
@ -74,6 +74,22 @@ class buf_dblwr_t
slot slots[2];
slot *active_slot;
/** Size of the doublewrite block in pages */
uint32_t block_size;
public:
/** Values of use */
enum usage {
/** Assume that writes are atomic */
USE_NO= 0,
/** Use the doublewrite buffer with full durability */
USE_YES,
/** Durable writes to the doublewrite buffer, not to data files */
USE_FAST
};
/** The value of innodb_doublewrite */
ulong use;
private:
/** Initialise the persistent storage of the doublewrite buffer.
@param header doublewrite page header in the TRX_SYS page */
inline void init(const byte *header);
@ -126,9 +142,6 @@ public:
@param request the completed batch write request */
void flush_buffered_writes_completed(const IORequest &request);
/** Size of the doublewrite block in pages */
uint32_t block_size() const { return FSP_EXTENT_SIZE; }
/** Schedule a page write. If the doublewrite memory buffer is full,
flush_buffered_writes() will be invoked to make space.
@param request asynchronous write request
@ -139,6 +152,19 @@ public:
bool is_created() const
{ return UNIV_LIKELY(block1 != page_id_t(0, 0)); }
/** @return whether the doublewrite buffer is in use */
bool in_use() const { return is_created() && use; }
/** @return whether fsync() is needed on non-doublewrite pages */
bool need_fsync() const { return use < USE_FAST; }
void set_use(ulong use)
{
ut_ad(use <= USE_FAST);
mysql_mutex_lock(&mutex);
this->use= use;
mysql_mutex_unlock(&mutex);
}
/** @return whether a page identifier is part of the doublewrite buffer */
bool is_inside(const page_id_t id) const
{
@ -147,8 +173,8 @@ public:
ut_ad(block1 < block2);
if (id < block1)
return false;
const uint32_t size= block_size();
return id < block1 + size || (id >= block2 && id < block2 + size);
return id < block1 + block_size ||
(id >= block2 && id < block2 + block_size);
}
/** Wait for flush_buffered_writes() to be fully completed */

View file

@ -48,9 +48,6 @@ struct named_spaces_tag_t;
using space_list_t= ilist<fil_space_t, space_list_tag_t>;
// Forward declaration
extern my_bool srv_use_doublewrite_buf;
/** Undo tablespaces starts with space_id. */
extern uint32_t srv_undo_space_id_start;
/** The number of UNDO tablespaces that are open and ready to use. */
@ -1008,6 +1005,9 @@ public:
/** @return the tablespace name (databasename/tablename) */
name_type name() const;
/** Update the data structures on write completion */
void complete_write();
private:
/** @return whether the file is usable for io() */
ATTRIBUTE_COLD bool prepare_acquired();
@ -1080,9 +1080,6 @@ struct fil_node_t final
@return detached handle or OS_FILE_CLOSED */
inline pfs_os_file_t close_to_free(bool detach_handle= false);
/** Update the data structures on write completion */
inline void complete_write();
private:
/** Does stuff common for close() and detach() */
void prepare_to_close_or_detach();
@ -1090,8 +1087,7 @@ private:
inline bool fil_space_t::use_doublewrite() const
{
return !UT_LIST_GET_FIRST(chain)->atomic_write && srv_use_doublewrite_buf &&
buf_dblwr.is_created();
return !UT_LIST_GET_FIRST(chain)->atomic_write && buf_dblwr.in_use();
}
inline void fil_space_t::set_imported()
@ -1352,9 +1348,9 @@ struct fil_system_t
Some members may require late initialisation, thus we just mark object as
uninitialised. Real initialisation happens in create().
*/
fil_system_t() : m_initialised(false) {}
fil_system_t() {}
bool is_initialised() const { return m_initialised; }
bool is_initialised() const { return spaces.array; }
/**
Create the file system interface at database start.
@ -1367,8 +1363,6 @@ struct fil_system_t
void close();
private:
bool m_initialised;
/** Points to the last opened space in space_list. Protected with
fil_system.mutex. */
fil_space_t *space_list_last_opened= nullptr;
@ -1404,19 +1398,32 @@ public:
/** Map of fil_space_t::id to fil_space_t* */
hash_table_t spaces;
/** whether each write to data files is durable (O_DSYNC) */
/** false=invoke fsync() or fdatasync() on data files before checkpoint;
true=each write is durable (O_DSYNC) */
my_bool write_through;
/** whether data files are buffered (not O_DIRECT) */
my_bool buffered;
/** whether fdatasync() is needed on data files */
Atomic_relaxed<bool> need_unflushed_spaces;
/** Try to enable or disable write-through of data files */
void set_write_through(bool write_through);
/** Update innodb_doublewrite */
void set_use_doublewrite(ulong use)
{
buf_dblwr.set_use(use);
need_unflushed_spaces= !write_through && buf_dblwr.need_fsync();
}
/** Try to enable or disable file system caching of data files */
void set_buffered(bool buffered);
TPOOL_SUPPRESS_TSAN bool is_write_through() const { return write_through; }
TPOOL_SUPPRESS_TSAN bool is_buffered() const { return buffered; }
/** @return whether to update unflushed_spaces */
bool use_unflushed_spaces() const { return need_unflushed_spaces; }
/** tablespaces for which fil_space_t::needs_flush() holds */
sized_ilist<fil_space_t, unflushed_spaces_tag_t> unflushed_spaces;
/** number of currently open files; protected by mutex */

View file

@ -185,10 +185,14 @@ public:
WRITE_SYNC= 16,
/** Asynchronous write */
WRITE_ASYNC= WRITE_SYNC | 1,
/** Asynchronous doublewritten page */
WRITE_DBL= WRITE_ASYNC | 4,
/** A doublewrite batch */
DBLWR_BATCH= WRITE_ASYNC | 8,
/** Write data and punch hole for the rest */
PUNCH= WRITE_ASYNC | 16,
/** Write doublewritten data and punch hole for the rest */
PUNCH_DBL= PUNCH | 4,
/** Zero out a range of bytes in fil_space_t::io() */
PUNCH_RANGE= WRITE_SYNC | 32,
};
@ -204,6 +208,14 @@ public:
bool is_read() const { return (type & READ_SYNC) != 0; }
bool is_write() const { return (type & WRITE_SYNC) != 0; }
bool is_async() const { return (type & (READ_SYNC ^ READ_ASYNC)) != 0; }
bool is_doublewritten() const { return (type & 4) != 0; }
/** Create a write request for the doublewrite buffer. */
IORequest doublewritten() const
{
ut_ad(type == WRITE_ASYNC || type == PUNCH);
return IORequest{bpage, slot, node, Type(type | 4)};
}
void write_complete(int io_error) const;
void read_complete(int io_error) const;

View file

@ -303,7 +303,6 @@ extern my_bool srv_stats_include_delete_marked;
extern unsigned long long srv_stats_modified_counter;
extern my_bool srv_stats_sample_traditional;
extern my_bool srv_use_doublewrite_buf;
extern ulong srv_checksum_algorithm;
extern my_bool srv_force_primary_key;

View file

@ -305,8 +305,6 @@ unsigned long long srv_stats_modified_counter;
based on number of configured pages */
my_bool srv_stats_sample_traditional;
my_bool srv_use_doublewrite_buf;
/** innodb_sync_spin_loops */
ulong srv_n_spin_wait_rounds;
/** innodb_spin_wait_delay */

View file

@ -1193,7 +1193,7 @@ dberr_t srv_start(bool create_new_db)
if (srv_read_only_mode) {
sql_print_information("InnoDB: Started in read only mode");
srv_use_doublewrite_buf = false;
buf_dblwr.use = buf_dblwr.USE_NO;
}
high_level_read_only = srv_read_only_mode