MDEV-24854: Change innodb_flush_method=O_DIRECT by default

We have innodb_use_native_aio=ON by default since the introduction of
that parameter in commit 2f9fb41b05
(MySQL 5.5 and MariaDB 5.5).

However, to really benefit from the setting, the files should be
opened in O_DIRECT mode, to bypass the file system cache.
In this way, the reads and writes can be submitted with DMA, using
the InnoDB buffer pool directly, and no processor cycles need to be
used for copying data. The use of O_DIRECT benefits not only the
current libaio implementation, but also liburing.

os_file_set_nocache(): Test innodb_flush_method in the function,
not in the callers.
This commit is contained in:
Marko Mäkelä 2021-02-20 11:58:58 +02:00
parent 43b239a081
commit 420f8e24ab
7 changed files with 27 additions and 39 deletions

View file

@ -1506,7 +1506,7 @@ struct my_option xb_server_options[] =
"With which method to flush data.",
&srv_file_flush_method, &srv_file_flush_method,
&innodb_flush_method_typelib, GET_ENUM, REQUIRED_ARG,
IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC), 0, 0, 0, 0, 0},
IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_O_DIRECT), 0, 0, 0, 0, 0},
{"innodb_log_buffer_size", OPT_INNODB_LOG_BUFFER_SIZE,
"The size of the buffer which InnoDB uses to write log to the log files on disk.",

View file

@ -1,7 +1,7 @@
call mtr.add_suppression("InnoDB: Failed to set .*DIRECT");
select @@innodb_flush_method;
@@innodb_flush_method
fsync
O_DIRECT
create table t(a serial) engine=innodb;
FLUSH TABLES;
# restart: --innodb-flush-method=5

View file

@ -743,7 +743,7 @@ READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_FLUSH_METHOD
SESSION_VALUE NULL
DEFAULT_VALUE fsync
DEFAULT_VALUE O_DIRECT
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE ENUM
VARIABLE_COMMENT With which method to flush data.

View file

@ -6,9 +6,9 @@ machine="Linux-x64"
# InnoDB tests
./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=1 --skip-innodb-doublewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
./run-all-tests --suffix=-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --log
./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --innodb_flush_method=O_DIRECT --innodb_log_files_in_group=1 --skip-innodb-doublewrite" --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
./run-all-tests --suffix=_fast-innodb --comments="Engine=InnoDB --innodb_buffer_pool_size=256M --innodb_log_file_size=2000M --innodb_log_buffer_size=16M --innodb_lock_wait_timeout=50 --innodb_flush_log_at_trx_commit=1 --create-options="ENGINE=InnoDB" --hw="$hw" --optimization="$optimization" --machine="$machine" --fast --log
# MyISAM tests

View file

@ -18308,7 +18308,7 @@ static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
static MYSQL_SYSVAR_ENUM(flush_method, innodb_flush_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"With which method to flush data.",
NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_FSYNC),
NULL, NULL, IF_WIN(SRV_ALL_O_DIRECT_FSYNC, SRV_O_DIRECT),
&innodb_flush_method_typelib);
static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,

View file

@ -61,13 +61,11 @@ enum srv_flush_t
/** do not flush after writing */
SRV_NOSYNC,
/** invoke os_file_set_nocache() on data files. This implies using
non-buffered IO but still using fsync, the reason for which is that
some FS do not flush meta-data when unbuffered IO happens */
unbuffered I/O but still fdatasync(), because some filesystems might
not flush meta-data on write completion */
SRV_O_DIRECT,
/** do not use fsync() when using direct IO i.e.: it can be set to
avoid the fsync() call that we make when using SRV_UNIX_O_DIRECT.
However, in this case user/DBA should be sure about the integrity of
the meta-data */
/** Like O_DIRECT, but skip fdatasync(), assuming that the data is
durable on write completion */
SRV_O_DIRECT_NO_FSYNC
#ifdef _WIN32
/** Traditional Windows appoach to open all files without caching,

View file

@ -2,7 +2,7 @@
Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2013, 2020, MariaDB Corporation.
Copyright (c) 2013, 2021, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
@ -1112,12 +1112,8 @@ os_file_create_simple_func(
/* This function is always called for data files, we should disable
OS caching (O_DIRECT) here as we do in os_file_create_func(), so
we open the same file in the same mode, see man page of open(2). */
if (!srv_read_only_mode
&& *success
&& (srv_file_flush_method == SRV_O_DIRECT
|| srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)) {
os_file_set_nocache(file, name, mode_str);
if (!srv_read_only_mode && *success) {
os_file_set_nocache(file, name, mode_str);
}
#ifdef USE_FILE_LOCK
@ -1426,11 +1422,8 @@ os_file_create_func(
if (!read_only
&& *success
&& type != OS_LOG_FILE && type != OS_DATA_TEMP_FILE
&& type != OS_DATA_FILE_NO_O_DIRECT
&& (srv_file_flush_method == SRV_O_DIRECT
|| srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC)) {
os_file_set_nocache(file, name, mode_str);
&& type != OS_DATA_FILE_NO_O_DIRECT) {
os_file_set_nocache(file, name, mode_str);
}
#ifdef USE_FILE_LOCK
@ -3484,6 +3477,15 @@ os_file_set_nocache(
const char* file_name MY_ATTRIBUTE((unused)),
const char* operation_name MY_ATTRIBUTE((unused)))
{
const auto innodb_flush_method = srv_file_flush_method;
switch (innodb_flush_method) {
case SRV_O_DIRECT:
case SRV_O_DIRECT_NO_FSYNC:
break;
default:
return;
}
/* some versions of Solaris may not have DIRECTIO_ON */
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
if (directio(fd, DIRECTIO_ON) == -1) {
@ -3502,23 +3504,11 @@ os_file_set_nocache(
if (errno_save == EINVAL) {
if (!warning_message_printed) {
warning_message_printed = true;
# ifdef UNIV_LINUX
ib::warn()
<< "Failed to set O_DIRECT on file"
<< file_name << "; " << operation_name
<< ": " << strerror(errno_save) << ", "
"continuing anyway. O_DIRECT is "
"known to result in 'Invalid argument' "
"on Linux on tmpfs, "
"see MySQL Bug#26662.";
# else /* UNIV_LINUX */
goto short_warning;
# endif /* UNIV_LINUX */
ib::info()
<< "Setting O_DIRECT on file "
<< file_name << " failed";
}
} else {
# ifndef UNIV_LINUX
short_warning:
# endif
ib::warn()
<< "Failed to set O_DIRECT on file "
<< file_name << "; " << operation_name