MDEV-36234: Add innodb_linux_aio

This controls which linux implementation to use for
innodb_use_native_aio=ON.

innodb_linux_aio=auto is equivalent to innodb_linux_aio=io_uring when
it is available, and falls back to innodb_linux_aio=aio when it is not.

Debian packaging no longer treats aio and uring as mutually exclusive, so
for older Debian or Ubuntu releases, it's a remove_uring directive.
For more recent releases, liburing is added as a mandatory dependency
for consistent packaging.

WITH_LIBAIO is now an independent option from WITH_URING.

LINUX_NATIVE_AIO preprocessor constant is renamed to HAVE_LIBAIO,
analogous to existing HAVE_URING.

tpool::is_aio_supported(): A common feature check.

is_linux_native_aio_supported(): Remove. This had originally been added in
mysql/mysql-server@0da310b69d in 2012
to fix an issue where io_submit() on CentOS 5.5 would return EINVAL
for a /tmp/#sql*.ibd file associated with CREATE TEMPORARY TABLE.
But starting with commit 2e814d4702, InnoDB
temporary tables are written to innodb_temp_data_file_path.
The 2012 commit said that the error could occur on "old kernels".
Any GNU/Linux distribution that we currently support should be based
on a newer Linux kernel; for example, Red Hat Enterprise Linux 7
was released in 2014.

tpool::create_linux_aio(): Wraps the Linux implementations:
create_libaio() and create_liburing(), each defined in separate
compilation units (aio_linux.cc, aio_libaio.cc, aio_liburing.cc).

The CMake definitions are simplified using target_sources() and
target_compile_definitions(), all available since CMake 2.8.12.
With this change, there is no need to include ${CMAKE_SOURCE_DIR}/tpool
or add TPOOL_DEFINES flags anymore, target_link_libraries(lib tpool)
does all that.

This is joint work with Daniel Black and Vladislav Vaintroub.
This commit is contained in:
Marko Mäkelä 2025-06-23 13:51:52 +03:00
commit a87bb96ecb
29 changed files with 512 additions and 453 deletions

View file

@ -131,7 +131,6 @@ ENDIF()
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include
${CMAKE_SOURCE_DIR}/storage/innobase/handler
${CMAKE_SOURCE_DIR}/libbinlogevents/include)
INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/tpool)
SET(INNOBASE_SOURCES
btr/btr0btr.cc
@ -445,14 +444,16 @@ MYSQL_ADD_PLUGIN(innobase ${INNOBASE_SOURCES} STORAGE_ENGINE
${ZLIB_LIBRARIES}
${NUMA_LIBRARY}
${LIBSYSTEMD}
${LINKER_SCRIPT})
${LINKER_SCRIPT}
tpool
)
IF(NOT TARGET innobase)
RETURN()
ENDIF()
ADD_DEFINITIONS(${SSL_DEFINES} ${TPOOL_DEFINES})
ADD_DEFINITIONS(${SSL_DEFINES})
# A GCC bug causes crash when compiling these files on ARM64 with -O1+
# Compile them with -O0 as a workaround.
IF(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64"
@ -541,7 +542,7 @@ IF(INNODB_ENABLE_XAP_UNLOCK_UNMODIFIED_FOR_PRIMARY)
ENDIF()
IF(NOT (PLUGIN_INNOBASE STREQUAL DYNAMIC))
TARGET_LINK_LIBRARIES(innobase tpool mysys)
TARGET_LINK_LIBRARIES(innobase mysys)
ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup)
ENDIF()

View file

@ -315,6 +315,25 @@ static TYPELIB innodb_stats_method_typelib = {
NULL
};
/** Possible values for system variable "innodb_linux_aio" */
#ifdef __linux__
/** Names accepted by innodb_linux_aio. Each entry is annotated with the
SRV_LINUX_AIO_* constant it corresponds to, so the order here must stay
in step with those constants. The list is terminated by NullS. */
const char* innodb_linux_aio_names[] = {
"auto", /* SRV_LINUX_AIO_AUTO */
"io_uring", /* SRV_LINUX_AIO_IO_URING */
"aio", /* SRV_LINUX_AIO_LIBAIO */
NullS
};
/** Used to define an enumerated type for the system variable
innodb_linux_aio. Used by mariadb-backup too, which is why this object
is not declared static.
The first member is one less than the length of innodb_linux_aio_names,
presumably so that the NullS terminator is not counted as a value. */
TYPELIB innodb_linux_aio_typelib = {
array_elements(innodb_linux_aio_names) - 1,
"innodb_linux_aio_typelib",
innodb_linux_aio_names,
NULL
};
#endif
/** Possible values of the parameter innodb_checksum_algorithm */
const char* innodb_checksum_algorithm_names[] = {
"crc32",
@ -4059,12 +4078,8 @@ static int innodb_init_params()
log_sys.log_buffered= true;
#endif
#if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32
/* Currently native AIO is supported only on windows and linux
and that also when the support is compiled in. In all other
cases, we ignore the setting of innodb_use_native_aio. */
srv_use_native_aio= FALSE;
#endif
if (!tpool::supports_native_aio())
srv_use_native_aio= FALSE;
#ifdef _WIN32
switch (srv_file_flush_method) {
@ -19664,6 +19679,15 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
#ifdef __linux__
/* innodb_linux_aio: read-only enumeration backed by srv_linux_aio_method.
The accepted names come from innodb_linux_aio_typelib and the default is
SRV_LINUX_AIO_AUTO. The help text below is shown to users, so keep it
grammatical. */
static MYSQL_SYSVAR_ENUM(linux_aio, srv_linux_aio_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Specifies which Linux AIO implementation should be used."
  " Possible values are \"auto\" (default) to select io_uring"
  " and fall back to aio, or explicit \"io_uring\" or \"aio\"",
  nullptr, nullptr, SRV_LINUX_AIO_AUTO, &innodb_linux_aio_typelib);
#endif
#ifdef HAVE_LIBNUMA
static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@ -20059,6 +20083,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(tmpdir),
MYSQL_SYSVAR(autoinc_lock_mode),
MYSQL_SYSVAR(use_native_aio),
#ifdef __linux__
MYSQL_SYSVAR(linux_aio),
#endif
#ifdef HAVE_LIBNUMA
MYSQL_SYSVAR(numa_interleave),
#endif /* HAVE_LIBNUMA */

View file

@ -77,6 +77,19 @@ enum srv_flush_t
#endif
};
/** Possible values of innodb_linux_aio */
#ifdef __linux__
/* The numeric values are spelled out because os_aio_init() asserts at
compile time that they equal the corresponding tpool::OS_IO_* constants,
and innodb_linux_aio_names[] lists the names in this same order. */
enum srv_linux_aio_t
{
  /** "auto": try io_uring first, then aio */
  SRV_LINUX_AIO_AUTO= 0,
  /** "io_uring" */
  SRV_LINUX_AIO_IO_URING= 1,
  /** "aio": the libaio interface */
  SRV_LINUX_AIO_LIBAIO= 2
};
#endif
/** innodb_flush_method */
extern ulong srv_file_flush_method;

View file

@ -178,6 +178,12 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio.
Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
#ifdef __linux__
/* Selects which Linux native AIO implementation to use (innodb_linux_aio) */
extern ulong srv_linux_aio_method;
#endif
extern my_bool srv_numa_interleave;
/* Use atomic writes i.e disable doublewrite buffer */

View file

@ -52,10 +52,6 @@ Created 10/21/1995 Heikki Tuuri
#include <tpool_structs.h>
#ifdef LINUX_NATIVE_AIO
#include <libaio.h>
#endif /* LINUX_NATIVE_AIO */
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
# include <fcntl.h>
# include <linux/falloc.h>
@ -3079,132 +3075,6 @@ static void write_io_callback(void *c)
write_slots->release(cb);
}
#ifdef LINUX_NATIVE_AIO
/** Checks if the system supports native linux aio. On some kernel
versions where native aio is supported it won't work on tmpfs. In such
cases we can't use native aio.
@return: true if supported, false otherwise. */
static bool is_linux_native_aio_supported()
{
File fd;
io_context_t io_ctx;
std::string log_file_path = get_log_file_path();
memset(&io_ctx, 0, sizeof(io_ctx));
if (io_setup(1, &io_ctx)) {
/* The platform does not support native aio. */
return(false);
}
else if (!srv_read_only_mode) {
/* Now check if tmpdir supports native aio ops. */
fd = mysql_tmpfile("ib");
if (fd < 0) {
ib::warn()
<< "Unable to create temp file to check"
" native AIO support.";
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(false);
}
}
else {
fd = my_open(log_file_path.c_str(), O_RDONLY | O_CLOEXEC,
MYF(0));
if (fd == -1) {
ib::warn() << "Unable to open \"" << log_file_path
<< "\" to check native"
<< " AIO read support.";
int ret = io_destroy(io_ctx);
ut_a(ret != EINVAL);
ut_ad(ret != EFAULT);
return(false);
}
}
struct io_event io_event;
memset(&io_event, 0x0, sizeof(io_event));
byte* ptr = static_cast<byte*>(aligned_malloc(srv_page_size,
srv_page_size));
struct iocb iocb;
/* Suppress valgrind warning. */
memset(ptr, 0, srv_page_size);
memset(&iocb, 0x0, sizeof(iocb));
struct iocb* p_iocb = &iocb;
if (!srv_read_only_mode) {
io_prep_pwrite(p_iocb, fd, ptr, srv_page_size, 0);
}
else {
ut_a(srv_page_size >= 512);
io_prep_pread(p_iocb, fd, ptr, 512, 0);
}
int err = io_submit(io_ctx, 1, &p_iocb);
if (err >= 1) {
/* Now collect the submitted IO request. */
err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
}
aligned_free(ptr);
my_close(fd, MYF(MY_WME));
switch (err) {
case 1:
{
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(true);
}
case -EINVAL:
case -ENOSYS:
ib::warn()
<< "Linux Native AIO not supported. You can either"
" move "
<< (srv_read_only_mode ? log_file_path : "tmpdir")
<< " to a file system that supports native"
" AIO or you can set innodb_use_native_aio to"
" FALSE to avoid this message.";
/* fall through. */
default:
ib::warn()
<< "Linux Native AIO check on "
<< (srv_read_only_mode ? log_file_path : "tmpdir")
<< "returned error[" << -err << "]";
}
int ret = io_destroy(io_ctx);
ut_a(ret != -EINVAL);
ut_ad(ret != -EFAULT);
return(false);
}
#endif
int os_aio_init() noexcept
{
int max_write_events= int(srv_n_write_io_threads *
@ -3212,41 +3082,41 @@ int os_aio_init() noexcept
int max_read_events= int(srv_n_read_io_threads *
OS_AIO_N_PENDING_IOS_PER_THREAD);
int max_events= max_read_events + max_write_events;
int ret;
#if LINUX_NATIVE_AIO
if (srv_use_native_aio && !is_linux_native_aio_supported())
goto disable;
#endif
int ret= 1;
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events);
#ifdef LINUX_NATIVE_AIO
if (ret)
if (srv_use_native_aio)
{
ut_ad(srv_use_native_aio);
disable:
ib::warn() << "Linux Native AIO disabled.";
srv_use_native_aio= false;
ret= srv_thread_pool->configure_aio(false, max_events);
}
tpool::aio_implementation aio_impl= tpool::OS_IO_DEFAULT;
#ifdef __linux__
compile_time_assert(SRV_LINUX_AIO_IO_URING == (srv_linux_aio_t)tpool::OS_IO_URING);
compile_time_assert(SRV_LINUX_AIO_LIBAIO == (srv_linux_aio_t) tpool::OS_IO_LIBAIO);
compile_time_assert(SRV_LINUX_AIO_AUTO == (srv_linux_aio_t) tpool::OS_IO_DEFAULT);
aio_impl=(tpool::aio_implementation) srv_linux_aio_method;
#endif
#ifdef HAVE_URING
ret= srv_thread_pool->configure_aio(srv_use_native_aio, max_events,
aio_impl);
if (ret)
{
srv_use_native_aio= false;
sql_print_warning("InnoDB: native AIO failed: falling back to"
" innodb_use_native_aio=OFF");
}
else
sql_print_information("InnoDB: Using %s", srv_thread_pool
->get_aio_implementation());
}
if (ret)
{
ut_ad(srv_use_native_aio);
ib::warn()
<< "liburing disabled: falling back to innodb_use_native_aio=OFF";
srv_use_native_aio= false;
ret= srv_thread_pool->configure_aio(false, max_events);
}
#endif
ret= srv_thread_pool->configure_aio(false, max_events,
tpool::OS_IO_DEFAULT);
if (!ret)
{
read_slots= new io_slots(max_read_events, srv_n_read_io_threads);
write_slots= new io_slots(max_write_events, srv_n_write_io_threads);
}
else
sql_print_error("InnoDB: Cannot initialize AIO sub-system");
return ret;
}
@ -3285,8 +3155,8 @@ int os_aio_resize(ulint n_reader_threads, ulint n_writer_threads) noexcept
int max_write_events= int(n_writer_threads * OS_AIO_N_PENDING_IOS_PER_THREAD);
int events= max_read_events + max_write_events;
/** Do the Linux AIO dance (this will try to create a new
io context with changed max_events ,etc*/
/* Do the Linux AIO dance (this will try to create a new
io context with changed max_events, etc.) */
int ret= srv_thread_pool->reconfigure_aio(srv_use_native_aio, events);

View file

@ -137,6 +137,10 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
my_bool srv_use_native_aio;
#ifdef __linux__
/* Selects which Linux native AIO implementation to use (innodb_linux_aio) */
ulong srv_linux_aio_method;
#endif
my_bool srv_numa_interleave;
/** copy of innodb_use_atomic_writes; @see innodb_init_params() */
my_bool srv_use_atomic_writes;

View file

@ -1287,22 +1287,9 @@ dberr_t srv_start(bool create_new_db)
}
if (os_aio_init()) {
ib::error() << "Cannot initialize AIO sub-system";
return(srv_init_abort(DB_ERROR));
}
#ifdef LINUX_NATIVE_AIO
if (srv_use_native_aio) {
ib::info() << "Using Linux native AIO";
}
#endif
#ifdef HAVE_URING
if (srv_use_native_aio) {
ib::info() << "Using liburing";
}
#endif
fil_system.create(srv_file_per_table ? 50000 : 5000);
if (buf_pool.create()) {