mariadb/tpool/aio_libaio.cc
Marko Mäkelä a87bb96ecb MDEV-36234: Add innodb_linux_aio
This controls which linux implementation to use for
innodb_use_native_aio=ON.

innodb_linux_aio=auto is equivalent to innodb_linux_aio=io_uring when
it is available, and falling back to innodb_linux_aio=aio when not.

Debian packaging no longer treats libaio and liburing as mutually exclusive,
so for those older Debian or Ubuntu releases, it's a remove_uring directive.
For more recent releases, add mandatory liburing for consistent packaging.

WITH_LIBAIO is now an independent option from WITH_URING.

LINUX_NATIVE_AIO preprocessor constant is renamed to HAVE_LIBAIO,
analogous to existing HAVE_URING.

tpool::is_aio_supported(): A common feature check.

is_linux_native_aio_supported(): Remove. This had originally been added in
mysql/mysql-server@0da310b69d in 2012
to fix an issue where io_submit() on CentOS 5.5 would return EINVAL
for a /tmp/#sql*.ibd file associated with CREATE TEMPORARY TABLE.
But, starting with commit 2e814d4702 InnoDB
temporary tables will be written to innodb_temp_data_file_path.
The 2012 commit said that the error could occur on "old kernels".
Any GNU/Linux distribution that we currently support should be based
on a newer Linux kernel; for example, Red Hat Enterprise Linux 7
was released in 2014.

tpool::create_linux_aio(): Wraps the Linux implementations:
create_libaio() and create_liburing(), each defined in separate
compilation units (aio_linux.cc, aio_libaio.cc, aio_liburing.cc).

The CMake definitions are simplified using target_sources() and
target_compile_definitions(), all available since CMake 2.8.12.
With this change, there is no need to include ${CMAKE_SOURCE_DIR}/tpool
or add TPOOL_DEFINES flags anymore, target_link_libraries(lib tpool)
does all that.

This is joint work with Daniel Black and Vladislav Vaintroub.
2025-06-23 13:51:52 +03:00

193 lines
5.5 KiB
C++

/* Copyright (C) 2019, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
#include "tpool.h"
#include <thread>
#include <sys/syscall.h>
#include <libaio.h>
/**
  Invoke the io_getevents() system call directly, without a timeout.

  @param ctx     context from io_setup()
  @param min_nr  minimum number of completion events to wait for
  @param nr      maximum number of completion events to collect
  @param ev      the collected events
  @return the non-negative return value of the system call
  (the number of collected events)
  @retval -errno if the system call failed; in that case errno is
  restored to the value that it had on entry

  In https://pagure.io/libaio/c/7cede5af5adf01ad26155061cc476aad0804d3fc
  the io_getevents() implementation in libaio was "optimized" so that it
  would elide the system call when there are no outstanding requests
  and a timeout was specified.

  The libaio code for dereferencing ctx would occasionally trigger
  SIGSEGV if io_destroy() was concurrently invoked from another thread.

  Hence, we have to use the raw system call.

  WHY are we doing this at all?
  Because we want io_destroy() from another thread to interrupt
  io_getevents().

  And, WHY do we want io_destroy() from another thread to interrupt
  io_getevents()?

  Because there is no documented, libaio-friendly and
  race-condition-free way to interrupt io_getevents(). io_destroy()
  coupled with raw syscall seemed to work for us so far.

  Historical note: in the past, we used io_getevents with
  timeouts. We'd wake up periodically, check for shutdown flag, return
  from the main routine. This was admittedly safer, yet it did cost
  periodic wakeups, which we are not willing to do anymore.

  @note we also rely on the undocumented property, that io_destroy(ctx)
  will make this version of io_getevents return EINVAL.
*/
static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
  noexcept
{
  int saved_errno= errno;
  /* The last argument is the timeout pointer. syscall() is variadic, so
  the argument must already be pointer-sized: a plain int 0 is not
  guaranteed to be widened to a full-width null pointer. */
  int ret= syscall(__NR_io_getevents, reinterpret_cast<long>(ctx),
                   min_nr, nr, ev, static_cast<void*>(nullptr));
  if (ret < 0)
  {
    /* Report the failure as a negative error code and hide the
    errno change from the caller. */
    ret= -errno;
    errno= saved_errno;
  }
  return ret;
}
/*
Linux AIO implementation, based on native AIO.
Needs libaio.h at compile time and -laio at link time.
io_submit() is used to submit asynchronous I/O.
A single thread collects the completion notifications
with io_getevents() and forwards the I/O completion callbacks to
the worker thread pool.
*/
namespace
{
using namespace tpool;
class aio_libaio final : public aio
{
thread_pool *m_pool;
io_context_t m_io_ctx;
std::thread m_getevent_thread;
static std::atomic<bool> shutdown_in_progress;
static void getevent_thread_routine(aio_libaio *aio)
{
/*
We collect events in small batches to hopefully reduce the
number of system calls.
*/
constexpr unsigned MAX_EVENTS= 256;
aio->m_pool->m_worker_init_callback();
io_event events[MAX_EVENTS];
for (;;)
{
switch (int ret= my_getevents(aio->m_io_ctx, 1, MAX_EVENTS, events)) {
case -EINTR:
continue;
case -EINVAL:
if (shutdown_in_progress)
goto end;
/* fall through */
default:
if (ret < 0)
{
fprintf(stderr, "io_getevents returned %d\n", ret);
abort();
goto end;
}
for (int i= 0; i < ret; i++)
{
const io_event &event= events[i];
aiocb *iocb= reinterpret_cast<aiocb*>(event.obj);
if (static_cast<int>(event.res) < 0)
{
iocb->m_err= -event.res;
iocb->m_ret_len= 0;
}
else
{
iocb->m_ret_len= event.res;
iocb->m_err= 0;
finish_synchronous(iocb);
}
iocb->m_internal_task.m_func= iocb->m_callback;
iocb->m_internal_task.m_arg= iocb;
iocb->m_internal_task.m_group= iocb->m_group;
aio->m_pool->submit_task(&iocb->m_internal_task);
}
}
}
end:
aio->m_pool->m_worker_destroy_callback();
}
public:
aio_libaio(io_context_t ctx, thread_pool *pool)
: m_pool(pool), m_io_ctx(ctx),
m_getevent_thread(getevent_thread_routine, this)
{
}
~aio_libaio()
{
shutdown_in_progress= true;
io_destroy(m_io_ctx);
m_getevent_thread.join();
shutdown_in_progress= false;
}
int submit_io(aiocb *cb) override
{
io_prep_pread(&cb->m_iocb, cb->m_fh, cb->m_buffer, cb->m_len, cb->m_offset);
if (cb->m_opcode != aio_opcode::AIO_PREAD)
cb->m_iocb.aio_lio_opcode= IO_CMD_PWRITE;
iocb *icb= &cb->m_iocb;
int ret= io_submit(m_io_ctx, 1, &icb);
if (ret == 1)
return 0;
errno= -ret;
return -1;
}
int bind(native_file_handle&) override { return 0; }
int unbind(const native_file_handle&) override { return 0; }
const char *get_implementation() const override { return "Linux native AIO"; };
};
std::atomic<bool> aio_libaio::shutdown_in_progress;
}
namespace tpool
{
/**
  Create an asynchronous I/O back end that is based on libaio.
  @param pool    thread pool for executing the completion callbacks
  @param max_io  the number of events to pass to io_setup()
  @return the created object
  @retval nullptr if io_setup() failed (the error is reported on stderr)
*/
aio *create_libaio(thread_pool *pool, int max_io)
{
  /* io_setup() is given a zero-initialized context handle. */
  io_context_t ctx{};
  const int rc= io_setup(max_io, &ctx);
  if (rc == 0)
    return new aio_libaio(ctx, pool);

  fprintf(stderr, "io_setup(%d) returned %d\n", max_io, rc);
  return nullptr;
}
}