mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-24883 add io_uring support for tpool
liburing is a new optional dependency (WITH_URING=auto|yes|no) that replaces libaio when it is available. aio_uring: a class which wraps the io_uring calls. aio_uring::bind()/unbind(): an optional optimization. aio_uring::submit_io(): a mutex prevents a data race, because liburing calls are not thread-safe. If you look into its implementation you will see atomic operations, but they are used for synchronization between the kernel and user space only. That is why our own synchronization is still needed. For systemd, we add LimitMEMLOCK=524288 (ulimit -l 524288) because the io_uring_setup system call that is invoked by io_uring_queue_init() requests locked memory. The value was found empirically; with 262144, we would occasionally fail to enable io_uring when using the maximum values of innodb_read_io_threads=64 and innodb_write_io_threads=64. aio_uring::thread_routine(): Tolerate -EINTR return from io_uring_wait_cqe(), because it may occur on shutdown on Ubuntu 20.10 (Groovy Gorilla). This was mostly implemented by Eugene Kosov. Systemd integration and improved startup/shutdown error handling by Marko Mäkelä.
This commit is contained in:
parent
3dfda08702
commit
783625d78f
17 changed files with 262 additions and 22 deletions
|
@ -174,6 +174,7 @@ INCLUDE(mysql_add_executable)
|
|||
INCLUDE(symlinks)
|
||||
INCLUDE(compile_flags)
|
||||
INCLUDE(pmem)
|
||||
INCLUDE(uring)
|
||||
|
||||
# Handle options
|
||||
OPTION(DISABLE_SHARED
|
||||
|
@ -394,7 +395,7 @@ MYSQL_CHECK_READLINE()
|
|||
SET(MALLOC_LIBRARY "system")
|
||||
|
||||
CHECK_PCRE()
|
||||
|
||||
CHECK_URING()
|
||||
CHECK_SYSTEMD()
|
||||
|
||||
IF(CMAKE_CROSSCOMPILING)
|
||||
|
|
|
@ -49,6 +49,10 @@ MACRO(CHECK_SYSTEMD)
|
|||
SET(SYSTEMD_EXECSTARTPRE "ExecStartPre=/usr/bin/install -m 755 -o mysql -g root -d /var/run/mysqld")
|
||||
SET(SYSTEMD_EXECSTARTPOST "ExecStartPost=/etc/mysql/debian-start")
|
||||
ENDIF()
|
||||
IF(LIBURING AND HAVE_LIBURING_H AND NOT WITH_URING STREQUAL "no")
|
||||
SET(SYSTEMD_LIMIT "# For liburing and io_uring_setup()
|
||||
LimitMEMLOCK=524288")
|
||||
ENDIF()
|
||||
MESSAGE_ONCE(systemd "Systemd features enabled")
|
||||
ELSE()
|
||||
UNSET(LIBSYSTEMD)
|
||||
|
|
20
cmake/uring.cmake
Normal file
20
cmake/uring.cmake
Normal file
|
@ -0,0 +1,20 @@
|
|||
# CHECK_URING: detect liburing on Linux and enable the io_uring AIO backend.
#
# Controlled by the cache variable WITH_URING:
#   auto (default) - use liburing when both the library and liburing.h exist
#   yes            - require liburing; configuration fails if it is missing
#   no             - never use liburing
# Any other value is rejected with a fatal error.
#
# When liburing is usable, HAVE_URING is defined for the compiler and the
# library located by FIND_LIBRARY is linked (by its full path, so that a
# copy outside the default linker search path still works).
#
# LIBURING is left set only when liburing will really be used: other build
# scripts test LIBURING on its own to choose between libaio and liburing,
# so a stale cached value (library present but header missing, or
# WITH_URING switched to "no" on a re-configure) must not survive.
MACRO(CHECK_URING)
  IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
    INCLUDE(CheckIncludeFiles)
    SET(WITH_URING "auto" CACHE STRING "Enable liburing usage")
    IF(WITH_URING STREQUAL "yes" OR WITH_URING STREQUAL "auto")
      FIND_LIBRARY(LIBURING uring)
      CHECK_INCLUDE_FILES(liburing.h HAVE_LIBURING_H)
      IF(LIBURING AND HAVE_LIBURING_H)
        ADD_DEFINITIONS(-DHAVE_URING)
        LINK_LIBRARIES(${LIBURING})
      ELSE()
        # Not usable: forget the search result so nobody mistakes it for
        # "liburing is enabled".
        UNSET(LIBURING CACHE)
        UNSET(LIBURING)
        IF(WITH_URING STREQUAL "yes")
          MESSAGE(FATAL_ERROR "Requested WITH_URING=yes but liburing was not found")
        ENDIF()
      ENDIF()
    ELSEIF(WITH_URING STREQUAL "no")
      # Drop a result cached by an earlier configure run with yes/auto.
      UNSET(LIBURING CACHE)
      UNSET(LIBURING)
    ELSE()
      MESSAGE(FATAL_ERROR "Invalid value for WITH_URING. Must be 'yes', 'no', or 'auto'.")
    ENDIF()
  ENDIF()
ENDMACRO()
|
|
@ -2141,6 +2141,11 @@ static bool innodb_init_param()
|
|||
if (srv_use_native_aio) {
|
||||
msg("InnoDB: Using Linux native AIO");
|
||||
}
|
||||
#elif defined(HAVE_URING)
|
||||
|
||||
if (srv_use_native_aio) {
|
||||
msg("InnoDB: Using liburing");
|
||||
}
|
||||
#else
|
||||
/* Currently native AIO is supported only on windows and linux
|
||||
and that also when the support is compiled in. In all other
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# -*- cperl -*-
|
||||
|
||||
# Copyright (c) 2004, 2014, Oracle and/or its affiliates.
|
||||
# Copyright (c) 2009, 2020, MariaDB Corporation
|
||||
# Copyright (c) 2009, 2021, MariaDB Corporation
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
@ -4376,6 +4376,8 @@ sub extract_warning_lines ($$) {
|
|||
qr|Linux Native AIO|, # warning that aio does not work on /dev/shm
|
||||
qr|InnoDB: io_setup\(\) attempt|,
|
||||
qr|InnoDB: io_setup\(\) failed with EAGAIN|,
|
||||
qr|io_uring_queue_init\(\) failed with|,
|
||||
qr|InnoDB: liburing disabled|,
|
||||
qr|setrlimit could not change the size of core files to 'infinity';|,
|
||||
qr|feedback plugin: failed to retrieve the MAC address|,
|
||||
qr|Plugin 'FEEDBACK' init function returned error|,
|
||||
|
|
|
@ -3543,8 +3543,7 @@ static int innodb_init_params()
|
|||
srv_use_doublewrite_buf = FALSE;
|
||||
}
|
||||
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
#elif !defined _WIN32
|
||||
#if !defined LINUX_NATIVE_AIO && !defined HAVE_URING && !defined _WIN32
|
||||
/* Currently native AIO is supported only on windows and linux
|
||||
and that also when the support is compiled in. In all other
|
||||
cases, we ignore the setting of innodb_use_native_aio. */
|
||||
|
|
|
@ -56,12 +56,14 @@ IF(UNIX)
|
|||
|
||||
ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
|
||||
|
||||
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
|
||||
CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
|
||||
IF (NOT LIBURING)
|
||||
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
|
||||
CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
|
||||
|
||||
IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
|
||||
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
|
||||
LINK_LIBRARIES(aio)
|
||||
IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
|
||||
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
|
||||
LINK_LIBRARIES(aio)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
IF(HAVE_LIBNUMA)
|
||||
LINK_LIBRARIES(numa)
|
||||
|
|
|
@ -4024,6 +4024,17 @@ disable:
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_URING
|
||||
if (ret)
|
||||
{
|
||||
ut_ad(srv_use_native_aio);
|
||||
ib::warn()
|
||||
<< "liburing disabled: falling back to innodb_use_native_aio=OFF";
|
||||
srv_use_native_aio= false;
|
||||
ret= srv_thread_pool->configure_aio(false, max_events);
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!ret)
|
||||
{
|
||||
read_slots= new io_slots(max_read_events, srv_n_read_io_threads);
|
||||
|
|
|
@ -1193,6 +1193,11 @@ dberr_t srv_start(bool create_new_db)
|
|||
ib::info() << "Using Linux native AIO";
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_URING
|
||||
if (srv_use_native_aio) {
|
||||
ib::info() << "Using liburing";
|
||||
}
|
||||
#endif
|
||||
|
||||
fil_system.create(srv_file_per_table ? 50000 : 5000);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
# Copyright (c) 2006, 2016, Oracle and/or its affiliates.
|
||||
# Copyright (c) 2012, 2017, MariaDB
|
||||
# Copyright (c) 2012, 2021, MariaDB
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
|
|
@ -144,7 +144,7 @@ TimeoutStopSec=900
|
|||
|
||||
# Number of files limit. previously [mysqld_safe] open-files-limit
|
||||
LimitNOFILE=16384
|
||||
|
||||
@SYSTEMD_LIMIT@
|
||||
# Maximum core size. previously [mysqld_safe] core-file-size
|
||||
# LimitCore=
|
||||
|
||||
|
|
|
@ -269,7 +269,7 @@ Group=mysql
|
|||
|
||||
# Number of files limit. previously [mysqld_safe] open-files-limit
|
||||
LimitNOFILE=16384
|
||||
|
||||
@SYSTEMD_LIMIT@
|
||||
# Maximum core size. previously [mysqld_safe] core-file-size
|
||||
# LimitCore=
|
||||
|
||||
|
|
|
@ -1,16 +1,19 @@
|
|||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
IF(WIN32)
|
||||
SET(EXTRA_SOURCES tpool_win.cc aio_win.cc)
|
||||
ELSE()
|
||||
SET(EXTRA_SOURCES aio_linux.cc)
|
||||
ENDIF()
|
||||
|
||||
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND LIBURING)
|
||||
SET(EXTRA_SOURCES aio_liburing.cc)
|
||||
ENDIF()
|
||||
|
||||
IF(CMAKE_SYSTEM_NAME STREQUAL "Linux" AND NOT LIBURING)
|
||||
CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
|
||||
CHECK_LIBRARY_EXISTS(aio io_queue_init "" HAVE_LIBAIO)
|
||||
IF(HAVE_LIBAIO_H AND HAVE_LIBAIO)
|
||||
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
|
||||
LINK_LIBRARIES(aio)
|
||||
SET(EXTRA_SOURCES aio_linux.cc)
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
|
||||
|
@ -26,4 +29,4 @@ ADD_LIBRARY(tpool STATIC
|
|||
${EXTRA_SOURCES}
|
||||
)
|
||||
|
||||
INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include)
|
||||
INCLUDE_DIRECTORIES(${PROJECT_SOURCE_DIR}/include)
|
||||
|
|
185
tpool/aio_liburing.cc
Normal file
185
tpool/aio_liburing.cc
Normal file
|
@ -0,0 +1,185 @@
|
|||
/* Copyright (C) 2021, MariaDB Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA*/
|
||||
|
||||
#include "tpool_structs.h"
|
||||
#include "tpool.h"
|
||||
#include "mysql/service_my_print_error.h"
|
||||
#include "mysqld_error.h"
|
||||
|
||||
#include <liburing.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <thread>
|
||||
#include <mutex>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class aio_uring final : public tpool::aio
|
||||
{
|
||||
public:
|
||||
aio_uring(tpool::thread_pool *tpool, int max_aio) : tpool_(tpool)
|
||||
{
|
||||
if (io_uring_queue_init(max_aio, &uring_, 0) != 0)
|
||||
{
|
||||
switch (const auto e= errno) {
|
||||
case ENOMEM:
|
||||
case ENOSYS:
|
||||
my_printf_error(ER_UNKNOWN_ERROR, e == ENOMEM
|
||||
? "io_uring_queue_init() failed with ENOMEM:"
|
||||
" try larger ulimit -l\n"
|
||||
: "io_uring_queue_init() failed with ENOSYS:"
|
||||
" try uprading the kernel\n",
|
||||
ME_ERROR_LOG | ME_WARNING);
|
||||
break;
|
||||
default:
|
||||
my_printf_error(ER_UNKNOWN_ERROR,
|
||||
"io_uring_queue_init() failed with errno %d\n",
|
||||
ME_ERROR_LOG | ME_WARNING, e);
|
||||
}
|
||||
throw std::runtime_error("aio_uring()");
|
||||
}
|
||||
|
||||
thread_= std::thread(thread_routine, this);
|
||||
}
|
||||
|
||||
~aio_uring() noexcept
|
||||
{
|
||||
{
|
||||
std::lock_guard<std::mutex> _(mutex_);
|
||||
io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
|
||||
io_uring_prep_nop(sqe);
|
||||
io_uring_sqe_set_data(sqe, nullptr);
|
||||
auto ret= io_uring_submit(&uring_);
|
||||
if (ret != 1)
|
||||
{
|
||||
my_printf_error(ER_UNKNOWN_ERROR,
|
||||
"io_uring_submit() returned %d during shutdown:"
|
||||
" this may cause a hang\n",
|
||||
ME_ERROR_LOG | ME_FATAL, ret);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
thread_.join();
|
||||
io_uring_queue_exit(&uring_);
|
||||
}
|
||||
|
||||
int submit_io(tpool::aiocb *cb) final
|
||||
{
|
||||
cb->iov_base= cb->m_buffer;
|
||||
cb->iov_len= cb->m_len;
|
||||
|
||||
// The whole operation since io_uring_get_sqe() and till io_uring_submit()
|
||||
// must be atomical. This is because liburing provides thread-unsafe calls.
|
||||
std::lock_guard<std::mutex> _(mutex_);
|
||||
|
||||
io_uring_sqe *sqe= io_uring_get_sqe(&uring_);
|
||||
if (cb->m_opcode == tpool::aio_opcode::AIO_PREAD)
|
||||
io_uring_prep_readv(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
|
||||
cb->m_offset);
|
||||
else
|
||||
io_uring_prep_writev(sqe, cb->m_fh, static_cast<struct iovec *>(cb), 1,
|
||||
cb->m_offset);
|
||||
io_uring_sqe_set_data(sqe, cb);
|
||||
|
||||
return io_uring_submit(&uring_) == 1 ? 0 : -1;
|
||||
}
|
||||
|
||||
int bind(native_file_handle &fd) final
|
||||
{
|
||||
std::lock_guard<std::mutex> _(files_mutex_);
|
||||
auto it= std::lower_bound(files_.begin(), files_.end(), fd);
|
||||
assert(it == files_.end() || *it != fd);
|
||||
files_.insert(it, fd);
|
||||
return io_uring_register_files_update(&uring_, 0, files_.data(),
|
||||
files_.size());
|
||||
}
|
||||
|
||||
int unbind(const native_file_handle &fd) final
|
||||
{
|
||||
std::lock_guard<std::mutex> _(files_mutex_);
|
||||
auto it= std::lower_bound(files_.begin(), files_.end(), fd);
|
||||
assert(*it == fd);
|
||||
files_.erase(it);
|
||||
return io_uring_register_files_update(&uring_, 0, files_.data(),
|
||||
files_.size());
|
||||
}
|
||||
|
||||
private:
|
||||
static void thread_routine(aio_uring *aio)
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
io_uring_cqe *cqe;
|
||||
if (int ret= io_uring_wait_cqe(&aio->uring_, &cqe))
|
||||
{
|
||||
if (ret == -EINTR) // this may occur during shutdown
|
||||
break;
|
||||
my_printf_error(ER_UNKNOWN_ERROR,
|
||||
"io_uring_wait_cqe() returned %d\n",
|
||||
ME_ERROR_LOG | ME_FATAL, ret);
|
||||
abort();
|
||||
}
|
||||
|
||||
auto *iocb= static_cast<tpool::aiocb*>(io_uring_cqe_get_data(cqe));
|
||||
if (!iocb)
|
||||
break;
|
||||
|
||||
int res= cqe->res;
|
||||
if (res < 0)
|
||||
{
|
||||
iocb->m_err= -res;
|
||||
iocb->m_ret_len= 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
iocb->m_err= 0;
|
||||
iocb->m_ret_len= res;
|
||||
}
|
||||
|
||||
io_uring_cqe_seen(&aio->uring_, cqe);
|
||||
|
||||
iocb->m_internal_task.m_func= iocb->m_callback;
|
||||
iocb->m_internal_task.m_arg= iocb;
|
||||
iocb->m_internal_task.m_group= iocb->m_group;
|
||||
aio->tpool_->submit_task(&iocb->m_internal_task);
|
||||
}
|
||||
}
|
||||
|
||||
io_uring uring_;
|
||||
std::mutex mutex_;
|
||||
tpool::thread_pool *tpool_;
|
||||
std::thread thread_;
|
||||
|
||||
std::vector<native_file_handle> files_;
|
||||
std::mutex files_mutex_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace tpool
|
||||
{
|
||||
|
||||
/**
  Create the liburing based asynchronous I/O backend.

  @param pool     thread pool handed to the aio_uring constructor
  @param max_aio  queue size handed to the aio_uring constructor
  @return the new backend object, or nullptr if its construction
          threw std::runtime_error
*/
aio *create_linux_aio(thread_pool *pool, int max_aio)
{
  aio *backend= nullptr;
  try
  {
    backend= new aio_uring(pool, max_aio);
  }
  catch (std::runtime_error &)
  {
    // Construction failed; the caller is told by the nullptr result.
  }
  return backend;
}
|
||||
|
||||
} // namespace tpool
|
|
@ -16,7 +16,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
|
|||
#include "tpool_structs.h"
|
||||
#include "tpool.h"
|
||||
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
# include <thread>
|
||||
# include <atomic>
|
||||
# include <libaio.h>
|
||||
|
@ -69,7 +68,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
|
|||
}
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/*
|
||||
|
@ -84,7 +82,6 @@ static int my_getevents(io_context_t ctx, long min_nr, long nr, io_event *ev)
|
|||
*/
|
||||
namespace tpool
|
||||
{
|
||||
#ifdef LINUX_NATIVE_AIO
|
||||
|
||||
class aio_linux final : public aio
|
||||
{
|
||||
|
@ -187,7 +184,4 @@ aio *create_linux_aio(thread_pool *pool, int max_io)
|
|||
}
|
||||
return new aio_linux(ctx, pool);
|
||||
}
|
||||
#else
|
||||
aio *create_linux_aio(thread_pool*, int) { return nullptr; }
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -22,6 +22,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111 - 1301 USA*/
|
|||
#ifdef LINUX_NATIVE_AIO
|
||||
#include <libaio.h>
|
||||
#endif
|
||||
#ifdef HAVE_URING
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
|
@ -123,6 +126,8 @@ struct aiocb
|
|||
:OVERLAPPED
|
||||
#elif defined LINUX_NATIVE_AIO
|
||||
:iocb
|
||||
#elif defined HAVE_URING
|
||||
:iovec
|
||||
#endif
|
||||
{
|
||||
native_file_handle m_fh;
|
||||
|
|
|
@ -38,7 +38,11 @@ namespace tpool
|
|||
{
|
||||
|
||||
#ifdef __linux__
|
||||
#if defined(HAVE_URING) || defined(LINUX_NATIVE_AIO)
|
||||
extern aio* create_linux_aio(thread_pool* tp, int max_io);
|
||||
#else
|
||||
aio *create_linux_aio(thread_pool *, int) { return nullptr; };
|
||||
#endif
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
extern aio* create_win_aio(thread_pool* tp, int max_io);
|
||||
|
|
Loading…
Reference in a new issue