mariadb/storage/rocksdb/rdb_io_watchdog.cc

240 lines
6.9 KiB
C++
Raw Normal View History

/*
Copyright (c) 2017, Facebook, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
/* This C++ file's header */
#include "./rdb_io_watchdog.h"
/* C++ standard header files */
#include <string>
#include <vector>
2017-08-05 15:57:17 +02:00
/* Rdb_io_watchdog doesn't work on Windows or Apple platforms [yet] */
#if !defined(_WIN32) && !defined(__APPLE__)
namespace myrocks {
// Callback for the one-shot watchdog timer. If the I/O check is still marked
// as in progress when this fires, an error is logged and the process is
// taken down via abort_with_stack_traces(). Otherwise this is a no-op.
//
// timer_data: value registered with the timer; only asserted to carry a
//             non-null pointer.
void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
  DBUG_ASSERT(timer_data.sival_ptr != nullptr);

  // What happens to a pending notification of an already deleted timer is
  // unspecified, so this callback may still run right after the I/O check
  // completed and its timer was deleted. The flag tells the two cases apart.
  const bool io_still_pending = m_io_in_progress.load();

  if (io_still_pending) {
    // The I/O check has been blocked for more than `m_write_timeout` seconds.
    // Report it and shut the service down.
    // NO_LINT_DEBUG
    sql_print_error("MyRocks has detected a combination of I/O requests which "
                    "have cumulatively been blocking for more than %u seconds. "
                    "Shutting the service down.",
                    m_write_timeout);

    abort_with_stack_traces();
  }
}
// Callback that performs one round of write-access checks.
//
// It arms a one-shot watchdog timer of `m_write_timeout` seconds (whose
// expiry runs expire_io_callback_wrapper), then calls check_write_access()
// for every entry of `m_dirs_to_check`, and finally deletes the watchdog
// timer again. `m_reset_mutex` is held for the whole duration so that a
// concurrent reset_timeout() waits for the check to finish.
//
// timer_data: value registered with the timer; only asserted to carry a
//             non-null pointer.
void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
  RDB_MUTEX_LOCK_CHECK(m_reset_mutex);

  DBUG_ASSERT(timer_data.sival_ptr != nullptr);

  // Zero the structure first so that no indeterminate bytes are handed to
  // timer_create().
  struct sigevent e;
  memset(&e, 0, sizeof(e));

  e.sigev_notify = SIGEV_THREAD;
  e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
  e.sigev_value.sival_ptr = this;
  e.sigev_notify_attributes = nullptr;

  int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return;
  }

  struct itimerspec timer_spec;
  memset(&timer_spec, 0, sizeof(timer_spec));

  // One time execution only for the watchdog. No interval.
  timer_spec.it_value.tv_sec = m_write_timeout;

  ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
                      errno);

    // The timer exists but could not be armed. Release it here: the next
    // invocation overwrites the handle with a fresh timer_create(), which
    // would otherwise leak one timer per failure.
    if (unlikely(timer_delete(m_io_check_watchdog_timer))) {
      // NO_LINT_DEBUG
      sql_print_warning("Deleting the watchdog I/O timer failed with %d.",
                        errno);
    }

    m_io_check_watchdog_timer = nullptr;

    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return;
  }

  // From here on an expiring watchdog timer is treated as a stuck I/O.
  m_io_in_progress.store(true);

  // Verify the write access to all directories we care about.
  for (const std::string &directory : m_dirs_to_check) {
    ret = check_write_access(directory);

    // We'll log a warning and attempt to continue to see if the problem
    // happens in other cases as well.
    if (unlikely(ret != HA_EXIT_SUCCESS)) {
      // NO_LINT_DEBUG
      sql_print_warning("Unable to verify write access to %s (error code %d).",
                        directory.c_str(), ret);
    }
  }

  // All checks returned in time; a late watchdog notification must be
  // ignored by expire_io_callback().
  m_io_in_progress.store(false);

  // Clean up the watchdog timer.
  ret = timer_delete(m_io_check_watchdog_timer);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
  }

  m_io_check_watchdog_timer = nullptr;

  RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
}
// Verifies that `dirname` can be written to by creating a dummy file in it,
// writing `RDB_IO_WRITE_BUFFER_SIZE` bytes from `m_buf`, closing the file and
// unlinking it.
//
// dirname: directory to probe; must not be empty.
//
// Returns HA_EXIT_SUCCESS when every step succeeded. Otherwise returns the
// return value of the first call that failed (open, write, close or unlink).
int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
  DBUG_ASSERT(!dirname.empty());
  DBUG_ASSERT(m_buf != nullptr);

  const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;

  // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
  // buffer cache.
  int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
                S_IRWXU | S_IWUSR);

  if (unlikely(fd == -1)) {
    return fd;
  }

  int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);

  if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
    // Release the descriptor before bailing out; this probe is executed
    // repeatedly, so every failed write would otherwise leak one descriptor.
    // The result of close() is deliberately ignored: the write failure is the
    // error being reported.
    static_cast<void>(close(fd));
    return ret;
  }

  ret = close(fd);

  if (unlikely(ret)) {
    return ret;
  }

  ret = unlink(fname.c_str());

  if (unlikely(ret)) {
    return ret;
  }

  return HA_EXIT_SUCCESS;
}
// Stops any active timers and, unless `write_timeout` is zero, (re)allocates
// the aligned write buffer and starts a periodic timer that fires every
// `write_timeout` seconds and runs io_check_callback_wrapper.
//
// write_timeout: new timeout in seconds; zero disables the I/O timer.
//
// Returns HA_EXIT_SUCCESS on success (including the "disabled" case).
// Otherwise returns the non-zero result of the step that failed:
// stop_timers(), posix_memalign(), timer_create() or timer_settime().
int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) {
  // This function will be called either from a thread initializing MyRocks
  // engine or handling system variable changes. We need to account for the
  // possibility of I/O callback executing at the same time. If that happens
  // then we'll wait for it to finish.
  RDB_MUTEX_LOCK_CHECK(m_reset_mutex);

  // In all the cases all the active timers need to be stopped.
  int ret = stop_timers();

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Stopping I/O timers failed with %d.", errno);
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return ret;
  }

  m_write_timeout = write_timeout;
  m_io_in_progress.store(false);

  // Zero means that the I/O timer will be disabled. Therefore there's nothing
  // for us to do here.
  if (!write_timeout) {
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return HA_EXIT_SUCCESS;
  }

  // Replace any previous buffer with a fresh one whose alignment equals its
  // size.
  free(m_buf);

  ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
                       RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);

  if (unlikely(ret)) {
    // The old buffer has already been freed, so don't keep a stale pointer.
    m_buf = nullptr;
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    // NB! The value of errno is not set.
    return ret;
  }

  DBUG_ASSERT(m_buf != nullptr);
  memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);

  // Common case gets handled here - we'll create a timer with a specific
  // interval to check a set of directories for write access.
  DBUG_ASSERT(m_dirs_to_check.size() > 0);

  // Zero the structure first so that no indeterminate bytes are handed to
  // timer_create().
  struct sigevent e;
  memset(&e, 0, sizeof(e));

  e.sigev_notify = SIGEV_THREAD;
  e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
  e.sigev_value.sival_ptr = this;
  e.sigev_notify_attributes = nullptr;

  ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Creating a I/O timer failed with %d.", errno);
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return ret;
  }

  struct itimerspec timer_spec;
  memset(&timer_spec, 0, sizeof(timer_spec));

  // I/O timer will need to execute on a certain interval.
  timer_spec.it_value.tv_sec = m_write_timeout;
  timer_spec.it_interval.tv_sec = m_write_timeout;

  ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
                      errno);
    // The timer was never armed, so no checks will run. Report the failure
    // to the caller instead of claiming success, the same way the
    // timer_create() failure above is reported.
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return ret;
  }

  RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);

  return HA_EXIT_SUCCESS;
}
} // namespace myrocks
#endif