2017-07-28 19:52:07 +02:00
|
|
|
/*
|
|
|
|
Copyright (c) 2017, Facebook, Inc.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
|
|
|
|
|
|
|
|
/* This C++ file's header */
|
|
|
|
#include "./rdb_io_watchdog.h"
|
|
|
|
|
|
|
|
/* C++ standard header files */
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
|
2017-08-01 10:50:25 +02:00
|
|
|
#ifndef _WIN32
|
|
|
|
|
2017-07-28 19:52:07 +02:00
|
|
|
namespace myrocks {
|
|
|
|
|
|
|
|
void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
|
|
|
|
DBUG_ASSERT(timer_data.sival_ptr != nullptr);
|
|
|
|
|
|
|
|
// The treatment of any pending signal generated by the deleted timer is
|
|
|
|
// unspecified. Therefore we still need to handle the rare case where we
|
|
|
|
// finished the I/O operation right before the timer was deleted and callback
|
|
|
|
// was in flight.
|
|
|
|
if (!m_io_in_progress.load()) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// At this point we know that I/O has been stuck in `write()` for more than
|
|
|
|
// `m_write_timeout` seconds. We'll log a message and shut down the service.
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_error("MyRocks has detected a combination of I/O requests which "
|
|
|
|
"have cumulatively been blocking for more than %u seconds. "
|
|
|
|
"Shutting the service down.",
|
|
|
|
m_write_timeout);
|
|
|
|
|
|
|
|
abort_with_stack_traces();
|
|
|
|
}
|
|
|
|
|
|
|
|
void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
|
|
|
|
RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
|
|
|
|
|
|
|
|
DBUG_ASSERT(timer_data.sival_ptr != nullptr);
|
|
|
|
|
|
|
|
struct sigevent e;
|
|
|
|
|
|
|
|
e.sigev_notify = SIGEV_THREAD;
|
|
|
|
e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
|
|
|
|
e.sigev_value.sival_ptr = this;
|
|
|
|
e.sigev_notify_attributes = nullptr;
|
|
|
|
|
|
|
|
int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct itimerspec timer_spec;
|
|
|
|
memset(&timer_spec, 0, sizeof(timer_spec));
|
|
|
|
|
|
|
|
// One time execution only for the watchdog. No interval.
|
|
|
|
timer_spec.it_value.tv_sec = m_write_timeout;
|
|
|
|
|
|
|
|
ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
|
|
|
|
errno);
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_io_in_progress.store(true);
|
|
|
|
|
|
|
|
// Verify the write access to all directories we care about.
|
|
|
|
for (const std::string &directory : m_dirs_to_check) {
|
|
|
|
ret = check_write_access(directory);
|
|
|
|
|
|
|
|
// We'll log a warning and attept to continue to see if the problem happens
|
|
|
|
// in other cases as well.
|
|
|
|
if (unlikely(ret != HA_EXIT_SUCCESS)) {
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_warning("Unable to verify write access to %s (error code %d).",
|
|
|
|
directory.c_str(), ret);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
m_io_in_progress.store(false);
|
|
|
|
|
|
|
|
// Clean up the watchdog timer.
|
|
|
|
ret = timer_delete(m_io_check_watchdog_timer);
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
|
|
|
|
}
|
|
|
|
|
|
|
|
m_io_check_watchdog_timer = nullptr;
|
|
|
|
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
|
|
|
|
DBUG_ASSERT(!dirname.empty());
|
|
|
|
DBUG_ASSERT(m_buf != nullptr);
|
|
|
|
|
|
|
|
const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;
|
|
|
|
|
|
|
|
// O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
|
|
|
|
// buffer cache.
|
|
|
|
int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
|
|
|
|
S_IRWXU | S_IWUSR);
|
|
|
|
|
|
|
|
if (unlikely(fd == -1)) {
|
|
|
|
return fd;
|
|
|
|
}
|
|
|
|
|
|
|
|
int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);
|
|
|
|
|
|
|
|
if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = close(fd);
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = unlink(fname.c_str());
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
return HA_EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) {
|
|
|
|
// This function will be called either from a thread initializing MyRocks
|
|
|
|
// engine or handling system variable changes. We need to account for the
|
|
|
|
// possibility of I/O callback executing at the same time. If that happens
|
|
|
|
// then we'll wait for it to finish.
|
|
|
|
RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
|
|
|
|
|
|
|
|
struct sigevent e;
|
|
|
|
|
|
|
|
// In all the cases all the active timers needs to be stopped.
|
|
|
|
int ret = stop_timers();
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_warning("Stopping I/O timers failed with %d.", errno);
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
m_write_timeout = write_timeout;
|
|
|
|
m_io_in_progress.store(false);
|
|
|
|
|
|
|
|
// Zero means that the I/O timer will be disabled. Therefore there's nothing
|
|
|
|
// for us to do here.
|
|
|
|
if (!write_timeout) {
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
return HA_EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(m_buf);
|
|
|
|
|
|
|
|
ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
|
|
|
|
RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
m_buf = nullptr;
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
// NB! The value of errno is not set.
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
DBUG_ASSERT(m_buf != nullptr);
|
|
|
|
memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);
|
|
|
|
|
|
|
|
// Common case gets handled here - we'll create a timer with a specific
|
|
|
|
// interval to check a set of directories for write access.
|
|
|
|
DBUG_ASSERT(m_dirs_to_check.size() > 0);
|
|
|
|
|
|
|
|
e.sigev_notify = SIGEV_THREAD;
|
|
|
|
e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
|
|
|
|
e.sigev_value.sival_ptr = this;
|
|
|
|
e.sigev_notify_attributes = nullptr;
|
|
|
|
|
|
|
|
ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_warning("Creating a I/O timer failed with %d.", errno);
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
struct itimerspec timer_spec;
|
|
|
|
memset(&timer_spec, 0, sizeof(timer_spec));
|
|
|
|
|
|
|
|
// I/O timer will need to execute on a certain interval.
|
|
|
|
timer_spec.it_value.tv_sec = m_write_timeout;
|
|
|
|
timer_spec.it_interval.tv_sec = m_write_timeout;
|
|
|
|
|
|
|
|
ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);
|
|
|
|
|
|
|
|
if (unlikely(ret)) {
|
|
|
|
// NO_LINT_DEBUG
|
|
|
|
sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
|
|
|
|
errno);
|
|
|
|
}
|
|
|
|
|
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
|
|
|
|
|
|
|
|
return HA_EXIT_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
} // namespace myrocks
|
2017-08-01 10:50:25 +02:00
|
|
|
|
|
|
|
#endif
|
|
|
|
|