/*
   Copyright (c) 2017, Facebook, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */

/* This C++ file's header */
#include "./rdb_io_watchdog.h"

/* C++ standard header files */
#include <string>
#include <vector>

#ifndef _WIN32

namespace myrocks {

/**
  Callback for the one-shot watchdog timer armed in io_check_callback().
  NOTE(review): presumably reached through expire_io_callback_wrapper, which
  is registered as the notify function but is not defined in this file.

  If an I/O check is still flagged as in progress when the timer fires, an
  error is logged and abort_with_stack_traces() is called.

  @param timer_data  Value delivered by the timer; its sival_ptr is asserted
                     to be non-null (debug builds).
*/
void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
  DBUG_ASSERT(timer_data.sival_ptr != nullptr);

  // The treatment of any pending signal generated by the deleted timer is
  // unspecified. Therefore we still need to handle the rare case where we
  // finished the I/O operation right before the timer was deleted and callback
  // was in flight.
  if (!m_io_in_progress.load()) {
    return;
  }

  // At this point we know that I/O has been stuck in `write()` for more than
  // `m_write_timeout` seconds. We'll log a message and shut down the service.
  // NO_LINT_DEBUG
  sql_print_error("MyRocks has detected a combination of I/O requests which "
                  "have cumulatively been blocking for more than %u seconds. "
                  "Shutting the service down.",
                  m_write_timeout);

  abort_with_stack_traces();
}

/**
  Periodic callback that verifies write access to every directory in
  m_dirs_to_check.

  Before touching the disk it arms a one-shot watchdog timer of
  m_write_timeout seconds; if the checks below do not finish in time, the
  watchdog's expiry callback fires while m_io_in_progress is still set.
  The whole function runs with m_reset_mutex held so that it cannot overlap
  with reset_timeout().

  @param timer_data  Value delivered by the timer; its sival_ptr is asserted
                     to be non-null (debug builds).
*/
void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
  RDB_MUTEX_LOCK_CHECK(m_reset_mutex);

  DBUG_ASSERT(timer_data.sival_ptr != nullptr);

  // Zero the whole structure first so that no field is handed to
  // timer_create() uninitialized.
  struct sigevent e;
  memset(&e, 0, sizeof(e));

  e.sigev_notify = SIGEV_THREAD;
  e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
  e.sigev_value.sival_ptr = this;
  e.sigev_notify_attributes = nullptr;

  int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return;
  }

  struct itimerspec timer_spec;
  memset(&timer_spec, 0, sizeof(timer_spec));

  // One time execution only for the watchdog. No interval.
  timer_spec.it_value.tv_sec = m_write_timeout;

  ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
                      errno);

    // The timer was created successfully above, so it has to be released
    // here. Otherwise every invocation of this callback that fails to arm
    // the watchdog would leak one timer and leave a stale handle behind.
    if (unlikely(timer_delete(m_io_check_watchdog_timer))) {
      // NO_LINT_DEBUG
      sql_print_warning("Deleting the watchdog I/O timer failed with %d.",
                        errno);
    }

    m_io_check_watchdog_timer = nullptr;

    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return;
  }

  m_io_in_progress.store(true);

  // Verify the write access to all directories we care about.
  for (const std::string &directory : m_dirs_to_check) {
    ret = check_write_access(directory);

    // We'll log a warning and attempt to continue to see if the problem
    // happens in other cases as well.
    if (unlikely(ret != HA_EXIT_SUCCESS)) {
      // NO_LINT_DEBUG
      sql_print_warning("Unable to verify write access to %s (error code %d).",
                        directory.c_str(), ret);
    }
  }

  m_io_in_progress.store(false);

  // Clean up the watchdog timer.
  ret = timer_delete(m_io_check_watchdog_timer);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
  }

  m_io_check_watchdog_timer = nullptr;

  RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
}

/**
  Checks that a file can be created, written, closed and removed in the given
  directory.

  The dummy file is named RDB_IO_DUMMY_FILE_NAME and RDB_IO_WRITE_BUFFER_SIZE
  bytes from m_buf are written to it.

  @param dirname  Directory to probe; must not be empty.

  @return HA_EXIT_SUCCESS when every step succeeded. Otherwise the return
          value of the failing call: -1 from open(), the result of write()
          when it did not write the full buffer, or the non-zero result of
          close() / unlink().
*/
int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
  DBUG_ASSERT(!dirname.empty());
  DBUG_ASSERT(m_buf != nullptr);

  const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;

  // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
  // buffer cache.
  int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
                S_IRWXU | S_IWUSR);

  if (unlikely(fd == -1)) {
    return fd;
  }

  int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);

  if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
    // This check runs periodically, so the descriptor must not be leaked on
    // a failed or short write. The result of close() is deliberately ignored
    // because the write failure is what gets reported to the caller.
    static_cast<void>(close(fd));
    return ret;
  }

  ret = close(fd);

  if (unlikely(ret)) {
    return ret;
  }

  ret = unlink(fname.c_str());

  if (unlikely(ret)) {
    return ret;
  }

  return HA_EXIT_SUCCESS;
}

/**
  Stops the active timers and, unless the new timeout is zero, allocates a
  fresh write buffer and starts a periodic timer that drives
  io_check_callback().

  @param write_timeout  New timeout in seconds, used both as the interval of
                        the periodic check and as the watchdog deadline.
                        Zero disables the I/O timer.

  @return HA_EXIT_SUCCESS on success, and also when only arming the periodic
          timer failed (that failure is logged, not returned). Otherwise the
          non-zero result of stop_timers(), posix_memalign() or
          timer_create().
*/
int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) {
  // This function will be called either from a thread initializing MyRocks
  // engine or handling system variable changes. We need to account for the
  // possibility of I/O callback executing at the same time. If that happens
  // then we'll wait for it to finish.
  RDB_MUTEX_LOCK_CHECK(m_reset_mutex);

  // In all the cases all the active timers needs to be stopped.
  int ret = stop_timers();

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Stopping I/O timers failed with %d.", errno);
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return ret;
  }

  m_write_timeout = write_timeout;
  m_io_in_progress.store(false);

  // Zero means that the I/O timer will be disabled. Therefore there's nothing
  // for us to do here.
  if (!write_timeout) {
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return HA_EXIT_SUCCESS;
  }

  free(m_buf);

  // The buffer is aligned to its own size (alignment and size are both
  // RDB_IO_WRITE_BUFFER_SIZE); check_write_access() writes it through a
  // descriptor opened with O_DIRECT.
  ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
                       RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);

  if (unlikely(ret)) {
    // The old buffer has already been freed, so make sure no dangling
    // pointer is left behind.
    m_buf = nullptr;
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    // NB! The value of errno is not set.
    return ret;
  }

  DBUG_ASSERT(m_buf != nullptr);
  memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);

  // Common case gets handled here - we'll create a timer with a specific
  // interval to check a set of directories for write access.
  DBUG_ASSERT(m_dirs_to_check.size() > 0);

  // Zero the whole structure first so that no field is handed to
  // timer_create() uninitialized.
  struct sigevent e;
  memset(&e, 0, sizeof(e));

  e.sigev_notify = SIGEV_THREAD;
  e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
  e.sigev_value.sival_ptr = this;
  e.sigev_notify_attributes = nullptr;

  ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);

  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Creating a I/O timer failed with %d.", errno);
    RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
    return ret;
  }

  struct itimerspec timer_spec;
  memset(&timer_spec, 0, sizeof(timer_spec));

  // I/O timer will need to execute on a certain interval.
  timer_spec.it_value.tv_sec = m_write_timeout;
  timer_spec.it_interval.tv_sec = m_write_timeout;

  ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);

  // NOTE(review): a failure to arm the timer is only logged and success is
  // still returned below. Kept as-is because callers are not visible here;
  // confirm whether they should be told that no checks will run.
  if (unlikely(ret)) {
    // NO_LINT_DEBUG
    sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
                      errno);
  }

  RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);

  return HA_EXIT_SUCCESS;
}

}  // namespace myrocks

#endif  // !_WIN32