2016-10-06 19:24:09 +02:00
|
|
|
/*
|
|
|
|
Copyright (c) 2015, Facebook, Inc.
|
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU General Public License as published by
|
|
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
|
|
along with this program; if not, write to the Free Software
|
2018-02-10 08:28:23 +01:00
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2016-10-09 14:36:58 +02:00
|
|
|
#include <my_config.h>
|
|
|
|
|
2016-10-06 19:24:09 +02:00
|
|
|
/* This C++ file's header file */
|
|
|
|
#include "./rdb_mutex_wrapper.h"
|
|
|
|
|
2016-10-21 23:11:47 +02:00
|
|
|
/* The following are for THD_ENTER_COND: */
|
|
|
|
#define MYSQL_SERVER 1
|
|
|
|
#include "sql_priv.h"
|
|
|
|
#include "my_decimal.h"
|
|
|
|
#include "sql_class.h"
|
2017-02-19 17:29:01 +01:00
|
|
|
//psergey-merge-todo: does MariaDB have/need: #include "../sql/replication.h"
|
|
|
|
|
2016-10-21 23:11:47 +02:00
|
|
|
|
2016-10-06 19:24:09 +02:00
|
|
|
/* MyRocks header files */
|
|
|
|
#include "./ha_rocksdb.h"
|
|
|
|
#include "./rdb_utils.h"
|
|
|
|
|
2016-10-21 23:11:47 +02:00
|
|
|
|
2016-10-06 19:24:09 +02:00
|
|
|
using namespace rocksdb;
|
|
|
|
|
|
|
|
namespace myrocks {
|
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
// Stage descriptor passed to THD_ENTER_COND by Rdb_cond_var::WaitFor() while
// the calling thread is blocked on the condition variable.
static PSI_stage_info stage_waiting_on_row_lock2 = {
    0, "Waiting for row lock", 0};
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
// Number of microseconds in one second.
static constexpr int64_t ONE_SECOND_IN_MICROSECS = 1000000;

// Number of microseconds in one non-leap (365-day) year. A timeout this long
// is treated as being as good as an infinite timeout.
static constexpr int64_t ONE_YEAR_IN_MICROSECS =
    365 * 24 * 60 * 60 * ONE_SECOND_IN_MICROSECS;
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
// Initializes the wrapped condition variable m_cond with key 0 and no
// attributes.
Rdb_cond_var::Rdb_cond_var() {
  mysql_cond_init(0, &m_cond, nullptr);
}
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
// Destroys the wrapped condition variable m_cond.
Rdb_cond_var::~Rdb_cond_var() {
  mysql_cond_destroy(&m_cond);
}
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2016-12-31 21:30:09 +01:00
|
|
|
/*
  @brief
    Wait on the condition variable without a caller-chosen timeout.

  Forwards to WaitFor() using ONE_YEAR_IN_MICROSECS as the timeout, and
  returns whatever status WaitFor() produces.
*/
Status Rdb_cond_var::Wait(const std::shared_ptr<TransactionDBMutex> mutex_arg) {
  const int64_t effectively_forever = ONE_YEAR_IN_MICROSECS;
  return WaitFor(mutex_arg, effectively_forever);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
@brief
|
|
|
|
Wait on condition variable. The caller must make sure that we own
|
|
|
|
*mutex_ptr. The mutex is released and re-acquired by the wait function.
|
|
|
|
|
|
|
|
@param
|
|
|
|
timeout_micros Timeout in microseconds. Negative value means no timeout.
|
|
|
|
|
|
|
|
@return
|
|
|
|
Status::OK() - Wait successfull
|
|
|
|
Status::TimedOut() - Timed out or wait killed (the caller can check
|
|
|
|
thd_killed() to determine which occurred)
|
|
|
|
*/
|
|
|
|
|
2019-06-15 20:29:46 +02:00
|
|
|
Status Rdb_cond_var::WaitFor(
|
|
|
|
const std::shared_ptr<TransactionDBMutex> mutex_arg,
|
|
|
|
int64_t timeout_micros) {
|
2017-02-06 18:39:08 +01:00
|
|
|
auto *mutex_obj = reinterpret_cast<Rdb_mutex *>(mutex_arg.get());
|
2016-10-06 19:24:09 +02:00
|
|
|
DBUG_ASSERT(mutex_obj != nullptr);
|
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
mysql_mutex_t *const mutex_ptr = &mutex_obj->m_mutex;
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
int res = 0;
|
2016-10-06 19:24:09 +02:00
|
|
|
struct timespec wait_timeout;
|
|
|
|
|
2019-06-15 20:29:46 +02:00
|
|
|
if (timeout_micros < 0) timeout_micros = ONE_YEAR_IN_MICROSECS;
|
2017-02-06 18:39:08 +01:00
|
|
|
set_timespec_nsec(wait_timeout, timeout_micros * 1000);
|
2016-10-06 19:24:09 +02:00
|
|
|
|
|
|
|
#ifndef STANDALONE_UNITTEST
|
|
|
|
PSI_stage_info old_stage;
|
|
|
|
mysql_mutex_assert_owner(mutex_ptr);
|
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
if (current_thd && mutex_obj->m_old_stage_info.count(current_thd) == 0) {
|
|
|
|
THD_ENTER_COND(current_thd, &m_cond, mutex_ptr, &stage_waiting_on_row_lock2,
|
|
|
|
&old_stage);
|
2016-10-06 19:24:09 +02:00
|
|
|
/*
|
|
|
|
After the mysql_cond_timedwait we need make this call
|
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
THD_EXIT_COND(thd, &old_stage);
|
2016-10-06 19:24:09 +02:00
|
|
|
|
|
|
|
to inform the SQL layer that KILLable wait has ended. However,
|
|
|
|
that will cause mutex to be released. Defer the release until the mutex
|
|
|
|
that is unlocked by RocksDB's Pessimistic Transactions system.
|
|
|
|
*/
|
|
|
|
mutex_obj->set_unlock_action(&old_stage);
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
2017-02-06 18:39:08 +01:00
|
|
|
bool killed = false;
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
do {
|
|
|
|
res = mysql_cond_timedwait(&m_cond, mutex_ptr, &wait_timeout);
|
2016-10-06 19:24:09 +02:00
|
|
|
|
|
|
|
#ifndef STANDALONE_UNITTEST
|
2019-06-15 23:28:33 +02:00
|
|
|
if (current_thd) killed = thd_killed(current_thd);
|
2016-10-06 19:24:09 +02:00
|
|
|
#endif
|
|
|
|
} while (!killed && res == EINTR);
|
|
|
|
|
2019-06-15 20:29:46 +02:00
|
|
|
if (res || killed) {
|
2016-10-06 19:24:09 +02:00
|
|
|
return Status::TimedOut();
|
2019-06-15 20:29:46 +02:00
|
|
|
} else {
|
2016-10-06 19:24:09 +02:00
|
|
|
return Status::OK();
|
2019-06-15 20:29:46 +02:00
|
|
|
}
|
2016-10-06 19:24:09 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
|
|
@note
|
|
|
|
This function may be called while not holding the mutex that is used to wait
|
|
|
|
on the condition variable.
|
|
|
|
|
|
|
|
The manual page says ( http://linux.die.net/man/3/pthread_cond_signal):
|
|
|
|
|
|
|
|
The pthread_cond_broadcast() or pthread_cond_signal() functions may be called
|
|
|
|
by a thread whether or not it currently owns the mutex that threads calling
|
|
|
|
pthread_cond_wait() or pthread_cond_timedwait() have associated with the
|
|
|
|
condition variable during their waits; however, IF PREDICTABLE SCHEDULING
|
|
|
|
BEHAVIOR IS REQUIRED, THEN THAT MUTEX SHALL BE LOCKED by the thread calling
|
|
|
|
pthread_cond_broadcast() or pthread_cond_signal().
|
|
|
|
|
|
|
|
What's "predictable scheduling" and do we need it? The explanation is here:
|
|
|
|
|
|
|
|
https://groups.google.com/forum/?hl=ky#!msg/comp.programming.threads/wEUgPq541v8/ZByyyS8acqMJ
|
|
|
|
"The problem (from the realtime side) with condition variables is that
|
|
|
|
if you can signal/broadcast without holding the mutex, and any thread
|
|
|
|
currently running can acquire an unlocked mutex and check a predicate
|
|
|
|
without reference to the condition variable, then you can have an
|
|
|
|
indirect priority inversion."
|
|
|
|
|
|
|
|
Another possible consequence is that one can create spurious wake-ups when
|
|
|
|
there are multiple threads signaling the condition.
|
|
|
|
|
|
|
|
None of this looks like a problem for our use case.
|
|
|
|
*/
|
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
// Signals the wrapped condition variable m_cond.
void Rdb_cond_var::Notify() {
  mysql_cond_signal(&m_cond);
}
|
2016-10-06 19:24:09 +02:00
|
|
|
|
|
|
|
/*
|
|
|
|
@note
|
|
|
|
This is called without holding the mutex that's used for waiting on the
|
|
|
|
condition. See ::Notify().
|
|
|
|
*/
|
2017-02-06 18:39:08 +01:00
|
|
|
// Broadcasts on the wrapped condition variable m_cond.
void Rdb_cond_var::NotifyAll() {
  mysql_cond_broadcast(&m_cond);
}
|
2016-10-06 19:24:09 +02:00
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
// Initializes the wrapped mutex m_mutex with the MY_MUTEX_INIT_FAST
// attributes.
Rdb_mutex::Rdb_mutex() {
  /* Key 0: don't register in P_S. */
  mysql_mutex_init(0, &m_mutex, MY_MUTEX_INIT_FAST);
}
|
|
|
|
|
2017-02-06 18:39:08 +01:00
|
|
|
// Destroys the wrapped mutex m_mutex.
Rdb_mutex::~Rdb_mutex() {
  mysql_mutex_destroy(&m_mutex);
}
|
2016-10-06 19:24:09 +02:00
|
|
|
|
|
|
|
// Acquire the mutex via RDB_MUTEX_LOCK_CHECK and report Status::OK().
// NOTE(review): RDB_MUTEX_LOCK_CHECK is defined elsewhere; presumably it locks
// m_mutex and verifies the result -- confirm in rdb_utils.h.
Status Rdb_mutex::Lock() {
  RDB_MUTEX_LOCK_CHECK(m_mutex);

  // The current thread must not have an unlock action recorded on this mutex
  // at the moment it takes the lock.
  DBUG_ASSERT(m_old_stage_info.find(current_thd) == m_old_stage_info.end());

  return Status::OK();
}
|
|
|
|
|
|
|
|
// Attempt to acquire lock. If timeout is non-negative, operation may be
|
|
|
|
// failed after this many milliseconds.
|
|
|
|
// If implementing a custom version of this class, the implementation may
|
|
|
|
// choose to ignore the timeout.
|
|
|
|
// Return OK on success, or other Status on failure.
|
2017-02-06 18:39:08 +01:00
|
|
|
Status Rdb_mutex::TryLockFor(int64_t timeout_time MY_ATTRIBUTE((__unused__))) {
  /*
    The timeout argument is not used here. The PThreads API offers
    pthread_mutex_timedlock(), but mysql's mysql_mutex_* wrappers do not
    wrap that function, so this goes through the same lock macro as Lock().
  */
  RDB_MUTEX_LOCK_CHECK(m_mutex);

  return Status::OK();
}
|
|
|
|
|
|
|
|
#ifndef STANDALONE_UNITTEST
|
2017-02-06 18:39:08 +01:00
|
|
|
/*
  Record a copy of *old_stage_arg for the current thread so that UnLock() can
  later hand it to THD_EXIT_COND. The caller must own m_mutex, and the current
  thread must not already have an entry recorded.
*/
void Rdb_mutex::set_unlock_action(const PSI_stage_info *const old_stage_arg) {
  DBUG_ASSERT(old_stage_arg != nullptr);

  mysql_mutex_assert_owner(&m_mutex);
  DBUG_ASSERT(m_old_stage_info.find(current_thd) == m_old_stage_info.end());

  // Keep our own copy: the caller's PSI_stage_info is not retained.
  const auto saved_stage = std::make_shared<PSI_stage_info>(*old_stage_arg);
  m_old_stage_info[current_thd] = saved_stage;
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// Unlock Mutex that was successfully locked by Lock() or TryLockFor()
|
|
|
|
void Rdb_mutex::UnLock() {
|
|
|
|
#ifndef STANDALONE_UNITTEST
|
2017-02-06 18:39:08 +01:00
|
|
|
if (m_old_stage_info.count(current_thd) > 0) {
|
2016-12-31 21:30:09 +01:00
|
|
|
const std::shared_ptr<PSI_stage_info> old_stage =
|
2017-02-06 18:39:08 +01:00
|
|
|
m_old_stage_info[current_thd];
|
2016-10-06 19:24:09 +02:00
|
|
|
m_old_stage_info.erase(current_thd);
|
|
|
|
/* The following will call mysql_mutex_unlock */
|
2016-10-21 23:11:47 +02:00
|
|
|
THD_EXIT_COND(current_thd, old_stage.get());
|
2016-10-06 19:24:09 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
#endif
|
2017-03-11 05:17:42 +01:00
|
|
|
RDB_MUTEX_UNLOCK_CHECK(m_mutex);
|
2016-10-06 19:24:09 +02:00
|
|
|
}
|
|
|
|
|
2019-06-15 20:29:46 +02:00
|
|
|
} // namespace myrocks
|