mirror of
https://github.com/MariaDB/server.git
synced 2025-01-30 18:41:56 +01:00
MDEV-24302 follow-up: RESET MASTER hangs
As pointed out by Andrei Elkin, the previous fix did not fix one race condition that may have caused the observed hang. innodb_log_flush_request(): If we are enqueueing the very first request at the same time the log write is being completed, we must ensure that a near-concurrent call to log_flush_notify() will not result in a missed notification. We guarantee this by release-acquire operations on log_requests.start and log_sys.flushed_to_disk_lsn. log_flush_notify_and_unlock(): Cleanup: Always release the mutex. log_sys_t::get_flushed_lsn(): Use acquire memory order. log_sys_t::set_flushed_lsn(): Use release memory order. log_sys_t::set_lsn(): Use release memory order. log_sys_t::get_lsn(): Use relaxed memory order by default, and allow the caller to specify acquire memory order explicitly. Whenever the log_sys.mutex is being held or when log writes are prohibited during startup, we can use a relaxed load. Likewise, in some assertions where reading a stale value of log_sys.lsn should not matter, we can use a relaxed load. This will cause some additional instructions to be emitted on architectures that do not implement Total Store Ordering (TSO), such as POWER, ARM, and RISC-V Weak Memory Ordering (RVWMO).
This commit is contained in:
parent
0df74a0197
commit
8c2e3259c1
7 changed files with 71 additions and 69 deletions
|
@ -1,4 +1,4 @@
|
||||||
/* Copyright (c) 2020, MariaDB
|
/* Copyright (c) 2020, 2021, MariaDB
|
||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
|
@ -41,7 +41,9 @@ public:
|
||||||
Atomic_relaxed(Type val) : m(val) {}
|
Atomic_relaxed(Type val) : m(val) {}
|
||||||
Atomic_relaxed() {}
|
Atomic_relaxed() {}
|
||||||
|
|
||||||
operator Type() const { return m.load(std::memory_order_relaxed); }
|
Type load(std::memory_order o= std::memory_order_relaxed) const
|
||||||
|
{ return m.load(o); }
|
||||||
|
operator Type() const { return m.load(); }
|
||||||
Type operator=(const Type val)
|
Type operator=(const Type val)
|
||||||
{ m.store(val, std::memory_order_relaxed); return val; }
|
{ m.store(val, std::memory_order_relaxed); return val; }
|
||||||
Type operator=(const Atomic_relaxed<Type> &rhs) { return *this= Type{rhs}; }
|
Type operator=(const Atomic_relaxed<Type> &rhs) { return *this= Type{rhs}; }
|
||||||
|
|
|
@ -1559,7 +1559,7 @@ static void log_flush(void *)
|
||||||
fil_flush_file_spaces();
|
fil_flush_file_spaces();
|
||||||
|
|
||||||
/* Guarantee progress for buf_flush_lists(). */
|
/* Guarantee progress for buf_flush_lists(). */
|
||||||
log_write_up_to(log_sys.get_lsn(), true);
|
log_buffer_flush_to_disk(true);
|
||||||
log_flush_pending.clear();
|
log_flush_pending.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1577,15 +1577,17 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
|
||||||
if (n_flush)
|
if (n_flush)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (log_sys.get_lsn() > log_sys.get_flushed_lsn())
|
lsn_t flushed_lsn= log_sys.get_flushed_lsn();
|
||||||
|
if (log_sys.get_lsn() > flushed_lsn)
|
||||||
{
|
{
|
||||||
log_flush_task.wait();
|
log_flush_task.wait();
|
||||||
if (log_sys.get_lsn() > log_sys.get_flushed_lsn() &&
|
flushed_lsn= log_sys.get_flushed_lsn();
|
||||||
|
if (log_sys.get_lsn() > flushed_lsn &&
|
||||||
!log_flush_pending.test_and_set())
|
!log_flush_pending.test_and_set())
|
||||||
srv_thread_pool->submit_task(&log_flush_task);
|
srv_thread_pool->submit_task(&log_flush_task);
|
||||||
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
|
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
|
||||||
if (UNIV_UNLIKELY(ibuf_debug))
|
if (UNIV_UNLIKELY(ibuf_debug))
|
||||||
log_write_up_to(log_sys.get_lsn(), true);
|
log_buffer_flush_to_disk(true);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1730,7 +1732,7 @@ static bool log_checkpoint()
|
||||||
/** Make a checkpoint. */
|
/** Make a checkpoint. */
|
||||||
ATTRIBUTE_COLD void log_make_checkpoint()
|
ATTRIBUTE_COLD void log_make_checkpoint()
|
||||||
{
|
{
|
||||||
buf_flush_wait_flushed(log_sys.get_lsn());
|
buf_flush_wait_flushed(log_sys.get_lsn(std::memory_order_acquire));
|
||||||
while (!log_checkpoint());
|
while (!log_checkpoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1182,7 +1182,7 @@ MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) static
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
/** first request */
|
/** first request */
|
||||||
Atomic_relaxed<log_flush_request*> start;
|
std::atomic<log_flush_request*> start;
|
||||||
/** last request */
|
/** last request */
|
||||||
log_flush_request *end;
|
log_flush_request *end;
|
||||||
/** mutex protecting this object */
|
/** mutex protecting this object */
|
||||||
|
@ -4460,13 +4460,11 @@ innobase_rollback_trx(
|
||||||
|
|
||||||
/** Invoke commit_checkpoint_notify_ha() on completed log flush requests.
|
/** Invoke commit_checkpoint_notify_ha() on completed log flush requests.
|
||||||
@param pending log_requests.start
|
@param pending log_requests.start
|
||||||
@param lsn log_sys.get_flushed_lsn()
|
@param lsn log_sys.get_flushed_lsn() */
|
||||||
@return whether something was notified (and log_requests.mutex released) */
|
static void log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
|
||||||
static bool log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
|
|
||||||
{
|
{
|
||||||
mysql_mutex_assert_owner(&log_requests.mutex);
|
mysql_mutex_assert_owner(&log_requests.mutex);
|
||||||
ut_ad(pending == log_requests.start);
|
ut_ad(pending == log_requests.start.load(std::memory_order_relaxed));
|
||||||
|
|
||||||
log_flush_request *entry= pending, *last= nullptr;
|
log_flush_request *entry= pending, *last= nullptr;
|
||||||
/* Process the first requests that have been completed. Since
|
/* Process the first requests that have been completed. Since
|
||||||
the list is not necessarily in ascending order of LSN, we may
|
the list is not necessarily in ascending order of LSN, we may
|
||||||
|
@ -4478,12 +4476,15 @@ static bool log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
|
||||||
for (; entry && entry->lsn <= lsn; last= entry, entry= entry->next);
|
for (; entry && entry->lsn <= lsn; last= entry, entry= entry->next);
|
||||||
|
|
||||||
if (!last)
|
if (!last)
|
||||||
return false;
|
{
|
||||||
|
mysql_mutex_unlock(&log_requests.mutex);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* Detach the head of the list that corresponds to persisted log writes. */
|
/* Detach the head of the list that corresponds to persisted log writes. */
|
||||||
log_requests.start= entry;
|
|
||||||
if (!entry)
|
if (!entry)
|
||||||
log_requests.end= nullptr;
|
log_requests.end= entry;
|
||||||
|
log_requests.start.store(entry, std::memory_order_relaxed);
|
||||||
mysql_mutex_unlock(&log_requests.mutex);
|
mysql_mutex_unlock(&log_requests.mutex);
|
||||||
|
|
||||||
/* Now that we have released the mutex, notify the submitters
|
/* Now that we have released the mutex, notify the submitters
|
||||||
|
@ -4496,21 +4497,16 @@ static bool log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
|
||||||
my_free(entry);
|
my_free(entry);
|
||||||
}
|
}
|
||||||
while (entry != last);
|
while (entry != last);
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Invoke commit_checkpoint_notify_ha() to notify that outstanding
|
/** Invoke commit_checkpoint_notify_ha() to notify that outstanding
|
||||||
log writes have been completed. */
|
log writes have been completed. */
|
||||||
void log_flush_notify(lsn_t flush_lsn)
|
void log_flush_notify(lsn_t flush_lsn)
|
||||||
{
|
{
|
||||||
if (log_requests.start)
|
if (auto pending= log_requests.start.load(std::memory_order_acquire))
|
||||||
{
|
{
|
||||||
mysql_mutex_lock(&log_requests.mutex);
|
mysql_mutex_lock(&log_requests.mutex);
|
||||||
if (log_flush_request *pending= log_requests.start)
|
log_flush_notify_and_unlock(pending, flush_lsn);
|
||||||
if (log_flush_notify_and_unlock(pending, flush_lsn))
|
|
||||||
return;
|
|
||||||
mysql_mutex_unlock(&log_requests.mutex);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4520,8 +4516,9 @@ checkpoint complete when we have flushed the redo log.
|
||||||
If we have already flushed all relevant redo log, we notify immediately.*/
|
If we have already flushed all relevant redo log, we notify immediately.*/
|
||||||
static void innodb_log_flush_request(void *cookie)
|
static void innodb_log_flush_request(void *cookie)
|
||||||
{
|
{
|
||||||
const lsn_t lsn= log_sys.get_lsn();
|
|
||||||
lsn_t flush_lsn= log_sys.get_flushed_lsn();
|
lsn_t flush_lsn= log_sys.get_flushed_lsn();
|
||||||
|
/* Load lsn relaxed after flush_lsn was loaded from the same cache line */
|
||||||
|
const lsn_t lsn= log_sys.get_lsn();
|
||||||
|
|
||||||
if (flush_lsn >= lsn)
|
if (flush_lsn >= lsn)
|
||||||
/* All log is already persistent. */;
|
/* All log is already persistent. */;
|
||||||
|
@ -4539,24 +4536,38 @@ static void innodb_log_flush_request(void *cookie)
|
||||||
req->cookie= cookie;
|
req->cookie= cookie;
|
||||||
req->lsn= lsn;
|
req->lsn= lsn;
|
||||||
|
|
||||||
|
log_flush_request *start= nullptr;
|
||||||
|
|
||||||
mysql_mutex_lock(&log_requests.mutex);
|
mysql_mutex_lock(&log_requests.mutex);
|
||||||
auto old_end= log_requests.end;
|
/* In order to prevent a race condition where log_flush_notify()
|
||||||
log_requests.end= req;
|
would skip a notification due to, we must update log_requests.start from
|
||||||
if (old_end)
|
nullptr (empty) to the first req using std::memory_order_release. */
|
||||||
|
if (log_requests.start.compare_exchange_strong(start, req,
|
||||||
|
std::memory_order_release,
|
||||||
|
std::memory_order_relaxed))
|
||||||
|
{
|
||||||
|
ut_ad(!log_requests.end);
|
||||||
|
start= req;
|
||||||
|
/* In case log_flush_notify() executed
|
||||||
|
log_requests.start.load(std::memory_order_acquire) right before
|
||||||
|
our successful compare_exchange, we must re-read flush_lsn to
|
||||||
|
ensure that our request will be notified immediately if applicable. */
|
||||||
|
flush_lsn= log_sys.get_flushed_lsn();
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
/* Append the entry to the list. Because we determined req->lsn before
|
/* Append the entry to the list. Because we determined req->lsn before
|
||||||
acquiring the mutex, this list may not be ordered by req->lsn,
|
acquiring the mutex, this list may not be ordered by req->lsn,
|
||||||
even though log_flush_notify_and_unlock() assumes so. */
|
even though log_flush_notify_and_unlock() assumes so. */
|
||||||
old_end->next= req;
|
log_requests.end->next= req;
|
||||||
|
}
|
||||||
|
|
||||||
|
log_requests.end= req;
|
||||||
|
|
||||||
/* This hopefully addresses the hang that was reported in MDEV-24302.
|
/* This hopefully addresses the hang that was reported in MDEV-24302.
|
||||||
Upon receiving a new request, we will notify old requests of
|
Upon receiving a new request, we will notify old requests of
|
||||||
completion. */
|
completion. */
|
||||||
if (log_flush_notify_and_unlock(log_requests.start, flush_lsn))
|
log_flush_notify_and_unlock(start, flush_lsn);
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
log_requests.start= req;
|
|
||||||
mysql_mutex_unlock(&log_requests.mutex);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -18345,16 +18356,17 @@ checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
|
||||||
if (*(my_bool*) save) {
|
if (*(my_bool*) save) {
|
||||||
mysql_mutex_unlock(&LOCK_global_system_variables);
|
mysql_mutex_unlock(&LOCK_global_system_variables);
|
||||||
|
|
||||||
while (log_sys.last_checkpoint_lsn
|
lsn_t lsn;
|
||||||
|
|
||||||
|
while (log_sys.last_checkpoint_lsn.load(
|
||||||
|
std::memory_order_acquire)
|
||||||
+ SIZE_OF_FILE_CHECKPOINT
|
+ SIZE_OF_FILE_CHECKPOINT
|
||||||
< log_sys.get_lsn()) {
|
< (lsn= log_sys.get_lsn(std::memory_order_acquire))) {
|
||||||
log_make_checkpoint();
|
log_make_checkpoint();
|
||||||
log_sys.log.flush();
|
log_sys.log.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
dberr_t err = fil_write_flushed_lsn(log_sys.get_lsn());
|
if (dberr_t err = fil_write_flushed_lsn(lsn)) {
|
||||||
|
|
||||||
if (err != DB_SUCCESS) {
|
|
||||||
ib::warn() << "Checkpoint set failed " << err;
|
ib::warn() << "Checkpoint set failed " << err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
|
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||||
Copyright (c) 2009, Google Inc.
|
Copyright (c) 2009, Google Inc.
|
||||||
Copyright (c) 2017, 2020, MariaDB Corporation.
|
Copyright (c) 2017, 2021, MariaDB Corporation.
|
||||||
|
|
||||||
Portions of this file contain modifications contributed and copyrighted by
|
Portions of this file contain modifications contributed and copyrighted by
|
||||||
Google, Inc. Those modifications are gratefully acknowledged and are described
|
Google, Inc. Those modifications are gratefully acknowledged and are described
|
||||||
|
@ -87,12 +87,6 @@ log_free_check(void);
|
||||||
@param[in] len requested minimum size in bytes */
|
@param[in] len requested minimum size in bytes */
|
||||||
void log_buffer_extend(ulong len);
|
void log_buffer_extend(ulong len);
|
||||||
|
|
||||||
/** Read the current LSN. */
|
|
||||||
#define log_get_lsn() log_sys.get_lsn()
|
|
||||||
|
|
||||||
/** Read the durable LSN */
|
|
||||||
#define log_get_flush_lsn() log_sys.get_flushed_lsn()
|
|
||||||
|
|
||||||
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
|
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
|
||||||
and lsn - buf_pool.get_oldest_modification().
|
and lsn - buf_pool.get_oldest_modification().
|
||||||
@param[in] file_size requested innodb_log_file_size
|
@param[in] file_size requested innodb_log_file_size
|
||||||
|
@ -646,13 +640,14 @@ public:
|
||||||
|
|
||||||
bool is_initialised() const { return m_initialised; }
|
bool is_initialised() const { return m_initialised; }
|
||||||
|
|
||||||
lsn_t get_lsn() const { return lsn.load(std::memory_order_relaxed); }
|
lsn_t get_lsn(std::memory_order order= std::memory_order_relaxed) const
|
||||||
void set_lsn(lsn_t lsn) { this->lsn.store(lsn, std::memory_order_relaxed); }
|
{ return lsn.load(order); }
|
||||||
|
void set_lsn(lsn_t lsn) { this->lsn.store(lsn, std::memory_order_release); }
|
||||||
|
|
||||||
lsn_t get_flushed_lsn() const
|
lsn_t get_flushed_lsn() const
|
||||||
{ return flushed_to_disk_lsn.load(std::memory_order_relaxed); }
|
{ return flushed_to_disk_lsn.load(std::memory_order_acquire); }
|
||||||
void set_flushed_lsn(lsn_t lsn)
|
void set_flushed_lsn(lsn_t lsn)
|
||||||
{ flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); }
|
{ flushed_to_disk_lsn.store(lsn, std::memory_order_release); }
|
||||||
|
|
||||||
bool check_flush_or_checkpoint() const
|
bool check_flush_or_checkpoint() const
|
||||||
{
|
{
|
||||||
|
|
|
@ -835,12 +835,10 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
|
||||||
/** write to the log file up to the last log entry.
|
/** write to the log file up to the last log entry.
|
||||||
@param[in] sync whether we want the written log
|
@param[in] sync whether we want the written log
|
||||||
also to be flushed to disk. */
|
also to be flushed to disk. */
|
||||||
void
|
void log_buffer_flush_to_disk(bool sync)
|
||||||
log_buffer_flush_to_disk(
|
|
||||||
bool sync)
|
|
||||||
{
|
{
|
||||||
ut_ad(!srv_read_only_mode);
|
ut_ad(!srv_read_only_mode);
|
||||||
log_write_up_to(log_get_lsn(), sync);
|
log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), sync);
|
||||||
}
|
}
|
||||||
|
|
||||||
/********************************************************************
|
/********************************************************************
|
||||||
|
|
|
@ -1340,14 +1340,7 @@ void srv_monitor_task(void*)
|
||||||
where the lsn seems to decrease at times */
|
where the lsn seems to decrease at times */
|
||||||
|
|
||||||
lsn_t new_lsn = log_sys.get_lsn();
|
lsn_t new_lsn = log_sys.get_lsn();
|
||||||
if (new_lsn < old_lsn) {
|
ut_a(new_lsn >= old_lsn);
|
||||||
ib::error() << "Old log sequence number " << old_lsn << " was"
|
|
||||||
<< " greater than the new log sequence number "
|
|
||||||
<< new_lsn << ". Please submit a bug report to"
|
|
||||||
" https://jira.mariadb.org/";
|
|
||||||
ut_ad(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
old_lsn = new_lsn;
|
old_lsn = new_lsn;
|
||||||
|
|
||||||
/* Update the statistics collected for deciding LRU
|
/* Update the statistics collected for deciding LRU
|
||||||
|
|
|
@ -956,7 +956,7 @@ static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists)
|
||||||
log_sys.log.flush();
|
log_sys.log.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
ut_ad(flushed_lsn == log_get_lsn());
|
ut_ad(flushed_lsn == log_sys.get_lsn());
|
||||||
|
|
||||||
/* Check if the buffer pools are clean. If not
|
/* Check if the buffer pools are clean. If not
|
||||||
retry till it is clean. */
|
retry till it is clean. */
|
||||||
|
@ -1346,7 +1346,7 @@ dberr_t srv_start(bool create_new_db)
|
||||||
|
|
||||||
/* Suppress the message about
|
/* Suppress the message about
|
||||||
crash recovery. */
|
crash recovery. */
|
||||||
flushed_lsn = log_get_lsn();
|
flushed_lsn = log_sys.get_lsn();
|
||||||
goto file_checked;
|
goto file_checked;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1424,7 +1424,7 @@ file_checked:
|
||||||
|
|
||||||
buf_flush_sync();
|
buf_flush_sync();
|
||||||
|
|
||||||
flushed_lsn = log_get_lsn();
|
flushed_lsn = log_sys.get_lsn();
|
||||||
|
|
||||||
err = fil_write_flushed_lsn(flushed_lsn);
|
err = fil_write_flushed_lsn(flushed_lsn);
|
||||||
|
|
||||||
|
@ -1600,7 +1600,7 @@ file_checked:
|
||||||
InnoDB files is needed. */
|
InnoDB files is needed. */
|
||||||
ut_ad(!srv_force_recovery);
|
ut_ad(!srv_force_recovery);
|
||||||
ut_ad(recv_no_log_write);
|
ut_ad(recv_no_log_write);
|
||||||
err = fil_write_flushed_lsn(log_get_lsn());
|
err = fil_write_flushed_lsn(log_sys.get_lsn());
|
||||||
DBUG_ASSERT(!buf_pool.any_io_pending());
|
DBUG_ASSERT(!buf_pool.any_io_pending());
|
||||||
log_sys.log.close_file();
|
log_sys.log.close_file();
|
||||||
if (err == DB_SUCCESS) {
|
if (err == DB_SUCCESS) {
|
||||||
|
|
Loading…
Add table
Reference in a new issue