MDEV-24302 follow-up: RESET MASTER hangs
As pointed out by Andrei Elkin, the previous fix did not fix one race
condition that may have caused the observed hang.

innodb_log_flush_request(): If we are enqueueing the very first request
at the same time the log write is being completed, we must ensure that a
near-concurrent call to log_flush_notify() will not result in a missed
notification. We guarantee this by release-acquire operations on
log_requests.start and log_sys.flushed_to_disk_lsn.

log_flush_notify_and_unlock(): Cleanup: Always release the mutex.

log_sys_t::get_flushed_lsn(): Use acquire memory order.

log_sys_t::set_flushed_lsn(): Use release memory order.

log_sys_t::set_lsn(): Use release memory order.

log_sys_t::get_lsn(): Use relaxed memory order by default, and allow the
caller to specify acquire memory order explicitly. Whenever the
log_sys.mutex is being held or when log writes are prohibited during
startup, we can use a relaxed load. Likewise, in some assertions where
reading a stale value of log_sys.lsn should not matter, we can use a
relaxed load.

This will cause some additional instructions to be emitted on
architectures that do not implement Total Store Ordering (TSO), such as
POWER, ARM, and RISC-V Weak Memory Ordering (RVWMO).
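As an aside for readers less familiar with C++11 memory ordering, below is a
minimal standalone sketch of the release-acquire handshake that the message
describes. It is an illustration, not the InnoDB code: flushed_lsn and
request_head stand in for log_sys.flushed_to_disk_lsn and log_requests.start,
and the request list, log_requests.mutex and the commit_checkpoint_notify_ha()
callbacks are omitted.

// Standalone sketch of the release-acquire pairing (illustration only).
#include <atomic>
#include <cstdint>

static std::atomic<uint64_t> flushed_lsn{0};            // log_sys.flushed_to_disk_lsn
static std::atomic<const void*> request_head{nullptr};  // log_requests.start

// Completing a log write: publish the durable LSN with release, then check
// for pending requests with acquire (the log_flush_notify() side).
void log_write_completed(uint64_t lsn)
{
  flushed_lsn.store(lsn, std::memory_order_release);     // set_flushed_lsn()
  if (request_head.load(std::memory_order_acquire) != nullptr)
  {
    /* notify the requests whose LSN is <= lsn */
  }
}

// Enqueueing the very first request: publish it with release, then re-read
// the durable LSN with acquire (the innodb_log_flush_request() side; the
// real code uses compare_exchange_strong while holding log_requests.mutex).
void first_request_enqueued(const void *req, uint64_t req_lsn)
{
  request_head.store(req, std::memory_order_release);
  if (flushed_lsn.load(std::memory_order_acquire) >= req_lsn)  // get_flushed_lsn()
  {
    /* the write already completed; notify this request right away */
  }
}

Each side performs its own store before re-checking the other side's variable.
This mirrors the diff below, where innodb_log_flush_request() re-reads
flush_lsn after a successful compare_exchange, and log_flush_notify() loads
log_requests.start with acquire semantics after the flushed LSN has been
stored with release semantics.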
This commit is contained in:
parent 0df74a0197
commit 8c2e3259c1
7 changed files with 71 additions and 69 deletions
@@ -1,4 +1,4 @@
-/* Copyright (c) 2020, MariaDB
+/* Copyright (c) 2020, 2021, MariaDB
 
 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
@@ -41,7 +41,9 @@ public:
   Atomic_relaxed(Type val) : m(val) {}
   Atomic_relaxed() {}
 
-  operator Type() const { return m.load(std::memory_order_relaxed); }
+  Type load(std::memory_order o= std::memory_order_relaxed) const
+  { return m.load(o); }
+  operator Type() const { return m.load(); }
   Type operator=(const Type val)
   { m.store(val, std::memory_order_relaxed); return val; }
   Type operator=(const Atomic_relaxed<Type> &rhs) { return *this= Type{rhs}; }
@@ -1559,7 +1559,7 @@ static void log_flush(void *)
   fil_flush_file_spaces();
 
   /* Guarantee progress for buf_flush_lists(). */
-  log_write_up_to(log_sys.get_lsn(), true);
+  log_buffer_flush_to_disk(true);
   log_flush_pending.clear();
 }
 
@@ -1577,15 +1577,17 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
   if (n_flush)
     return 0;
 
-  if (log_sys.get_lsn() > log_sys.get_flushed_lsn())
+  lsn_t flushed_lsn= log_sys.get_flushed_lsn();
+  if (log_sys.get_lsn() > flushed_lsn)
   {
     log_flush_task.wait();
-    if (log_sys.get_lsn() > log_sys.get_flushed_lsn() &&
+    flushed_lsn= log_sys.get_flushed_lsn();
+    if (log_sys.get_lsn() > flushed_lsn &&
         !log_flush_pending.test_and_set())
       srv_thread_pool->submit_task(&log_flush_task);
 #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
     if (UNIV_UNLIKELY(ibuf_debug))
-      log_write_up_to(log_sys.get_lsn(), true);
+      log_buffer_flush_to_disk(true);
 #endif
   }
 
@@ -1730,7 +1732,7 @@ static bool log_checkpoint()
 /** Make a checkpoint. */
 ATTRIBUTE_COLD void log_make_checkpoint()
 {
-  buf_flush_wait_flushed(log_sys.get_lsn());
+  buf_flush_wait_flushed(log_sys.get_lsn(std::memory_order_acquire));
   while (!log_checkpoint());
 }
 
@@ -1182,7 +1182,7 @@ MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) static
 struct
 {
   /** first request */
-  Atomic_relaxed<log_flush_request*> start;
+  std::atomic<log_flush_request*> start;
   /** last request */
   log_flush_request *end;
   /** mutex protecting this object */
@@ -4460,13 +4460,11 @@ innobase_rollback_trx(
 
 /** Invoke commit_checkpoint_notify_ha() on completed log flush requests.
 @param pending log_requests.start
-@param lsn log_sys.get_flushed_lsn()
-@return whether something was notified (and log_requests.mutex released) */
-static bool log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
+@param lsn log_sys.get_flushed_lsn() */
+static void log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
 {
   mysql_mutex_assert_owner(&log_requests.mutex);
-  ut_ad(pending == log_requests.start);
-
+  ut_ad(pending == log_requests.start.load(std::memory_order_relaxed));
   log_flush_request *entry= pending, *last= nullptr;
   /* Process the first requests that have been completed. Since
   the list is not necessarily in ascending order of LSN, we may
@@ -4478,12 +4476,15 @@ static bool log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
   for (; entry && entry->lsn <= lsn; last= entry, entry= entry->next);
 
   if (!last)
-    return false;
+  {
+    mysql_mutex_unlock(&log_requests.mutex);
+    return;
+  }
 
   /* Detach the head of the list that corresponds to persisted log writes. */
-  log_requests.start= entry;
   if (!entry)
-    log_requests.end= nullptr;
+    log_requests.end= entry;
+  log_requests.start.store(entry, std::memory_order_relaxed);
   mysql_mutex_unlock(&log_requests.mutex);
 
   /* Now that we have released the mutex, notify the submitters
@@ -4496,21 +4497,16 @@ static bool log_flush_notify_and_unlock(log_flush_request *pending, lsn_t lsn)
     my_free(entry);
   }
   while (entry != last);
-
-  return true;
 }
 
 /** Invoke commit_checkpoint_notify_ha() to notify that outstanding
 log writes have been completed. */
 void log_flush_notify(lsn_t flush_lsn)
 {
-  if (log_requests.start)
+  if (auto pending= log_requests.start.load(std::memory_order_acquire))
   {
     mysql_mutex_lock(&log_requests.mutex);
-    if (log_flush_request *pending= log_requests.start)
-      if (log_flush_notify_and_unlock(pending, flush_lsn))
-        return;
-    mysql_mutex_unlock(&log_requests.mutex);
+    log_flush_notify_and_unlock(pending, flush_lsn);
   }
 }
 
@@ -4520,8 +4516,9 @@ checkpoint complete when we have flushed the redo log.
 If we have already flushed all relevant redo log, we notify immediately.*/
 static void innodb_log_flush_request(void *cookie)
 {
-  const lsn_t lsn= log_sys.get_lsn();
   lsn_t flush_lsn= log_sys.get_flushed_lsn();
+  /* Load lsn relaxed after flush_lsn was loaded from the same cache line */
+  const lsn_t lsn= log_sys.get_lsn();
 
   if (flush_lsn >= lsn)
     /* All log is already persistent. */;
|
|||
req->cookie= cookie;
|
||||
req->lsn= lsn;
|
||||
|
||||
log_flush_request *start= nullptr;
|
||||
|
||||
mysql_mutex_lock(&log_requests.mutex);
|
||||
auto old_end= log_requests.end;
|
||||
log_requests.end= req;
|
||||
if (old_end)
|
||||
/* In order to prevent a race condition where log_flush_notify()
|
||||
would skip a notification due to, we must update log_requests.start from
|
||||
nullptr (empty) to the first req using std::memory_order_release. */
|
||||
if (log_requests.start.compare_exchange_strong(start, req,
|
||||
std::memory_order_release,
|
||||
std::memory_order_relaxed))
|
||||
{
|
||||
ut_ad(!log_requests.end);
|
||||
start= req;
|
||||
/* In case log_flush_notify() executed
|
||||
log_requests.start.load(std::memory_order_acquire) right before
|
||||
our successful compare_exchange, we must re-read flush_lsn to
|
||||
ensure that our request will be notified immediately if applicable. */
|
||||
flush_lsn= log_sys.get_flushed_lsn();
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Append the entry to the list. Because we determined req->lsn before
|
||||
acquiring the mutex, this list may not be ordered by req->lsn,
|
||||
even though log_flush_notify_and_unlock() assumes so. */
|
||||
old_end->next= req;
|
||||
/* This hopefully addresses the hang that was reported in MDEV-24302.
|
||||
Upon receiving a new request, we will notify old requests of
|
||||
completion. */
|
||||
if (log_flush_notify_and_unlock(log_requests.start, flush_lsn))
|
||||
return;
|
||||
log_requests.end->next= req;
|
||||
}
|
||||
else
|
||||
log_requests.start= req;
|
||||
mysql_mutex_unlock(&log_requests.mutex);
|
||||
|
||||
log_requests.end= req;
|
||||
|
||||
/* This hopefully addresses the hang that was reported in MDEV-24302.
|
||||
Upon receiving a new request, we will notify old requests of
|
||||
completion. */
|
||||
log_flush_notify_and_unlock(start, flush_lsn);
|
||||
return;
|
||||
}
|
||||
else
|
||||
|
@@ -18345,16 +18356,17 @@ checkpoint_now_set(THD*, st_mysql_sys_var*, void*, const void* save)
 	if (*(my_bool*) save) {
 		mysql_mutex_unlock(&LOCK_global_system_variables);
 
-		while (log_sys.last_checkpoint_lsn
+		lsn_t lsn;
+
+		while (log_sys.last_checkpoint_lsn.load(
+			       std::memory_order_acquire)
 		       + SIZE_OF_FILE_CHECKPOINT
-		       < log_sys.get_lsn()) {
+		       < (lsn= log_sys.get_lsn(std::memory_order_acquire))) {
 			log_make_checkpoint();
 			log_sys.log.flush();
 		}
 
-		dberr_t err = fil_write_flushed_lsn(log_sys.get_lsn());
-
-		if (err != DB_SUCCESS) {
+		if (dberr_t err = fil_write_flushed_lsn(lsn)) {
 			ib::warn() << "Checkpoint set failed " << err;
 		}
 
@@ -2,7 +2,7 @@
 
 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
 Copyright (c) 2009, Google Inc.
-Copyright (c) 2017, 2020, MariaDB Corporation.
+Copyright (c) 2017, 2021, MariaDB Corporation.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -87,12 +87,6 @@ log_free_check(void);
 @param[in] len requested minimum size in bytes */
 void log_buffer_extend(ulong len);
 
-/** Read the current LSN. */
-#define log_get_lsn() log_sys.get_lsn()
-
-/** Read the durable LSN */
-#define log_get_flush_lsn() log_sys.get_flushed_lsn()
-
 /** Calculate the recommended highest values for lsn - last_checkpoint_lsn
 and lsn - buf_pool.get_oldest_modification().
 @param[in] file_size requested innodb_log_file_size
@@ -646,13 +640,14 @@ public:
 
   bool is_initialised() const { return m_initialised; }
 
-  lsn_t get_lsn() const { return lsn.load(std::memory_order_relaxed); }
-  void set_lsn(lsn_t lsn) { this->lsn.store(lsn, std::memory_order_relaxed); }
+  lsn_t get_lsn(std::memory_order order= std::memory_order_relaxed) const
+  { return lsn.load(order); }
+  void set_lsn(lsn_t lsn) { this->lsn.store(lsn, std::memory_order_release); }
 
   lsn_t get_flushed_lsn() const
-  { return flushed_to_disk_lsn.load(std::memory_order_relaxed); }
+  { return flushed_to_disk_lsn.load(std::memory_order_acquire); }
   void set_flushed_lsn(lsn_t lsn)
-  { flushed_to_disk_lsn.store(lsn, std::memory_order_relaxed); }
+  { flushed_to_disk_lsn.store(lsn, std::memory_order_release); }
 
   bool check_flush_or_checkpoint() const
   {
@@ -835,12 +835,10 @@ void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
 /** write to the log file up to the last log entry.
 @param[in] sync whether we want the written log
 also to be flushed to disk. */
-void
-log_buffer_flush_to_disk(
-	bool sync)
+void log_buffer_flush_to_disk(bool sync)
 {
-	ut_ad(!srv_read_only_mode);
-	log_write_up_to(log_get_lsn(), sync);
+  ut_ad(!srv_read_only_mode);
+  log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), sync);
 }
 
 /********************************************************************
@@ -1340,14 +1340,7 @@ void srv_monitor_task(void*)
 	where the lsn seems to decrease at times */
 
 	lsn_t new_lsn = log_sys.get_lsn();
-	if (new_lsn < old_lsn) {
-		ib::error() << "Old log sequence number " << old_lsn << " was"
-			<< " greater than the new log sequence number "
-			<< new_lsn << ". Please submit a bug report to"
-			" https://jira.mariadb.org/";
-		ut_ad(0);
-	}
-
+	ut_a(new_lsn >= old_lsn);
 	old_lsn = new_lsn;
 
 	/* Update the statistics collected for deciding LRU
@@ -956,7 +956,7 @@ static lsn_t srv_prepare_to_delete_redo_log_file(bool old_exists)
 		log_sys.log.flush();
 	}
 
-	ut_ad(flushed_lsn == log_get_lsn());
+	ut_ad(flushed_lsn == log_sys.get_lsn());
 
 	/* Check if the buffer pools are clean. If not
 	retry till it is clean. */
@@ -1346,7 +1346,7 @@ dberr_t srv_start(bool create_new_db)
 
 			/* Suppress the message about
 			crash recovery. */
-			flushed_lsn = log_get_lsn();
+			flushed_lsn = log_sys.get_lsn();
 			goto file_checked;
 		}
 
@@ -1424,7 +1424,7 @@ file_checked:
 
 		buf_flush_sync();
 
-		flushed_lsn = log_get_lsn();
+		flushed_lsn = log_sys.get_lsn();
 
 		err = fil_write_flushed_lsn(flushed_lsn);
 
@@ -1600,7 +1600,7 @@ file_checked:
 		InnoDB files is needed. */
 		ut_ad(!srv_force_recovery);
 		ut_ad(recv_no_log_write);
-		err = fil_write_flushed_lsn(log_get_lsn());
+		err = fil_write_flushed_lsn(log_sys.get_lsn());
 		DBUG_ASSERT(!buf_pool.any_io_pending());
 		log_sys.log.close_file();
 		if (err == DB_SUCCESS) {