/*****************************************************************************

Copyright (c) 2020, 2022, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

#pragma once
#include <atomic>
#include "my_dbug.h"

/** Simple read-write lock based on std::atomic */
class rw_lock
{
  /** The lock word */
  std::atomic<uint32_t> lock;

protected:
  /** Available lock */
  static constexpr uint32_t UNLOCKED= 0;
  /** Flag to indicate that write_lock() is being held */
  static constexpr uint32_t WRITER= 1U << 31;
  /** Flag to indicate that write_lock_wait() is pending */
  static constexpr uint32_t WRITER_WAITING= 1U << 30;
  /** Flag to indicate that write_lock() or write_lock_wait() is pending */
  static constexpr uint32_t WRITER_PENDING= WRITER | WRITER_WAITING;
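  /* Lock word layout: bit 31 (WRITER) and bit 30 (WRITER_WAITING) are
  flags, and the remaining low 30 bits count the granted shared locks.
  For example, the value WRITER_WAITING | 3 means that 3 shared locks
  are held while another thread is waiting for an exclusive lock. */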

  /** Start waiting for an exclusive lock. */
  void write_lock_wait_start()
  {
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
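    /* Atomically set the WRITER_WAITING flag (bit 30) of the lock word
    with a single LOCK BTS instruction; the flag's previous value is not
    needed here. */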
    static_assert(WRITER_WAITING == 1U << 30, "compatibility");
    __asm__ __volatile__("lock btsl $30, %0" : "+m" (lock));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
    static_assert(WRITER_WAITING == 1U << 30, "compatibility");
    _interlockedbittestandset(reinterpret_cast<volatile long*>(&lock), 30);
#else
    lock.fetch_or(WRITER_WAITING, std::memory_order_relaxed);
#endif
  }
  /** Start waiting for an exclusive lock.
  @return current value of the lock word */
  uint32_t write_lock_wait_start_read()
  { return lock.fetch_or(WRITER_WAITING, std::memory_order_relaxed); }
  /** Wait for an exclusive lock.
  @param l the value of the lock word
  @return whether the exclusive lock was acquired */
  bool write_lock_wait_try(uint32_t &l)
  {
    return lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire,
                                        std::memory_order_relaxed);
  }
  /** Try to acquire a shared lock.
  @param l the value of the lock word
  @return whether the lock was acquired */
  bool read_trylock(uint32_t &l)
  {
    l= UNLOCKED;
    while (!lock.compare_exchange_strong(l, l + 1, std::memory_order_acquire,
                                         std::memory_order_relaxed))
    {
      DBUG_ASSERT(!(WRITER & l) || !(~WRITER_PENDING & l));
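      /* A writer is holding the lock or waiting for it: back off, so
      that incoming readers cannot starve a waiting writer. */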
      if (l & WRITER_PENDING)
        return false;
    }
    return true;
  }

  /** Wait for an exclusive lock.
  @return whether the exclusive lock was acquired */
  bool write_lock_poll()
  {
    auto l= WRITER_WAITING;
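    /* We expect the lock word to hold only our WRITER_WAITING flag;
    write_lock_wait_try() will then attempt the transition
    WRITER_WAITING -> WRITER, acquiring the exclusive lock. */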
    if (write_lock_wait_try(l))
      return true;
    if (!(l & WRITER_WAITING))
      /* write_lock() must have succeeded for another thread, clearing
      our WRITER_WAITING flag in the process; set the flag again */
      write_lock_wait_start();
    return false;
  }
  /** @return the lock word value */
  uint32_t value() const { return lock.load(std::memory_order_acquire); }

public:
  /** Default constructor */
  rw_lock() : lock(UNLOCKED) {}

  /** Release a shared lock.
  @return whether any writers may have to be woken up */
  bool read_unlock()
  {
    auto l= lock.fetch_sub(1, std::memory_order_release);
    DBUG_ASSERT(!(l & WRITER)); /* no write lock must have existed */
    DBUG_ASSERT(~(WRITER_PENDING) & l); /* at least one read lock */
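    /* (~WRITER_PENDING & l) is the reader count before the decrement;
    if it was 1, we were the last reader, and a waiting writer may
    have to be woken up by the caller. */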
    return (~WRITER_PENDING & l) == 1;
  }
  /** Release an exclusive lock */
  void write_unlock()
  {
    /* Below, we use fetch_sub(WRITER) instead of fetch_and(~WRITER).
    The reason is that on IA-32 and AMD64 it translates into the 80486
    instruction LOCK XADD, while fetch_and() translates into a loop
    around LOCK CMPXCHG. For other ISA either form should be fine. */
    static_assert(WRITER == 1U << 31, "compatibility");
    IF_DBUG_ASSERT(auto l=,) lock.fetch_sub(WRITER, std::memory_order_release);
    /* the write lock must have existed */
    DBUG_ASSERT(l & WRITER);
  }
  /** Try to acquire a shared lock.
  @return whether the lock was acquired */
  bool read_trylock() { uint32_t l; return read_trylock(l); }
  /** Try to acquire an exclusive lock.
  @return whether the lock was acquired */
  bool write_trylock()
  {
    auto l= UNLOCKED;
    return lock.compare_exchange_strong(l, WRITER, std::memory_order_acquire,
                                        std::memory_order_relaxed);
  }

  /** @return whether an exclusive lock is being held by any thread */
  bool is_write_locked() const { return !!(value() & WRITER); }
  /** @return whether any lock is being held or waited for by any thread */
  bool is_locked_or_waiting() const { return value() != 0; }
  /** @return whether any lock is being held by any thread */
  bool is_locked() const { return (value() & ~WRITER_WAITING) != 0; }
};
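
/* A minimal usage sketch (illustrative only; example_latch, example_data
and the functions below are hypothetical, not part of this header).
read_trylock() and write_trylock() never block; callers that need a
blocking acquisition must implement their own wait loop on top of them. */
#if 0 /* example only; not compiled */
static rw_lock example_latch;
static uint32_t example_data;

/* Attempt to read example_data under a shared lock. */
static bool example_read(uint32_t &out)
{
  if (!example_latch.read_trylock())
    return false;              /* a writer holds or is waiting for the lock */
  out= example_data;           /* multiple readers may run concurrently */
  example_latch.read_unlock(); /* the writer wake-up hint is ignored here */
  return true;
}

/* Attempt to update example_data under an exclusive lock. */
static bool example_write(uint32_t in)
{
  if (!example_latch.write_trylock())
    return false;              /* the lock word was not UNLOCKED: readers,
                               a writer, or a waiting writer exist */
  example_data= in;            /* we are the only lock holder */
  example_latch.write_unlock();
  return true;
}
#endif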