2020-11-30 11:47:09 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2022-02-18 15:13:56 +02:00
|
|
|
Copyright (c) 2020, 2022, MariaDB Corporation.
|
2020-11-30 11:47:09 +02:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
#include "srw_lock.h"
|
|
|
|
#include "srv0srv.h"
|
2020-12-04 19:02:58 +02:00
|
|
|
#include "my_cpu.h"
|
2021-10-22 12:38:45 +03:00
|
|
|
#include "transactional_lock_guard.h"
|
|
|
|
|
|
|
|
#ifdef NO_ELISION
|
|
|
|
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
|
|
|
|
# include <intrin.h>
|
|
|
|
bool have_transactional_memory;
|
|
|
|
bool transactional_lock_enabled()
|
|
|
|
{
|
|
|
|
int regs[4];
|
|
|
|
__cpuid(regs, 0);
|
|
|
|
if (regs[0] < 7)
|
|
|
|
return false;
|
|
|
|
__cpuidex(regs, 7, 0);
|
|
|
|
/* Restricted Transactional Memory (RTM) */
|
|
|
|
have_transactional_memory= regs[1] & 1U << 11;
|
|
|
|
return have_transactional_memory;
|
|
|
|
}
|
|
|
|
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
|
|
|
|
# include <cpuid.h>
|
|
|
|
bool have_transactional_memory;
|
|
|
|
bool transactional_lock_enabled()
|
|
|
|
{
|
|
|
|
if (__get_cpuid_max(0, nullptr) < 7)
|
|
|
|
return false;
|
|
|
|
unsigned eax, ebx, ecx, edx;
|
|
|
|
__cpuid_count(7, 0, eax, ebx, ecx, edx);
|
|
|
|
/* Restricted Transactional Memory (RTM) */
|
|
|
|
have_transactional_memory= ebx & 1U << 11;
|
|
|
|
return have_transactional_memory;
|
|
|
|
}
|
|
|
|
|
|
|
|
# ifdef UNIV_DEBUG
|
|
|
|
/** Debug helper.
@return whether transactional memory was detected and _xtest()
reports that a transaction is currently executing */
TRANSACTIONAL_TARGET
bool xtest()
{
  if (!have_transactional_memory)
    return false; /* never execute _xtest() unless RTM was detected */
  return _xtest();
}
|
|
|
|
# endif
|
2022-02-25 18:24:01 +11:00
|
|
|
#elif defined __powerpc64__ || defined __s390__
|
2022-03-02 11:48:24 +11:00
|
|
|
# include <htmxlintrin.h>
|
2022-02-25 18:24:01 +11:00
|
|
|
# include <setjmp.h>
|
|
|
|
# include <signal.h>
|
2022-03-02 11:48:24 +11:00
|
|
|
|
|
|
|
__attribute__((target("htm"),hot))
|
|
|
|
bool xbegin()
|
|
|
|
{
|
|
|
|
return have_transactional_memory &&
|
|
|
|
__TM_simple_begin() == _HTM_TBEGIN_STARTED;
|
|
|
|
}
|
|
|
|
|
|
|
|
__attribute__((target("htm"),hot))
|
|
|
|
void xabort() { __TM_abort(); }
|
|
|
|
|
|
|
|
__attribute__((target("htm"),hot))
|
|
|
|
void xend() { __TM_end(); }
|
2022-02-25 18:24:01 +11:00
|
|
|
|
2021-10-22 12:38:45 +03:00
|
|
|
bool have_transactional_memory;
|
2022-02-25 18:24:01 +11:00
|
|
|
static sigjmp_buf ill_jmp;
|
|
|
|
static void ill_handler(int sig)
|
|
|
|
{
|
|
|
|
siglongjmp(ill_jmp, sig);
|
|
|
|
}
|
|
|
|
/**
|
|
|
|
Here we are testing we can do a transaction without SIGILL
|
|
|
|
and a 1 instruction store can succeed.
|
|
|
|
*/
|
|
|
|
__attribute__((noinline))
|
|
|
|
static void test_tm(bool *r)
|
|
|
|
{
|
|
|
|
if (__TM_simple_begin() == _HTM_TBEGIN_STARTED)
|
|
|
|
{
|
|
|
|
*r= true;
|
|
|
|
__TM_end();
|
|
|
|
}
|
|
|
|
}
|
2021-10-22 12:38:45 +03:00
|
|
|
bool transactional_lock_enabled()
|
|
|
|
{
|
2022-02-25 18:24:01 +11:00
|
|
|
bool r= false;
|
|
|
|
sigset_t oset;
|
|
|
|
struct sigaction ill_act, oact_ill;
|
|
|
|
|
|
|
|
memset(&ill_act, 0, sizeof(ill_act));
|
|
|
|
ill_act.sa_handler = ill_handler;
|
|
|
|
sigfillset(&ill_act.sa_mask);
|
|
|
|
sigdelset(&ill_act.sa_mask, SIGILL);
|
|
|
|
|
|
|
|
sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
|
|
|
|
sigaction(SIGILL, &ill_act, &oact_ill);
|
|
|
|
if (sigsetjmp(ill_jmp, 1) == 0)
|
|
|
|
{
|
|
|
|
test_tm(&r);
|
|
|
|
}
|
|
|
|
sigaction(SIGILL, &oact_ill, NULL);
|
|
|
|
sigprocmask(SIG_SETMASK, &oset, NULL);
|
|
|
|
return r;
|
2021-10-22 12:38:45 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
# ifdef UNIV_DEBUG
|
2022-03-02 11:48:24 +11:00
|
|
|
__attribute__((target("htm"),hot))
|
|
|
|
bool xtest()
|
2021-10-22 12:38:45 +03:00
|
|
|
{
|
|
|
|
return have_transactional_memory &&
|
|
|
|
_HTM_STATE (__builtin_ttest ()) == _HTM_TRANSACTIONAL;
|
|
|
|
}
|
|
|
|
# endif
|
|
|
|
#endif
|
2020-11-30 11:47:09 +02:00
|
|
|
|
2021-09-06 12:22:33 +03:00
|
|
|
/** @return the parameter for srw_pause() */
|
|
|
|
static inline unsigned srw_pause_delay()
|
|
|
|
{
|
|
|
|
return my_cpu_relax_multiplier / 4 * srv_spin_wait_delay;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Pause the CPU for some time, with no memory accesses. */
|
|
|
|
static inline void srw_pause(unsigned delay)
|
|
|
|
{
|
|
|
|
HMT_low();
|
|
|
|
while (delay--)
|
|
|
|
MY_RELAX_CPU();
|
|
|
|
HMT_medium();
|
|
|
|
}
|
|
|
|
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
#ifdef SUX_LOCK_GENERIC
|
2022-04-06 12:51:27 +03:00
|
|
|
# ifndef PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP
|
|
|
|
/** Acquire the mutex after a failed attempt: spin first, then block.
Up to srv_n_spin_wait_rounds rounds are made; each round pauses for
srw_pause_delay() iterations and then retries wr_lock_try(). If every
round fails, fall back to a blocking pthread_mutex_lock(). */
template<> void pthread_mutex_wrapper<true>::wr_wait()
{
  const unsigned pause= srw_pause_delay();
  auto rounds= srv_n_spin_wait_rounds;

  while (rounds)
  {
    srw_pause(pause);
    if (wr_lock_try())
      return;
    rounds--;
  }

  pthread_mutex_lock(&lock);
}
|
2022-04-06 12:51:27 +03:00
|
|
|
# endif
|
2021-09-28 17:19:06 +03:00
|
|
|
|
2022-04-06 12:51:27 +03:00
|
|
|
template void ssux_lock_impl<false>::init();
|
|
|
|
template void ssux_lock_impl<true>::init();
|
|
|
|
template void ssux_lock_impl<false>::destroy();
|
|
|
|
template void ssux_lock_impl<true>::destroy();
|
2020-11-30 11:47:09 +02:00
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
2022-04-06 12:51:27 +03:00
|
|
|
inline void srw_mutex_impl<spinloop>::wait(uint32_t lk)
|
2020-11-30 11:47:09 +02:00
|
|
|
{
|
|
|
|
pthread_mutex_lock(&mutex);
|
2022-04-06 12:51:27 +03:00
|
|
|
while (lock.load(std::memory_order_relaxed) == lk)
|
|
|
|
pthread_cond_wait(&cond, &mutex);
|
2020-11-30 11:47:09 +02:00
|
|
|
pthread_mutex_unlock(&mutex);
|
|
|
|
}
|
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
2022-04-06 12:51:27 +03:00
|
|
|
inline void ssux_lock_impl<spinloop>::wait(uint32_t lk)
|
2020-11-30 11:47:09 +02:00
|
|
|
{
|
2022-04-06 12:51:27 +03:00
|
|
|
pthread_mutex_lock(&writer.mutex);
|
|
|
|
while (readers.load(std::memory_order_relaxed) == lk)
|
|
|
|
pthread_cond_wait(&readers_cond, &writer.mutex);
|
|
|
|
pthread_mutex_unlock(&writer.mutex);
|
2020-11-30 11:47:09 +02:00
|
|
|
}
|
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
2022-04-06 12:51:27 +03:00
|
|
|
void srw_mutex_impl<spinloop>::wake()
|
2020-11-30 11:47:09 +02:00
|
|
|
{
|
|
|
|
pthread_mutex_lock(&mutex);
|
2022-04-06 12:51:27 +03:00
|
|
|
pthread_cond_signal(&cond);
|
2020-11-30 11:47:09 +02:00
|
|
|
pthread_mutex_unlock(&mutex);
|
|
|
|
}
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
2022-04-06 12:51:27 +03:00
|
|
|
void ssux_lock_impl<spinloop>::wake()
|
2020-11-30 11:47:09 +02:00
|
|
|
{
|
2022-04-06 12:51:27 +03:00
|
|
|
pthread_mutex_lock(&writer.mutex);
|
|
|
|
pthread_cond_signal(&readers_cond);
|
|
|
|
pthread_mutex_unlock(&writer.mutex);
|
2020-11-30 11:47:09 +02:00
|
|
|
}
|
2022-04-06 12:51:27 +03:00
|
|
|
#else
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
static_assert(4 == sizeof(rw_lock), "ABI");
|
|
|
|
# ifdef _WIN32
|
|
|
|
# include <synchapi.h>
|
2020-11-30 11:47:09 +02:00
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
inline void srw_mutex_impl<spinloop>::wait(uint32_t lk)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
{ WaitOnAddress(&lock, &lk, 4, INFINITE); }
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
void srw_mutex_impl<spinloop>::wake() { WakeByAddressSingle(&lock); }
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
inline void ssux_lock_impl<spinloop>::wait(uint32_t lk)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
{ WaitOnAddress(&readers, &lk, 4, INFINITE); }
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
void ssux_lock_impl<spinloop>::wake() { WakeByAddressSingle(&readers); }
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
# else
|
|
|
|
# ifdef __linux__
|
|
|
|
# include <linux/futex.h>
|
|
|
|
# include <sys/syscall.h>
|
|
|
|
# define SRW_FUTEX(a,op,n) \
|
|
|
|
syscall(SYS_futex, a, FUTEX_ ## op ## _PRIVATE, n, nullptr, nullptr, 0)
|
|
|
|
# elif defined __OpenBSD__
|
|
|
|
# include <sys/time.h>
|
|
|
|
# include <sys/futex.h>
|
|
|
|
# define SRW_FUTEX(a,op,n) \
|
|
|
|
futex((volatile uint32_t*) a, FUTEX_ ## op, n, nullptr, nullptr)
|
2022-02-18 15:13:56 +02:00
|
|
|
# elif defined __FreeBSD__
|
|
|
|
# include <sys/types.h>
|
|
|
|
# include <sys/umtx.h>
|
|
|
|
# define FUTEX_WAKE UMTX_OP_WAKE_PRIVATE
|
|
|
|
# define FUTEX_WAIT UMTX_OP_WAIT_UINT_PRIVATE
|
|
|
|
# define SRW_FUTEX(a,op,n) _umtx_op(a, FUTEX_ ## op, n, nullptr, nullptr)
|
|
|
|
# elif defined __DragonFly__
|
|
|
|
# include <unistd.h>
|
|
|
|
# define FUTEX_WAKE(a,n) umtx_wakeup(a,n)
|
|
|
|
# define FUTEX_WAIT(a,n) umtx_sleep(a,n,0)
|
|
|
|
# define SRW_FUTEX(a,op,n) FUTEX_ ## op((volatile int*) a, int(n))
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
# else
|
|
|
|
# error "no futex support"
|
|
|
|
# endif
|
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
inline void srw_mutex_impl<spinloop>::wait(uint32_t lk)
|
|
|
|
{ SRW_FUTEX(&lock, WAIT, lk); }
|
|
|
|
template<bool spinloop>
|
|
|
|
void srw_mutex_impl<spinloop>::wake() { SRW_FUTEX(&lock, WAKE, 1); }
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
inline void ssux_lock_impl<spinloop>::wait(uint32_t lk)
|
|
|
|
{ SRW_FUTEX(&readers, WAIT, lk); }
|
|
|
|
template<bool spinloop>
|
|
|
|
void ssux_lock_impl<spinloop>::wake() { SRW_FUTEX(&readers, WAKE, 1); }
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
# endif
|
2022-04-06 12:51:27 +03:00
|
|
|
#endif
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template void srw_mutex_impl<false>::wake();
|
|
|
|
template void ssux_lock_impl<false>::wake();
|
|
|
|
template void srw_mutex_impl<true>::wake();
|
|
|
|
template void ssux_lock_impl<true>::wake();
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
|
2021-09-29 10:15:07 +03:00
|
|
|
/*
|
|
|
|
|
|
|
|
Unfortunately, compilers targeting IA-32 or AMD64 currently cannot
|
|
|
|
translate the following single-bit operations into Intel 80386 instructions:
|
|
|
|
|
|
|
|
m.fetch_or(1<<b) & 1<<b         LOCK BTS b, m
|
|
|
|
m.fetch_and(~(1<<b)) & 1<<b     LOCK BTR b, m
|
|
|
|
m.fetch_xor(1<<b) & 1<<b        LOCK BTC b, m
|
|
|
|
|
|
|
|
Hence, we will manually translate fetch_or() using GCC-style inline
|
|
|
|
assembler code or a Microsoft intrinsic function.
|
|
|
|
|
|
|
|
*/
|
2021-10-12 07:47:10 +03:00
|
|
|
|
|
|
|
#if defined __clang_major__ && __clang_major__ < 10
|
|
|
|
/* Only clang-10 introduced support for asm goto */
|
2022-04-06 12:51:27 +03:00
|
|
|
#elif defined __APPLE__
|
|
|
|
/* At least some versions of Apple Xcode do not support asm goto */
|
2021-10-12 07:47:10 +03:00
|
|
|
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
|
2021-09-29 10:15:07 +03:00
|
|
|
/** GCC-style variant: atomically set bit number `bit` in `mem` using
LOCK BTS, and jump to `label` if the carry flag is set afterwards,
that is, if the bit was already set before the operation.
@param mem    object passed to the asm statement as an "m" operand
@param bit    bit number; it is stringified into the instruction, so it
              must be an integer literal
@param label  local label to jump to (asm goto target) */
# define IF_FETCH_OR_GOTO(mem, bit, label) \
|
|
|
|
__asm__ goto("lock btsl $" #bit ", %0\n\t" \
|
|
|
|
"jc %l1" : : "m" (mem) : "cc", "memory" : label);
|
|
|
|
/** GCC-style variant: atomically set bit number `bit` in `mem` using
LOCK BTS, and jump to `label` if the carry flag is clear afterwards,
that is, if the bit was not set before the operation.
@param mem    object passed to the asm statement as an "m" operand
@param bit    bit number; it is stringified into the instruction, so it
              must be an integer literal
@param label  local label to jump to (asm goto target) */
# define IF_NOT_FETCH_OR_GOTO(mem, bit, label) \
|
|
|
|
__asm__ goto("lock btsl $" #bit ", %0\n\t" \
|
|
|
|
"jnc %l1" : : "m" (mem) : "cc", "memory" : label);
|
2022-01-28 16:42:37 +02:00
|
|
|
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
|
2021-09-29 10:15:07 +03:00
|
|
|
/** Microsoft variant: set bit number `bit` in `mem` with the
_interlockedbittestandset intrinsic, and jump to `label` if the
intrinsic returns nonzero (the bit was already set).
Note: this expands to an unbraced `if (...) goto label;` statement.
@param mem    object whose address is reinterpreted as volatile long*
@param bit    bit number passed to the intrinsic
@param label  label to jump to */
# define IF_FETCH_OR_GOTO(mem, bit, label) \
|
|
|
|
if (_interlockedbittestandset(reinterpret_cast<volatile long*>(&mem), bit)) \
|
|
|
|
goto label;
|
|
|
|
/** Microsoft variant: set bit number `bit` in `mem` with the
_interlockedbittestandset intrinsic, and jump to `label` if the
intrinsic returns zero (the bit was not set before).
Note: this expands to an unbraced `if (...) goto label;` statement.
@param mem    object whose address is reinterpreted as volatile long*
@param bit    bit number passed to the intrinsic
@param label  label to jump to */
# define IF_NOT_FETCH_OR_GOTO(mem, bit, label) \
|
|
|
|
if (!_interlockedbittestandset(reinterpret_cast<volatile long*>(&mem), bit))\
|
|
|
|
goto label;
|
|
|
|
#endif
|
|
|
|
|
2022-04-06 12:51:27 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
void srw_mutex_impl<spinloop>::wait_and_lock()
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
{
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
uint32_t lk= 1 + lock.fetch_add(1, std::memory_order_relaxed);
|
2021-09-06 12:22:33 +03:00
|
|
|
|
2022-04-06 12:51:27 +03:00
|
|
|
if (spinloop)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
{
|
2022-04-06 12:51:27 +03:00
|
|
|
const unsigned delay= srw_pause_delay();
|
|
|
|
|
|
|
|
for (auto spin= srv_n_spin_wait_rounds;;)
|
2021-09-06 12:16:26 +03:00
|
|
|
{
|
2022-04-06 12:51:27 +03:00
|
|
|
DBUG_ASSERT(~HOLDER & lk);
|
|
|
|
if (lk & HOLDER)
|
|
|
|
lk= lock.load(std::memory_order_relaxed);
|
|
|
|
else
|
|
|
|
{
|
2021-09-29 10:15:07 +03:00
|
|
|
#ifdef IF_NOT_FETCH_OR_GOTO
|
2022-04-06 12:51:27 +03:00
|
|
|
static_assert(HOLDER == (1U << 31), "compatibility");
|
|
|
|
IF_NOT_FETCH_OR_GOTO(*this, 31, acquired);
|
|
|
|
lk|= HOLDER;
|
2021-09-29 10:15:07 +03:00
|
|
|
#else
|
2022-04-06 12:51:27 +03:00
|
|
|
if (!((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) & HOLDER))
|
|
|
|
goto acquired;
|
2021-09-28 17:17:59 +03:00
|
|
|
#endif
|
2022-04-06 12:51:27 +03:00
|
|
|
srw_pause(delay);
|
|
|
|
}
|
|
|
|
if (!--spin)
|
|
|
|
break;
|
2021-09-06 12:16:26 +03:00
|
|
|
}
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
}
|
|
|
|
|
2021-09-28 17:17:59 +03:00
|
|
|
for (;;)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
{
|
2021-09-28 17:17:59 +03:00
|
|
|
DBUG_ASSERT(~HOLDER & lk);
|
2021-09-06 12:16:26 +03:00
|
|
|
if (lk & HOLDER)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
{
|
2021-09-28 17:17:59 +03:00
|
|
|
wait(lk);
|
2021-09-29 10:15:07 +03:00
|
|
|
#ifdef IF_FETCH_OR_GOTO
|
2021-09-28 17:17:59 +03:00
|
|
|
reload:
|
|
|
|
#endif
|
2021-09-06 12:16:26 +03:00
|
|
|
lk= lock.load(std::memory_order_relaxed);
|
|
|
|
}
|
2021-09-28 17:17:59 +03:00
|
|
|
else
|
|
|
|
{
|
2021-09-29 10:15:07 +03:00
|
|
|
#ifdef IF_FETCH_OR_GOTO
|
2021-09-28 17:17:59 +03:00
|
|
|
static_assert(HOLDER == (1U << 31), "compatibility");
|
2021-09-29 10:15:07 +03:00
|
|
|
IF_FETCH_OR_GOTO(*this, 31, reload);
|
2021-09-28 17:17:59 +03:00
|
|
|
#else
|
2021-09-29 10:15:07 +03:00
|
|
|
if ((lk= lock.fetch_or(HOLDER, std::memory_order_relaxed)) & HOLDER)
|
|
|
|
continue;
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
DBUG_ASSERT(lk);
|
2021-09-29 10:15:07 +03:00
|
|
|
#endif
|
|
|
|
acquired:
|
2021-09-06 12:16:26 +03:00
|
|
|
std::atomic_thread_fence(std::memory_order_acquire);
|
|
|
|
return;
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
}
|
|
|
|
}
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
}
|
2020-12-03 10:42:18 +02:00
|
|
|
|
2022-04-06 12:51:27 +03:00
|
|
|
/* Explicit instantiations of srw_mutex_impl::wait_and_lock() for both
values of the boolean template argument. */
template void srw_mutex_impl<false>::wait_and_lock();
|
|
|
|
template void srw_mutex_impl<true>::wait_and_lock();
|
2021-09-06 12:32:24 +03:00
|
|
|
|
|
|
|
template<bool spinloop>
|
|
|
|
void ssux_lock_impl<spinloop>::wr_wait(uint32_t lk)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
{
|
|
|
|
DBUG_ASSERT(writer.is_locked());
|
|
|
|
DBUG_ASSERT(lk);
|
|
|
|
DBUG_ASSERT(lk < WRITER);
|
2021-09-28 17:19:26 +03:00
|
|
|
|
|
|
|
if (spinloop)
|
|
|
|
{
|
|
|
|
const unsigned delay= srw_pause_delay();
|
|
|
|
|
|
|
|
for (auto spin= srv_n_spin_wait_rounds; spin; spin--)
|
|
|
|
{
|
|
|
|
srw_pause(delay);
|
|
|
|
lk= readers.load(std::memory_order_acquire);
|
|
|
|
if (lk == WRITER)
|
|
|
|
return;
|
|
|
|
DBUG_ASSERT(lk > WRITER);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
lk|= WRITER;
|
2021-09-28 17:19:26 +03:00
|
|
|
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
do
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(lk > WRITER);
|
|
|
|
wait(lk);
|
|
|
|
lk= readers.load(std::memory_order_acquire);
|
|
|
|
}
|
|
|
|
while (lk != WRITER);
|
|
|
|
}
|
|
|
|
|
2021-09-06 12:32:24 +03:00
|
|
|
template void ssux_lock_impl<true>::wr_wait(uint32_t);
|
|
|
|
template void ssux_lock_impl<false>::wr_wait(uint32_t);
|
|
|
|
|
|
|
|
template<bool spinloop>
|
|
|
|
void ssux_lock_impl<spinloop>::rd_wait()
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
{
|
|
|
|
for (;;)
|
|
|
|
{
|
|
|
|
writer.wr_lock();
|
2021-09-28 17:17:59 +03:00
|
|
|
bool acquired= rd_lock_try();
|
|
|
|
writer.wr_unlock();
|
|
|
|
if (acquired)
|
|
|
|
break;
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
}
|
|
|
|
}
|
2021-09-06 12:32:24 +03:00
|
|
|
|
|
|
|
template void ssux_lock_impl<true>::rd_wait();
|
|
|
|
template void ssux_lock_impl<false>::rd_wait();
|
2020-12-03 09:55:53 +02:00
|
|
|
|
2021-09-28 17:19:06 +03:00
|
|
|
#if defined _WIN32 || defined SUX_LOCK_GENERIC
|
|
|
|
/** Spinning variant of the shared lock wait: call rd_lock_try() up to
srv_n_spin_wait_rounds times, pausing before each attempt, and if none
succeeds, acquire the underlying lock in shared mode with
AcquireSRWLockShared() or rw_rdlock(), as selected by IF_WIN. */
template<> void srw_lock_<true>::rd_wait()
{
  const unsigned delay= srw_pause_delay();
  auto rounds= srv_n_spin_wait_rounds;

  while (rounds)
  {
    srw_pause(delay);
    if (rd_lock_try())
      return;
    rounds--;
  }

  IF_WIN(AcquireSRWLockShared(&lk), rw_rdlock(&lk));
}
|
|
|
|
|
|
|
|
/** Spinning variant of the exclusive lock wait: call wr_lock_try() up to
srv_n_spin_wait_rounds times, pausing before each attempt, and if none
succeeds, acquire the underlying lock in exclusive mode with
AcquireSRWLockExclusive() or rw_wrlock(), as selected by IF_WIN. */
template<> void srw_lock_<true>::wr_wait()
{
  const unsigned delay= srw_pause_delay();
  auto rounds= srv_n_spin_wait_rounds;

  while (rounds)
  {
    srw_pause(delay);
    if (wr_lock_try())
      return;
    rounds--;
  }

  IF_WIN(AcquireSRWLockExclusive(&lk), rw_wrlock(&lk));
}
|
|
|
|
#endif
|
|
|
|
|
2020-12-03 09:55:53 +02:00
|
|
|
#ifdef UNIV_PFS_RWLOCK
|
2021-09-06 12:32:24 +03:00
|
|
|
/* Explicit instantiations of the instrumented lock acquisition functions
psi_rd_lock() and psi_wr_lock() for both values of the boolean template
argument of srw_lock_impl. */
template void srw_lock_impl<false>::psi_rd_lock(const char*, unsigned);
|
|
|
|
template void srw_lock_impl<false>::psi_wr_lock(const char*, unsigned);
|
|
|
|
template void srw_lock_impl<true>::psi_rd_lock(const char*, unsigned);
|
|
|
|
template void srw_lock_impl<true>::psi_wr_lock(const char*, unsigned);
|
2021-09-28 17:19:06 +03:00
|
|
|
|
|
|
|
template<bool spinloop>
|
|
|
|
void srw_lock_impl<spinloop>::psi_rd_lock(const char *file, unsigned line)
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
{
|
|
|
|
PSI_rwlock_locker_state state;
|
|
|
|
const bool nowait= lock.rd_lock_try();
|
|
|
|
if (PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)
|
|
|
|
(&state, pfs_psi,
|
|
|
|
nowait ? PSI_RWLOCK_TRYREADLOCK : PSI_RWLOCK_READLOCK, file, line))
|
|
|
|
{
|
|
|
|
if (!nowait)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
lock.rd_lock();
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
|
|
|
|
}
|
|
|
|
else if (!nowait)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
lock.rd_lock();
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
}
|
|
|
|
|
2021-09-28 17:19:06 +03:00
|
|
|
template<bool spinloop>
|
|
|
|
void srw_lock_impl<spinloop>::psi_wr_lock(const char *file, unsigned line)
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
{
|
|
|
|
PSI_rwlock_locker_state state;
|
|
|
|
const bool nowait= lock.wr_lock_try();
|
|
|
|
if (PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)
|
|
|
|
(&state, pfs_psi,
|
|
|
|
nowait ? PSI_RWLOCK_TRYWRITELOCK : PSI_RWLOCK_WRITELOCK, file, line))
|
|
|
|
{
|
|
|
|
if (!nowait)
|
|
|
|
lock.wr_lock();
|
|
|
|
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
|
|
|
|
}
|
|
|
|
else if (!nowait)
|
|
|
|
lock.wr_lock();
|
|
|
|
}
|
|
|
|
|
|
|
|
void ssux_lock::psi_rd_lock(const char *file, unsigned line)
|
2020-12-03 09:55:53 +02:00
|
|
|
{
|
|
|
|
PSI_rwlock_locker_state state;
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
const bool nowait= lock.rd_lock_try();
|
2020-12-03 09:55:53 +02:00
|
|
|
if (PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_rdwait)
|
|
|
|
(&state, pfs_psi,
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
nowait ? PSI_RWLOCK_TRYSHAREDLOCK : PSI_RWLOCK_SHAREDLOCK, file, line))
|
2020-12-03 09:55:53 +02:00
|
|
|
{
|
|
|
|
if (!nowait)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
lock.rd_lock();
|
2020-12-03 09:55:53 +02:00
|
|
|
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
|
|
|
|
}
|
|
|
|
else if (!nowait)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
lock.rd_lock();
|
2020-12-03 09:55:53 +02:00
|
|
|
}
|
|
|
|
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
void ssux_lock::psi_u_lock(const char *file, unsigned line)
|
2020-12-03 10:42:18 +02:00
|
|
|
{
|
|
|
|
PSI_rwlock_locker_state state;
|
|
|
|
if (PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)
|
|
|
|
(&state, pfs_psi, PSI_RWLOCK_SHAREDEXCLUSIVELOCK, file, line))
|
|
|
|
{
|
2020-12-03 14:49:41 +02:00
|
|
|
lock.u_lock();
|
2020-12-03 10:42:18 +02:00
|
|
|
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
|
|
|
|
}
|
|
|
|
else
|
2020-12-03 14:49:41 +02:00
|
|
|
lock.u_lock();
|
2020-12-03 10:42:18 +02:00
|
|
|
}
|
|
|
|
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
void ssux_lock::psi_wr_lock(const char *file, unsigned line)
|
2020-12-03 09:55:53 +02:00
|
|
|
{
|
|
|
|
PSI_rwlock_locker_state state;
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
const bool nowait= lock.wr_lock_try();
|
2020-12-03 09:55:53 +02:00
|
|
|
if (PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)
|
|
|
|
(&state, pfs_psi,
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
nowait ? PSI_RWLOCK_TRYEXCLUSIVELOCK : PSI_RWLOCK_EXCLUSIVELOCK,
|
2020-12-03 09:55:53 +02:00
|
|
|
file, line))
|
|
|
|
{
|
|
|
|
if (!nowait)
|
|
|
|
lock.wr_lock();
|
|
|
|
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
|
|
|
|
}
|
|
|
|
else if (!nowait)
|
|
|
|
lock.wr_lock();
|
|
|
|
}
|
2020-12-03 10:42:18 +02:00
|
|
|
|
MDEV-24142/MDEV-24167 fixup: Split ssux_lock and srw_lock
This conceptually reverts commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79
and reintroduces an option for srw_lock to wrap a native implementation.
The srw_lock and srw_lock_low differ from ssux_lock and ssux_lock_low
in that Slim SUX locks support three modes (Shared, Update, eXclusive)
while Slim RW locks support only two (Read, Write).
On Microsoft Windows, the srw_lock will be implemented by SRWLOCK.
On Linux and OpenBSD, it will be implemented by rw_lock and the
futex system call, just like earlier.
On other systems or if SRW_LOCK_DUMMY is defined on anything else
than Microsoft Windows, rw_lock_t will be used.
ssux_lock_low::read_lock(), ssux_lock_low::update_lock(): Correct
the SRW_LOCK_DUMMY implementation to prevent hangs. The intention of
commit 1fdc161d8faeb18acf0ccea9b33ad64f0b596f79 seems to have been
do ... while loops, but the 'do' keyword was missing. This total
breakage was missed in commit 260161fc9fb5b4885013d550e606681769b52019
which did reduce the probability of the hangs.
ssux_lock_low::u_unlock(): In the SRW_LOCK_DUMMY implementation
(based on a mutex and two condition variables), always invoke
writer_wake() in order to ensure that a waiting update_lock()
will be woken up.
ssux_lock_low::writer_wait(), ssux_lock_low::readers_wait():
In the SRW_LOCK_DUMMY implementation, keep waiting for the signal
until the lock word has changed. The "while" had been changed to "if"
in order to avoid hangs.
2020-12-15 14:29:40 +02:00
|
|
|
void ssux_lock::psi_u_wr_upgrade(const char *file, unsigned line)
|
2020-12-03 10:42:18 +02:00
|
|
|
{
|
|
|
|
PSI_rwlock_locker_state state;
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
DBUG_ASSERT(lock.writer.is_locked());
|
|
|
|
uint32_t lk= 1;
|
|
|
|
const bool nowait=
|
2021-09-06 12:32:24 +03:00
|
|
|
lock.readers.compare_exchange_strong(lk, ssux_lock_impl<false>::WRITER,
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
std::memory_order_acquire,
|
|
|
|
std::memory_order_relaxed);
|
|
|
|
if (PSI_rwlock_locker *locker= PSI_RWLOCK_CALL(start_rwlock_wrwait)
|
|
|
|
(&state, pfs_psi,
|
|
|
|
nowait ? PSI_RWLOCK_TRYEXCLUSIVELOCK : PSI_RWLOCK_EXCLUSIVELOCK,
|
|
|
|
file, line))
|
|
|
|
{
|
|
|
|
if (!nowait)
|
|
|
|
lock.u_wr_upgrade();
|
|
|
|
PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
|
|
|
|
}
|
2020-12-03 10:42:18 +02:00
|
|
|
else if (!nowait)
|
MDEV-25404: ssux_lock_low: Introduce a separate writer mutex
Having both readers and writers use a single lock word in
futex system calls caused performance regression compared to
SRW_LOCK_DUMMY (mutex and 2 condition variables).
A contributing factor is that we did not accurately keep
track of the number of waiting threads and thus had to invoke
system calls to wake up any waiting threads.
SUX_LOCK_GENERIC: Renamed from SRW_LOCK_DUMMY. This is the
original implementation, with rw_lock (std::atomic<uint32_t>),
a mutex and two condition variables. Using a separate writer
mutex (as described below) is not possible, because the mutex ownership
in a buf_block_t::lock must be able to transfer from a write submitter
thread to an I/O completion thread, and pthread_mutex_lock() may assume
that the submitter thread is recursively acquiring the mutex that it
already holds, while in reality the I/O completion thread is the real
owner. POSIX does not define an interface for requesting a mutex to
be non-recursive.
On Microsoft Windows, srw_lock_low will remain a simple wrapper of
SRWLOCK. On 32-bit Microsoft Windows, sizeof(SRWLOCK)=4 while
sizeof(srw_lock_low)=8.
On other platforms, srw_lock_low is an alias of ssux_lock_low,
the Simple (non-recursive) Shared/Update/eXclusive lock.
In the futex-based implementation of ssux_lock_low (Linux, OpenBSD,
Microsoft Windows), we shall use a dedicated mutex for exclusive
requests (writer), and have a WRITER flag in the 'readers' lock word
to inform that a writer is holding the lock or waiting for the lock to
be granted. When the WRITER flag is set, all lock requests must acquire
the writer mutex. Normally, shared (S) lock requests simply perform a
compare-and-swap on the 'readers' word.
Update locks are implemented as a combination of writer mutex
and a normal counter in the 'readers' lock word. The conflict between
U and X locks is guaranteed by the writer mutex.
Unlike SUX_LOCK_GENERIC, wr_u_downgrade() will not wake up any pending
rd_lock() waits. They will wait until u_unlock() releases the writer mutex.
The ssux_lock_low is always wrapped by sux_lock (with a recursion count
of U and X locks), used for dict_index_t::lock and buf_block_t::lock.
Their memory footprint for the futex-based implementation will increase
by sizeof(srw_mutex), or 4 bytes.
This change addresses a performance regression in read-only benchmarks,
such as sysbench oltp_read_only. Also write performance was improved.
On 32-bit Linux and OpenBSD, lock_sys_t::hash_table will allocate
two hash table elements for each srw_lock (14 instead of 15 hash
table cells per 64-byte cache line on IA-32). On Microsoft Windows,
sizeof(SRWLOCK)==sizeof(void*) and there is no change.
Reviewed by: Vladislav Vaintroub
Tested by: Axel Schwenke and Vladislav Vaintroub
2021-04-19 18:15:49 +03:00
|
|
|
lock.u_wr_upgrade();
|
2020-12-03 10:42:18 +02:00
|
|
|
}
|
2021-09-06 12:32:24 +03:00
|
|
|
#else /* UNIV_PFS_RWLOCK */
|
|
|
|
/* Explicit instantiations of the ssux_lock_impl<false> member functions
rd_lock(), rd_unlock(), u_unlock() and wr_unlock().
NOTE(review): these sit in the #else branch of a conditional annotated
with UNIV_PFS_RWLOCK, so presumably they are compiled only when that
macro is not defined - confirm against the opening #if. */
template void ssux_lock_impl<false>::rd_lock();
|
|
|
|
template void ssux_lock_impl<false>::rd_unlock();
|
|
|
|
template void ssux_lock_impl<false>::u_unlock();
|
|
|
|
template void ssux_lock_impl<false>::wr_unlock();
|
2020-12-03 09:55:53 +02:00
|
|
|
#endif /* UNIV_PFS_RWLOCK */
|