2017-12-07 15:03:59 +02:00
|
|
|
#ifndef MY_CPU_INCLUDED
|
|
|
|
#define MY_CPU_INCLUDED
|
2020-02-01 14:53:41 +02:00
|
|
|
/* Copyright (c) 2013, 2020, MariaDB

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
|
|
|
|
|
|
|
|
/* Instructions for specific CPUs */
|
|
|
|
|
|
|
|
/*
  Macros for adjusting thread priority (hardware multi-threading).
  The defines are the same ones used by the Linux kernel.

  When _ARCH_PWR8 is defined, each macro changes the priority of the
  calling hardware thread:
  - with glibc, through the __ppc_set_ppr_*() helpers declared in
    <sys/platform/ppc.h> (HMT_high() has no helper here and is issued
    as an "or 3,3,3" instruction directly);
  - otherwise, through the corresponding "or Rx,Rx,Rx" instruction.

  On every other target the macros expand to nothing, so callers may use
  them unconditionally.

  __asm__ is spelled with underscores throughout so that the macros also
  compile in strict ISO modes (-std=c99, -std=c11), where the plain
  'asm' keyword is not available.
*/
#ifdef _ARCH_PWR8
# ifdef __GLIBC__
#  include <sys/platform/ppc.h>
/* Very low priority */
#  define HMT_very_low() __ppc_set_ppr_very_low()
/* Low priority */
#  define HMT_low() __ppc_set_ppr_low()
/* Medium low priority */
#  define HMT_medium_low() __ppc_set_ppr_med_low()
/* Medium priority */
#  define HMT_medium() __ppc_set_ppr_med()
/* Medium high priority */
#  define HMT_medium_high() __ppc_set_ppr_med_high()
/* High priority */
#  define HMT_high() __asm__ volatile ("or 3,3,3")
# else /* GLIBC */
#  if defined(__FreeBSD__)
#   include <sys/types.h>
#   include <sys/sysctl.h>
#  endif
/* Very low priority */
#  define HMT_very_low() __asm__ volatile ("or 31,31,31")
/* Low priority */
#  define HMT_low() __asm__ volatile ("or 1,1,1")
/* Medium low priority */
#  define HMT_medium_low() __asm__ volatile ("or 6,6,6")
/* Medium priority */
#  define HMT_medium() __asm__ volatile ("or 2,2,2")
/* Medium high priority */
#  define HMT_medium_high() __asm__ volatile ("or 5,5,5")
/* High priority */
#  define HMT_high() __asm__ volatile ("or 3,3,3")
# endif /* GLIBC */
#else
/* No hardware thread priorities: all macros are no-ops */
# define HMT_very_low()
# define HMT_low()
# define HMT_medium_low()
# define HMT_medium()
# define HMT_medium_high()
# define HMT_high()
#endif
|
2017-12-07 15:03:59 +02:00
|
|
|
|
2019-06-27 10:53:18 +03:00
|
|
|
/*
  HAVE_PAUSE_INSTRUCTION is defined for 32-bit and 64-bit x86 targets and
  for all Windows builds. It selects the spin-wait primitive used by
  MY_RELAX_CPU() and decides whether my_cpu_relax_multiplier is a
  variable or a compile-time constant.
*/
#if defined __i386__ || defined __x86_64__ || defined _WIN32
# define HAVE_PAUSE_INSTRUCTION /* added in Intel Pentium 4 */
#endif
|
2017-12-07 15:03:59 +02:00
|
|
|
|
2020-02-01 14:53:41 +02:00
|
|
|
/*
  Headers needed by MY_RELAX_CPU(). The conditions deliberately mirror the
  branch order inside MY_RELAX_CPU(): Windows, x86 PAUSE, POWER8 and
  GCC-compatible ARM need nothing extra here. Only the generic fallback,
  which performs a dummy compare-and-swap, needs the atomics wrappers
  (my_atomic_cas32_strong_explicit() and, presumably via my_global.h,
  the int32 type).
*/
#ifdef _WIN32
#elif defined HAVE_PAUSE_INSTRUCTION
#elif defined(_ARCH_PWR8)
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
#else
# include "my_global.h"
# include "my_atomic.h"
#endif
|
|
|
|
|
2017-12-07 15:03:59 +02:00
|
|
|
static inline void MY_RELAX_CPU(void)
|
|
|
|
{
|
2019-06-27 10:53:18 +03:00
|
|
|
#ifdef _WIN32
|
|
|
|
/*
|
|
|
|
In the Win32 API, the x86 PAUSE instruction is executed by calling
|
|
|
|
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
|
|
|
|
independent way by using YieldProcessor.
|
|
|
|
*/
|
|
|
|
YieldProcessor();
|
|
|
|
#elif defined HAVE_PAUSE_INSTRUCTION
|
2017-12-07 15:03:59 +02:00
|
|
|
/*
|
|
|
|
According to the gcc info page, asm volatile means that the
|
|
|
|
instruction has important side-effects and must not be removed.
|
|
|
|
Also asm volatile may trigger a memory barrier (spilling all registers
|
|
|
|
to memory).
|
|
|
|
*/
|
|
|
|
#ifdef __SUNPRO_CC
|
|
|
|
asm ("pause" );
|
|
|
|
#else
|
|
|
|
__asm__ __volatile__ ("pause");
|
|
|
|
#endif
|
|
|
|
#elif defined(_ARCH_PWR8)
|
2024-08-28 18:56:55 +10:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
uint64_t __tb;
|
|
|
|
__asm__ volatile ("mfspr %0, 268" : "=r" (__tb));
|
|
|
|
#else
|
|
|
|
/* Changed from __ppc_get_timebase for musl compatibility */
|
|
|
|
__builtin_ppc_get_timebase();
|
|
|
|
#endif
|
MDEV-23633 MY_RELAX_CPU performs unnecessary compare-and-swap on ARM
This follows up MDEV-14374, which was filed against MariaDB Server 10.3.
Back then, on a 48-core Qualcomm Centriq 2400, the performance of
delay loops for spinloops was tested both with and without the dummy
compare-and-swap operation, and it was decided to keep the dummy
operation.
On target architectures where nothing special is available (other than
x86 (IA-32, AMD64) or POWER), we perform a dummy compare-and-swap operation.
This is contrary to the idea of the x86 PAUSE instruction and the
__ppc_get_timebase(), which aim to keep the memory bus idle for a while,
to allow other cores to better execute code while a spinloop is waiting
for something to be changed.
On MariaDB Server 10.4 and another implementation of the ARMv8 ISA,
omitting the dummy compare-and-swap improved performance by up to 12%.
So, let us avoid the dummy compare-and-swap on ARM.
For now, we are retaining the dummy compare-and-swap on other ISAs
(such as SPARC, MIPS, S390x, RISC-V) because we do not have any
performance data for them.
2020-09-04 10:31:41 +03:00
|
|
|
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
|
|
|
/* Mainly, prevent the compiler from optimizing away delay loops */
|
2020-09-04 11:40:17 +03:00
|
|
|
__asm__ __volatile__ ("":::"memory");
|
2017-12-07 15:03:59 +02:00
|
|
|
#else
|
|
|
|
int32 var, oldval = 0;
|
|
|
|
my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
|
|
|
|
MY_MEMORY_ORDER_RELAXED);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-06-27 10:53:18 +03:00
|
|
|
/*
  my_cpu_relax_multiplier scales the number of MY_RELAX_CPU() iterations
  performed by LF_BACKOFF() and ut_delay().

  With HAVE_PAUSE_INSTRUCTION it is a variable defined in another
  translation unit, together with my_cpu_init() (presumably the function
  that sets it; confirm against its definition). Both have C linkage.

  Without HAVE_PAUSE_INSTRUCTION the multiplier is the constant 200 and
  my_cpu_init() expands to nothing.
*/
#ifdef HAVE_PAUSE_INSTRUCTION
# ifdef __cplusplus
extern "C" {
# endif
extern unsigned my_cpu_relax_multiplier;
void my_cpu_init(void);
# ifdef __cplusplus
}
# endif
#else
# define my_cpu_relax_multiplier 200
# define my_cpu_init() /* nothing */
#endif
|
|
|
|
|
2017-12-07 15:03:59 +02:00
|
|
|
/*
|
|
|
|
LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
|
|
|
|
recommends to use it in spin loops also on non-HT machines to reduce power
|
|
|
|
consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
|
|
|
|
|
|
|
|
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
|
|
|
|
and YieldProcessor shows that much better performance is achieved by calling
|
|
|
|
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
|
|
|
|
loop count in the range 200-300 brought best results.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static inline int LF_BACKOFF(void)
|
|
|
|
{
|
2019-06-27 10:53:18 +03:00
|
|
|
unsigned i= my_cpu_relax_multiplier;
|
|
|
|
while (i--)
|
2017-12-07 15:03:59 +02:00
|
|
|
MY_RELAX_CPU();
|
|
|
|
return 1;
|
|
|
|
}
|
2019-06-27 10:53:18 +03:00
|
|
|
|
|
|
|
/**
|
|
|
|
Run a delay loop while waiting for a shared resource to be released.
|
|
|
|
@param delay originally, roughly microseconds on 100 MHz Intel Pentium
|
|
|
|
*/
|
|
|
|
static inline void ut_delay(unsigned delay)
|
|
|
|
{
|
|
|
|
unsigned i= my_cpu_relax_multiplier / 4 * delay;
|
|
|
|
HMT_low();
|
|
|
|
while (i--)
|
|
|
|
MY_RELAX_CPU();
|
|
|
|
HMT_medium();
|
|
|
|
}
|
|
|
|
|
2017-12-07 15:03:59 +02:00
|
|
|
#endif
|