mirror of
https://github.com/MariaDB/server.git
synced 2025-04-21 22:55:35 +02:00

MY_RELAX_CPU(): On GCC and compatible compilers (including clang and its derivatives), let us use a null inline assembler block as the fallback. This should benefit s390x and LoongArch, for example. Also, let us remove the generic fallback block that does exactly the opposite of what this function aims to achieve: avoid hogging the memory bus so that other threads will have a chance to let our spin loop to proceed. On RISC-V, we will use __builtin_riscv_pause() which is a valid instruction encoding in all ISA versions according to https://gcc.gnu.org/pipermail/gcc-patches/2021-January/562936.html
159 lines
4.5 KiB
C
159 lines
4.5 KiB
C
#ifndef MY_CPU_INCLUDED
|
|
#define MY_CPU_INCLUDED
|
|
/* Copyright (c) 2013, 2020, MariaDB
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
|
|
*/
|
|
|
|
/* instructions for specific cpu's */
|
|
|
|
/*
|
|
Macros for adjusting thread priority (hardware multi-threading)
|
|
The defines are the same ones used by the linux kernel
|
|
*/
|
|
|
|
#ifdef _ARCH_PWR8
|
|
#ifdef __GLIBC__
|
|
#include <sys/platform/ppc.h>
|
|
/* Very low priority */
|
|
#define HMT_very_low() __ppc_set_ppr_very_low()
|
|
/* Low priority */
|
|
#define HMT_low() __ppc_set_ppr_low()
|
|
/* Medium low priority */
|
|
#define HMT_medium_low() __ppc_set_ppr_med_low()
|
|
/* Medium priority */
|
|
#define HMT_medium() __ppc_set_ppr_med()
|
|
/* Medium high priority */
|
|
#define HMT_medium_high() __ppc_set_ppr_med_high()
|
|
/* High priority */
|
|
#define HMT_high() asm volatile("or 3,3,3")
|
|
#else /* GLIBC */
|
|
#if defined(__FreeBSD__)
|
|
#include <sys/types.h>
|
|
#include <sys/sysctl.h>
|
|
#endif
|
|
#define HMT_very_low() __asm__ volatile ("or 31,31,31")
|
|
#define HMT_low() __asm__ volatile ("or 1,1,1")
|
|
#define HMT_medium_low() __asm__ volatile ("or 6,6,6")
|
|
#define HMT_medium() __asm__ volatile ("or 2,2,2")
|
|
#define HMT_medium_high() __asm__ volatile ("or 5,5,5")
|
|
#define HMT_high() asm volatile("or 3,3,3")
|
|
#endif /* GLIBC */
|
|
#else
|
|
#define HMT_very_low()
|
|
#define HMT_low()
|
|
#define HMT_medium_low()
|
|
#define HMT_medium()
|
|
#define HMT_medium_high()
|
|
#define HMT_high()
|
|
#endif
|
|
|
|
#if defined __i386__ || defined __x86_64__ || defined _WIN32
|
|
# define HAVE_PAUSE_INSTRUCTION /* added in Intel Pentium 4 */
|
|
#endif
|
|
|
|
#ifdef _WIN32
|
|
#elif defined HAVE_PAUSE_INSTRUCTION
|
|
#elif defined(_ARCH_PWR8)
|
|
#elif defined __GNUC__ && (defined __arm__ || defined __aarch64__)
|
|
#else
|
|
# include "my_global.h"
|
|
# include "my_atomic.h"
|
|
#endif
|
|
|
|
static inline void MY_RELAX_CPU(void)
|
|
{
|
|
#ifdef _WIN32
|
|
/*
|
|
In the Win32 API, the x86 PAUSE instruction is executed by calling
|
|
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
|
|
independent way by using YieldProcessor.
|
|
*/
|
|
YieldProcessor();
|
|
#elif defined HAVE_PAUSE_INSTRUCTION
|
|
/*
|
|
According to the gcc info page, asm volatile means that the
|
|
instruction has important side-effects and must not be removed.
|
|
Also asm volatile may trigger a memory barrier (spilling all registers
|
|
to memory).
|
|
*/
|
|
#ifdef __SUNPRO_CC
|
|
asm ("pause" );
|
|
#else
|
|
__asm__ __volatile__ ("pause");
|
|
#endif
|
|
#elif defined(_ARCH_PWR8)
|
|
#ifdef __FreeBSD__
|
|
uint64_t __tb;
|
|
__asm__ volatile ("mfspr %0, 268" : "=r" (__tb));
|
|
#else
|
|
/* Changed from __ppc_get_timebase for musl compatibility */
|
|
__builtin_ppc_get_timebase();
|
|
#endif
|
|
#elif defined __GNUC__ && defined __riscv
|
|
__builtin_riscv_pause();
|
|
#elif defined __GNUC__
|
|
/* Mainly, prevent the compiler from optimizing away delay loops */
|
|
__asm__ __volatile__ ("":::"memory");
|
|
#endif
|
|
}
|
|
|
|
|
|
#ifdef HAVE_PAUSE_INSTRUCTION
|
|
# ifdef __cplusplus
|
|
extern "C" {
|
|
# endif
|
|
extern unsigned my_cpu_relax_multiplier;
|
|
void my_cpu_init(void);
|
|
# ifdef __cplusplus
|
|
}
|
|
# endif
|
|
#else
|
|
# define my_cpu_relax_multiplier 200
|
|
# define my_cpu_init() /* nothing */
|
|
#endif
|
|
|
|
/*
|
|
LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
|
|
recommends to use it in spin loops also on non-HT machines to reduce power
|
|
consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
|
|
|
|
Running benchmarks for spinlocks implemented with InterlockedCompareExchange
|
|
and YieldProcessor shows that much better performance is achieved by calling
|
|
YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
|
|
loop count in the range 200-300 brought best results.
|
|
*/
|
|
|
|
static inline int LF_BACKOFF(void)
|
|
{
|
|
unsigned i= my_cpu_relax_multiplier;
|
|
while (i--)
|
|
MY_RELAX_CPU();
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
Run a delay loop while waiting for a shared resource to be released.
|
|
@param delay originally, roughly microseconds on 100 MHz Intel Pentium
|
|
*/
|
|
static inline void ut_delay(unsigned delay)
|
|
{
|
|
unsigned i= my_cpu_relax_multiplier / 4 * delay;
|
|
HMT_low();
|
|
while (i--)
|
|
MY_RELAX_CPU();
|
|
HMT_medium();
|
|
}
|
|
|
|
#endif
|