MDEV-27956 hardware lock ellision on s390x/ppc64{,le}

Per https://gcc.gnu.org/onlinedocs/gcc/PowerPC-Hardware-Transactional-Memory-Built-in-Functions.html

The .. high level HTM interface .. is common between PowerPC and S/390

Reimplemented the transactional_lock_enabled() detection mechanism for
s390x and POWER based on SIGILL. This also gives non-Linux based unixes
the ability to use HTM. The implementation is based off openssl.
(ref:
1c0eede982/crypto/s390xcap.c (L104))

The other ppc64{,le} problems with getauxvec based detection:
* Checking PPC_FEATURE2_HTM_NOSC not needed as we do not do syscalls while
  in a transactional state.
* As we don't use, and never should use PPC_FEATURE2_HTM_NO_SUSPEND,
  or do syscalls while in transactional state, don't test it.
From: https://www.kernel.org/doc/html/v5.4/powerpc/syscall64-abi.html#transactional-memory

S390x high level __builtin_tbegin functions in the htmxlintrin.h are not
inline. This header file can be included once in the entire set of sources for
a linked target, otherwise duplicate symbols occur. While we could use inline
xabort/xend functions using the low level interface, we keep this the same as
ppc64 for simplicity.

SLES-15, gcc-7, appeared to want everything that included the htmlxlintrin to
be compiled with -mhtm otherwise the __builtin_t{func} where not defined
(in addition to a #ifdef __HTM__ #error). Debian sid gcc-11.2 wanted the same
on ppc64le/ppc64. In general we want to avoid a wide spread use of architecture
cflags as it makes justifications for selective optimizations easier.
(ref: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1006702)
This commit is contained in:
Daniel Black 2022-02-25 18:24:01 +11:00
parent 06ec439b8c
commit fabaac86a1
4 changed files with 58 additions and 26 deletions

View file

@ -384,7 +384,10 @@ ENDIF()
# Older gcc version insist on -mhtm flag for including the
# htmxlintrin.h header. This is also true for new gcc versions
# like 11.2.0 in Debian Sid
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64")
# s390x because of the way it defines the high level intrinsics
# as not-inline in the header file can only be included by one
# source file that has -mhtm enabled.
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64|s390x")
ADD_COMPILE_FLAGS(
sync/srw_lock.cc
COMPILE_FLAGS "-mhtm"

View file

@ -18,8 +18,8 @@ this program; if not, write to the Free Software Foundation, Inc.,
#pragma once
#if defined __powerpc64__ && defined __clang__ && defined __linux__
#elif defined __powerpc64__&&defined __GNUC__&&defined __linux__&&__GNUC__ > 4
#if defined __powerpc64__
#elif defined __s390__
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64) && !defined(__clang__)
#elif defined __GNUC__ && (defined __i386__ || defined __x86_64__)
# if __GNUC__ >= 8
@ -69,7 +69,7 @@ static inline bool xtest() { return have_transactional_memory && _xtest(); }
TRANSACTIONAL_INLINE static inline void xabort() { _xabort(0); }
TRANSACTIONAL_INLINE static inline void xend() { _xend(); }
# elif defined __powerpc64__
# elif defined __powerpc64__ || defined __s390__
extern bool have_transactional_memory;
bool transactional_lock_enabled();
# define TRANSACTIONAL_TARGET __attribute__((hot))
@ -77,10 +77,17 @@ bool transactional_lock_enabled();
/**
Newer gcc compilers only provide __builtin_{htm}
function when the -mhtm is actually provided. So
functions when the -mhtm CFLAG is actually provided. So
we've got the option of including it globally, or
pushing down to one file with that enabled and removing
the inline optimization.
pushing down the inclusion of htmxlintrin.h to one
file with -mhtm enabled and removing the inline
optimization.
Per FIXME in s390x's htmxlintrin.h, the __TM_simple_begin
isn't always_inline resulting in duplicate definitions if
it where included more than once. While xabort and xend
could be implemented here, we keep the implementation the
same as ppc64.
*/
TRANSACTIONAL_TARGET bool xbegin();
TRANSACTIONAL_TARGET void xabort();

View file

@ -54,8 +54,10 @@ bool transactional_lock_enabled()
TRANSACTIONAL_TARGET
bool xtest() { return have_transactional_memory && _xtest(); }
# endif
#elif defined __powerpc64__
#elif defined __powerpc64__ || defined __s390__
# include <htmxlintrin.h>
# include <setjmp.h>
# include <signal.h>
__attribute__((target("htm"),hot))
bool xbegin()
@ -69,27 +71,46 @@ void xabort() { __TM_abort(); }
__attribute__((target("htm"),hot))
void xend() { __TM_end(); }
# ifdef __linux__
# include <sys/auxv.h>
# ifndef PPC_FEATURE2_HTM_NOSC
# define PPC_FEATURE2_HTM_NOSC 0x01000000
# endif
# ifndef PPC_FEATURE2_HTM_NO_SUSPEND
# define PPC_FEATURE2_HTM_NO_SUSPEND 0x00080000
# endif
# ifndef AT_HWCAP2
# define AT_HWCAP2 26
# endif
# endif
bool have_transactional_memory;
static sigjmp_buf ill_jmp;
static void ill_handler(int sig)
{
siglongjmp(ill_jmp, sig);
}
/**
Here we are testing we can do a transaction without SIGILL
and a 1 instruction store can succeed.
*/
__attribute__((noinline))
static void test_tm(bool *r)
{
if (__TM_simple_begin() == _HTM_TBEGIN_STARTED)
{
*r= true;
__TM_end();
}
}
bool transactional_lock_enabled()
{
# ifdef __linux__
return getauxval(AT_HWCAP2) &
(PPC_FEATURE2_HTM_NOSC | PPC_FEATURE2_HTM_NO_SUSPEND);
# endif
bool r= false;
sigset_t oset;
struct sigaction ill_act, oact_ill;
memset(&ill_act, 0, sizeof(ill_act));
ill_act.sa_handler = ill_handler;
sigfillset(&ill_act.sa_mask);
sigdelset(&ill_act.sa_mask, SIGILL);
sigprocmask(SIG_SETMASK, &ill_act.sa_mask, &oset);
sigaction(SIGILL, &ill_act, &oact_ill);
if (sigsetjmp(ill_jmp, 1) == 0)
{
test_tm(&r);
}
sigaction(SIGILL, &oact_ill, NULL);
sigprocmask(SIG_SETMASK, &oset, NULL);
return r;
}
# ifdef UNIV_DEBUG

View file

@ -21,7 +21,8 @@ ADD_EXECUTABLE(innodb_fts-t innodb_fts-t.cc)
TARGET_LINK_LIBRARIES(innodb_fts-t mysys mytap)
ADD_DEPENDENCIES(innodb_fts-t GenError)
MY_ADD_TEST(innodb_fts)
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64")
# See explanation in innobase/CmakeLists.txt
IF(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64|s390x")
ADD_COMPILE_FLAGS(
../sync/srw_lock.cc
COMPILE_FLAGS "-mhtm"