mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-25870 followup - some Windows ARM64 improvements
- optimize atomic store64/load64 implementation. - allow CRC32 optimization. Do not allow pmull yet, as this fails like in https://stackoverflow.com/questions/54048837/how-to-perform-polynomial-multiplication-using-arm64
This commit is contained in:
parent
e9573c0596
commit
905c3d61e1
3 changed files with 28 additions and 9 deletions
|
@ -72,7 +72,7 @@ static inline int64 my_atomic_add64(int64 volatile *a, int64 v)
|
||||||
64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
|
64-bit Windows. Reads and writes to 64-bit values are not guaranteed to be
|
||||||
atomic on 32-bit Windows.
|
atomic on 32-bit Windows.
|
||||||
|
|
||||||
https://msdn.microsoft.com/en-us/library/windows/desktop/ms684122(v=vs.85).aspx
|
https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static inline int32 my_atomic_load32(int32 volatile *a)
|
static inline int32 my_atomic_load32(int32 volatile *a)
|
||||||
|
@ -84,7 +84,7 @@ static inline int32 my_atomic_load32(int32 volatile *a)
|
||||||
|
|
||||||
static inline int64 my_atomic_load64(int64 volatile *a)
|
static inline int64 my_atomic_load64(int64 volatile *a)
|
||||||
{
|
{
|
||||||
#ifdef _M_X64
|
#if defined(_M_X64) || defined(_M_ARM64)
|
||||||
int64 value= *a;
|
int64 value= *a;
|
||||||
MemoryBarrier();
|
MemoryBarrier();
|
||||||
return value;
|
return value;
|
||||||
|
@ -123,7 +123,7 @@ static inline void my_atomic_store32(int32 volatile *a, int32 v)
|
||||||
|
|
||||||
static inline void my_atomic_store64(int64 volatile *a, int64 v)
|
static inline void my_atomic_store64(int64 volatile *a, int64 v)
|
||||||
{
|
{
|
||||||
#ifdef _M_X64
|
#if defined(_M_X64) || defined(_M_ARM64)
|
||||||
MemoryBarrier();
|
MemoryBarrier();
|
||||||
*a= v;
|
*a= v;
|
||||||
#else
|
#else
|
||||||
|
|
|
@ -67,6 +67,9 @@ IF(MSVC_INTEL)
|
||||||
IF(CLANG_CL)
|
IF(CLANG_CL)
|
||||||
SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
|
SET_SOURCE_FILES_PROPERTIES(crc32/crc32_x86.c PROPERTIES COMPILE_FLAGS "-msse4.2 -mpclmul")
|
||||||
ENDIF()
|
ENDIF()
|
||||||
|
ELSEIF(MSVC_ARM64)
|
||||||
|
SET (MYSYS_SOURCES ${MYSYS_SOURCES} crc32/crc32_arm64.c)
|
||||||
|
ADD_DEFINITIONS(-DHAVE_ARMV8_CRC -DHAVE_ARMV8_CRC_CRYPTO_INTRINSICS)
|
||||||
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
|
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|i386|i686")
|
||||||
MY_CHECK_CXX_COMPILER_FLAG(-msse4.2)
|
MY_CHECK_CXX_COMPILER_FLAG(-msse4.2)
|
||||||
MY_CHECK_CXX_COMPILER_FLAG(-mpclmul)
|
MY_CHECK_CXX_COMPILER_FLAG(-mpclmul)
|
||||||
|
|
|
@ -6,7 +6,22 @@ static int pmull_supported;
|
||||||
|
|
||||||
#if defined(HAVE_ARMV8_CRC)
|
#if defined(HAVE_ARMV8_CRC)
|
||||||
|
|
||||||
#if defined(__APPLE__)
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
int crc32_aarch64_available(void)
|
||||||
|
{
|
||||||
|
return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *crc32c_aarch64_available(void)
|
||||||
|
{
|
||||||
|
if (crc32_aarch64_available() == 0)
|
||||||
|
return NULL;
|
||||||
|
/* TODO : pmull seems supported, but does not compile*/
|
||||||
|
return "Using ARMv8 crc32 instructions";
|
||||||
|
}
|
||||||
|
|
||||||
|
#elif defined(__APPLE__)
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
|
|
||||||
int crc32_aarch64_available(void)
|
int crc32_aarch64_available(void)
|
||||||
|
@ -103,7 +118,10 @@ asm(".arch_extension crypto");
|
||||||
#else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
|
#else /* HAVE_ARMV8_CRC_CRYPTO_INTRINSICS */
|
||||||
|
|
||||||
/* Intrinsics header*/
|
/* Intrinsics header*/
|
||||||
|
#ifndef _WIN32
|
||||||
#include <arm_acle.h>
|
#include <arm_acle.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <arm_neon.h>
|
#include <arm_neon.h>
|
||||||
|
|
||||||
#define CRC32CX(crc, value) (crc) = __crc32cd((crc), (value))
|
#define CRC32CX(crc, value) (crc) = __crc32cd((crc), (value))
|
||||||
|
@ -159,11 +177,11 @@ asm(".arch_extension crypto");
|
||||||
|
|
||||||
uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
|
uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
|
||||||
{
|
{
|
||||||
uint32_t crc0, crc1, crc2;
|
|
||||||
int64_t length= (int64_t)len;
|
int64_t length= (int64_t)len;
|
||||||
|
|
||||||
crc^= 0xffffffff;
|
crc^= 0xffffffff;
|
||||||
|
|
||||||
|
#ifdef HAVE_ARMV8_CRYPTO
|
||||||
/* Pmull runtime check here.
|
/* Pmull runtime check here.
|
||||||
* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030).
|
* Raspberry Pi 4 supports crc32 but doesn't support pmull (MDEV-23030).
|
||||||
*
|
*
|
||||||
|
@ -174,8 +192,8 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
|
||||||
*/
|
*/
|
||||||
if (pmull_supported)
|
if (pmull_supported)
|
||||||
{
|
{
|
||||||
|
uint32_t crc0, crc1, crc2;
|
||||||
/* The following Macro (HAVE_ARMV8_CRYPTO) is used for compiling check */
|
/* The following Macro (HAVE_ARMV8_CRYPTO) is used for compiling check */
|
||||||
#ifdef HAVE_ARMV8_CRYPTO
|
|
||||||
|
|
||||||
/* Crypto extension Support
|
/* Crypto extension Support
|
||||||
* Parallel computation with 1024 Bytes (per block)
|
* Parallel computation with 1024 Bytes (per block)
|
||||||
|
@ -277,10 +295,8 @@ uint32_t crc32c_aarch64(uint32_t crc, const unsigned char *buffer, uint64_t len)
|
||||||
/* Done if Input data size is aligned with 1024 */
|
/* Done if Input data size is aligned with 1024 */
|
||||||
if (!(length+= 1024))
|
if (!(length+= 1024))
|
||||||
return ~crc;
|
return ~crc;
|
||||||
|
|
||||||
#endif /* HAVE_ARMV8_CRYPTO */
|
|
||||||
|
|
||||||
} // end if pmull_supported
|
} // end if pmull_supported
|
||||||
|
#endif /* HAVE_ARMV8_CRYPTO */
|
||||||
|
|
||||||
while ((length-= sizeof(uint64_t)) >= 0)
|
while ((length-= sizeof(uint64_t)) >= 0)
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue