MDEV-19684 enable intel assembly (AESNI etc) and fastmath when compiling WolfSSL

Using different recommended speedup options for WolfSSL.

- Enable  x64 assembly code on Intel.
- in my_crypt.cc, align EVP_CIPHER_CTX buffer, since some members need
alignment of 16 (for AESNI instructions), when assembler is enabled.
- Adjust MY_AES_CTX_SIZE
- Enable fastmath in wolfssl (large integer math).
This commit is contained in:
Vladislav Vaintroub 2019-06-04 10:07:39 +02:00
parent 92df31dfbf
commit c5beac6847
3 changed files with 82 additions and 8 deletions

View file

@ -1,3 +1,26 @@
IF(MSVC)
PROJECT(wolfssl C ASM_MASM)
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
PROJECT(wolfssl C ASM)
ELSE()
PROJECT(wolfssl C)
ENDIF()
IF(MSVC AND (CMAKE_SIZEOF_VOID_P MATCHES 8))
SET(WOLFSSL_INTELASM ON)
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
IF(CMAKE_C_COMPILER_ID MATCHES GNU AND CMAKE_C_COMPILER_VERSION VERSION_LESS 4.9)
MESSAGE_ONCE(NO_INTEL_ASSEMBLY "Disable Intel assembly for WolfSSL - compiler is too old")
ELSE()
MY_CHECK_C_COMPILER_FLAG(-maes)
MY_CHECK_C_COMPILER_FLAG(-msse4)
MY_CHECK_C_COMPILER_FLAG(-mpclmul)
ENDIF()
IF(have_C__maes AND have_C__msse4 AND have_C__mpclmul)
SET(WOLFSSL_INTELASM ON)
ENDIF()
ENDIF()
SET(WOLFSSL_SRCDIR ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl/src)
ADD_DEFINITIONS(${SSL_DEFINES})
ADD_DEFINITIONS(
@ -32,7 +55,7 @@ ADD_DEFINITIONS(-DWOLFSSL_LIB)
INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/wolfssl)
IF(MSVC)
# size_t to long truncation warning
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -wd4267")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -wd4267 -wd4334")
IF(CMAKE_C_COMPILER_ID MATCHES Clang)
# Workaround a bug with clang-cl, see https://github.com/wolfSSL/wolfssl/pull/2090
ADD_DEFINITIONS(-DMP_16BIT)
@ -66,7 +89,6 @@ ${WOLFCRYPT_SRCDIR}/dsa.c
${WOLFCRYPT_SRCDIR}/ecc.c
${WOLFCRYPT_SRCDIR}/error.c
${WOLFCRYPT_SRCDIR}/hmac.c
${WOLFCRYPT_SRCDIR}/integer.c
${WOLFCRYPT_SRCDIR}/logging.c
${WOLFCRYPT_SRCDIR}/md4.c
${WOLFCRYPT_SRCDIR}/md5.c
@ -85,5 +107,42 @@ ${WOLFCRYPT_SRCDIR}/hash.c
${WOLFCRYPT_SRCDIR}/wolfmath.c
)
# Use fastmath large number math library.
IF(NOT (MSVC AND CMAKE_C_COMPILER_ID MATCHES Clang)
AND (CMAKE_SIZEOF_VOID_P EQUAL 8))
SET(WOLFSSL_FASTMATH 1)
ENDIF()
IF(WOLFSSL_FASTMATH)
ADD_DEFINITIONS(-DUSE_FAST_MATH)
# FP_MAX_BITS is set high solely to satisfy ssl_8k_key.test
# WolfSSL will use more stack space with it
ADD_DEFINITIONS(-DFP_MAX_BITS=16384)
SET(WOLFCRYPT_SOURCES ${WOLFCRYPT_SOURCES} ${WOLFCRYPT_SRCDIR}/tfm.c)
ELSE()
SET(WOLFCRYPT_SOURCES ${WOLFCRYPT_SOURCES} ${WOLFCRYPT_SRCDIR}/integer.c)
ENDIF()
IF(WOLFSSL_INTELASM)
ADD_DEFINITIONS(-DWOLFSSL_AESNI)
SET(SSL_DEFINES "${SSL_DEFINES} -DWOLFSSL_AESNI" PARENT_SCOPE)
LIST(APPEND WOLFCRYPT_SOURCES ${WOLFCRYPT_SRCDIR}/cpuid.c)
IF(MSVC)
LIST(APPEND WOLFCRYPT_SOURCES ${WOLFCRYPT_SRCDIR}/aes_asm.asm)
IF(CMAKE_C_COMPILER_ID MATCHES Clang)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
ELSE()
ADD_DEFINITIONS(-DHAVE_INTEL_RDSEED -DWOLFSSL_X86_64_BUILD)
ENDIF()
ELSEIF(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
ADD_DEFINITIONS(-DHAVE_INTEL_RDSEED -DUSE_INTEL_SPEEDUP)
LIST(APPEND WOLFCRYPT_SOURCES
${WOLFCRYPT_SRCDIR}/aes_asm.S
${WOLFCRYPT_SRCDIR}/sha512_asm.S
${WOLFCRYPT_SRCDIR}/sha256_asm.S)
ADD_DEFINITIONS(-maes -msse4 -mpclmul)
ENDIF()
ENDIF()
ADD_CONVENIENCE_LIBRARY(wolfcrypt ${WOLFCRYPT_SOURCES})

View file

@ -45,7 +45,7 @@ extern "C" {
/* The max key length of all supported algorithms */
#define MY_AES_MAX_KEY_LENGTH 32
#define MY_AES_CTX_SIZE 560
#define MY_AES_CTX_SIZE 600
enum my_aes_mode {
MY_AES_ECB, MY_AES_CBC

View file

@ -25,16 +25,28 @@
#include <my_crypt.h>
#include <ssl_compat.h>
#include <cstdint>
#ifdef HAVE_WOLFSSL
#define CTX_ALIGN 16
#else
#define CTX_ALIGN 0
#endif
class MyCTX
{
public:
char ctx_buf[EVP_CIPHER_CTX_SIZE];
EVP_CIPHER_CTX *ctx;
char ctx_buf[EVP_CIPHER_CTX_SIZE + CTX_ALIGN];
EVP_CIPHER_CTX* ctx;
MyCTX()
{
ctx= (EVP_CIPHER_CTX *)ctx_buf;
#if CTX_ALIGN > 0
uintptr_t p= ((uintptr_t)ctx_buf + (CTX_ALIGN - 1)) & ~(CTX_ALIGN - 1);
ctx = reinterpret_cast<EVP_CIPHER_CTX*>(p);
#else
ctx = (EVP_CIPHER_CTX*)ctx_buf;
#endif
EVP_CIPHER_CTX_init(ctx);
}
virtual ~MyCTX()
@ -149,8 +161,11 @@ public:
uchar mask[MY_AES_BLOCK_SIZE];
uint mlen;
my_aes_crypt(MY_AES_ECB, ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
int rc= my_aes_crypt(MY_AES_ECB, ENCRYPTION_FLAG_ENCRYPT | ENCRYPTION_FLAG_NOPAD,
oiv, sizeof(mask), mask, &mlen, key, klen, 0, 0);
DBUG_ASSERT(rc == MY_AES_OK);
if (rc)
return rc;
DBUG_ASSERT(mlen == sizeof(mask));
for (uint i=0; i < buf_len; i++)