/* Copyright (c) 2020, 2021, MariaDB

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

/*
  Implementation of CRC32 (Ethernet) using Intel PCLMULQDQ
  Ported from Intel's work, see https://github.com/intel/soft-crc
*/

/*******************************************************************************
 Copyright (c) 2009-2018, Intel Corporation

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.
   * Neither the name of Intel Corporation nor the names of its contributors
     may be used to endorse or promote products derived from this software
     without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/

#include <my_global.h>
#include <my_compiler.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stddef.h>

#ifdef __GNUC__
# include <emmintrin.h>
# include <smmintrin.h>
# include <tmmintrin.h>
# include <wmmintrin.h>
# define USE_PCLMUL __attribute__((target("sse4.2,pclmul")))
#elif defined(_MSC_VER)
# include <intrin.h>
# define USE_PCLMUL /* nothing */
#else
# error "unknown compiler"
#endif
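
/*
  Editorial note: the USE_PCLMUL target attribute lets GCC and clang emit
  SSE4.2 and PCLMUL instructions for just the annotated functions, so this
  file compiles without passing -msse4.2 -mpclmul globally; the appropriate
  implementation is presumably selected at run time elsewhere, after a CPUID
  feature check.
*/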

/**
 * @brief Shifts left 128 bit register by specified number of bytes
 *
 * @param reg 128 bit value
 * @param num number of bytes to shift left \a reg by (0-16)
 *
 * @return \a reg << (\a num * 8)
 */
USE_PCLMUL
static inline __m128i xmm_shift_left(__m128i reg, const unsigned int num)
{
  static const MY_ALIGNED(16) uint8_t crc_xmm_shift_tab[48]= {
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};

  const __m128i *p= (const __m128i *) (crc_xmm_shift_tab + 16 - num);

  return _mm_shuffle_epi8(reg, _mm_loadu_si128(p));
}
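
/*
  Editorial example (a sketch, not from the original source): with num == 3
  the mask loaded from crc_xmm_shift_tab + 13 is
  0xff,0xff,0xff,0x00,0x01,...,0x0c, so _mm_shuffle_epi8() zeroes the low
  three bytes (mask high bit set) and moves every other byte up by three
  positions. The result equals _mm_slli_si128(reg, 3), but _mm_slli_si128()
  only accepts a compile-time immediate shift count, while this table lookup
  also works for a run-time num.
*/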

struct crcr_pclmulqdq_ctx
{
  uint64_t rk1;
  uint64_t rk2;
  uint64_t rk5;
  uint64_t rk6;
  uint64_t rk7;
  uint64_t rk8;
};

/**
 * @brief Performs one folding round
 *
 * Logically, the function operates as follows:
 *     DATA = READ_NEXT_16BYTES();
 *     F1 = LSB8(FOLD)
 *     F2 = MSB8(FOLD)
 *     T1 = CLMUL(F1, RK1)
 *     T2 = CLMUL(F2, RK2)
 *     FOLD = XOR(T1, T2, DATA)
 *
 * @param data_block 16 byte data block
 * @param precomp precomputed rk1 and rk2 constants
 * @param fold running 16 byte folded data
 *
 * @return New 16 byte folded data
 */
USE_PCLMUL
static inline __m128i crcr32_folding_round(const __m128i data_block,
                                           const __m128i precomp,
                                           const __m128i fold)
{
  __m128i tmp0= _mm_clmulepi64_si128(fold, precomp, 0x01);
  __m128i tmp1= _mm_clmulepi64_si128(fold, precomp, 0x10);

  return _mm_xor_si128(tmp1, _mm_xor_si128(data_block, tmp0));
}
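
/*
  Editorial note: per the Intel SDM, bit 0 of the PCLMULQDQ immediate selects
  which 64-bit half of the first operand enters the carry-less multiply, and
  bit 4 selects the half of the second operand. So 0x01 multiplies the upper
  half of fold by the lower half of precomp, and 0x10 the lower half of fold
  by the upper half of precomp; XORing both products with the next 16 data
  bytes folds the running remainder forward by 16 bytes.
*/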

/**
 * @brief Performs reduction from 128 bits to 64 bits
 *
 * @param data128 128 bits data to be reduced
 * @param precomp rk5 and rk6 precomputed constants
 *
 * @return data reduced to 64 bits
 */
USE_PCLMUL
static inline __m128i crcr32_reduce_128_to_64(__m128i data128,
                                              const __m128i precomp)
{
  __m128i tmp0, tmp1, tmp2;

  /* 64b fold */
  tmp0= _mm_clmulepi64_si128(data128, precomp, 0x00);
  tmp1= _mm_srli_si128(data128, 8);
  tmp0= _mm_xor_si128(tmp0, tmp1);

  /* 32b fold */
  tmp2= _mm_slli_si128(tmp0, 4);
  tmp1= _mm_clmulepi64_si128(tmp2, precomp, 0x10);

  return _mm_xor_si128(tmp1, tmp0);
}

/**
 * @brief Performs Barrett's reduction from 64 bits to 32 bits
 *
 * @param data64 64 bits data to be reduced
 * @param precomp rk7 and rk8 precomputed constants
 *
 * @return data reduced to 32 bits
 */
USE_PCLMUL
static inline uint32_t crcr32_reduce_64_to_32(__m128i data64,
                                              const __m128i precomp)
{
  static const MY_ALIGNED(16) uint32_t mask1[4]= {
    0xffffffff, 0xffffffff, 0x00000000, 0x00000000};
  static const MY_ALIGNED(16) uint32_t mask2[4]= {
    0x00000000, 0xffffffff, 0xffffffff, 0xffffffff};
  __m128i tmp0, tmp1, tmp2;

  tmp0= _mm_and_si128(data64, _mm_load_si128((__m128i *) mask2));

  tmp1= _mm_clmulepi64_si128(tmp0, precomp, 0x00);
  tmp1= _mm_xor_si128(tmp1, tmp0);
  tmp1= _mm_and_si128(tmp1, _mm_load_si128((__m128i *) mask1));

  tmp2= _mm_clmulepi64_si128(tmp1, precomp, 0x10);
  tmp2= _mm_xor_si128(tmp2, tmp1);
  tmp2= _mm_xor_si128(tmp2, tmp0);

  return _mm_extract_epi32(tmp2, 2);
}
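
/*
  Editorial note: Barrett's reduction replaces the final modular division by
  the CRC polynomial with two carry-less multiplications, one against a
  precomputed reciprocal of the polynomial (rk7, the low half of precomp)
  and one against the polynomial itself (rk8, the high half). The 32-bit
  remainder ends up in dword lane 2, which _mm_extract_epi32() returns.
*/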

/**
 * @brief Calculates reflected 32-bit CRC for given \a data block
 *        by applying folding and reduction methods.
 *
 * Algorithm operates on 32 bit CRCs.
 * Polynomials and initial values may need to be promoted to
 * 32 bits where required.
 *
 * @param crc initial CRC value (32 bit value)
 * @param data pointer to data block
 * @param data_len length of \a data block in bytes
 * @param params pointer to PCLMULQDQ CRC calculation context
 *
 * @return CRC for given \a data block (32 bits wide).
 */
USE_PCLMUL
static inline uint32_t crcr32_calc_pclmulqdq(const uint8_t *data,
                                             uint32_t data_len, uint32_t crc,
                                             const struct crcr_pclmulqdq_ctx
                                             *params)
{
  __m128i temp, fold, k;
  uint32_t n;

  DBUG_ASSERT(data != NULL || data_len == 0);
  DBUG_ASSERT(params);

  if (unlikely(data_len == 0))
    return crc;

  /**
   * Get CRC init value
   */
  temp= _mm_insert_epi32(_mm_setzero_si128(), crc, 0);

  /**
   * -------------------------------------------------
   * Folding all data into single 16 byte data block
   * Assumes: \a fold holds first 16 bytes of data
   */

  if (unlikely(data_len < 32))
  {
    if (unlikely(data_len == 16))
    {
      /* 16 bytes */
      fold= _mm_loadu_si128((__m128i *) data);
      fold= _mm_xor_si128(fold, temp);
      goto reduction_128_64;
    }
    if (unlikely(data_len < 16))
    {
      /* 0 to 15 bytes */
      MY_ALIGNED(16) uint8_t buffer[16];

      memset(buffer, 0, sizeof(buffer));
      memcpy(buffer, data, data_len);

      fold= _mm_load_si128((__m128i *) buffer);
      fold= _mm_xor_si128(fold, temp);
      if (data_len < 4)
      {
        fold= xmm_shift_left(fold, 8 - data_len);
        goto barret_reduction;
      }
      fold= xmm_shift_left(fold, 16 - data_len);
      goto reduction_128_64;
    }
    /* 17 to 31 bytes */
    fold= _mm_loadu_si128((__m128i *) data);
    fold= _mm_xor_si128(fold, temp);
    n= 16;
    k= _mm_load_si128((__m128i *) (&params->rk1));
    goto partial_bytes;
  }

  /**
   * At least 32 bytes in the buffer
   */

  /**
   * Apply CRC initial value
   */
  fold= _mm_loadu_si128((const __m128i *) data);
  fold= _mm_xor_si128(fold, temp);

  /**
   * Main folding loop
   * - the last 16 bytes are processed separately
   */
  k= _mm_load_si128((__m128i *) (&params->rk1));
  for (n= 16; (n + 16) <= data_len; n+= 16)
  {
    temp= _mm_loadu_si128((__m128i *) &data[n]);
    fold= crcr32_folding_round(temp, k, fold);
  }

partial_bytes:
  if (likely(n < data_len))
  {
    static const MY_ALIGNED(16) uint32_t mask3[4]= {0x80808080, 0x80808080,
                                                    0x80808080, 0x80808080};
    static const MY_ALIGNED(16) uint8_t shf_table[32]= {
      0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a,
      0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
      0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f};
    __m128i last16, a, b;

    last16= _mm_loadu_si128((const __m128i *) &data[data_len - 16]);

    temp= _mm_loadu_si128((const __m128i *) &shf_table[data_len & 15]);
    a= _mm_shuffle_epi8(fold, temp);

    temp= _mm_xor_si128(temp, _mm_load_si128((const __m128i *) mask3));
    b= _mm_shuffle_epi8(fold, temp);
    b= _mm_blendv_epi8(b, last16, temp);

    /* k = rk1 & rk2 */
    temp= _mm_clmulepi64_si128(a, k, 0x01);
    fold= _mm_clmulepi64_si128(a, k, 0x10);

    fold= _mm_xor_si128(fold, temp);
    fold= _mm_xor_si128(fold, b);
  }

  /**
   * -------------------------------------------------
   * Reduction 128 -> 32
   * Assumes: \a fold holds 128bit folded data
   */
reduction_128_64:
  k= _mm_load_si128((__m128i *) (&params->rk5));
  fold= crcr32_reduce_128_to_64(fold, k);

barret_reduction:
  k= _mm_load_si128((__m128i *) (&params->rk7));
  n= crcr32_reduce_64_to_32(fold, k);
  return n;
}

static const MY_ALIGNED(16) struct crcr_pclmulqdq_ctx ether_crc32_clmul= {
  0xccaa009e,  /**< rk1 */
  0x1751997d0, /**< rk2 */
  0xccaa009e,  /**< rk5 */
  0x163cd6124, /**< rk6 */
  0x1f7011640, /**< rk7 */
  0x1db710641  /**< rk8 */
};
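
/*
  Editorial note: in Intel's soft-crc scheme, rk1/rk2 are the 16-byte folding
  constants, rk5/rk6 drive the 128->64 bit reduction, rk7 is the Barrett
  reciprocal and rk8 the polynomial itself. Here rk8 == 0x1db710641, which is
  the bit-reflected form of the Ethernet CRC-32 polynomial 0x104c11db7.
*/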

/**
 * @brief Calculates Ethernet CRC32 using PCLMULQDQ method.
 *
 * @param crc32 initial CRC value
 * @param buf pointer to data block to calculate CRC for
 * @param len size of data block
 *
 * @return New CRC value
 */
unsigned int crc32_pclmul(unsigned int crc32, const void *buf, size_t len)
{
  return ~crcr32_calc_pclmulqdq(buf, (uint32_t) len, ~crc32,
                                &ether_crc32_clmul);
}
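
/*
  Editorial self-check sketch (not part of the original source; the
  CRC32_PCLMUL_SELF_TEST guard, crc32_ref() and main() below are hypothetical
  test helpers). The wrapper above follows zlib-style CRC-32 semantics for
  the reflected ISO 3309 polynomial 0xEDB88320, so on a CPU with PCLMUL
  support it can be validated against a plain bitwise reference and the
  well-known check value crc32("123456789") == 0xCBF43926.
*/
#ifdef CRC32_PCLMUL_SELF_TEST
static uint32_t crc32_ref(uint32_t crc, const void *buf, size_t len)
{
  const uint8_t *p= (const uint8_t *) buf;
  crc= ~crc;
  while (len--)
  {
    crc^= *p++;
    for (int i= 0; i < 8; i++)
      /* conditionally XOR the reflected polynomial on each bit */
      crc= (crc >> 1) ^ (0xEDB88320U & (0U - (crc & 1U)));
  }
  return ~crc;
}

int main(void)
{
  uint32_t r= crc32_ref(0, "123456789", 9);
  uint32_t h= crc32_pclmul(0, "123456789", 9);

  printf("ref=%08x pclmul=%08x %s\n", (unsigned) r, (unsigned) h,
         (r == 0xCBF43926 && h == r) ? "OK" : "FAIL");
  return !(r == 0xCBF43926 && h == r);
}
#endif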