mirror of
https://github.com/MariaDB/server.git
synced 2025-01-31 11:01:52 +01:00
d405bee058
MDEV-15843 mysys: remove optimized memcpy from 18 years ago While this code has remained dormant for 18 years, libc implementers have used assembly features to gain improvements using achitecture features optimized and by the buffer length like: * https://svnweb.freebsd.org/base/head/lib/libc/amd64/string/memcmp.S * https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/aarch64/memcmp.S * https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/powerpc/powerpc64/memcpy.S From an sysbench-1.0.6 oltp_read_only test on binary charset table: x86_64: was ptr_compare_0: perf report -g --no-children: + 3.37% mysqld mysqld [.] hp_rec_hashnr + 3.15% mysqld libc-2.26.so [.] __memmove_avx_unaligned_erms + 2.73% mysqld mysqld [.] row_search_mvcc + 1.97% mysqld mysqld [.] rec_get_offsets_func + 1.24% mysqld mysqld [.] ptr_compare_0 + 1.14% mysqld mysqld [.] my_qsort2 After: __memcmp_avx2_movbe + 3.42% mysqld mysqld [.] hp_rec_hashnr + 2.96% mysqld libc-2.26.so [.] __memmove_avx_unaligned_erms + 2.91% mysqld mysqld [.] row_search_mvcc + 2.13% mysqld mysqld [.] rec_get_offsets_func + 1.18% mysqld libc-2.26.so [.] __memcmp_avx2_movbe + 1.04% mysqld mysqld [.] evaluate_join_record + 1.02% mysqld mysqld [.] my_qsort2 Power9: Before: ptr_compare_0 + 4.24% mysqld mysqld [.] _Z15row_search_mvccPh15page_cur_mode_tP14row_prebuilt_tmm + 2.18% mysqld mysqld [.] hp_rec_hashnr + 2.07% mysqld mysqld [.] _Z20rec_get_offsets_funcPKhPK12dict_index_tPmbmPP16mem_block_info_t + 1.60% mysqld mysqld [.] _ZL20evaluate_join_recordP4JOINP13st_join_tablei + 1.20% mysqld mysqld [.] _ZN11ha_innobase13general_fetchEPhjj + 1.05% mysqld mysqld [.] _ZN17Item_func_between15val_int_cmp_intEv + 0.92% mysqld mysqld [.] _Z40row_sel_field_store_in_mysql_format_funcPhPK17mysql_row_templ_tPKhm + 0.91% mysqld mysqld [.] _ZNK10Item_param6PValue7val_intEPK19Type_std_attributes + 0.84% mysqld mysqld [.] ptr_compare_0 After: __memcmp_power8 + 2.29% mysqld mysqld [.] 
_Z15row_search_mvccPh15page_cur_mode_tP14row_prebuilt_tmm + 1.32% mysqld mysqld [.] hp_rec_hashnr + 1.18% swapper [kernel.kallsyms] [k] power_enter_stop + 1.12% mysqld mysqld [.] _Z20rec_get_offsets_funcPKhPK12dict_index_tPmbmPP16mem_block_info_t + 0.87% mysqld mysqld [.] _ZL20evaluate_join_recordP4JOINP13st_join_tablei + 0.87% mysqld [kernel.kallsyms] [k] ___bpf_prog_run + 0.76% mysqld libc-2.26.so [.] __memcmp_power8 + 0.68% mysqld mysqld [.] _ZN11ha_innobase13general_fetchEPhjj + 0.58% mysqld mysqld [.] _ZN17Item_func_between15val_int_cmp_intEv
224 lines
5.4 KiB
C
224 lines
5.4 KiB
C
/* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */
|
|
|
|
/*
|
|
get_ptr_compare(len) returns a pointer to an optimal byte-compare function
|
|
for an array of string pointers where all strings have size len.
|
|
The bytes are compared as unsigned chars.
|
|
*/
|
|
|
|
#include "mysys_priv.h"
|
|
#include <myisampack.h>
|
|
/*
|
|
* On some platforms, memcmp() is faster than the unrolled ptr_compare_N
|
|
* functions, as memcmp() is usually a platform-specific implementation
|
|
* written in assembler, for example the one in /usr/lib/libc/libc_hwcap*.so.1
|
|
* on Solaris, or on Windows inside the C runtime library.
|
|
*
|
|
* On Solaris, native implementation is also usually faster than the
|
|
* built-in memcmp supplied by GCC, so it is recommended to build
|
|
* with "-fno-builtin-memcmp" in CFLAGS if building with GCC on Solaris.
|
|
*/
|
|
|
|
/*
|
|
Daniel Black's tests show that libc memcmp is generally faster than
|
|
ptr_cmp() at least on x86 and power8 platforms, so we use the libc
|
|
code as default for now
|
|
*/
|
|
|
|
#define USE_NATIVE_MEMCMP 1
|
|
|
|
#ifdef USE_NATIVE_MEMCMP
|
|
|
|
#include <string.h>
|
|
|
|
/*
  Compare two keys with the C library memcmp().

  length  Points to the number of bytes to compare.
  a, b    Each points to a pointer to the first byte of a key.

  Returns the memcmp() result for the first *length bytes of *a and *b:
  negative, zero or positive.
*/
static int native_compare(size_t *length, unsigned char **a, unsigned char **b)
{
  const unsigned char *lhs= *a;
  const unsigned char *rhs= *b;
  const size_t nbytes= *length;

  return memcmp(lhs, rhs, nbytes);
}
|
|
|
|
qsort2_cmp get_ptr_compare (size_t size __attribute__((unused)))
|
|
{
|
|
return (qsort2_cmp) native_compare;
|
|
}
|
|
|
|
#else /* USE_NATIVE_MEMCMP */
|
|
|
|
/*
  Forward declarations of the hand-written comparators that
  get_ptr_compare() chooses between.
*/

/* Byte-by-byte comparator, used for keys shorter than four bytes. */
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b);

/* Unrolled comparators; the suffix is the key length modulo four. */
static int ptr_compare_0(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_1(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_2(size_t *compare_length, uchar **a, uchar **b);
static int ptr_compare_3(size_t *compare_length, uchar **a, uchar **b);
|
|
|
|
qsort2_cmp get_ptr_compare (size_t size)
|
|
{
|
|
if (size < 4)
|
|
return (qsort2_cmp) ptr_compare;
|
|
switch (size & 3) {
|
|
case 0: return (qsort2_cmp) ptr_compare_0;
|
|
case 1: return (qsort2_cmp) ptr_compare_1;
|
|
case 2: return (qsort2_cmp) ptr_compare_2;
|
|
case 3: return (qsort2_cmp) ptr_compare_3;
|
|
}
|
|
return 0; /* Impossible */
|
|
}
|
|
/*
  Compare two keys to see which is smaller.
  Loop unrolled to make it quick !!
*/

/*
  cmp(N): compare byte N of the two keys and return from the enclosing
  function with the difference (as int) if the bytes differ.

  The macro refers to the caller's local pointers by name, so the enclosing
  function must have variables called `first` and `last` in scope.

  The body is wrapped in do { } while (0) so that "cmp(N);" is exactly one
  statement. With a bare `if`, an `else` written after the macro call would
  attach to the macro's hidden `if` instead of the caller's.
*/
#define cmp(N) \
  do \
  { \
    if (first[N] != last[N]) \
      return (int) first[N] - (int) last[N]; \
  } while (0)
|
|
|
|
/*
  Generic byte-by-byte comparator.

  compare_length  Points to the key length in bytes; must be greater than
                  zero (checked with DBUG_ASSERT).
  a, b            Each points to a pointer to the first byte of a key.

  Returns the difference (as int) between the first pair of bytes that
  differ. If no earlier pair differs, returns the difference between the
  last pair of bytes, which is 0 when the keys are equal.
*/
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b)
{
  size_t remaining= *compare_length;
  uchar *lhs= *a;
  uchar *rhs= *b;

  DBUG_ASSERT(remaining > 0);

  /* Check every byte except the last; the last one is handled below. */
  for (; --remaining; lhs++, rhs++)
  {
    if (*lhs != *rhs)
      return (int) *lhs - (int) *rhs;
  }
  return (int) *lhs - (int) *rhs;
}
|
|
|
|
|
|
/*
  Unrolled comparator that processes the keys four bytes per iteration.
  get_ptr_compare() selects it for key sizes >= 4 with size % 4 == 0.

  compare_length  Points to the key length in bytes.
  a, b            Each points to a pointer to the first byte of a key.

  Returns the difference (as int) between the first pair of bytes that
  differ, or 0 if all compared bytes are equal.
*/
static int ptr_compare_0(size_t *compare_length,uchar **a, uchar **b)
{
  /* The names `first` and `last` are required by the cmp() macro. */
  uchar *first= *a;
  uchar *last= *b;
  size_t length= *compare_length;

  for (;;)
  {
    cmp(0);
    cmp(1);
    cmp(2);
    cmp(3);
    length-= 4;
    /* The loop only ends when the counter reaches exactly zero. */
    if (length == 0)
      break;
    first+= 4;
    last+= 4;
  }
  return 0;
}
|
|
|
|
|
|
/*
  Unrolled comparator: one leading byte, then four bytes per iteration.
  get_ptr_compare() selects it for key sizes >= 4 with size % 4 == 1.

  compare_length  Points to the key length in bytes.
  a, b            Each points to a pointer to the first byte of a key.

  Returns the difference (as int) between the first pair of bytes that
  differ, or 0 if all compared bytes are equal.
*/
static int ptr_compare_1(size_t *compare_length,uchar **a, uchar **b)
{
  /* The names `first` and `last` are required by the cmp() macro. */
  uchar *first= *a + 1;
  uchar *last= *b + 1;
  size_t length= *compare_length - 1;

  /* The pointers were advanced past the leading byte; check it first. */
  cmp(-1);

  for (;;)
  {
    cmp(0);
    cmp(1);
    cmp(2);
    cmp(3);
    length-= 4;
    /* The loop only ends when the counter reaches exactly zero. */
    if (length == 0)
      break;
    first+= 4;
    last+= 4;
  }
  return 0;
}
|
|
|
|
/*
  Unrolled comparator: two leading bytes, then four bytes per iteration.
  get_ptr_compare() selects it for key sizes >= 4 with size % 4 == 2.

  compare_length  Points to the key length in bytes.
  a, b            Each points to a pointer to the first byte of a key.

  Returns the difference (as int) between the first pair of bytes that
  differ, or 0 if all compared bytes are equal.
*/
static int ptr_compare_2(size_t *compare_length,uchar **a, uchar **b)
{
  /* The names `first` and `last` are required by the cmp() macro. */
  uchar *first= *a + 2;
  uchar *last= *b + 2;
  size_t length= *compare_length - 2;

  /* The pointers were advanced past two leading bytes; check them first. */
  cmp(-2);
  cmp(-1);

  for (;;)
  {
    cmp(0);
    cmp(1);
    cmp(2);
    cmp(3);
    length-= 4;
    /* The loop only ends when the counter reaches exactly zero. */
    if (length == 0)
      break;
    first+= 4;
    last+= 4;
  }
  return 0;
}
|
|
|
|
/*
  Unrolled comparator: three leading bytes, then four bytes per iteration.
  get_ptr_compare() selects it for key sizes >= 4 with size % 4 == 3.

  compare_length  Points to the key length in bytes.
  a, b            Each points to a pointer to the first byte of a key.

  Returns the difference (as int) between the first pair of bytes that
  differ, or 0 if all compared bytes are equal.
*/
static int ptr_compare_3(size_t *compare_length,uchar **a, uchar **b)
{
  /* The names `first` and `last` are required by the cmp() macro. */
  uchar *first= *a + 3;
  uchar *last= *b + 3;
  size_t length= *compare_length - 3;

  /* The pointers were advanced past three leading bytes; check them first. */
  cmp(-3);
  cmp(-2);
  cmp(-1);

  for (;;)
  {
    cmp(0);
    cmp(1);
    cmp(2);
    cmp(3);
    length-= 4;
    /* The loop only ends when the counter reaches exactly zero. */
    if (length == 0)
      break;
    first+= 4;
    last+= 4;
  }
  return 0;
}
|
|
|
|
#endif /* USE_NATIVE_MEMCMP */
|
|
|
|
/*
  Store a file position in a buffer using pack_length bytes.

  buff         Destination buffer; pack_length bytes are written.
  pack_length  Number of bytes to use, 1..4, or 1..8 when SIZEOF_OFF_T > 4.
  pos          Value to store.

  Widths above one byte are written with the matching mi_intNstore macro.
  Any other width trips DBUG_ASSERT(0) and leaves the buffer untouched.
*/
void my_store_ptr(uchar *buff, size_t pack_length, my_off_t pos)
{
  switch (pack_length) {
#if SIZEOF_OFF_T > 4
  case 8:
    mi_int8store(buff,pos);
    return;
  case 7:
    mi_int7store(buff,pos);
    return;
  case 6:
    mi_int6store(buff,pos);
    return;
  case 5:
    mi_int5store(buff,pos);
    return;
#endif
  case 4:
    mi_int4store(buff,pos);
    return;
  case 3:
    mi_int3store(buff,pos);
    return;
  case 2:
    mi_int2store(buff,pos);
    return;
  case 1:
    buff[0]= (uchar) pos;
    return;
  default:
    /* Unsupported width. */
    DBUG_ASSERT(0);
    return;
  }
}
|
|
|
|
/*
  Read a file position that occupies pack_length bytes of a buffer.

  ptr          Buffer holding the packed value.
  pack_length  Number of bytes used, 1..4, or 1..8 when SIZEOF_OFF_T > 4.

  Widths above one byte are decoded with the matching mi_uintNkorr macro.
  Returns the decoded value; any other width trips DBUG_ASSERT(0) and
  returns 0.
*/
my_off_t my_get_ptr(uchar *ptr, size_t pack_length)
{
  switch (pack_length) {
#if SIZEOF_OFF_T > 4
  case 8:
    return (my_off_t) mi_uint8korr(ptr);
  case 7:
    return (my_off_t) mi_uint7korr(ptr);
  case 6:
    return (my_off_t) mi_uint6korr(ptr);
  case 5:
    return (my_off_t) mi_uint5korr(ptr);
#endif
  case 4:
    return (my_off_t) mi_uint4korr(ptr);
  case 3:
    return (my_off_t) mi_uint3korr(ptr);
  case 2:
    return (my_off_t) mi_uint2korr(ptr);
  case 1:
    return (my_off_t) *(uchar*) ptr;
  default:
    /* Unsupported width. */
    DBUG_ASSERT(0);
    return 0;
  }
}
|