mirror of
https://github.com/MariaDB/server.git
synced 2025-01-26 16:54:15 +01:00
Small Windows specific performance fixes:
- Use the native memcmp() supplied with the C runtime instead of the hand-unrolled ptr_compare_N loops. Prior to the fix, ptr_compare_0() had 3.7% of samples in OLTP-RO in-memory. The fix brings this down to 1.8% (all memcmp samples).
- InnoDB: fix UT_RELAX_CPU to be defined as YieldProcessor, as was originally intended (but the intention was lost in the #ifdef maze). This reduces the number of ut_delay() samples in the profile from 1.5% to 0.5%.
This commit is contained in:
parent
99aa3d465e
commit
1be4b121d5
3 changed files with 27 additions and 21 deletions
|
@ -21,17 +21,23 @@
|
|||
|
||||
#include "mysys_priv.h"
|
||||
#include <myisampack.h>
|
||||
|
||||
#ifdef __sun
|
||||
/*
|
||||
* On Solaris, memcmp() is normally faster than the unrolled ptr_compare_N
|
||||
* On some platforms, memcmp() is faster than the unrolled ptr_compare_N
|
||||
* functions, as memcmp() is usually a platform-specific implementation
|
||||
* written in assembler, provided in /usr/lib/libc/libc_hwcap*.so.1.
|
||||
* This implementation is also usually faster than the built-in memcmp
|
||||
* supplied by GCC, so it is recommended to build with "-fno-builtin-memcmp"
|
||||
* in CFLAGS if building with GCC on Solaris.
|
||||
* written in assembler, for example the one in /usr/lib/libc/libc_hwcap*.so.1
|
||||
* on Solaris, or on Windows inside the C runtime library.
|
||||
*
|
||||
* On Solaris, the native implementation is also usually faster than the
|
||||
* built-in memcmp supplied by GCC, so it is recommended to build
|
||||
* with "-fno-builtin-memcmp" in CFLAGS if building with GCC on Solaris.
|
||||
*/
|
||||
|
||||
#if defined (__sun) || defined (_WIN32)
|
||||
#define USE_NATIVE_MEMCMP 1
|
||||
#endif
|
||||
|
||||
#ifdef USE_NATIVE_MEMCMP
|
||||
|
||||
#include <string.h>
|
||||
|
||||
static int native_compare(size_t *length, unsigned char **a, unsigned char **b)
|
||||
|
@ -39,7 +45,7 @@ static int native_compare(size_t *length, unsigned char **a, unsigned char **b)
|
|||
return memcmp(*a, *b, *length);
|
||||
}
|
||||
|
||||
#else /* __sun */
|
||||
#else /* USE_NATIVE_MEMCMP */
|
||||
|
||||
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b);
|
||||
static int ptr_compare_0(size_t *compare_length, uchar **a, uchar **b);
|
||||
|
@ -50,7 +56,7 @@ static int ptr_compare_3(size_t *compare_length, uchar **a, uchar **b);
|
|||
|
||||
/* Get a pointer to an optimal byte-compare function for a given size */
|
||||
|
||||
#ifdef __sun
|
||||
#ifdef USE_NATIVE_MEMCMP
|
||||
qsort2_cmp get_ptr_compare (size_t size __attribute__((unused)))
|
||||
{
|
||||
return (qsort2_cmp) native_compare;
|
||||
|
@ -68,7 +74,7 @@ qsort2_cmp get_ptr_compare (size_t size)
|
|||
}
|
||||
return 0; /* Impossible */
|
||||
}
|
||||
#endif /* __sun */
|
||||
#endif /* USE_NATIVE_MEMCMP */
|
||||
|
||||
|
||||
/*
|
||||
|
@ -78,7 +84,7 @@ qsort2_cmp get_ptr_compare (size_t size)
|
|||
|
||||
#define cmp(N) if (first[N] != last[N]) return (int) first[N] - (int) last[N]
|
||||
|
||||
#ifndef __sun
|
||||
#ifndef USE_NATIVE_MEMCMP
|
||||
|
||||
static int ptr_compare(size_t *compare_length, uchar **a, uchar **b)
|
||||
{
|
||||
|
|
|
@ -63,16 +63,16 @@ typedef time_t ib_time_t;
|
|||
# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
|
||||
#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
|
||||
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
|
||||
#elif defined(HAVE_ATOMIC_BUILTINS)
|
||||
# define UT_RELAX_CPU() do { \
|
||||
volatile lint volatile_var; \
|
||||
os_compare_and_swap_lint(&volatile_var, 0, 1); \
|
||||
} while (0)
|
||||
#elif defined(HAVE_WINDOWS_ATOMICS)
|
||||
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
|
||||
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
|
||||
independent way by using YieldProcessor. */
|
||||
# define UT_RELAX_CPU() YieldProcessor()
|
||||
#elif defined(HAVE_ATOMIC_BUILTINS)
|
||||
# define UT_RELAX_CPU() do { \
|
||||
volatile lint volatile_var; \
|
||||
os_compare_and_swap_lint(&volatile_var, 0, 1); \
|
||||
} while (0)
|
||||
#else
|
||||
# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
|
||||
#endif
|
||||
|
|
|
@ -63,16 +63,16 @@ typedef time_t ib_time_t;
|
|||
# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
|
||||
#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
|
||||
# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
|
||||
#elif defined(HAVE_ATOMIC_BUILTINS)
|
||||
# define UT_RELAX_CPU() do { \
|
||||
volatile lint volatile_var; \
|
||||
os_compare_and_swap_lint(&volatile_var, 0, 1); \
|
||||
} while (0)
|
||||
#elif defined(HAVE_WINDOWS_ATOMICS)
|
||||
/* In the Win32 API, the x86 PAUSE instruction is executed by calling
|
||||
the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
|
||||
independent way by using YieldProcessor. */
|
||||
# define UT_RELAX_CPU() YieldProcessor()
|
||||
#elif defined(HAVE_ATOMIC_BUILTINS)
|
||||
# define UT_RELAX_CPU() do { \
|
||||
volatile lint volatile_var; \
|
||||
os_compare_and_swap_lint(&volatile_var, 0, 1); \
|
||||
} while (0)
|
||||
#else
|
||||
# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue