Mirror of https://github.com/MariaDB/server.git (synced 2025-03-30 20:05:38 +02:00)
Restore LF_BACKOFF
Moved InnoDB UT_RELAX_CPU() to the server. Restored the cross-platform LF_BACKOFF implementation, basing it on UT_RELAX_CPU().
commit b3346c2f41
parent 07e9ff1fe1
10 changed files with 67 additions and 76 deletions
Changed directories: include, mysys, storage/innobase
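The diff below removes the old Windows-only my_yield_processor() path, gives my_cpu.h a portable MY_RELAX_CPU()/LF_BACKOFF() pair, and changes every call site from the macro LF_BACKOFF to the function call LF_BACKOFF(). As a rough, self-contained illustration of the pattern those call sites rely on (a CAS retry loop that slips a backoff into the loop condition through short-circuit &&), here is a sketch in plain C11. cpu_relax(), backoff(), and stack_push() are hypothetical stand-ins, not MariaDB code, and C11 atomics replace the my_atomic wrappers.

#include <stdatomic.h>

struct node { struct node *next; };

/* Spin-wait hint: "pause" on x86, otherwise a cheap no-op. */
static inline void cpu_relax(void)
{
#if defined(__x86_64__) || defined(__i386__)
  __asm__ __volatile__("pause");
#else
  atomic_signal_fence(memory_order_seq_cst);
#endif
}

/* Always returns nonzero so it can sit at the end of a && chain,
   just like the LF_BACKOFF() this commit restores. */
static inline int backoff(void)
{
  for (int i= 0; i < 200; i++)
    cpu_relax();
  return 1;
}

/* Lock-free stack push: retry the CAS until the top pointer is swapped. */
static void stack_push(_Atomic(struct node *) *top, struct node *n)
{
  struct node *old= atomic_load_explicit(top, memory_order_relaxed);
  do
  {
    n->next= old;  /* link the new node above the current top */
  } while (!atomic_compare_exchange_weak(top, &old, n) && backoff());
}

Because backoff() always returns 1, the && only decides when the backoff runs (after a failed CAS); it never changes whether the loop retries.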
@@ -90,37 +90,7 @@ C_MODE_END
   ret= 0; /* avoid compiler warning */  \
   ret= IL_COMP_EXCHG ## S (a, ret, ret);
 #endif
-/*
-  my_yield_processor (equivalent of x86 PAUSE instruction) should be used
-  to improve performance on hyperthreaded CPUs. Intel recommends to use it in
-  spin loops also on non-HT machines to reduce power consumption (see e.g
-  http://softwarecommunity.intel.com/articles/eng/2004.htm)
-
-  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
-  and YieldProcessor shows that much better performance is achieved by calling
-  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
-  loop count in the range 200-300 brought best results.
- */
-#ifndef YIELD_LOOPS
-#define YIELD_LOOPS 200
-#endif
-
-static __inline int my_yield_processor()
-{
-  int i;
-  for(i=0; i<YIELD_LOOPS; i++)
-  {
-#if (_MSC_VER <= 1310)
-    /* On older compilers YieldProcessor is not available, use inline assembly*/
-    __asm { rep nop }
-#else
-    YieldProcessor();
-#endif
-  }
-  return 1;
-}
 
-#define LF_BACKOFF my_yield_processor()
 #else /* cleanup */
 
 #undef IL_EXCHG_ADD32
@@ -17,6 +17,7 @@
 #define INCLUDE_LF_INCLUDED
 
 #include <my_atomic.h>
+#include <my_cpu.h>
 
 C_MODE_START
 
@@ -346,15 +346,6 @@ make_atomic_store(ptr)
 #undef make_atomic_fas_body
 #undef intptr
 
-/*
-  the macro below defines (as an expression) the code that
-  will be run in spin-loops. Intel manuals recummend to have PAUSE there.
-  It is expected to be defined in include/atomic/ *.h files
-*/
-#ifndef LF_BACKOFF
-#define LF_BACKOFF (1)
-#endif
-
 #define MY_ATOMIC_OK 0
 #define MY_ATOMIC_NOT_1CPU 1
 extern int my_atomic_initialize();
@@ -1,3 +1,5 @@
+#ifndef MY_CPU_INCLUDED
+#define MY_CPU_INCLUDED
 /* Copyright (c) 2013, MariaDB foundation Ab and SkySQL
 
    This program is free software; you can redistribute it and/or modify
@@ -42,3 +44,58 @@
 #define HMT_medium_high()
 #define HMT_high()
 #endif
+
+
+static inline void MY_RELAX_CPU(void)
+{
+#ifdef HAVE_PAUSE_INSTRUCTION
+  /*
+    According to the gcc info page, asm volatile means that the
+    instruction has important side-effects and must not be removed.
+    Also asm volatile may trigger a memory barrier (spilling all registers
+    to memory).
+  */
+#ifdef __SUNPRO_CC
+  asm ("pause" );
+#else
+  __asm__ __volatile__ ("pause");
+#endif
+
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+  __asm__ __volatile__ ("rep; nop");
+#elif defined _WIN32
+  /*
+    In the Win32 API, the x86 PAUSE instruction is executed by calling
+    the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+    independent way by using YieldProcessor.
+  */
+  YieldProcessor();
+#elif defined(_ARCH_PWR8)
+  __ppc_get_timebase();
+#else
+  int32 var, oldval = 0;
+  my_atomic_cas32_strong_explicit(&var, &oldval, 1, MY_MEMORY_ORDER_RELAXED,
+                                  MY_MEMORY_ORDER_RELAXED);
+#endif
+}
+
+
+/*
+  LF_BACKOFF should be used to improve performance on hyperthreaded CPUs. Intel
+  recommends to use it in spin loops also on non-HT machines to reduce power
+  consumption (see e.g http://softwarecommunity.intel.com/articles/eng/2004.htm)
+
+  Running benchmarks for spinlocks implemented with InterlockedCompareExchange
+  and YieldProcessor shows that much better performance is achieved by calling
+  YieldProcessor in a loop - that is, yielding longer. On Intel boxes setting
+  loop count in the range 200-300 brought best results.
+*/
+
+static inline int LF_BACKOFF(void)
+{
+  int i;
+  for (i= 0; i < 200; i++)
+    MY_RELAX_CPU();
+  return 1;
+}
+#endif
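For readers without the my_atomic headers at hand: the #else fallback in MY_RELAX_CPU() above issues a dummy relaxed compare-and-swap purely to burn a few cycles when no pause/yield primitive exists. A minimal sketch of the same idea in standard C11 follows; the function name relax_cpu_fallback() is made up for illustration, and the commit itself uses my_atomic_cas32_strong_explicit().

#include <stdatomic.h>
#include <stdint.h>

static inline void relax_cpu_fallback(void)
{
  /* A CAS on a local atomic: no ordering is requested (relaxed on both the
     success and failure paths), so this is just a short, harmless stall. */
  atomic_int_fast32_t var;
  int_fast32_t oldval= 0;
  atomic_init(&var, 0);
  atomic_compare_exchange_strong_explicit(&var, &oldval, 1,
                                          memory_order_relaxed,
                                          memory_order_relaxed);
}

On the platforms MariaDB normally ships on, one of the earlier branches (pause, rep; nop, YieldProcessor(), or __ppc_get_timebase()) is taken; this branch mainly keeps the function compiling and mildly throttling everywhere else.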
@@ -430,7 +430,7 @@ static void alloc_free(uchar *first,
   {
     anext_node(last)= tmp.node;
   } while (!my_atomic_casptr((void **)(char *)&allocator->top,
-                             (void **)&tmp.ptr, first) && LF_BACKOFF);
+                             (void **)&tmp.ptr, first) && LF_BACKOFF());
 }
 
 /*
@@ -501,7 +501,7 @@ void *lf_alloc_new(LF_PINS *pins)
     {
       node= allocator->top;
       lf_pin(pins, 0, node);
-    } while (node != allocator->top && LF_BACKOFF);
+    } while (node != allocator->top && LF_BACKOFF());
     if (!node)
     {
       node= (void *)my_malloc(allocator->element_size, MYF(MY_WME));
@@ -102,7 +102,7 @@ retry:
   do { /* PTR() isn't necessary below, head is a dummy node */
     cursor->curr= (LF_SLIST *)(*cursor->prev);
     lf_pin(pins, 1, cursor->curr);
-  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF);
+  } while (*cursor->prev != (intptr)cursor->curr && LF_BACKOFF());
 
   for (;;)
   {
@@ -117,7 +117,7 @@ retry:
       link= cursor->curr->link;
       cursor->next= PTR(link);
       lf_pin(pins, 0, cursor->next);
-    } while (link != cursor->curr->link && LF_BACKOFF);
+    } while (link != cursor->curr->link && LF_BACKOFF());
 
     if (!DELETED(link))
     {
@@ -145,7 +145,7 @@ retry:
           and remove this deleted node
         */
         if (my_atomic_casptr((void **) cursor->prev,
-                             (void **) &cursor->curr, cursor->next) && LF_BACKOFF)
+                             (void **) &cursor->curr, cursor->next) && LF_BACKOFF())
           lf_alloc_free(pins, cursor->curr);
         else
           goto retry;
@@ -617,7 +617,7 @@ retry:
   {
     rc= *shared_ptr;
     lf_pin(arg->thd->pins, 0, rc);
-  } while (rc != *shared_ptr && LF_BACKOFF);
+  } while (rc != *shared_ptr && LF_BACKOFF());
 
   if (rc == 0)
   {
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
 #include "univ.i"
 
 #include "ut0ut.h"
+#include "my_cpu.h"
 
 /** Execute a given function exactly once in a multi-threaded environment
 or wait for the function to be executed by another thread.
@@ -110,7 +111,7 @@ public:
 				ut_error;
 			}
 
-			UT_RELAX_CPU();
+			MY_RELAX_CPU();
 		}
 	}
 }
@@ -52,35 +52,6 @@ Created 1/20/1994 Heikki Tuuri
 /** Time stamp */
 typedef time_t ib_time_t;
 
-#ifdef HAVE_PAUSE_INSTRUCTION
-/* According to the gcc info page, asm volatile means that the
-instruction has important side-effects and must not be removed.
-Also asm volatile may trigger a memory barrier (spilling all registers
-to memory). */
-# ifdef __SUNPRO_CC
-# define UT_RELAX_CPU() asm ("pause" )
-# else
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
-/* In the Win32 API, the x86 PAUSE instruction is executed by calling
-the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
-independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
-	volatile int32 volatile_var; \
-	int32 oldval= 0; \
-	my_atomic_cas32(&volatile_var, &oldval, 1); \
-	} while (0)
-#endif
-
 #if defined (__GNUC__)
 # define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
 #elif defined (_MSC_VER)
@@ -293,7 +293,7 @@ ut_delay(
 	UT_LOW_PRIORITY_CPU();
 
 	for (i = 0; i < delay * 50; i++) {
-		UT_RELAX_CPU();
+		MY_RELAX_CPU();
 		UT_COMPILER_BARRIER();
 	}
 