2007-11-14 17:58:38 +00:00
|
|
|
/* Time {m,l,s}fence vs. xchgl for a memory barrier. */
|
|
|
|
|
|
|
|
/* Timing numbers:
|
|
|
|
* Intel T2500 2GHZ
|
|
|
|
|
|
|
|
do1 9.0ns/loop
|
|
|
|
mfence: 29.0ns/loop (marginal cost= 20.0ns)
|
|
|
|
sfence: 17.3ns/loop (marginal cost= 8.3ns)
|
|
|
|
lfence: 23.6ns/loop (marginal cost= 14.6ns)
|
|
|
|
xchgl: 35.8ns/loop (marginal cost= 26.8ns)
|
|
|
|
|
|
|
|
* AMD Athlon 64 X2 Dual Core Processor 4200+
|
|
|
|
Timings are much noisier on this machine (some marginal costs even come out negative)
|
|
|
|
|
|
|
|
do1 20.6ns/loop
|
|
|
|
mfence: 12.9ns/loop (marginal cost= -7.6ns)
|
|
|
|
sfence: 8.4ns/loop (marginal cost= -12.1ns)
|
|
|
|
lfence: 20.2ns/loop (marginal cost= -0.3ns)
|
|
|
|
xchgl: 16.6ns/loop (marginal cost= -3.9ns)
|
|
|
|
|
|
|
|
do1 13.0ns/loop
|
|
|
|
mfence: 25.6ns/loop (marginal cost= 12.6ns)
|
|
|
|
sfence: 21.0ns/loop (marginal cost= 8.1ns)
|
|
|
|
lfence: 12.9ns/loop (marginal cost= -0.1ns)
|
|
|
|
xchgl: 29.3ns/loop (marginal cost= 16.3ns)
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
/* Number of iterations of the timed loop in each benchmark; tdiff() divides
 * the elapsed time by it to report a per-iteration figure. */
enum { COUNT = 100000000 };
|
|
|
|
|
|
|
|
/*
 * Use an xchgl against a stack slot as a memory barrier.
 *
 * According to the Intel Architecture Software Developer's
 * Manual, Volume 3: System Programming Guide
 * (http://www.intel.com/design/pro/manuals/243192.htm), page
 * 7-6, "For the P6 family processors, locked operations
 * serialize all outstanding load and store operations (that
 * is, wait for them to complete)."
 * Since xchg is locked by default, it is one way to do membar.
 */
static inline void xchgl (void) {
    int reg = 0;
    int mem = 0;  /* initialized: the exchange reads this slot */
    /*
     * xchgl reads AND writes both operands, so both are declared
     * read-write ("+"); declaring the memory slot as an input only would
     * let the compiler assume it is unchanged.  The "memory" clobber keeps
     * the compiler from moving memory accesses across the instruction.
     * __asm__ is used (rather than asm) so this also builds under -std=c11.
     */
    __asm__ __volatile__ ("xchgl %0,%1" : "+r" (reg), "+m" (mem) : : "memory");
}
|
|
|
|
|
|
|
|
/*
 * Execute one x86 mfence instruction.  The "memory" clobber additionally
 * stops the compiler from moving memory accesses across the instruction.
 * Spelled __asm__ (not asm) so it also compiles in strict -std=c99/c11 modes,
 * where asm is not a keyword.
 */
static inline void mfence (void) {
    __asm__ __volatile__ ("mfence" : : : "memory");
}
|
|
|
|
|
|
|
|
/*
 * Execute one x86 lfence instruction.  The "memory" clobber additionally
 * stops the compiler from moving memory accesses across the instruction.
 * Spelled __asm__ (not asm) so it also compiles in strict -std=c99/c11 modes,
 * where asm is not a keyword.
 */
static inline void lfence (void) {
    __asm__ __volatile__ ("lfence" : : : "memory");
}
|
|
|
|
|
|
|
|
/*
 * Execute one x86 sfence instruction.  The "memory" clobber additionally
 * stops the compiler from moving memory accesses across the instruction.
 * Spelled __asm__ (not asm) so it also compiles in strict -std=c99/c11 modes,
 * where asm is not a keyword.
 */
static inline void sfence (void) {
    __asm__ __volatile__ ("sfence" : : : "memory");
}
|
|
|
|
|
2013-04-16 23:57:51 -04:00
|
|
|
/* Lock word that lock_and_unlock() acquires and releases. */
int lock_for_lock_and_unlock;

/*
 * Take and immediately drop the lock word using the compiler's __sync
 * builtins: an atomic test-and-set to 1 followed by a release store of 0.
 * The previous value returned by the test-and-set is deliberately ignored.
 */
static inline void lock_and_unlock (void)
{
    int *const lock_word = &lock_for_lock_and_unlock;

    (void)__sync_lock_test_and_set(lock_word, 1);
    __sync_lock_release(lock_word);
}
|
|
|
|
|
2007-11-14 17:58:38 +00:00
|
|
|
|
|
|
|
double tdiff (struct timeval *start, struct timeval *end) {
|
|
|
|
return ((end->tv_sec-start->tv_sec + 1e-6*(end->tv_usec + start->tv_usec))/COUNT)*1e9;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Per-loop cost (ns) of the fence-free baseline loop.  Set by do1() and
 * subtracted by the doit()-generated benchmarks to report a marginal cost. */
double nop_cost;
|
|
|
|
|
|
|
|
void do1 (volatile int *x) {
|
|
|
|
int i;
|
|
|
|
struct timeval start, end;
|
|
|
|
gettimeofday(&start, 0);
|
|
|
|
for (i=0; i<COUNT; i++) {
|
|
|
|
x[0]++;
|
|
|
|
x[1]++;
|
|
|
|
x[2]++;
|
|
|
|
x[3]++;
|
|
|
|
}
|
|
|
|
gettimeofday(&end, 0);
|
|
|
|
printf("do1 %6.1fns/loop\n", nop_cost=tdiff(&start, &end));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * doit(name) defines `void do<name>(volatile int *x)`, a benchmark with the
 * same loop as do1() except that name() is called between the second and
 * third increments.  It prints the per-loop cost and the marginal cost
 * relative to nop_cost (so do1() must have run first for that figure to be
 * meaningful).
 */
#define doit(name) void do ##name (volatile int *x) {                        \
    struct timeval t_begin, t_end;                                            \
    int iter;                                                                 \
    gettimeofday(&t_begin, 0);                                                \
    for (iter = 0; iter < COUNT; iter++) {                                    \
        x[0]++;                                                               \
        x[1]++;                                                               \
        name();                                                               \
        x[2]++;                                                               \
        x[3]++;                                                               \
    }                                                                         \
    gettimeofday(&t_end, 0);                                                  \
    double per_loop_ns = tdiff(&t_begin, &t_end);                             \
    double marginal_ns = per_loop_ns - nop_cost;                              \
    printf("%15s:%6.1fns/loop (marginal cost=%6.1fns)\n", #name, per_loop_ns, marginal_ns); \
}
|
|
|
|
|
|
|
|
|
|
|
|
doit(mfence)
|
|
|
|
doit(lfence)
|
|
|
|
doit(sfence)
|
|
|
|
doit(xchgl)
|
2013-04-16 23:57:51 -04:00
|
|
|
doit(lock_and_unlock);
|
2007-11-14 17:58:38 +00:00
|
|
|
|
|
|
|
/*
 * Run the baseline and every barrier benchmark four times in a row so that
 * run-to-run variation is visible in the output.
 *
 * The counter array is zero-initialized afresh for every round.  Each
 * benchmark adds COUNT (1e8) to every element, so one round adds 6e8, which
 * fits in an int; letting the counters accumulate across all four rounds
 * (2.4e9) would overflow a signed int, and starting from an uninitialized
 * array would read indeterminate values.
 */
int main (int argc __attribute__((__unused__)),
          char *argv[] __attribute__((__unused__))) {
    int i;
    for (i=0; i<4; i++) {
        int x[4] = {0};
        do1(x);
        domfence(x);
        dosfence(x);
        dolfence(x);
        doxchgl(x);
        dolock_and_unlock(x);
    }
    return 0;
}
|