2008-01-24 15:10:32 +00:00
|
|
|
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
|
2007-11-29 14:18:54 +00:00
|
|
|
|
2007-11-14 17:58:38 +00:00
|
|
|
#if defined(__x86_64) || defined(__i386)
|
|
|
|
|
|
|
|
/* Full memory fence (x86 MFENCE): all loads and stores issued before this
 * point complete before any load or store issued after it.
 * The "memory" clobber also stops the compiler from reordering memory
 * accesses across the fence. */
static inline void mfence (void) {
__asm__ volatile ("mfence":::"memory");
}
|
|
|
|
/* Read (load) fence: all loads issued before this point complete before any
 * load issued after it.
 * BUG FIX: the original asm string was "rfence", which is not an x86
 * instruction and fails at assembly time — the load-fence instruction is
 * LFENCE.  The function keeps its existing name so callers are unaffected. */
static inline void rfence (void) {
    __asm__ volatile ("lfence":::"memory");
}
|
|
|
|
/* Store fence (x86 SFENCE): all stores issued before this point become
 * globally visible before any store issued after it.  Loads are NOT
 * ordered by SFENCE.  "memory" clobber prevents compiler reordering. */
static inline void sfence (void) {
__asm__ volatile ("sfence":::"memory");
}
|
|
|
|
|
|
|
|
/* According to the Intel Architecture Software Developer's
|
|
|
|
* Manual, Volume 3: System Programming Guide
|
|
|
|
* (http://www.intel.com/design/pro/manuals/243192.htm), page 7-6,
|
|
|
|
* "For the P6 family processors, locked operations serialize all
|
|
|
|
* outstanding load and store operations (that is, wait for them to
|
|
|
|
* complete)."
|
|
|
|
*
|
|
|
|
 * Bradley found that fence instructions are faster on an Opteron
|
|
|
|
* mfence takes 8ns on a 1.5GHZ AMD64 (maybe this is an 801)
|
|
|
|
* sfence takes 5ns
|
|
|
|
* lfence takes 3ns
|
|
|
|
* xchgl takes 14ns
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Atomically exchange *ptr with x and return the previous value of *ptr.
 * On x86, XCHG with a memory operand carries an implicit LOCK prefix, so
 * this is a full atomic read-modify-write (and, per the Intel SDM quote
 * above, serializes outstanding loads and stores).
 * BUG FIX: the original declaration omitted the return type, relying on
 * implicit int — invalid since C99 and a hard error in C23. */
static inline int lock_xchgl(volatile int *ptr, int x)
{
    __asm__("xchgl %0,%1" :"=r" (x) :"m" (*(ptr)), "0" (x) :"memory");
    return x;
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
typedef volatile int SPINLOCK[1];
|
|
|
|
|
|
|
|
/* Put a spinlock into the unlocked state.  The trailing mfence forces the
 * cleared flag to be globally visible before the lock is first contended. */
static inline void spin_init (SPINLOCK v) {
    *v = 0;      /* 0 == unlocked */
    mfence();
}
|
|
|
|
|
|
|
|
/* Acquire the spinlock: a classic test-and-test-and-set loop.  The atomic
 * xchg attempts the acquisition; while the lock is held we spin on plain
 * reads only, which keeps the cache line shared and avoids hammering the
 * bus with locked writes.  (MCS queue locks would scale better still.) */
static inline void spin_lock (SPINLOCK v) {
    for (;;) {
        if (lock_xchgl((int*)v, 1) == 0)
            return;              /* we installed the 1: lock acquired */
        while (*v)               /* read-only spin until it looks free */
            ;
    }
}
|
|
|
|
/* Release the spinlock.  The sfence first drains the store buffer so every
 * store made inside the critical section becomes visible before the store
 * that marks the lock free. */
static inline void spin_unlock (SPINLOCK v) {
    sfence();    /* order critical-section stores before the release */
    *v = 0;      /* mark the lock free */
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
#error Need to define architecture-specific stuff for other machines.
|
|
|
|
#endif
|