/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."

// Here are some timing numbers:
// (Note: The not-quite-working version with cas can be found in r22519 of https://svn.tokutek.com/tokudb/toku/tokudb.2825/. It's about as fast as "Best cas".)
//
// On ramie (2.53GHz E5540)
// Best nop time= 1.074300ns
// Best cas time= 8.595600ns
// Best mutex time= 19.340201ns
// Best rwlock time= 34.024799ns
// Best newbrt rwlock time= 38.680500ns
// Best prelocked time= 2.148700ns
// Best fair rwlock time= 45.127600ns
// On laptop
// Best nop time= 2.876000ns
// Best cas time= 15.362500ns
// Best mutex time= 51.951498ns
// Best rwlock time= 97.721201ns
// Best newbrt rwlock time=110.456800ns
// Best prelocked time= 4.240100ns
// Best fair rwlock time=113.119102ns
//
// Analysis: If the mutex can be prelocked (as the cachetable does: it uses the same mutex for the cachetable and for the condition variable protecting the cache table)
// then you can save quite a bit. What does the cachetable do?
// During pin: (In the common case:) It grabs the mutex, grabs a read lock, and releases the mutex.
// During unpin: It grabs the mutex, unlocks the rwlock in the pair, and releases the mutex.
// Both actions must acquire a cachetable lock during that time, so it definitely saves time to do it that way.
// NOTE(review): the bare #include directives below carry no header name in this
// copy of the file; the operands appear to have been lost and must be restored
// from version control before this file can compile.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "rwlock_condvar.h"

// Verbosity level: raised by each "-v" and lowered by each "-q" (see parse_args).
static int verbose=1;
// Set to 1 by "--timing-only" (see parse_args); main() reads it to choose between
// the timing runs and test_rwlock().
static int timing_only=0;

// Parse the command line.
//   argc, argv: as passed to main(); argv[0] is used only as the program name
//               in the usage message.
// Recognised flags: "-v" (verbose++), "-q" (verbose--), "--timing-only"
// (timing_only=1).  Any other argument prints a usage line to stderr and
// calls exit(1).
static void parse_args (int argc, const char *argv[]) {
    const char *progname = argv[0];
    argc--; argv++;   // skip the program name
    while (argc>0) {
        if (strcmp(argv[0], "-v")==0) {
            verbose++;
        } else if (strcmp(argv[0], "-q")==0) {
            verbose--;
        } else if (strcmp(argv[0], "--timing-only")==0) {
            timing_only=1;
        } else {
            fprintf(stderr, "Usage: %s {-q}* {-v}* {--timing-only}\n", progname);
            exit(1);
        }
        argc--; argv++;   // advance to the next argument
    }
}

// NOTE(review): presumably the number of timing trials (T) and the iterations per
// trial (N); the loops that would use them are truncated in this copy -- confirm.
static const int T=6;
static const int N=10000000;

// Best (smallest) time recorded so far for each benchmark.  Each starts at 1e12
// and is updated through mind() by the corresponding time_*() function; main()
// prints them with an "ns" suffix.
static double best_nop_time=1e12;
static double best_fcall_time=1e12;
static double best_cas_time=1e12;
static double best_mutex_time=1e12;
static double best_rwlock_time=1e12;
static double best_newbrt_time=1e12;
static double best_prelocked_time=1e12;
static double best_cv_fair_rwlock_time=1e12; // fair from condition variables
static double best_fair_rwlock_time=1e12;
static double best_frwlock_time=1e12;
static double best_frwlock_prelocked_time=1e12;

// NOTE(review): the text between "if (a" and "1) fprintf" is missing from this
// copy.  What remains is the head of mind() fused with the tail of a function
// that reports "nop" timings and updates best_nop_time (main() calls a
// time_nop()).  The code is kept exactly as found; restore the missing text
// from version control.
static double mind(double a, double b) { if (a1) fprintf(stderr, "nop = %.6fns/(lock+unlock)\n", diff);
    best_nop_time=mind(best_nop_time,diff);
    }
}

// This function is defined so we can measure the cost of a function call.
// NOTE(review): throughout this section, spans of the original text are missing
// (visible as fragments such as "for (int t=0; t1) fprintf").  Each affected
// spot is marked "truncated" below and the code is kept exactly as found.
// Restore the missing text from version control before relying on this file.

// Returns its argument unchanged.  Declared noinline.
int fcall_nop (int i) __attribute__((__noinline__));
int fcall_nop (int i) { return i; }

// Reports "fcall" timings to stderr when verbose>1 and records the minimum in
// best_fcall_time.
void time_fcall (void) __attribute((__noinline__));
void time_fcall (void) {
    struct timeval start,end;
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "fcall = %.6fns/(lock+unlock)\n", diff);
    best_fcall_time=mind(best_fcall_time,diff);
    }
}

// Reports "cas" timings to stderr when verbose>1 and records the minimum in
// best_cas_time.  tval is a volatile 64-bit local; its use lies in the
// truncated part.
void time_cas (void) __attribute__((__noinline__));
void time_cas (void) {
    volatile int64_t tval = 0;
    struct timeval start,end;
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "cas = %.6fns/(lock+unlock)\n", diff);
    best_cas_time=mind(best_cas_time,diff);
    }
}

// Times a pthread mutex.  The mutex is initialised (asserting success), locked
// and unlocked once before the timed section, and destroyed (asserting success)
// at the end.  The minimum is recorded in best_mutex_time.
void time_pthread_mutex (void) __attribute__((__noinline__));
void time_pthread_mutex (void) {
    pthread_mutex_t mutex;
    { int r = pthread_mutex_init(&mutex, NULL); assert(r==0); }
    struct timeval start,end;
    // one lock/unlock outside the timed section
    pthread_mutex_lock(&mutex);
    pthread_mutex_unlock(&mutex);
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "pthread_mutex = %.6fns/(lock+unlock)\n", diff);
    best_mutex_time=mind(best_mutex_time,diff);
    }
    { int r = pthread_mutex_destroy(&mutex); assert(r==0); }
}

// Times a pthread rwlock.  The lock is initialised (asserting success),
// read-locked and unlocked once before the timed section, and destroyed
// (asserting success) at the end.  The minimum is recorded in best_rwlock_time.
void time_pthread_rwlock (void) __attribute__((__noinline__));
void time_pthread_rwlock (void) {
    pthread_rwlock_t mutex;
    { int r = pthread_rwlock_init(&mutex, NULL); assert(r==0); }
    struct timeval start,end;
    // one rdlock/unlock outside the timed section
    pthread_rwlock_rdlock(&mutex);
    pthread_rwlock_unlock(&mutex);
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "pthread_rwlock(r) = %.6fns/(lock+unlock)\n", diff);
    best_rwlock_time=mind(best_rwlock_time,diff);
    }
    { int r = pthread_rwlock_destroy(&mutex); assert(r==0); }
}

// Takes the mutex, acquires a read lock on rwlock (rwlock_read_lock is passed
// the same mutex), then releases the mutex.
static void newbrt_rwlock_lock (RWLOCK rwlock, toku_mutex_t *mutex) {
    toku_mutex_lock(mutex);
    rwlock_read_lock(rwlock, mutex);
    toku_mutex_unlock(mutex);
}

// Takes the mutex, releases the read lock on rwlock, then releases the mutex.
static void newbrt_rwlock_unlock (RWLOCK rwlock, toku_mutex_t *mutex) {
    toku_mutex_lock(mutex);
    rwlock_read_unlock(rwlock);
    toku_mutex_unlock(mutex);
}

// Time the read lock that's in newbrt/rwlock.h
// Uses newbrt_rwlock_lock/newbrt_rwlock_unlock, which take and release the
// external mutex around every operation.  The minimum is recorded in
// best_newbrt_time.
void time_newbrt_rwlock (void) __attribute((__noinline__));
void time_newbrt_rwlock (void) {
    struct rwlock rwlock;
    toku_mutex_t external_mutex;
    toku_mutex_init(&external_mutex, NULL);
    rwlock_init(&rwlock);
    struct timeval start,end;
    // one lock/unlock outside the timed section
    newbrt_rwlock_lock(&rwlock, &external_mutex);
    newbrt_rwlock_unlock(&rwlock, &external_mutex);
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "newbrt_rwlock(r) = %.6fns/(lock+unlock)\n", diff);
    best_newbrt_time=mind(best_newbrt_time,diff);
    }
    rwlock_destroy(&rwlock);
    toku_mutex_destroy(&external_mutex);
}

// Time the read lock that's in newbrt/rwlock.h, assuming the mutex is already held.
// The external mutex is locked once before rwlock_init and unlocked only after
// rwlock_destroy.  The minimum is recorded in best_prelocked_time.
void time_newbrt_prelocked_rwlock (void) __attribute__((__noinline__));
void time_newbrt_prelocked_rwlock (void) {
    struct rwlock rwlock;
    toku_mutex_t external_mutex;
    toku_mutex_init(&external_mutex, NULL);
    toku_mutex_lock(&external_mutex);   // held for the rest of the function
    rwlock_init(&rwlock);
    struct timeval start,end;
    // one read lock/unlock outside the timed section
    rwlock_read_lock(&rwlock, &external_mutex);
    rwlock_read_unlock(&rwlock);
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "pre_newbrt_rwlock(r) = %.6fns/(lock+unlock)\n", diff);
    best_prelocked_time=mind(best_prelocked_time,diff);
    }
    rwlock_destroy(&rwlock);
    toku_mutex_unlock(&external_mutex);
    toku_mutex_destroy(&external_mutex);
}

// Times toku_fair_rwlock_t read locking.  The minimum is recorded in
// best_fair_rwlock_time.
void time_toku_fair_rwlock (void) __attribute__((__noinline__));
void time_toku_fair_rwlock (void) {
    toku_fair_rwlock_t mutex;
    toku_fair_rwlock_init(&mutex);
    struct timeval start,end;
    // one rdlock/unlock outside the timed section
    toku_fair_rwlock_rdlock(&mutex);
    toku_fair_rwlock_unlock(&mutex);
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "pthread_fair(r) = %.6fns/(lock+unlock)\n", diff);
    best_fair_rwlock_time=mind(best_fair_rwlock_time,diff);
    }
    toku_fair_rwlock_destroy(&mutex);
}

// Times toku_cv_fair_rwlock_t read locking.  The minimum is recorded in
// best_cv_fair_rwlock_time.
/* not static*/
void time_toku_cv_fair_rwlock(void) __attribute__((__noinline__));
void time_toku_cv_fair_rwlock(void) {
    toku_cv_fair_rwlock_t mutex;
    toku_cv_fair_rwlock_init(&mutex);
    struct timeval start,end;
    // one rdlock/unlock outside the timed section
    toku_cv_fair_rwlock_rdlock(&mutex);
    toku_cv_fair_rwlock_unlock(&mutex);
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "pthread_cvfair(r) = %.6fns/(lock+unlock)\n", diff);
    best_cv_fair_rwlock_time=mind(best_cv_fair_rwlock_time,diff);
    }
    toku_cv_fair_rwlock_destroy(&mutex);
}

// Times toku::frwlock with the external mutex held throughout: the mutex is
// locked right after x.init() and unlocked only after x.deinit().  Before the
// timed section each of read_lock, try_read_lock, write_lock(true) and
// try_write_lock(true) is exercised once; the try_* results are checked with
// invariant().  The minimum is recorded in best_frwlock_prelocked_time.
void
time_frwlock_prelocked(void) __attribute__((__noinline__));
void time_frwlock_prelocked(void) {
    toku_mutex_t external_mutex;
    toku_mutex_init(&external_mutex, NULL);
    struct timeval start,end;
    toku::frwlock x;
    x.init(&external_mutex);
    toku_mutex_lock(&external_mutex);   // held for the rest of the function
    bool got_lock;
    x.read_lock();
    x.read_unlock();
    got_lock = x.try_read_lock();
    invariant(got_lock);
    x.read_unlock();
    x.write_lock(true);
    x.write_unlock();
    got_lock = x.try_write_lock(true);
    invariant(got_lock);
    x.write_unlock();
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "frwlock_prelocked = %.6fns/(lock+unlock)\n", diff);
    best_frwlock_prelocked_time=mind(best_frwlock_prelocked_time,diff);
    }
    x.deinit();
    toku_mutex_unlock(&external_mutex);
    toku_mutex_destroy(&external_mutex);
}

// Times toku::frwlock.  One read_lock/read_unlock is done under the external
// mutex before the timed section, after which the mutex is released.  The
// minimum is recorded in best_frwlock_time.
void time_frwlock(void) __attribute__((__noinline__));
void time_frwlock(void) {
    toku_mutex_t external_mutex;
    toku_mutex_init(&external_mutex, NULL);
    struct timeval start,end;
    toku::frwlock x;
    x.init(&external_mutex);
    toku_mutex_lock(&external_mutex);
    x.read_lock();
    x.read_unlock();
    toku_mutex_unlock(&external_mutex);
    // truncated: loop header and timed body are incomplete in this copy
    for (int t=0; t1) fprintf(stderr, "frwlock = %.6fns/(lock+unlock)\n", diff);
    best_frwlock_time=mind(best_frwlock_time,diff);
    }
    x.deinit();
    toku_mutex_destroy(&external_mutex);
}

// From here on N and T are macros.  N sizes the threads[] and v[] arrays in
// test_rwlock_internal, and L*N*4 sizes the action log.
// NOTE(review): T and L are presumably loop counts for the worker threads, whose
// code is not visible in this copy -- confirm.
#define N 6
#define T 150000
#define L 5
#define N_LOG_ENTRIES (L*N*4)

// The lock under test; initialised in test_rwlock_internal.
static toku_fair_rwlock_t rwlock;

// Log of actions: each entry holds a thread id, a loop id and a one-character
// action code.
static struct log_s {
    int threadid, loopid;
    char action;
} actionlog[N_LOG_ENTRIES];
// Counter claimed atomically by logit(); reset to 0 at the start of each
// test_rwlock_internal run.
static int log_counter=0;

// Claims a log index by atomically incrementing log_counter.
// NOTE(review): truncated -- the text after "assert(my_log_counter" is missing,
// so the rest of logit() and the start of the following log-checking function
// are fused into the fragment below.  What remains of the checker tracks a
// per-thread state (tstate[]), a running count (state) and its maximum
// (reader_max), then asserts the maxima against expected bounds.
static void logit (int threadid, int loopid, char action) {
    //printf("%d %d %c\n", threadid, loopid, action);
    int my_log_counter = toku_sync_fetch_and_add(&log_counter, 1);
    assert(my_log_counter=0);
        state++;
        if (state>reader_max) reader_max=state;   // track the peak count
        break;
    case 'u':
        // a thread logging 'u' must currently be in state 'R'; clear its state
        assert(tstate[actionlog[i].threadid]=='R');
        tstate[actionlog[i].threadid]=0;
        assert(state>=0);
        state--;
        break;
    default:
        abort();   // unknown action code
    }
    }
    assert(reader_max>=expected_reader_parallelism_min);
    assert(reader_max<=expected_reader_parallelism_max);
    assert(writer_max==expected_writer_max_count);
}

// Runs one multi-threaded test of the global rwlock using start_th as the
// thread entry point.  Resets log_counter and initialises rwlock first; prints
// "Running threads:" when verbose>=2.
// NOTE(review): max_wr/min_rd/max_rd are presumably the expected bounds passed
// to the log checker; the code that uses them (thread creation, join, check)
// is truncated in this copy -- confirm.
static void test_rwlock_internal (void *(*start_th)(void*), int max_wr, int min_rd, int max_rd) {
    if (verbose>=2) printf("Running threads:\n");
    log_counter=0;
    pthread_t threads[N];
    int v[N];
    toku_fair_rwlock_init(&rwlock);
    // truncated: the remainder of this function is incomplete in this copy
    for (int i=0; i1) {
    for (int i=0; i2) printf("OK\n");
}

// Runs test_rwlock_internal once with start_thread (arguments 1, 2, 3) and then
// ten times with start_thread_random (arguments 1, 0, N).
// NOTE(review): start_thread and start_thread_random are not visible in this
// copy of the file.
static void test_rwlock (void) {
    test_rwlock_internal(start_thread, 1, 2, 3);
    for (int i=0; i<10; i++) {
        test_rwlock_internal(start_thread_random, 1, 0, N);
    }
}

// Entry point.  With --timing-only, runs every time_*() benchmark and, when
// verbose>0, prints the best times to stdout; otherwise runs test_rwlock().
// Always returns 0.
int main (int argc, const char *argv[]) {
    parse_args(argc, argv);
    if (timing_only) {
        if (1) { // to make it easy to only time the templated frwlock
            time_nop();
            time_fcall();
            time_cas();
            time_pthread_mutex();
            time_pthread_rwlock();
            time_newbrt_rwlock();
            time_newbrt_prelocked_rwlock();
            time_toku_cv_fair_rwlock();
            time_toku_fair_rwlock();
        }
        time_frwlock();
        time_frwlock_prelocked();
        if (verbose>0) {
            if (1) { // to make it easy to only time the templated frwlock
                printf("// Best nop time=%10.6fns\n", best_nop_time);
                printf("// Best fcall time=%10.6fns\n", best_fcall_time);
                printf("// Best cas time=%10.6fns\n", best_cas_time);
                printf("// Best mutex time=%10.6fns\n", best_mutex_time);
                printf("// Best rwlock time=%10.6fns\n", best_rwlock_time);
                printf("// Best newbrt rwlock time=%10.6fns\n", best_newbrt_time);
                printf("// Best prelocked time=%10.6fns\n", best_prelocked_time);
                printf("// Best fair cv rwlock time=%10.6fns\n", best_cv_fair_rwlock_time);
                printf("// Best fair fast rwlock time=%10.6fns\n", best_fair_rwlock_time);
            }
            printf("// Best frwlock time=%10.6fns\n", best_frwlock_time);
            printf("// Best frwlock_pre time=%10.6fns\n", best_frwlock_prelocked_time);
        }
    } else {
        test_rwlock();
    }
    return 0;
}