From 04f341a870e8bcdd75dbe941affd545ae77b5926 Mon Sep 17 00:00:00 2001 From: Dave Wells Date: Tue, 16 Apr 2013 23:58:56 -0400 Subject: [PATCH] modify recovery stress tests to occasionally crash in recovery [t:2028] git-svn-id: file:///svn/toku/tokudb@16282 c7de825b-a66e-492c-adef-691d508d4ae1 --- src/tests/recovery_fileops_stress.c | 35 +++++++++++++++++++------- src/tests/recovery_stress.c | 38 +++++++++++++++++++++++------ src/tests/run_stress_test.py | 2 -- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/src/tests/recovery_fileops_stress.c b/src/tests/recovery_fileops_stress.c index 68004a9c017..6a1138579e3 100644 --- a/src/tests/recovery_fileops_stress.c +++ b/src/tests/recovery_fileops_stress.c @@ -8,7 +8,6 @@ #include #include -static const int OPER_PER_STEP = 3; static const int NUM_DICTIONARIES = 100; //static const int NUM_DICTIONARIES = 3; static const char *table = "tbl"; @@ -18,7 +17,11 @@ DB_ENV *env; DB** db_array; DB* states; static const int percent_do_op = 20; +static const int percent_do_abort = 25; static const int commit_abort_ratio = 3; +static const int start_crashing_iter = 10; +// iterations_per_crash_in_recovery should be an odd number; +static const int iterations_per_crash_in_recovery = 7; char *state_db_name="states.db"; #define CREATED 0 @@ -30,16 +33,11 @@ char *state_db_name="states.db"; #define ABORT_TXN 1 static int commit_or_abort(void) { - int rval; -#if 1 - int i = random() % (commit_abort_ratio + 1); - rval = ( i < commit_abort_ratio ) ? COMMIT_TXN : ABORT_TXN; + int i = random() % 100; + int rval = ( i < percent_do_abort ) ? ABORT_TXN : COMMIT_TXN; if ( verbose ) { if ( rval == ABORT_TXN ) printf("%s : abort txn\n", __FILE__); } -#else - rval = COMMIT_TXN; -#endif return rval; } @@ -73,6 +71,7 @@ static int get_state(int db_num) { static int crash_timer; static void crash_it(void); +static void crash_it_callback_f(void*); static void set_crash_timer(void) { crash_timer = random() % (3 * NUM_DICTIONARIES); } @@ -239,6 +238,7 @@ static void do_random_fileops(void) } } + static void run_test(int iter){ u_int32_t recovery_flags = DB_INIT_LOG | DB_INIT_TXN; int r, i; @@ -254,6 +254,19 @@ static void run_test(int iter){ else recovery_flags += DB_RECOVER; + // crash somewhat frequently during recovery + // first, wait until after first crash + if ( iter > start_crashing_iter + 1 ) { + // every N cycles, crash in recovery + if ( (iter % iterations_per_crash_in_recovery) == 0 ) { + // crash at different places in recovery + if ( iter & 1 ) + db_env_set_recover_callback(crash_it_callback_f, NULL); + else + db_env_set_recover_callback2(crash_it_callback_f, NULL); + } + } + env_startup(recovery_flags); if ( verbose ) printf("%s : environment init\n", __FILE__); @@ -347,7 +360,7 @@ static void run_test(int iter){ // 2) during the next checkpoint // 3) after the next (final) checkpoint - if ( iter > 10 ) { + if ( iter >= start_crashing_iter ) { set_crash_timer(); } else { @@ -529,3 +542,7 @@ static void UU() crash_it(void) { fflush(stdout); fflush(stderr); } + +static void crash_it_callback_f(void *dummy UU()) { + crash_it(); +} diff --git a/src/tests/recovery_stress.c b/src/tests/recovery_stress.c index 4f7048ee990..0a372b9d0e1 100644 --- a/src/tests/recovery_stress.c +++ b/src/tests/recovery_stress.c @@ -13,6 +13,9 @@ static const u_int64_t max_windows_cachesize = 256 << 20; static const int NUM_DICTIONARIES = 1; static const int OPER_STEPS = 6; + +static const int ITERATIONS_PER_CRASH_IN_RECOVERY = 7; + typedef enum __recovery_stress_steps { PRE_PRE_STEP = 0, @@ -54,6 +57,10 @@ drop_dead(void) { fflush(stdout); } +static void drop_dead_callback_f(void *dummy UU()) { + drop_dead(); +} + static void verify (DICTIONARY dictionaries, int iter) { int i, key; DB *db; @@ -388,8 +395,7 @@ static void post_checkpoint_acts(ITER_SPEC spec) { return; } - -static void run_test (int iter, int die UU()) { +static void run_test (int iter) { u_int32_t flags = DB_DUP|DB_DUPSORT; int i, r; @@ -414,8 +420,29 @@ static void run_test (int iter, int die UU()) { if ( iter != 0 ) recovery_flags += DB_RECOVER; + // crash somewhat frequently during recovery + // first, wait until after the system is primed + if ( iter > ITERATIONS_PER_CRASH_IN_RECOVERY + 5 ) { + // every N cycles, crash in recovery + if ( (iter % ITERATIONS_PER_CRASH_IN_RECOVERY) == 0 ) { + // crash at different places in recovery + if ( iter & 1 ) + db_env_set_recover_callback(drop_dead_callback_f, NULL); + else + db_env_set_recover_callback2(drop_dead_callback_f, NULL); + } + } + env_startup(cachebytes, recovery_flags); + // logic below counts on a mapping of 'iter' to dictionary values + // since crashes in recovery do not modify dictionary values + // need to adjust 'iter' to be iter of successful recoveries + int crashes_in_recovery = (iter / ITERATIONS_PER_CRASH_IN_RECOVERY) - ( ( ITERATIONS_PER_CRASH_IN_RECOVERY + 5 ) / ITERATIONS_PER_CRASH_IN_RECOVERY ); + if ( crashes_in_recovery > 0 ) { + iter = iter - crashes_in_recovery; + } + // create array of dictionaries // for each dictionary verify previous iterations and perform new inserts @@ -453,7 +480,7 @@ static void run_test (int iter, int die UU()) { post_checkpoint_acts(&spec); // if requesting crash, randomly do other non-committed acts, then "drop_dead" - if (die && (iter > 0)) { + if (iter > 0) { if (verbose) printf("dying\n"); #if 0 // separate thread will perform random acts on other dictionaries (not 0) @@ -486,11 +513,10 @@ static void run_test (int iter, int die UU()) { static void do_args(int argc, char *argv[]); static int iter_arg = 0; -static int do_crash = 0; int test_main(int argc, char **argv) { do_args(argc, argv); - run_test(iter_arg,do_crash); + run_test(iter_arg); return 0; } @@ -512,8 +538,6 @@ static void do_args(int argc, char *argv[]) { } else if (strcmp(argv[0], "-i")==0) { argc--; argv++; iter_arg = atoi(argv[0]); - } else if (strcmp(argv[0], "-C")==0) { - do_crash = 1; } else { fprintf(stderr, "Unknown arg: %s\n", argv[0]); resultcode=1; diff --git a/src/tests/run_stress_test.py b/src/tests/run_stress_test.py index d0b7cdf0c45..e5bfcf54700 100755 --- a/src/tests/run_stress_test.py +++ b/src/tests/run_stress_test.py @@ -11,14 +11,12 @@ import optparse parser = optparse.OptionParser() parser.add_option('--test', dest='test', type='string', default=None, help="name of stress test to run") parser.add_option('--iterations', dest='iterations', type='int', default=1, help="Number of test iterations (default = 1)") -parser.add_option('--crash', dest='crash', action="store_true", default=False, help="Crash the DB every iteration (default = FALSE)") parser.add_option('--verbose', dest='verbose', action="store_true", default=False, help="Verbose printing (default = FALSE)") options, remainder = parser.parse_args() def run_test(): cmd = options.test if ( options.verbose ): cmd += ' -v' - if ( options.crash ): cmd += ' -C' for i in range(options.iterations): os.system(cmd + ' -i %d' % (i))