diff --git a/buildheader/db.h_4_1 b/buildheader/db.h_4_1 index c2bfec02280..fb51ad739bc 100644 --- a/buildheader/db.h_4_1 +++ b/buildheader/db.h_4_1 @@ -72,9 +72,9 @@ typedef struct __toku_engine_status { u_int32_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */ u_int32_t max_waiters; /* The maximum of num_waiters_now. */ u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */ - u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held. */ - u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock */ - u_int64_t total_time_since_start; /* Total time since the lock was created. Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ + u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */ + u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */ + u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. 
*/ u_int32_t checkpoint_period; /* delay between automatic checkpoints */ u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ char checkpoint_time_begin[26]; /* time of last checkpoint begin */ @@ -275,6 +275,8 @@ typedef enum { #define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010 #define TOKUDB_UPGRADE_FAILURE -100011 #define TOKUDB_TRY_AGAIN -100012 +#define TOKUDB_NEEDS_REPAIR -100013 +#define TOKUDB_FINGERPRINT_ERROR -100014 /* LOADER flags */ #define LOADER_USE_PUTS 1 typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val); @@ -415,7 +417,8 @@ struct __toku_db { int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION); int (*set_indexer)(DB*, DB_INDEXER*); void (*get_indexer)(DB*, DB_INDEXER**); - void* __toku_dummy0[16]; + int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going); + void* __toku_dummy0[15]; char __toku_dummy1[96]; void *api_internal; /* 32-bit offset=236 size=4, 64=bit offset=376 size=8 */ void* __toku_dummy2[5]; diff --git a/buildheader/db.h_4_3 b/buildheader/db.h_4_3 index 09c03de66e9..5a1dbb89992 100644 --- a/buildheader/db.h_4_3 +++ b/buildheader/db.h_4_3 @@ -72,9 +72,9 @@ typedef struct __toku_engine_status { u_int32_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */ u_int32_t max_waiters; /* The maximum of num_waiters_now. */ u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */ - u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held. */ - u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock */ - u_int64_t total_time_since_start; /* Total time since the lock was created. Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. 
*/ + u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */ + u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */ + u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ u_int32_t checkpoint_period; /* delay between automatic checkpoints */ u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ char checkpoint_time_begin[26]; /* time of last checkpoint begin */ @@ -277,6 +277,8 @@ typedef enum { #define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010 #define TOKUDB_UPGRADE_FAILURE -100011 #define TOKUDB_TRY_AGAIN -100012 +#define TOKUDB_NEEDS_REPAIR -100013 +#define TOKUDB_FINGERPRINT_ERROR -100014 /* LOADER flags */ #define LOADER_USE_PUTS 1 typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val); @@ -425,7 +427,8 @@ struct __toku_db { int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION); int (*set_indexer)(DB*, DB_INDEXER*); void (*get_indexer)(DB*, DB_INDEXER**); - void* __toku_dummy0[19]; + int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going); + void* __toku_dummy0[18]; char __toku_dummy1[96]; void *api_internal; /* 32-bit offset=248 size=4, 64=bit offset=400 size=8 */ void* __toku_dummy2[5]; diff --git a/buildheader/db.h_4_4 b/buildheader/db.h_4_4 index 9874dea0bdd..f7ee625f7e7 100644 --- a/buildheader/db.h_4_4 +++ b/buildheader/db.h_4_4 @@ -72,9 +72,9 @@ typedef struct __toku_engine_status { u_int32_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */ u_int32_t max_waiters; /* The maximum of num_waiters_now. 
*/ u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */ - u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held. */ - u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock */ - u_int64_t total_time_since_start; /* Total time since the lock was created. Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ + u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */ + u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */ + u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ u_int32_t checkpoint_period; /* delay between automatic checkpoints */ u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ char checkpoint_time_begin[26]; /* time of last checkpoint begin */ @@ -277,6 +277,8 @@ typedef enum { #define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010 #define TOKUDB_UPGRADE_FAILURE -100011 #define TOKUDB_TRY_AGAIN -100012 +#define TOKUDB_NEEDS_REPAIR -100013 +#define TOKUDB_FINGERPRINT_ERROR -100014 /* LOADER flags */ #define LOADER_USE_PUTS 1 typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val); @@ -427,7 +429,8 @@ struct __toku_db { int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION); int (*set_indexer)(DB*, DB_INDEXER*); void (*get_indexer)(DB*, DB_INDEXER**); - void* __toku_dummy0[21]; + int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going); + void* __toku_dummy0[20]; char __toku_dummy1[96]; void *api_internal; /* 32-bit offset=256 size=4, 64=bit offset=416 size=8 
*/ void* __toku_dummy2[5]; diff --git a/buildheader/db.h_4_5 b/buildheader/db.h_4_5 index 28ca215e55c..bddab95bced 100644 --- a/buildheader/db.h_4_5 +++ b/buildheader/db.h_4_5 @@ -72,9 +72,9 @@ typedef struct __toku_engine_status { u_int32_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */ u_int32_t max_waiters; /* The maximum of num_waiters_now. */ u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */ - u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held. */ - u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock */ - u_int64_t total_time_since_start; /* Total time since the lock was created. Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ + u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */ + u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */ + u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. 
*/ u_int32_t checkpoint_period; /* delay between automatic checkpoints */ u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ char checkpoint_time_begin[26]; /* time of last checkpoint begin */ @@ -277,6 +277,8 @@ typedef enum { #define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010 #define TOKUDB_UPGRADE_FAILURE -100011 #define TOKUDB_TRY_AGAIN -100012 +#define TOKUDB_NEEDS_REPAIR -100013 +#define TOKUDB_FINGERPRINT_ERROR -100014 /* LOADER flags */ #define LOADER_USE_PUTS 1 typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val); @@ -427,7 +429,8 @@ struct __toku_db { int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION); int (*set_indexer)(DB*, DB_INDEXER*); void (*get_indexer)(DB*, DB_INDEXER**); - void* __toku_dummy0[24]; + int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going); + void* __toku_dummy0[23]; char __toku_dummy1[96]; void *api_internal; /* 32-bit offset=268 size=4, 64=bit offset=440 size=8 */ void* __toku_dummy2[5]; diff --git a/buildheader/db.h_4_6 b/buildheader/db.h_4_6 index f72f26fa813..9c67b082b97 100644 --- a/buildheader/db.h_4_6 +++ b/buildheader/db.h_4_6 @@ -72,9 +72,9 @@ typedef struct __toku_engine_status { u_int32_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */ u_int32_t max_waiters; /* The maximum of num_waiters_now. */ u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */ - u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held. */ - u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock */ - u_int64_t total_time_since_start; /* Total time since the lock was created. Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. 
*/ + u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */ + u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */ + u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ u_int32_t checkpoint_period; /* delay between automatic checkpoints */ u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ char checkpoint_time_begin[26]; /* time of last checkpoint begin */ @@ -278,6 +278,8 @@ typedef enum { #define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010 #define TOKUDB_UPGRADE_FAILURE -100011 #define TOKUDB_TRY_AGAIN -100012 +#define TOKUDB_NEEDS_REPAIR -100013 +#define TOKUDB_FINGERPRINT_ERROR -100014 /* LOADER flags */ #define LOADER_USE_PUTS 1 typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val); @@ -430,7 +432,8 @@ struct __toku_db { int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION); int (*set_indexer)(DB*, DB_INDEXER*); void (*get_indexer)(DB*, DB_INDEXER**); - void* __toku_dummy1[28]; + int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going); + void* __toku_dummy1[27]; char __toku_dummy2[80]; void *api_internal; /* 32-bit offset=276 size=4, 64=bit offset=464 size=8 */ void* __toku_dummy3[5]; diff --git a/buildheader/make_db_h.c b/buildheader/make_db_h.c index 9b4361a975f..d03fa28b448 100644 --- a/buildheader/make_db_h.c +++ b/buildheader/make_db_h.c @@ -74,6 +74,8 @@ enum { TOKUDB_MVCC_DICTIONARY_TOO_NEW = -100010, TOKUDB_UPGRADE_FAILURE = -100011, TOKUDB_TRY_AGAIN = -100012, + TOKUDB_NEEDS_REPAIR = -100013, + TOKUDB_FINGERPRINT_ERROR = -100014, }; static void print_defines (void) { @@ -224,6 +226,8 @@ static void print_defines (void) { 
dodefine(TOKUDB_MVCC_DICTIONARY_TOO_NEW); dodefine(TOKUDB_UPGRADE_FAILURE); dodefine(TOKUDB_TRY_AGAIN); + dodefine(TOKUDB_NEEDS_REPAIR); + dodefine(TOKUDB_FINGERPRINT_ERROR); /* LOADER flags */ printf("/* LOADER flags */\n"); @@ -677,6 +681,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__ "int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)", "int (*set_indexer)(DB*, DB_INDEXER*)", "void (*get_indexer)(DB*, DB_INDEXER**)", + "int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going)", NULL}; print_struct("db", 1, db_fields32, db_fields64, sizeof(db_fields32)/sizeof(db_fields32[0]), extra); } diff --git a/buildheader/tdb.h b/buildheader/tdb.h index 4ab8db082d1..4f98afee5ae 100644 --- a/buildheader/tdb.h +++ b/buildheader/tdb.h @@ -72,9 +72,9 @@ typedef struct __toku_engine_status { u_int32_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */ u_int32_t max_waiters; /* The maximum of num_waiters_now. */ u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */ - u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held. */ - u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock */ - u_int64_t total_time_since_start; /* Total time since the lock was created. Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ + u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */ + u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */ + u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). 
Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ u_int32_t checkpoint_period; /* delay between automatic checkpoints */ u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ char checkpoint_time_begin[26]; /* time of last checkpoint begin */ @@ -278,6 +278,8 @@ typedef enum { #define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010 #define TOKUDB_UPGRADE_FAILURE -100011 #define TOKUDB_TRY_AGAIN -100012 +#define TOKUDB_NEEDS_REPAIR -100013 +#define TOKUDB_FINGERPRINT_ERROR -100014 /* LOADER flags */ #define LOADER_USE_PUTS 1 typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val); @@ -399,6 +401,7 @@ struct __toku_db { int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION); int (*set_indexer)(DB*, DB_INDEXER*); void (*get_indexer)(DB*, DB_INDEXER**); + int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going); void *api_internal; int (*close) (DB*, u_int32_t); int (*cursor) (DB *, DB_TXN *, DBC **, u_int32_t); diff --git a/include/db.h b/include/db.h index 4ab8db082d1..4f98afee5ae 100644 --- a/include/db.h +++ b/include/db.h @@ -72,9 +72,9 @@ typedef struct __toku_engine_status { u_int32_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */ u_int32_t max_waiters; /* The maximum of num_waiters_now. */ u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */ - u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held. */ - u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock */ - u_int64_t total_time_since_start; /* Total time since the lock was created. Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. 
*/ + u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */ + u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */ + u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */ u_int32_t checkpoint_period; /* delay between automatic checkpoints */ u_int32_t checkpoint_footprint; /* state of checkpoint procedure */ char checkpoint_time_begin[26]; /* time of last checkpoint begin */ @@ -278,6 +278,8 @@ typedef enum { #define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010 #define TOKUDB_UPGRADE_FAILURE -100011 #define TOKUDB_TRY_AGAIN -100012 +#define TOKUDB_NEEDS_REPAIR -100013 +#define TOKUDB_FINGERPRINT_ERROR -100014 /* LOADER flags */ #define LOADER_USE_PUTS 1 typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val); @@ -399,6 +401,7 @@ struct __toku_db { int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION); int (*set_indexer)(DB*, DB_INDEXER*); void (*get_indexer)(DB*, DB_INDEXER**); + int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going); void *api_internal; int (*close) (DB*, u_int32_t); int (*cursor) (DB *, DB_TXN *, DBC **, u_int32_t); diff --git a/newbrt/brt-internal.h b/newbrt/brt-internal.h index 051c2370a2f..866796c324d 100644 --- a/newbrt/brt-internal.h +++ b/newbrt/brt-internal.h @@ -357,7 +357,9 @@ void mempool_release(struct mempool *); // release anything that was not release void toku_verify_all_in_mempool(BRTNODE node); -int toku_verify_brtnode (BRT brt, BLOCKNUM blocknum, int height, struct kv_pair *lesser_pivot, struct kv_pair *greatereq_pivot, int recurse) +int toku_verify_brtnode (BRT brt, BLOCKNUM blocknum, int height, struct kv_pair 
*lesser_pivot, struct kv_pair *greatereq_pivot, + int (*progress_callback)(void *extra, float progress), void *extra, + int recurse, int verbose, int keep_on_going) __attribute__ ((warn_unused_result)); void toku_brtheader_free (struct brt_header *h); diff --git a/newbrt/brt-verify.c b/newbrt/brt-verify.c index dc14b66044c..a616fa71dd0 100644 --- a/newbrt/brt-verify.c +++ b/newbrt/brt-verify.c @@ -1,6 +1,6 @@ /* -*- mode: C; c-basic-offset: 4 -*- */ #ident "$Id$" -#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved." +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." /* Verify a BRT. */ @@ -15,9 +15,10 @@ #include "includes.h" -static int verify_local_fingerprint (BRTNODE node) __attribute__ ((warn_unused_result)); +static int verify_local_fingerprint (BRTNODE node, int verbose) __attribute__ ((warn_unused_result)); -static int verify_local_fingerprint (BRTNODE node) { +static int +verify_local_fingerprint (BRTNODE node, int verbose) { u_int32_t fp=0; int i; int r = 0; @@ -28,8 +29,10 @@ static int verify_local_fingerprint (BRTNODE node) { fp += node->rand4fingerprint * toku_calc_fingerprint_cmd(type, xid, key, keylen, data, datalen); }); if (fp!=node->local_fingerprint) { - fprintf(stderr, "%s:%d local fingerprints don't match\n", __FILE__, __LINE__); - r = -200001; + if (verbose) { + fprintf(stderr, "%s:%d local fingerprints don't match\n", __FILE__, __LINE__); + } + r = TOKUDB_FINGERPRINT_ERROR; } } else { toku_verify_or_set_counts(node, FALSE); @@ -37,21 +40,26 @@ static int verify_local_fingerprint (BRTNODE node) { return r; } -static int compare_pairs (BRT brt, struct kv_pair *a, struct kv_pair *b) { +static int 
+compare_pairs (BRT brt, struct kv_pair *a, struct kv_pair *b) { DBT x,y; int cmp = brt->compare_fun(brt->db, toku_fill_dbt(&x, kv_pair_key(a), kv_pair_keylen(a)), toku_fill_dbt(&y, kv_pair_key(b), kv_pair_keylen(b))); return cmp; } -static int compare_leafentries (BRT brt, LEAFENTRY a, LEAFENTRY b) { + +static int +compare_leafentries (BRT brt, LEAFENTRY a, LEAFENTRY b) { DBT x,y; int cmp = brt->compare_fun(brt->db, toku_fill_dbt(&x, le_key(a), le_keylen(a)), toku_fill_dbt(&y, le_key(b), le_keylen(b))); return cmp; } -static int compare_pair_to_leafentry (BRT brt, struct kv_pair *a, LEAFENTRY b) { + +static int +compare_pair_to_leafentry (BRT brt, struct kv_pair *a, LEAFENTRY b) { DBT x,y; int cmp = brt->compare_fun(brt->db, toku_fill_dbt(&x, kv_pair_key(a), kv_pair_keylen(a)), @@ -59,10 +67,67 @@ static int compare_pair_to_leafentry (BRT brt, struct kv_pair *a, LEAFENTRY b) { return cmp; } -int toku_verify_brtnode (BRT brt, BLOCKNUM blocknum, int height, - struct kv_pair *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) - struct kv_pair *greatereq_pivot, // Everything in the subtree should be <= lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) 
- int recurse) +static int +compare_pair_to_key (BRT brt, struct kv_pair *a, bytevec key, ITEMLEN keylen) { + DBT x, y; + int cmp = brt->compare_fun(brt->db, + toku_fill_dbt(&x, kv_pair_key(a), kv_pair_keylen(a)), + toku_fill_dbt(&y, key, keylen)); + return cmp; +} + +static int +verify_msg_in_child_buffer(BRT brt, int type, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), struct kv_pair *lesser_pivot, struct kv_pair *greatereq_pivot) + __attribute__((warn_unused_result)); + +static int +verify_msg_in_child_buffer(BRT brt, int type, bytevec key, ITEMLEN keylen, bytevec UU(data), ITEMLEN UU(datalen), XIDS UU(xids), struct kv_pair *lesser_pivot, struct kv_pair *greatereq_pivot) { + int result = 0; + switch (type) { + case BRT_INSERT: + case BRT_INSERT_NO_OVERWRITE: + case BRT_DELETE_ANY: + case BRT_ABORT_ANY: + case BRT_COMMIT_ANY: + // verify key in bounds + if (lesser_pivot) { + int compare = compare_pair_to_key(brt, lesser_pivot, key, keylen); + if (compare >= 0) + result = EINVAL; + } + if (result == 0 && greatereq_pivot) { + int compare = compare_pair_to_key(brt, greatereq_pivot, key, keylen); + if (compare < 0) + result = EINVAL; + } + break; + } + return result; +} + +static LEAFENTRY +get_ith_leafentry (BRTNODE node, int i) { + OMTVALUE le_v; + int r = toku_omt_fetch(node->u.l.buffer, i, &le_v, NULL); + invariant(r == 0); // this is a bad failure if it happens. + return (LEAFENTRY)le_v; +} + +#define VERIFY_ASSERTION(predicate, i, string) ({ \ + if(!(predicate)) { \ + if (verbose) { \ + fprintf(stderr, "%s:%d: Looking at child %d of block %" PRId64 ": %s\n", __FILE__, __LINE__, i, blocknum.b, string); \ + } \ + result = TOKUDB_NEEDS_REPAIR; \ + if (!keep_going_on_failure) goto done; \ + }}) + +int +toku_verify_brtnode (BRT brt, BLOCKNUM blocknum, int height, + struct kv_pair *lesser_pivot, // Everything in the subtree should be > lesser_pivot. (lesser_pivot==NULL if there is no lesser pivot.) 
+ struct kv_pair *greatereq_pivot, // Everything in the subtree should be <= greatereq_pivot. (greatereq_pivot==NULL if there is no greatereq pivot.) + int (*progress_callback)(void *extra, float progress), void *progress_extra, + int recurse, int verbose, int keep_going_on_failure) { int result=0; BRTNODE node; @@ -71,129 +136,101 @@ int toku_verify_brtnode (BRT brt, BLOCKNUM blocknum, int height, { int r = toku_cachetable_get_and_pin(brt->cf, blocknum, fullhash, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt->h); - if (r) return r; + assert_zero(r); // this is a bad failure if it happens. } //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); - node=node_v; - if (node->fullhash!=fullhash) { - fprintf(stderr, "%s:%d fullhash does not match\n", __FILE__, __LINE__); - return -200001; - } - if (height==-1) { - height = node->height; - } - if (node->height !=height) { - fprintf(stderr, "%s:%d node->height=%d height=%d\n", __FILE__, __LINE__, node->height, height); - return -200001; - } + node = node_v; + assert(node->fullhash == fullhash); // this is a bad failure if wrong + if (height >= 0) + invariant(height == node->height); // this is a bad failure if wrong { - int r = verify_local_fingerprint(node); - if (r) result=r; + int r = verify_local_fingerprint(node, verbose); + if (r) { + result = r; + if (!keep_going_on_failure) goto done; + } } - if (node->height>0) { + if (node->height > 0) { // Verify that all the pivot keys are in order.
- for (int i=0; iu.n.n_children-2; i++) { + for (int i = 0; i < node->u.n.n_children-2; i++) { int compare = compare_pairs(brt, node->u.n.childkeys[i], node->u.n.childkeys[i+1]); - if (compare>=0) { - fprintf(stderr, "%s:%d The %dth value is >= the %dth value in block %" PRId64 "\n", __FILE__, __LINE__, - i, i+1, blocknum.b); - result = -200001; - } + VERIFY_ASSERTION(compare < 0, i, "Value is >= the next value"); } // Verify that all the pivot keys are lesser_pivot < pivot <= greatereq_pivot - for (int i=0; iu.n.n_children-1; i++) { + for (int i = 0; i < node->u.n.n_children-1; i++) { if (lesser_pivot) { int compare = compare_pairs(brt, lesser_pivot, node->u.n.childkeys[i]); - if (compare>=0) { - fprintf(stderr, "%s:%d The %dth pivot is >= the previous in block %" PRId64 "\n", __FILE__, __LINE__, - i, blocknum.b); - result = -200001; - } + VERIFY_ASSERTION(compare < 0, i, "Pivot is >= the lower-bound pivot"); } if (greatereq_pivot) { int compare = compare_pairs(brt, greatereq_pivot, node->u.n.childkeys[i]); - if (compare < 0) { - fprintf(stderr, "%s:%d The %dth pivot is < the next in block %" PRId64 "\n", __FILE__, __LINE__, - i, blocknum.b); - result = -200001; - } + VERIFY_ASSERTION(compare >= 0, i, "Pivot is < the upper-bound pivot"); } } // Verify that messages in the buffers are in the right place. - {/*nothing*/} // To do later. + for (int i = 0; i < node->u.n.n_children; i++) { + FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid, + { + int r = verify_msg_in_child_buffer(brt, type, key, keylen, data, datalen, xid, + (i==0) ? lesser_pivot : node->u.n.childkeys[i-1], + (i==node->u.n.n_children-1) ? greatereq_pivot : node->u.n.childkeys[i]); + VERIFY_ASSERTION(r==0, i, "A message in the buffer is out of place"); + }); + } // Verify that the subtrees have the right properties. 
if (recurse) { - for (int i=0; iu.n.n_children; i++) { - int r = toku_verify_brtnode(brt, BNC_BLOCKNUM(node, i), height-1, + for (int i = 0; i < node->u.n.n_children; i++) { + int r = toku_verify_brtnode(brt, BNC_BLOCKNUM(node, i), node->height-1, (i==0) ? lesser_pivot : node->u.n.childkeys[i-1], (i==node->u.n.n_children-1) ? greatereq_pivot : node->u.n.childkeys[i], - recurse); - if (r) result=r; + progress_callback, progress_extra, + recurse, verbose, keep_going_on_failure); + if (r) { + result = r; + if (!keep_going_on_failure || result != TOKUDB_NEEDS_REPAIR) goto done; + } } } } else { /* It's a leaf. Make sure every leaf value is between the pivots, and that the leaf values are sorted. */ - for (u_int32_t i=0; iu.l.buffer); i++) { - OMTVALUE le_v; - { - int r = toku_omt_fetch(node->u.l.buffer, i, &le_v, NULL); - if (r) { - fprintf(stderr, "%s:%d Could not fetch value from OMT, r=%d\n", __FILE__, __LINE__, r); - result = r; - } - } - LEAFENTRY le = le_v; - + for (u_int32_t i = 0; i < toku_omt_size(node->u.l.buffer); i++) { + LEAFENTRY le = get_ith_leafentry(node, i); if (lesser_pivot) { int compare = compare_pair_to_leafentry(brt, lesser_pivot, le); - if (compare>=0) { - fprintf(stderr, "%s:%d The %dth leafentry key is >= the previous pivot in block %" PRId64 "\n", __FILE__, __LINE__, - i, blocknum.b); - result = -200001; - } + VERIFY_ASSERTION(compare < 0, i, "The leafentry is >= the lower-bound pivot"); } if (greatereq_pivot) { int compare = compare_pair_to_leafentry(brt, greatereq_pivot, le); - if (compare<0) { - fprintf(stderr, "%s:%d The %dth leafentry key is < the next pivot in block %" PRId64 "\n", __FILE__, __LINE__, - i, blocknum.b); - result = -200001; - } - + VERIFY_ASSERTION(compare >= 0, i, "The leafentry is < the upper-bound pivot"); } - if (0u.l.buffer, i-1, &prev_le_v, NULL); - assert(r==0); - LEAFENTRY prev_le = prev_le_v; + if (0 < i) { + LEAFENTRY prev_le = get_ith_leafentry(node, i-1); int compare = compare_leafentries(brt, prev_le, le); - 
if (compare>=0) { - fprintf(stderr, "%s:%d The %dth leafentry key is >= the previous leafentry block %" PRId64 "\n", __FILE__, __LINE__, - i, blocknum.b); - result = -200001; - } + VERIFY_ASSERTION(compare < 0, i, "Adjacent leafentries are out of order"); } } } + done: { int r = toku_cachetable_unpin(brt->cf, blocknum, fullhash, CACHETABLE_CLEAN, 0); - if (r) { - fprintf(stderr, "%s:%d could not unpin\n", __FILE__, __LINE__); - result = r; - } + assert_zero(r); // this is a bad failure if it happens. } + + if (result == 0 && progress_callback) + result = progress_callback(progress_extra, 0.0); + return result; } -int toku_verify_brt (BRT brt) { - CACHEKEY *rootp; +int +toku_verify_brt_with_progress (BRT brt, int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_on_going) { assert(brt->h); u_int32_t root_hash; - rootp = toku_calculate_root_offset_pointer(brt, &root_hash); + CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &root_hash); int n_pinned_before = toku_cachefile_count_pinned(brt->cf, 0); - int r = toku_verify_brtnode(brt, *rootp, -1, NULL, NULL, 1); + int r = toku_verify_brtnode(brt, *rootp, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going); int n_pinned_after = toku_cachefile_count_pinned(brt->cf, 0); if (n_pinned_before!=n_pinned_after) {// this may stop working if we release the ydb lock (in some future version of the code). 
fprintf(stderr, "%s:%d n_pinned_before=%d n_pinned_after=%d\n", __FILE__, __LINE__, n_pinned_before, n_pinned_after); @@ -201,3 +238,8 @@ int toku_verify_brt (BRT brt) { return r; } +int +toku_verify_brt (BRT brt) { + return toku_verify_brt_with_progress(brt, NULL, NULL, 0, 0); +} + diff --git a/newbrt/brt.c b/newbrt/brt.c index e170327c760..28b8789dd13 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -5346,7 +5346,7 @@ toku_dump_brtnode (FILE *file, BRT brt, BLOCKNUM blocknum, int depth, struct kv_ fprintf(file, "%s:%d pin %p\n", __FILE__, __LINE__, node_v); node=node_v; lazy_assert(node->fullhash==fullhash); - result=toku_verify_brtnode(brt, blocknum, -1, lorange, hirange, 0); + result=toku_verify_brtnode(brt, blocknum, -1, lorange, hirange, NULL, NULL, 0, 1, 0); fprintf(file, "%*sNode=%p\n", depth, "", node); if (node->height>0) { fprintf(file, "%*sNode %"PRId64" nodesize=%u height=%d n_children=%d n_bytes_in_buffers=%u keyrange=%s %s\n", diff --git a/newbrt/brt.h b/newbrt/brt.h index 88e5d031913..1d4db165894 100644 --- a/newbrt/brt.h +++ b/newbrt/brt.h @@ -131,6 +131,7 @@ int toku_brt_create_cachetable(CACHETABLE *t, long cachesize, LSN initial_lsn, T extern int toku_brt_debug_mode; int toku_verify_brt (BRT brt) __attribute__ ((warn_unused_result)); +int toku_verify_brt_with_progress (BRT brt, int (*progress_callback)(void *extra, float progress), void *extra, int verbose, int keep_going) __attribute__ ((warn_unused_result)); //int show_brt_blocknumbers(BRT); diff --git a/newbrt/tests/verify-bad-pivots.c b/newbrt/tests/verify-bad-pivots.c new file mode 100644 index 00000000000..2f92830fcc8 --- /dev/null +++ b/newbrt/tests/verify-bad-pivots.c @@ -0,0 +1,187 @@ +/* -*- mode: C; c-basic-offset: 4 -*- */ +#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved." 
+ +// generate a tree with bad pivots and check that brt->verify finds them + +#include "includes.h" +#include "test.h" + +static BRTNODE +make_node(BRT brt, int height) { + BRTNODE node = NULL; + int r = toku_create_new_brtnode(brt, &node, height, 0); + assert(r == 0); + return node; +} + +static void +append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) { + assert(leafnode->height == 0); + + DBT thekey; toku_fill_dbt(&thekey, key, keylen); + DBT theval; toku_fill_dbt(&theval, val, vallen); + + // get an index that we can use to create a new leaf entry + uint32_t idx = toku_omt_size(leafnode->u.l.buffer); + + // apply an insert to the leaf node + BRT_MSG_S cmd = { BRT_INSERT, xids_get_root_xids(), .u.id = { &thekey, &theval } }; + int r = brt_leaf_apply_cmd_once(leafnode, &cmd, idx, NULL, NULL); + assert(r == 0); + + // dont forget to dirty the node + leafnode->dirty = 1; +} + +static void +populate_leaf(BRTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { + for (int i = 0; i < n; i++) { + int k = htonl(seq + i); + int v = seq + i; + append_leaf(leafnode, &k, sizeof k, &v, sizeof v); + } + *minkey = htonl(seq); + *maxkey = htonl(seq + n - 1); +} + +static UU() void +insert_into_child_buffer(BRTNODE node, int childnum, int minkey, int maxkey) { + for (unsigned int val = htonl(minkey); val <= htonl(maxkey); val++) { + unsigned int key = htonl(val); + DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); + DBT theval; toku_fill_dbt(&theval, &val, sizeof val); + toku_brt_append_to_child_buffer(node, childnum, BRT_INSERT, xids_get_root_xids(), &thekey, &theval); + } +} + +static BRTNODE +make_tree(BRT brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { + BRTNODE node; + if (height == 0) { + node = make_node(brt, 0); + populate_leaf(node, *seq, nperleaf, minkey, maxkey); + *seq += nperleaf; + } else { + node = make_node(brt, height); + int minkeys[fanout], maxkeys[fanout]; + for (int childnum = 0; 
childnum < fanout; childnum++) { + BRTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + if (childnum == 0) + toku_brt_nonleaf_append_child(node, child, NULL, 0); + else { + int k = minkeys[childnum]; // use the min key of the right subtree, which creates a broken tree + struct kv_pair *pivotkey = kv_pair_malloc(&k, sizeof k, NULL, 0); + toku_brt_nonleaf_append_child(node, child, pivotkey, sizeof k); + } + int r = toku_unpin_brtnode(brt, child); + assert(r == 0); + } + *minkey = minkeys[0]; + *maxkey = maxkeys[0]; + for (int i = 1; i < fanout; i++) { + if (memcmp(minkey, &minkeys[i], sizeof minkeys[i]) > 0) + *minkey = minkeys[i]; + if (memcmp(maxkey, &maxkeys[i], sizeof maxkeys[i]) < 0) + *maxkey = maxkeys[i]; + } + } + return node; +} + +static UU() void +deleted_row(UU() DB *db, UU() DBT *key, UU() DBT *val) { +} + +static void +test_make_tree(int height, int fanout, int nperleaf, int do_verify) { + int r; + + // cleanup + char fname[]= __FILE__ ".brt"; + r = unlink(fname); + assert(r == 0 || (r == -1 && errno == ENOENT)); + + // create a cachetable + CACHETABLE ct = NULL; + r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); + assert(r == 0); + + // create the brt + TOKUTXN null_txn = NULL; + DB *null_db = NULL; + BRT brt = NULL; + r = toku_open_brt(fname, 1, &brt, 1024, ct, null_txn, toku_builtin_compare_fun, null_db); + assert(r == 0); + + // make a tree + int seq = 0, minkey, maxkey; + BRTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + + // discard the old root block + u_int32_t fullhash = 0; + CACHEKEY *rootp; + rootp = toku_calculate_root_offset_pointer(brt, &fullhash); + + // set the new root to point to the new tree + *rootp = newroot->thisnodename; + + // unpin the new root + r = toku_unpin_brtnode(brt, newroot); + assert(r == 0); + + if (do_verify) { + r = toku_verify_brt(brt); + assert(r != 0); + } + + // flush to the file system + r = 
toku_close_brt(brt, 0); + assert(r == 0); + + // shutdown the cachetable + r = toku_cachetable_close(&ct); + assert(r == 0); +} + +static int +usage(void) { + return 1; +} + +int +test_main (int argc , const char *argv[]) { + int height = 1; + int fanout = 2; + int nperleaf = 8; + int do_verify = 1; + for (int i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (strcmp(arg, "-v") == 0) { + verbose++; + continue; + } + if (strcmp(arg, "-q") == 0) { + verbose = 0; + continue; + } + if (strcmp(arg, "--height") == 0 && i+1 < argc) { + height = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--fanout") == 0 && i+1 < argc) { + fanout = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--nperleaf") == 0 && i+1 < argc) { + nperleaf = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--verify") == 0 && i+1 < argc) { + do_verify = atoi(argv[++i]); + continue; + } + return usage(); + } + test_make_tree(height, fanout, nperleaf, do_verify); + return 0; +} diff --git a/newbrt/tests/verify-dup-in-leaf.c b/newbrt/tests/verify-dup-in-leaf.c new file mode 100644 index 00000000000..79aa0042184 --- /dev/null +++ b/newbrt/tests/verify-dup-in-leaf.c @@ -0,0 +1,119 @@ +/* -*- mode: C; c-basic-offset: 4 -*- */ +#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved." 
+ +// generate a tree with a single leaf node containing duplicate keys +// check that brt verify finds them + +#include "includes.h" +#include "test.h" + +static BRTNODE +make_node(BRT brt, int height) { + BRTNODE node = NULL; + int r = toku_create_new_brtnode(brt, &node, height, 0); + assert(r == 0); + return node; +} + +static void +append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) { + assert(leafnode->height == 0); + + DBT thekey; toku_fill_dbt(&thekey, key, keylen); + DBT theval; toku_fill_dbt(&theval, val, vallen); + + // get an index that we can use to create a new leaf entry + uint32_t idx = toku_omt_size(leafnode->u.l.buffer); + + // apply an insert to the leaf node + BRT_MSG_S cmd = { BRT_INSERT, xids_get_root_xids(), .u.id = { &thekey, &theval } }; + int r = brt_leaf_apply_cmd_once(leafnode, &cmd, idx, NULL, NULL); + assert(r == 0); + + // dont forget to dirty the node + leafnode->dirty = 1; +} + +static void +populate_leaf(BRTNODE leafnode, int k, int v) { + append_leaf(leafnode, &k, sizeof k, &v, sizeof v); +} + +static void +test_dup_in_leaf(int do_verify) { + int r; + + // cleanup + char fname[]= __FILE__ ".brt"; + r = unlink(fname); + assert(r == 0 || (r == -1 && errno == ENOENT)); + + // create a cachetable + CACHETABLE ct = NULL; + r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); + assert(r == 0); + + // create the brt + TOKUTXN null_txn = NULL; + DB *null_db = NULL; + BRT brt = NULL; + r = toku_open_brt(fname, 1, &brt, 1024, ct, null_txn, toku_builtin_compare_fun, null_db); + assert(r == 0); + + // discard the old root block + u_int32_t fullhash = 0; + CACHEKEY *rootp; + rootp = toku_calculate_root_offset_pointer(brt, &fullhash); + + BRTNODE newroot = make_node(brt, 0); + populate_leaf(newroot, htonl(2), 1); + populate_leaf(newroot, htonl(2), 2); + + // set the new root to point to the new tree + *rootp = newroot->thisnodename; + + // unpin the new root + r = toku_unpin_brtnode(brt, newroot); + 
assert(r == 0); + + if (do_verify) { + r = toku_verify_brt(brt); + assert(r != 0); + } + + // flush to the file system + r = toku_close_brt(brt, 0); + assert(r == 0); + + // shutdown the cachetable + r = toku_cachetable_close(&ct); + assert(r == 0); +} + +static int +usage(void) { + return 1; +} + +int +test_main (int argc , const char *argv[]) { + int do_verify = 1; + for (int i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (strcmp(arg, "-v") == 0) { + verbose++; + continue; + } + if (strcmp(arg, "-q") == 0) { + verbose = 0; + continue; + } + if (strcmp(arg, "--verify") == 0 && i+1 < argc) { + do_verify = atoi(argv[++i]); + continue; + } + return usage(); + } + test_dup_in_leaf(do_verify); + return 0; +} diff --git a/newbrt/tests/verify-dup-pivots.c b/newbrt/tests/verify-dup-pivots.c new file mode 100644 index 00000000000..62b90a2eaf4 --- /dev/null +++ b/newbrt/tests/verify-dup-pivots.c @@ -0,0 +1,187 @@ +/* -*- mode: C; c-basic-offset: 4 -*- */ +#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved." 
+ +// generate a tree with duplicate pivots and check that brt->verify finds them + +#include "includes.h" +#include "test.h" + +static BRTNODE +make_node(BRT brt, int height) { + BRTNODE node = NULL; + int r = toku_create_new_brtnode(brt, &node, height, 0); + assert(r == 0); + return node; +} + +static void +append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) { + assert(leafnode->height == 0); + + DBT thekey; toku_fill_dbt(&thekey, key, keylen); + DBT theval; toku_fill_dbt(&theval, val, vallen); + + // get an index that we can use to create a new leaf entry + uint32_t idx = toku_omt_size(leafnode->u.l.buffer); + + // apply an insert to the leaf node + BRT_MSG_S cmd = { BRT_INSERT, xids_get_root_xids(), .u.id = { &thekey, &theval } }; + int r = brt_leaf_apply_cmd_once(leafnode, &cmd, idx, NULL, NULL); + assert(r == 0); + + // dont forget to dirty the node + leafnode->dirty = 1; +} + +static void +populate_leaf(BRTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { + for (int i = 0; i < n; i++) { + int k = htonl(seq + i); + int v = seq + i; + append_leaf(leafnode, &k, sizeof k, &v, sizeof v); + } + *minkey = htonl(seq); + *maxkey = htonl(seq + n - 1); +} + +static UU() void +insert_into_child_buffer(BRTNODE node, int childnum, int minkey, int maxkey) { + for (unsigned int val = htonl(minkey); val <= htonl(maxkey); val++) { + unsigned int key = htonl(val); + DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); + DBT theval; toku_fill_dbt(&theval, &val, sizeof val); + toku_brt_append_to_child_buffer(node, childnum, BRT_INSERT, xids_get_root_xids(), &thekey, &theval); + } +} + +static BRTNODE +make_tree(BRT brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { + BRTNODE node; + if (height == 0) { + node = make_node(brt, 0); + populate_leaf(node, *seq, nperleaf, minkey, maxkey); + *seq += nperleaf; + } else { + node = make_node(brt, height); + int minkeys[fanout], maxkeys[fanout]; + for (int childnum = 0; 
childnum < fanout; childnum++) { + BRTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + if (childnum == 0) + toku_brt_nonleaf_append_child(node, child, NULL, 0); + else { + int k = maxkeys[0]; // use duplicate pivots, should result in a broken tree + struct kv_pair *pivotkey = kv_pair_malloc(&k, sizeof k, NULL, 0); + toku_brt_nonleaf_append_child(node, child, pivotkey, sizeof k); + } + int r = toku_unpin_brtnode(brt, child); + assert(r == 0); + } + *minkey = minkeys[0]; + *maxkey = maxkeys[0]; + for (int i = 1; i < fanout; i++) { + if (memcmp(minkey, &minkeys[i], sizeof minkeys[i]) > 0) + *minkey = minkeys[i]; + if (memcmp(maxkey, &maxkeys[i], sizeof maxkeys[i]) < 0) + *maxkey = maxkeys[i]; + } + } + return node; +} + +static UU() void +deleted_row(UU() DB *db, UU() DBT *key, UU() DBT *val) { +} + +static void +test_make_tree(int height, int fanout, int nperleaf, int do_verify) { + int r; + + // cleanup + char fname[]= __FILE__ ".brt"; + r = unlink(fname); + assert(r == 0 || (r == -1 && errno == ENOENT)); + + // create a cachetable + CACHETABLE ct = NULL; + r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); + assert(r == 0); + + // create the brt + TOKUTXN null_txn = NULL; + DB *null_db = NULL; + BRT brt = NULL; + r = toku_open_brt(fname, 1, &brt, 1024, ct, null_txn, toku_builtin_compare_fun, null_db); + assert(r == 0); + + // make a tree + int seq = 0, minkey, maxkey; + BRTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + + // discard the old root block + u_int32_t fullhash = 0; + CACHEKEY *rootp; + rootp = toku_calculate_root_offset_pointer(brt, &fullhash); + + // set the new root to point to the new tree + *rootp = newroot->thisnodename; + + // unpin the new root + r = toku_unpin_brtnode(brt, newroot); + assert(r == 0); + + if (do_verify) { + r = toku_verify_brt(brt); + assert(r != 0); + } + + // flush to the file system + r = toku_close_brt(brt, 0); + assert(r == 
0); + + // shutdown the cachetable + r = toku_cachetable_close(&ct); + assert(r == 0); +} + +static int +usage(void) { + return 1; +} + +int +test_main (int argc , const char *argv[]) { + int height = 1; + int fanout = 3; + int nperleaf = 8; + int do_verify = 1; + for (int i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (strcmp(arg, "-v") == 0) { + verbose++; + continue; + } + if (strcmp(arg, "-q") == 0) { + verbose = 0; + continue; + } + if (strcmp(arg, "--height") == 0 && i+1 < argc) { + height = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--fanout") == 0 && i+1 < argc) { + fanout = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--nperleaf") == 0 && i+1 < argc) { + nperleaf = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--verify") == 0 && i+1 < argc) { + do_verify = atoi(argv[++i]); + continue; + } + return usage(); + } + test_make_tree(height, fanout, nperleaf, do_verify); + return 0; +} diff --git a/newbrt/tests/verify-misrouted-msgs.c b/newbrt/tests/verify-misrouted-msgs.c new file mode 100644 index 00000000000..9a30e6df412 --- /dev/null +++ b/newbrt/tests/verify-misrouted-msgs.c @@ -0,0 +1,191 @@ +/* -*- mode: C; c-basic-offset: 4 -*- */ +#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved." + +// generate a tree with misrouted messages in the child buffers. +// check that brt verify finds them. 
+ +#include "includes.h" +#include "test.h" + +static BRTNODE +make_node(BRT brt, int height) { + BRTNODE node = NULL; + int r = toku_create_new_brtnode(brt, &node, height, 0); + assert(r == 0); + return node; +} + +static void +append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) { + assert(leafnode->height == 0); + + DBT thekey; toku_fill_dbt(&thekey, key, keylen); + DBT theval; toku_fill_dbt(&theval, val, vallen); + + // get an index that we can use to create a new leaf entry + uint32_t idx = toku_omt_size(leafnode->u.l.buffer); + + // apply an insert to the leaf node + BRT_MSG_S cmd = { BRT_INSERT, xids_get_root_xids(), .u.id = { &thekey, &theval } }; + int r = brt_leaf_apply_cmd_once(leafnode, &cmd, idx, NULL, NULL); + assert(r == 0); + + // dont forget to dirty the node + leafnode->dirty = 1; +} + +static void +populate_leaf(BRTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { + for (int i = 0; i < n; i++) { + int k = htonl(seq + i); + int v = seq + i; + append_leaf(leafnode, &k, sizeof k, &v, sizeof v); + } + *minkey = htonl(seq); + *maxkey = htonl(seq + n - 1); +} + +static void +insert_into_child_buffer(BRTNODE node, int childnum, int minkey, int maxkey) { + int k = htonl(maxkey); + maxkey = htonl(k+1); + for (unsigned int val = htonl(minkey); val <= htonl(maxkey); val++) { + unsigned int key = htonl(val); + DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); + DBT theval; toku_fill_dbt(&theval, &val, sizeof val); + toku_brt_append_to_child_buffer(node, childnum, BRT_INSERT, xids_get_root_xids(), &thekey, &theval); + } +} + +static BRTNODE +make_tree(BRT brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { + BRTNODE node; + if (height == 0) { + node = make_node(brt, 0); + populate_leaf(node, *seq, nperleaf, minkey, maxkey); + *seq += nperleaf; + } else { + node = make_node(brt, height); + int minkeys[fanout], maxkeys[fanout]; + for (int childnum = 0; childnum < fanout; childnum++) { + 
BRTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + if (childnum == 0) + toku_brt_nonleaf_append_child(node, child, NULL, 0); + else { + int k = maxkeys[childnum-1]; // use the max of the left tree + struct kv_pair *pivotkey = kv_pair_malloc(&k, sizeof k, NULL, 0); + toku_brt_nonleaf_append_child(node, child, pivotkey, sizeof k); + } + int r = toku_unpin_brtnode(brt, child); + assert(r == 0); + insert_into_child_buffer(node, childnum, minkeys[childnum], maxkeys[childnum]); + } + *minkey = minkeys[0]; + *maxkey = maxkeys[0]; + for (int i = 1; i < fanout; i++) { + if (memcmp(minkey, &minkeys[i], sizeof minkeys[i]) > 0) + *minkey = minkeys[i]; + if (memcmp(maxkey, &maxkeys[i], sizeof maxkeys[i]) < 0) + *maxkey = maxkeys[i]; + } + } + return node; +} + +static UU() void +deleted_row(UU() DB *db, UU() DBT *key, UU() DBT *val) { +} + +static void +test_make_tree(int height, int fanout, int nperleaf, int do_verify) { + int r; + + // cleanup + char fname[]= __FILE__ ".brt"; + r = unlink(fname); + assert(r == 0 || (r == -1 && errno == ENOENT)); + + // create a cachetable + CACHETABLE ct = NULL; + r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); + assert(r == 0); + + // create the brt + TOKUTXN null_txn = NULL; + DB *null_db = NULL; + BRT brt = NULL; + r = toku_open_brt(fname, 1, &brt, 1024, ct, null_txn, toku_builtin_compare_fun, null_db); + assert(r == 0); + + // make a tree + int seq = 0, minkey, maxkey; + BRTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + + // discard the old root block + u_int32_t fullhash = 0; + CACHEKEY *rootp; + rootp = toku_calculate_root_offset_pointer(brt, &fullhash); + + // set the new root to point to the new tree + *rootp = newroot->thisnodename; + + // unpin the new root + r = toku_unpin_brtnode(brt, newroot); + assert(r == 0); + + if (do_verify) { + r = toku_verify_brt(brt); + assert(r != 0); + } + + // flush to the file system + r = 
toku_close_brt(brt, 0); + assert(r == 0); + + // shutdown the cachetable + r = toku_cachetable_close(&ct); + assert(r == 0); +} + +static int +usage(void) { + return 1; +} + +int +test_main (int argc , const char *argv[]) { + int height = 1; + int fanout = 2; + int nperleaf = 8; + int do_verify = 1; + for (int i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (strcmp(arg, "-v") == 0) { + verbose++; + continue; + } + if (strcmp(arg, "-q") == 0) { + verbose = 0; + continue; + } + if (strcmp(arg, "--height") == 0 && i+1 < argc) { + height = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--fanout") == 0 && i+1 < argc) { + fanout = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--nperleaf") == 0 && i+1 < argc) { + nperleaf = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--verify") == 0 && i+1 < argc) { + do_verify = atoi(argv[++i]); + continue; + } + return usage(); + } + test_make_tree(height, fanout, nperleaf, do_verify); + return 0; +} diff --git a/newbrt/tests/verify-unsorted-leaf.c b/newbrt/tests/verify-unsorted-leaf.c new file mode 100644 index 00000000000..948cabf7849 --- /dev/null +++ b/newbrt/tests/verify-unsorted-leaf.c @@ -0,0 +1,119 @@ +/* -*- mode: C; c-basic-offset: 4 -*- */ +#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved." 
+ +// generate a tree with a single leaf node containing unsorted keys +// check that brt verify finds them + +#include "includes.h" +#include "test.h" + +static BRTNODE +make_node(BRT brt, int height) { + BRTNODE node = NULL; + int r = toku_create_new_brtnode(brt, &node, height, 0); + assert(r == 0); + return node; +} + +static void +append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) { + assert(leafnode->height == 0); + + DBT thekey; toku_fill_dbt(&thekey, key, keylen); + DBT theval; toku_fill_dbt(&theval, val, vallen); + + // get an index that we can use to create a new leaf entry + uint32_t idx = toku_omt_size(leafnode->u.l.buffer); + + // apply an insert to the leaf node + BRT_MSG_S cmd = { BRT_INSERT, xids_get_root_xids(), .u.id = { &thekey, &theval } }; + int r = brt_leaf_apply_cmd_once(leafnode, &cmd, idx, NULL, NULL); + assert(r == 0); + + // dont forget to dirty the node + leafnode->dirty = 1; +} + +static void +populate_leaf(BRTNODE leafnode, int k, int v) { + append_leaf(leafnode, &k, sizeof k, &v, sizeof v); +} + +static void +test_dup_in_leaf(int do_verify) { + int r; + + // cleanup + char fname[]= __FILE__ ".brt"; + r = unlink(fname); + assert(r == 0 || (r == -1 && errno == ENOENT)); + + // create a cachetable + CACHETABLE ct = NULL; + r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); + assert(r == 0); + + // create the brt + TOKUTXN null_txn = NULL; + DB *null_db = NULL; + BRT brt = NULL; + r = toku_open_brt(fname, 1, &brt, 1024, ct, null_txn, toku_builtin_compare_fun, null_db); + assert(r == 0); + + // discard the old root block + u_int32_t fullhash = 0; + CACHEKEY *rootp; + rootp = toku_calculate_root_offset_pointer(brt, &fullhash); + + BRTNODE newroot = make_node(brt, 0); + populate_leaf(newroot, htonl(2), 1); + populate_leaf(newroot, htonl(1), 2); + + // set the new root to point to the new tree + *rootp = newroot->thisnodename; + + // unpin the new root + r = toku_unpin_brtnode(brt, newroot); + 
assert(r == 0); + + if (do_verify) { + r = toku_verify_brt(brt); + assert(r != 0); + } + + // flush to the file system + r = toku_close_brt(brt, 0); + assert(r == 0); + + // shutdown the cachetable + r = toku_cachetable_close(&ct); + assert(r == 0); +} + +static int +usage(void) { + return 1; +} + +int +test_main (int argc , const char *argv[]) { + int do_verify = 1; + for (int i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (strcmp(arg, "-v") == 0) { + verbose++; + continue; + } + if (strcmp(arg, "-q") == 0) { + verbose = 0; + continue; + } + if (strcmp(arg, "--verify") == 0 && i+1 < argc) { + do_verify = atoi(argv[++i]); + continue; + } + return usage(); + } + test_dup_in_leaf(do_verify); + return 0; +} diff --git a/newbrt/tests/verify-unsorted-pivots.c b/newbrt/tests/verify-unsorted-pivots.c new file mode 100644 index 00000000000..19c4b9ef2d7 --- /dev/null +++ b/newbrt/tests/verify-unsorted-pivots.c @@ -0,0 +1,187 @@ +/* -*- mode: C; c-basic-offset: 4 -*- */ +#ident "Copyright (c) 2011 Tokutek Inc. All rights reserved." 
+ +// generate a tree with unsorted pivots and check that brt->verify finds them + +#include "includes.h" +#include "test.h" + +static BRTNODE +make_node(BRT brt, int height) { + BRTNODE node = NULL; + int r = toku_create_new_brtnode(brt, &node, height, 0); + assert(r == 0); + return node; +} + +static void +append_leaf(BRTNODE leafnode, void *key, size_t keylen, void *val, size_t vallen) { + assert(leafnode->height == 0); + + DBT thekey; toku_fill_dbt(&thekey, key, keylen); + DBT theval; toku_fill_dbt(&theval, val, vallen); + + // get an index that we can use to create a new leaf entry + uint32_t idx = toku_omt_size(leafnode->u.l.buffer); + + // apply an insert to the leaf node + BRT_MSG_S cmd = { BRT_INSERT, xids_get_root_xids(), .u.id = { &thekey, &theval } }; + int r = brt_leaf_apply_cmd_once(leafnode, &cmd, idx, NULL, NULL); + assert(r == 0); + + // dont forget to dirty the node + leafnode->dirty = 1; +} + +static void +populate_leaf(BRTNODE leafnode, int seq, int n, int *minkey, int *maxkey) { + for (int i = 0; i < n; i++) { + int k = htonl(seq + i); + int v = seq + i; + append_leaf(leafnode, &k, sizeof k, &v, sizeof v); + } + *minkey = htonl(seq); + *maxkey = htonl(seq + n - 1); +} + +static UU() void +insert_into_child_buffer(BRTNODE node, int childnum, int minkey, int maxkey) { + for (unsigned int val = htonl(minkey); val <= htonl(maxkey); val++) { + unsigned int key = htonl(val); + DBT thekey; toku_fill_dbt(&thekey, &key, sizeof key); + DBT theval; toku_fill_dbt(&theval, &val, sizeof val); + toku_brt_append_to_child_buffer(node, childnum, BRT_INSERT, xids_get_root_xids(), &thekey, &theval); + } +} + +static BRTNODE +make_tree(BRT brt, int height, int fanout, int nperleaf, int *seq, int *minkey, int *maxkey) { + BRTNODE node; + if (height == 0) { + node = make_node(brt, 0); + populate_leaf(node, *seq, nperleaf, minkey, maxkey); + *seq += nperleaf; + } else { + node = make_node(brt, height); + int minkeys[fanout], maxkeys[fanout]; + for (int childnum = 0; 
childnum < fanout; childnum++) { + BRTNODE child = make_tree(brt, height-1, fanout, nperleaf, seq, &minkeys[childnum], &maxkeys[childnum]); + if (childnum == 0) + toku_brt_nonleaf_append_child(node, child, NULL, 0); + else { + int k = minkeys[fanout - childnum - 1]; // use unsorted pivots + struct kv_pair *pivotkey = kv_pair_malloc(&k, sizeof k, NULL, 0); + toku_brt_nonleaf_append_child(node, child, pivotkey, sizeof k); + } + int r = toku_unpin_brtnode(brt, child); + assert(r == 0); + } + *minkey = minkeys[0]; + *maxkey = maxkeys[0]; + for (int i = 1; i < fanout; i++) { + if (memcmp(minkey, &minkeys[i], sizeof minkeys[i]) > 0) + *minkey = minkeys[i]; + if (memcmp(maxkey, &maxkeys[i], sizeof maxkeys[i]) < 0) + *maxkey = maxkeys[i]; + } + } + return node; +} + +static UU() void +deleted_row(UU() DB *db, UU() DBT *key, UU() DBT *val) { +} + +static void +test_make_tree(int height, int fanout, int nperleaf, int do_verify) { + int r; + + // cleanup + char fname[]= __FILE__ ".brt"; + r = unlink(fname); + assert(r == 0 || (r == -1 && errno == ENOENT)); + + // create a cachetable + CACHETABLE ct = NULL; + r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); + assert(r == 0); + + // create the brt + TOKUTXN null_txn = NULL; + DB *null_db = NULL; + BRT brt = NULL; + r = toku_open_brt(fname, 1, &brt, 1024, ct, null_txn, toku_builtin_compare_fun, null_db); + assert(r == 0); + + // make a tree + int seq = 0, minkey, maxkey; + BRTNODE newroot = make_tree(brt, height, fanout, nperleaf, &seq, &minkey, &maxkey); + + // discard the old root block + u_int32_t fullhash = 0; + CACHEKEY *rootp; + rootp = toku_calculate_root_offset_pointer(brt, &fullhash); + + // set the new root to point to the new tree + *rootp = newroot->thisnodename; + + // unpin the new root + r = toku_unpin_brtnode(brt, newroot); + assert(r == 0); + + if (do_verify) { + r = toku_verify_brt(brt); + assert(r != 0); + } + + // flush to the file system + r = toku_close_brt(brt, 0); + assert(r == 0); + + // 
shutdown the cachetable + r = toku_cachetable_close(&ct); + assert(r == 0); +} + +static int +usage(void) { + return 1; +} + +int +test_main (int argc , const char *argv[]) { + int height = 1; + int fanout = 3; + int nperleaf = 8; + int do_verify = 1; + for (int i = 1; i < argc; i++) { + const char *arg = argv[i]; + if (strcmp(arg, "-v") == 0) { + verbose++; + continue; + } + if (strcmp(arg, "-q") == 0) { + verbose = 0; + continue; + } + if (strcmp(arg, "--height") == 0 && i+1 < argc) { + height = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--fanout") == 0 && i+1 < argc) { + fanout = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--nperleaf") == 0 && i+1 < argc) { + nperleaf = atoi(argv[++i]); + continue; + } + if (strcmp(arg, "--verify") == 0 && i+1 < argc) { + do_verify = atoi(argv[++i]); + continue; + } + return usage(); + } + test_make_tree(height, fanout, nperleaf, do_verify); + return 0; +} diff --git a/src/ydb.c b/src/ydb.c index 50b93d6164b..d44900a1192 100644 --- a/src/ydb.c +++ b/src/ydb.c @@ -5805,6 +5805,31 @@ locked_db_get_indexer(DB *db, DB_INDEXER **indexer_ptr) { toku_ydb_lock(); *indexer_ptr = toku_db_get_indexer(db); toku_ydb_unlock(); } +struct ydb_verify_context { + int (*progress_callback)(void *extra, float progress); + void *progress_extra; +}; + +static int +ydb_verify_progress_callback(void *extra, float progress) { + struct ydb_verify_context *context = (struct ydb_verify_context *) extra; + toku_ydb_unlock_and_yield(1000); + int r = 0; + if (context->progress_callback) + r = context->progress_callback(context->progress_extra, progress); + toku_ydb_lock(); + return r; +} + +static int +locked_db_verify_with_progress(DB *db, int (*progress_callback)(void *extra, float progress), void *progress_extra, int verbose, int keep_going) { + struct ydb_verify_context context = { progress_callback, progress_extra }; + toku_ydb_lock(); + int r = toku_verify_brt_with_progress(db->i->brt, ydb_verify_progress_callback, &context, verbose, 
keep_going); + toku_ydb_unlock(); + return r; +} + static int toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) { int r; @@ -5851,6 +5876,7 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) { SDB(get_fragmentation); SDB(set_indexer); SDB(get_indexer); + SDB(verify_with_progress); #undef SDB result->dbt_pos_infty = toku_db_dbt_pos_infty; result->dbt_neg_infty = toku_db_dbt_neg_infty;