From d73f07d489facdc3020a906481d59641a2d48771 Mon Sep 17 00:00:00 2001 From: Leif Walsh Date: Wed, 17 Apr 2013 00:00:01 -0400 Subject: [PATCH] [t:4306] merging work for 4306 git-svn-id: file:///svn/toku/tokudb@38079 c7de825b-a66e-492c-adef-691d508d4ae1 --- newbrt/brt.c | 6 +- newbrt/omt.c | 17 +++- newbrt/omt.h | 6 ++ newbrt/tests/omt-test.c | 75 ++++++++++++++ src/tests/Makefile | 1 + src/tests/keyrange-merge.c | 197 +++++++++++++++++++++++++++++++++++++ 6 files changed, 294 insertions(+), 8 deletions(-) create mode 100644 src/tests/keyrange-merge.c diff --git a/newbrt/brt.c b/newbrt/brt.c index 856c99a187c..03041c7fa8d 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -2032,11 +2032,7 @@ toku_bnc_flush_to_child( toku_omt_destroy(&snapshot_txnids); } if (live_list_reverse) { - OMTVALUE v; - int r = toku_omt_fetch(live_list_reverse, 0, &v); - if (r == 0) { - toku_free(v); - } + toku_omt_free_items_pool(live_list_reverse); toku_omt_destroy(&live_list_reverse); } return 0; diff --git a/newbrt/omt.c b/newbrt/omt.c index affbf48e2d8..5656847de7f 100644 --- a/newbrt/omt.c +++ b/newbrt/omt.c @@ -799,7 +799,7 @@ static int omt_copy_data(OMTVALUE *a, OMT omt, u_int32_t eltsize) { int toku_omt_clone(OMT *dest, OMT src, u_int32_t eltsize) { u_int32_t size = omt_size(src); if (size == 0) { - *dest = NULL; + toku_omt_create(dest); return 0; } OMTVALUE *a = toku_xmalloc((sizeof *a) * size); @@ -819,7 +819,7 @@ err: int toku_omt_clone_pool(OMT *dest, OMT src, u_int32_t eltsize) { u_int32_t size = omt_size(src); if (size == 0) { - *dest = NULL; + toku_omt_create(dest); return 0; } OMTVALUE *a = toku_xmalloc((sizeof *a) * size); @@ -838,10 +838,21 @@ err: return r; } +void toku_omt_free_items_pool(OMT omt) { + if (toku_omt_size(omt) == 0) { + return; + } + OMTVALUE v; + int r = toku_omt_fetch(omt, 0, &v); + lazy_assert_zero(r); + invariant(v != NULL); + toku_free(v); +} + int toku_omt_clone_noptr(OMT *dest, OMT src) { u_int32_t size = omt_size(src); if (size == 0) { - *dest = NULL; + toku_omt_create(dest); return 0; } OMTVALUE *a = toku_xmalloc((sizeof *a) * size); diff --git a/newbrt/omt.h b/newbrt/omt.h index f552a4b390d..619d5702d65 100644 --- a/newbrt/omt.h +++ b/newbrt/omt.h @@ -427,6 +427,12 @@ int toku_omt_clone_pool(OMT *dest, OMT src, u_int32_t eltsize); // Performance: time between O(n) and O(n log n), depending how long it // takes to traverse src. +void toku_omt_free_items_pool(OMT omt); +// Effect: Frees the memory containing the items in an omt created with toku_omt_clone_pool. +// Since toku_omt_clone_pool allocates a contiguous chunk of memory and +// the first item is at the first position, this just gets the first +// value out of the omt and frees it for you. + int toku_omt_clone_noptr(OMT *dest, OMT src); // Effect: Creates a copy of an omt. // Sets *dest to the clone diff --git a/newbrt/tests/omt-test.c b/newbrt/tests/omt-test.c index f0e15659f5d..562b469cddf 100644 --- a/newbrt/tests/omt-test.c +++ b/newbrt/tests/omt-test.c @@ -776,6 +776,77 @@ runtests_create_choice (enum create_type create_choice) { test_find( create_choice, CLOSE_WHEN_DONE); } +static void +test_clone(u_int32_t nelts) +// Test that each clone operation gives the right data back. If nelts is +// zero, also tests that you still get a valid OMT back and that the way +// to deallocate it still works. +{ + OMT src = NULL, dest = NULL; + int r; + + r = toku_omt_create(&src); + assert_zero(r); + for (long i = 0; i < nelts; ++i) { + r = toku_omt_insert_at(src, (OMTVALUE) i, i); + assert_zero(r); + } + + r = toku_omt_clone_noptr(&dest, src); + assert_zero(r); + assert(dest != NULL); + assert(toku_omt_size(dest) == nelts); + for (long i = 0; i < nelts; ++i) { + OMTVALUE v; + long l; + r = toku_omt_fetch(dest, i, &v); + assert_zero(r); + l = (long) v; + assert(l == i); + } + toku_omt_destroy(&dest); + toku_omt_destroy(&src); + + r = toku_omt_create(&src); + assert_zero(r); + long array[nelts]; + for (long i = 0; i < nelts; ++i) { + array[i] = i; + r = toku_omt_insert_at(src, &array[i], i); + assert_zero(r); + } + + r = toku_omt_clone_pool(&dest, src, (sizeof array[0])); + assert_zero(r); + assert(dest != NULL); + assert(toku_omt_size(dest) == nelts); + for (long i = 0; i < nelts; ++i) { + OMTVALUE v; + long *l; + r = toku_omt_fetch(dest, i, &v); + assert_zero(r); + l = v; + assert(*l == i); + } + toku_omt_free_items_pool(dest); + toku_omt_destroy(&dest); + r = toku_omt_clone(&dest, src, (sizeof array[0])); + assert_zero(r); + assert(dest != NULL); + assert(toku_omt_size(dest) == nelts); + for (long i = 0; i < nelts; ++i) { + OMTVALUE v; + long *l; + r = toku_omt_fetch(dest, i, &v); + assert_zero(r); + l = v; + assert(*l == i); + } + toku_omt_free_items(dest); + toku_omt_destroy(&dest); + toku_omt_destroy(&src); +} + int test_main(int argc, const char *argv[]) { parse_args(argc, argv); @@ -786,6 +857,10 @@ test_main(int argc, const char *argv[]) { runtests_create_choice(STEAL_ARRAY); runtests_create_choice(INSERT_AT); runtests_create_choice(INSERT_AT_ALMOST_RANDOM); + test_clone(0); + test_clone(1); + test_clone(1000); + test_clone(10000); cleanup_globals(); return 0; } diff --git a/src/tests/Makefile b/src/tests/Makefile index 4564ec7fe1f..64cf81f0ae5 100644 --- a/src/tests/Makefile +++ b/src/tests/Makefile @@ -139,6 +139,7 @@ BDB_DONTRUN_TESTS = \ isolation \ isolation-read-committed \ keyrange \ + keyrange-merge \ last-verify-time \ loader-cleanup-test \ loader-create-abort \ diff --git a/src/tests/keyrange-merge.c b/src/tests/keyrange-merge.c new file mode 100644 index 00000000000..aadc339cade --- /dev/null +++ b/src/tests/keyrange-merge.c @@ -0,0 +1,197 @@ +/* -*- mode: C; c-basic-offset: 4 -*- */ +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." +#include "test.h" + +// verify that key_range64 returns reasonable results after leaf merges + +// create a tree with at least 2 child nodes and large rows. +// replace the rows with small rows. +// this should cause a leaf node merge. +// verify stats after the merge. + +#include +#include +#include + +static DB_ENV *env = NULL; +static DB_TXN *txn = NULL; +static DB *db = NULL; +static u_int32_t db_page_size = 4096; +static u_int32_t db_basement_size = 4096; +static char *envdir = ENVDIR; +static u_int64_t nrows = 0; + +static u_int64_t +max64(u_int64_t a, u_int64_t b) { + return a < b ? b : a; +} + +static void +run_test(void) { + if (verbose) printf("%s %" PRIu64 "\n", __FUNCTION__, nrows); + + // create a tree with 2 children + size_t key_size = 9; + size_t val_size = db_basement_size / 4; + size_t est_row_size_with_overhead = 8 + key_size + 4 + val_size + 4; // xid + key + key_len + val + val)len + size_t rows_per_basement = db_basement_size / est_row_size_with_overhead; + + if (nrows == 0) + nrows = 2 * (db_page_size / est_row_size_with_overhead); + + int r; + r = db_env_create(&env, 0); CKERR(r); + env->set_errfile(env, stderr); + r = env->set_redzone(env, 0); CKERR(r); + r = env->open(env, envdir, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + r = db_create(&db, env, 0); CKERR(r); + r = db->set_pagesize(db, db_page_size); + r = env->txn_begin(env, 0, &txn, 0); CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + r = txn->commit(txn, 0); CKERR(r); + + // insert keys 1, 3, 5, ... 2*(nrows-1) + 1 + r = env->txn_begin(env, 0, &txn, 0); CKERR(r); + for (u_int64_t i=0; iput(db, txn, &k, &v, 0); CKERR(r); + } + + DB_BTREE_STAT64 s64; + r = db->stat64(db, txn, &s64); CKERR(r); + if (verbose) + printf("stats %" PRId64 " %" PRId64 "\n", s64.bt_nkeys, s64.bt_dsize); + assert(0 < s64.bt_nkeys && s64.bt_nkeys <= nrows); + assert(0 < s64.bt_dsize && s64.bt_dsize <= nrows * (key_size + val_size)); + + r = txn->commit(txn, 0); CKERR(r); + + // lose the seqinsert bit by flushing the tree from the cache table + r = db->close(db, 0); CKERR(r); + r = db_create(&db, env, 0); CKERR(r); + r = env->txn_begin(env, 0, &txn, 0); CKERR(r); + r = db->open(db, txn, "foo.db", 0, DB_BTREE, 0, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + r = txn->commit(txn, 0); CKERR(r); + + // replace the rows with small values. this should shrink the leaf node and induce merging. + // do this until a leaf node merge occurs. + int t; + for (t = 0; t<100; t++) { + r = env->txn_begin(env, 0, &txn, 0); CKERR(r); + // replace in reverse order to disable the sequential insertion code + for (u_int64_t i=nrows; i>0; i--) { + char key[100]; + snprintf(key, sizeof key, "%08llu", (unsigned long long)2*(i-1)+1); + assert(1+strlen(key) == key_size); + DBT k = { .data = key, .size = 1+strlen(key), }; + DBT v = { .data = NULL, .size = 0, }; + r = db->put(db, txn, &k, &v, 0); CKERR(r); + } + r = txn->commit(txn, 0); CKERR(r); + ENGINE_STATUS es; + r = env->get_engine_status(env, &es, NULL, 0); + CKERR(r); + if (es.merge_leaf > 0) { + if (verbose) printf("t=%d\n", t); + break; + } + } + assert(t < 100); // if this asserts, then no leaf merge occurred + + // verify key_range for keys that exist in the tree + r = env->txn_begin(env, 0, &txn, 0); CKERR(r); + for (u_int64_t i=0; ikey_range64(db, txn, dbt_init(&k, key, 1+strlen(key)), &less, &equal, &greater, &is_exact); CKERR(r); + if (verbose) + printf("key %llu/%llu %llu %llu %llu %llu\n", (unsigned long long)2*i, (unsigned long long)2*nrows, (unsigned long long)less, (unsigned long long)equal, (unsigned long long)greater, + (unsigned long long)(less+equal+greater)); + assert(is_exact == 0); + assert(0 < less + equal + greater); + assert(less + equal + greater < 2*nrows); + assert(equal == 1); + u_int64_t est_i = max64(i, i + rows_per_basement/2); + assert(less <= est_i + est_i / 1); + assert(greater <= nrows - i + rows_per_basement/2); + } + r = txn->commit(txn, 0); CKERR(r); + + // verify key range for keys that do not exist in the tree + r = env->txn_begin(env, 0, &txn, 0); CKERR(r); + for (u_int64_t i=0; i<1+nrows; i++) { + char key[100]; + snprintf(key, 100, "%08llu", (unsigned long long)2*i); + DBT k; + u_int64_t less,equal,greater; + int is_exact; + r = db->key_range64(db, txn, dbt_init(&k, key, 1+strlen(key)), &less, &equal, &greater, &is_exact); CKERR(r); + if (verbose) + printf("key %llu/%llu %llu %llu %llu %llu\n", (unsigned long long)2*i, (unsigned long long)2*nrows, (unsigned long long)less, (unsigned long long)equal, (unsigned long long)greater, + (unsigned long long)(less+equal+greater)); + assert(is_exact == 0); + assert(0 < less + equal + greater); + assert(less + equal + greater < 2*nrows); + assert(equal == 0); + u_int64_t est_i = max64(i, i + rows_per_basement/2); + assert(less <= est_i + est_i / 1); + assert(greater <= nrows - i + rows_per_basement/2); + } + r = txn->commit(txn, 0); CKERR(r); + + r = db->close(db, 0); CKERR(r); + r = env->close(env, 0); CKERR(r); +} + +static int +usage(void) { + fprintf(stderr, "-v (verbose)\n"); + fprintf(stderr, "-q (quiet)\n"); + fprintf(stderr, "--envdir %s\n", envdir); + fprintf(stderr, "--nrows %" PRIu64 " (number of rows)\n", nrows); + fprintf(stderr, "--nrows %" PRIu64 " (number of rows)\n", nrows); + return 1; +} + +int +test_main (int argc , char * const argv[]) { + for (int i = 1 ; i < argc; i++) { + if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) { + verbose++; + continue; + } + if (strcmp(argv[i], "-q") == 0) { + if (verbose > 0) + verbose--; + continue; + } + if (strcmp(argv[i], "--envdir") == 0 && i+1 < argc) { + envdir = argv[++i]; + continue; + } + if (strcmp(argv[i], "--nrows") == 0 && i+1 < argc) { + nrows = atoll(argv[++i]); + continue; + } + return usage(); + } + + char rmcmd[32 + strlen(envdir)]; + snprintf(rmcmd, sizeof rmcmd, "rm -rf %s", envdir); + int r; + r = system(rmcmd); CKERR(r); + r = toku_os_mkdir(envdir, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + run_test(); + + return 0; +}