From cf3bbe92df242a55c9814d040f539fbd4341c1b2 Mon Sep 17 00:00:00 2001 From: Rich Prohaska Date: Tue, 16 Apr 2013 23:59:34 -0400 Subject: [PATCH] #3147 merge tokudb block size changes to main refs[t:3147] git-svn-id: file:///svn/toku/tokudb@26780 c7de825b-a66e-492c-adef-691d508d4ae1 --- newbrt/brt.c | 2 +- newbrt/logformat.c | 9 ++ newbrt/logger.c | 4 +- newbrt/logger.h | 2 +- newbrt/recover.c | 56 +++++++-- src/tests/recover-fcreate-nodesize.c | 163 +++++++++++++++++++++++++++ src/ydb.c | 13 +++ 7 files changed, 237 insertions(+), 12 deletions(-) create mode 100644 src/tests/recover-fcreate-nodesize.c diff --git a/newbrt/brt.c b/newbrt/brt.c index 914f03fb7cf..55911779084 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -3314,7 +3314,7 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET if (r != 0) goto died1; } txn_created = (BOOL)(txn!=NULL); - r = toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, mode, t->flags, &(t->temp_descriptor)); + r = toku_logger_log_fcreate2(txn, fname_in_env, reserved_filenum, mode, t->flags, &(t->temp_descriptor), t->nodesize); if (r!=0) goto died1; r = brt_create_file(t, fname_in_cwd, &fd); } diff --git a/newbrt/logformat.c b/newbrt/logformat.c index dec0c406fbf..7077f0949ea 100644 --- a/newbrt/logformat.c +++ b/newbrt/logformat.c @@ -122,6 +122,15 @@ const struct logtype logtypes[] = { {"u_int32_t", "descriptor_version", 0}, {"BYTESTRING", "descriptor", 0}, NULLFIELD}}, + {"fcreate2", 'G', FA{{"TXNID", "xid", 0}, + {"FILENUM", "filenum", 0}, + {"BYTESTRING", "iname", 0}, + {"u_int32_t", "mode", "0%o"}, + {"u_int32_t", "treeflags", 0}, + {"u_int32_t", "descriptor_version", 0}, + {"BYTESTRING", "descriptor", 0}, + {"u_int32_t", "nodesize", 0}, + NULLFIELD}}, //TODO: #2037 Add dname {"fopen", 'O', FA{{"BYTESTRING", "iname", 0}, {"FILENUM", "filenum", 0}, diff --git a/newbrt/logger.c b/newbrt/logger.c index 20077b8502c..c0e9aa08809 100644 --- a/newbrt/logger.c +++ b/newbrt/logger.c @@ -854,13 
+854,13 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn) } // fname is the iname -int toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t treeflags, DESCRIPTOR descriptor_p) { +int toku_logger_log_fcreate2 (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t treeflags, DESCRIPTOR descriptor_p, u_int32_t nodesize) { if (txn==0) return 0; if (txn->logger->is_panicked) return EINVAL; BYTESTRING bs_fname = { .len=strlen(fname), .data = (char *) fname }; BYTESTRING bs_descriptor = { .len=descriptor_p->dbt.size, .data = descriptor_p->dbt.data }; // fsync log on fcreate - int r = toku_log_fcreate (txn->logger, (LSN*)0, 1, toku_txn_get_txnid(txn), filenum, bs_fname, mode, treeflags, descriptor_p->version, bs_descriptor); + int r = toku_log_fcreate2 (txn->logger, (LSN*)0, 1, toku_txn_get_txnid(txn), filenum, bs_fname, mode, treeflags, descriptor_p->version, bs_descriptor, nodesize); return r; } diff --git a/newbrt/logger.h b/newbrt/logger.h index 5e33bac661c..57681ce4ce0 100644 --- a/newbrt/logger.h +++ b/newbrt/logger.h @@ -57,7 +57,7 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn); // Returns: 0 if success int toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn); -int toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t flags, DESCRIPTOR descriptor_p); +int toku_logger_log_fcreate2 (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t flags, DESCRIPTOR descriptor_p, u_int32_t nodesize); int toku_logger_log_fdelete (TOKUTXN txn, const char *fname); int toku_logger_log_fopen (TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags); int toku_logger_log_descriptor (TOKUTXN txn, FILENUM filenum, DESCRIPTOR descriptor_p); diff --git a/newbrt/recover.c b/newbrt/recover.c index 164507458da..a5caafe0c9f 100644 --- a/newbrt/recover.c +++ b/newbrt/recover.c @@ -238,7 +238,7 @@ static void 
recover_yield(voidfp f, void *fpthunk, void *UU(yieldthunk)) { // Open the file if it is not already open. If it is already open, then do nothing. static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create, int mode, BYTESTRING *bs_iname, FILENUM filenum, u_int32_t treeflags, - u_int32_t descriptor_version, BYTESTRING* descriptor, TOKUTXN txn) { + u_int32_t descriptor_version, BYTESTRING* descriptor, TOKUTXN txn, uint32_t nodesize) { int r; char *iname = fixup_fname(bs_iname); @@ -247,12 +247,17 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create assert(r == 0); r = toku_brt_set_flags(brt, treeflags); - assert(r==0); + assert(r == 0); + + if (nodesize != 0) { + r = toku_brt_set_nodesize(brt, nodesize); + assert(r == 0); + } // set the key compare functions if (!(treeflags & TOKU_DB_KEYCMP_BUILTIN) && renv->bt_compare) { r = toku_brt_set_bt_compare(brt, renv->bt_compare); - assert(r==0); + assert(r == 0); } // TODO mode (FUTURE FEATURE) @@ -264,7 +269,7 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create DBT descriptor_dbt; toku_fill_dbt(&descriptor_dbt, descriptor->data, descriptor->len); r = toku_brt_set_descriptor(brt, descriptor_version, &descriptor_dbt); - if (r!=0) goto close_brt; + if (r != 0) goto close_brt; } r = toku_brt_open_recovery(brt, iname, must_create, must_create, renv->ct, txn, fake_db, filenum); if (r != 0) { @@ -275,7 +280,7 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create int rr = toku_close_brt(brt, NULL); assert(rr == 0); toku_free(iname); toku_free(fake_db); //Free memory allocated for the fake db. - if (r==ENOENT) //Not an error to simply be missing. + if (r == ENOENT) //Not an error to simply be missing. 
r = 0; return r; } @@ -404,7 +409,7 @@ static int toku_recover_fassociate (struct logtype_fassociate *l, RECOVER_ENV re renv->ss.checkpoint_num_fassociate++; assert(r==DB_NOTFOUND); //Not open // open it if it exists - r = internal_recover_fopen_or_fcreate(renv, FALSE, 0, &l->iname, l->filenum, l->treeflags, 0, NULL, NULL); + r = internal_recover_fopen_or_fcreate(renv, FALSE, 0, &l->iname, l->filenum, l->treeflags, 0, NULL, NULL, 0); if (r==0 && !strcmp(fname, ROLLBACK_CACHEFILE_NAME)) { //Load rollback cachefile r = file_map_find(&renv->fmap, l->filenum, &tuple); @@ -632,7 +637,7 @@ static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) { toku_free(iname); BOOL must_create = TRUE; - r = internal_recover_fopen_or_fcreate(renv, must_create, l->mode, &l->iname, l->filenum, l->treeflags, l->descriptor_version, &l->descriptor, txn); + r = internal_recover_fopen_or_fcreate(renv, must_create, l->mode, &l->iname, l->filenum, l->treeflags, l->descriptor_version, &l->descriptor, txn, 0); return r; } @@ -641,6 +646,41 @@ static int toku_recover_backward_fcreate (struct logtype_fcreate *UU(l), RECOVER return 0; } +static int toku_recover_fcreate2 (struct logtype_fcreate2 *l, RECOVER_ENV renv) { + int r; + + TOKUTXN txn = NULL; + r = toku_txnid2txn(renv->logger, l->xid, &txn); + assert(r == 0); + + // assert that filenum is closed + struct file_map_tuple *tuple = NULL; + r = file_map_find(&renv->fmap, l->filenum, &tuple); + assert(r==DB_NOTFOUND); + + assert(txn!=NULL); + + //unlink if it exists (recreate from scratch). + char *iname = fixup_fname(&l->iname); + r = unlink(iname); + if (r != 0 && errno != ENOENT) { + fprintf(stderr, "Tokudb recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, errno); + toku_free(iname); + return r; + } + assert(strcmp(iname, ROLLBACK_CACHEFILE_NAME)); //Creation of rollback cachefile never gets logged. 
+ toku_free(iname); + + BOOL must_create = TRUE; + r = internal_recover_fopen_or_fcreate(renv, must_create, l->mode, &l->iname, l->filenum, l->treeflags, l->descriptor_version, &l->descriptor, txn, l->nodesize); + return r; +} + +static int toku_recover_backward_fcreate2 (struct logtype_fcreate2 *UU(l), RECOVER_ENV UU(renv)) { + // nothing + return 0; +} + static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) { int r; @@ -657,7 +697,7 @@ static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) { if (strcmp(fname, ROLLBACK_CACHEFILE_NAME)) { //Rollback cachefile can only be opened via fassociate. - r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, descriptor_version, descriptor, txn); + r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, descriptor_version, descriptor, txn, 0); } toku_free(fname); return r; diff --git a/src/tests/recover-fcreate-nodesize.c b/src/tests/recover-fcreate-nodesize.c new file mode 100644 index 00000000000..200fee91773 --- /dev/null +++ b/src/tests/recover-fcreate-nodesize.c @@ -0,0 +1,163 @@ +// verify that we can create the correct tree type after the db is removed + +#include +#include "test.h" + + +static const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE; + +static char *namea="a.db"; uint32_t nodesizea = 0; +static char *nameb="b.db"; uint32_t nodesizeb = 64*1024; + +static void do_remove(DB_ENV *env, const char *filename) { + int r; +#if TOKUDB + DBT dname; + DBT iname; + dbt_init(&dname, filename, strlen(filename)+1); + dbt_init(&iname, NULL, 0); + iname.flags |= DB_DBT_MALLOC; + r = env->get_iname(env, &dname, &iname); CKERR(r); + if (verbose) printf("%s -> %s\n", filename, (char *) iname.data); + char rmcmd[32 + strlen(ENVDIR) + strlen(iname.data)]; + sprintf(rmcmd, "rm %s/%s", ENVDIR, (char *) iname.data); + r = system(rmcmd); CKERR(r); + 
toku_free(iname.data); +#else + env = env; + char rmcmd[32 + strlen(ENVDIR) + strlen(filename)]; + sprintf(rmcmd, "rm %s/%s", ENVDIR, filename); + r = system(rmcmd); CKERR(r); +#endif +} + +static void run_test (void) { + int r; + + r = system("rm -rf " ENVDIR); + CKERR(r); + toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + r = env->txn_checkpoint(env, 0, 0, 0); CKERR(r); + + DB_TXN *txn; + r = env->txn_begin(env, NULL, &txn, 0); CKERR(r); + + // create a db with the default nodesize + DB *dba; + r = db_create(&dba, env, 0); CKERR(r); + r = dba->get_pagesize(dba, &nodesizea); CKERR(r); + if (verbose) printf("nodesizea=%u", nodesizea); + r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); + r = dba->close(dba, 0); CKERR(r); + + // create a db with a small nodesize + DB *dbb; + r = db_create(&dbb, env, 0); CKERR(r); + r = dbb->set_pagesize(dbb, nodesizeb); CKERR(r); + r = dbb->open(dbb, NULL, nameb, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r); + r = dbb->close(dbb, 0); CKERR(r); + + r = txn->commit(txn, 0); CKERR(r); + + // remove the inames to force recovery to recreate them + do_remove(env, namea); + do_remove(env, nameb); + + toku_hard_crash_on_purpose(); +} + +static void run_recover (void) { + int r; + + // run recovery + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->open(env, ENVDIR, envflags + DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + + // verify that the trees have the correct nodesizes + uint32_t pagesize; + DB *dba; + r = db_create(&dba, env, 0); CKERR(r); + r = dba->open(dba, NULL, namea, NULL, DB_UNKNOWN, DB_AUTO_COMMIT, 0666); CKERR(r); + r = dba->get_pagesize(dba, &pagesize); CKERR(r); + if (verbose) printf("%u\n", pagesize); + // assert(pagesize == nodesizea); + r = dba->close(dba, 0); CKERR(r); + + DB *dbb; + r = db_create(&dbb, env, 0); 
CKERR(r); + r = dbb->open(dbb, NULL, nameb, NULL, DB_UNKNOWN, DB_AUTO_COMMIT, 0666); CKERR(r); + r = dbb->get_pagesize(dbb, &pagesize); CKERR(r); + if (verbose) printf("%u\n", pagesize); + assert(pagesize == nodesizeb); + r = dbb->close(dbb, 0); CKERR(r); + + r = env->close(env, 0); CKERR(r); + exit(0); +} + +static void run_no_recover (void) { + int r; + + DB_ENV *env; + r = db_env_create(&env, 0); CKERR(r); + r = env->open(env, ENVDIR, envflags & ~DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r); + r = env->close(env, 0); CKERR(r); + exit(0); +} + +static const char *cmd; + +static BOOL do_test=FALSE, do_recover=FALSE, do_recover_only=FALSE, do_no_recover = FALSE; + +static void test_parse_args (int argc, char * const argv[]) { + int resultcode; + cmd = argv[0]; + argc--; argv++; + while (argc>0) { + if (strcmp(argv[0], "-v") == 0) { + verbose++; + } else if (strcmp(argv[0],"-q")==0) { + verbose--; + if (verbose<0) verbose=0; + } else if (strcmp(argv[0], "--test")==0) { + do_test=TRUE; + } else if (strcmp(argv[0], "--recover") == 0) { + do_recover=TRUE; + } else if (strcmp(argv[0], "--recover-only") == 0) { + do_recover_only=TRUE; + } else if (strcmp(argv[0], "--no-recover") == 0) { + do_no_recover=TRUE; + } else if (strcmp(argv[0], "-h")==0) { + resultcode=0; + do_usage: + fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] {--test | --recover } \n", cmd); + exit(resultcode); + } else { + fprintf(stderr, "Unknown arg: %s\n", argv[0]); + resultcode=1; + goto do_usage; + } + argc--; + argv++; + } +} + +int test_main (int argc, char * const argv[]) { + test_parse_args(argc, argv); + if (do_test) { + run_test(); + } else if (do_recover) { + run_recover(); + } else if (do_recover_only) { + run_recover(); + } else if (do_no_recover) { + run_no_recover(); + } + return 0; +} diff --git a/src/ydb.c b/src/ydb.c index f35b3c1d255..222ec0dcfdf 100644 --- a/src/ydb.c +++ b/src/ydb.c @@ -5221,6 +5221,13 @@ toku_db_set_pagesize(DB *db, u_int32_t pagesize) { return r; } +static int 
+toku_db_get_pagesize(DB *db, u_int32_t *pagesize_ptr) { + HANDLE_PANICKED_DB(db); + int r = toku_brt_get_nodesize(db->i->brt, pagesize_ptr); + return r; +} + static int toku_db_stat64(DB * db, DB_TXN *txn, DB_BTREE_STAT64 *s) { HANDLE_PANICKED_DB(db); @@ -5634,6 +5641,11 @@ locked_db_set_pagesize(DB *db, u_int32_t pagesize) { toku_ydb_lock(); int r = toku_db_set_pagesize(db, pagesize); toku_ydb_unlock(); return r; } +static int +locked_db_get_pagesize(DB *db, u_int32_t *pagesize_ptr) { + toku_ydb_lock(); int r = toku_db_get_pagesize(db, pagesize_ptr); toku_ydb_unlock(); return r; +} + // TODO 2216 delete this static int locked_db_fd(DB * UU(db), int * UU(fdp)) { @@ -5786,6 +5798,7 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) { SDB(set_descriptor); SDB(set_errfile); SDB(set_pagesize); + SDB(get_pagesize); SDB(set_flags); SDB(get_flags); SDB(stat64);