#3147 merge tokudb block size changes to main refs[t:3147]

git-svn-id: file:///svn/toku/tokudb@26780 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Rich Prohaska 2013-04-16 23:59:34 -04:00 committed by Yoni Fogel
parent 974d5147b5
commit cf3bbe92df
7 changed files with 237 additions and 12 deletions

View file

@ -3314,7 +3314,7 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
if (r != 0) goto died1; if (r != 0) goto died1;
} }
txn_created = (BOOL)(txn!=NULL); txn_created = (BOOL)(txn!=NULL);
r = toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, mode, t->flags, &(t->temp_descriptor)); r = toku_logger_log_fcreate2(txn, fname_in_env, reserved_filenum, mode, t->flags, &(t->temp_descriptor), t->nodesize);
if (r!=0) goto died1; if (r!=0) goto died1;
r = brt_create_file(t, fname_in_cwd, &fd); r = brt_create_file(t, fname_in_cwd, &fd);
} }

View file

@ -122,6 +122,15 @@ const struct logtype logtypes[] = {
{"u_int32_t", "descriptor_version", 0}, {"u_int32_t", "descriptor_version", 0},
{"BYTESTRING", "descriptor", 0}, {"BYTESTRING", "descriptor", 0},
NULLFIELD}}, NULLFIELD}},
{"fcreate2", 'G', FA{{"TXNID", "xid", 0},
{"FILENUM", "filenum", 0},
{"BYTESTRING", "iname", 0},
{"u_int32_t", "mode", "0%o"},
{"u_int32_t", "treeflags", 0},
{"u_int32_t", "descriptor_version", 0},
{"BYTESTRING", "descriptor", 0},
{"u_int32_t", "nodesize", 0},
NULLFIELD}},
//TODO: #2037 Add dname //TODO: #2037 Add dname
{"fopen", 'O', FA{{"BYTESTRING", "iname", 0}, {"fopen", 'O', FA{{"BYTESTRING", "iname", 0},
{"FILENUM", "filenum", 0}, {"FILENUM", "filenum", 0},

View file

@ -854,13 +854,13 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn)
} }
// fname is the iname // fname is the iname
int toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t treeflags, DESCRIPTOR descriptor_p) { int toku_logger_log_fcreate2 (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t treeflags, DESCRIPTOR descriptor_p, u_int32_t nodesize) {
if (txn==0) return 0; if (txn==0) return 0;
if (txn->logger->is_panicked) return EINVAL; if (txn->logger->is_panicked) return EINVAL;
BYTESTRING bs_fname = { .len=strlen(fname), .data = (char *) fname }; BYTESTRING bs_fname = { .len=strlen(fname), .data = (char *) fname };
BYTESTRING bs_descriptor = { .len=descriptor_p->dbt.size, .data = descriptor_p->dbt.data }; BYTESTRING bs_descriptor = { .len=descriptor_p->dbt.size, .data = descriptor_p->dbt.data };
// fsync log on fcreate // fsync log on fcreate
int r = toku_log_fcreate (txn->logger, (LSN*)0, 1, toku_txn_get_txnid(txn), filenum, bs_fname, mode, treeflags, descriptor_p->version, bs_descriptor); int r = toku_log_fcreate2 (txn->logger, (LSN*)0, 1, toku_txn_get_txnid(txn), filenum, bs_fname, mode, treeflags, descriptor_p->version, bs_descriptor, nodesize);
return r; return r;
} }

View file

@ -57,7 +57,7 @@ int toku_logger_restart(TOKULOGGER logger, LSN lastlsn);
// Returns: 0 if success // Returns: 0 if success
int toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn); int toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn);
int toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t flags, DESCRIPTOR descriptor_p); int toku_logger_log_fcreate2 (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t flags, DESCRIPTOR descriptor_p, u_int32_t nodesize);
int toku_logger_log_fdelete (TOKUTXN txn, const char *fname); int toku_logger_log_fdelete (TOKUTXN txn, const char *fname);
int toku_logger_log_fopen (TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags); int toku_logger_log_fopen (TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags);
int toku_logger_log_descriptor (TOKUTXN txn, FILENUM filenum, DESCRIPTOR descriptor_p); int toku_logger_log_descriptor (TOKUTXN txn, FILENUM filenum, DESCRIPTOR descriptor_p);

View file

@ -238,7 +238,7 @@ static void recover_yield(voidfp f, void *fpthunk, void *UU(yieldthunk)) {
// Open the file if it is not already open. If it is already open, then do nothing. // Open the file if it is not already open. If it is already open, then do nothing.
static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create, int mode, BYTESTRING *bs_iname, FILENUM filenum, u_int32_t treeflags, static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create, int mode, BYTESTRING *bs_iname, FILENUM filenum, u_int32_t treeflags,
u_int32_t descriptor_version, BYTESTRING* descriptor, TOKUTXN txn) { u_int32_t descriptor_version, BYTESTRING* descriptor, TOKUTXN txn, uint32_t nodesize) {
int r; int r;
char *iname = fixup_fname(bs_iname); char *iname = fixup_fname(bs_iname);
@ -247,12 +247,17 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create
assert(r == 0); assert(r == 0);
r = toku_brt_set_flags(brt, treeflags); r = toku_brt_set_flags(brt, treeflags);
assert(r==0); assert(r == 0);
if (nodesize != 0) {
r = toku_brt_set_nodesize(brt, nodesize);
assert(r == 0);
}
// set the key compare functions // set the key compare functions
if (!(treeflags & TOKU_DB_KEYCMP_BUILTIN) && renv->bt_compare) { if (!(treeflags & TOKU_DB_KEYCMP_BUILTIN) && renv->bt_compare) {
r = toku_brt_set_bt_compare(brt, renv->bt_compare); r = toku_brt_set_bt_compare(brt, renv->bt_compare);
assert(r==0); assert(r == 0);
} }
// TODO mode (FUTURE FEATURE) // TODO mode (FUTURE FEATURE)
@ -264,7 +269,7 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create
DBT descriptor_dbt; DBT descriptor_dbt;
toku_fill_dbt(&descriptor_dbt, descriptor->data, descriptor->len); toku_fill_dbt(&descriptor_dbt, descriptor->data, descriptor->len);
r = toku_brt_set_descriptor(brt, descriptor_version, &descriptor_dbt); r = toku_brt_set_descriptor(brt, descriptor_version, &descriptor_dbt);
if (r!=0) goto close_brt; if (r != 0) goto close_brt;
} }
r = toku_brt_open_recovery(brt, iname, must_create, must_create, renv->ct, txn, fake_db, filenum); r = toku_brt_open_recovery(brt, iname, must_create, must_create, renv->ct, txn, fake_db, filenum);
if (r != 0) { if (r != 0) {
@ -275,7 +280,7 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create
int rr = toku_close_brt(brt, NULL); assert(rr == 0); int rr = toku_close_brt(brt, NULL); assert(rr == 0);
toku_free(iname); toku_free(iname);
toku_free(fake_db); //Free memory allocated for the fake db. toku_free(fake_db); //Free memory allocated for the fake db.
if (r==ENOENT) //Not an error to simply be missing. if (r == ENOENT) //Not an error to simply be missing.
r = 0; r = 0;
return r; return r;
} }
@ -404,7 +409,7 @@ static int toku_recover_fassociate (struct logtype_fassociate *l, RECOVER_ENV re
renv->ss.checkpoint_num_fassociate++; renv->ss.checkpoint_num_fassociate++;
assert(r==DB_NOTFOUND); //Not open assert(r==DB_NOTFOUND); //Not open
// open it if it exists // open it if it exists
r = internal_recover_fopen_or_fcreate(renv, FALSE, 0, &l->iname, l->filenum, l->treeflags, 0, NULL, NULL); r = internal_recover_fopen_or_fcreate(renv, FALSE, 0, &l->iname, l->filenum, l->treeflags, 0, NULL, NULL, 0);
if (r==0 && !strcmp(fname, ROLLBACK_CACHEFILE_NAME)) { if (r==0 && !strcmp(fname, ROLLBACK_CACHEFILE_NAME)) {
//Load rollback cachefile //Load rollback cachefile
r = file_map_find(&renv->fmap, l->filenum, &tuple); r = file_map_find(&renv->fmap, l->filenum, &tuple);
@ -632,7 +637,7 @@ static int toku_recover_fcreate (struct logtype_fcreate *l, RECOVER_ENV renv) {
toku_free(iname); toku_free(iname);
BOOL must_create = TRUE; BOOL must_create = TRUE;
r = internal_recover_fopen_or_fcreate(renv, must_create, l->mode, &l->iname, l->filenum, l->treeflags, l->descriptor_version, &l->descriptor, txn); r = internal_recover_fopen_or_fcreate(renv, must_create, l->mode, &l->iname, l->filenum, l->treeflags, l->descriptor_version, &l->descriptor, txn, 0);
return r; return r;
} }
@ -641,6 +646,41 @@ static int toku_recover_backward_fcreate (struct logtype_fcreate *UU(l), RECOVER
return 0; return 0;
} }
// Replay an fcreate2 log entry during recovery: remove any existing file for
// the logged iname and recreate the dictionary, passing along the nodesize
// that was recorded in the log entry.
// Returns 0 on success; returns unlink's result if the unlink fails for a
// reason other than ENOENT; otherwise returns whatever
// internal_recover_fopen_or_fcreate returns.
static int toku_recover_fcreate2 (struct logtype_fcreate2 *l, RECOVER_ENV renv) {
    // Map the logged xid to its transaction; the lookup must succeed.
    TOKUTXN txn = NULL;
    int r = toku_txnid2txn(renv->logger, l->xid, &txn);
    assert(r == 0);

    // assert that filenum is closed (not present in the recovery file map)
    struct file_map_tuple *existing = NULL;
    r = file_map_find(&renv->fmap, l->filenum, &existing);
    assert(r==DB_NOTFOUND);
    assert(txn!=NULL);

    // unlink if it exists (recreate from scratch); a missing file is fine.
    char *iname = fixup_fname(&l->iname);
    r = unlink(iname);
    if (r != 0) {
        if (errno != ENOENT) {
            fprintf(stderr, "Tokudb recovery %s:%d unlink %s %d\n", __FUNCTION__, __LINE__, iname, errno);
            toku_free(iname);
            return r;
        }
    }
    assert(strcmp(iname, ROLLBACK_CACHEFILE_NAME)); //Creation of rollback cachefile never gets logged.
    toku_free(iname);

    // must_create is TRUE here: the file was just removed (or never existed).
    return internal_recover_fopen_or_fcreate(renv, TRUE, l->mode, &l->iname, l->filenum, l->treeflags,
                                             l->descriptor_version, &l->descriptor, txn, l->nodesize);
}
// Backward-scan handler for fcreate2 log entries.
// Both arguments are unused; the handler does no work and always returns 0.
static int toku_recover_backward_fcreate2 (struct logtype_fcreate2 *UU(l), RECOVER_ENV UU(renv)) {
    return 0;
}
static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) { static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) {
int r; int r;
@ -657,7 +697,7 @@ static int toku_recover_fopen (struct logtype_fopen *l, RECOVER_ENV renv) {
if (strcmp(fname, ROLLBACK_CACHEFILE_NAME)) { if (strcmp(fname, ROLLBACK_CACHEFILE_NAME)) {
//Rollback cachefile can only be opened via fassociate. //Rollback cachefile can only be opened via fassociate.
r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, descriptor_version, descriptor, txn); r = internal_recover_fopen_or_fcreate(renv, must_create, 0, &l->iname, l->filenum, l->treeflags, descriptor_version, descriptor, txn, 0);
} }
toku_free(fname); toku_free(fname);
return r; return r;

View file

@ -0,0 +1,163 @@
// verify that we can create the correct tree type after the db is removed
#include <sys/stat.h>
#include "test.h"
// Flags passed to every env->open call in this test (run_recover adds DB_RECOVER to them).
static const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE;
// a.db is created without calling set_pagesize; run_test stores the value
// reported by get_pagesize on the unopened handle into nodesizea.
static char *namea="a.db"; uint32_t nodesizea = 0;
// b.db is created after set_pagesize(nodesizeb); run_recover asserts that the
// db reports this page size once recovery has run.
static char *nameb="b.db"; uint32_t nodesizeb = 64*1024;
// Delete the on-disk file backing the db called `filename` inside ENVDIR by
// running "rm" through system().
// With TOKUDB the file on disk is named by the iname returned from
// env->get_iname for this dname; otherwise `filename` itself is removed.
static void do_remove(DB_ENV *env, const char *filename) {
    int r;
#if TOKUDB
    DBT dname;
    DBT iname;
    // The dname key includes its terminating NUL byte.
    dbt_init(&dname, filename, strlen(filename)+1);
    dbt_init(&iname, NULL, 0);
    iname.flags |= DB_DBT_MALLOC;   // iname.data is released with toku_free below
    r = env->get_iname(env, &dname, &iname); CKERR(r);
    if (verbose) printf("%s -> %s\n", filename, (char *) iname.data);
    const char *target = iname.data;
#else
    (void) env;
    const char *target = filename;
#endif

    // Room for "rm ", the '/' separator and the terminating NUL is covered by the 32 bytes of slack.
    char rmcmd[32 + strlen(ENVDIR) + strlen(target)];
    sprintf(rmcmd, "rm %s/%s", ENVDIR, target);
    r = system(rmcmd); CKERR(r);

#if TOKUDB
    toku_free(iname.data);
#endif
}
// --test phase: build a fresh environment, create a.db with the default node
// size and b.db with nodesizeb, commit, then delete both underlying files and
// end with toku_hard_crash_on_purpose() so that a later --recover run has to
// recreate the files from the log.
// NOTE(review): toku_hard_crash_on_purpose() presumably terminates the process
// without a clean shutdown -- confirm against its definition.
static void run_test (void) {
    int r;

    // Start from an empty environment directory. The mkdir result is checked so
    // that a setup failure is reported here rather than as a confusing
    // env->open error.
    r = system("rm -rf " ENVDIR);
    CKERR(r);
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);

    DB_ENV *env;
    r = db_env_create(&env, 0); CKERR(r);
    r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = env->txn_checkpoint(env, 0, 0, 0); CKERR(r);

    DB_TXN *txn;
    r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);

    // create a db with the default nodesize, remembering what get_pagesize
    // reports on the handle before it is opened
    DB *dba;
    r = db_create(&dba, env, 0); CKERR(r);
    r = dba->get_pagesize(dba, &nodesizea); CKERR(r);
    if (verbose) printf("nodesizea=%u\n", nodesizea);
    r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    r = dba->close(dba, 0); CKERR(r);

    // create a db with a small nodesize
    DB *dbb;
    r = db_create(&dbb, env, 0); CKERR(r);
    r = dbb->set_pagesize(dbb, nodesizeb); CKERR(r);
    r = dbb->open(dbb, NULL, nameb, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    r = dbb->close(dbb, 0); CKERR(r);

    r = txn->commit(txn, 0); CKERR(r);

    // remove the inames to force recovery to recreate them
    do_remove(env, namea);
    do_remove(env, nameb);

    toku_hard_crash_on_purpose();
}
// --recover phase: open the environment with DB_RECOVER added to envflags,
// then open a.db and b.db and read back their page sizes. Only b.db's size is
// asserted (against nodesizeb). Exits the process with status 0 on success.
static void run_recover (void) {
    int r;
    const int env_mode = S_IRWXU+S_IRWXG+S_IRWXO;

    // run recovery
    DB_ENV *env;
    r = db_env_create(&env, 0);
    CKERR(r);
    r = env->open(env, ENVDIR, envflags + DB_RECOVER, env_mode);
    CKERR(r);

    // verify that the trees have the correct nodesizes
    uint32_t pagesize;

    DB *db_default;
    r = db_create(&db_default, env, 0);
    CKERR(r);
    r = db_default->open(db_default, NULL, namea, NULL, DB_UNKNOWN, DB_AUTO_COMMIT, 0666);
    CKERR(r);
    r = db_default->get_pagesize(db_default, &pagesize);
    CKERR(r);
    if (verbose) {
        printf("%u\n", pagesize);
    }
    // NOTE(review): a.db's size is not compared with nodesizea. nodesizea is
    // only assigned inside run_test, and test_main runs either run_test or
    // run_recover per invocation, so it presumably still holds its initial 0 here.
    r = db_default->close(db_default, 0);
    CKERR(r);

    DB *db_small;
    r = db_create(&db_small, env, 0);
    CKERR(r);
    r = db_small->open(db_small, NULL, nameb, NULL, DB_UNKNOWN, DB_AUTO_COMMIT, 0666);
    CKERR(r);
    r = db_small->get_pagesize(db_small, &pagesize);
    CKERR(r);
    if (verbose) {
        printf("%u\n", pagesize);
    }
    assert(pagesize == nodesizeb);
    r = db_small->close(db_small, 0);
    CKERR(r);

    r = env->close(env, 0);
    CKERR(r);
    exit(0);
}
// --no-recover phase: open the environment with the DB_RECOVER bit cleared
// from envflags, close it again, and exit with status 0.
static void run_no_recover (void) {
    const int flags_without_recover = envflags & ~DB_RECOVER;
    const int env_mode = S_IRWXU+S_IRWXG+S_IRWXO;
    int r;

    DB_ENV *env;
    r = db_env_create(&env, 0);
    CKERR(r);
    r = env->open(env, ENVDIR, flags_without_recover, env_mode);
    CKERR(r);
    r = env->close(env, 0);
    CKERR(r);
    exit(0);
}
// Program name (argv[0]) saved by test_parse_args for the usage message.
static const char *cmd;
// Mode switches set by test_parse_args from --test, --recover, --recover-only
// and --no-recover; test_main dispatches on them in that order.
static BOOL do_test=FALSE, do_recover=FALSE, do_recover_only=FALSE, do_no_recover = FALSE;
// Parse the command line into the file-scope mode flags.
//   -v / -q          raise / lower `verbose` (never below 0)
//   --test           set do_test
//   --recover        set do_recover
//   --recover-only   set do_recover_only
//   --no-recover     set do_no_recover
//   -h               print usage to stderr and exit(0)
// Any other argument prints "Unknown arg", then the usage text, and exit(1).
// argv[0] is saved in `cmd` for the usage message.
static void test_parse_args (int argc, char * const argv[]) {
    cmd = argv[0];
    for (int i = 1; i < argc; i++) {
        const char *arg = argv[i];
        if (strcmp(arg, "-v") == 0) {
            verbose++;
        } else if (strcmp(arg, "-q") == 0) {
            verbose--;
            if (verbose < 0) verbose = 0;
        } else if (strcmp(arg, "--test") == 0) {
            do_test = TRUE;
        } else if (strcmp(arg, "--recover") == 0) {
            do_recover = TRUE;
        } else if (strcmp(arg, "--recover-only") == 0) {
            do_recover_only = TRUE;
        } else if (strcmp(arg, "--no-recover") == 0) {
            do_no_recover = TRUE;
        } else {
            // Both -h and an unrecognized argument end in the usage text;
            // only the exit status and the leading diagnostic differ.
            int resultcode = 0;
            if (strcmp(arg, "-h") != 0) {
                fprintf(stderr, "Unknown arg: %s\n", arg);
                resultcode = 1;
            }
            // List every mode the parser accepts, not just --test and --recover.
            fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] {--test | --recover | --recover-only | --no-recover}\n", cmd);
            exit(resultcode);
        }
    }
}
// Test entry point: parse the arguments, then run the first phase whose flag
// is set, checked in the order --test, --recover / --recover-only,
// --no-recover. Returns 0 if no phase was selected or the phase returns.
int test_main (int argc, char * const argv[]) {
    test_parse_args(argc, argv);

    if (do_test) {
        run_test();
    } else if (do_recover || do_recover_only) {
        // Both flags select the same recovery routine.
        run_recover();
    } else if (do_no_recover) {
        run_no_recover();
    }
    return 0;
}

View file

@ -5221,6 +5221,13 @@ toku_db_set_pagesize(DB *db, u_int32_t pagesize) {
return r; return r;
} }
// Store the db's page size in *pagesize_ptr by asking the underlying brt for
// its nodesize. Returns whatever toku_brt_get_nodesize returns.
// NOTE(review): HANDLE_PANICKED_DB presumably returns early with an error when
// the db's environment has panicked -- confirm against the macro definition.
static int
toku_db_get_pagesize(DB *db, u_int32_t *pagesize_ptr) {
    HANDLE_PANICKED_DB(db);
    return toku_brt_get_nodesize(db->i->brt, pagesize_ptr);
}
static int static int
toku_db_stat64(DB * db, DB_TXN *txn, DB_BTREE_STAT64 *s) { toku_db_stat64(DB * db, DB_TXN *txn, DB_BTREE_STAT64 *s) {
HANDLE_PANICKED_DB(db); HANDLE_PANICKED_DB(db);
@ -5634,6 +5641,11 @@ locked_db_set_pagesize(DB *db, u_int32_t pagesize) {
toku_ydb_lock(); int r = toku_db_set_pagesize(db, pagesize); toku_ydb_unlock(); return r; toku_ydb_lock(); int r = toku_db_set_pagesize(db, pagesize); toku_ydb_unlock(); return r;
} }
// Call toku_db_get_pagesize between toku_ydb_lock() and toku_ydb_unlock(),
// returning its result unchanged.
static int
locked_db_get_pagesize(DB *db, u_int32_t *pagesize_ptr) {
    toku_ydb_lock();
    int r = toku_db_get_pagesize(db, pagesize_ptr);
    toku_ydb_unlock();
    return r;
}
// TODO 2216 delete this // TODO 2216 delete this
static int static int
locked_db_fd(DB * UU(db), int * UU(fdp)) { locked_db_fd(DB * UU(db), int * UU(fdp)) {
@ -5786,6 +5798,7 @@ toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags) {
SDB(set_descriptor); SDB(set_descriptor);
SDB(set_errfile); SDB(set_errfile);
SDB(set_pagesize); SDB(set_pagesize);
SDB(get_pagesize);
SDB(set_flags); SDB(set_flags);
SDB(get_flags); SDB(get_flags);
SDB(stat64); SDB(stat64);