/* -*- mode: C; c-basic-offset: 4 -*- */ #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." /* Recover an env. The logs are in argv[1]. The new database is created in the cwd. */ // Test: // cd ../src/tests/tmpdir // ../../../newbrt/recover ../dir.test_log2.c.tdb #include "includes.h" //#define DO_VERIFY_COUNTS #ifdef DO_VERIFY_COUNTS #define VERIFY_COUNTS(n) toku_verify_counts(n) #else #define VERIFY_COUNTS(n) ((void)0) #endif static DB * const null_db=0; // These data structures really should be part of a recovery data structure. Recovery could be multithreaded (on different environments...) But this is OK since recovery can only happen in one static CACHETABLE ct; static struct cf_pair { FILENUM filenum; CACHEFILE cf; BRT brt; // set to zero on an fopen, but filled in when an fheader is seen. } *cf_pairs; static int n_cf_pairs=0, max_cf_pairs=0; int toku_recover_init (void) { int r = toku_create_cachetable(&ct, 1<<25, (LSN){0}, 0); return r; } void toku_recover_cleanup (void) { int i; for (i=0; i0) { tmp[i]=0; mode_t oldu = umask(0); int r = toku_os_mkdir(tmp, S_IRWXU); if (r!=0 && errno!=EEXIST) { printf("error: %s\n", strerror(errno)); } assert (r==0 || (errno==EEXIST)); umask(oldu); tmp[i]=ch; } } } toku_free(tmp); } static void toku_recover_fcreate (LSN UU(lsn), TXNID UU(txnid), FILENUM UU(filenum), BYTESTRING fname,u_int32_t mode) { char *fixed_fname = fixup_fname(&fname); create_dir_from_file(fixed_fname); int fd = open(fixed_fname, O_CREAT+O_TRUNC+O_WRONLY+O_BINARY, mode); assert(fd>=0); toku_free(fixed_fname); toku_free_BYTESTRING(fname); int r = close(fd); assert(r==0); } static int toku_recover_note_cachefile (FILENUM fnum, CACHEFILE cf, BRT brt) { if (max_cf_pairs==0) { n_cf_pairs=1; max_cf_pairs=2; MALLOC_N(max_cf_pairs, cf_pairs); if (cf_pairs==0) return errno; } else { if (n_cf_pairs>=max_cf_pairs) { max_cf_pairs*=2; cf_pairs = toku_realloc(cf_pairs, max_cf_pairs*sizeof(*cf_pairs)); } n_cf_pairs++; } cf_pairs[n_cf_pairs-1].filenum = fnum; cf_pairs[n_cf_pairs-1].cf = cf; cf_pairs[n_cf_pairs-1].brt = brt; return 0; } static int find_cachefile (FILENUM fnum, struct cf_pair **cf_pair) { int i; for (i=0; idirty=0; h->panic=0; h->panic_string=0; XMALLOC(h->flags_array); h->flags_array[0] = header.flags; h->nodesize = header.nodesize; assert(h->blocktable /* Not initialized. Is this used? */); toku_block_recovery_set_free_blocks(h->blocktable, header.free_blocks); toku_block_recovery_set_unused_blocks(h->blocktable, header.unused_blocks); h->n_named_roots = header.n_named_roots; r=toku_fifo_create(&h->fifo); assert(r==0); if ((signed)header.n_named_roots==-1) { MALLOC_N(1, h->roots); assert(h->roots); MALLOC_N(1, h->root_hashes); assert(h->root_hashes); h->roots[0] = header.u.one.root; h->root_hashes[0].valid= FALSE; } else { assert(0); } //toku_cachetable_put(pair->cf, header_blocknum, fullhash, h, 0, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0); if (pair->brt) { toku_free(pair->brt->h); } else { r = toku_brt_create(&pair->brt); assert(r==0); pair->brt->cf = pair->cf; pair->brt->database_name = 0; // Special case, we don't know or care what the database name is for recovery. list_init(&pair->brt->cursors); pair->brt->compare_fun = 0; pair->brt->dup_compare = 0; pair->brt->db = 0; } pair->brt->h = h; pair->brt->nodesize = h->nodesize; pair->brt->flags = h->nodesize; toku_cachefile_set_userdata(pair->cf, pair->brt->h, toku_brtheader_close, toku_brtheader_checkpoint); } static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum) { struct cf_pair *pair = NULL; int r = find_cachefile(filenum, &pair); assert(r==0); //void *h_v; //r = toku_cachetable_get_and_pin(pair->cf, header_blocknum, fullhash, // &h_v, NULL, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0); struct brt_header *h=0; bytevec storedkey,storeddata; ITEMLEN storedkeylen, storeddatalen; TXNID storedxid; u_int32_t storedtype; r = toku_fifo_peek(h->fifo, &storedkey, &storedkeylen, &storeddata, &storeddatalen, &storedtype, &storedxid); assert(r==0); r = toku_fifo_deq(h->fifo); assert(r==0); //r = toku_cachetable_unpin(pair->cf, header_blocknum, fullhash, CACHETABLE_DIRTY, 0); //assert(r==0); } static void toku_recover_enqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING val) { struct cf_pair *pair = NULL; int r = find_cachefile(filenum, &pair); assert(r==0); void *h_v; u_int32_t fullhash = toku_cachetable_hash(pair->cf, header_blocknum); if (0) { //r = toku_cachetable_get_and_pin(pair->cf, header_blocknum, fullhash, &h_v, NULL, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0); } else { h_v=0; assert(0); } assert(r==0); struct brt_header *h=h_v; r = toku_fifo_enq(h->fifo, key.data, key.len, val.data, val.len, typ, xid); assert(r==0); r = toku_cachetable_unpin(pair->cf, header_blocknum, fullhash, CACHETABLE_DIRTY, 0); assert(r==0); toku_free(key.data); toku_free(val.data); } static void toku_recover_fopen (LSN UU(lsn), TXNID UU(txnid), BYTESTRING fname, FILENUM filenum) { char *fixedfname = fixup_fname(&fname); CACHEFILE cf; int fd = open(fixedfname, O_RDWR+O_BINARY, 0); assert(fd>=0); BRT brt=0; int r = toku_brt_create(&brt); assert(r==0); brt->fname = fixedfname; brt->database_name = 0; brt->h=0; brt->compare_fun = 0; brt->dup_compare = 0; brt->db = 0; r = toku_cachetable_openfd(&cf, ct, fd, fixedfname); assert(r==0); brt->cf=cf; toku_recover_note_cachefile(filenum, cf, brt); toku_free_BYTESTRING(fname); } static void toku_recover_brtclose (LSN UU(lsn), BYTESTRING UU(fname), FILENUM filenum) { struct cf_pair *pair = NULL; int r = find_cachefile(filenum, &pair); assert(r==0); // Bump up the reference count toku_cachefile_refup(pair->cf); r = toku_close_brt(pair->brt, 0, 0); assert(r==0); pair->brt=0; toku_free_BYTESTRING(fname); } static void toku_recover_cfclose (LSN UU(lsn), BYTESTRING UU(fname), FILENUM filenum) { int i; for (i=0; ibrt); assert(pair->brt->h); pair->brt->h->roots[0] = newroot; pair->brt->h->root_hashes[0].valid = FALSE; } static void toku_recover_changenamedroot (LSN UU(lsn), FILENUM UU(filenum), BYTESTRING UU(name), BLOCKNUM UU(oldroot), BLOCKNUM UU(newroot)) { assert(0); } static void toku_recover_changeunusedmemory (LSN UU(lsn), FILENUM filenum, BLOCKNUM UU(oldunused), BLOCKNUM newunused) { struct cf_pair *pair = NULL; int r = find_cachefile(filenum, &pair); assert(r==0); assert(pair->brt); assert(pair->brt->h); toku_block_recovery_set_unused_blocks(pair->brt->h->blocktable, newunused); } static int toku_recover_checkpoint (LSN UU(lsn)) { return 0; } static int toku_recover_xbegin (LSN UU(lsn), TXNID UU(parent)) { return 0; } static int toku_delete_rolltmp_files (const char *log_dir) { struct dirent *de; DIR *d = opendir(log_dir); if (d==0) { return errno; } int result = 0; while ((de=readdir(d))) { char rolltmp_prefix[] = "__rolltmp."; int r = memcmp(de->d_name, rolltmp_prefix, sizeof(rolltmp_prefix) - 1); if (r==0) { int fnamelen = strlen(log_dir) + strlen(de->d_name) + 2; // One for the slash and one for the trailing NUL. char fname[fnamelen]; int l = snprintf(fname, fnamelen, "%s/%s", log_dir, de->d_name); assert(l+1 == fnamelen); r = unlink(fname); if (r!=0) { result = errno; perror("Trying to delete a rolltmp file"); } } } { int r = closedir(d); if (r==-1) return errno; } return result; } int tokudb_recover(const char *data_dir, const char *log_dir) { int failresult = 0; int r; int entrycount=0; char **logfiles; int lockfd; { const char fname[] = "/__recoverylock_dont_delete_me"; int namelen=strlen(data_dir); char lockfname[namelen+sizeof(fname)]; int l = snprintf(lockfname, sizeof(lockfname), "%s%s", data_dir, fname); assert(l+1 == (signed)(sizeof(lockfname))); lockfd = toku_os_lock_file(lockfname); if (lockfd<0) { printf("Couldn't run recovery because some other process holds the recovery lock %s\n", lockfname); return errno; } } r = toku_delete_rolltmp_files(log_dir); if (r!=0) { failresult=r; goto fail; } r = toku_logger_find_logfiles(log_dir, &logfiles); if (r!=0) { failresult=r; goto fail; } int i; toku_recover_init(); char org_wd[1000]; { char *wd=getcwd(org_wd, sizeof(org_wd)); assert(wd!=0); //printf("%s:%d org_wd=\"%s\"\n", __FILE__, __LINE__, org_wd); } char data_wd[1000]; { r=chdir(data_dir); assert(r==0); char *wd=getcwd(data_wd, sizeof(data_wd)); assert(wd!=0); //printf("%s:%d data_wd=\"%s\"\n", __FILE__, __LINE__, data_wd); } for (i=0; logfiles[i]; i++) { //fprintf(stderr, "Opening %s\n", logfiles[i]); r=chdir(org_wd); assert(r==0); FILE *f = fopen(logfiles[i], "r"); struct log_entry le; u_int32_t version; //printf("Reading file %s\n", logfiles[i]); r=toku_read_and_print_logmagic(f, &version); assert(r==0 && version==0); r=chdir(data_wd); assert(r==0); while ((r = toku_log_fread(f, &le))==0) { //printf("%lld: Got cmd %c\n", (long long)le.u.commit.lsn.lsn, le.cmd); logtype_dispatch_args(&le, toku_recover_); entrycount++; } if (r!=EOF) { if (r==DB_BADFORMAT) { fprintf(stderr, "Bad log format at record %d\n", entrycount); return r; } else { fprintf(stderr, "Huh? %s\n", strerror(r)); return r; } } fclose(f); } toku_recover_cleanup(); for (i=0; logfiles[i]; i++) { toku_free(logfiles[i]); } toku_free(logfiles); r=toku_os_unlock_file(lockfd); if (r!=0) return errno; r=chdir(org_wd); if (r!=0) return errno; //printf("%s:%d recovery successful! ls -l says\n", __FILE__, __LINE__); //system("ls -l"); return 0; fail: toku_os_unlock_file(lockfd); chdir(org_wd); return failresult; }