2007-11-29 14:18:54 +00:00
/* -*- mode: C; c-basic-offset: 4 -*- */
2008-01-24 15:10:32 +00:00
# ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
2007-11-29 14:18:54 +00:00
2007-11-23 18:27:50 +00:00
/* Recover an env. The logs are in argv[1]. The new database is created in the cwd. */
// Test:
// cd ../src/tests/tmpdir
// ../../../newbrt/recover ../dir.test_log2.c.tdb
2007-11-23 20:36:03 +00:00
# include "cachetable.h"
# include "key.h"
2008-02-08 03:17:38 +00:00
# include "log-internal.h"
# include "log_header.h"
# include "toku_assert.h"
2008-04-02 23:40:36 +00:00
# include "kv-pair.h"
# include "gpma-internal.h"
2008-03-12 19:40:38 +00:00
2008-02-08 03:17:38 +00:00
# include <fcntl.h>
# include <stdlib.h>
2008-03-12 19:40:38 +00:00
# include <sys/file.h>
# include <sys/stat.h>
2008-02-08 03:17:38 +00:00
# include <unistd.h>
2007-11-23 20:36:03 +00:00
2008-04-03 22:27:32 +00:00
# define DO_VERIFY_COUNTS
2008-03-14 19:14:31 +00:00
/* VERIFY_COUNTS(n) expands to toku_verify_counts(n) when DO_VERIFY_COUNTS is defined,
 * and to a no-op expression otherwise. */
# ifdef DO_VERIFY_COUNTS
# define VERIFY_COUNTS(n) toku_verify_counts(n)
# else
# define VERIFY_COUNTS(n) ((void)0)
# endif
static DB * const null_db = 0;

// Recovery state.  These really should be part of a recovery data structure, but they are
// file-scope statics, so only one recovery can be in progress in a process at a time.
static CACHETABLE ct;

// One entry per file registered through toku_recover_note_cachefile().
static struct cf_pair {
    FILENUM   filenum; // file number used by the log records to name this file
    CACHEFILE cf;      // cachefile the file's blocks are pinned through
    BRT       brt;     // BRT for the file; toku_recover_fheader allocates one if this is still zero
} *cf_pairs;

// n_cf_pairs is the number of entries of cf_pairs in use; max_cf_pairs is the allocated capacity.
static int n_cf_pairs = 0, max_cf_pairs = 0;
int toku_recover_init ( void ) {
int r = toku_create_cachetable ( & ct , 1 < < 25 , ( LSN ) { 0 } , 0 ) ;
return r ;
}
void toku_recover_cleanup ( void ) {
int i ;
for ( i = 0 ; i < n_cf_pairs ; i + + ) {
if ( cf_pairs [ i ] . brt ) {
int r = toku_close_brt ( cf_pairs [ i ] . brt ) ;
//r = toku_cachefile_close(&cf_pairs[i].cf);
assert ( r = = 0 ) ;
}
}
toku_free ( cf_pairs ) ;
{
int r = toku_cachetable_close ( & ct ) ;
assert ( r = = 0 ) ;
}
}
/* Replay a commit record.  Nothing is done; both arguments are unused. */
void toku_recover_commit (LSN UU(lsn), TXNID UU(txnid)) {
    /* intentionally empty */
}
/* Replay a file-creation record: create the file named by fname (after fixup_fname) with
 * the logged mode.  Frees fname.  Failure to create the file is fatal (assert). */
void toku_recover_fcreate (LSN UU(lsn), TXNID UU(txnid), BYTESTRING fname, u_int32_t mode) {
    char *fixed_fname = fixup_fname(&fname);
    int fd = creat(fixed_fname, mode);
    assert(fd >= 0);
    {
        // Only the file's existence matters here; don't leak the descriptor.
        int r = close(fd);
        assert(r == 0);
    }
    toku_free(fixed_fname);
    toku_free_BYTESTRING(fname);
}
/* Append (fnum, cf, brt) to the cf_pairs table, growing the table as needed.
 * Returns 0 on success, or errno if the table could not be allocated or grown; on failure
 * the table and its counters are left exactly as they were. */
int toku_recover_note_cachefile (FILENUM fnum, CACHEFILE cf, BRT brt) {
    if (max_cf_pairs == 0) {
        // First use: allocate the initial table.
        struct cf_pair *MALLOC_N(2, first_pairs);
        if (first_pairs == 0) return errno;
        cf_pairs     = first_pairs;
        max_cf_pairs = 2;
        n_cf_pairs   = 0;
    } else if (n_cf_pairs >= max_cf_pairs) {
        // Full: double the capacity.  Keep the old pointer until the realloc is known to have worked.
        int bigger = max_cf_pairs * 2;
        struct cf_pair *grown = toku_realloc(cf_pairs, bigger * sizeof(*cf_pairs));
        if (grown == 0) return errno;
        cf_pairs     = grown;
        max_cf_pairs = bigger;
    }
    cf_pairs[n_cf_pairs].filenum = fnum;
    cf_pairs[n_cf_pairs].cf      = cf;
    cf_pairs[n_cf_pairs].brt     = brt;
    n_cf_pairs++;
    return 0;
}
/* Look up the cf_pairs entry whose file number matches fnum.
 * On a match, stores a pointer to the entry in *cf_pair and returns 0.
 * Returns 1 (leaving *cf_pair untouched) if no entry matches. */
static int find_cachefile (FILENUM fnum, struct cf_pair **cf_pair) {
    int idx = 0;
    while (idx < n_cf_pairs) {
        if (cf_pairs[idx].filenum.fileid == fnum.fileid) {
            *cf_pair = &cf_pairs[idx];
            return 0;
        }
        idx++;
    }
    return 1;
}
/* Replay a file-header record: build an in-memory brt_header from the logged header, put it
 * in the cachetable at offset 0 of the file, and attach it to the file's BRT (allocating a
 * bare BRT if the file does not have one yet).  Only headers with an unnamed root
 * (n_named_roots == -1) are supported; anything else asserts. */
static void toku_recover_fheader (LSN UU(lsn), TXNID UU(txnid), FILENUM filenum, LOGGEDBRTHEADER header) {
    struct cf_pair *pair = NULL;
    int r = find_cachefile(filenum, &pair);
    assert(r == 0);

    struct brt_header *MALLOC(h);
    assert(h);
    h->dirty         = 0;
    h->flags         = header.flags;
    h->nodesize      = header.nodesize;
    h->freelist      = header.freelist;
    h->unused_memory = header.unused_memory;
    h->n_named_roots = header.n_named_roots;
    if ((signed)header.n_named_roots == -1) {
        h->unnamed_root = header.u.one.root;
    } else {
        assert(0);
    }
    toku_cachetable_put(pair->cf, 0, h, 0, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0);

    if (pair->brt) {
        // Release the previous header with the allocator the rest of this file uses.
        // NOTE(review): assumes brt->h always comes from toku's allocator -- confirm.
        toku_free(pair->brt->h);
    } else {
        MALLOC(pair->brt);
        assert(pair->brt);
        pair->brt->cf = pair->cf;
        pair->brt->database_name = 0; // Special case, we don't know or care what the database name is for recovery.
        list_init(&pair->brt->cursors);
        pair->brt->compare_fun = 0;
        pair->brt->dup_compare = 0;
        pair->brt->db = 0;
        pair->brt->skey = pair->brt->sval = 0;
    }
    pair->brt->h        = h;
    pair->brt->nodesize = h->nodesize;
    pair->brt->flags    = h->flags;   // was h->nodesize: the BRT's flags must mirror the header's flags
    r = toku_unpin_brt_header(pair->brt);
    assert(r == 0);
}
/* Replay a new-node record: build an empty BRT node with the logged parameters, put it in
 * the cachetable at diskoff, and unpin it dirty.  A node of height 0 gets an empty GPMA
 * buffer and a mempool of nodesize + nodesize/4 bytes; any other height gets empty child
 * bookkeeping.  Allocation failures are fatal (assert). */
void toku_recover_newbrtnode (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t height, u_int32_t nodesize, u_int8_t is_dup_sort, u_int32_t rand4fingerprint) {
    int r;
    struct cf_pair *pair = NULL;
    r = find_cachefile(filenum, &pair);
    assert(r == 0);

    TAGMALLOC(BRTNODE, n);
    assert(n);
    n->nodesize     = nodesize;
    n->thisnodename = diskoff;
    n->log_lsn = n->disk_lsn = lsn;
    //printf("%s:%d %p->disk_lsn=%"PRId64"\n", __FILE__, __LINE__, n, n->disk_lsn.lsn);
    n->layout_version    = 3;
    n->height            = height;
    n->rand4fingerprint  = rand4fingerprint;
    n->flags = is_dup_sort ? TOKU_DB_DUPSORT : 0; // Don't have TOKU_DB_DUP ???
    n->local_fingerprint = 0; // nothing there yet
    n->dirty = 1;

    if (height == 0) {
        r = toku_gpma_create(&n->u.l.buffer, 0);
        assert(r == 0);
        n->u.l.n_bytes_in_buffer = 0;
        {
            u_int32_t mpsize = n->nodesize + n->nodesize/4;
            void *mp = toku_malloc(mpsize);
            assert(mp);
            toku_mempool_init(&n->u.l.buffer_mempool, mp, mpsize);
        }
    } else {
        n->u.n.n_children = 0;
        n->u.n.totalchildkeylens = 0;
        n->u.n.n_bytes_in_buffers = 0;
        MALLOC_N(3, n->u.n.childinfos);
        assert(n->u.n.childinfos);
        MALLOC_N(2, n->u.n.childkeys);
        assert(n->u.n.childkeys);
    }

    // Now put it in the cachetable
    toku_cachetable_put(pair->cf, diskoff, n, toku_serialize_brtnode_size(n), toku_brtnode_flush_callback, toku_brtnode_fetch_callback, 0);
    VERIFY_COUNTS(n);
    n->log_lsn = lsn; // stamp the node with this record's LSN before unpinning it
    r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(n));
    assert(r == 0);
}
/* Find the file for filenum and pin the node stored at diskoff.
 * On return *resultnode is the pinned node and *cf is the file's cachefile; the caller is
 * responsible for unpinning.  An unknown file, a file without a BRT, or a failed pin is
 * fatal (assert). */
static void recover_setup_node (FILENUM filenum, DISKOFF diskoff, CACHEFILE *cf, BRTNODE *resultnode) {
    struct cf_pair *found = NULL;
    int rc = find_cachefile(filenum, &found);
    assert(rc == 0);
    assert(found->brt);

    void *pinned;
    rc = toku_cachetable_get_and_pin(found->cf, diskoff, &pinned, NULL,
                                     toku_brtnode_flush_callback, toku_brtnode_fetch_callback, found->brt);
    assert(rc == 0);

    *resultnode = pinned;
    *cf         = found->cf;
}
2008-03-17 18:56:12 +00:00
/* Replay a dequeue record on a nonleaf node: check that the entry at the head of child
 * childnum's fifo is exactly the logged (key, data, typ, xid), remove it, and update the
 * node's fingerprint, LSN and byte counts.  Frees key.data and data.data.
 * Every mismatch or failure is fatal (assert). */
void toku_recover_brtdeq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING data, u_int32_t oldfingerprint, u_int32_t newfingerprint) {
    CACHEFILE cf;
    BRTNODE node;
    int r;
    recover_setup_node(filenum, diskoff, &cf, &node);
    assert(node->height > 0);
    //printf("deq: %lld expected_old_fingerprint=%08x actual=%08x new=%08x\n", diskoff, oldfingerprint, node->local_fingerprint, newfingerprint);
    assert(node->local_fingerprint == oldfingerprint);

    bytevec actual_key, actual_data;
    ITEMLEN actual_keylen, actual_datalen;
    u_int32_t actual_type;
    TXNID actual_xid;
    assert(childnum < (u_int32_t)node->u.n.n_children);
    r = toku_fifo_peek(BNC_BUFFER(node, childnum), &actual_key, &actual_keylen, &actual_data, &actual_datalen, &actual_type, &actual_xid);
    assert(r == 0);
    assert(actual_keylen == (ITEMLEN)key.len);
    assert(memcmp(actual_key, key.data, actual_keylen) == 0);
    // This must be a comparison: the original wrote '=' here, which overwrote actual_datalen
    // with the logged length and made the check (and the memcmp below) meaningless.
    assert(actual_datalen == (ITEMLEN)data.len);
    assert(memcmp(actual_data, data.data, actual_datalen) == 0);
    assert(actual_type == typ);
    assert(actual_xid == xid);

    u_int32_t sizediff = key.len + data.len + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
    node->local_fingerprint = newfingerprint;
    node->log_lsn = lsn;
    node->u.n.n_bytes_in_buffers -= sizediff;
    BNC_NBYTESINBUF(node, childnum) -= sizediff;
    r = toku_fifo_deq(BNC_BUFFER(node, childnum)); // don't deq till we're done looking at the data.
    assert(r == 0);
    r = toku_cachetable_unpin(cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(r == 0);
    toku_free(key.data);
    toku_free(data.data);
}
2008-03-14 19:14:31 +00:00
2008-03-17 18:56:12 +00:00
/* Replay an enqueue record on a nonleaf node: append (key, data, typ, xid) to child
 * childnum's fifo and update the node's fingerprint, LSN and byte counts.
 * Frees key.data and data.data.  Every failure is fatal (assert). */
void toku_recover_brtenq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING data, u_int32_t oldfingerprint, u_int32_t newfingerprint) {
    CACHEFILE cf;
    BRTNODE node;
    int r;
    recover_setup_node(filenum, diskoff, &cf, &node);
    assert(node->height > 0);
    //printf("enq: %lld expected_old_fingerprint=%08x actual=%08x new=%08x\n", diskoff, oldfingerprint, node->local_fingerprint, newfingerprint);
    assert(node->local_fingerprint == oldfingerprint);
    assert(childnum < (u_int32_t)node->u.n.n_children); // same bound toku_recover_brtdeq checks
    r = toku_fifo_enq(BNC_BUFFER(node, childnum), key.data, key.len, data.data, data.len, typ, xid);
    assert(r == 0);
    node->local_fingerprint = newfingerprint;
    node->log_lsn = lsn;

    // Finish every change to the node while it is still pinned, so that the size handed to
    // the cachetable reflects the new byte counts and the node is not touched after unpin.
    u_int32_t sizediff = key.len + data.len + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
    node->u.n.n_bytes_in_buffers += sizediff;
    BNC_NBYTESINBUF(node, childnum) += sizediff;

    r = toku_cachetable_unpin(cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(r == 0);
    toku_free(key.data);
    toku_free(data.data);
}
2008-03-14 19:14:31 +00:00
/* Replay an add-child record on a nonleaf node: open a slot at position childnum, shifting
 * later children (and their pivot keys) one place to the right, then fill the slot with the
 * logged child offset and fingerprint and an empty fifo.  The pivot key to the left of the
 * new child (if any) is zeroed. */
void toku_recover_addchild (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, DISKOFF child, u_int32_t childfingerprint) {
    CACHEFILE cf;
    BRTNODE node;
    recover_setup_node(filenum, diskoff, &cf, &node);
    assert(node->height > 0);
    assert(childnum <= (unsigned)node->u.n.n_children);

    // Resize the child and pivot arrays for the extra child.
    REALLOC_N(node->u.n.n_children+1, node->u.n.childinfos);
    REALLOC_N(node->u.n.n_children, node->u.n.childkeys);

    // Slide everything at or after childnum one slot to the right, starting from the end.
    unsigned int dst = node->u.n.n_children;
    while (dst > childnum) {
        node->u.n.childinfos[dst] = node->u.n.childinfos[dst-1];
        BNC_NBYTESINBUF(node, dst) = BNC_NBYTESINBUF(node, dst-1);
        assert(dst >= 2); // guards the childkeys[dst-2] read below
        node->u.n.childkeys[dst-1] = node->u.n.childkeys[dst-2];
        dst--;
    }
    if (childnum > 0) {
        node->u.n.childkeys[childnum-1] = 0;
    }

    BNC_DISKOFF(node, childnum) = child;
    BNC_SUBTREE_FINGERPRINT(node, childnum) = childfingerprint;
    int r = toku_fifo_create(&BNC_BUFFER(node, childnum));
    assert(r == 0);
    BNC_NBYTESINBUF(node, childnum) = 0;
    node->u.n.n_children++;
    node->log_lsn = lsn;

    r = toku_cachetable_unpin(cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(r == 0);
}
/* Replay a delete-child record on a nonleaf node: verify that child childnum matches the
 * logged offset and fingerprint and has an empty buffer, free its fifo and the pivot key to
 * its left, and close the gap by shifting later children one place to the left.
 * Frees pivotkey.data.  Every mismatch or failure is fatal (assert). */
void toku_recover_delchild (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, DISKOFF child, u_int32_t childfingerprint, BYTESTRING pivotkey) {
    struct cf_pair *found = NULL;
    int rc = find_cachefile(filenum, &found);
    assert(rc == 0);
    void *pinned;
    assert(found->brt);
    rc = toku_cachetable_get_and_pin(found->cf, diskoff, &pinned, NULL,
                                     toku_brtnode_flush_callback, toku_brtnode_fetch_callback, found->brt);
    assert(rc == 0);
    BRTNODE node = pinned;

    assert(node->height > 0);
    assert(childnum < (unsigned)node->u.n.n_children);
    assert(node->u.n.childinfos[childnum].subtree_fingerprint == childfingerprint);
    assert(BNC_DISKOFF(node, childnum) == child);
    assert(toku_fifo_n_entries(BNC_BUFFER(node, childnum)) == 0);
    assert(BNC_NBYTESINBUF(node, childnum) == 0);
    assert(node->u.n.n_children > 2); // at least two children must remain after the deletion
    assert(childnum > 0);

    // Drop the pivot key to the left of the child, and the child's (empty) fifo.
    node->u.n.totalchildkeylens -= toku_brt_pivot_key_len(found->brt, node->u.n.childkeys[childnum-1]);
    toku_free((void*)node->u.n.childkeys[childnum-1]);
    toku_fifo_free(&BNC_BUFFER(node, childnum));

    // Close the gap.
    u_int32_t src = childnum + 1;
    while (src < (unsigned)node->u.n.n_children) {
        node->u.n.childinfos[src-1] = node->u.n.childinfos[src];
        BNC_NBYTESINBUF(node, src-1) = BNC_NBYTESINBUF(node, src);
        node->u.n.childkeys[src-2] = node->u.n.childkeys[src-1];
        src++;
    }
    node->u.n.n_children--;
    node->log_lsn = lsn;

    rc = toku_cachetable_unpin(found->cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(rc == 0);
    toku_free(pivotkey.data);
}
/* Replay a set-child record on a nonleaf node: point child childnum at newchild.
 * oldchild is unused. */
void toku_recover_setchild (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, DISKOFF UU(oldchild), DISKOFF newchild) {
    CACHEFILE cf;
    BRTNODE node;
    // Same lookup/pin sequence as the other handlers, via the shared helper.
    recover_setup_node(filenum, diskoff, &cf, &node);

    assert(node->height > 0);
    assert(childnum < (unsigned)node->u.n.n_children);
    BNC_DISKOFF(node, childnum) = newchild;
    node->log_lsn = lsn;

    int rc = toku_cachetable_unpin(cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(rc == 0);
}
/* Replay a set-pivot record on a nonleaf node: install a freshly allocated copy of pivotkey
 * as pivot key childnum and add its length to the node's total pivot-key length.
 * Frees pivotkey.data. */
void toku_recover_setpivot (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, BYTESTRING pivotkey) {
    struct cf_pair *found = NULL;
    int rc = find_cachefile(filenum, &found);
    assert(rc == 0);
    void *pinned;
    assert(found->brt);
    rc = toku_cachetable_get_and_pin(found->cf, diskoff, &pinned, NULL,
                                     toku_brtnode_flush_callback, toku_brtnode_fetch_callback, found->brt);
    assert(rc == 0);
    BRTNODE node = pinned;
    assert(node->height > 0);

    // The pivot is stored as a kv_pair with the key only (no value).
    node->u.n.childkeys[childnum] = kv_pair_malloc(pivotkey.data, pivotkey.len, 0, 0);
    node->u.n.totalchildkeylens += toku_brt_pivot_key_len(found->brt, node->u.n.childkeys[childnum]);
    node->log_lsn = lsn;

    rc = toku_cachetable_unpin(found->cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(rc == 0);
    toku_free(pivotkey.data);
}
/* Replay a change-child-fingerprint record on a nonleaf node: set the subtree fingerprint of
 * child childnum to newfingerprint.  oldfingerprint is unused. */
void toku_recover_changechildfingerprint (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t childnum, u_int32_t UU(oldfingerprint), u_int32_t newfingerprint) {
    CACHEFILE cf;
    BRTNODE node;
    // Same lookup/pin sequence as the other handlers, via the shared helper.
    recover_setup_node(filenum, diskoff, &cf, &node);

    assert(node->height > 0);
    assert((signed)childnum <= node->u.n.n_children); // we allow the childnum to be one too large.
    BNC_SUBTREE_FINGERPRINT(node, childnum) = newfingerprint;
    node->log_lsn = lsn;

    int rc = toku_cachetable_unpin(cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(rc == 0);
}
/* Replay a file-open record: open the file named by fname (after fixup_fname) read/write,
 * wrap it in a cachefile and a header-less BRT, and register the triple under filenum.
 * The fixed-up name is handed to the BRT as its database_name; fname itself is freed.
 * Every failure is fatal (assert). */
void toku_recover_fopen (LSN UU(lsn), TXNID UU(txnid), BYTESTRING fname, FILENUM filenum) {
    char *fixedfname = fixup_fname(&fname);
    CACHEFILE cf;
    int fd = open(fixedfname, O_RDWR, 0);
    assert(fd >= 0);
    BRT MALLOC(brt);
    // Only the pointer says whether the allocation worked; errno is not reset by successful
    // calls, so it may hold a stale nonzero value here and must not be tested.
    assert(brt != 0);
    brt->database_name = fixedfname;
    brt->h = 0;
    list_init(&brt->cursors);
    brt->compare_fun = 0;
    brt->dup_compare = 0;
    brt->db = 0;
    int r = toku_cachetable_openfd(&cf, ct, fd, brt);
    assert(r == 0);
    brt->skey = brt->sval = 0;
    brt->cf = cf;
    r = toku_recover_note_cachefile(filenum, cf, brt);
    assert(r == 0); // later records look the file up by filenum, so failing to register it is fatal
    toku_free_BYTESTRING(fname);
}
/* Replay an insert-in-leaf record: allocate a kv_pair for (keybs, databs) from the leaf's
 * mempool, store it at index pmaidx of the leaf's GPMA, and update the leaf's fingerprint,
 * byte count and LSN.  Frees keybs and databs.  Every failure is fatal (assert). */
void toku_recover_insertinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, BYTESTRING keybs, BYTESTRING databs) {
    struct cf_pair *pair = NULL;
    int r = find_cachefile(filenum, &pair);
    assert(r == 0);
    void *node_v;
    assert(pair->brt);
    r = toku_cachetable_get_and_pin(pair->cf, diskoff, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r == 0);
    BRTNODE node = node_v;
    assert(node->height == 0);
    VERIFY_COUNTS(node);

    struct kv_pair *kvp = brtnode_malloc_kv_pair(node->u.l.buffer, &node->u.l.buffer_mempool, keybs.data, keybs.len, databs.data, databs.len);
    assert(kvp); // check the allocation just made (the original re-checked 'pair' here by mistake)
    toku_gpma_set_at_index(node->u.l.buffer, pmaidx, kv_pair_size(kvp), kvp);
    node->local_fingerprint += node->rand4fingerprint*toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len);
    // printf("%s:%d local_fingerprint=%08x (this=%08x)\n", __FILE__, __LINE__, node->local_fingerprint, toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len));
    node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + keybs.len + databs.len;
    // PMA_ITERATE_IDX(node->u.l.buffer, idx, skey, keylen __attribute__((__unused__)), sdata, datalen __attribute__((__unused__)),
    //                 printf("%d: %s %s\n", idx, (char*)skey, (char*)sdata));
    VERIFY_COUNTS(node);

    node->log_lsn = lsn;
    r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(r == 0);
    toku_free_BYTESTRING(keybs);
    toku_free_BYTESTRING(databs);
}
/* Replay a delete-in-leaf record: return the item at index pmaidx of the leaf's GPMA to the
 * leaf's mempool (if the GPMA lookup succeeds), clear that index, and subtract the logged
 * (keybs, databs) pair from the leaf's fingerprint and byte count.  Frees keybs and databs. */
void toku_recover_deleteinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, BYTESTRING keybs, BYTESTRING databs) {
    struct cf_pair *found = NULL;
    int rc = find_cachefile(filenum, &found);
    assert(rc == 0);
    void *pinned;
    assert(found->brt);
    rc = toku_cachetable_get_and_pin(found->cf, diskoff, &pinned, NULL,
                                     toku_brtnode_flush_callback, toku_brtnode_fetch_callback, found->brt);
    assert(rc == 0);
    BRTNODE leaf = pinned;
    assert(leaf->height == 0);
    VERIFY_COUNTS(leaf);

    {
        u_int32_t item_len;
        void *item_data;
        rc = toku_gpma_get_from_index(leaf->u.l.buffer, pmaidx, &item_len, &item_data);
        if (rc == 0) {
            toku_mempool_mfree(&leaf->u.l.buffer_mempool, item_data, item_len);
        }
    }
    toku_gpma_clear_at_index(leaf->u.l.buffer, pmaidx);

    leaf->local_fingerprint -= leaf->rand4fingerprint*toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len);
    leaf->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + keybs.len + databs.len;
    VERIFY_COUNTS(leaf);

    leaf->log_lsn = lsn;
    rc = toku_cachetable_unpin(found->cf, diskoff, 1, toku_serialize_brtnode_size(leaf));
    assert(rc == 0);
    toku_free_BYTESTRING(keybs);
    toku_free_BYTESTRING(databs);
}
// a newbrtnode should have been done before this
2008-04-02 23:40:36 +00:00
void toku_recover_resizepma ( LSN lsn , FILENUM filenum , DISKOFF diskoff , u_int32_t oldsize __attribute__ ( ( __unused__ ) ) , u_int32_t newsize ) {
2008-03-14 19:14:31 +00:00
struct cf_pair * pair = NULL ;
int r = find_cachefile ( filenum , & pair ) ;
assert ( r = = 0 ) ;
void * node_v ;
assert ( pair - > brt ) ;
r = toku_cachetable_get_and_pin ( pair - > cf , diskoff , & node_v , NULL , toku_brtnode_flush_callback , toku_brtnode_fetch_callback , pair - > brt ) ;
assert ( r = = 0 ) ;
BRTNODE node = node_v ;
assert ( node - > height = = 0 ) ;
2008-04-02 23:40:36 +00:00
r = toku_resize_gpma_exactly ( node - > u . l . buffer , newsize ) ;
2008-03-14 19:14:31 +00:00
assert ( r = = 0 ) ;
VERIFY_COUNTS ( node ) ;
node - > log_lsn = lsn ;
r = toku_cachetable_unpin ( pair - > cf , diskoff , 1 , toku_serialize_brtnode_size ( node ) ) ;
assert ( r = = 0 ) ;
}
2008-04-03 22:27:32 +00:00
/* Move the items named by fromto from GPMA 'from' to GPMA 'to'.
 * For each pair in fromto, .a is the index in 'from' and .b the index in 'to'.
 * 'to' is first resized to exactly new_N slots if its index limit differs from new_N.
 * When 'from' and 'to' are the same GPMA the items are simply repositioned; otherwise each
 * item is copied into memory from to_mempool and its old storage is returned to from_mempool.
 * The summed CRC of the moved items, scaled by a_rand / b_rand, is subtracted from *a_fp and
 * added to *b_fp; the moved byte count is subtracted from *a_nbytes and added to *b_nbytes.
 * Returns 0 on success, or errno if the scratch array cannot be allocated. */
int move_indices (GPMA from, struct mempool *from_mempool,
                  GPMA to,   struct mempool *to_mempool,
                  INTPAIRARRAY fromto,
                  u_int32_t a_rand, u_int32_t *a_fp,
                  u_int32_t b_rand, u_int32_t *b_fp,
                  u_int32_t *a_nbytes, u_int32_t *b_nbytes,
                  u_int32_t new_N) {
    toku_verify_gpma(from);
    toku_verify_gpma(to);

    struct gitem *MALLOC_N(fromto.size, moved);
    if (moved == 0) return errno;

    u_int32_t k;
    u_int32_t crc_sum = 0;
    u_int32_t nbytes_moved = 0;

    // Pass 1: lift every item out of 'from' into the scratch array.
    for (k = 0; k < fromto.size; k++) {
        int src_idx = fromto.array[k].a;
        moved[k] = from->items[src_idx];
        from->items[src_idx].data = 0;
        crc_sum += toku_calccrc32_kvpair_struct(moved[k].data);
        nbytes_moved += PMA_ITEM_OVERHEAD + moved[k].len;
        assert(kv_pair_size(moved[k].data) == moved[k].len);
    }
    from->n_items_present -= fromto.size;

    if (new_N != toku_gpma_index_limit(to)) {
        int r = toku_resize_gpma_exactly(to, new_N);
        assert(r == 0);
    }

    // Pass 2: drop every item into its slot in 'to'.
    for (k = 0; k < fromto.size; k++) {
        int dst_idx = fromto.array[k].b;
        assert(to->items[dst_idx].data == 0);
        if (from != to) {
            void *copy = mempool_malloc_from_gpma(to, to_mempool, moved[k].len);
            memcpy(copy, moved[k].data, moved[k].len);
            to->items[dst_idx] = (struct gitem){moved[k].len, copy};
            toku_mempool_mfree(from_mempool, moved[k].data, moved[k].len);
        } else {
            to->items[dst_idx] = moved[k];
        }
        assert(kv_pair_size(to->items[dst_idx].data) == to->items[dst_idx].len);
    }
    to->n_items_present += fromto.size;

    *a_fp     -= a_rand * crc_sum;
    *b_fp     += b_rand * crc_sum;
    *a_nbytes -= nbytes_moved;
    *b_nbytes += nbytes_moved;

    toku_free(moved);
    toku_verify_gpma(from);
    toku_verify_gpma(to);
    return 0;
}
/* Replay a pma-distribute record: move the items listed in fromto from the leaf at
 * old_diskoff to the leaf at new_diskoff (fromto[i].a is the source index, fromto[i].b the
 * destination index), resizing the destination GPMA to new_N slots if needed.
 * Frees fromto.  old_N is unused.  Every failure is fatal (assert). */
void toku_recover_pmadistribute (LSN lsn, FILENUM filenum, DISKOFF old_diskoff, DISKOFF new_diskoff, INTPAIRARRAY fromto, u_int32_t old_N __attribute__((__unused__)), u_int32_t new_N) {
    struct cf_pair *pair = NULL;
    int r = find_cachefile(filenum, &pair);
    assert(r == 0);
    void *node_va, *node_vb;
    assert(pair->brt);
    r = toku_cachetable_get_and_pin(pair->cf, old_diskoff, &node_va, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r == 0);
    r = toku_cachetable_get_and_pin(pair->cf, new_diskoff, &node_vb, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r == 0);
    BRTNODE nodea = node_va; assert(nodea->height == 0);
    BRTNODE nodeb = node_vb; assert(nodeb->height == 0);
    {
        // Every source index must lie inside nodea's GPMA and every destination index inside new_N.
        unsigned int i;
        //printf("{");
        for (i = 0; i < fromto.size; i++) {
            //printf(" {%d %d}", fromto.array[i].a, fromto.array[i].b);
            assert(fromto.array[i].a < toku_gpma_index_limit(nodea->u.l.buffer));
            assert(fromto.array[i].b < new_N);
        }
        //printf("}\n");
    }
    VERIFY_COUNTS(nodea);
    r = move_indices(nodea->u.l.buffer, &nodea->u.l.buffer_mempool,
                     nodeb->u.l.buffer, &nodeb->u.l.buffer_mempool,
                     fromto,
                     nodea->rand4fingerprint, &nodea->local_fingerprint,
                     nodeb->rand4fingerprint, &nodeb->local_fingerprint,
                     &nodea->u.l.n_bytes_in_buffer, &nodeb->u.l.n_bytes_in_buffer,
                     new_N);
    // move_indices returns nonzero when it could not allocate its scratch array, in which case
    // nothing was moved; carrying on would silently skip this log record.
    assert(r == 0);
    // move_indices has subtracted the moved items from nodea's fingerprint and byte count and
    // added them to nodeb's.
    // PMA_ITERATE_IDX(nodeb->u.l.buffer, idx, key, keylen __attribute__((__unused__)), data, datalen __attribute__((__unused__)),
    //                 printf("%d: %s %s\n", idx, (char*)key, (char*)data));
    VERIFY_COUNTS(nodea);
    VERIFY_COUNTS(nodeb);
    nodea->log_lsn = lsn;
    nodeb->log_lsn = lsn;
    r = toku_cachetable_unpin(pair->cf, old_diskoff, 1, toku_serialize_brtnode_size(nodea));
    assert(r == 0);
    r = toku_cachetable_unpin(pair->cf, new_diskoff, 1, toku_serialize_brtnode_size(nodeb));
    assert(r == 0);
    toku_free_INTPAIRARRAY(fromto);
}
/* Replay a change-unnamed-root record: pin the file's BRT header, set its unnamed_root to
 * newroot, and unpin it.  lsn and oldroot are unused.  Every failure is fatal (assert). */
void toku_recover_changeunnamedroot (LSN UU(lsn), FILENUM filenum, DISKOFF UU(oldroot), DISKOFF newroot) {
    struct cf_pair *pair = NULL;
    int r = find_cachefile(filenum, &pair);
    assert(r == 0);
    assert(pair->brt);
    r = toku_read_and_pin_brt_header(pair->cf, &pair->brt->h);
    assert(r == 0);
    pair->brt->h->unnamed_root = newroot;
    r = toku_unpin_brt_header(pair->brt);
    assert(r == 0); // checked like every other unpin in this file instead of being dropped
}
/* Replay a change-named-root record.  Not supported during recovery: always asserts. */
void toku_recover_changenamedroot (LSN UU(lsn), FILENUM UU(filenum), BYTESTRING UU(name), DISKOFF UU(oldroot), DISKOFF UU(newroot)) {
    assert(0);
}
/* Replay a change-unused-memory record: pin the file's BRT header, set its unused_memory to
 * newunused, and unpin it.  lsn and oldunused are unused.  Every failure is fatal (assert). */
void toku_recover_changeunusedmemory (LSN UU(lsn), FILENUM filenum, DISKOFF UU(oldunused), DISKOFF newunused) {
    struct cf_pair *pair = NULL;
    int r = find_cachefile(filenum, &pair);
    assert(r == 0);
    assert(pair->brt);
    r = toku_read_and_pin_brt_header(pair->cf, &pair->brt->h);
    assert(r == 0);
    pair->brt->h->unused_memory = newunused;
    r = toku_unpin_brt_header(pair->brt);
    assert(r == 0); // checked like every other unpin in this file instead of being dropped
}
2008-03-21 19:40:32 +00:00
/* Replay a checkpoint record.  Nothing is done; always returns 0. */
static int toku_recover_checkpoint (LSN UU(lsn)) {
    const int ok = 0;
    return ok;
}
2008-03-14 19:14:31 +00:00
2008-03-21 21:02:30 +00:00
/* Replay a transaction-begin record.  Nothing is done; always returns 0. */
static int toku_recover_xbegin (LSN UU(lsn), TXNID UU(parent)) {
    const int ok = 0;
    return ok;
}
2008-03-12 19:40:38 +00:00
int tokudb_recover ( const char * data_dir , const char * log_dir ) {
2007-11-23 18:27:50 +00:00
int r ;
2007-12-26 16:52:55 +00:00
int entrycount = 0 ;
2007-11-23 18:27:50 +00:00
char * * logfiles ;
2008-03-12 19:40:38 +00:00
int lockfd ;
{
int namelen = strlen ( data_dir ) ;
char lockfname [ namelen + 20 ] ;
snprintf ( lockfname , sizeof ( lockfname ) , " %s/__recoverylock_dont_delete_me " , data_dir ) ;
lockfd = open ( lockfname , O_RDWR | O_CREAT , S_IRUSR | S_IWUSR ) ;
if ( lockfd < 0 ) {
printf ( " Couldn't open %s \n " , lockfname ) ;
2008-03-14 11:02:28 +00:00
return errno ;
2008-03-12 19:40:38 +00:00
}
r = flock ( lockfd , LOCK_EX | LOCK_NB ) ;
if ( r ! = 0 ) {
printf ( " Couldn't run recovery because some other process holds the recovery lock %s \n " , lockfname ) ;
2008-03-14 11:02:28 +00:00
return errno ;
2008-03-12 19:40:38 +00:00
}
}
2008-03-21 19:40:32 +00:00
r = toku_logger_find_logfiles ( log_dir , & logfiles ) ;
2008-03-14 11:02:28 +00:00
if ( r ! = 0 ) return r ;
2007-11-23 18:27:50 +00:00
int i ;
2008-01-11 03:09:14 +00:00
toku_recover_init ( ) ;
2008-03-12 19:40:38 +00:00
char org_wd [ 1000 ] ;
{
char * wd = getcwd ( org_wd , sizeof ( org_wd ) ) ;
assert ( wd ! = 0 ) ;
//printf("%s:%d org_wd=\"%s\"\n", __FILE__, __LINE__, org_wd);
}
char data_wd [ 1000 ] ;
{
r = chdir ( data_dir ) ; assert ( r = = 0 ) ;
char * wd = getcwd ( data_wd , sizeof ( data_wd ) ) ;
assert ( wd ! = 0 ) ;
//printf("%s:%d data_wd=\"%s\"\n", __FILE__, __LINE__, data_wd);
}
2008-03-21 19:40:32 +00:00
for ( i = 0 ; logfiles [ i ] ; i + + ) {
2007-12-22 20:56:20 +00:00
//fprintf(stderr, "Opening %s\n", logfiles[i]);
2008-03-12 19:40:38 +00:00
r = chdir ( org_wd ) ;
assert ( r = = 0 ) ;
2007-11-23 18:27:50 +00:00
FILE * f = fopen ( logfiles [ i ] , " r " ) ;
struct log_entry le ;
u_int32_t version ;
2008-03-17 02:40:59 +00:00
//printf("Reading file %s\n", logfiles[i]);
2007-11-28 19:09:24 +00:00
r = toku_read_and_print_logmagic ( f , & version ) ;
2007-11-23 18:27:50 +00:00
assert ( r = = 0 & & version = = 0 ) ;
2008-03-12 19:40:38 +00:00
r = chdir ( data_wd ) ;
assert ( r = = 0 ) ;
2007-11-29 18:14:40 +00:00
while ( ( r = toku_log_fread ( f , & le ) ) = = 0 ) {
2008-03-17 02:40:59 +00:00
//printf("%lld: Got cmd %c\n", (long long)le.u.commit.lsn.lsn, le.cmd);
2008-02-08 22:16:02 +00:00
logtype_dispatch_args ( & le , toku_recover_ ) ;
2007-12-29 19:27:01 +00:00
entrycount + + ;
2007-11-23 18:27:50 +00:00
}
if ( r ! = EOF ) {
if ( r = = DB_BADFORMAT ) {
2008-01-25 21:50:07 +00:00
fprintf ( stderr , " Bad log format at record %d \n " , entrycount ) ;
2008-03-14 11:02:28 +00:00
return r ;
2007-11-23 18:27:50 +00:00
} else {
fprintf ( stderr , " Huh? %s \n " , strerror ( r ) ) ;
2008-03-14 11:02:28 +00:00
return r ;
2007-11-23 18:27:50 +00:00
}
}
fclose ( f ) ;
}
2008-01-11 03:09:14 +00:00
toku_recover_cleanup ( ) ;
2008-03-21 19:40:32 +00:00
for ( i = 0 ; logfiles [ i ] ; i + + ) {
2007-11-23 18:27:50 +00:00
toku_free ( logfiles [ i ] ) ;
}
toku_free ( logfiles ) ;
2008-03-12 19:40:38 +00:00
r = flock ( lockfd , LOCK_UN ) ;
2008-03-14 11:02:28 +00:00
if ( r ! = 0 ) return errno ;
r = chdir ( org_wd ) ;
if ( r ! = 0 ) return errno ;
2008-03-12 19:40:38 +00:00
//printf("%s:%d recovery successful! ls -l says\n", __FILE__, __LINE__);
//system("ls -l");
2007-11-23 18:27:50 +00:00
return 0 ;
}