[t:2561] Merge tokudb.2561b to main. Disabled auto-upgrade and auto-upgrade tests

git-svn-id: file:///svn/toku/tokudb@20778 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Yoni Fogel 2013-04-16 23:59:17 -04:00
parent 5b254d5608
commit c9b2b066dd
84 changed files with 3698 additions and 1693 deletions

View file

@ -336,9 +336,10 @@ struct __toku_dbt {
u_int32_t flags; /* 32-bit offset=20 size=4, 64=bit offset=24 size=4 */
/* 4 more bytes of alignment in the 64-bit case. */
};
typedef int (*toku_dbt_upgradef)(DB*,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);
typedef struct __toku_descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
@ -364,8 +365,8 @@ struct __toku_db {
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */;
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;

View file

@ -346,9 +346,10 @@ struct __toku_dbt {
u_int32_t flags; /* 32-bit offset=20 size=4, 64=bit offset=24 size=4 */
/* 4 more bytes of alignment in the 64-bit case. */
};
typedef int (*toku_dbt_upgradef)(DB*,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);
typedef struct __toku_descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
@ -374,8 +375,8 @@ struct __toku_db {
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */;
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;

View file

@ -350,9 +350,10 @@ struct __toku_dbt {
u_int32_t flags; /* 32-bit offset=20 size=4, 64=bit offset=24 size=4 */
/* 4 more bytes of alignment in the 64-bit case. */
};
typedef int (*toku_dbt_upgradef)(DB*,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);
typedef struct __toku_descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
@ -378,8 +379,8 @@ struct __toku_db {
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */;
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;

View file

@ -350,9 +350,10 @@ struct __toku_dbt {
u_int32_t flags; /* 32-bit offset=24 size=4, 64=bit offset=32 size=4 */
/* 4 more bytes of alignment in the 64-bit case. */
};
typedef int (*toku_dbt_upgradef)(DB*,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);
typedef struct __toku_descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
@ -378,8 +379,8 @@ struct __toku_db {
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */;
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;

View file

@ -353,9 +353,10 @@ struct __toku_dbt {
u_int32_t flags; /* 32-bit offset=24 size=4, 64=bit offset=32 size=4 */
/* 4 more bytes of alignment in the 64-bit case. */
};
typedef int (*toku_dbt_upgradef)(DB*,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);
typedef struct __toku_descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
@ -382,8 +383,8 @@ struct __toku_db {
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */;
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;

View file

@ -572,9 +572,11 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
assert(sizeof(dbt_fields32)==sizeof(dbt_fields64));
print_struct("dbt", 0, dbt_fields32, dbt_fields64, sizeof(dbt_fields32)/sizeof(dbt_fields32[0]), 0);
printf("typedef int (*toku_dbt_upgradef)(DB*,\n");
printf(" u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,\n");
printf(" u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);\n");
//descriptor
printf("typedef struct __toku_descriptor {\n");
printf(" u_int32_t version;\n");
printf(" DBT dbt;\n");
printf("} *DESCRIPTOR, DESCRIPTOR_S;\n");
assert(sizeof(db_fields32)==sizeof(db_fields64));
{
@ -601,8 +603,8 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
"const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/",
"int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */",
"int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */",
"const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */",
"int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */",
"DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */",
"int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */",
"int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
"int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */",
"int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */",

View file

@ -323,9 +323,10 @@ struct __toku_dbt {
u_int32_t ulen;
u_int32_t flags;
};
typedef int (*toku_dbt_upgradef)(DB*,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);
typedef struct __toku_descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
@ -351,8 +352,8 @@ struct __toku_db {
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */;
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;

View file

@ -323,9 +323,10 @@ struct __toku_dbt {
u_int32_t ulen;
u_int32_t flags;
};
typedef int (*toku_dbt_upgradef)(DB*,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val);
typedef struct __toku_descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
@ -351,8 +352,8 @@ struct __toku_db {
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
int (*delboth) (DB*, DB_TXN*, DBT*, DBT*, u_int32_t) /* Delete the key/value pair. */;
int (*row_size_supported) (DB*, u_int32_t) /* Test whether a row size is supported. */;
const DBT *descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) /* set row/dictionary descriptor for a db. Available only while db is open */;
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*set_descriptor) (DB*, u_int32_t version, const DBT* descriptor) /* set row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*getf_get_both)(DB*, DB_TXN*, u_int32_t, DBT*, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_get_both without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;

View file

@ -372,6 +372,25 @@ toku_fsync_dirfd_without_accounting(DIR *dirp) {
return r;
}
int
toku_fsync_dir_by_name_without_accounting(const char *dir_name) {
int r = 0;
DIR * dir = opendir(dir_name);
if (!dir) {
r = errno;
assert(r);
}
else {
r = toku_fsync_dirfd_without_accounting(dir);
int rc = closedir(dir);
if (r==0 && rc!=0) {
r = errno;
assert(r);
}
}
return r;
}
// include fsync in scheduling accounting
int
toku_file_fsync(int fd) {
@ -421,16 +440,7 @@ toku_fsync_directory(const char *fname) {
}
if (result == 0) {
// fsync the dir
DIR *d = opendir(dirname);
if (d == NULL) {
result = errno;
} else {
result = toku_fsync_dirfd_without_accounting(d);
int r = closedir(d);
if (result == 0 && r != 0)
result = errno;
}
result = toku_fsync_dir_by_name_without_accounting(dirname);
}
toku_free(dirname);
return result;

View file

@ -61,6 +61,7 @@ BRT_SOURCES = \
logfilemgr \
logger \
log_code \
log_upgrade \
log_print \
logcursor \
memarena \
@ -94,6 +95,8 @@ BRT_O_FILES = $(patsubst %,%.$(OEXT),$(BRT_SOURCES))
newbrt.$(OEXT): $(BRT_C_FILES) $(DEPEND_COMPILE)
$(CC) -c $(BRT_C_FILES) $(COMBINE_C) $(CPPFLAGS) $(CFLAGS) $(OOUTPUT)$@
brt-serialize.$(OEXT): $(wildcard backwards_*.c)
ifneq ($(CYGWIN),)
NEWBRT_O_FILES = $(BRT_O_FILES)
else ifeq ($(CC),icc)

File diff suppressed because it is too large Load diff

View file

@ -1,33 +0,0 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#ifndef BACKWARD_10_H
#define BACKWARD_10_H
int le10_committed (u_int32_t klen, void* kval, u_int32_t dlen, void* dval, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *result);
int le10_both (TXNID xid, u_int32_t cklen, void* ckval, u_int32_t cdlen, void* cdval, u_int32_t pdlen, void* pdval,
u_int32_t *memsize, u_int32_t *disksize, LEAFENTRY *result);
int le10_provdel (TXNID xid, u_int32_t klen, void* kval, u_int32_t dlen, void* dval,
u_int32_t *resultsize, u_int32_t *memsize, LEAFENTRY *result);
int le10_provpair (TXNID xid, u_int32_t klen, void* kval, u_int32_t plen, void* pval, u_int32_t *memsize, u_int32_t *disksize, LEAFENTRY *result);
enum le_state { LE_COMMITTED=1, // A committed pair.
LE_BOTH, // A committed pair and a provisional pair.
LE_PROVDEL, // A committed pair that has been provisionally deleted
LE_PROVPAIR }; // No committed value, but a provisional pair.
static inline enum le_state get_le_state(LEAFENTRY le) {
return (enum le_state)*(unsigned char *)le;
}
#include "ule.h"
//Exposed ule functions for the purpose of upgrading
void toku_upgrade_ule_init_empty_ule(ULE ule, u_int32_t keylen, void * keyp);
void toku_upgrade_ule_remove_innermost_uxr(ULE ule);
void toku_upgrade_ule_push_insert_uxr(ULE ule, TXNID xid, u_int32_t vallen, void * valp);
void toku_upgrade_ule_push_delete_uxr(ULE ule, TXNID xid);
//Exposed brt functions for the purpose of upgrading
void toku_calculate_leaf_stats(BRTNODE node);
#endif

771
newbrt/backwards_11.c Normal file
View file

@ -0,0 +1,771 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id: brt-serialize.c 18555 2010-03-18 01:20:07Z yfogel $"
#ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "includes.h"
#define PRINT 0
// Version-11 x1764 checksum over an arbitrary memory range.
// Folds the buffer 8 bytes at a time in host byte order (c = c*17 + word),
// assembles any trailing partial word from the remaining bytes (low byte
// first), then XORs the two 32-bit halves of the 64-bit accumulator.
// NOTE(review): the main loop reads through a u_int64_t* — presumably buf is
// sufficiently aligned (or the platform tolerates misaligned reads); confirm.
static u_int32_t x1764_memory_11 (const void *buf, int len)
{
const u_int64_t *lbuf=buf;
u_int64_t c=0;
while (len>=8) {
c = c*17 + *lbuf;
if (PRINT) printf("%d: c=%016" PRIx64 " sum=%016" PRIx64 "\n", __LINE__, *lbuf, c);
lbuf++;
len-=8;
}
if (len>0) {
// Fewer than 8 bytes remain: pack them into one zero-padded word.
const u_int8_t *cbuf=(u_int8_t*)lbuf;
int i;
u_int64_t input=0;
for (i=0; i<len; i++) {
input |= ((u_int64_t)(cbuf[i]))<<(8*i);
}
c = c*17 + input;
}
// Collapse the 64-bit accumulator into the 32-bit checksum.
return (c&0xFFFFFFFF) ^ (c>>32);
}
// Given a version 11 header, create a version 12 header.
// If new memory is needed for the new header, allocate it here and free the memory of the old version header.
// Upgrade a version-11 brt header to version 12 in place.
// Ownership of the header moves from *brth_11 to *brth_12 (no new memory is
// allocated here); *brth_11 is NULLed so the caller cannot reuse the old view.
// Returns 0 on success, else the error from re-serializing the descriptor.
static int
upgrade_brtheader_11_12(int fd, struct brt_header **brth_11, struct brt_header ** brth_12) {
int r = 0;
assert((*brth_11)->layout_version == BRT_LAYOUT_VERSION_11);
*brth_12 = *brth_11;
*brth_11 = NULL;
(*brth_12)->layout_version = BRT_LAYOUT_VERSION_12;
toku_list_init(&(*brth_12)->checkpoint_before_commit_link);
// Count this header for upgrade statistics (atomic: may race with others).
(void) toku_sync_fetch_and_increment_uint64(&upgrade_status.header);
{ //Re-write descriptor to fix checksum (does not get done automatically).
DISKOFF offset;
DESCRIPTOR d = &(*brth_12)->descriptor;
//4 for checksum
toku_realloc_descriptor_on_disk((*brth_12)->blocktable, toku_serialize_descriptor_size(d)+4, &offset, (*brth_12));
r = toku_serialize_descriptor_contents_to_fd(fd, d, offset);
}
return r;
}
// Structure of brtnode is same for versions 11, 12. The only difference is in the
// disk format and layout version.
// Upgrade a version-11 brtnode to version 12 in place (same in-memory struct;
// only the disk format/layout version differ). Ownership moves from
// *brtnode_11 to *brtnode_12 and *brtnode_11 is NULLed. The node is marked
// dirty so it gets written back in the new format. Always returns 0.
static int
upgrade_brtnode_11_12 (BRTNODE *brtnode_11, BRTNODE *brtnode_12) {
*brtnode_12 = *brtnode_11;
*brtnode_11 = NULL;
BRTNODE brt = *brtnode_12;
brt->layout_version = BRT_LAYOUT_VERSION_12;
brt->dirty = 1;
// Tally upgrade statistics per node kind (atomic counters).
if (brt->height) {
(void) toku_sync_fetch_and_increment_uint64(&upgrade_status.nonleaf);
}
else {
(void) toku_sync_fetch_and_increment_uint64(&upgrade_status.leaf);
}
//x1764 calculation (fingerprint) has changed between 11 and 12.
//Update all local fields based on x1764, verify several others.
toku_verify_or_set_counts(brt, TRUE);
return 0;
}
// Number of bytes needed to serialize a version-11 descriptor.
// Layout: 4-byte version field, 4-byte size field, then the raw DBT bytes.
// The checksum is NOT counted here; it exists only in the header's version.
static u_int32_t
toku_serialize_descriptor_size_11(DESCRIPTOR desc) {
const u_int32_t fixed_overhead = 4 + 4; // version field + size field
return fixed_overhead + desc->dbt.size;
}
// Length of a pivot key in a version-11 node. DUPSORT dictionaries store a
// key+value pair in each pivot, so both lengths count toward the pivot size.
static unsigned int toku_brtnode_pivot_key_len_11 (BRTNODE node, struct kv_pair *pk) {
unsigned int len = kv_pair_keylen(pk);
if (node->flags & TOKU_DB_DUPSORT)
len += kv_pair_vallen(pk);
return len;
}
enum { uncompressed_magic_len_11 = (8 // tokuleaf or tokunode
+4 // layout version
+4 // layout version original
)
};
// uncompressed header offsets
enum {
uncompressed_magic_offset_11 = 0,
uncompressed_version_offset_11 = 8,
};
// compression header sub block sizes
struct sub_block_sizes {
u_int32_t compressed_size; // real compressed size
u_int32_t uncompressed_size;
u_int32_t compressed_size_bound; // estimated compressed size
};
// target sub-block sizs and max number of sub-blocks per block.
static const int target_sub_block_size_11 = 512*1024;
static const int max_sub_blocks_11 = 8;
// round up n
// Round n up to the next multiple of alignment.
// alignment must be a power of two; the mask trick is invalid otherwise.
static inline int roundup2(int n, int alignment) {
int mask = alignment - 1;
return (n + mask) & ~mask;
}
// get the size of the compression header
// Size of the compression header for n sub blocks: one u_int32_t sub-block
// count plus two u_int32_t's (compressed size, uncompressed size) per block.
// layout_version is unused here (same header shape for the versions handled).
static size_t get_compression_header_size(int UU(layout_version), int n) {
return sizeof (u_int32_t) + (n * 2 * sizeof (u_int32_t));
}
// get the sum of the sub block uncompressed sizes
// Sum of the uncompressed sizes of the first n sub blocks.
static size_t get_sum_uncompressed_size_11(int n, struct sub_block_sizes sizes[]) {
size_t total = 0;
int idx;
for (idx = 0; idx < n; idx++) {
total += sizes[idx].uncompressed_size;
}
return total;
}
static inline void ignore_int (int UU(ignore_me)) {}
static void deserialize_descriptor_from_rbuf_11(struct rbuf *rb, DESCRIPTOR desc, BOOL temporary);
// Deserialize the nonleaf-specific portion of a version-11 node from rb into
// result (magic, layout version, flags, height, fingerprints already read by
// the caller). Reads per-child fingerprints/estimates, pivot keys, child
// blocknums, and each child's FIFO buffer, verifying local and subtree
// fingerprints along the way. Returns 0 on success, toku_db_badformat()
// on any corruption. On the error paths reached via died_1, all FIFOs
// created so far are freed; pivot keys/childinfos are NOT freed here —
// NOTE(review): presumably the caller releases the node on failure; confirm.
static int
deserialize_brtnode_nonleaf_from_rbuf_11 (BRTNODE result, bytevec magic, struct rbuf *rb) {
int r;
int i;
if (memcmp(magic, "tokunode", 8)!=0) {
r = toku_db_badformat();
return r;
}
result->u.n.totalchildkeylens=0;
u_int32_t subtree_fingerprint = rbuf_int(rb);
u_int32_t check_subtree_fingerprint = 0;
result->u.n.n_children = rbuf_int(rb);
MALLOC_N(result->u.n.n_children+1, result->u.n.childinfos);
MALLOC_N(result->u.n.n_children, result->u.n.childkeys);
//printf("n_children=%d\n", result->n_children);
assert(result->u.n.n_children>=0);
// Per-child fingerprints and subtree estimates.
for (i=0; i<result->u.n.n_children; i++) {
u_int32_t childfp = rbuf_int(rb);
BNC_SUBTREE_FINGERPRINT(result, i)= childfp;
check_subtree_fingerprint += childfp;
struct subtree_estimates *se = &(BNC_SUBTREE_ESTIMATES(result, i));
se->nkeys = rbuf_ulonglong(rb);
se->ndata = rbuf_ulonglong(rb);
se->dsize = rbuf_ulonglong(rb);
se->exact = (BOOL) (rbuf_char(rb) != 0);
}
// n_children-1 pivot keys; DUPSORT pivots carry a key AND a value.
for (i=0; i<result->u.n.n_children-1; i++) {
if (result->flags & TOKU_DB_DUPSORT) {
bytevec keyptr, dataptr;
unsigned int keylen, datalen;
rbuf_bytes(rb, &keyptr, &keylen);
rbuf_bytes(rb, &dataptr, &datalen);
result->u.n.childkeys[i] = kv_pair_malloc(keyptr, keylen, dataptr, datalen);
} else {
bytevec childkeyptr;
unsigned int cklen;
rbuf_bytes(rb, &childkeyptr, &cklen); /* Returns a pointer into the rbuf. */
result->u.n.childkeys[i] = kv_pair_malloc((void*)childkeyptr, cklen, 0, 0);
}
//printf(" key %d length=%d data=%s\n", i, result->childkeylens[i], result->childkeys[i]);
result->u.n.totalchildkeylens+=toku_brtnode_pivot_key_len_11(result, result->u.n.childkeys[i]);
}
// Child block numbers; hashes and buffer byte counts start empty.
for (i=0; i<result->u.n.n_children; i++) {
BNC_BLOCKNUM(result,i) = rbuf_blocknum(rb);
BNC_HAVE_FULLHASH(result, i) = FALSE;
BNC_NBYTESINBUF(result,i) = 0;
//printf("Child %d at %lld\n", i, result->children[i]);
}
result->u.n.n_bytes_in_buffers = 0;
// Create one message FIFO per child; on failure free the ones made so far.
// (The `if (0) { died_1: ... }` idiom makes died_1 a goto target that is
// skipped on the normal path but re-enters this cleanup loop on errors.)
for (i=0; i<result->u.n.n_children; i++) {
r=toku_fifo_create(&BNC_BUFFER(result,i));
if (r!=0) {
int j;
if (0) { died_1: j=result->u.n.n_bytes_in_buffers; }
for (j=0; j<i; j++) toku_fifo_free(&BNC_BUFFER(result,j));
return toku_db_badformat();
}
}
{
// Replay each child's buffered commands into its FIFO, accumulating the
// local fingerprint for verification.
int cnum;
u_int32_t check_local_fingerprint = 0;
for (cnum=0; cnum<result->u.n.n_children; cnum++) {
int n_in_this_hash = rbuf_int(rb);
//printf("%d in hash\n", n_in_hash);
for (i=0; i<n_in_this_hash; i++) {
int diff;
bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen;
//toku_verify_counts_11(result);
int type = rbuf_char(rb);
XIDS xids;
xids_create_from_buffer(rb, &xids);
rbuf_bytes(rb, &key, &keylen); /* Returns a pointer into the rbuf. */
rbuf_bytes(rb, &val, &vallen);
check_local_fingerprint += result->rand4fingerprint * toku_calc_fingerprint_cmd(type, xids, key, keylen, val, vallen);
//printf("Found %s,%s\n", (char*)key, (char*)val);
{
r=toku_fifo_enq(BNC_BUFFER(result, cnum), key, keylen, val, vallen, type, xids); /* Copies the data into the hash table. */
if (r!=0) { goto died_1; }
}
diff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD + xids_get_serialize_size(xids);
result->u.n.n_bytes_in_buffers += diff;
BNC_NBYTESINBUF(result,cnum) += diff;
//printf("Inserted\n");
xids_destroy(&xids);
}
}
if (check_local_fingerprint != result->local_fingerprint) {
fprintf(stderr, "%s:%d local fingerprint is wrong (found %8x calcualted %8x\n", __FILE__, __LINE__, result->local_fingerprint, check_local_fingerprint);
return toku_db_badformat();
}
if (check_subtree_fingerprint+check_local_fingerprint != subtree_fingerprint) {
fprintf(stderr, "%s:%d subtree fingerprint is wrong\n", __FILE__, __LINE__);
return toku_db_badformat();
}
}
(void)rbuf_int(rb); //Ignore the crc (already verified).
if (rb->ndone != rb->size) { //Verify we read exactly the entire block.
r = toku_db_badformat(); goto died_1;
}
return 0;
}
// Deserialize the leaf-specific portion of a version-11 node from rb into
// result. The leaf entries are NOT copied: the rbuf's buffer itself becomes
// the node's mempool, an OMT of pointers into it is built, and on success
// rb->buf is set to NULL to transfer ownership to the node. The local
// fingerprint is recomputed with the version-11 x1764 and verified.
// Returns 0 on success, toku_db_badformat() on corruption.
static int
deserialize_brtnode_leaf_from_rbuf_11 (BRTNODE result, bytevec magic, struct rbuf *rb) {
int r;
int i;
if (memcmp(magic, "tokuleaf", 8)!=0) {
r = toku_db_badformat();
return r;
}
result->u.l.leaf_stats.nkeys = rbuf_ulonglong(rb);
result->u.l.leaf_stats.ndata = rbuf_ulonglong(rb);
result->u.l.leaf_stats.dsize = rbuf_ulonglong(rb);
result->u.l.leaf_stats.exact = TRUE;
int n_in_buf = rbuf_int(rb);
result->u.l.n_bytes_in_buffer = 0;
result->u.l.seqinsert = 0;
//printf("%s:%d r PMA= %p\n", __FILE__, __LINE__, result->u.l.buffer);
// The rbuf's backing store doubles as the node's mempool from here on.
toku_mempool_init(&result->u.l.buffer_mempool, rb->buf, rb->size);
u_int32_t actual_sum = 0;
u_int32_t start_of_data = rb->ndone;
OMTVALUE *MALLOC_N(n_in_buf, array);
// Walk the packed leaf entries in place, recording a pointer to each and
// folding it into the checksum.
for (i=0; i<n_in_buf; i++) {
LEAFENTRY le = (LEAFENTRY)(&rb->buf[rb->ndone]);
u_int32_t disksize = leafentry_disksize(le);
rb->ndone += disksize;
assert(rb->ndone<=rb->size);
array[i]=(OMTVALUE)le;
actual_sum += x1764_memory_11(le, disksize);
}
toku_trace("fill array");
u_int32_t end_of_data = rb->ndone;
result->u.l.n_bytes_in_buffer += end_of_data-start_of_data + n_in_buf*OMT_ITEM_OVERHEAD;
actual_sum *= result->rand4fingerprint;
// The OMT steals the (already sorted) pointer array.
r = toku_omt_create_steal_sorted_array(&result->u.l.buffer, &array, n_in_buf, n_in_buf);
toku_trace("create omt");
if (r!=0) {
toku_free(array);
r = toku_db_badformat();
// died_1 is the shared error exit once the OMT exists.
if (0) { died_1: toku_omt_destroy(&result->u.l.buffer); }
return r;
}
assert(array==NULL);
result->u.l.buffer_mempool.frag_size = start_of_data;
result->u.l.buffer_mempool.free_offset = end_of_data;
if (r!=0) goto died_1;
if (actual_sum!=result->local_fingerprint) {
//fprintf(stderr, "%s:%d Corrupted checksum stored=%08x rand=%08x actual=%08x height=%d n_keys=%d\n", __FILE__, __LINE__, result->rand4fingerprint, result->local_fingerprint, actual_sum, result->height, n_in_buf);
r = toku_db_badformat();
goto died_1;
} else {
//fprintf(stderr, "%s:%d Good checksum=%08x height=%d\n", __FILE__, __LINE__, actual_sum, result->height);
}
//toku_verify_counts_11(result);
(void)rbuf_int(rb); //Ignore the crc (already verified).
if (rb->ndone != rb->size) { //Verify we read exactly the entire block.
r = toku_db_badformat(); goto died_1;
}
r = toku_leaflock_borrow(result->u.l.leaflock_pool, &result->u.l.leaflock);
if (r!=0) goto died_1;
rb->buf = NULL; //Buffer was used for node's mempool.
return 0;
}
// Deserialize a version-11 brtnode from rb: read the common prefix (magic,
// layout versions, per-node descriptor [discarded], nodesize, flags, height,
// fingerprints), then dispatch to the nonleaf or leaf deserializer.
// On success stores the node in *brtnode and returns 0; for nonleaf nodes
// rb->buf is freed here (leaf nodes keep it as their mempool).
// Returns errno on allocation failure, or the sub-deserializer's error.
static int
deserialize_brtnode_from_rbuf_11 (BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h, struct rbuf *rb) {
TAGMALLOC(BRTNODE, result);
int r;
if (result==0) {
r=errno;
// died0 frees the node on any later failure.
if (0) { died0: toku_free(result); }
return r;
}
result->ever_been_written = 1;
//printf("Deserializing %lld datasize=%d\n", off, datasize);
bytevec magic;
rbuf_literal_bytes(rb, &magic, 8);
result->layout_version = rbuf_int(rb);
assert(result->layout_version == BRT_LAYOUT_VERSION_11);
result->layout_version_original = rbuf_int(rb);
result->layout_version_read_from_disk = result->layout_version;
{
//Restrict scope for now since we do not support upgrades.
DESCRIPTOR_S desc;
//desc.dbt.data is TEMPORARY. Will be unusable when the rc buffer is freed.
deserialize_descriptor_from_rbuf_11(rb, &desc, TRUE);
//Just throw away.
}
result->nodesize = rbuf_int(rb);
result->thisnodename = blocknum;
result->flags = rbuf_int(rb);
result->height = rbuf_int(rb);
result->rand4fingerprint = rbuf_int(rb);
result->local_fingerprint = rbuf_int(rb);
// printf("%s:%d read %08x\n", __FILE__, __LINE__, result->local_fingerprint);
result->dirty = 0;
result->fullhash = fullhash;
//printf("height==%d\n", result->height);
if (result->height>0)
r = deserialize_brtnode_nonleaf_from_rbuf_11(result, magic, rb);
else {
result->u.l.leaflock_pool = toku_cachefile_leaflock_pool(h->cf);
r = deserialize_brtnode_leaf_from_rbuf_11(result, magic, rb);
}
if (r!=0) goto died0;
//printf("%s:%d Ok got %lld n_children=%d\n", __FILE__, __LINE__, result->thisnodename, result->n_children);
if (result->height>0) {
// For height==0 we used the buf inside the OMT
toku_free(rb->buf);
rb->buf = NULL;
}
toku_trace("deserial done");
*brtnode = result;
//toku_verify_counts_11(result);
return 0;
}
// Verify the version-11 x1764 checksum stored in the last 4 bytes of the
// decompressed block against a checksum computed over everything before it.
// Returns 0 when the checksums match, toku_db_badformat() on mismatch or if
// the buffer is too small to even hold a checksum.
static int
verify_decompressed_brtnode_checksum (struct rbuf *rb) {
int r = 0;
if (rb->size >= 4) {
uint32_t verify_size = rb->size - 4; //Not counting the checksum
toku_trace("x1764 start");
uint32_t crc = x1764_memory_11(rb->buf, verify_size);
toku_trace("x1764");
uint32_t *crcp = (uint32_t*)(((uint8_t*)rb->buf) + verify_size);
uint32_t storedcrc = toku_dtoh32(*crcp);
if (crc!=storedcrc) {
printf("Bad CRC\n");
printf("%s:%d crc=%08x stored=%08x\n", __FILE__, __LINE__, crc, storedcrc);
r = toku_db_badformat();
}
}
else r = toku_db_badformat();
return r;
}
#define PAR_DECOMPRESS 1
#if PAR_DECOMPRESS
#include "workset.h"
struct decompress_work_11 {
struct work base;
void *compress_ptr;
void *uncompress_ptr;
u_int32_t compress_size;
u_int32_t uncompress_size;
};
// initialize the decompression work
// Fill in a decompression work item with its source (compressed) and
// destination (uncompressed) buffers and sizes.
// NOTE(review): dw->base (the workset link) is left untouched here —
// presumably initialized by the workset enqueue path; confirm.
static void
decompress_work_init_11(struct decompress_work_11 *dw,
void *compress_ptr, u_int32_t compress_size,
void *uncompress_ptr, u_int32_t uncompress_size) {
dw->compress_ptr = compress_ptr;
dw->compress_size = compress_size;
dw->uncompress_ptr = uncompress_ptr;
dw->uncompress_size = uncompress_size;
}
// decompress one block
// Decompress one work item's block with zlib's uncompress(); asserts that
// decompression succeeds and yields exactly the expected size.
static void
decompress_block(struct decompress_work_11 *dw) {
if (0) printf("%s:%d %x %p\n", __FUNCTION__, __LINE__, (int) toku_pthread_self(), dw);
uLongf destlen = dw->uncompress_size;
int r = uncompress(dw->uncompress_ptr, &destlen, dw->compress_ptr, dw->compress_size);
assert(destlen == dw->uncompress_size);
assert(r==Z_OK);
}
// Worker loop: repeatedly pull a decompression job off the shared workset
// and inflate it; return (with the original arg) once the set is drained.
static void *
decompress_worker_11(void *arg) {
    struct workset *ws = (struct workset *) arg;
    struct decompress_work_11 *dw;
    while ((dw = (struct decompress_work_11 *) workset_get(ws)) != NULL) {
        decompress_block(dw);
    }
    return arg;
}
#else
#define DO_DECOMPRESS_WORKER 0
// One unit of decompression work for the serial / thread-per-block path:
// inflate compress_size bytes at compress_ptr into uncompress_size bytes
// at uncompress_ptr.
struct decompress_work_11 {
    toku_pthread_t id;         // worker thread handle (used only when DO_DECOMPRESS_WORKER is enabled)
    void *compress_ptr;        // source: compressed sub-block bytes
    void *uncompress_ptr;      // destination for the inflated bytes
    u_int32_t compress_size;   // byte count at compress_ptr
    u_int32_t uncompress_size; // expected inflated size at uncompress_ptr
};
// Populate one decompression work record; the thread handle starts zeroed
// and is only filled in if a worker thread is later spawned for it.
static void init_decompress_work(struct decompress_work_11 *w,
                                 void *compress_ptr, u_int32_t compress_size,
                                 void *uncompress_ptr, u_int32_t uncompress_size) {
    memset(&w->id, 0, sizeof(w->id));
    w->compress_ptr = compress_ptr;
    w->compress_size = compress_size;
    w->uncompress_ptr = uncompress_ptr;
    w->uncompress_size = uncompress_size;
}
// Inflate one sub-block synchronously; abort on zlib failure or size mismatch.
static void do_decompress_work(struct decompress_work_11 *w) {
    uLongf destlen = w->uncompress_size;
    int r = uncompress(w->uncompress_ptr, &destlen,
                       w->compress_ptr, w->compress_size);
    // Verify zlib success before the size check: on failure destlen is
    // unreliable, and asserting on it first would hide the real cause.
    assert(r==Z_OK);
    assert(destlen==w->uncompress_size);
}
#if DO_DECOMPRESS_WORKER
// Thread entry point, defined below.
static void *decompress_worker_11(void *);
// Spawn a worker thread to decompress w; creation failure is fatal (assert).
// The thread handle is stored in w->id for the later join.
static void start_decompress_work(struct decompress_work_11 *w) {
    int r = toku_pthread_create(&w->id, NULL, decompress_worker_11, w);
    assert(r == 0);
}
// Block until the worker thread for w finishes; join failure is fatal (assert).
static void wait_decompress_work(struct decompress_work_11 *w) {
    void *unused_result;
    int r = toku_pthread_join(w->id, &unused_result);
    assert(r == 0);
}
// Thread entry point: perform the single decompression job handed in via arg,
// then return arg unchanged (pthread convention).
static void *decompress_worker_11(void *arg) {
    struct decompress_work_11 *job = (struct decompress_work_11 *) arg;
    do_decompress_work(job);
    return arg;
}
#endif
#endif
// Reconstruct the uncompressed image of a layout-version-11 brtnode from its
// raw on-disk block.  Layout of raw_block: magic header, sub-block count,
// per-sub-block (compressed_size, uncompressed_size) pairs, then the
// compressed sub-blocks back to back.  On success rb->buf is xmalloc'd here
// (caller owns it), rb->ndone is reset to 0, and the trailing checksum of the
// decompressed image has been verified.  Returns 0, or toku_db_badformat()
// on malformed sizes or a bad checksum.
static int
decompress_brtnode_from_raw_block_into_rbuf_11(u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum) {
    int r;
    int i;
    // get the number of compressed sub blocks
    int n_sub_blocks;
    int compression_header_offset;
    {
        n_sub_blocks = toku_dtoh32(*(u_int32_t*)(&raw_block[uncompressed_magic_len_11]));
        compression_header_offset = uncompressed_magic_len_11 + 4;
    }
    assert(0 < n_sub_blocks);
    // verify the sizes of the compressed sub blocks
    if (0 && n_sub_blocks != 1) printf("%s:%d %d\n", __FUNCTION__, __LINE__, n_sub_blocks);
    struct sub_block_sizes sub_block_sizes[n_sub_blocks];
    for (i=0; i<n_sub_blocks; i++) {
        u_int32_t compressed_size = toku_dtoh32(*(u_int32_t*)(&raw_block[compression_header_offset+8*i]));
        // Reject absurd sizes (0 or > 1GB) as corruption rather than asserting.
        if (compressed_size<=0 || compressed_size>(1<<30)) { r = toku_db_badformat(); return r; }
        u_int32_t uncompressed_size = toku_dtoh32(*(u_int32_t*)(&raw_block[compression_header_offset+8*i+4]));
        if (0) printf("Block %" PRId64 " Compressed size = %u, uncompressed size=%u\n", blocknum.b, compressed_size, uncompressed_size);
        if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { r = toku_db_badformat(); return r; }
        sub_block_sizes[i].compressed_size = compressed_size;
        sub_block_sizes[i].uncompressed_size = uncompressed_size;
    }
    // First compressed sub-block starts right after magic + compression header.
    unsigned char *compressed_data = raw_block + uncompressed_magic_len_11 + get_compression_header_size(BRT_LAYOUT_VERSION_11, n_sub_blocks);
    size_t uncompressed_size = get_sum_uncompressed_size_11(n_sub_blocks, sub_block_sizes);
    rb->size= uncompressed_magic_len_11 + uncompressed_size;
    assert(rb->size>0);
    rb->buf=toku_xmalloc(rb->size);
    // construct the uncompressed block from the header and compressed sub blocks
    memcpy(rb->buf, raw_block, uncompressed_magic_len_11);
#if PAR_DECOMPRESS
    // compute the number of additional threads needed for decompressing this node
    int T = num_cores; // T = min(#cores, #blocks) - 1
    if (T > n_sub_blocks)
        T = n_sub_blocks;
    if (T > 0)
        T = T - 1;     // threads in addition to the running thread
    // init the decompression work set
    struct workset ws;
    workset_init(&ws);
    // initialize the decompression work and add to the work set
    unsigned char *uncompressed_data = rb->buf+uncompressed_magic_len_11;
    struct decompress_work_11 decompress_work_11[n_sub_blocks];
    workset_lock(&ws);
    for (i = 0; i < n_sub_blocks; i++) {
        decompress_work_init_11(&decompress_work_11[i], compressed_data, sub_block_sizes[i].compressed_size, uncompressed_data, sub_block_sizes[i].uncompressed_size);
        // Advance both cursors past the sub-block just queued.
        uncompressed_data += sub_block_sizes[i].uncompressed_size;
        compressed_data += sub_block_sizes[i].compressed_size;
        workset_put_locked(&ws, &decompress_work_11[i].base);
    }
    workset_unlock(&ws);
    // decompress the sub-blocks
    if (0) printf("%s:%d Cores=%d Blocks=%d T=%d\n", __FUNCTION__, __LINE__, num_cores, n_sub_blocks, T);
    toku_pthread_t tids[T];
    threadset_create(tids, &T, decompress_worker_11, &ws);
    // This thread also participates instead of just waiting.
    decompress_worker_11(&ws);
    // cleanup
    threadset_join(tids, T);
    workset_destroy(&ws);
#else
    // decompress the sub blocks
    unsigned char *uncompressed_data = rb->buf+uncompressed_magic_len_11;
    struct decompress_work_11 decompress_work_11[n_sub_blocks];
    for (i=0; i<n_sub_blocks; i++) {
        init_decompress_work(&decompress_work_11[i], compressed_data, sub_block_sizes[i].compressed_size, uncompressed_data, sub_block_sizes[i].uncompressed_size);
        // Sub-block 0 is deferred so this thread can work on it while
        // (optionally) worker threads handle the rest.
        if (i>0) {
#if DO_DECOMPRESS_WORKER
            start_decompress_work(&decompress_work_11[i]);
#else
            do_decompress_work(&decompress_work_11[i]);
#endif
        }
        uncompressed_data += sub_block_sizes[i].uncompressed_size;
        compressed_data += sub_block_sizes[i].compressed_size;
    }
    do_decompress_work(&decompress_work_11[0]);
#if DO_DECOMPRESS_WORKER
    for (i=1; i<n_sub_blocks; i++)
        wait_decompress_work(&decompress_work_11[i]);
#endif
#endif
    toku_trace("decompress done");
    if (0) printf("First 4 bytes of uncompressed data are %02x%02x%02x%02x\n",
                  rb->buf[uncompressed_magic_len_11], rb->buf[uncompressed_magic_len_11+1],
                  rb->buf[uncompressed_magic_len_11+2], rb->buf[uncompressed_magic_len_11+3]);
    rb->ndone=0;
    r = verify_decompressed_brtnode_checksum(rb);
    return r;
}
// ################
// Read a serialized descriptor (version int followed by length-prefixed
// bytes) out of rb into desc.  When temporary is FALSE the byte payload is
// duplicated with toku_memdup so it outlives the rbuf's buffer; when TRUE
// the dbt aliases rbuf memory and is valid only while that buffer lives.
// A version-0 descriptor must carry no payload.
static void
deserialize_descriptor_from_rbuf_11(struct rbuf *rb, DESCRIPTOR desc, BOOL temporary) {
    desc->version = rbuf_int(rb);
    u_int32_t size;
    bytevec data;
    rbuf_bytes(rb, &data, &size);
    bytevec payload;
    if (size == 0) {
        payload = NULL;
    }
    else if (temporary) {
        payload = data;                     // alias rbuf memory; caller guarantees short lifetime
    }
    else {
        payload = toku_memdup(data, size);  // Cannot keep the reference from rbuf. Must copy.
        assert(payload);
    }
    toku_fill_dbt(&desc->dbt, payload, size);
    if (desc->version==0) assert(desc->dbt.size==0);
}
// Load this dictionary's descriptor from its on-disk location (offset/size
// recorded in the header's blocktable) into desc.  A size of 0 means "no
// descriptor" and desc is simply left zeroed.  The stored payload carries a
// trailing 4-byte x1764 checksum which is verified, and the descriptor bytes
// are toku_memdup'd so desc owns an independent copy.
static void
deserialize_descriptor_from_11(int fd, struct brt_header *h, DESCRIPTOR desc) {
    DISKOFF offset;
    DISKOFF size;
    toku_get_descriptor_offset_size(h->blocktable, &offset, &size);
    memset(desc, 0, sizeof(*desc));
    if (size > 0) {
        assert(size>=4); //4 for checksum
        {
            unsigned char *XMALLOC_N(size, dbuf);
            {
                // NOTE(review): the read is bracketed by the pwrite lock,
                // presumably to serialize against concurrent writers of this
                // file region — confirm against the file locking protocol.
                lock_for_pwrite();
                ssize_t r = pread(fd, dbuf, size, offset);
                assert(r==size);
                unlock_for_pwrite();
            }
            {
                // check the checksum
                u_int32_t x1764 = x1764_memory_11(dbuf, size-4);
                //printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk);
                u_int32_t stored_x1764 = toku_dtoh32(*(int*)(dbuf + size-4));
                assert(x1764 == stored_x1764);
            }
            {
                struct rbuf rb = {.buf = dbuf, .size = size, .ndone = 0};
                //Not temporary; must have a toku_memdup'd copy.
                deserialize_descriptor_from_rbuf_11(&rb, desc, FALSE);
            }
            // The deserialized descriptor must account for exactly the bytes read.
            assert(toku_serialize_descriptor_size_11(desc)+4 == size);
            toku_free(dbuf);
        }
    }
}
// We only deserialize brt header once and then share everything with all the brts.
// Deserialize a layout-version-11 brt header from rb (whose checksum was
// already validated by the caller) into a freshly-allocated brt_header,
// loading the block translation table and the descriptor from disk via fd.
// On success *brth receives the header and 0 is returned; on failure the
// partial header is freed and errno or EINVAL is returned.  The rbuf is
// "stolen": *rb is zeroed and its buffer is freed here on success.
static int
deserialize_brtheader_11 (int fd, struct rbuf *rb, struct brt_header **brth) {
    // We already know:
    //  we have an rbuf representing the header.
    //  The checksum has been validated
    //Steal rbuf (used to simplify merge, reduce diff size, and keep old code)
    struct rbuf rc = *rb;
    memset(rb, 0, sizeof(*rb));
    //Verification of initial elements.
    {
        //Check magic number
        bytevec magic;
        rbuf_literal_bytes(&rc, &magic, 8);
        assert(memcmp(magic,"tokudata",8)==0);
    }
    struct brt_header *CALLOC(h);
    if (h==0) return errno;
    int ret=-1;
    // Error exit (reached only via goto): free the header and report ret.
    if (0) { died1: toku_free(h); return ret; }
    h->type = BRTHEADER_CURRENT;
    h->checkpoint_header = NULL;
    h->dirty=0;
    h->panic = 0;
    h->panic_string = 0;
    toku_list_init(&h->live_brts);
    toku_list_init(&h->zombie_brts);
    //version MUST be in network order on disk regardless of disk order
    h->layout_version = rbuf_network_int(&rc);
    //TODO: #1924
    assert(h->layout_version==BRT_LAYOUT_VERSION_11);
    //Size MUST be in network order regardless of disk order.
    u_int32_t size = rbuf_network_int(&rc);
    assert(size==rc.size);
    bytevec tmp_byte_order_check;
    rbuf_literal_bytes(&rc, &tmp_byte_order_check, 8); //Must not translate byte order
    int64_t byte_order_stored = *(int64_t*)tmp_byte_order_check;
    assert(byte_order_stored == toku_byte_order_host);
    h->checkpoint_count = rbuf_ulonglong(&rc);
    h->checkpoint_lsn = rbuf_lsn(&rc);
    h->nodesize = rbuf_int(&rc);
    DISKOFF translation_address_on_disk = rbuf_diskoff(&rc);
    DISKOFF translation_size_on_disk = rbuf_diskoff(&rc);
    assert(translation_address_on_disk>0);
    assert(translation_size_on_disk>0);
    // printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, h->translated_blocknum_limit, h->block_translation_address_on_disk);
    //Load translation table
    {
        lock_for_pwrite();
        unsigned char *XMALLOC_N(translation_size_on_disk, tbuf);
        {
            // This cast is messed up in 32-bits if the block translation table is ever more than 4GB. But in that case, the translation table itself won't fit in main memory.
            ssize_t r = pread(fd, tbuf, translation_size_on_disk, translation_address_on_disk);
            assert(r==translation_size_on_disk);
        }
        unlock_for_pwrite();
        // Create table and read in data.
        toku_blocktable_create_from_buffer(&h->blocktable,
                                           translation_address_on_disk,
                                           translation_size_on_disk,
                                           tbuf,
                                           TRUE);
        toku_free(tbuf);
    }
    h->root = rbuf_blocknum(&rc);
    h->root_hash.valid = FALSE;
    h->flags = rbuf_int(&rc);
    // Descriptor lives elsewhere on disk; fetch it via the blocktable.
    deserialize_descriptor_from_11(fd, h, &h->descriptor);
    h->layout_version_original = rbuf_int(&rc);
    (void)rbuf_int(&rc); //Read in checksum and ignore (already verified).
    // Every byte of the header buffer must have been consumed.
    if (rc.ndone!=rc.size) {ret = EINVAL; goto died1;}
    toku_free(rc.buf);
    rc.buf = NULL;
    *brth = h;
    return 0;
}

18
newbrt/backwards_11.h Normal file
View file

@ -0,0 +1,18 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#ifndef BACKWARD_11_H
#define BACKWARD_11_H
static int upgrade_brtheader_11_12 (int fd, struct brt_header **brth_11, struct brt_header **brth_12);
static int upgrade_brtnode_11_12 (BRTNODE *brtnode_11, BRTNODE *brtnode_12);
static int deserialize_brtheader_11 (int fd, struct rbuf *rb, struct brt_header **brth);
static int decompress_brtnode_from_raw_block_into_rbuf_11(u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum);
static int deserialize_brtnode_from_rbuf_11 (BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h, struct rbuf *rb);
#endif

View file

@ -84,11 +84,13 @@ static void
brtheader_set_dirty(struct brt_header *h, BOOL for_checkpoint){
assert(h->blocktable->is_locked);
assert(h->type == BRTHEADER_CURRENT);
h->dirty = 1;
if (for_checkpoint) {
assert(h->checkpoint_header->type == BRTHEADER_CHECKPOINT_INPROGRESS);
h->checkpoint_header->dirty = 1;
}
else {
h->dirty = 1;
}
}
//fd is protected (must be holding fdlock)
@ -131,6 +133,22 @@ copy_translation(struct translation * dst, struct translation * src, enum transl
dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff = diskoff_unused;
}
int64_t
toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt) {
BLOCKNUM b;
struct translation *t = &bt->current;
int64_t num_blocks = 0;
{
//Reserved blocknums do not get upgraded; They are part of the header.
for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b; b.b++) {
if (t->block_translation[b.b].size != size_is_free) {
num_blocks++;
}
}
}
return num_blocks;
}
static void
maybe_optimize_translation(struct translation *t) {
//Reduce 'smallest_never_used_blocknum.b' (completely free blocknums instead of just
@ -727,7 +745,14 @@ static void
translation_deserialize_from_buffer(struct translation *t, // destination into which to deserialize
DISKOFF location_on_disk, //Location of translation_buffer
u_int64_t size_on_disk,
unsigned char * translation_buffer) { // buffer with serialized translation
unsigned char * translation_buffer
#if BRT_LAYOUT_MIN_SUPPORTED_VERSION <= BRT_LAYOUT_VERSION_11
, BOOL invert_checksum
#else
#error The above code block is obsolete
#endif
) { // buffer with serialized translation
assert(location_on_disk!=0);
t->type = TRANSLATION_CHECKPOINTED;
{
@ -736,6 +761,13 @@ translation_deserialize_from_buffer(struct translation *t, // destination int
u_int64_t offset = size_on_disk - 4;
//printf("%s:%d read from %ld (x1764 offset=%ld) size=%ld\n", __FILE__, __LINE__, block_translation_address_on_disk, offset, block_translation_size_on_disk);
u_int32_t stored_x1764 = toku_dtoh32(*(int*)(translation_buffer + offset));
#if BRT_LAYOUT_MIN_SUPPORTED_VERSION <= BRT_LAYOUT_VERSION_11
if (invert_checksum) {
x1764 = ~x1764;
}
#else
#error The above code block is obsolete
#endif
assert(x1764 == stored_x1764);
}
struct rbuf rt;
@ -783,9 +815,10 @@ void
toku_blocktable_create_from_buffer(BLOCK_TABLE *btp,
DISKOFF location_on_disk, //Location of translation_buffer
DISKOFF size_on_disk,
unsigned char *translation_buffer) {
unsigned char *translation_buffer,
BOOL invert_checksum) {
BLOCK_TABLE bt = blocktable_create_internal();
translation_deserialize_from_buffer(&bt->checkpointed, location_on_disk, size_on_disk, translation_buffer);
translation_deserialize_from_buffer(&bt->checkpointed, location_on_disk, size_on_disk, translation_buffer, invert_checksum);
blocktable_note_translation(bt->block_allocator, &bt->checkpointed);
// we just filled in checkpointed, now copy it to current.
copy_translation(&bt->current, &bt->checkpointed, TRANSLATION_CURRENT);

View file

@ -21,7 +21,7 @@ struct block_translation_pair {
};
void toku_blocktable_create_new(BLOCK_TABLE *btp);
void toku_blocktable_create_from_buffer(BLOCK_TABLE *btp, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer);
void toku_blocktable_create_from_buffer(BLOCK_TABLE *btp, DISKOFF location_on_disk, DISKOFF size_on_disk, unsigned char *translation_buffer, BOOL invert_checksum);
void toku_blocktable_destroy(BLOCK_TABLE *btp);
void toku_brtheader_lock(struct brt_header *h);
@ -73,6 +73,8 @@ void toku_block_table_get_fragmentation_unlocked(BLOCK_TABLE bt, TOKU_DB_FRAGMEN
//Requires: blocktable lock is held.
//Requires: report->file_size_bytes is already filled in.
int64_t toku_block_get_blocks_in_use_unlocked(BLOCK_TABLE bt);
//Unmovable reserved first, then reallocable.
// We reserve one blocknum for the translation table itself.
enum {RESERVED_BLOCKNUM_NULL =0,

View file

@ -88,7 +88,6 @@ typedef struct brtnode *BRTNODE;
/* Internal nodes. */
struct brtnode {
enum typ_tag tag;
struct descriptor *desc;
unsigned int nodesize;
int ever_been_written;
unsigned int flags;
@ -170,11 +169,12 @@ struct brt_header {
int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging)
int layout_version_read_from_disk; // transient, not serialized to disk
BOOL upgrade_brt_performed; // initially FALSE, set TRUE when brt has been fully updated (even though nodes may not have been)
uint64_t num_blocks_to_upgrade; // Number of blocks still not newest version. When we release layout 13 we may need to turn this to an array.
unsigned int nodesize;
BLOCKNUM root; // roots of the dictionary
struct remembered_hash root_hash; // hash of the root offset.
unsigned int flags;
struct descriptor descriptor;
DESCRIPTOR_S descriptor;
u_int64_t root_put_counter; // the generation number of the brt
@ -200,8 +200,7 @@ struct brt {
unsigned int flags;
BOOL did_set_flags;
BOOL did_set_descriptor;
struct descriptor temp_descriptor;
toku_dbt_upgradef dbt_userformat_upgrade;
DESCRIPTOR_S temp_descriptor;
int (*compare_fun)(DB*,const DBT*,const DBT*);
int (*dup_compare)(DB*,const DBT*,const DBT*);
DB *db; // To pass to the compare fun, and close once transactions are done.
@ -230,14 +229,14 @@ int toku_deserialize_brtnode_from (int fd, BLOCKNUM off, u_int32_t /*fullhash*/,
unsigned int toku_serialize_brtnode_size(BRTNODE node); /* How much space will it take? */
int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
void toku_verify_counts(BRTNODE);
void toku_verify_or_set_counts(BRTNODE, BOOL);
int toku_serialize_brt_header_size (struct brt_header *h);
int toku_serialize_brt_header_to (int fd, struct brt_header *h);
int toku_serialize_brt_header_to_wbuf (struct wbuf *, struct brt_header *h, int64_t address_translation, int64_t size_translation);
int toku_deserialize_brtheader_from (int fd, struct brt_header **brth);
int toku_serialize_descriptor_contents_to_fd(int fd, const struct descriptor *desc, DISKOFF offset);
void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const struct descriptor *desc);
int toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset);
void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc);
void toku_brtnode_free (BRTNODE *node);
@ -347,10 +346,10 @@ enum brt_layout_version_e {
BRT_LAYOUT_VERSION_9 = 9, // Diff from 8 to 9: Variable-sized blocks and compression.
BRT_LAYOUT_VERSION_10 = 10, // Diff from 9 to 10: Variable number of compressed sub-blocks per block, disk byte order == intel byte order, Subtree estimates instead of just leafentry estimates, translation table, dictionary descriptors, checksum in header, subdb support removed from brt layer
BRT_LAYOUT_VERSION_11 = 11, // Diff from 10 to 11: Nested transaction leafentries (completely redesigned). BRT_CMDs on disk now support XIDS (multiple txnids) instead of exactly one.
BRT_LAYOUT_VERSION_12 = 12, // Diff from 11 to 12: Added BRT_CMD 'BRT_INSERT_NO_OVERWRITE'
BRT_LAYOUT_VERSION_12 = 12, // Diff from 11 to 12: Added BRT_CMD 'BRT_INSERT_NO_OVERWRITE', compressed block format, num old blocks
BRT_NEXT_VERSION, // the version after the current version
BRT_LAYOUT_VERSION = BRT_NEXT_VERSION-1, // A hack so I don't have to change this line.
BRT_LAYOUT_MIN_SUPPORTED_VERSION = BRT_LAYOUT_VERSION // Minimum version supported without transparent upgrade
BRT_LAYOUT_MIN_SUPPORTED_VERSION = BRT_LAYOUT_VERSION_12 // Minimum version supported
};
void toku_brtheader_free (struct brt_header *h);
@ -364,6 +363,15 @@ int toku_db_badformat(void);
int toku_brt_remove_on_commit(TOKUTXN child, DBT* iname_dbt_p);
int toku_brt_remove_now(CACHETABLE ct, DBT* iname_dbt_p);
typedef struct brt_upgrade_status {
u_int64_t header;
u_int64_t nonleaf;
u_int64_t leaf;
} BRT_UPGRADE_STATUS_S, *BRT_UPGRADE_STATUS;
void toku_brt_get_upgrade_status(BRT_UPGRADE_STATUS);
C_END
#endif

View file

@ -4,14 +4,21 @@
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "includes.h"
#include "toku_atomic.h"
#include "backwards_10.h"
#include "backwards_11.h"
// NOTE: The backwards compatability functions are in a file that is included at the END of this file.
static int deserialize_brtheader_10 (int fd, struct rbuf *rb, struct brt_header **brth);
static int upgrade_brtheader_10_11 (struct brt_header **brth_10, struct brt_header **brth_11);
static int decompress_brtnode_from_raw_block_into_rbuf_10(u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum);
static int deserialize_brtnode_from_rbuf_10 (BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h, struct rbuf *rb);
static int upgrade_brtnode_10_11 (BRTNODE *brtnode_10, BRTNODE *brtnode_11);
static BRT_UPGRADE_STATUS_S upgrade_status; // accountability, used in backwards_x.c
void
toku_brt_get_upgrade_status (BRT_UPGRADE_STATUS s) {
*s = upgrade_status;
}
// performance tracing
#define DO_TOKU_TRACE 0
@ -172,8 +179,7 @@ enum {
4+ // layout_version
4), // layout_version_original
extended_node_header_overhead = (0+ // descriptor (variable, not counted here)
4+ // nodesize
extended_node_header_overhead = (4+ // nodesize
4+ // flags
4+ // height
4+ // random for fingerprint
@ -194,7 +200,6 @@ addupsize (OMTVALUE lev, u_int32_t UU(idx), void *vp) {
static unsigned int
toku_serialize_brtnode_size_slow (BRTNODE node) {
unsigned int size = node_header_overhead + extended_node_header_overhead;
size += toku_serialize_descriptor_size(node->desc);
if (node->height > 0) {
unsigned int hsize=0;
unsigned int csize=0;
@ -236,7 +241,6 @@ unsigned int
toku_serialize_brtnode_size (BRTNODE node) {
unsigned int result = node_header_overhead + extended_node_header_overhead;
assert(sizeof(toku_off_t)==8);
result += toku_serialize_descriptor_size(node->desc);
if (node->height > 0) {
result += 4; /* subtree fingerpirnt */
result += 4; /* n_children */
@ -277,9 +281,6 @@ serialize_node_header(BRTNODE node, struct wbuf *wbuf) {
wbuf_nocrc_int(wbuf, node->layout_version);
wbuf_nocrc_int(wbuf, node->layout_version_original);
// serialize the descriptor
toku_serialize_descriptor_contents_to_wbuf(wbuf, node->desc);
//printf("%s:%d %lld.calculated_size=%d\n", __FILE__, __LINE__, off, calculated_size);
wbuf_nocrc_uint(wbuf, node->nodesize);
wbuf_nocrc_uint(wbuf, node->flags);
@ -518,8 +519,6 @@ toku_serialize_brtnode_to_memory (BRTNODE node, int UU(n_workitems), int UU(n_th
int
toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct brt_header *h, int n_workitems, int n_threads, BOOL for_checkpoint) {
assert(node->desc == &h->descriptor);
size_t n_to_write;
char *compressed_buf;
{
@ -550,7 +549,7 @@ toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct brt_h
return 0;
}
static void deserialize_descriptor_from_rbuf(struct rbuf *rb, struct descriptor *desc, BOOL temporary);
static void deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, BOOL temporary);
#include "workset.h"
@ -843,7 +842,6 @@ deserialize_brtnode_from_rbuf (BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *b
if (0) { died0: toku_free(result); }
return r;
}
result->desc = &h->descriptor;
result->ever_been_written = 1;
//printf("Deserializing %lld datasize=%d\n", off, datasize);
@ -853,13 +851,6 @@ deserialize_brtnode_from_rbuf (BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *b
assert(result->layout_version == BRT_LAYOUT_VERSION);
result->layout_version_original = rbuf_int(rb);
result->layout_version_read_from_disk = result->layout_version;
{
//Restrict scope for now since we do not support upgrades.
struct descriptor desc;
//desc.dbt.data is TEMPORARY. Will be unusable when the rc buffer is freed.
deserialize_descriptor_from_rbuf(rb, &desc, TRUE);
assert(desc.version == result->desc->version); //We do not yet support upgrading the dbts.
}
result->nodesize = rbuf_int(rb);
result->thisnodename = blocknum;
@ -892,10 +883,8 @@ deserialize_brtnode_from_rbuf (BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *b
}
static int
decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum) {
decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
toku_trace("decompress");
int r;
// get the number of compressed sub blocks
int n_sub_blocks;
n_sub_blocks = toku_dtoh32(*(u_int32_t*)(&raw_block[node_header_overhead]));
@ -903,6 +892,15 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, struct rbuf *rb, BLOCKN
// verify the number of sub blocks
assert(0 <= n_sub_blocks && n_sub_blocks <= max_sub_blocks);
{ // verify the header checksum
u_int32_t header_length = node_header_overhead + sub_block_header_size(n_sub_blocks);
assert(header_length <= raw_block_size);
u_int32_t xsum = x1764_memory(raw_block, header_length);
u_int32_t stored_xsum = toku_dtoh32(*(u_int32_t *)(raw_block + header_length));
assert(xsum == stored_xsum);
}
int r;
// deserialize the sub block header
struct sub_block sub_block[n_sub_blocks];
u_int32_t *sub_block_header = (u_int32_t *) &raw_block[node_header_overhead+4];
@ -954,14 +952,14 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, struct rbuf *rb, BLOCKN
}
static int
decompress_from_raw_block_into_rbuf_versioned(u_int32_t version, u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum) {
decompress_from_raw_block_into_rbuf_versioned(u_int32_t version, u_int8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
int r;
switch (version) {
case BRT_LAYOUT_VERSION_10:
r = decompress_brtnode_from_raw_block_into_rbuf_10(raw_block, rb, blocknum);
case BRT_LAYOUT_VERSION_11:
r = decompress_brtnode_from_raw_block_into_rbuf_11(raw_block, rb, blocknum);
break;
case BRT_LAYOUT_VERSION:
r = decompress_from_raw_block_into_rbuf(raw_block, rb, blocknum);
r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum);
break;
default:
assert(FALSE);
@ -972,26 +970,32 @@ decompress_from_raw_block_into_rbuf_versioned(u_int32_t version, u_int8_t *raw_b
static int
deserialize_brtnode_from_rbuf_versioned (u_int32_t version, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h, struct rbuf *rb) {
int r = 0;
BRTNODE brtnode_10 = NULL;
BRTNODE brtnode_11 = NULL;
BRTNODE brtnode_12 = NULL;
int upgrade = 0;
switch (version) {
case BRT_LAYOUT_VERSION_10:
case BRT_LAYOUT_VERSION_11:
if (!upgrade)
r = deserialize_brtnode_from_rbuf_10(blocknum, fullhash, &brtnode_10, h, rb);
r = deserialize_brtnode_from_rbuf_11(blocknum, fullhash, &brtnode_11, h, rb);
upgrade++;
if (r==0)
r = upgrade_brtnode_10_11(&brtnode_10, &brtnode_11);
r = upgrade_brtnode_11_12(&brtnode_11, &brtnode_12);
//Fall through on purpose.
case BRT_LAYOUT_VERSION:
if (!upgrade)
r = deserialize_brtnode_from_rbuf(blocknum, fullhash, &brtnode_11, h, rb);
r = deserialize_brtnode_from_rbuf(blocknum, fullhash, &brtnode_12, h, rb);
if (r==0) {
assert(brtnode_11);
*brtnode = brtnode_11;
assert(brtnode_12);
*brtnode = brtnode_12;
}
if (upgrade && r == 0) {
toku_brtheader_lock(h);
assert(h->num_blocks_to_upgrade>0);
h->num_blocks_to_upgrade--;
toku_brtheader_unlock(h);
(*brtnode)->dirty = 1;
}
if (upgrade && r == 0) (*brtnode)->dirty = 1;
break; // this is the only break
default:
assert(FALSE);
@ -1037,15 +1041,7 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
}
}
// verify the header checksum
u_int32_t n_sub_blocks = toku_dtoh32(*(u_int32_t *)(raw_block + node_header_overhead));
u_int32_t header_length = node_header_overhead + sub_block_header_size(n_sub_blocks);
assert(header_length <= size);
u_int32_t xsum = x1764_memory(raw_block, header_length);
u_int32_t stored_xsum = toku_dtoh32(*(u_int32_t *)(raw_block + header_length));
assert(xsum == stored_xsum);
r = decompress_from_raw_block_into_rbuf_versioned(layout_version, raw_block, rb, blocknum);
r = decompress_from_raw_block_into_rbuf_versioned(layout_version, raw_block, size, rb, blocknum);
if (r!=0) goto cleanup;
*layout_version_p = layout_version;
@ -1097,8 +1093,8 @@ toku_maybe_upgrade_brt(BRT t) { // possibly do some work to complete the version
int version = t->h->layout_version_read_from_disk;
if (!t->h->upgrade_brt_performed) {
switch (version) {
case BRT_LAYOUT_VERSION_10:
r = toku_brt_broadcast_commit_all(t);
case BRT_LAYOUT_VERSION_11:
r = 0;
//Fall through on purpose.
case BRT_LAYOUT_VERSION:
if (r == 0) {
@ -1144,7 +1140,8 @@ sum_item (OMTVALUE lev, u_int32_t UU(idx), void *vsi) {
return 0;
}
void toku_verify_counts (BRTNODE node) {
void
toku_verify_or_set_counts (BRTNODE node, BOOL set_fingerprints) {
/*foo*/
if (node->height==0) {
assert(node->u.l.buffer);
@ -1155,6 +1152,9 @@ void toku_verify_counts (BRTNODE node) {
assert(sum_info.msum == node->u.l.buffer_mempool.free_offset - node->u.l.buffer_mempool.frag_size);
u_int32_t fps = node->rand4fingerprint * sum_info.fp;
if (set_fingerprints) {
node->local_fingerprint = fps;
}
assert(fps==node->local_fingerprint);
} else {
unsigned int sum = 0;
@ -1162,6 +1162,17 @@ void toku_verify_counts (BRTNODE node) {
sum += BNC_NBYTESINBUF(node,i);
// We don't rally care of the later buffers have garbage in them. Valgrind would do a better job noticing if we leave it uninitialized.
// But for now the code always initializes the later tables so they are 0.
uint32_t fp = 0;
int i;
for (i=0; i<node->u.n.n_children; i++)
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid,
{
fp += node->rand4fingerprint * toku_calc_fingerprint_cmd(type, xid, key, keylen, data, datalen);
});
if (set_fingerprints) {
node->local_fingerprint = fp;
}
assert(fp==node->local_fingerprint);
assert(sum==node->u.n.n_bytes_in_buffers);
}
}
@ -1171,12 +1182,12 @@ serialize_brt_header_min_size (u_int32_t version) {
u_int32_t size = 0;
switch(version) {
case BRT_LAYOUT_VERSION_12:
case BRT_LAYOUT_VERSION_11:
size += 4; // original_version
size += 8; // Number of blocks in old version.
// fall through to add up bytes in previous version
case BRT_LAYOUT_VERSION_10:
case BRT_LAYOUT_VERSION_11:
size += (+8 // "tokudata"
+4 // version
+4 // original_version
+4 // size
+8 // byte order verification
+8 // checkpoint_count
@ -1221,6 +1232,7 @@ int toku_serialize_brt_header_to_wbuf (struct wbuf *wbuf, struct brt_header *h,
wbuf_BLOCKNUM(wbuf, h->root);
wbuf_int(wbuf, h->flags);
wbuf_int(wbuf, h->layout_version_original);
wbuf_ulonglong(wbuf, h->num_blocks_to_upgrade);
u_int32_t checksum = x1764_finish(&wbuf->checksum);
wbuf_int(wbuf, checksum);
assert(wbuf->ndone == wbuf->size);
@ -1287,7 +1299,7 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) {
}
u_int32_t
toku_serialize_descriptor_size(const struct descriptor *desc) {
toku_serialize_descriptor_size(const DESCRIPTOR desc) {
//Checksum NOT included in this. Checksum only exists in header's version.
u_int32_t size = 4+ //version
4; //size
@ -1296,7 +1308,7 @@ toku_serialize_descriptor_size(const struct descriptor *desc) {
}
void
toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const struct descriptor *desc) {
toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc) {
if (desc->version==0) assert(desc->dbt.size==0);
wbuf_int(wb, desc->version);
wbuf_bytes(wb, desc->dbt.data, desc->dbt.size);
@ -1306,7 +1318,7 @@ toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const struct descrip
//descriptor.
//Descriptors are NOT written during the header checkpoint process.
int
toku_serialize_descriptor_contents_to_fd(int fd, const struct descriptor *desc, DISKOFF offset) {
toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset) {
int r = 0;
// make the checksum
int64_t size = toku_serialize_descriptor_size(desc)+4; //4 for checksum
@ -1330,7 +1342,7 @@ toku_serialize_descriptor_contents_to_fd(int fd, const struct descriptor *desc,
}
static void
deserialize_descriptor_from_rbuf(struct rbuf *rb, struct descriptor *desc, BOOL temporary) {
deserialize_descriptor_from_rbuf(struct rbuf *rb, DESCRIPTOR desc, BOOL temporary) {
desc->version = rbuf_int(rb);
u_int32_t size;
bytevec data;
@ -1351,7 +1363,7 @@ deserialize_descriptor_from_rbuf(struct rbuf *rb, struct descriptor *desc, BOOL
}
static void
deserialize_descriptor_from(int fd, struct brt_header *h, struct descriptor *desc) {
deserialize_descriptor_from(int fd, struct brt_header *h, DESCRIPTOR desc) {
DISKOFF offset;
DISKOFF size;
toku_get_descriptor_offset_size(h->blocktable, &offset, &size);
@ -1454,7 +1466,8 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
toku_blocktable_create_from_buffer(&h->blocktable,
translation_address_on_disk,
translation_size_on_disk,
tbuf);
tbuf,
FALSE /*not version 11 or older */ );
toku_free(tbuf);
}
@ -1463,6 +1476,7 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
h->flags = rbuf_int(&rc);
deserialize_descriptor_from(fd, h, &h->descriptor);
h->layout_version_original = rbuf_int(&rc);
h->num_blocks_to_upgrade = rbuf_ulonglong(&rc);
(void)rbuf_int(&rc); //Read in checksum and ignore (already verified).
if (rc.ndone!=rc.size) {ret = EINVAL; goto died1;}
toku_free(rc.buf);
@ -1473,31 +1487,36 @@ deserialize_brtheader (int fd, struct rbuf *rb, struct brt_header **brth) {
//TODO: When version 12 exists, add case for version 11 that looks like version 10 case,
// but calls deserialize_brtheader_11() and upgrade_11_12()
//TODO: When version 13 exists, add case for version 12 that looks like version 10 case,
// but calls deserialize_brtheader_12() and upgrade_12_13()
static int
deserialize_brtheader_versioned (int fd, struct rbuf *rb, struct brt_header **brth, u_int32_t version) {
int rval;
struct brt_header *brth_10 = NULL;
struct brt_header *brth_11 = NULL;
struct brt_header *brth_12 = NULL;
int upgrade = 0;
switch(version) {
case BRT_LAYOUT_VERSION_10:
case BRT_LAYOUT_VERSION_11:
if (!upgrade)
rval = deserialize_brtheader_10(fd, rb, &brth_10);
rval = deserialize_brtheader_11(fd, rb, &brth_11);
upgrade++;
if (rval == 0)
rval = upgrade_brtheader_10_11(&brth_10, &brth_11);
rval = upgrade_brtheader_11_12(fd, &brth_11, &brth_12);
//Fall through on purpose.
case BRT_LAYOUT_VERSION:
if (!upgrade)
rval = deserialize_brtheader (fd, rb, &brth_11);
rval = deserialize_brtheader (fd, rb, &brth_12);
if (rval == 0) {
assert(brth_11);
*brth = brth_11;
assert(brth_12);
*brth = brth_12;
}
if (upgrade && rval == 0) (*brth)->dirty = 1;
if (upgrade && rval == 0) {
toku_brtheader_lock(*brth);
(*brth)->num_blocks_to_upgrade = toku_block_get_blocks_in_use_unlocked((*brth)->blocktable);
(*brth)->dirty = 1;
toku_brtheader_unlock(*brth);
}
break; // this is the only break
default:
assert(FALSE);
@ -1582,6 +1601,13 @@ deserialize_brtheader_from_fd_into_rbuf(int fd, toku_off_t offset, struct rbuf *
//Verify checksum
u_int32_t calculated_x1764 = x1764_memory(rb->buf, rb->size-4);
u_int32_t stored_x1764 = toku_dtoh32(*(int*)(rb->buf+rb->size-4));
#if BRT_LAYOUT_MIN_SUPPORTED_VERSION <= BRT_LAYOUT_VERSION_11
if (version<=BRT_LAYOUT_VERSION_11) {
calculated_x1764 = ~calculated_x1764;
}
#else
#error The above code block is obsolete
#endif
if (calculated_x1764!=stored_x1764) r = TOKUDB_DICTIONARY_NO_HEADER; //Header useless
}
if (r==0) {
@ -1869,6 +1895,7 @@ static int
deserialize_rollback_log_from_rbuf_versioned (u_int32_t version, BLOCKNUM blocknum, u_int32_t fullhash,
ROLLBACK_LOG_NODE *log,
struct brt_header *h, struct rbuf *rb) {
//Upgrade is not necessary really here. Rollback log nodes do not survive version changes.
int r = 0;
ROLLBACK_LOG_NODE rollback_log_node = NULL;
@ -1923,5 +1950,5 @@ cleanup:
// NOTE: Backwards compatibility functions are in the included .c file(s):
#include "backwards_10.c"
#include "backwards_11.c"

View file

@ -73,7 +73,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt);
if (r!=0) return r;
BRTNODE node=node_v;
toku_verify_counts(node);
toku_verify_or_set_counts(node, FALSE);
assert(node->height==0);
size_t lesize, disksize;
@ -114,7 +114,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke
node->dirty=1;
*subtree_fingerprint = node->local_fingerprint;
toku_verify_counts(node);
toku_verify_or_set_counts(node, FALSE);
r = toku_unpin_brtnode(brt, node_v);
return r;

View file

@ -31,7 +31,7 @@ static void verify_local_fingerprint (BRTNODE node) {
});
assert(fp==node->local_fingerprint);
} else {
toku_verify_counts(node);
toku_verify_or_set_counts(node, FALSE);
}
}

View file

@ -227,7 +227,7 @@ int toku_brt_debug_mode = 0;
//#define SLOW
#ifdef SLOW
#define VERIFY_NODE(t,n) (toku_verify_counts(n), toku_verify_estimates(t,n))
#define VERIFY_NODE(t,n) (toku_verify_or_set_counts(n, FALSE), toku_verify_estimates(t,n))
#else
#define VERIFY_NODE(t,n) ((void)0)
#endif
@ -648,7 +648,6 @@ initialize_empty_brtnode (BRT t, BRTNODE n, BLOCKNUM nodename, int height, size_
// Effect: Fill in N as an empty brtnode.
{
n->tag = TYP_BRTNODE;
n->desc = &t->h->descriptor;
n->nodesize = t->h->nodesize;
n->flags = t->flags;
n->thisnodename = nodename;
@ -3009,6 +3008,7 @@ brt_init_header_partial (BRT t) {
if (t->h->cf!=NULL) assert(t->h->cf == t->cf);
t->h->cf = t->cf;
t->h->nodesize=t->nodesize;
t->h->num_blocks_to_upgrade = 0;
compute_and_fill_remembered_hash(t);
@ -3152,6 +3152,44 @@ verify_builtin_comparisons_consistent(BRT t, u_int32_t flags) {
return 0;
}
//if r==0, then frees/takes over descriptor_dbt.data
// Install a new descriptor (d) into the brt header, if it differs from the
// one currently stored there.
// On success (r==0) this function takes over d->dbt.data: it is either stored
// in the header or freed, and d->dbt.data is set to NULL so the caller must
// not free/use it.  On failure (r!=0, via the goto paths) d->dbt.data is left
// untouched and remains the caller's responsibility.
int
toku_maybe_upgrade_descriptor(BRT t, DESCRIPTOR d, BOOL do_log, TOKUTXN txn) {
    int r = 0;
    //txn is only for access to logger
    if (t->h->descriptor.version!=d->version ||
        t->h->descriptor.dbt.size!=d->dbt.size ||
        memcmp(t->h->descriptor.dbt.data, d->dbt.data, d->dbt.size)) {
        // Descriptor is actually changing; a replacement must carry a
        // strictly greater version number.
        if (d->version <= t->h->descriptor.version) {
            //Changing descriptor requires upping the version.
            r = EINVAL;
            goto cleanup;
        }
        if (do_log) {
            //If we didn't log fcreate (which contains descriptor)
            //we need to log descriptor now.
            r = toku_logger_log_descriptor(txn, toku_cachefile_filenum(t->cf), d);
            if (r!=0) goto cleanup;
        }
        DISKOFF offset;
        // Reserve space in the blocktable for the serialized descriptor.
        //4 for checksum
        toku_realloc_descriptor_on_disk(t->h->blocktable, toku_serialize_descriptor_size(d)+4, &offset, t->h);
        {
            // Pin the fd while writing so the cachefile cannot be
            // closed/redirected underneath us.
            int fd = toku_cachefile_get_and_pin_fd (t->cf);
            r = toku_serialize_descriptor_contents_to_fd(fd, d, offset);
            toku_cachefile_unpin_fd(t->cf);
        }
        if (r!=0) goto cleanup;
        // On-disk copy succeeded: replace the in-memory descriptor,
        // freeing the old one's payload.  Header now owns d->dbt.data.
        if (t->h->descriptor.dbt.data) toku_free(t->h->descriptor.dbt.data);
        t->h->descriptor = *d;
    }
    else toku_free(d->dbt.data);  // identical to stored descriptor: discard the copy
    d->dbt.data = NULL;  // success paths only; error paths jump past this
cleanup:
    return r;
}
// This is the actual open, used for various purposes, such as normal use, recovery, and redirect.
// fname_in_env is the iname, relative to the env_dir (data_dir is already in iname as prefix)
static int
@ -3172,7 +3210,6 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
assert(is_create || !only_create);
t->db = db;
BOOL log_fopen = FALSE; // set true if we're opening a pre-existing file
BOOL did_create = FALSE;
FILENUM reserved_filenum = use_filenum;
{
@ -3208,8 +3245,6 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
fname_in_env,
use_reserved_filenum||did_create, reserved_filenum, did_create);
if (r != 0) goto died1;
if (!did_create)
log_fopen = TRUE; //Log of fopen must be delayed till flags are available
}
if (r!=0) {
died_after_open:
@ -3254,12 +3289,22 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
}
}
int use_reserved_dict_id = use_dictionary_id.dictid != DICTIONARY_ID_NONE.dictid;
if (!was_already_open) {
if (log_fopen) { //Only log the fopen that OPENs the file. If it was already open, don't log.
if (!did_create) { //Only log the fopen that OPENs the file. If it was already open, don't log.
r = toku_logger_log_fopen(txn, fname_in_env, toku_cachefile_filenum(t->cf), t->flags);
if (r!=0) goto died_after_read_and_pin;
}
}
if (t->did_set_descriptor) {
r = toku_maybe_upgrade_descriptor(t, &t->temp_descriptor, !did_create, txn);
if (r!=0) {
toku_free(t->temp_descriptor.dbt.data);
goto died_after_read_and_pin;
}
t->did_set_descriptor = FALSE;
}
int use_reserved_dict_id = use_dictionary_id.dictid != DICTIONARY_ID_NONE.dictid;
if (!was_already_open) {
DICTIONARY_ID dict_id;
if (use_reserved_dict_id)
dict_id = use_dictionary_id;
@ -3275,40 +3320,6 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
assert(t->h);
assert(t->h->dict_id.dictid != DICTIONARY_ID_NONE.dictid);
assert(t->h->dict_id.dictid < dict_id_serial);
if (t->did_set_descriptor) {
if (t->h->descriptor.version!=t->temp_descriptor.version ||
t->h->descriptor.dbt.size!=t->temp_descriptor.dbt.size ||
memcmp(t->h->descriptor.dbt.data, t->temp_descriptor.dbt.data, t->temp_descriptor.dbt.size)) {
if (t->temp_descriptor.version <= t->h->descriptor.version) {
//Changing descriptor requires upping the version.
r = EINVAL;
goto died_after_read_and_pin;
}
toku_brtheader_lock(t->h);
if (!toku_list_empty(&t->h->live_brts) || !toku_list_empty(&t->h->zombie_brts)) {
//Disallow changing if exists two brts with the same header (counting this one)
//The upgrade would be impossible/very hard!
r = EINVAL;
toku_brtheader_unlock(t->h);
goto died_after_read_and_pin;
}
toku_brtheader_unlock(t->h);
DISKOFF offset;
//4 for checksum
toku_realloc_descriptor_on_disk(t->h->blocktable, toku_serialize_descriptor_size(&t->temp_descriptor)+4, &offset, t->h);
{
int fd = toku_cachefile_get_and_pin_fd (t->cf);
r = toku_serialize_descriptor_contents_to_fd(fd, &t->temp_descriptor, offset);
toku_cachefile_unpin_fd(t->cf);
}
if (r!=0) goto died_after_read_and_pin;
if (t->h->descriptor.dbt.data) toku_free(t->h->descriptor.dbt.data);
t->h->descriptor = t->temp_descriptor;
}
else toku_free(t->temp_descriptor.dbt.data);
t->temp_descriptor.dbt.data = NULL;
t->did_set_descriptor = FALSE;
}
r = toku_maybe_upgrade_brt(t); // possibly do some work to complete the version upgrade of brt
if (r!=0) goto died_after_read_and_pin;
@ -3316,7 +3327,7 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
// brtheader_note_brt_open must be after all functions that can fail.
r = brtheader_note_brt_open(t);
if (r!=0) goto died_after_read_and_pin;
if (t->db) t->db->descriptor = &t->h->descriptor.dbt;
if (t->db) t->db->descriptor = &t->h->descriptor;
if (txn_created) {
assert(txn);
assert(t->h->txnid_that_created_or_locked_when_empty == TXNID_NONE);
@ -3353,15 +3364,6 @@ toku_brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, C
return r;
}
static int
abort_on_upgrade(DB* UU(pdb),
u_int32_t UU(old_version), const DBT *UU(old_descriptor), const DBT *UU(old_key), const DBT *UU(old_val),
u_int32_t UU(new_version), const DBT *UU(new_descriptor), const DBT *UU(new_key), const DBT *UU(new_val)) {
assert(FALSE); //Must not upgrade.
return ENOSYS;
}
// Open a brt for use by redirect. The new brt must have the same dict_id as the old_brt passed in. (FILENUM is assigned by the brt_open() function.)
static int
brt_open_for_redirect(BRT *new_brtp, const char *fname_in_env, TOKUTXN txn, BRT old_brt) {
@ -3380,7 +3382,7 @@ brt_open_for_redirect(BRT *new_brtp, const char *fname_in_env, TOKUTXN txn, BRT
r = toku_brt_set_nodesize(t, old_brt->nodesize);
assert(r==0);
if (old_h->descriptor.version>0) {
r = toku_brt_set_descriptor(t, old_h->descriptor.version, &old_h->descriptor.dbt, abort_on_upgrade);
r = toku_brt_set_descriptor(t, old_h->descriptor.version, &old_h->descriptor.dbt);
assert(r==0);
}
CACHETABLE ct = toku_cachefile_get_cachetable(old_brt->cf);
@ -3503,7 +3505,7 @@ dictionary_redirect_internal(const char *dst_fname_in_env, struct brt_header *sr
//Do not need to swap descriptors pointers.
//Done by brt_open_for_redirect
assert(dst_brt->db->descriptor == &dst_brt->h->descriptor.dbt);
assert(dst_brt->db->descriptor == &dst_brt->h->descriptor);
//Set db->i->brt to new brt
brt_redirect_db(dst_brt, src_brt);
@ -4032,11 +4034,10 @@ int toku_brt_create(BRT *brt_ptr) {
}
int
toku_brt_set_descriptor (BRT t, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) {
toku_brt_set_descriptor (BRT t, u_int32_t version, const DBT* descriptor) {
int r;
if (t->did_set_descriptor) r = EINVAL;
else if (version==0) r = EINVAL; //0 is reserved for default (no descriptor).
else if (dbt_userformat_upgrade==NULL) r = EINVAL; //Must have an upgrade function.
else {
void *copy = toku_memdup(descriptor->data, descriptor->size);
if (!copy) r = ENOMEM;
@ -4044,8 +4045,6 @@ toku_brt_set_descriptor (BRT t, u_int32_t version, const DBT* descriptor, toku_d
t->temp_descriptor.version = version;
assert(!t->temp_descriptor.dbt.data);
toku_fill_dbt(&t->temp_descriptor.dbt, copy, descriptor->size);
assert(!t->dbt_userformat_upgrade);
t->dbt_userformat_upgrade = dbt_userformat_upgrade;
t->did_set_descriptor = TRUE;
r = 0;
}
@ -5739,14 +5738,6 @@ int toku_brt_set_panic(BRT brt, int panic, char *panic_string) {
return toku_brt_header_set_panic(brt->h, panic, panic_string);
}
//Wrapper functions for upgrading from version 10.
#include "backwards_10.h"
void
toku_calculate_leaf_stats (BRTNODE node) {
assert(node->height == 0);
node->u.l.leaf_stats = calc_leaf_stats(node);
}
#if 0
int toku_logger_save_rollback_fdelete (TOKUTXN txn, u_int8_t file_was_open, FILENUM filenum, BYTESTRING iname) {

View file

@ -32,16 +32,17 @@ typedef int(*BRT_GET_CALLBACK_FUNCTION)(ITEMLEN, bytevec, ITEMLEN, bytevec, void
typedef int(*BRT_GET_STRADDLE_CALLBACK_FUNCTION)(ITEMLEN, bytevec, ITEMLEN, bytevec, ITEMLEN, bytevec, ITEMLEN, bytevec, void*);
int toku_open_brt (const char *fname, int is_create, BRT *, int nodesize, CACHETABLE, TOKUTXN, int(*)(DB*,const DBT*,const DBT*), DB*);
int toku_maybe_upgrade_descriptor(BRT t, DESCRIPTOR d, BOOL do_log, TOKUTXN txn);
int toku_dictionary_redirect (const char *dst_fname_in_env, BRT old_brt, TOKUTXN txn);
// See the brt.c file for what this toku_redirect_brt does
int toku_dictionary_redirect_abort(struct brt_header *old_h, struct brt_header *new_h, TOKUTXN txn);
u_int32_t toku_serialize_descriptor_size(const struct descriptor *desc);
u_int32_t toku_serialize_descriptor_size(const DESCRIPTOR desc);
int toku_brt_create(BRT *);
int toku_brt_set_flags(BRT, unsigned int flags);
int toku_brt_set_descriptor (BRT t, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade);
int toku_brt_set_descriptor (BRT t, u_int32_t version, const DBT* descriptor);
int toku_brt_get_flags(BRT, unsigned int *flags);
int toku_brt_set_nodesize(BRT, unsigned int nodesize);
int toku_brt_get_nodesize(BRT, unsigned int *nodesize);

View file

@ -121,7 +121,7 @@ struct brtloader_s {
DB *src_db;
int N;
DB **dbs; // N of these
const struct descriptor **descriptors; // N of these.
DESCRIPTOR *descriptors; // N of these.
const char **new_fnames_in_env; // N of these. The file names that the final data will be written to (relative to env).
uint64_t *extracted_datasizes; // N of these.
@ -170,7 +170,7 @@ u_int64_t toku_brt_loader_get_n_rows(BRTLOADER bl);
// The data passed into a fractal_thread via pthread_create.
struct fractal_thread_args {
BRTLOADER bl;
const struct descriptor *descriptor;
const DESCRIPTOR descriptor;
int fd; // write the brt into tfd.
int progress_allocation;
QUEUE q;
@ -195,14 +195,14 @@ int mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_d
CILK_END
//int write_file_to_dbfile (int outfile, FIDX infile, BRTLOADER bl, const struct descriptor *descriptor, int progress_allocation);
//int write_file_to_dbfile (int outfile, FIDX infile, BRTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation);
int toku_merge_some_files_using_dbufio (const BOOL to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], BRTLOADER bl, int which_db, DB *dest_db, brt_compare_func compare, int progress_allocation);
int brt_loader_sort_and_write_rows (struct rowset *rows, struct merge_fileset *fs, BRTLOADER bl, int which_db, DB *dest_db, brt_compare_func);
// This is probably only for testing.
int toku_loader_write_brt_from_q_in_C (BRTLOADER bl,
const struct descriptor *descriptor,
const DESCRIPTOR descriptor,
int fd, // write to here
int progress_allocation,
QUEUE q,
@ -210,7 +210,7 @@ int toku_loader_write_brt_from_q_in_C (BRTLOADER bl,
int brt_loader_mergesort_row_array (struct row rows[/*n*/], int n, int which_db, DB *dest_db, brt_compare_func, BRTLOADER, struct rowset *);
int brt_loader_write_file_to_dbfile (int outfile, FIDX infile, BRTLOADER bl, const struct descriptor *descriptor, int progress_allocation);
int brt_loader_write_file_to_dbfile (int outfile, FIDX infile, BRTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation);
int brtloader_init_file_infos (struct file_infos *fi);
void brtloader_fi_destroy (struct file_infos *fi, BOOL is_error);
@ -223,7 +223,7 @@ int toku_brt_loader_internal_init (/* out */ BRTLOADER *blp,
generate_row_for_put_func g,
DB *src_db,
int N, DB*dbs[/*N*/],
const struct descriptor *descriptors[/*N*/],
const DESCRIPTOR descriptors[/*N*/],
const char *new_fnames_in_env[/*N*/],
brt_compare_func bt_compare_functions[/*N*/],
const char *temp_file_template,

View file

@ -385,7 +385,7 @@ int toku_brt_loader_internal_init (/* out */ BRTLOADER *blp,
generate_row_for_put_func g,
DB *src_db,
int N, DB*dbs[/*N*/],
const struct descriptor *descriptors[/*N*/],
const DESCRIPTOR descriptors[/*N*/],
const char *new_fnames_in_env[/*N*/],
brt_compare_func bt_compare_functions[/*N*/],
const char *temp_file_template,
@ -484,7 +484,7 @@ int toku_brt_loader_open (/* out */ BRTLOADER *blp,
generate_row_for_put_func g,
DB *src_db,
int N, DB*dbs[/*N*/],
const struct descriptor *descriptors[/*N*/],
const DESCRIPTOR descriptors[/*N*/],
const char *new_fnames_in_env[/*N*/],
brt_compare_func bt_compare_functions[/*N*/],
const char *temp_file_template,
@ -2051,7 +2051,7 @@ static inline long int loader_random(void) {
return r;
}
static struct leaf_buf *start_leaf (struct dbout *out, const struct descriptor *desc, int64_t lblocknum) {
static struct leaf_buf *start_leaf (struct dbout *out, const DESCRIPTOR UU(desc), int64_t lblocknum) {
invariant(lblocknum < out->n_translations_limit);
struct leaf_buf *XMALLOC(lbuf);
lbuf->blocknum = lblocknum;
@ -2063,10 +2063,6 @@ static struct leaf_buf *start_leaf (struct dbout *out, const struct descriptor *
putbuf_int32(&lbuf->dbuf, layout_version);
putbuf_int32(&lbuf->dbuf, layout_version); // layout_version original
putbuf_int32(&lbuf->dbuf, desc->version); // desc version
putbuf_int32(&lbuf->dbuf, desc->dbt.size); // desc size
putbuf_bytes(&lbuf->dbuf, desc->dbt.data, desc->dbt.size);
putbuf_int32(&lbuf->dbuf, nodesize);
putbuf_int32(&lbuf->dbuf, flags);
putbuf_int32(&lbuf->dbuf, height);
@ -2089,7 +2085,7 @@ static struct leaf_buf *start_leaf (struct dbout *out, const struct descriptor *
CILK_BEGIN
static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progress_allocation, BRTLOADER bl);
static int write_nonleaves (BRTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const struct descriptor *descriptor);
static int write_nonleaves (BRTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor);
CILK_END
static void add_pair_to_leafnode (struct leaf_buf *lbuf, unsigned char *key, int keylen, unsigned char *val, int vallen);
static int write_translation_table (struct dbout *out, long long *off_of_translation_p);
@ -2110,7 +2106,7 @@ static void drain_writer_q(QUEUE q) {
CILK_BEGIN
static int toku_loader_write_brt_from_q (BRTLOADER bl,
const struct descriptor *descriptor,
const DESCRIPTOR descriptor,
int fd, // write to here
int progress_allocation,
QUEUE q,
@ -2359,7 +2355,7 @@ static int toku_loader_write_brt_from_q (BRTLOADER bl,
CILK_END
int toku_loader_write_brt_from_q_in_C (BRTLOADER bl,
const struct descriptor *descriptor,
const DESCRIPTOR descriptor,
int fd, // write to here
int progress_allocation,
QUEUE q,
@ -2390,7 +2386,7 @@ static int loader_do_i (BRTLOADER bl,
int which_db,
DB *dest_db,
brt_compare_func compare,
const struct descriptor *descriptor,
const DESCRIPTOR descriptor,
const char *new_fname,
int progress_allocation // how much progress do I need to add into bl->progress by the end..
)
@ -2768,7 +2764,7 @@ static int write_header (struct dbout *out, long long translation_location_on_di
struct brt_header h; memset(&h, 0, sizeof h);
h.layout_version = BRT_LAYOUT_VERSION;
h.checkpoint_count = 1;
h.checkpoint_lsn = load_lsn; // (max_uint_long means that this doesn't need any kind of recovery
h.checkpoint_lsn = load_lsn;
h.nodesize = nodesize;
h.root = root_blocknum_on_disk;
h.flags = 0;
@ -2898,14 +2894,14 @@ CILK_BEGIN
static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknum_of_new_node, int n_children,
DBT *pivots, /* must free this array, as well as the things it points t */
struct subtree_info *subtree_info, int height, const struct descriptor *desc)
struct subtree_info *subtree_info, int height, const DESCRIPTOR UU(desc))
{
//Nodes do not currently touch descriptors
invariant(height>0);
int result = 0;
BRTNODE XMALLOC(node);
node->desc =(struct descriptor *)desc;
node->nodesize = nodesize;
node->thisnodename = make_blocknum(blocknum_of_new_node);
node->layout_version = BRT_LAYOUT_VERSION;
@ -2991,7 +2987,7 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu
brt_loader_set_panic(bl, result, TRUE);
}
static int write_nonleaves (BRTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const struct descriptor *descriptor) {
static int write_nonleaves (BRTLOADER bl, FIDX pivots_fidx, struct dbout *out, struct subtrees_info *sts, const DESCRIPTOR descriptor) {
int result = 0;
int height = 1;
@ -3113,7 +3109,7 @@ CILK_END
#if 0
// C function for testing write_file_to_dbfile
int brt_loader_write_file_to_dbfile (int outfile, FIDX infile, BRTLOADER bl, const struct descriptor *descriptor, int progress_allocation) {
int brt_loader_write_file_to_dbfile (int outfile, FIDX infile, BRTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation) {
#if defined(__cilkplusplus)
return cilk::run(write_file_to_dbfile, outfile, infile, bl, descriptor, progress_allocation);
#else

View file

@ -24,7 +24,7 @@ int toku_brt_loader_open (BRTLOADER *bl,
DB *src_db,
int N,
DB *dbs[/*N*/],
const struct descriptor *descriptors[/*N*/],
const DESCRIPTOR descriptors[/*N*/],
const char * new_fnames_in_env[/*N*/],
brt_compare_func bt_compare_functions[/*N*/],
const char *temp_file_template,

View file

@ -22,11 +22,6 @@ typedef struct brt *BRT;
struct brt_header;
struct wbuf;
typedef struct descriptor {
u_int32_t version;
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
typedef unsigned int ITEMLEN;
typedef const void *bytevec;
//typedef const void *bytevec;

View file

@ -321,7 +321,7 @@ void toku_cachetable_release_reserved_memory(CACHETABLE ct, uint64_t reserved_me
}
void
toku_cachetable_set_env_dir(CACHETABLE ct, char *env_dir) {
toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir) {
assert(!ct->set_env_dir);
toku_free(ct->env_dir);
ct->env_dir = toku_xstrdup(env_dir);

View file

@ -330,7 +330,7 @@ void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s);
LEAFLOCK_POOL toku_cachefile_leaflock_pool(CACHEFILE cf);
void toku_cachetable_set_env_dir(CACHETABLE ct, char *env_dir);
void toku_cachetable_set_env_dir(CACHETABLE ct, const char *env_dir);
char * toku_construct_full_name(int count, ...);
char * toku_cachetable_get_fname_in_cwd(CACHETABLE ct, const char * fname_in_env);

View file

@ -38,6 +38,10 @@ static inline void toku_free_FILENUMS(FILENUMS val) { toku_free(val.filenums); }
void toku_set_lsn_increment (uint64_t incr) __attribute__((__visibility__("default")));
int toku_maybe_upgrade_log (const char *env_dir, const char *log_dir);
uint64_t toku_log_upgrade_get_footprint(void);
#if defined(__cplusplus) || defined(__cilkplusplus)
};
#endif

426
newbrt/log_upgrade.c Normal file
View file

@ -0,0 +1,426 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "includes.h"
#include "log_header.h"
#include "checkpoint.h"
// Progress markers, kept only for debugging and accountability.
static uint64_t footprint = 0;                  // progress of the current upgrade
static uint64_t footprint_previous_upgrade = 0; // outcome of a prior crashed upgrade (1=commit, 2=abort)

// Report combined progress: the prior-upgrade marker is encoded in the
// high decimal digits (multiplied by 100000) above the current footprint.
uint64_t
toku_log_upgrade_get_footprint(void) {
    uint64_t combined = 100000 * footprint_previous_upgrade;
    combined += footprint;
    return combined;
}
#define FOOTPRINT(x) footprint=footprint_start+(x*footprint_increment)
#define FOOTPRINTSETUP(increment) uint64_t footprint_start=footprint; uint64_t footprint_increment=increment;
// The lock file is used to detect a failed upgrade. It is created at the start
// of the upgrade procedure and deleted at the end of the upgrade procedure. If
// it exists at startup, then there was a crash during an upgrade, and the previous
// upgrade attempt must be undone.
static const char upgrade_lock_file_suffix[] = "/__tokudb_upgrade_dont_delete_me";
static const char upgrade_commit_file_suffix[] = "/__tokudb_upgrade_commit_dont_delete_me";
//This will be the base information needed.
//Future 'upgrade in progress' files that need more information
//should store it AFTER the prefix checksum, and have its own checksum.
static const int upgrade_lock_prefix_size = 8 // magic ("tokuupgr")
+4 // version upgrading to
+4 // upgrading from version
+4 // size of suffix (data following prefix checksum)
+4; // prefix checksum
// Check whether the current-version log ends in a clean shutdown.
// Returns 0 if the last log entry is LT_shutdown (and stores its LSN into
// *last_lsn when last_lsn is non-NULL); returns DB_RUNRECOVERY otherwise.
static int
verify_clean_shutdown_of_log_version_current(const char *log_dir, LSN * last_lsn) {
    int result = DB_RUNRECOVERY;
    TOKULOGCURSOR cursor = NULL;
    int r;
    FOOTPRINTSETUP(100);

    FOOTPRINT(1);

    r = toku_logcursor_create(&cursor, log_dir);
    assert(r == 0);
    struct log_entry *entry = NULL;
    r = toku_logcursor_last(cursor, &entry);
    if (r == 0) {
        FOOTPRINT(2);
        if (entry->cmd == LT_shutdown) {
            LSN shutdown_lsn = entry->u.shutdown.lsn;
            if (last_lsn)
                *last_lsn = shutdown_lsn;
            result = 0;
        }
    }
    r = toku_logcursor_destroy(&cursor);
    assert(r == 0);
    return result;
}
// Stand-in clean-shutdown check for version-1 logs: always reports clean.
static int
verify_clean_shutdown_of_log_version_1(const char *log_dir, LSN * last_lsn) {
    FOOTPRINTSETUP(100);
    FOOTPRINT(1);
    //TODO: Remove this hack:
    //Base this function on
    // - (above)verify_clean_shutdown_of_log_version_current
    // - (3.1)tokudb_needs_recovery
    // - do breadth/depth first search to find out which functions have to be copied over from 3.1
    // - Put copied functions in .. backwards_log_1.[ch]
    if (last_lsn) {
        // Report a fixed, very large LSN as the "shutdown" point.
        LSN fake_lsn = {.lsn = 1LLU << 40};
        *last_lsn = fake_lsn;
    }
    (void) log_dir;  // unused until a real version-1 check is implemented
    return 0;
}
// Dispatch the clean-shutdown check to the verifier for 'version'.
// Returns 0 on clean shutdown, DB_RUNRECOVERY if not clean shutdown.
// Only TOKU_LOG_VERSION_1 and the current TOKU_LOG_VERSION are supported.
static int
verify_clean_shutdown_of_log_version(const char *log_dir, uint32_t version, LSN *last_lsn) {
    int r = 0;
    FOOTPRINTSETUP(100);

    if (version != TOKU_LOG_VERSION_1) {
        FOOTPRINT(2);
        assert(version == TOKU_LOG_VERSION);
        r = verify_clean_shutdown_of_log_version_current(log_dir, last_lsn);
    }
    else {
        FOOTPRINT(1);
        r = verify_clean_shutdown_of_log_version_1(log_dir, last_lsn);
    }
    return r;
}
//Cross the Rubicon (POINT OF NO RETURN)
// Finalize a log conversion: verify the destination-version log shut down
// cleanly, then delete every log of 'from_version' and fsync the log dir.
// When converting back DOWN to version 1 (i.e. aborting an upgrade from
// version 1), also remove the rollback cachefile (which version 1 lacks).
// Returns 0 on success; all intermediate failures assert.
static int
convert_logs_and_fsync(const char *log_dir, const char *env_dir, uint32_t from_version, uint32_t to_version) {
    int r;
    FOOTPRINTSETUP(100);

    r = verify_clean_shutdown_of_log_version(log_dir, to_version, NULL);
    assert(r==0);
    r = toku_delete_all_logs_of_version(log_dir, from_version);
    assert(r==0);
    r = toku_fsync_dir_by_name_without_accounting(log_dir);
    assert(r==0);
    if (to_version==TOKU_LOG_VERSION_1) {
        //Undo an upgrade from version 1.
        //Delete rollback cachefile if it exists.
        FOOTPRINT(1);
        // BUGFIX: the rollback cachefile lives in env_dir, so the buffer must
        // be sized from env_dir as well.  It was previously sized from
        // log_dir, which trips the length assert below (or truncates the
        // name) whenever the two directory names differ in length.
        int rollback_len = strlen(env_dir) + sizeof(ROLLBACK_CACHEFILE_NAME) +1; //1 for '/'
        char rollback_fname[rollback_len];
        {
            int l = snprintf(rollback_fname, sizeof(rollback_fname),
                             "%s/%s", env_dir, ROLLBACK_CACHEFILE_NAME);
            assert(l+1 == (signed)(sizeof(rollback_fname)));
        }
        r = unlink(rollback_fname);
        if (r != 0) {
            // BUGFIX: a missing rollback file is fine (the crashed upgrade may
            // never have created it); don't propagate -1 to callers that
            // assert(r==0).
            assert(errno==ENOENT);
            r = 0;
        }
        else {
            // The directory entry changed; make the removal durable.
            r = toku_fsync_dir_by_name_without_accounting(env_dir);
            assert(r==0);
        }
    }
    return r;
}
//After this function completes:
// If any log files exist they are all of the same version.
// There is no lock file.
// There is no commit file.
// Recovery protocol for a crashed upgrade, driven by two marker files:
// - lock file absent:               no upgrade was in progress; nothing to do.
// - lock file present & complete:   commit file present -> finish (commit) the
//                                   upgrade; commit file absent -> undo (abort) it.
// - lock file present but partial:  the crash happened while writing the lock
//                                   file itself, so nothing was converted yet;
//                                   just delete the partial lock file.
static int
cleanup_previous_upgrade_attempt(const char *env_dir, const char *log_dir,
                                 const char *upgrade_lock_fname,
                                 const char *upgrade_commit_fname) {
    int r = 0;
    int lock_fd;
    int commit_fd;
    unsigned char prefix[upgrade_lock_prefix_size];
    FOOTPRINTSETUP(1000);

    // Probe for the commit marker first; ENOENT is the only acceptable failure.
    commit_fd = open(upgrade_commit_fname, O_RDONLY|O_BINARY, S_IRWXU);
    if (commit_fd<0) {
        assert(errno==ENOENT);
    }
    lock_fd = open(upgrade_lock_fname, O_RDONLY|O_BINARY, S_IRWXU);
    if (lock_fd<0) {
        assert(errno == ENOENT);
        //Nothing to clean up (lock file does not exist).
    }
    else { //Lock file exists.  Will commit or abort the upgrade.
        FOOTPRINT(1);
        int64_t n = pread(lock_fd, prefix, upgrade_lock_prefix_size, 0);
        assert(n>=0 && n <= upgrade_lock_prefix_size);
        // Wrap the prefix bytes in an rbuf for field-by-field deserialization.
        struct rbuf rb;
        rb.size = upgrade_lock_prefix_size;
        rb.buf = prefix;
        rb.ndone = 0;
        if (n == upgrade_lock_prefix_size) {
            // Full prefix was written: the previous attempt got past the lock
            // file, so the upgrade must now be committed or rolled back.
            FOOTPRINT(2);
            //Check magic number
            bytevec magic;
            rbuf_literal_bytes(&rb, &magic, 8);
            assert(memcmp(magic,"tokuupgr",8)==0);
            uint32_t to_version = rbuf_network_int(&rb);
            uint32_t from_version = rbuf_network_int(&rb);
            uint32_t suffix_length = rbuf_int(&rb);
            uint32_t stored_x1764 = rbuf_int(&rb);
            // Checksum covers everything before the checksum field itself.
            uint32_t calculated_x1764 = x1764_memory(rb.buf, rb.size-4);
            assert(calculated_x1764 == stored_x1764);
            //Now that checksum matches, verify data.
            assert(to_version == TOKU_LOG_VERSION); //Only upgrading directly to newest log version.
            assert(from_version < TOKU_LOG_VERSION); //Otherwise it isn't an upgrade.
            assert(from_version >= TOKU_LOG_MIN_SUPPORTED_VERSION); //TODO: make this an error case once we have 3 log versions
            assert(suffix_length == 0); //TODO: Future versions may change this.
            if (commit_fd>=0) { //Commit the upgrade
                footprint_previous_upgrade = 1;
                FOOTPRINT(3);
                // Roll forward: delete the old-version logs.
                r = convert_logs_and_fsync(log_dir, env_dir, from_version, to_version);
                assert(r==0);
            }
            else { //Abort the upgrade
                footprint_previous_upgrade = 2;
                FOOTPRINT(4);
                // Roll back: delete the new-version logs (note swapped args).
                r = convert_logs_and_fsync(log_dir, env_dir, to_version, from_version);
                assert(r==0);
            }
        }
        else { // We never finished writing lock file: commit file cannot exist yet.
               // We are aborting the upgrade, but because the previous attempt never got past
               // writing the lock file, nothing needs to be undone.
            assert(commit_fd<0);
        }
        { //delete lock file
            r = close(lock_fd);
            assert(r==0);
            r = unlink(upgrade_lock_fname);
            assert(r==0);
            // Make the unlink durable before reporting success.
            r = toku_fsync_dir_by_name_without_accounting(log_dir);
            assert(r==0);
        }
    }
    if (commit_fd>=0) { //delete commit file
        r = close(commit_fd);
        assert(r==0);
        r = unlink(upgrade_commit_fname);
        assert(r==0);
        r = toku_fsync_dir_by_name_without_accounting(log_dir);
        assert(r==0);
    }
    return r;
}
// Create (exclusively) the upgrade-commit marker file and make it durable:
// fsync the empty file itself, then the containing log directory so the new
// directory entry survives a crash.  All failures assert; returns 0.
static int
write_commit_file_and_fsync(const char *log_dir, const char * upgrade_commit_fname) {
    int fd = open(upgrade_commit_fname, O_RDWR|O_BINARY|O_CREAT|O_EXCL, S_IRWXU);
    assert(fd>=0);

    int r = toku_file_fsync_without_accounting(fd);
    assert(r==0);
    r = close(fd);
    assert(r==0);
    r = toku_fsync_dir_by_name_without_accounting(log_dir);
    assert(r==0);
    return r;
}
static int
write_lock_file_and_fsync(const char *log_dir, const char * upgrade_lock_fname, uint32_t from_version) {
int fd;
fd = open(upgrade_lock_fname, O_RDWR|O_BINARY|O_CREAT|O_EXCL, S_IRWXU);
assert(fd>=0);
char buf[upgrade_lock_prefix_size];
struct wbuf wb;
const int suffix_size = 0;
wbuf_init(&wb, buf, upgrade_lock_prefix_size);
{ //Serialize to wbuf
wbuf_literal_bytes(&wb, "tokuupgr", 8); //magic
wbuf_network_int(&wb, TOKU_LOG_VERSION); //to version
wbuf_network_int(&wb, from_version); //from version
wbuf_int(&wb, suffix_size); //Suffix Length
u_int32_t checksum = x1764_finish(&wb.checksum);
wbuf_int(&wb, checksum); //checksum
assert(wb.ndone == wb.size);
}
toku_os_full_pwrite(fd, wb.buf, wb.size, 0);
{
//Serialize suffix to wbuf and then disk (if exist)
//There is no suffix as of TOKU_LOG_VERSION_2
}
int r;
r = toku_file_fsync_without_accounting(fd);
assert(r==0);
r = close(fd);
assert(r==0);
r = toku_fsync_dir_by_name_without_accounting(log_dir);
assert(r==0);
return r;
}
// from_version is version of lognumber_newest, which contains last_lsn
// Perform the actual upgrade of the logs in log_dir to TOKU_LOG_VERSION.
// Crash-safety protocol (each step made durable before the next):
//   1. write + fsync the lock file   ("upgrade in progress"),
//   2. write new-format log data via a temporary cachetable/logger,
//   3. write + fsync the commit file ("upgrade committed"),
//   4. delete old logs (POINT OF NO RETURN), then lock file, then commit file.
// NOTE(review): an interruption between steps appears to be rolled back or
// forward by cleanup_previous_upgrade_attempt on the next startup -- confirm.
static int
upgrade_log(const char *env_dir, const char *log_dir,
const char * upgrade_lock_fname, const char * upgrade_commit_fname,
LSN last_lsn,
uint32_t from_version) { // the real deal
int r;
FOOTPRINTSETUP(1000);
r = write_lock_file_and_fsync(log_dir, upgrade_lock_fname, from_version);
assert(r==0);
LSN initial_lsn = last_lsn;
initial_lsn.lsn++; // new-format log begins at the LSN just after the last old one
CACHETABLE ct;
TOKULOGGER logger;
{ //Create temporary environment
r = toku_create_cachetable(&ct, 1<<25, initial_lsn, NULL);
assert(r == 0);
toku_cachetable_set_env_dir(ct, env_dir);
r = toku_logger_create(&logger);
assert(r == 0);
toku_logger_write_log_files(logger, FALSE); //Prevent initial creation of log file
toku_logger_set_cachetable(logger, ct);
r = toku_logger_open(log_dir, logger);
assert(r==0);
r = toku_logger_restart(logger, initial_lsn); //Turn log writing on and create first log file with initial lsn
assert(r==0);
FOOTPRINT(1);
}
if (from_version == TOKU_LOG_VERSION_1) {
// Version-1 environments get a rollback cachefile created and checkpointed in.
{ //Create rollback cachefile
r = toku_logger_open_rollback(logger, ct, TRUE);
assert(r==0);
}
{ //Checkpoint
r = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL);
assert(r == 0);
}
{ //Close rollback cachefile
r = toku_logger_close_rollback(logger, FALSE);
assert(r==0);
}
FOOTPRINT(2);
}
{ //Checkpoint
r = toku_checkpoint(ct, logger, NULL, NULL, NULL, NULL); //fsyncs log dir
assert(r == 0);
FOOTPRINT(3);
}
{ //Close cachetable and logger
r = toku_logger_shutdown(logger); // writes the clean-shutdown log entry
assert(r==0);
r = toku_cachetable_close(&ct);
assert(r==0);
r = toku_logger_close(&logger);
assert(r==0);
FOOTPRINT(4);
}
{ //Write commit file
r = write_commit_file_and_fsync(log_dir, upgrade_commit_fname);
assert(r==0);
}
{ // Cross the Rubicon here:
// Delete all old logs: POINT OF NO RETURN
r = convert_logs_and_fsync(log_dir, env_dir, from_version, TOKU_LOG_VERSION);
assert(r==0);
FOOTPRINT(5);
}
{ //Delete upgrade lock file and ensure directory is fsynced
r = unlink(upgrade_lock_fname);
assert(r==0);
r = toku_fsync_dir_by_name_without_accounting(log_dir);
assert(r==0);
}
{ //Delete upgrade commit file and ensure directory is fsynced
r = unlink(upgrade_commit_fname);
assert(r==0);
r = toku_fsync_dir_by_name_without_accounting(log_dir);
assert(r==0);
}
FOOTPRINT(6);
return 0;
}
// Upgrade the logs in log_dir to TOKU_LOG_VERSION if they are from an older,
// supported version.  Returns 0 when no upgrade is needed or it succeeds;
// TOKUDB_DICTIONARY_TOO_NEW / TOKUDB_DICTIONARY_TOO_OLD when the on-disk
// version is outside the supported range; other nonzero on error.
// Holds the recovery lock for the whole operation.
int
toku_maybe_upgrade_log(const char *env_dir, const char *log_dir) {
int r;
int lockfd = -1;
FOOTPRINTSETUP(10000);
r = toku_recover_lock(log_dir, &lockfd); // serialize against recovery / another upgrade
if (r == 0) {
assert(log_dir);
assert(env_dir);
char upgrade_lock_fname[strlen(log_dir) + sizeof(upgrade_lock_file_suffix)];
{ //Generate full fname
int l = snprintf(upgrade_lock_fname, sizeof(upgrade_lock_fname),
"%s%s", log_dir, upgrade_lock_file_suffix);
assert(l+1 == (ssize_t)(sizeof(upgrade_lock_fname)));
}
char upgrade_commit_fname[strlen(log_dir) + sizeof(upgrade_commit_file_suffix)];
{ //Generate full fname
int l = snprintf(upgrade_commit_fname, sizeof(upgrade_commit_fname),
"%s%s", log_dir, upgrade_commit_file_suffix);
assert(l+1 == (ssize_t)(sizeof(upgrade_commit_fname)));
}
// Resolve any upgrade that was interrupted before examining the logs.
r = cleanup_previous_upgrade_attempt(env_dir, log_dir,
upgrade_lock_fname, upgrade_commit_fname);
if (r==0) {
uint32_t version_of_logs_on_disk;
BOOL found_any_logs;
r = toku_get_version_of_logs_on_disk(log_dir, &found_any_logs, &version_of_logs_on_disk);
if (r==0) {
if (!found_any_logs)
r = 0; //No logs means no logs to upgrade.
else if (version_of_logs_on_disk > TOKU_LOG_VERSION)
r = TOKUDB_DICTIONARY_TOO_NEW;
else if (version_of_logs_on_disk < TOKU_LOG_MIN_SUPPORTED_VERSION)
r = TOKUDB_DICTIONARY_TOO_OLD;
else if (version_of_logs_on_disk == TOKU_LOG_VERSION)
r = 0; //Logs are up to date
else {
FOOTPRINT(1);
// Only upgrade after a verified clean shutdown (no recovery pending).
LSN last_lsn;
r = verify_clean_shutdown_of_log_version(log_dir, version_of_logs_on_disk, &last_lsn);
if (r==0) {
FOOTPRINT(2);
r = upgrade_log(env_dir, log_dir,
upgrade_lock_fname, upgrade_commit_fname,
last_lsn, version_of_logs_on_disk);
}
}
}
}
{
//Clean up
int rc;
rc = toku_recover_unlock(lockfd);
if (r==0) r = rc; // report the unlock failure only if nothing failed earlier
}
}
return r;
}

View file

@ -80,8 +80,10 @@ int toku_logfilemgr_init(TOKULOGFILEMGR lfm, const char *log_dir) {
}
// find the index
basename = strrchr(logfiles[i], '/') + 1;
r = sscanf(basename, "log%lld.tokulog", &index);
assert(r==1); // found index
int version;
r = sscanf(basename, "log%lld.tokulog%d", &index, &version);
assert(r==2); // found index and version
assert(version==TOKU_LOG_VERSION);
lf_info->index = index;
// find last LSN
r = toku_logcursor_create_for_file(&cursor, log_dir, basename);

View file

@ -135,6 +135,11 @@ const struct logtype logtypes[] = {
{"u_int32_t", "treeflags", 0},
NULLFIELD}},
//TODO: #2037 Add dname
{"fdescriptor", 'd', FA{{"FILENUM", "filenum", 0},
{"u_int32_t", "descriptor_version", 0},
{"BYTESTRING", "descriptor", 0},
NULLFIELD}},
//TODO: #2037 Add dname
{"fclose", 'e', FA{{"BYTESTRING", "iname", 0},
{"FILENUM", "filenum", 0},
NULLFIELD}},
@ -176,6 +181,8 @@ const struct logtype logtypes[] = {
{"comment", 'T', FA{{"u_int64_t", "timestamp", 0},
{"BYTESTRING", "comment", 0},
NULLFIELD}},
{"shutdown", 'Q', FA{{"u_int64_t", "timestamp", 0},
NULLFIELD}},
{"load", 'l', FA{{"TXNID", "xid", 0},
{"BYTESTRING", "old_iname", 0},
{"BYTESTRING", "new_iname", 0},

View file

@ -13,14 +13,40 @@ static int delete_logfile(TOKULOGGER logger, long long index);
static void grab_output(TOKULOGGER logger, LSN *fsynced_lsn);
static void release_output(TOKULOGGER logger, LSN fsynced_lsn);
// Parse a directory-entry name of the form "log<number>.tokulog<version>".
// Version 1 log files lack the trailing <version>, so on a failed first parse
// we retry with the version-1 pattern.  Returns TRUE and fills
// *number_result / *version_of_log on a match, FALSE otherwise.
static BOOL is_a_logfile_any_version (const char *name, uint64_t *number_result, uint32_t *version_of_log) {
BOOL rval = TRUE;
uint64_t result;
int n;
int r;
uint32_t version;
// %n records how many chars matched; name[n] is only read when the scan
// succeeded (short-circuit), so n is never used uninitialized.
r = sscanf(name, "log%"SCNu64".tokulog%"SCNu32"%n", &result, &version, &n);
if (r!=2 || name[n]!='\0' || version <= TOKU_LOG_VERSION_1) {
//Version 1 does NOT append 'version' to end of '.tokulog'
version = TOKU_LOG_VERSION_1;
r = sscanf(name, "log%"SCNu64".tokulog%n", &result, &n);
if (r!=1 || name[n]!='\0') {
rval = FALSE;
}
}
if (rval) {
*number_result = result;
*version_of_log = version;
}
return rval;
}
// added for #2424, improved for #2521
static BOOL is_a_logfile (const char *name, long long *number_result) {
unsigned long long result;
int n;
int r = sscanf(name, "log%llu.tokulog%n", &result, &n);
if (r!=1 || name[n]!=0) return FALSE;
*number_result = result;
return TRUE;
BOOL rval;
uint64_t result;
uint32_t version;
rval = is_a_logfile_any_version(name, &result, &version);
if (rval && version != TOKU_LOG_VERSION)
rval = FALSE;
if (rval)
*number_result = result;
return rval;
}
@ -234,8 +260,8 @@ int toku_logger_shutdown(TOKULOGGER logger) {
int r = 0;
if (logger->is_open) {
if (toku_omt_size(logger->live_txns) == 0) {
BYTESTRING comment = { strlen("shutdown"), "shutdown" };
int r2 = toku_log_comment(logger, NULL, TRUE, 0, comment);
time_t tnow = time(NULL);
int r2 = toku_log_shutdown(logger, NULL, TRUE, tnow);
if (!r) r = r2;
}
}
@ -575,7 +601,7 @@ static int open_logfile (TOKULOGGER logger)
{
int fnamelen = strlen(logger->directory)+50;
char fname[fnamelen];
snprintf(fname, fnamelen, "%s/log%012lld.tokulog", logger->directory, logger->next_log_file_number);
snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION);
long long index = logger->next_log_file_number;
if (logger->write_log_files) {
logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRWXU);
@ -608,7 +634,7 @@ static int delete_logfile(TOKULOGGER logger, long long index)
{
int fnamelen = strlen(logger->directory)+50;
char fname[fnamelen];
snprintf(fname, fnamelen, "%s/log%012lld.tokulog", logger->directory, index);
snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, index, TOKU_LOG_VERSION);
int r = remove(fname);
return r;
}
@ -786,6 +812,14 @@ int toku_logger_log_fdelete (TOKUTXN txn, const char *fname) {
return r;
}
// fname is the iname
// Append an fdescriptor log entry recording descriptor_p as the new
// descriptor for filenum.  A no-op outside a transaction; returns EINVAL
// if the logger has panicked, otherwise the result of the log write.
int toku_logger_log_descriptor (TOKUTXN txn, FILENUM filenum, DESCRIPTOR descriptor_p) {
    if (txn == 0)
        return 0;
    if (txn->logger->is_panicked)
        return EINVAL;
    BYTESTRING bs = { .len = descriptor_p->dbt.size, .data = descriptor_p->dbt.data };
    return toku_log_fdescriptor(txn->logger, (LSN*)0, 1, filenum, descriptor_p->version, bs);
}
@ -1258,3 +1292,82 @@ toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s) {
s->swap_ctr = 0;
}
}
//Used for upgrade.
// Scan log_dir and report whether any log files exist and, if so, their
// on-disk version.  Asserts that every log file found has the same version.
// Returns 0 on success (only then are *found_any_logs and, when logs exist,
// *version_found set), or errno from opendir/closedir on failure.
int
toku_get_version_of_logs_on_disk(const char *log_dir, BOOL *found_any_logs, uint32_t *version_found) {
    BOOL found = FALSE;
    uint32_t single_version = 0;
    int r = 0;
    DIR *d = opendir(log_dir);
    if (d==NULL) {
        // Do not fall through to closedir: closedir(NULL) is undefined behavior.
        r = errno;
    }
    else {
        // Examine every file in the directory and assert that all log files are of the same version (single_version).
        struct dirent *de;
        while ((de=readdir(d))) {
            uint32_t this_log_version;
            uint64_t this_log_number;
            BOOL is_log = is_a_logfile_any_version(de->d_name, &this_log_number, &this_log_version);
            if (is_log) {
                if (found)
                    assert(single_version == this_log_version);
                found = TRUE;
                single_version = this_log_version;
            }
        }
        int r2 = closedir(d);
        if (r==0) r = r2;
    }
    if (r==0) {
        *found_any_logs = found;
        if (found)
            *version_found = single_version;
    }
    return r;
}
//Used for upgrade.
// Delete every log file in log_dir whose version equals version_to_delete.
// Stops at the first unlink failure.  Returns 0 on success, or errno from
// opendir/unlink/closedir on failure.
int
toku_delete_all_logs_of_version(const char *log_dir, uint32_t version_to_delete) {
    int r = 0;
    DIR *d = opendir(log_dir);
    if (d==NULL) {
        // Do not fall through to closedir: closedir(NULL) is undefined behavior.
        r = errno;
    }
    else {
        // Examine every file in the directory and if it is a log of the given version, delete it
        struct dirent *de;
        while ((de=readdir(d))) {
            uint32_t this_log_version;
            uint64_t this_log_number;
            BOOL is_log = is_a_logfile_any_version(de->d_name, &this_log_number, &this_log_version);
            if (is_log && this_log_version == version_to_delete) {
                char log_full_name[strlen(log_dir) + strlen(de->d_name) + 2]; //'\0' and '/'
                { //Generate full fname
                    int l = snprintf(log_full_name, sizeof(log_full_name),
                                     "%s/%s", log_dir, de->d_name);
                    assert(l+1 == (ssize_t)(sizeof(log_full_name)));
                }
                r = unlink(log_full_name);
                if (r!=0) {
                    r = errno;
                    assert(r);
                    break;
                }
            }
        }
        int r2 = closedir(d);
        if (r==0) r = r2;
    }
    return r;
}

View file

@ -14,6 +14,7 @@ enum {
TOKU_LOG_VERSION_2 = 2,
TOKU_LOG_NEXT_VERSION, // the version after the current version
TOKU_LOG_VERSION = TOKU_LOG_NEXT_VERSION-1, // A hack so I don't have to change this line.
TOKU_LOG_MIN_SUPPORTED_VERSION = TOKU_LOG_VERSION_2
};
#define ROLLBACK_CACHEFILE_NAME "tokudb.rollback"
@ -56,6 +57,7 @@ int toku_logger_maybe_trim_log(TOKULOGGER logger, LSN oldest_open_lsn);
int toku_logger_log_fcreate (TOKUTXN txn, const char *fname, FILENUM filenum, u_int32_t mode, u_int32_t flags, DESCRIPTOR descriptor_p);
int toku_logger_log_fdelete (TOKUTXN txn, const char *fname);
int toku_logger_log_fopen (TOKUTXN txn, const char * fname, FILENUM filenum, uint32_t treeflags);
int toku_logger_log_descriptor (TOKUTXN txn, FILENUM filenum, DESCRIPTOR descriptor_p);
int toku_fread_u_int8_t (FILE *f, u_int8_t *v, struct x1764 *mm, u_int32_t *len);
int toku_fread_u_int32_t_nocrclen (FILE *f, u_int32_t *v);
@ -166,6 +168,9 @@ typedef struct logger_status {
void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s);
int toku_get_version_of_logs_on_disk(const char *log_dir, BOOL *found_any_logs, uint32_t *version_found);
int toku_delete_all_logs_of_version(const char *log_dir, uint32_t version_to_delete);
#if defined(__cplusplus) || defined(__cilkplusplus)
};
#endif

View file

@ -7,11 +7,13 @@
#include "log_header.h"
#include "checkpoint.h"
static const char recovery_lock_file[] = "/__tokudb_recoverylock_dont_delete_me";
int tokudb_recovery_trace = 0; // turn on recovery tracing, default off.
//#define DO_VERIFY_COUNTS
#ifdef DO_VERIFY_COUNTS
#define VERIFY_COUNTS(n) toku_verify_counts(n)
#define VERIFY_COUNTS(n) toku_verify_or_set_counts(n, FALSE)
#else
#define VERIFY_COUNTS(n) ((void)0)
#endif
@ -235,14 +237,6 @@ static void recover_yield(voidfp f, void *fpthunk, void *UU(yieldthunk)) {
if (f) f(fpthunk);
}
static int
abort_on_upgrade(DB* UU(pdb),
u_int32_t UU(old_version), const DBT *UU(old_descriptor), const DBT *UU(old_key), const DBT *UU(old_val),
u_int32_t UU(new_version), const DBT *UU(new_descriptor), const DBT *UU(new_key), const DBT *UU(new_val)) {
assert(FALSE); //Must not upgrade.
return ENOSYS;
}
// Open the file if it is not already open. If it is already open, then do nothing.
static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create, int mode, BYTESTRING *bs_iname, FILENUM filenum, u_int32_t treeflags,
u_int32_t descriptor_version, BYTESTRING* descriptor, TOKUTXN txn) {
@ -269,7 +263,7 @@ static int internal_recover_fopen_or_fcreate (RECOVER_ENV renv, BOOL must_create
if (descriptor_version > 0) {
DBT descriptor_dbt;
toku_fill_dbt(&descriptor_dbt, descriptor->data, descriptor->len);
r = toku_brt_set_descriptor(brt, descriptor_version, &descriptor_dbt, abort_on_upgrade);
r = toku_brt_set_descriptor(brt, descriptor_version, &descriptor_dbt);
if (r!=0) goto close_brt;
}
r = toku_brt_open_recovery(brt, iname, must_create, must_create, renv->ct, txn, fake_db, filenum);
@ -674,6 +668,32 @@ static int toku_recover_backward_fopen (struct logtype_fopen *UU(l), RECOVER_ENV
return 0;
}
// Forward-scan recovery of an fdescriptor log entry: install the logged
// descriptor on the open brt for l->filenum.  If the file is not currently
// open in the file map, the entry is silently skipped (return is 0 either way).
static int toku_recover_fdescriptor (struct logtype_fdescriptor *l, RECOVER_ENV renv) {
int r;
struct file_map_tuple *tuple = NULL;
r = file_map_find(&renv->fmap, l->filenum, &tuple);
if (r==0) {
//Maybe do the descriptor (lsn filter)
LSN treelsn = toku_brt_checkpoint_lsn(tuple->brt);
if (l->lsn.lsn > treelsn.lsn) {
// Only apply entries newer than the tree's checkpoint LSN.
//Upgrade descriptor.
assert(tuple->brt->h->descriptor.version < l->descriptor_version); //Must be doing an upgrade.
DESCRIPTOR_S d;
d.version = l->descriptor_version;
// Copy the logged bytes; NOTE(review): ownership of the xmemdup'd buffer
// presumably transfers to toku_maybe_upgrade_descriptor -- confirm.
toku_fill_dbt(&d.dbt, toku_xmemdup(l->descriptor.data, l->descriptor.len), l->descriptor.len);
r = toku_maybe_upgrade_descriptor(tuple->brt, &d, FALSE, NULL);
assert(r==0);
}
}
return 0;
}
// Backward-scan handler for fdescriptor entries: descriptors are applied
// only during the forward pass, so there is nothing to undo here.
static int toku_recover_backward_fdescriptor (struct logtype_fdescriptor *UU(l), RECOVER_ENV UU(renv)) {
    return 0;   // nothing to do
}
// if file referred to in l is open, close it
static int toku_recover_fclose (struct logtype_fclose *l, RECOVER_ENV renv) {
struct file_map_tuple *tuple = NULL;
@ -949,6 +969,16 @@ static int toku_recover_backward_comment (struct logtype_comment *UU(l), RECOVER
return 0;
}
// Forward-scan handler for the shutdown log entry.  The clean-shutdown
// marker carries no state to replay, so recovery simply accepts it.
static int toku_recover_shutdown (struct logtype_shutdown *UU(l), RECOVER_ENV UU(renv)) {
    return 0;   // nothing to do
}
// Backward-scan handler for the shutdown log entry: nothing to undo.
static int toku_recover_backward_shutdown (struct logtype_shutdown *UU(l), RECOVER_ENV UU(renv)) {
    return 0;   // nothing to do
}
static int toku_recover_load(struct logtype_load *UU(l), RECOVER_ENV UU(renv)) {
int r;
TOKUTXN txn = NULL;
@ -992,7 +1022,7 @@ int tokudb_needs_recovery(const char *log_dir, BOOL ignore_log_empty) {
if (r != 0) {
needs_recovery = TRUE; goto exit;
}
if (le->cmd == LT_comment) {
if (le->cmd==LT_shutdown || le->cmd==LT_comment) {
r = toku_logcursor_prev(logcursor, &le);
if (r != 0) {
needs_recovery = TRUE; goto exit;
@ -1261,15 +1291,14 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
return rr;
}
static int recover_lock(const char *lock_dir, int *lockfd) {
int
toku_recover_lock(const char *lock_dir, int *lockfd) {
if (!lock_dir)
return ENOENT;
const char fname[] = "/__tokudb_recoverylock_dont_delete_me";
int namelen=strlen(lock_dir);
char lockfname[namelen+sizeof(fname)];
char lockfname[namelen+sizeof(recovery_lock_file)];
int l = snprintf(lockfname, sizeof(lockfname), "%s%s", lock_dir, fname);
int l = snprintf(lockfname, sizeof(lockfname), "%s%s", lock_dir, recovery_lock_file);
assert(l+1 == (signed)(sizeof(lockfname)));
*lockfd = toku_os_lock_file(lockfname);
if (*lockfd < 0) {
@ -1280,13 +1309,16 @@ static int recover_lock(const char *lock_dir, int *lockfd) {
return 0;
}
static int recover_unlock(int lockfd) {
int
toku_recover_unlock(int lockfd) {
int r = toku_os_unlock_file(lockfd);
if (r != 0)
return errno;
return 0;
}
int tokudb_recover(const char *env_dir, const char *log_dir,
brt_compare_func bt_compare,
brt_compare_func dup_compare,
@ -1296,7 +1328,7 @@ int tokudb_recover(const char *env_dir, const char *log_dir,
int r;
int lockfd = -1;
r = recover_lock(log_dir, &lockfd);
r = toku_recover_lock(log_dir, &lockfd);
if (r != 0)
return r;
@ -1314,7 +1346,7 @@ int tokudb_recover(const char *env_dir, const char *log_dir,
recover_env_cleanup(&renv, (BOOL)(rr == 0));
}
r = recover_unlock(lockfd);
r = toku_recover_unlock(lockfd);
if (r != 0)
return r;

View file

@ -41,6 +41,11 @@ void toku_recover_set_callback2 (void (*)(void*), void*);
extern int tokudb_recovery_trace;
int toku_recover_lock (const char *lock_dir, int *lockfd);
int toku_recover_unlock(int lockfd);
#if defined(__cplusplus) || defined(__cilkplusplus)
};
#endif

View file

@ -100,7 +100,6 @@ test_serialize_nonleaf(void) {
assert(size == 100);
}
sn.desc = &brt->h->descriptor;
r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE);
assert(r==0);

View file

@ -76,7 +76,7 @@ static void test_extractor(int nrows, int nrowsets, BOOL expect_fail) {
// open the brtloader. this runs the extractor.
const int N = 1;
DB *dbs[N];
const struct descriptor *descriptors[N];
DESCRIPTOR descriptors[N];
const char *fnames[N];
brt_compare_func compares[N];
for (int i = 0; i < N; i++) {

View file

@ -88,7 +88,7 @@ static void test_extractor(int nrows, int nrowsets, BOOL expect_fail, const char
// open the brtloader. this runs the extractor.
const int N = 1;
DB *dbs[N];
const struct descriptor *descriptors[N];
DESCRIPTOR descriptors[N];
const char *fnames[N];
brt_compare_func compares[N];
for (int i = 0; i < N; i++) {

View file

@ -295,7 +295,7 @@ static void test_extractor(int nrows, int nrowsets, const char *testdir) {
// open the brtloader. this runs the extractor.
const int N = 1;
DB *dbs[N];
const struct descriptor *descriptors[N];
DESCRIPTOR descriptors[N];
const char *fnames[N];
brt_compare_func compares[N];
for (int i = 0; i < N; i++) {

View file

@ -292,7 +292,7 @@ static void test (const char *directory, BOOL is_error) {
BRTLOADER bl;
DB **XMALLOC_N(N_DEST_DBS, dbs);
const struct descriptor **XMALLOC_N(N_DEST_DBS, descriptors);
DESCRIPTOR *XMALLOC_N(N_DEST_DBS, descriptors);
const char **XMALLOC_N(N_DEST_DBS, new_fnames_in_env);
for (int i=0; i<N_DEST_DBS; i++) {
char s[100];

View file

@ -43,7 +43,7 @@ static void test_loader_open(int ndbs) {
// open the brtloader. this runs the extractor.
DB *dbs[ndbs];
const struct descriptor *descriptors[ndbs];
DESCRIPTOR descriptors[ndbs];
const char *fnames[ndbs];
brt_compare_func compares[ndbs];
for (int i = 0; i < ndbs; i++) {

View file

@ -115,7 +115,7 @@ static void write_dbfile (char *template, int n, char *output_name, BOOL expect_
r = queue_destroy(q);
assert(r==0);
struct descriptor desc = {.version = 1, .dbt = (DBT){.size = 4, .data="abcd"}};
DESCRIPTOR_S desc = {.version = 1, .dbt = (DBT){.size = 4, .data="abcd"}};
int fd = open(output_name, O_RDWR | O_CREAT | O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO);
assert(fd>=0);

View file

@ -152,7 +152,7 @@ static void test_write_dbfile (char *template, int n, char *output_name) {
r = queue_destroy(q);
assert(r==0);
struct descriptor desc = {.version = 1, .dbt = (DBT){.size = 4, .data="abcd"}};
DESCRIPTOR_S desc = {.version = 1, .dbt = (DBT){.size = 4, .data="abcd"}};
int fd = open(output_name, O_RDWR | O_CREAT | O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO);
assert(fd>=0);

View file

@ -316,7 +316,7 @@ static void verify_dbfile(int n, int sorted_keys[], const char *sorted_vals[], c
assert(fs.n_temp_files==0);
struct descriptor desc = {.version = 1, .dbt = (DBT){.size = 4, .data="abcd"}};
DESCRIPTOR_S desc = {.version = 1, .dbt = (DBT){.size = 4, .data="abcd"}};
int fd = open(output_name, O_RDWR | O_CREAT | O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO);
assert(fd>=0);

View file

@ -13,6 +13,7 @@ test_main (int argc __attribute__((__unused__)),
const char *argv[] __attribute__((__unused__))) {
int r;
long long lognum;
char logname[PATH_MAX];
r = system(rmrf);
CKERR(r);
r = toku_os_mkdir(dname, S_IRWXU); assert(r==0);
@ -20,18 +21,21 @@ test_main (int argc __attribute__((__unused__)),
assert(r==0 && lognum==0LL);
mode_t mode = S_IRWXU + S_IRWXG + S_IRWXO;
r = open(dname "/log01.tokulog", O_WRONLY + O_CREAT + O_BINARY, mode); assert(r>=0);
sprintf(logname, dname "/log01.tokulog%d", TOKU_LOG_VERSION);
r = open(logname, O_WRONLY + O_CREAT + O_BINARY, mode); assert(r>=0);
r = close(r); assert(r==0);
r = toku_logger_find_next_unused_log_file(dname,&lognum);
assert(r==0 && lognum==2LL);
r = open(dname "/log123456789012345.tokulog", O_WRONLY + O_CREAT + O_BINARY, mode); assert(r>=0);
sprintf(logname, dname "/log123456789012345.tokulog%d", TOKU_LOG_VERSION);
r = open(logname, O_WRONLY + O_CREAT + O_BINARY, mode); assert(r>=0);
r = close(r); assert(r==0);
r = toku_logger_find_next_unused_log_file(dname,&lognum);
assert(r==0 && lognum==123456789012346LL);
r = open(dname "/log3.tokulog", O_WRONLY + O_CREAT + O_BINARY, mode); assert(r>=0);
sprintf(logname, dname "/log3.tokulog%d", TOKU_LOG_VERSION);
r = open(logname, O_WRONLY + O_CREAT + O_BINARY, mode); assert(r>=0);
r = close(r); assert(r==0);
r = toku_logger_find_next_unused_log_file(dname,&lognum);
assert(r==0 && lognum==123456789012346LL);

View file

@ -14,6 +14,7 @@ int
test_main (int argc __attribute__((__unused__)),
const char *argv[] __attribute__((__unused__))) {
int r;
char logname[PATH_MAX];
r = system(rmrf);
CKERR(r);
r = toku_os_mkdir(dname, S_IRWXU); assert(r==0);
@ -34,7 +35,8 @@ test_main (int argc __attribute__((__unused__)),
r = toku_logger_close(&logger); assert(r == 0);
{
toku_struct_stat statbuf;
r = toku_stat(dname "/log000000000000.tokulog", &statbuf);
sprintf(logname, dname "/log000000000000.tokulog%d", TOKU_LOG_VERSION);
r = toku_stat(logname, &statbuf);
assert(r==0);
assert(statbuf.st_size==12+5);
}

View file

@ -56,8 +56,10 @@ test_main (int argc __attribute__((__unused__)),
assert(r == 0);
{
char logname[PATH_MAX];
toku_struct_stat statbuf;
r = toku_stat(dname "/log000000000000.tokulog", &statbuf);
sprintf(logname, dname "/log000000000000.tokulog%d", TOKU_LOG_VERSION);
r = toku_stat(logname, &statbuf);
assert(r==0);
assert(statbuf.st_size<=LSIZE);
}

View file

@ -13,8 +13,10 @@
static void corrupt_the_checksum(void) {
// change the LSN in the first log entry of log 0. this will cause an checksum error.
char logname[PATH_MAX];
int r;
FILE *f = fopen(dname "/" "log000000000000.tokulog", "r+b"); assert(f);
sprintf(logname, dname "/" "log000000000000.tokulog%d", TOKU_LOG_VERSION);
FILE *f = fopen(logname, "r+b"); assert(f);
r = fseek(f, 025, SEEK_SET); assert(r == 0);
char c = 100;
size_t n = fwrite(&c, sizeof c, 1, f); assert(n == sizeof c);

View file

@ -59,7 +59,7 @@ test_main (int argc, const char *argv[]) {
r = toku_logger_find_next_unused_log_file(dname, &nexti);
assert(r == 0);
char mt_fname[128];
snprintf(mt_fname, 128, "%s/log%012lld.tokulog", dname, nexti);
snprintf(mt_fname, 128, "%s/log%012lld.tokulog%d", dname, nexti, TOKU_LOG_VERSION);
int mt_fd = open(mt_fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRWXU);
assert(mt_fd != -1);
r = close(mt_fd);
@ -89,7 +89,7 @@ test_main (int argc, const char *argv[]) {
r = toku_logger_find_next_unused_log_file(dname, &nexti);
assert(r == 0);
char mt_fname[128];
snprintf(mt_fname, 128, "%s/log%012lld.tokulog", dname, nexti);
snprintf(mt_fname, 128, "%s/log%012lld.tokulog%d", dname, nexti, TOKU_LOG_VERSION);
int mt_fd = open(mt_fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRWXU);
assert(mt_fd != -1);
r = close(mt_fd);

View file

@ -46,7 +46,7 @@ run_test(void) {
r = close(devnul); assert(r==0);
char fname[256];
sprintf(fname, "%s/%s", TESTDIR, "log000000000000.tokulog");
sprintf(fname, "%s/%s%d", TESTDIR, "log000000000000.tokulog", TOKU_LOG_VERSION);
r = toku_stat(fname, &st); assert(r==0);
if ( st.st_size - trim > magic_begin_end_checkpoint_sz ) {

View file

@ -8,7 +8,9 @@
static void recover_callback_at_turnaround(void *UU(arg)) {
// change the LSN in the first log entry of log 2. this will cause an LSN error during the forward scan.
int r;
FILE *f = fopen("log000000000002.tokulog", "r+b"); assert(f);
char logname[PATH_MAX];
sprintf(logname, "log000000000002.tokulog%d", TOKU_LOG_VERSION);
FILE *f = fopen(logname, "r+b"); assert(f);
r = fseek(f, 025, SEEK_SET); assert(r == 0);
char c = 100;
size_t n = fwrite(&c, sizeof c, 1, f); assert(n == sizeof c);

View file

@ -1,121 +0,0 @@
#include <toku_portability.h>
#include <string.h>
#include "test.h"
#include "brttypes.h"
#include "includes.h"
#include "backwards_10.h"
static char
int32_get_char(u_int32_t i, int which) {
char *c = (char*)&i;
return c[which];
}
#define UINT32TOCHAR(i) int32_get_char(i, 0), int32_get_char(i, 1), int32_get_char(i, 2), int32_get_char(i, 3)
#define UINT64TOCHAR(i) UINT32TOCHAR(i>>32), UINT32TOCHAR(i&0xffffffff)
static void test_leafentry_1 (void) {
LEAFENTRY l;
int r;
u_int32_t msize, dsize;
r = le10_committed(4, "abc", 3, "xy", &msize, &dsize, &l);
assert(r==0);
char expect[] = {LE_COMMITTED,
UINT32TOCHAR(4),
'a', 'b', 'c', 0,
UINT32TOCHAR(3),
'x', 'y', 0};
assert(sizeof(expect)==msize);
assert(msize==dsize);
assert(memcmp(l, expect, msize)==0);
toku_free(l);
}
static void test_leafentry_2 (void) {
LEAFENTRY l;
int r;
u_int32_t msize, dsize;
r = le10_both(0x0123456789abcdef0LL, 3, "ab", 4, "xyz", 5, "lmno", &msize, &dsize, &l);
assert(r==0);
char expect[] = {LE_BOTH,
UINT64TOCHAR(0x0123456789abcdef0LL),
UINT32TOCHAR(3), 'a', 'b', 0,
UINT32TOCHAR(4), 'x', 'y', 'z', 0,
UINT32TOCHAR(5), 'l', 'm', 'n', 'o', 0};
assert(sizeof(expect)==msize);
assert(msize==dsize);
assert(memcmp(l, expect, msize)==0);
toku_free(l);
}
static void test_leafentry_3 (void) {
LEAFENTRY l;
int r;
u_int32_t msize, dsize;
r = le10_provdel(0x0123456789abcdef0LL, 3, "ab", 5, "lmno", &msize, &dsize, &l);
assert(r==0);
char expect[] = {LE_PROVDEL,
UINT64TOCHAR(0x0123456789abcdef0LL),
UINT32TOCHAR(3), 'a', 'b', 0,
UINT32TOCHAR(5), 'l', 'm', 'n', 'o', 0};
assert(sizeof(expect)==msize);
assert(msize==dsize);
assert(memcmp(l, expect, msize)==0);
toku_free(l);
}
static void test_leafentry_4 (void) {
LEAFENTRY l;
int r;
u_int32_t msize, dsize;
r = le10_provpair(0x0123456789abcdef0LL, 3, "ab", 5, "lmno", &msize, &dsize, &l);
assert(r==0);
char expect[] = {LE_PROVPAIR,
UINT64TOCHAR(0x0123456789abcdef0LL),
UINT32TOCHAR(3), 'a', 'b', 0,
UINT32TOCHAR(5), 'l', 'm', 'n', 'o', 0};
assert(sizeof(expect)==msize);
assert(msize==dsize);
assert(memcmp(l, expect, msize)==0);
toku_free(l);
}
char zeros[1026];
#define n5zeros 0,0,0,0,0
#define n10zeros n5zeros,n5zeros
#define n25zeros n5zeros,n10zeros,n10zeros
#define n75zeros n25zeros,n25zeros,n25zeros
#define n125zeros n75zeros,n25zeros,n25zeros
#define n150zeros n75zeros,n75zeros
#define n300zeros n150zeros,n150zeros
#define n301zeros 0,n300zeros
#define n1025zeros n300zeros,n300zeros,n300zeros,n125zeros
static void test_leafentry_3long (void) {
char expect_3long[] = {LE_PROVDEL,
UINT64TOCHAR(0x0123456789abcdef0LL),
UINT32TOCHAR(301), n301zeros,
UINT32TOCHAR(1025), n1025zeros};
LEAFENTRY l;
int r;
u_int32_t msize, dsize;
r = le10_provdel(0x0123456789abcdef0LL, 301, zeros, 1025, zeros, &msize, &dsize, &l);
assert(r==0);
assert(sizeof(expect_3long)==msize);
assert(msize==dsize);
assert(memcmp(l, expect_3long, msize)==0);
toku_free(l);
}
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
test_leafentry_1();
test_leafentry_2();
test_leafentry_3();
test_leafentry_4();
test_leafentry_3long();
return 0;
}

View file

@ -127,7 +127,9 @@ int test_0 (void) {
// test per-file version
int test_1 () {
int r=0;
char logfile[100] = "log000000000000.tokulog";
char logfile[PATH_MAX];
sprintf(logfile, "log000000000000.tokulog%d", TOKU_LOG_VERSION);
struct toku_logcursor *cursor;
struct log_entry *entry;

View file

@ -1641,25 +1641,3 @@ bool transaction_open(TXNID xid) {
#endif
// Wrapper code to support backwards compatibility with version 10 (until we don't want it).
// These wrappers should be removed if/when we remove support for version 10 leafentries.
#include "backwards_10.h"
void
toku_upgrade_ule_init_empty_ule(ULE ule, u_int32_t keylen, void * keyp) {
ule_init_empty_ule(ule, keylen, keyp);
}
void
toku_upgrade_ule_remove_innermost_uxr(ULE ule) {
ule_remove_innermost_uxr(ule);
}
void
toku_upgrade_ule_push_insert_uxr(ULE ule, TXNID xid, u_int32_t vallen, void * valp) {
ule_push_insert_uxr(ule, xid, vallen, valp);
}
void
toku_upgrade_ule_push_delete_uxr(ULE ule, TXNID xid) {
ule_push_delete_uxr(ule, xid);
}

View file

@ -212,7 +212,7 @@ int toku_loader_create_loader(DB_ENV *env,
}
else {
char **XMALLOC_N(N, new_inames_in_env);
const struct descriptor **XMALLOC_N(N, descriptors);
DESCRIPTOR *XMALLOC_N(N, descriptors);
for (int i=0; i<N; i++) {
descriptors[i] = &dbs[i]->i->brt->h->descriptor;
}

View file

@ -33,7 +33,12 @@ endif
SRCS = $(sort $(wildcard *.c))
RECOVER_SRCS = $(wildcard recover-*.c)
LOADER_SRCS = $(wildcard loader-*.c)
NONSTANDARD_SRCS=$(RECOVER_SRCS) $(LOADER_SRCS)
TRANSPARENT_UPGRADE_SRCS = $(wildcard upgrade-*.c)
NONSTANDARD_SRCS= \
$(RECOVER_SRCS) \
$(LOADER_SRCS) \
$(TRANSPARENT_UPGRADE_SRCS) \
#end
#Tests that don't compile in windows. SHould
WINDOWS_NOT_PORTED_TESTS = \
@ -104,6 +109,7 @@ BDB_DONTRUN_TESTS = \
loader-tpch-load \
manyfiles \
powerfail \
preload-3.1-db \
progress \
recover-2483 \
recover-compare-db \
@ -117,6 +123,8 @@ BDB_DONTRUN_TESTS = \
recover-put-multiple-fdelete-some \
recover-split-checkpoint \
recover-tablelock \
recover-upgrade-db-descriptor-multihandle \
recover-upgrade-db-descriptor \
recovery_fileops_stress \
recovery_fileops_unit \
recovery_stress \
@ -153,6 +161,10 @@ BDB_DONTRUN_TESTS = \
test_txn_nested4 \
test_txn_nested5 \
transactional_fileops \
upgrade-test-1 \
upgrade-test-2 \
upgrade-test-3 \
upgrade-test-4 \
zombie_db \
#\ ends prev line

View file

@ -159,7 +159,7 @@ db_startup(DICTIONARY d, DB_TXN *open_txn) {
{
DBT desc;
dbt_init(&desc, "foo", sizeof("foo"));
r = db->set_descriptor(db, 1, &desc, abort_on_upgrade);
r = db->set_descriptor(db, 1, &desc);
CKERR(r);
}
{

View file

@ -88,7 +88,7 @@ delete_directory(void) {
static void
delete_log(void) {
char cmd[1024];
sprintf(cmd, "rm -rf %s%s%s", ENVDIR, "/", "*.tokulog");
sprintf(cmd, "rm -rf %s%s%s", ENVDIR, "/", "*.tokulog*");
int r = system(cmd);
CKERR(r);
}

View file

@ -754,7 +754,7 @@ static void run_test(enum test_type t, int trigger)
for(int i=0;i<NUM_DBS;i++) {
idx[i] = i;
r = db_create(&dbs[i], env, 0); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc, abort_on_upgrade); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
dbs[i]->app_private = &idx[i];
snprintf(name, sizeof(name), "db_%04x", i);
r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);

View file

@ -322,7 +322,7 @@ static void run_test(void)
for(int i=0;i<NUM_DBS;i++) {
idx[i] = i;
r = db_create(&dbs[i], env, 0); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc, abort_on_upgrade); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
dbs[i]->app_private = &idx[i];
snprintf(name, sizeof(name), "db_%04x", i);
r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);

View file

@ -127,7 +127,7 @@ static void run_test(void)
for(int i=0;i<NUM_DBS;i++) {
idx[i] = i;
r = db_create(&dbs[i], env, 0); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc, abort_on_upgrade); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
dbs[i]->app_private = &idx[i];
snprintf(name, sizeof(name), "db_%04x", i);
r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);

View file

@ -132,7 +132,7 @@ static void run_test(void)
for(int i=0;i<NUM_DBS;i++) {
idx[i] = i;
r = db_create(&dbs[i], env, 0); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc, abort_on_upgrade); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
dbs[i]->app_private = &idx[i];
snprintf(name, sizeof(name), "db_%04x", i);
r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);

View file

@ -361,7 +361,7 @@ static void run_test(void)
for(int i=0;i<NUM_DBS;i++) {
idx[i] = i;
r = db_create(&dbs[i], env, 0); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc, abort_on_upgrade); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
dbs[i]->app_private = &idx[i];
snprintf(name, sizeof(name), "db_%04x", i);
r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);

View file

@ -395,7 +395,7 @@ static int run_test(void)
for(int i=0;i<NUM_DBS;i++) {
idx[i] = i;
r = db_create(&dbs[i], env, 0); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc, abort_on_upgrade); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
dbs[i]->app_private = &idx[i];
snprintf(name, sizeof(name), "db_%04x", i);
r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);

202
src/tests/preload-3.1-db.c Normal file
View file

@ -0,0 +1,202 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "$Id: loader-stress-test.c 20470 2010-05-20 18:30:04Z bkuszmaul $"
#define kv_pair_funcs 1 // pull in kv_pair generators from test.h
#include "test.h"
#include "toku_pthread.h"
#include "toku_atomic.h"
#include <db.h>
#include <sys/stat.h>
#include "ydb-internal.h"
#include "test_kv_gen.h"
/*
*/
DB_ENV *env;
enum {MAX_NAME=128};
enum {ROWS_PER_TRANSACTION=10000};
int NUM_DBS=5;
int NUM_ROWS=100000;
int CHECK_RESULTS=0;
enum { old_default_cachesize=1024 }; // MB
int CACHESIZE=old_default_cachesize;
int ALLOW_DUPS=0;
static struct timeval starttime;
static double UU() elapsed_time (void) {
    // Seconds (with microsecond resolution) since preload_dbs() recorded
    // `starttime` via gettimeofday().
    struct timeval tv;
    gettimeofday(&tv, NULL);
    double whole = (double)(tv.tv_sec - starttime.tv_sec);
    double frac  = 1e-6 * (tv.tv_usec - starttime.tv_usec);
    return whole + frac;
}
static void preload_dbs(DB **dbs)
{
    // Load NUM_ROWS generated key/value pairs into each of the NUM_DBS open
    // databases, committing a transaction every ROWS_PER_TRANSACTION rows.
    // When CHECK_RESULTS is set, re-reads and verifies the loaded data.
    // NOTE: removed the unused db_flags/dbt_flags/flags locals that were
    // filled but never read.
    gettimeofday(&starttime, NULL);
    int r;
    DB_TXN *txn;
    DBT skey, sval;   // source pair handed to the row generator
    DBT key, val;     // generated pair actually stored; buffers are realloc-managed
    dbt_init_realloc(&key);
    dbt_init_realloc(&val);
    unsigned int k, v;
    if ( verbose ) { printf("loading");fflush(stdout); }
    int outer_loop_num = ( NUM_ROWS <= ROWS_PER_TRANSACTION ) ? 1 : (NUM_ROWS / ROWS_PER_TRANSACTION);
    for(int x=0;x<outer_loop_num;x++) {
        r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
        for(int i=1;i<=ROWS_PER_TRANSACTION;i++) {
            k = i + (x*ROWS_PER_TRANSACTION);
            v = generate_val(k, 0);
            dbt_init(&skey, &k, sizeof(unsigned int));
            dbt_init(&sval, &v, sizeof(unsigned int));
            for(int db = 0;db < NUM_DBS;db++) {
                put_multiple_generate(dbs[db], // dest_db
                                      NULL,    // src_db, ignored
                                      &key, &val,   // dest_key, dest_val
                                      &skey, &sval, // src_key, src_val
                                      NULL);   // extra, ignored
                r = dbs[db]->put(dbs[db], txn, &key, &val, 0); CKERR(r);
                // put() may have consumed the realloc buffers; re-arm them.
                if (key.flags == 0) { dbt_init_realloc(&key); }
                if (val.flags == 0) { dbt_init_realloc(&val); }
            }
        }
        r = txn->commit(txn, 0); CKERR(r);
        if ( verbose ) {printf(".");fflush(stdout);}
    }
    if ( key.flags ) { toku_free(key.data); key.data = NULL; }
    // BUG FIX: the original nulled key.data here after freeing val.data,
    // leaving val.data dangling.  Null the pointer that was freed.
    if ( val.flags ) { toku_free(val.data); val.data = NULL; }
    if ( CHECK_RESULTS) {
        if ( verbose ) {printf("\nchecking");fflush(stdout);}
        check_results(env, dbs, NUM_DBS, NUM_ROWS);
    }
    if ( verbose) {printf("\ndone\n");fflush(stdout);}
}
char *free_me = NULL;
char *env_dir = ENVDIR; // the default env_dir.
static void run_test(void)
{
    // End-to-end preload scenario: wipe and recreate the environment
    // directory, open an environment, create NUM_DBS databases (each with a
    // version-1 descriptor), preload them, close everything, then reopen and
    // close the environment once more to trim log files.
    int r;
    {
        // Remove any stale environment directory with a shell command.
        int len = strlen(env_dir) + 20;
        char syscmd[len];
        r = snprintf(syscmd, len, "rm -rf %s", env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
    }
    r = toku_os_mkdir(env_dir, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = db_env_create(&env, 0); CKERR(r);
    // Intentionally left disabled (kept from an earlier variant of the test):
    // r = env->set_default_bt_compare(env, uint_dbt_cmp); CKERR(r);
    // r = env->set_default_dup_compare(env, uint_dbt_cmp); CKERR(r);
    // if ( verbose ) printf("CACHESIZE = %d MB\n", CACHESIZE);
    // r = env->set_cachesize(env, CACHESIZE / 1024, (CACHESIZE % 1024)*1024*1024, 1); CKERR(r);
    // CKERR(r);
    int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE;
    r = env->open(env, env_dir, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    env->set_errfile(env, stderr);
    r = env->checkpointing_set_period(env, 60); CKERR(r);
    DBT desc;
    dbt_init(&desc, "foo", sizeof("foo"));
    char name[MAX_NAME*2];
    DB **dbs = (DB**)toku_malloc(sizeof(DB*) * NUM_DBS);
    assert(dbs != NULL);
    int idx[MAX_DBS];
    for(int i=0;i<NUM_DBS;i++) {
        idx[i] = i;
        r = db_create(&dbs[i], env, 0); CKERR(r);
        // Descriptor version 1 with contents "foo" on every database.
        r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
        dbs[i]->app_private = &idx[i];  // each db remembers its own index
        snprintf(name, sizeof(name), "db_%04x", i);
        r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
    }
    generate_permute_tables();
    // -------------------------- //
    preload_dbs(dbs);
    // -------------------------- //
    for(int i=0;i<NUM_DBS;i++) {
        r = dbs[i]->close(dbs[i], 0); CKERR(r);
        dbs[i] = NULL;
    }
    if (verbose >= 2)
        print_engine_status(env);
    r = env->close(env, 0); CKERR(r);
    toku_free(dbs);
    // reopen, then close environment to trim logfiles
    r = db_env_create(&env, 0); CKERR(r);
    r = env->open(env, env_dir, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = env->close(env, 0); CKERR(r);
}
// ------------ infrastructure ----------
static void do_args(int argc, char * const argv[]);
int test_main(int argc, char * const argv[]) {
do_args(argc, argv);
run_test();
if (free_me) toku_free(free_me);
return 0;
}
// Parse test options: -v/-q adjust verbosity, -d sets NUM_DBS (capped at
// MAX_DBS), -r sets NUM_ROWS, -c enables result checking, -h prints usage.
// Any bad argument jumps to the usage label and exits.
static void do_args(int argc, char * const argv[]) {
    int resultcode;
    char *cmd = argv[0];
    argc--; argv++;
    while (argc>0) {
        if (strcmp(argv[0], "-v")==0) {
            verbose++;
        } else if (strcmp(argv[0],"-q")==0) {
            verbose--;
            if (verbose<0) verbose=0;
        } else if (strcmp(argv[0], "-h")==0) {
            resultcode=0;
            // Shared exit path: error handlers below goto here with resultcode=1.
do_usage:
            fprintf(stderr, "Usage: -h -c -d <num_dbs> -r <num_rows> %s\n", cmd);
            exit(resultcode);
        } else if (strcmp(argv[0], "-d")==0) {
            argc--; argv++;
            NUM_DBS = atoi(argv[0]);
            if ( NUM_DBS > MAX_DBS ) {
                fprintf(stderr, "max value for -d field is %d\n", MAX_DBS);
                resultcode=1;
                goto do_usage;
            }
        } else if (strcmp(argv[0], "-r")==0) {
            argc--; argv++;
            NUM_ROWS = atoi(argv[0]);
        } else if (strcmp(argv[0], "-c")==0) {
            CHECK_RESULTS = 1;
        } else {
            fprintf(stderr, "Unknown arg: %s\n", argv[0]);
            resultcode=1;
            goto do_usage;
        }
        argc--;
        argv++;
    }
}

View file

@ -15,8 +15,8 @@ char *nameb="b.db";
static int my_compare(DB *UU(db), const DBT *a, const DBT *b) {
assert(db);
assert(db->descriptor);
assert(db->descriptor->size == sizeof(descriptor_contents));
assert(memcmp(db->descriptor->data, descriptor_contents, sizeof(descriptor_contents)) == 0);
assert(db->descriptor->dbt.size == sizeof(descriptor_contents));
assert(memcmp(db->descriptor->dbt.data, descriptor_contents, sizeof(descriptor_contents)) == 0);
assert(a->size == b->size);
return memcmp(a->data, b->data, a->size);
@ -29,7 +29,7 @@ set_descriptor(DB* db) {
#if USE_TDB
DBT descriptor;
dbt_init(&descriptor, descriptor_contents, sizeof(descriptor_contents));
int r = db->set_descriptor(db, 1, &descriptor, abort_on_upgrade); CKERR(r);
int r = db->set_descriptor(db, 1, &descriptor); CKERR(r);
#endif
}

View file

@ -378,7 +378,7 @@ static void run_test(void)
for(int i=0;i<NUM_DBS;i++) {
idx[i] = i;
r = db_create(&dbs[i], env, 0); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc, abort_on_upgrade); CKERR(r);
r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
dbs[i]->app_private = &idx[i];
snprintf(name, sizeof(name), "db_%04x", i);
r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);

View file

@ -14,38 +14,21 @@ static DBT dest_vals[num_dbs];
BOOL do_test=FALSE, do_recover=FALSE;
static int
crash_on_upgrade(DB* db,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val) {
db = db;
old_version = old_version;
old_descriptor = old_descriptor;
old_key = old_key;
old_val = old_val;
new_version = new_version;
new_descriptor = new_descriptor;
new_key = new_key;
new_val = new_val;
assert(FALSE);
return 0;
}
static int
put_multiple_generate(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val, void *extra) {
if (extra == NULL) {
if (src_db) {
assert(src_db->descriptor);
assert(src_db->descriptor->size == 4);
assert((*(uint32_t*)src_db->descriptor->data) == 0);
assert(src_db->descriptor->dbt.size == 4);
assert((*(uint32_t*)src_db->descriptor->dbt.data) == 0);
}
}
else {
assert(src_db == NULL);
assert(extra==&namea); //Verifying extra gets set right.
}
assert(dest_db->descriptor->size == 4);
uint32_t which = *(uint32_t*)dest_db->descriptor->data;
assert(dest_db->descriptor->dbt.size == 4);
uint32_t which = *(uint32_t*)dest_db->descriptor->dbt.data;
assert(which < num_dbs);
if (dest_key->data) toku_free(dest_key->data);
@ -88,9 +71,9 @@ static void run_test (void) {
r = db_create(&dba, env, 0); CKERR(r);
r = db_create(&dbb, env, 0); CKERR(r);
which = 0;
r = dba->set_descriptor(dba, 1, &descriptor, crash_on_upgrade); CKERR(r);
r = dba->set_descriptor(dba, 1, &descriptor); CKERR(r);
which = 1;
r = dbb->set_descriptor(dbb, 1, &descriptor, crash_on_upgrade); CKERR(r);
r = dbb->set_descriptor(dbb, 1, &descriptor); CKERR(r);
r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
r = dbb->open(dbb, NULL, nameb, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);

View file

@ -62,17 +62,17 @@ static void run_recover (void) {
r = toku_os_mkdir(ENVDIR "/savedlogs", S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
r = system("mv " ENVDIR "/*.tokulog " ENVDIR "/savedlogs/");
r = system("mv " ENVDIR "/*.tokulog* " ENVDIR "/savedlogs/");
CKERR(r);
r = db_env_create(&env, 0); CKERR(r);
r = env->open(env, ENVDIR, envflags + DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR2(r, ENOENT);
r = system("rm -rf " ENVDIR "/*.tokulog");
r = system("rm -rf " ENVDIR "/*.tokulog*");
CKERR(r);
r = system("mv " ENVDIR "/savedlogs/*.tokulog " ENVDIR "/");
r = system("mv " ENVDIR "/savedlogs/*.tokulog* " ENVDIR "/");
CKERR(r);
r = env->open(env, ENVDIR, envflags + DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);

View file

@ -14,29 +14,12 @@ static DBT dest_vals[num_dbs];
BOOL do_test=FALSE, do_recover=FALSE;
static int
crash_on_upgrade(DB* db,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val) {
db = db;
old_version = old_version;
old_descriptor = old_descriptor;
old_key = old_key;
old_val = old_val;
new_version = new_version;
new_descriptor = new_descriptor;
new_key = new_key;
new_val = new_val;
assert(FALSE);
return 0;
}
static int
put_multiple_generate(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val, void *extra) {
assert(src_db == NULL);
assert(extra==&namea || extra==NULL); //Verifying extra gets set right.
assert(dest_db->descriptor->size == 4);
uint32_t which = *(uint32_t*)dest_db->descriptor->data;
assert(dest_db->descriptor->dbt.size == 4);
uint32_t which = *(uint32_t*)dest_db->descriptor->dbt.data;
assert(which < num_dbs);
if (dest_key->data) toku_free(dest_key->data);
@ -79,9 +62,9 @@ static void run_test (void) {
r = db_create(&dba, env, 0); CKERR(r);
r = db_create(&dbb, env, 0); CKERR(r);
which = 0;
r = dba->set_descriptor(dba, 1, &descriptor, crash_on_upgrade); CKERR(r);
r = dba->set_descriptor(dba, 1, &descriptor); CKERR(r);
which = 1;
r = dbb->set_descriptor(dbb, 1, &descriptor, crash_on_upgrade); CKERR(r);
r = dbb->set_descriptor(dbb, 1, &descriptor); CKERR(r);
r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
r = dbb->open(dbb, NULL, nameb, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);

View file

@ -14,38 +14,21 @@ static DBT dest_vals[num_dbs];
BOOL do_test=FALSE, do_recover=FALSE;
static int
crash_on_upgrade(DB* db,
u_int32_t old_version, const DBT *old_descriptor, const DBT *old_key, const DBT *old_val,
u_int32_t new_version, const DBT *new_descriptor, const DBT *new_key, const DBT *new_val) {
db = db;
old_version = old_version;
old_descriptor = old_descriptor;
old_key = old_key;
old_val = old_val;
new_version = new_version;
new_descriptor = new_descriptor;
new_key = new_key;
new_val = new_val;
assert(FALSE);
return 0;
}
static int
put_multiple_generate(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val, void *extra) {
if (extra == NULL) {
if (src_db) {
assert(src_db->descriptor);
assert(src_db->descriptor->size == 4);
assert((*(uint32_t*)src_db->descriptor->data) == 0);
assert(src_db->descriptor->dbt.size == 4);
assert((*(uint32_t*)src_db->descriptor->dbt.data) == 0);
}
}
else {
assert(src_db == NULL);
assert(extra==&namea); //Verifying extra gets set right.
}
assert(dest_db->descriptor->size == 4);
uint32_t which = *(uint32_t*)dest_db->descriptor->data;
assert(dest_db->descriptor->dbt.size == 4);
uint32_t which = *(uint32_t*)dest_db->descriptor->dbt.data;
assert(which < num_dbs);
if (dest_key->data) toku_free(dest_key->data);
@ -88,9 +71,9 @@ static void run_test (void) {
r = db_create(&dba, env, 0); CKERR(r);
r = db_create(&dbb, env, 0); CKERR(r);
which = 0;
r = dba->set_descriptor(dba, 1, &descriptor, crash_on_upgrade); CKERR(r);
r = dba->set_descriptor(dba, 1, &descriptor); CKERR(r);
which = 1;
r = dbb->set_descriptor(dbb, 1, &descriptor, crash_on_upgrade); CKERR(r);
r = dbb->set_descriptor(dbb, 1, &descriptor); CKERR(r);
r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
r = dbb->open(dbb, NULL, nameb, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);

View file

@ -0,0 +1,294 @@
// verify that the comparison function get a valid db object pointer
#include <sys/stat.h>
#include "test.h"
char *descriptor_contents[] = {
"Spoon full of sugar",
"Bucket full of pants"
};
const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE;
char *namea="a.db";
int verified = 0;
uint32_t forced_version = 2;
#if USE_TDB
// Comparison callback that, besides performing a plain memcmp ordering,
// verifies the db handle carries the expected descriptor (version and
// contents must match descriptor_contents[version-1]).  Sets the global
// `verified` flag so the test can assert the comparator actually ran.
static int my_compare(DB *UU(db), const DBT *a, const DBT *b) {
    assert(db);
    assert(db->descriptor);
    uint32_t version = db->descriptor->version;
    assert(version > 0);
    assert(version == forced_version);
    uint32_t which = version-1;  // descriptor version N holds contents[N-1]
    size_t len = strlen(descriptor_contents[which])+1;
    assert(db->descriptor->dbt.size == len);
    assert(memcmp(db->descriptor->dbt.data, descriptor_contents[which], len) == 0);
    assert(a->size == b->size);
    verified = 1;  // record that the comparator was exercised
    return memcmp(a->data, b->data, a->size);
}
#endif
// Attach descriptor number `which` to the (not yet open) db handle, using
// version which+1 and the NUL-terminated string descriptor_contents[which].
// No-op unless built against TokuDB (USE_TDB).
static void
set_descriptor(DB* db, int which) {
#if USE_TDB
    DBT descriptor;
    size_t len = strlen(descriptor_contents[which])+1;
    dbt_init(&descriptor, descriptor_contents[which], len);
    int r = db->set_descriptor(db, which+1, &descriptor); CKERR(r);
#endif
}
// Set up a fresh environment with two db handles, write a few rows inside a
// transaction, then either commit or abort (forcing a log fsync) and
// hard-crash on purpose so a later invocation can exercise recovery.
static void
do_x1_shutdown (BOOL do_commit, BOOL do_abort) {
    int r;
    r = system("rm -rf " ENVDIR); CKERR(r);
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = toku_os_mkdir(ENVDIR"/data", S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    DB_ENV *env;
    DB *dba, *dbb;
    r = db_env_create(&env, 0); CKERR(r);
    r = env->set_data_dir(env, "data"); CKERR(r);
#if USE_TDB
    r = env->set_default_bt_compare(env, my_compare); CKERR(r);
#endif
    r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = db_create(&dba, env, 0); CKERR(r);
    set_descriptor(dba, 0);
    r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    r = db_create(&dbb, env, 0); CKERR(r);
    set_descriptor(dbb, 1);
    // NOTE(review): dbb opens `namea` as well (same dictionary as dba) —
    // looks intentional for a two-handle descriptor test, but confirm it is
    // not a typo for a second file name.
    r = dbb->open(dbb, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    DB_TXN *txn;
    r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
    {
        DBT a={.data="a", .size=2};
        DBT b={.data="b", .size=2};
        r = dba->put(dba, txn, &a, &b, 0); CKERR(r);
        r = dba->put(dba, txn, &b, &a, 0); CKERR(r);
        r = dbb->put(dbb, txn, &b, &a, 0); CKERR(r);
    }
    //printf("opened\n");
    if (do_commit) {
        r = txn->commit(txn, 0); CKERR(r);
    } else if (do_abort) {
        r = txn->abort(txn); CKERR(r);
        // force an fsync of the log
        r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
        r = txn->commit(txn, 0); CKERR(r);
    }
    assert(verified);  // the comparator must have run during the puts
    //printf("shutdown\n");
    toku_hard_crash_on_purpose();  // deliberately skip clean shutdown
}
// Delete the dictionaries, run log-based recovery, and verify the recovered
// contents: two rows ("a"->"b", "b"->"a") if the crashed run committed,
// nothing otherwise.  Exits the process on success.
static void
do_x1_recover (BOOL did_commit) {
    DB_ENV *env;
    DB *dba;
    int r;
    r = system("rm -rf " ENVDIR"/data"); /* Delete dictionaries */ CKERR(r);
    r = toku_os_mkdir(ENVDIR"/data", S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = db_env_create(&env, 0); CKERR(r);
    r = env->set_data_dir(env, "data"); CKERR(r);
#if USE_TDB
    r = env->set_default_bt_compare(env, my_compare); CKERR(r);
#endif
    r = env->open(env, ENVDIR, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = db_create(&dba, env, 0); CKERR(r);
    r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    DBT aa={.size=0}, ab={.size=0};
    DB_TXN *txn;
    DBC *ca;
    r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
    r = dba->cursor(dba, txn, &ca, 0); CKERR(r);
    // NOTE(review): CKERR(r) here re-checks the stale cursor-create result,
    // not `ra` — `ra` may legitimately be DB_NOTFOUND, so this check is a
    // no-op leftover; consider removing it.
    int ra = ca->c_get(ca, &aa, &ab, DB_FIRST); CKERR(r);
    if (did_commit) {
        assert(ra==0);
        // verify key-value pairs
        assert(aa.size==2);
        assert(ab.size==2);
        const char a[2] = "a";
        const char b[2] = "b";
        assert(memcmp(aa.data, &a, 2)==0);
        assert(memcmp(ab.data, &b, 2)==0);
        assert(memcmp(ab.data, &b, 2)==0);
        assert(ca->c_get(ca, &aa, &ab, DB_NEXT) == 0);
        assert(aa.size == 2 && ab.size == 2 && memcmp(aa.data, b, 2) == 0 && memcmp(ab.data, a, 2) == 0);
        // make sure no other entries in DB
        assert(ca->c_get(ca, &aa, &ab, DB_NEXT) == DB_NOTFOUND);
    } else {
        // It wasn't committed (it also wasn't aborted), but a checkpoint happened.
        assert(ra==DB_NOTFOUND);
    }
    r = ca->c_close(ca); CKERR(r);
    r = txn->commit(txn, 0); CKERR(r);
    r = dba->close(dba, 0); CKERR(r);
    r = env->close(env, 0); CKERR(r);
    assert(verified);  // comparator must have run during cursor reads
    exit(0);
}
static void
do_x1_recover_only (void) {
    // Run log recovery (DB_RECOVER) with no data verification, then exit.
    DB_ENV *recover_env;
    int r;
    r = db_env_create(&recover_env, 0);
    CKERR(r);
    r = recover_env->open(recover_env, ENVDIR, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);
    r = recover_env->close(recover_env, 0);
    CKERR(r);
    exit(0);
}
static void
do_x1_no_recover (void) {
    // Opening without DB_RECOVER after a crash must fail with DB_RUNRECOVERY.
    DB_ENV *crashed_env;
    int r;
    r = db_env_create(&crashed_env, 0);
    CKERR(r);
    r = crashed_env->open(crashed_env, ENVDIR, envflags & ~DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO);
    assert(r == DB_RUNRECOVERY);
    r = crashed_env->close(crashed_env, 0);
    CKERR(r);
    exit(0);
}
const char *cmd;
#if 0
// (Currently disabled via #if 0.)  Fork a child that runs the crash phase
// (--commit or --abort), expect it to die with SIGABRT, then fork a second
// child that runs the matching recovery phase and expect a clean exit.
static void
do_test_internal (BOOL commit)
{
    pid_t pid;
    if (0 == (pid=fork())) {
        int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--commit" : "--abort", NULL);
        assert(r==-1);  // execl only returns on failure
        printf("execl failed: %d (%s)\n", errno, strerror(errno));
        assert(0);
    }
    {
        int r;
        int status;
        r = waitpid(pid, &status, 0);
        //printf("signaled=%d sig=%d\n", WIFSIGNALED(status), WTERMSIG(status));
        assert(WIFSIGNALED(status) && WTERMSIG(status)==SIGABRT);
    }
    // Now find out what happened
    if (0 == (pid = fork())) {
        int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--recover-committed" : "--recover-aborted", NULL);
        assert(r==-1);
        printf("execl failed: %d (%s)\n", errno, strerror(errno));
        assert(0);
    }
    {
        int r;
        int status;
        r = waitpid(pid, &status, 0);
        //printf("recovery exited=%d\n", WIFEXITED(status));
        assert(WIFEXITED(status) && WEXITSTATUS(status)==0);
    }
}
// (Currently disabled via #if 0.)  Exercise both the commit and the abort
// crash/recover cycles.
static void
do_test (void) {
    do_test_internal(TRUE);
    do_test_internal(FALSE);
}
#endif
BOOL do_commit=FALSE, do_abort=FALSE, do_explicit_abort=FALSE, do_recover_committed=FALSE, do_recover_aborted=FALSE, do_recover_only=FALSE, do_no_recover = FALSE;
// Parse the mode flags for this crash/recover test.  Exactly one of the mode
// options may be chosen; -v/-q adjust verbosity; -h prints usage.  Bad input
// jumps to the usage label and exits with status 1.
static void
x1_parse_args (int argc, char * const argv[]) {
    int resultcode;
    cmd = argv[0];
    argc--; argv++;
    while (argc>0) {
        if (strcmp(argv[0], "-v") == 0) {
            verbose++;
        } else if (strcmp(argv[0],"-q")==0) {
            verbose--;
            if (verbose<0) verbose=0;
        } else if (strcmp(argv[0], "--commit")==0 || strcmp(argv[0], "--test") == 0) {
            do_commit=TRUE;
        } else if (strcmp(argv[0], "--abort")==0) {
            do_abort=TRUE;
        } else if (strcmp(argv[0], "--explicit-abort")==0) {
            do_explicit_abort=TRUE;
        } else if (strcmp(argv[0], "--recover-committed")==0 || strcmp(argv[0], "--recover") == 0) {
            do_recover_committed=TRUE;
        } else if (strcmp(argv[0], "--recover-aborted")==0) {
            do_recover_aborted=TRUE;
        } else if (strcmp(argv[0], "--recover-only") == 0) {
            do_recover_only=TRUE;
        } else if (strcmp(argv[0], "--no-recover") == 0) {
            do_no_recover=TRUE;
        } else if (strcmp(argv[0], "-h")==0) {
            resultcode=0;
            // Shared exit path: error handlers below goto here with resultcode=1.
do_usage:
            fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] {--commit | --abort | --explicit-abort | --recover-committed | --recover-aborted } \n", cmd);
            exit(resultcode);
        } else {
            fprintf(stderr, "Unknown arg: %s\n", argv[0]);
            resultcode=1;
            goto do_usage;
        }
        argc--;
        argv++;
    }
    {
        // Reject combinations: the modes are mutually exclusive.
        int n_specified=0;
        if (do_commit) n_specified++;
        if (do_abort) n_specified++;
        if (do_explicit_abort) n_specified++;
        if (do_recover_committed) n_specified++;
        if (do_recover_aborted) n_specified++;
        if (do_recover_only) n_specified++;
        if (do_no_recover) n_specified++;
        if (n_specified>1) {
            printf("Specify only one of --commit or --abort or --recover-committed or --recover-aborted\n");
            resultcode=1;
            goto do_usage;
        }
    }
}
// Dispatch to the phase selected on the command line.  The crash phases
// never return (they call toku_hard_crash_on_purpose); the recover phases
// call exit(0) on success.
int
test_main (int argc, char * const argv[])
{
    x1_parse_args(argc, argv);
    if (do_commit) {
        do_x1_shutdown (TRUE, FALSE);
    } else if (do_abort) {
        do_x1_shutdown (FALSE, FALSE);
    } else if (do_explicit_abort) {
        do_x1_shutdown(FALSE, TRUE);
    } else if (do_recover_committed) {
        do_x1_recover(TRUE);
    } else if (do_recover_aborted) {
        do_x1_recover(FALSE);
    } else if (do_recover_only) {
        do_x1_recover_only();
    } else if (do_no_recover) {
        do_x1_no_recover();
    }
#if 0
    else {
        do_test();
    }
#endif
    return 0;
}

View file

@ -0,0 +1,297 @@
// verify that the comparison function get a valid db object pointer
#include <sys/stat.h>
#include "test.h"
char *descriptor_contents[] = {
"Spoon full of sugar",
"Bucket full of pants"
};
const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE;
char *namea="a.db";
int verified = 0;
uint32_t forced_version = 2;
#if USE_TDB
// Comparison callback that, besides performing a plain memcmp ordering,
// verifies the db handle carries the expected descriptor (version and
// contents must match descriptor_contents[version-1]).  Sets the global
// `verified` flag so the test can assert the comparator actually ran.
static int my_compare(DB *UU(db), const DBT *a, const DBT *b) {
    assert(db);
    assert(db->descriptor);
    uint32_t version = db->descriptor->version;
    assert(version > 0);
    assert(version == forced_version);
    uint32_t which = version-1;  // descriptor version N holds contents[N-1]
    size_t len = strlen(descriptor_contents[which])+1;
    assert(db->descriptor->dbt.size == len);
    assert(memcmp(db->descriptor->dbt.data, descriptor_contents[which], len) == 0);
    assert(a->size == b->size);
    verified = 1;  // record that the comparator was exercised
    return memcmp(a->data, b->data, a->size);
}
#endif
// Attach descriptor number `which` to the (not yet open) db handle, using
// version which+1 and the NUL-terminated string descriptor_contents[which].
// No-op unless built against TokuDB (USE_TDB).
static void
set_descriptor(DB* db, int which) {
#if USE_TDB
    DBT descriptor;
    size_t len = strlen(descriptor_contents[which])+1;
    dbt_init(&descriptor, descriptor_contents[which], len);
    int r = db->set_descriptor(db, which+1, &descriptor); CKERR(r);
#endif
}
// Set up a fresh environment, open the same dictionary twice in sequence —
// first with descriptor version 1, then (after closing) with version 2, i.e.
// a descriptor upgrade — write a few rows in a transaction, then either
// commit or abort (forcing a log fsync) and hard-crash on purpose so a later
// invocation can exercise recovery.
static void
do_x1_shutdown (BOOL do_commit, BOOL do_abort) {
    int r;
    r = system("rm -rf " ENVDIR); CKERR(r);
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = toku_os_mkdir(ENVDIR"/data", S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    DB_ENV *env;
    DB *dba;
    r = db_env_create(&env, 0); CKERR(r);
    r = env->set_data_dir(env, "data"); CKERR(r);
#if USE_TDB
    r = env->set_default_bt_compare(env, my_compare); CKERR(r);
#endif
    r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = db_create(&dba, env, 0); CKERR(r);
    set_descriptor(dba, 0);  // first open: descriptor version 1
    r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    r = dba->close(dba, 0); CKERR(r);
    r = db_create(&dba, env, 0); CKERR(r);
    set_descriptor(dba, 1);  // reopen: upgrade to descriptor version 2
    r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    DB_TXN *txn;
    r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
    {
        DBT a={.data="a", .size=2};
        DBT b={.data="b", .size=2};
        r = dba->put(dba, txn, &a, &b, 0); CKERR(r);
        r = dba->put(dba, txn, &b, &a, 0); CKERR(r);
    }
    //printf("opened\n");
    if (do_commit) {
        r = txn->commit(txn, 0); CKERR(r);
    } else if (do_abort) {
        r = txn->abort(txn); CKERR(r);
        // force an fsync of the log
        r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
        r = txn->commit(txn, 0); CKERR(r);
    }
    //printf("shutdown\n");
    assert(verified);  // the comparator must have run during the puts
    toku_hard_crash_on_purpose();  // deliberately skip clean shutdown
}
// Delete the dictionaries, run log-based recovery, and verify the recovered
// contents: two rows ("a"->"b", "b"->"a") if the crashed run committed,
// nothing otherwise.  Exits the process on success.
static void
do_x1_recover (BOOL did_commit) {
    DB_ENV *env;
    DB *dba;
    int r;
    r = system("rm -rf " ENVDIR"/data"); /* Delete dictionaries */ CKERR(r);
    r = toku_os_mkdir(ENVDIR"/data", S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = db_env_create(&env, 0); CKERR(r);
    r = env->set_data_dir(env, "data"); CKERR(r);
#if USE_TDB
    r = env->set_default_bt_compare(env, my_compare); CKERR(r);
#endif
    r = env->open(env, ENVDIR, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r = db_create(&dba, env, 0); CKERR(r);
    r = dba->open(dba, NULL, namea, NULL, DB_BTREE, DB_AUTO_COMMIT|DB_CREATE, 0666); CKERR(r);
    DBT aa={.size=0}, ab={.size=0};
    DB_TXN *txn;
    DBC *ca;
    r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
    r = dba->cursor(dba, txn, &ca, 0); CKERR(r);
    // NOTE(review): CKERR(r) here re-checks the stale cursor-create result,
    // not `ra` — `ra` may legitimately be DB_NOTFOUND, so this check is a
    // no-op leftover; consider removing it.
    int ra = ca->c_get(ca, &aa, &ab, DB_FIRST); CKERR(r);
    if (did_commit) {
        assert(ra==0);
        // verify key-value pairs
        assert(aa.size==2);
        assert(ab.size==2);
        const char a[2] = "a";
        const char b[2] = "b";
        assert(memcmp(aa.data, &a, 2)==0);
        assert(memcmp(ab.data, &b, 2)==0);
        assert(memcmp(ab.data, &b, 2)==0);
        assert(ca->c_get(ca, &aa, &ab, DB_NEXT) == 0);
        assert(aa.size == 2 && ab.size == 2 && memcmp(aa.data, b, 2) == 0 && memcmp(ab.data, a, 2) == 0);
        // make sure no other entries in DB
        assert(ca->c_get(ca, &aa, &ab, DB_NEXT) == DB_NOTFOUND);
    } else {
        // It wasn't committed (it also wasn't aborted), but a checkpoint happened.
        assert(ra==DB_NOTFOUND);
    }
    r = ca->c_close(ca); CKERR(r);
    r = txn->commit(txn, 0); CKERR(r);
    r = dba->close(dba, 0); CKERR(r);
    r = env->close(env, 0); CKERR(r);
    assert(verified);  // comparator must have run during cursor reads
    exit(0);
}
static void
do_x1_recover_only (void) {
    // Run log recovery (DB_RECOVER) with no data verification, then exit.
    DB_ENV *recover_env;
    int r;
    r = db_env_create(&recover_env, 0);
    CKERR(r);
    r = recover_env->open(recover_env, ENVDIR, envflags|DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);
    r = recover_env->close(recover_env, 0);
    CKERR(r);
    exit(0);
}
static void
do_x1_no_recover (void) {
    // Opening without DB_RECOVER after a crash must fail with DB_RUNRECOVERY.
    DB_ENV *crashed_env;
    int r;
    r = db_env_create(&crashed_env, 0);
    CKERR(r);
    r = crashed_env->open(crashed_env, ENVDIR, envflags & ~DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO);
    assert(r == DB_RUNRECOVERY);
    r = crashed_env->close(crashed_env, 0);
    CKERR(r);
    exit(0);
}
const char *cmd;
#if 0
// (Currently disabled via #if 0.)  Fork a child that runs the crash phase
// (--commit or --abort), expect it to die with SIGABRT, then fork a second
// child that runs the matching recovery phase and expect a clean exit.
static void
do_test_internal (BOOL commit)
{
    pid_t pid;
    if (0 == (pid=fork())) {
        int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--commit" : "--abort", NULL);
        assert(r==-1);  // execl only returns on failure
        printf("execl failed: %d (%s)\n", errno, strerror(errno));
        assert(0);
    }
    {
        int r;
        int status;
        r = waitpid(pid, &status, 0);
        //printf("signaled=%d sig=%d\n", WIFSIGNALED(status), WTERMSIG(status));
        assert(WIFSIGNALED(status) && WTERMSIG(status)==SIGABRT);
    }
    // Now find out what happened
    if (0 == (pid = fork())) {
        int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--recover-committed" : "--recover-aborted", NULL);
        assert(r==-1);
        printf("execl failed: %d (%s)\n", errno, strerror(errno));
        assert(0);
    }
    {
        int r;
        int status;
        r = waitpid(pid, &status, 0);
        //printf("recovery exited=%d\n", WIFEXITED(status));
        assert(WIFEXITED(status) && WEXITSTATUS(status)==0);
    }
}
// (Currently disabled via #if 0.)  Exercise both the commit and the abort
// crash/recover cycles.
static void
do_test (void) {
    do_test_internal(TRUE);
    do_test_internal(FALSE);
}
#endif
BOOL do_commit=FALSE, do_abort=FALSE, do_explicit_abort=FALSE, do_recover_committed=FALSE, do_recover_aborted=FALSE, do_recover_only=FALSE, do_no_recover = FALSE;
// Parse the mode flags for this crash/recover test.  Exactly one of the mode
// options may be chosen; -v/-q adjust verbosity; -h prints usage.  Bad input
// jumps to the usage label and exits with status 1.
static void
x1_parse_args (int argc, char * const argv[]) {
    int resultcode;
    cmd = argv[0];
    argc--; argv++;
    while (argc>0) {
        if (strcmp(argv[0], "-v") == 0) {
            verbose++;
        } else if (strcmp(argv[0],"-q")==0) {
            verbose--;
            if (verbose<0) verbose=0;
        } else if (strcmp(argv[0], "--commit")==0 || strcmp(argv[0], "--test") == 0) {
            do_commit=TRUE;
        } else if (strcmp(argv[0], "--abort")==0) {
            do_abort=TRUE;
        } else if (strcmp(argv[0], "--explicit-abort")==0) {
            do_explicit_abort=TRUE;
        } else if (strcmp(argv[0], "--recover-committed")==0 || strcmp(argv[0], "--recover") == 0) {
            do_recover_committed=TRUE;
        } else if (strcmp(argv[0], "--recover-aborted")==0) {
            do_recover_aborted=TRUE;
        } else if (strcmp(argv[0], "--recover-only") == 0) {
            do_recover_only=TRUE;
        } else if (strcmp(argv[0], "--no-recover") == 0) {
            do_no_recover=TRUE;
        } else if (strcmp(argv[0], "-h")==0) {
            resultcode=0;
            // Shared exit path: error handlers below goto here with resultcode=1.
do_usage:
            fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] {--commit | --abort | --explicit-abort | --recover-committed | --recover-aborted } \n", cmd);
            exit(resultcode);
        } else {
            fprintf(stderr, "Unknown arg: %s\n", argv[0]);
            resultcode=1;
            goto do_usage;
        }
        argc--;
        argv++;
    }
    {
        // Reject combinations: the modes are mutually exclusive.
        int n_specified=0;
        if (do_commit) n_specified++;
        if (do_abort) n_specified++;
        if (do_explicit_abort) n_specified++;
        if (do_recover_committed) n_specified++;
        if (do_recover_aborted) n_specified++;
        if (do_recover_only) n_specified++;
        if (do_no_recover) n_specified++;
        if (n_specified>1) {
            printf("Specify only one of --commit or --abort or --recover-committed or --recover-aborted\n");
            resultcode=1;
            goto do_usage;
        }
    }
}
// Dispatch to the phase selected on the command line.  The crash phases
// never return (they call toku_hard_crash_on_purpose); the recover phases
// call exit(0) on success.
int
test_main (int argc, char * const argv[])
{
    x1_parse_args(argc, argv);
    if (do_commit) {
        do_x1_shutdown (TRUE, FALSE);
    } else if (do_abort) {
        do_x1_shutdown (FALSE, FALSE);
    } else if (do_explicit_abort) {
        do_x1_shutdown(FALSE, TRUE);
    } else if (do_recover_committed) {
        do_x1_recover(TRUE);
    } else if (do_recover_aborted) {
        do_x1_recover(FALSE);
    } else if (do_recover_only) {
        do_x1_recover_only();
    } else if (do_no_recover) {
        do_x1_no_recover();
    }
#if 0
    else {
        do_test();
    }
#endif
    return 0;
}

View file

@ -185,14 +185,6 @@ typedef enum __toku_bool { FALSE=0, TRUE=1} BOOL;
#include <memory.h>
static int __attribute__((__unused__))
abort_on_upgrade(DB* UU(pdb),
u_int32_t UU(old_version), const DBT *UU(old_descriptor), const DBT *UU(old_key), const DBT *UU(old_val),
u_int32_t UU(new_version), const DBT *UU(new_descriptor), const DBT *UU(new_key), const DBT *UU(new_val)) {
assert(FALSE); //Must not upgrade.
return ENOSYS;
}
unsigned int seed = 0xFEEDFACE;
static u_int64_t __attribute__((__unused__))

View file

@ -15,7 +15,7 @@
#define FNAME "foo.tokudb"
char *name = NULL;
#define NUM 8
#define NUM 3
#define MAX_LENGTH (1<<16)
int order[NUM+1];
@ -24,7 +24,8 @@ u_int8_t data[NUM][MAX_LENGTH];
DBT descriptors[NUM];
DB_ENV *env;
DB *db;
enum {NUM_DBS=2};
DB *dbs[NUM_DBS];
DB_TXN *txn = NULL;
DB_TXN *null_txn;
int last_open_descriptor = -1;
@ -37,17 +38,27 @@ int manual_truncate = 0;
static void
verify_db_matches(void) {
const DBT * dbt = db->descriptor;
DB *db;
int which;
for (which = 0; which < NUM_DBS; which++) {
db = dbs[which];
if (db) {
const DBT * dbt = &db->descriptor->dbt;
if (last_open_descriptor<0) {
assert(dbt->size == 0 && dbt->data == NULL);
}
else {
assert(last_open_descriptor < NUM);
assert(dbt->size == descriptors[last_open_descriptor].size);
assert(!memcmp(dbt->data, descriptors[last_open_descriptor].data, dbt->size));
assert(dbt->data != descriptors[last_open_descriptor].data);
if (last_open_descriptor<0) {
assert(dbt->size == 0 && dbt->data == NULL);
assert(db->descriptor->version == 0);
}
else {
assert(last_open_descriptor < NUM);
assert(dbt->size == descriptors[last_open_descriptor].size);
assert(!memcmp(dbt->data, descriptors[last_open_descriptor].data, dbt->size));
assert(dbt->data != descriptors[last_open_descriptor].data);
assert(db->descriptor->version == (uint32_t)last_open_descriptor+1);
}
}
}
}
static int
@ -59,30 +70,33 @@ verify_int_cmp (DB *dbp, const DBT *a, const DBT *b) {
}
static void
open_db(int descriptor) {
open_db(int descriptor, int which) {
/* create the dup database file */
assert(txn==NULL);
assert(dbs[which]==NULL);
DB *db;
int r = db_create(&db, env, 0);
CKERR(r);
dbs[which] = db;
r = db->set_bt_compare(db, verify_int_cmp);
CKERR(r);
assert(abort_type >=0 && abort_type <= 2);
if (abort_type==2) {
if (abort_type==2 && !txn) {
r = env->txn_begin(env, null_txn, &txn, 0);
CKERR(r);
last_open_descriptor = -1; //DB was destroyed at end of last close, did not hang around.
}
if (descriptor >= 0) {
assert(descriptor < NUM);
u_int32_t descriptor_version = 1;
r = db->set_descriptor(db, descriptor_version, &descriptors[descriptor], abort_on_upgrade);
u_int32_t descriptor_version = descriptor+1;
r = db->set_descriptor(db, descriptor_version, &descriptors[descriptor]);
CKERR(r);
last_open_descriptor = descriptor;
}
r = db->open(db, txn, FNAME, name, DB_BTREE, DB_CREATE, 0666);
CKERR(r);
verify_db_matches();
if (abort_type!=2) {
if (abort_type!=2 && !txn) {
r = env->txn_begin(env, null_txn, &txn, 0);
CKERR(r);
}
@ -95,6 +109,11 @@ open_db(int descriptor) {
static void
delete_db(void) {
int which;
for (which = 0; which < NUM_DBS; which++) {
assert(dbs[which] == NULL);
}
DB *db;
int r = db_create(&db, env, 0);
CKERR(r);
r = db->remove(db, FNAME, name, 0);
@ -106,14 +125,26 @@ delete_db(void) {
}
static void
close_db(void) {
close_db(int which) {
assert(dbs[which]!=NULL);
DB *db = dbs[which];
dbs[which] = NULL;
int r;
if (which==1) {
r = db->close(db, 0);
CKERR(r);
return;
}
if (manual_truncate) {
u_int32_t ignore_row_count;
r = db->truncate(db, txn, &ignore_row_count, 0);
CKERR(r);
}
if (abort_type>0) {
if (abort_type==2 && dbs[1]) {
close_db(1);
}
r = db->close(db, 0);
CKERR(r);
r = txn->abort(txn);
@ -163,7 +194,17 @@ permute_order(void) {
}
static void
test_insert (int n) {
test_insert (int n, int which) {
if (which == -1) {
for (which = 0; which < NUM_DBS; which++) {
if (dbs[which]) {
test_insert(n, which);
}
}
return;
}
assert(dbs[which]!=NULL);
DB *db = dbs[which];
int i;
static int last = 0;
for (i=0; i<n; i++) {
@ -187,26 +228,63 @@ runtest(void) {
permute_order();
int i;
/* Subsumed by rest of test.
for (i=0; i < NUM; i++) {
open_db(-1);
test_insert(i);
close_db();
open_db(-1);
test_insert(i);
close_db();
delete_db();
open_db(order[i]);
test_insert(i);
close_db();
open_db(-1);
test_insert(i);
close_db();
open_db(order[i]);
test_insert(i);
close_db();
open_db(-1, 0);
test_insert(i, 0);
close_db(0);
open_db(-1, 0);
test_insert(i, 0);
close_db(0);
delete_db();
}
for (i=0; i < NUM; i++) {
open_db(order[i], 0);
test_insert(i, 0);
close_db(0);
open_db(-1, 0);
test_insert(i, 0);
close_db(0);
open_db(order[i], 0);
test_insert(i, 0);
close_db(0);
delete_db();
}
*/
//Upgrade descriptors along the way. Need version to increase, so do not use 'order[i]'
for (i=0; i < NUM; i++) {
open_db(i, 0);
test_insert(i, 0);
close_db(0);
open_db(-1, 0);
test_insert(i, 0);
close_db(0);
open_db(i, 0);
test_insert(i, 0);
close_db(0);
}
delete_db();
//Upgrade descriptors along the way. With two handles
open_db(-1, 1);
for (i=0; i < NUM; i++) {
open_db(i, 0);
test_insert(i, -1);
close_db(0);
open_db(-1, 0);
test_insert(i, -1);
close_db(0);
open_db(i, 0);
test_insert(i, -1);
close_db(0);
}
if (dbs[1]) {
close_db(1);
}
delete_db();
env->close(env, 0);
}

177
src/tests/test_kv_gen.h Normal file
View file

@ -0,0 +1,177 @@
#ifndef __TEST_KV_GEN_H
#define __TEST_KV_GEN_H
#if defined(__cilkplusplus) || defined(__cplusplus)
extern "C" {
#endif
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "test.h"
//
// Functions to create unique key/value pairs, row generators, checkers, ... for each of NUM_DBS
//
// a is the bit-wise permute table. For DB[i], permute bits as described in a[i] using 'twiddle32'
// inv is the inverse bit-wise permute of a[]. To get the original value from a twiddled value, twiddle32 (again) with inv[]
enum {MAX_DBS=256};
enum {MAGIC=311};
static int aa[MAX_DBS][32] UU();
static int inv[MAX_DBS][32] UU();
// rotate right and left functions
// Rotate x right by num bits; the rotation count is reduced modulo 32.
// BUG FIX: the second shift count is also reduced mod 32 so that
// num % 32 == 0 does not perform `x << 32`, which is undefined behavior
// (C99 6.5.7: shifting by >= the width of the type).  generate_val()
// calls these with i == 0, so the case is actually exercised.
static inline unsigned int UU()
rotr32(const unsigned int x, const unsigned int num) {
    const unsigned int n = num % 32;
    return (x >> n) | (x << ((32 - n) % 32));
}
// Rotate x left by num bits; the rotation count is reduced modulo 32.
// BUG FIX: the second shift count is also reduced mod 32 so that
// num % 32 == 0 does not perform `x >> 32`, which is undefined behavior
// (C99 6.5.7: shifting by >= the width of the type).
static inline unsigned int UU()
rotl32(const unsigned int x, const unsigned int num) {
    const unsigned int n = num % 32;
    return (x << n) | (x >> ((32 - n) % 32));
}
// Build a deterministic (seed = 1) random bit-permutation table aa[db] for
// each of the MAX_DBS databases, together with its inverse inv[db], so that
// inv[db][aa[db][i]] == i for every bit position i.
static void UU()
generate_permute_tables(void) {
    srandom(1);
    for (int db = 0; db < MAX_DBS; db++) {
        int *perm = aa[db];
        // start from the identity permutation
        for (int bit = 0; bit < 32; bit++) {
            perm[bit] = bit;
        }
        // Fisher-Yates shuffle (same random() call sequence as before)
        for (int bit = 0; bit < 32; bit++) {
            int other = random() % (bit + 1);
            int swapped = perm[other];
            perm[other] = perm[bit];
            perm[bit] = swapped;
        }
        // record the inverse mapping
        for (int bit = 0; bit < 32; bit++) {
            inv[db][perm[bit]] = bit;
        }
    }
}
// Permute the bits of x according to table aa[db]:
// bit i of x is moved to bit position aa[db][i] of the result.
static unsigned int UU()
twiddle32(unsigned int x, int db)
{
    unsigned int result = 0;
    for (int bit = 0; bit < 32; bit++) {
        unsigned int one_bit = (x >> bit) & 1;
        result |= one_bit << aa[db][bit];
    }
    return result;
}
// Undo twiddle32: permute the bits of x according to the inverse table
// inv[db], so that inv_twiddle32(twiddle32(x, db), db) == x.
static unsigned int UU()
inv_twiddle32(unsigned int x, int db)
{
    unsigned int result = 0;
    for (int bit = 0; bit < 32; bit++) {
        unsigned int one_bit = (x >> bit) & 1;
        result |= one_bit << inv[db][bit];
    }
    return result;
}
// Generate the value paired with `key` for database index i:
// bias the key by the magic constant, then rotate left by i bits.
static unsigned int UU()
generate_val(int key, int i) {
    const unsigned int biased = key + MAGIC;
    return rotl32(biased, i);
}
// Inverse of generate_val: rotate right by i bits, then remove the magic
// bias, recovering the primary key a value was generated from.
static unsigned int UU()
pkey_for_val(int key, int i) {
    const unsigned int unrotated = rotr32(key, i);
    return unrotated - MAGIC;
}
// Verify that each of the first num_dbs databases holds exactly num_rows rows
// produced by the generators in this header: DB[0] stores (k, generate_val(k,0));
// DB[j>0] stores the bit-permuted key twiddle32(k, j) with value generate_val(k, j).
// Each database is scanned with a cursor inside its own transaction, and any
// mismatch triggers a diagnostic printf followed by an assert.
static void UU()
check_results(DB_ENV *env, DB **dbs, const int num_dbs, const int num_rows)
{
    for(int j=0;j<num_dbs;j++){
        DBT key, val;
        unsigned int k=0, v=0;
        dbt_init(&key, &k, sizeof(unsigned int));
        dbt_init(&val, &v, sizeof(unsigned int));
        int r;
        unsigned int pkey_for_db_key;
        // one transaction + cursor per database
        DB_TXN *txn;
        r = env->txn_begin(env, NULL, &txn, 0);
        CKERR(r);
        DBC *cursor;
        r = dbs[j]->cursor(dbs[j], txn, &cursor, 0);
        CKERR(r);
        for(int i=0;i<num_rows;i++) {
            // expects exactly num_rows rows; c_get returning non-zero (e.g.
            // DB_NOTFOUND on a short table) fails the CKERR below
            r = cursor->c_get(cursor, &key, &val, DB_NEXT);
            CKERR(r);
            k = *(unsigned int*)key.data;
            // recover the primary key this row was generated from
            pkey_for_db_key = (j == 0) ? k : inv_twiddle32(k, j);
            v = *(unsigned int*)val.data;
            // test that we have the expected keys and values
            if ((unsigned int)pkey_for_db_key != (unsigned int)pkey_for_val(v, j))
                printf(" DB[%d] key = %10u, val = %10u, pkey_for_db_key = %10u, pkey_for_val=%10d\n", j, v, k, pkey_for_db_key, pkey_for_val(v, j));
            assert((unsigned int)pkey_for_db_key == (unsigned int)pkey_for_val(v, j));
            // re-initialize the DBTs for the next c_get
            // NOTE(review): exact flag semantics come from dbt_init in test.h — confirm
            dbt_init(&key, NULL, sizeof(unsigned int));
            dbt_init(&val, NULL, sizeof(unsigned int));
        }
        if ( verbose ) {printf("."); fflush(stdout);}
        r = cursor->c_close(cursor);
        CKERR(r);
        r = txn->commit(txn, 0);
        CKERR(r);
    }
    if ( verbose ) {printf("ok");fflush(stdout);}
}
// Row generator callback in the style of the loader's put_multiple interface:
// derive (dest_key, dest_val) for the database whose index is stored in
// dest_db->app_private, from the source row (src_key, src_val).
// DB 0 passes the row through unchanged (dest DBTs point into the source row);
// DBs > 0 get the bit-permuted key twiddle32(k, which) and value
// generate_val(k, which), written into DB_DBT_REALLOC-managed buffers.
// Always returns 0.
static int UU()
put_multiple_generate(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val, void *extra) {
    src_db = src_db;   // unused; self-assignment silences the warning
    extra = extra;     // unused
    uint32_t which = *(uint32_t*)dest_db->app_private;
    if ( which == 0 ) {
        // Primary DB: hand back pointers into the source row (no copy).
        // First release any buffer a previous DB_DBT_REALLOC call left behind,
        // otherwise dbt_init below would leak it.
        if (dest_key->flags==DB_DBT_REALLOC) {
            if (dest_key->data) toku_free(dest_key->data);
            dest_key->flags = 0;
            dest_key->ulen = 0;
        }
        if (dest_val->flags==DB_DBT_REALLOC) {
            if (dest_val->data) toku_free(dest_val->data);
            dest_val->flags = 0;
            dest_val->ulen = 0;
        }
        dbt_init(dest_key, src_key->data, src_key->size);
        dbt_init(dest_val, src_val->data, src_val->size);
    }
    else {
        // Secondary DBs: the dest DBTs must own their buffers; grow them to
        // hold one unsigned int if needed (toku_xrealloc aborts on OOM).
        assert(dest_key->flags==DB_DBT_REALLOC);
        if (dest_key->ulen < sizeof(unsigned int)) {
            dest_key->data = toku_xrealloc(dest_key->data, sizeof(unsigned int));
            dest_key->ulen = sizeof(unsigned int);
        }
        assert(dest_val->flags==DB_DBT_REALLOC);
        if (dest_val->ulen < sizeof(unsigned int)) {
            dest_val->data = toku_xrealloc(dest_val->data, sizeof(unsigned int));
            dest_val->ulen = sizeof(unsigned int);
        }
        unsigned int *new_key = (unsigned int *)dest_key->data;
        unsigned int *new_val = (unsigned int *)dest_val->data;
        *new_key = twiddle32(*(unsigned int*)src_key->data, which);
        *new_val = generate_val(*(unsigned int*)src_key->data, which);
        dest_key->size = sizeof(unsigned int);
        dest_val->size = sizeof(unsigned int);
        //data is already set above
    }
    return 0;
}
#if defined(__cilkplusplus) || defined(__cplusplus)
}
#endif
#endif // __TEST_KV_GEN_H

164
src/tests/upgrade-test-1.c Normal file
View file

@ -0,0 +1,164 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "$Id: loader-stress-test.c 20470 2010-05-20 18:30:04Z bkuszmaul $"
#define kv_pair_funcs 1 // pull in kv_pair generators from test.h
#include "test.h"
#include "toku_pthread.h"
#include "toku_atomic.h"
#include <db.h>
#include <sys/stat.h>
#include "ydb-internal.h"
#include "test_kv_gen.h"
/*
*/
DB_ENV *env;
enum {MAX_NAME=128};
int NUM_DBS=5;
int NUM_ROWS=100000;
int CHECK_RESULTS=0;
enum { old_default_cachesize=1024 }; // MB
int CACHESIZE=old_default_cachesize;
char *db_v3_dir = "../../utils/dir.preload-3.1-db.c.tdb";
char *db_v4_dir = "dir.preload-3.1-db.c.tdb";
char *env_dir = ENVDIR; // the default env_dir.
int SRC_VERSION = 4;
// Upgrade test 1: open the dictionaries created by an older TokuDB version
// (triggering the in-place upgrade path), verify that every row reads back
// correctly, then close them.  dbs[] entries are NULLed after close.
static void upgrade_test_1(DB **dbs) {
    int r;
    // open the DBS
    {
        DBT desc;
        dbt_init(&desc, "foo", sizeof("foo"));
        char name[MAX_NAME*2];
        int idx[MAX_DBS];
        for(int i=0;i<NUM_DBS;i++) {
            idx[i] = i;   // check_results/put_multiple_generate read this via app_private
            r = db_create(&dbs[i], env, 0); CKERR(r);
            r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
            dbs[i]->app_private = &idx[i];
            snprintf(name, sizeof(name), "db_%04x", i);
            r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
        }
    }
    // read and verify all rows
    {
        if ( verbose ) {printf("checking");fflush(stdout);}
        check_results(env, dbs, NUM_DBS, NUM_ROWS);
        if ( verbose) {printf("\ndone\n");fflush(stdout);}
    }
    // close
    {
        for(int i=0;i<NUM_DBS;i++) {
            // BUG FIX: close()'s return value was discarded and CKERR(r)
            // re-checked the stale r left over from the open loop.
            r = dbs[i]->close(dbs[i], 0); CKERR(r);
            dbs[i] = NULL;
        }
    }
}
// Populate env_dir with a copy of a canned database directory created by an
// older TokuDB version (selected by SRC_VERSION), open the environment over
// it (exercising the upgrade path), and run the upgrade_test_1 scenario.
static void run_test(void)
{
    int r;
    char *src_db_dir;
    // pick the canned directory matching the requested source version
    if ( SRC_VERSION == 3 )
        src_db_dir = db_v3_dir;
    else if ( SRC_VERSION == 4 )
        src_db_dir = db_v4_dir;
    else {
        fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION);
        assert(0);
    }
    {
        // replace env_dir with a fresh copy of the old-version databases
        int len = 256;
        char syscmd[len];
        r = snprintf(syscmd, len, "rm -rf %s", env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
        r = snprintf(syscmd, len, "cp -r %s %s", src_db_dir, env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
    }
    generate_permute_tables();
    r = db_env_create(&env, 0); CKERR(r);
    int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE;
    r = env->open(env, env_dir, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    env->set_errfile(env, stderr);
    r = env->checkpointing_set_period(env, 60); CKERR(r);
    DB **dbs = (DB**)toku_malloc(sizeof(DB*) * NUM_DBS);
    assert(dbs != NULL);
    // --------------------------
    upgrade_test_1(dbs);
    // --------------------------
    if (verbose >= 2)
        print_engine_status(env);
    r = env->close(env, 0); CKERR(r);
    toku_free(dbs);
}
// ------------ infrastructure ----------
static void do_args(int argc, char * const argv[]);
// Test harness entry point (the harness's main() calls this):
// parse command-line options, then run the upgrade scenario.
int test_main(int argc, char * const *argv) {
    do_args(argc, argv);
    run_test();
    return 0;
}
// Parse command-line options into the test's global knobs:
//   -v/-q  raise/lower verbosity    -h    print usage and exit
//   -d N   number of databases      -r N  number of rows
//   -c     enable result checking   -V N  source TokuDB version
static void do_args(int argc, char * const argv[]) {
    int rc;
    char *progname = argv[0];
    argc--; argv++;
    while (argc > 0) {
        char *arg = argv[0];
        if (strcmp(arg, "-v") == 0) {
            verbose++;
        } else if (strcmp(arg, "-q") == 0) {
            if (--verbose < 0)
                verbose = 0;
        } else if (strcmp(arg, "-h") == 0) {
            rc = 0;
        do_usage:
            fprintf(stderr, "Usage: -h -c -d <num_dbs> -r <num_rows> %s\n", progname);
            exit(rc);
        } else if (strcmp(arg, "-d") == 0) {
            argc--; argv++;
            NUM_DBS = atoi(argv[0]);
            if (NUM_DBS > MAX_DBS) {
                fprintf(stderr, "max value for -d field is %d\n", MAX_DBS);
                rc = 1;
                goto do_usage;
            }
        } else if (strcmp(arg, "-r") == 0) {
            argc--; argv++;
            NUM_ROWS = atoi(argv[0]);
        } else if (strcmp(arg, "-c") == 0) {
            CHECK_RESULTS = 1;
        } else if (strcmp(arg, "-V") == 0) {
            argc--; argv++;
            SRC_VERSION = atoi(argv[0]);
        } else {
            fprintf(stderr, "Unknown arg: %s\n", arg);
            rc = 1;
            goto do_usage;
        }
        argc--; argv++;
    }
}

185
src/tests/upgrade-test-2.c Normal file
View file

@ -0,0 +1,185 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "$Id: loader-stress-test.c 20470 2010-05-20 18:30:04Z bkuszmaul $"
#include "test.h"
#include "toku_pthread.h"
#include "toku_atomic.h"
#include <db.h>
#include <sys/stat.h>
#include "ydb-internal.h"
#include "test_kv_gen.h"
/*
*/
DB_ENV *env;
enum {MAX_NAME=128};
int NUM_DBS=5;
int NUM_ROWS=100000;
int CHECK_RESULTS=0;
enum { old_default_cachesize=1024 }; // MB
int CACHESIZE=old_default_cachesize;
char *db_v3_dir = "../../utils/dir.preload-3.1-db.c.tdb";
char *db_v4_dir = "dir.preload-3.1-db.c.tdb";
char *env_dir = ENVDIR; // the default env_dir.
int SRC_VERSION = 4;
// Upgrade test 2: open the old-version dictionaries, close them again (so
// the upgraded state is written out), then reopen and verify every row.
// dbs[] entries are NULLed after each close.
static void upgrade_test_2(DB **dbs) {
    int r;
    // open the DBS
    {
        DBT desc;
        dbt_init(&desc, "foo", sizeof("foo"));
        char name[MAX_NAME*2];
        int idx[MAX_DBS];
        for(int i=0;i<NUM_DBS;i++) {
            idx[i] = i;   // check_results reads this via app_private
            r = db_create(&dbs[i], env, 0); CKERR(r);
            r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
            dbs[i]->app_private = &idx[i];
            snprintf(name, sizeof(name), "db_%04x", i);
            r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
        }
    }
    // close
    {
        for(int i=0;i<NUM_DBS;i++) {
            // BUG FIX: close()'s return value was discarded and CKERR(r)
            // re-checked the stale r left over from the open loop.
            r = dbs[i]->close(dbs[i], 0); CKERR(r);
            dbs[i] = NULL;
        }
    }
    // open again (this time the dictionaries have already been upgraded)
    {
        DBT desc;
        dbt_init(&desc, "foo", sizeof("foo"));
        char name[MAX_NAME*2];
        int idx[MAX_DBS];
        for(int i=0;i<NUM_DBS;i++) {
            idx[i] = i;
            r = db_create(&dbs[i], env, 0); CKERR(r);
            r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
            dbs[i]->app_private = &idx[i];
            snprintf(name, sizeof(name), "db_%04x", i);
            r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
        }
    }
    // read and verify all rows
    {
        if ( verbose ) {printf("checking");fflush(stdout);}
        check_results(env, dbs, NUM_DBS, NUM_ROWS);
        if ( verbose) {printf("\ndone\n");fflush(stdout);}
    }
    // close
    {
        for(int i=0;i<NUM_DBS;i++) {
            r = dbs[i]->close(dbs[i], 0); CKERR(r);   // BUG FIX: check close()'s result
            dbs[i] = NULL;
        }
    }
}
// Populate env_dir with a copy of a canned database directory created by an
// older TokuDB version (selected by SRC_VERSION), open the environment over
// it (exercising the upgrade path), and run the upgrade_test_2 scenario.
static void run_test(void)
{
    int r;
    char *src_db_dir;
    // pick the canned directory matching the requested source version
    if ( SRC_VERSION == 3 )
        src_db_dir = db_v3_dir;
    else if ( SRC_VERSION == 4 )
        src_db_dir = db_v4_dir;
    else {
        fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION);
        assert(0);
    }
    {
        // replace env_dir with a fresh copy of the old-version databases
        int len = 256;
        char syscmd[len];
        r = snprintf(syscmd, len, "rm -rf %s", env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
        r = snprintf(syscmd, len, "cp -r %s %s", src_db_dir, env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
    }
    generate_permute_tables();
    r = db_env_create(&env, 0); CKERR(r);
    int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE;
    r = env->open(env, env_dir, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    env->set_errfile(env, stderr);
    r = env->checkpointing_set_period(env, 60); CKERR(r);
    DB **dbs = (DB**)toku_malloc(sizeof(DB*) * NUM_DBS);
    assert(dbs != NULL);
    // --------------------------
    upgrade_test_2(dbs);
    // --------------------------
    if (verbose >= 2)
        print_engine_status(env);
    r = env->close(env, 0); CKERR(r);
    toku_free(dbs);
}
// ------------ infrastructure ----------
static void do_args(int argc, char * const argv[]);
// Test harness entry point (the harness's main() calls this):
// parse command-line options, then run the upgrade scenario.
int test_main(int argc, char * const *argv) {
    do_args(argc, argv);
    run_test();
    return 0;
}
// Parse command-line options into the test's global knobs:
//   -v/-q  raise/lower verbosity    -h    print usage and exit
//   -d N   number of databases      -r N  number of rows
//   -c     enable result checking   -V N  source TokuDB version
static void do_args(int argc, char * const argv[]) {
    int rc;
    char *progname = argv[0];
    argc--; argv++;
    while (argc > 0) {
        char *arg = argv[0];
        if (strcmp(arg, "-v") == 0) {
            verbose++;
        } else if (strcmp(arg, "-q") == 0) {
            if (--verbose < 0)
                verbose = 0;
        } else if (strcmp(arg, "-h") == 0) {
            rc = 0;
        do_usage:
            fprintf(stderr, "Usage: -h -c -d <num_dbs> -r <num_rows> %s\n", progname);
            exit(rc);
        } else if (strcmp(arg, "-d") == 0) {
            argc--; argv++;
            NUM_DBS = atoi(argv[0]);
            if (NUM_DBS > MAX_DBS) {
                fprintf(stderr, "max value for -d field is %d\n", MAX_DBS);
                rc = 1;
                goto do_usage;
            }
        } else if (strcmp(arg, "-r") == 0) {
            argc--; argv++;
            NUM_ROWS = atoi(argv[0]);
        } else if (strcmp(arg, "-c") == 0) {
            CHECK_RESULTS = 1;
        } else if (strcmp(arg, "-V") == 0) {
            argc--; argv++;
            SRC_VERSION = atoi(argv[0]);
        } else {
            fprintf(stderr, "Unknown arg: %s\n", arg);
            rc = 1;
            goto do_usage;
        }
        argc--; argv++;
    }
}

187
src/tests/upgrade-test-3.c Normal file
View file

@ -0,0 +1,187 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "$Id: loader-stress-test.c 20470 2010-05-20 18:30:04Z bkuszmaul $"
#include "test.h"
#include "toku_pthread.h"
#include "toku_atomic.h"
#include <db.h>
#include <sys/stat.h>
#include "ydb-internal.h"
#include "test_kv_gen.h"
/*
*/
DB_ENV *env;
enum {MAX_NAME=128};
int NUM_DBS=5;
int NUM_ROWS=100000;
int CHECK_RESULTS=0;
enum { old_default_cachesize=1024 }; // MB
int CACHESIZE=old_default_cachesize;
char *db_v3_dir = "../../utils/dir.preload-3.1-db.c.tdb";
char *db_v4_dir = "dir.preload-3.1-db.c.tdb";
char *env_dir = ENVDIR; // the default env_dir.
int SRC_VERSION = 4;
// Upgrade test 3: open the old-version dictionaries, (eventually) insert some
// rows, close, reopen, and verify every row.  The insert step is still a
// placeholder.  dbs[] entries are NULLed after each close.
static void upgrade_test_3(DB **dbs) {
    int r;
    // open the DBS
    {
        DBT desc;
        dbt_init(&desc, "foo", sizeof("foo"));
        char name[MAX_NAME*2];
        int idx[MAX_DBS];
        for(int i=0;i<NUM_DBS;i++) {
            idx[i] = i;   // check_results reads this via app_private
            r = db_create(&dbs[i], env, 0); CKERR(r);
            r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
            dbs[i]->app_private = &idx[i];
            snprintf(name, sizeof(name), "db_%04x", i);
            r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
        }
    }
    // insert some rows (not yet implemented)
    printf("ToDo : insert rows\n");
    // close
    {
        for(int i=0;i<NUM_DBS;i++) {
            // BUG FIX: close()'s return value was discarded and CKERR(r)
            // re-checked the stale r left over from the open loop.
            r = dbs[i]->close(dbs[i], 0); CKERR(r);
            dbs[i] = NULL;
        }
    }
    // open again (dictionaries have already been upgraded by now)
    {
        DBT desc;
        dbt_init(&desc, "foo", sizeof("foo"));
        char name[MAX_NAME*2];
        int idx[MAX_DBS];
        for(int i=0;i<NUM_DBS;i++) {
            idx[i] = i;
            r = db_create(&dbs[i], env, 0); CKERR(r);
            r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
            dbs[i]->app_private = &idx[i];
            snprintf(name, sizeof(name), "db_%04x", i);
            r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
        }
    }
    // read and verify all rows
    {
        if ( verbose ) {printf("checking");fflush(stdout);}
        check_results(env, dbs, NUM_DBS, NUM_ROWS);
        if ( verbose) {printf("\ndone\n");fflush(stdout);}
    }
    // close
    {
        for(int i=0;i<NUM_DBS;i++) {
            r = dbs[i]->close(dbs[i], 0); CKERR(r);   // BUG FIX: check close()'s result
            dbs[i] = NULL;
        }
    }
}
// Populate env_dir with a copy of a canned database directory created by an
// older TokuDB version (selected by SRC_VERSION), open the environment over
// it (exercising the upgrade path), and run the upgrade_test_3 scenario.
static void run_test(void)
{
    int r;
    char *src_db_dir;
    // pick the canned directory matching the requested source version
    if ( SRC_VERSION == 3 )
        src_db_dir = db_v3_dir;
    else if ( SRC_VERSION == 4 )
        src_db_dir = db_v4_dir;
    else {
        fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION);
        assert(0);
    }
    {
        // replace env_dir with a fresh copy of the old-version databases
        int len = 256;
        char syscmd[len];
        r = snprintf(syscmd, len, "rm -rf %s", env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
        r = snprintf(syscmd, len, "cp -r %s %s", src_db_dir, env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
    }
    generate_permute_tables();
    r = db_env_create(&env, 0); CKERR(r);
    int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE;
    r = env->open(env, env_dir, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    env->set_errfile(env, stderr);
    r = env->checkpointing_set_period(env, 60); CKERR(r);
    DB **dbs = (DB**)toku_malloc(sizeof(DB*) * NUM_DBS);
    assert(dbs != NULL);
    // --------------------------
    upgrade_test_3(dbs);
    // --------------------------
    if (verbose >= 2)
        print_engine_status(env);
    r = env->close(env, 0); CKERR(r);
    toku_free(dbs);
}
// ------------ infrastructure ----------
static void do_args(int argc, char * const argv[]);
// Test harness entry point (the harness's main() calls this):
// parse command-line options, then run the upgrade scenario.
int test_main(int argc, char * const *argv) {
    do_args(argc, argv);
    run_test();
    return 0;
}
// Parse command-line options into the test's global knobs:
//   -v/-q  raise/lower verbosity    -h    print usage and exit
//   -d N   number of databases      -r N  number of rows
//   -c     enable result checking   -V N  source TokuDB version
static void do_args(int argc, char * const argv[]) {
    int rc;
    char *progname = argv[0];
    argc--; argv++;
    while (argc > 0) {
        char *arg = argv[0];
        if (strcmp(arg, "-v") == 0) {
            verbose++;
        } else if (strcmp(arg, "-q") == 0) {
            if (--verbose < 0)
                verbose = 0;
        } else if (strcmp(arg, "-h") == 0) {
            rc = 0;
        do_usage:
            fprintf(stderr, "Usage: -h -c -d <num_dbs> -r <num_rows> %s\n", progname);
            exit(rc);
        } else if (strcmp(arg, "-d") == 0) {
            argc--; argv++;
            NUM_DBS = atoi(argv[0]);
            if (NUM_DBS > MAX_DBS) {
                fprintf(stderr, "max value for -d field is %d\n", MAX_DBS);
                rc = 1;
                goto do_usage;
            }
        } else if (strcmp(arg, "-r") == 0) {
            argc--; argv++;
            NUM_ROWS = atoi(argv[0]);
        } else if (strcmp(arg, "-c") == 0) {
            CHECK_RESULTS = 1;
        } else if (strcmp(arg, "-V") == 0) {
            argc--; argv++;
            SRC_VERSION = atoi(argv[0]);
        } else {
            fprintf(stderr, "Unknown arg: %s\n", arg);
            rc = 1;
            goto do_usage;
        }
        argc--; argv++;
    }
}

222
src/tests/upgrade-test-4.c Normal file
View file

@ -0,0 +1,222 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "$Id: loader-stress-test.c 20470 2010-05-20 18:30:04Z bkuszmaul $"
#include "test.h"
#include "toku_pthread.h"
#include "toku_atomic.h"
#include <db.h>
#include <sys/stat.h>
#include "ydb-internal.h"
#include "test_kv_gen.h"
/*
*/
DB_ENV *env;
enum {MAX_NAME=128};
int NUM_DBS=5;
int NUM_ROWS=100000;
int CHECK_RESULTS=0;
enum { old_default_cachesize=1024 }; // MB
int CACHESIZE=old_default_cachesize;
enum {ROWS_PER_TRANSACTION=10000};
char *db_v3_dir = "../../utils/dir.preload-3.1-db.c.tdb";
char *db_v4_dir = "dir.preload-3.1-db.c.tdb";
char *env_dir = ENVDIR; // the default env_dir.
int SRC_VERSION = 4;
// Upgrade test 4: open the old-version dictionaries, append NUM_ROWS more
// rows (primary keys NUM_ROWS+1 .. 2*NUM_ROWS) through the upgraded code
// path, close, reopen, and verify all 2*NUM_ROWS rows in every database.
static void upgrade_test_4(DB **dbs) {
    int r;
    // open the DBS
    {
        DBT desc;
        dbt_init(&desc, "foo", sizeof("foo"));
        char name[MAX_NAME*2];
        int idx[MAX_DBS];
        for(int i=0;i<NUM_DBS;i++) {
            idx[i] = i;   // put_multiple_generate/check_results read this via app_private
            r = db_create(&dbs[i], env, 0); CKERR(r);
            r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
            dbs[i]->app_private = &idx[i];
            snprintf(name, sizeof(name), "db_%04x", i);
            r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
        }
    }
    // append some rows, ROWS_PER_TRANSACTION rows per transaction
    DB_TXN *txn;
    DBT skey, sval;
    DBT key, val;
    dbt_init_realloc(&key);
    dbt_init_realloc(&val);
    unsigned int k, v;
    if ( verbose ) { printf("appending");fflush(stdout); }
    int outer_loop_num = ( NUM_ROWS <= ROWS_PER_TRANSACTION ) ? 1 : (NUM_ROWS / ROWS_PER_TRANSACTION);
    for(int x=0;x<outer_loop_num;x++) {
        r = env->txn_begin(env, NULL, &txn, 0); CKERR(r);
        for(int i=1;i<=ROWS_PER_TRANSACTION;i++) {
            // new keys start above the rows already present in the old dictionaries
            k = i + (x*ROWS_PER_TRANSACTION) + NUM_ROWS;
            v = generate_val(k, 0);
            dbt_init(&skey, &k, sizeof(unsigned int));
            dbt_init(&sval, &v, sizeof(unsigned int));
            for(int db = 0;db < NUM_DBS;db++) {
                put_multiple_generate(dbs[db], // dest_db
                                      NULL,    // src_db, ignored
                                      &key, &val,
                                      &skey, &sval, // src_key, src_val
                                      NULL);   // extra, ignored
                r = dbs[db]->put(dbs[db], txn, &key, &val, 0); CKERR(r);
                // DB 0 hands back pointers into skey/sval (flags cleared);
                // re-arm the DBTs so later DBs own their buffers again
                if (key.flags == 0) { dbt_init_realloc(&key); }
                if (val.flags == 0) { dbt_init_realloc(&val); }
            }
        }
        r = txn->commit(txn, 0); CKERR(r);
        if ( verbose ) {printf(".");fflush(stdout);}
    }
    if ( key.flags ) { toku_free(key.data); key.data = NULL; }
    // BUG FIX: this branch previously nulled key.data again (copy-paste),
    // leaving val.data dangling after the free.
    if ( val.flags ) { toku_free(val.data); val.data = NULL; }
    // close
    {
        for(int i=0;i<NUM_DBS;i++) {
            r = dbs[i]->close(dbs[i], 0); CKERR(r);
            dbs[i] = NULL;
        }
    }
    // open again
    {
        DBT desc;
        dbt_init(&desc, "foo", sizeof("foo"));
        char name[MAX_NAME*2];
        int idx[MAX_DBS];
        for(int i=0;i<NUM_DBS;i++) {
            idx[i] = i;
            r = db_create(&dbs[i], env, 0); CKERR(r);
            r = dbs[i]->set_descriptor(dbs[i], 1, &desc); CKERR(r);
            dbs[i]->app_private = &idx[i];
            snprintf(name, sizeof(name), "db_%04x", i);
            r = dbs[i]->open(dbs[i], NULL, name, NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
        }
    }
    // read and verify all rows (original rows plus the appended ones)
    {
        if ( verbose ) {printf("\nchecking");fflush(stdout);}
        check_results(env, dbs, NUM_DBS, NUM_ROWS * 2);
        if ( verbose) {printf("\ndone\n");fflush(stdout);}
    }
    // close
    {
        for(int i=0;i<NUM_DBS;i++) {
            r = dbs[i]->close(dbs[i], 0); CKERR(r);
            dbs[i] = NULL;
        }
    }
}
// Populate env_dir with a copy of a canned database directory created by an
// older TokuDB version (selected by SRC_VERSION), open the environment over
// it (exercising the upgrade path), and run the upgrade_test_4 scenario.
static void run_test(void)
{
    int r;
    char *src_db_dir;
    // pick the canned directory matching the requested source version
    if ( SRC_VERSION == 3 )
        src_db_dir = db_v3_dir;
    else if ( SRC_VERSION == 4 )
        src_db_dir = db_v4_dir;
    else {
        fprintf(stderr, "unsupported TokuDB version %d to upgrade\n", SRC_VERSION);
        assert(0);
    }
    {
        // replace env_dir with a fresh copy of the old-version databases
        int len = 256;
        char syscmd[len];
        r = snprintf(syscmd, len, "rm -rf %s", env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
        r = snprintf(syscmd, len, "cp -r %s %s", src_db_dir, env_dir);
        assert(r<len);
        r = system(syscmd); CKERR(r);
    }
    generate_permute_tables();
    r = db_env_create(&env, 0); CKERR(r);
    int envflags = DB_INIT_LOCK | DB_INIT_LOG | DB_INIT_MPOOL | DB_INIT_TXN | DB_CREATE | DB_PRIVATE;
    r = env->open(env, env_dir, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    env->set_errfile(env, stderr);
    r = env->checkpointing_set_period(env, 60); CKERR(r);
    DB **dbs = (DB**)toku_malloc(sizeof(DB*) * NUM_DBS);
    assert(dbs != NULL);
    // --------------------------
    upgrade_test_4(dbs);
    // --------------------------
    if (verbose >= 2)
        print_engine_status(env);
    r = env->close(env, 0); CKERR(r);
    toku_free(dbs);
}
// ------------ infrastructure ----------
static void do_args(int argc, char * const argv[]);
// Test harness entry point (the harness's main() calls this):
// parse command-line options, then run the upgrade scenario.
int test_main(int argc, char * const *argv) {
    do_args(argc, argv);
    run_test();
    return 0;
}
// Parse command-line options into the test's global knobs:
//   -v/-q  raise/lower verbosity    -h    print usage and exit
//   -d N   number of databases      -r N  number of rows
//   -c     enable result checking   -V N  source TokuDB version
static void do_args(int argc, char * const argv[]) {
    int rc;
    char *progname = argv[0];
    argc--; argv++;
    while (argc > 0) {
        char *arg = argv[0];
        if (strcmp(arg, "-v") == 0) {
            verbose++;
        } else if (strcmp(arg, "-q") == 0) {
            if (--verbose < 0)
                verbose = 0;
        } else if (strcmp(arg, "-h") == 0) {
            rc = 0;
        do_usage:
            fprintf(stderr, "Usage: -h -c -d <num_dbs> -r <num_rows> %s\n", progname);
            exit(rc);
        } else if (strcmp(arg, "-d") == 0) {
            argc--; argv++;
            NUM_DBS = atoi(argv[0]);
            if (NUM_DBS > MAX_DBS) {
                fprintf(stderr, "max value for -d field is %d\n", MAX_DBS);
                rc = 1;
                goto do_usage;
            }
        } else if (strcmp(arg, "-r") == 0) {
            argc--; argv++;
            NUM_ROWS = atoi(argv[0]);
        } else if (strcmp(arg, "-c") == 0) {
            CHECK_RESULTS = 1;
        } else if (strcmp(arg, "-V") == 0) {
            argc--; argv++;
            SRC_VERSION = atoi(argv[0]);
        } else {
            fprintf(stderr, "Unknown arg: %s\n", arg);
            rc = 1;
            goto do_usage;
        }
        argc--; argv++;
    }
}

View file

@ -569,9 +569,9 @@ static const char * orig_env_ver_key = "original_version";
// requires: persistent environment dictionary is already open
static int
upgrade_env(DB_ENV * env, DB_TXN * txn) {
maybe_upgrade_persistent_environment_dictionary(DB_ENV * env, DB_TXN * txn) {
int r;
uint64_t stored_env_version;
uint32_t stored_env_version;
DBT key, val;
toku_fill_dbt(&key, curr_env_ver_key, strlen(curr_env_ver_key));
@ -579,8 +579,18 @@ upgrade_env(DB_ENV * env, DB_TXN * txn) {
r = toku_db_get(env->i->persistent_environment, txn, &key, &val, 0);
assert(r == 0);
stored_env_version = toku_dtoh32(*(uint32_t*)val.data);
if (stored_env_version != BRT_LAYOUT_VERSION)
if (stored_env_version > BRT_LAYOUT_VERSION)
r = TOKUDB_DICTIONARY_TOO_NEW;
else if (stored_env_version < BRT_LAYOUT_MIN_SUPPORTED_VERSION)
r = TOKUDB_DICTIONARY_TOO_OLD;
else if (stored_env_version < BRT_LAYOUT_VERSION) {
const uint32_t environment_version = toku_htod32(BRT_LAYOUT_VERSION);
toku_fill_dbt(&key, curr_env_ver_key, strlen(curr_env_ver_key));
toku_fill_dbt(&val, &environment_version, sizeof(environment_version));
r = toku_db_put(env->i->persistent_environment, txn, &key, &val, DB_YESOVERWRITE);
assert(r==0);
}
// TODO: add key/val for timestamp of VERSION_12_CREATION (could be upgrade)
return r;
}
@ -640,7 +650,7 @@ validate_env(DB_ENV * env, BOOL * valid_newenv, BOOL need_rollback_cachefile) {
r = 0; // both rollback cachefile and persistent env are missing
}
else {
r = toku_ydb_do_error(env, errno, "Unable to access rollback cachefile\n");
r = toku_ydb_do_error(env, stat_errno, "Unable to access rollback cachefile\n");
assert(r);
}
}
@ -663,7 +673,7 @@ validate_env(DB_ENV * env, BOOL * valid_newenv, BOOL need_rollback_cachefile) {
r = 0; // both fileops directory and persistent env are missing
}
else {
r = toku_ydb_do_error(env, errno, "Unable to access fileops directory\n");
r = toku_ydb_do_error(env, stat_errno, "Unable to access fileops directory\n");
assert(r);
}
}
@ -687,6 +697,18 @@ validate_env(DB_ENV * env, BOOL * valid_newenv, BOOL need_rollback_cachefile) {
return r;
}
static int
ydb_maybe_upgrade_env (DB_ENV *env) {
int r = 0;
if (env->i->open_flags & DB_INIT_TXN && env->i->open_flags & DB_INIT_LOG) {
toku_ydb_unlock();
r = toku_maybe_upgrade_log(env->i->dir, env->i->real_log_dir);
toku_ydb_lock();
}
return r;
}
// Open the environment.
@ -767,6 +789,9 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
need_rollback_cachefile = TRUE;
}
r = ydb_maybe_upgrade_env(env);
if (r!=0) return r;
r = validate_env(env, &newenv, need_rollback_cachefile); // make sure that environment is either new or complete
if (r != 0) return r;
@ -848,11 +873,11 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
r = db_use_builtin_val_cmp(env->i->persistent_environment);
assert(r==0);
r = db_open_iname(env->i->persistent_environment, txn, environmentdictionary, DB_CREATE, mode);
assert(r==0);
if (newenv) {
// create new persistent_environment
DBT key, val;
const uint32_t environment_version = toku_htod32(BRT_LAYOUT_VERSION);
assert(r==0);
toku_fill_dbt(&key, orig_env_ver_key, strlen(orig_env_ver_key));
toku_fill_dbt(&val, &environment_version, sizeof(environment_version));
r = toku_db_put(env->i->persistent_environment, txn, &key, &val, 0);
@ -863,8 +888,8 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
assert(r==0);
}
else {
r = maybe_upgrade_persistent_environment_dictionary(env, txn);
assert(r==0);
r = upgrade_env(env, txn);
}
}
{
@ -1664,11 +1689,13 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
engstat->logsuppressfail = logsuppressfail;
}
{
// dummy values until upgrade logic is complete and counters are available
engstat->upgrade_env_status = 0;
engstat->upgrade_header = 0;
engstat->upgrade_nonleaf = 0;
engstat->upgrade_leaf = 0;
BRT_UPGRADE_STATUS_S brt_upgrade_stat;
toku_brt_get_upgrade_status(&brt_upgrade_stat);
engstat->upgrade_env_status = toku_log_upgrade_get_footprint();
engstat->upgrade_header = brt_upgrade_stat.header;
engstat->upgrade_nonleaf = brt_upgrade_stat.nonleaf;
engstat->upgrade_leaf = brt_upgrade_stat.leaf;
}
}
return r;
@ -5012,13 +5039,13 @@ toku_db_set_dup_compare(DB *db, int (*dup_compare)(DB *, const DBT *, const DBT
return r;
}
static int toku_db_set_descriptor(DB *db, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) {
static int toku_db_set_descriptor(DB *db, u_int32_t version, const DBT* descriptor) {
HANDLE_PANICKED_DB(db);
int r;
if (db_opened(db)) return EINVAL;
else if (!descriptor) r = EINVAL;
else if (descriptor->size>0 && !descriptor->data) r = EINVAL;
else r = toku_brt_set_descriptor(db->i->brt, version, descriptor, dbt_userformat_upgrade);
else r = toku_brt_set_descriptor(db->i->brt, version, descriptor);
return r;
}
@ -5410,9 +5437,9 @@ static int locked_db_set_dup_compare(DB * db, int (*dup_compare) (DB *, const DB
toku_ydb_lock(); int r = toku_db_set_dup_compare(db, dup_compare); toku_ydb_unlock(); return r;
}
static int locked_db_set_descriptor(DB *db, u_int32_t version, const DBT* descriptor, toku_dbt_upgradef dbt_userformat_upgrade) {
static int locked_db_set_descriptor(DB *db, u_int32_t version, const DBT* descriptor) {
toku_ydb_lock();
int r = toku_db_set_descriptor(db, version, descriptor, dbt_userformat_upgrade);
int r = toku_db_set_descriptor(db, version, descriptor);
toku_ydb_unlock();
return r;
}

View file

@ -81,6 +81,8 @@ void toku_fs_get_write_info(time_t *enospc_last_time, uint64_t *enospc_current,
int toku_fsync_dirfd_without_accounting(DIR *dirp);
int toku_fsync_dir_by_name_without_accounting(const char *dir_name);
// Get the file system free and total space for the file system that contains a path name
// *avail_size is set to the bytes of free space in the file system available for non-root
// *free_size is set to the bytes of free space in the file system

View file

@ -140,7 +140,7 @@ toku_fstat(int fd, toku_struct_stat *statbuf) {
int
toku_fsync_dirfd_without_accounting(DIR *dirp) {
//Not supported in windows.
//Believed to not be supported in windows.
//Possibly not needed
return 0;
}
@ -149,3 +149,11 @@ int
toku_fsync_directory(const char *UU(fname)) {
return 0; // toku_fsync_dirfd
}
int
toku_fsync_dir_by_name_without_accounting(const char *dir_name) {
//Believed to not be supported in windows.
//Possibly not needed
return 0;
}