[t:4570] Merging verification tool and partial removal of deserialization custom errors.

git-svn-id: file:///svn/toku/tokudb@44286 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Christian Rober 2013-04-17 00:00:43 -04:00 committed by Yoni Fogel
parent 50c74b6536
commit 123cbff2ba
7 changed files with 755 additions and 120 deletions

View file

@ -119,6 +119,7 @@ enum {
TOKUDB_TRY_AGAIN = -100012,
TOKUDB_NEEDS_REPAIR = -100013,
TOKUDB_CURSOR_CONTINUE = -100014,
TOKUDB_BAD_CHECKSUM = -100015,
DONTUSE_I_JUST_PUT_THIS_HERE_SO_I_COULD_HAVE_A_COMMA_AFTER_EACH_ITEM
};
@ -269,6 +270,7 @@ static void print_defines (void) {
dodefine(TOKUDB_TRY_AGAIN);
dodefine(TOKUDB_NEEDS_REPAIR);
dodefine(TOKUDB_CURSOR_CONTINUE);
dodefine(TOKUDB_BAD_CHECKSUM);
/* LOADER flags */
printf("/* LOADER flags */\n");

View file

@ -37,6 +37,7 @@ set(FT_SOURCES
ftloader-callback.c
ft_msg.c
ft_node-serialize.c
ft-node-deserialize.c
ft-ops.c
ft-pwrite.c
ft-serialize.c
@ -111,6 +112,7 @@ set(bins
ftdump
tdb_logprint
tdb-recover
ftverify
)
foreach(bin ${bins})
add_executable(${bin} ${bin}.c)
@ -122,6 +124,10 @@ foreach(bin ${bins})
target_link_libraries(${bin}_static ft_static ${LIBTOKUPORTABILITY_STATIC})
endforeach(bin)
# link in math.h library just for this tool.
target_link_libraries(ftverify m)
target_link_libraries(ftverify_static m)
install(
TARGETS ftdump_static
DESTINATION bin

View file

@ -518,9 +518,68 @@ int toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE
FT h, int n_workitems, int n_threads,
BOOL for_checkpoint);
int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, ROLLBACK_LOG_NODE *logp, FT h);
enum deserialize_error_code toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe);
enum deserialize_error_code toku_deserialize_bp_from_compressed(FTNODE node, int childnum, DESCRIPTOR desc, ft_compare_func cmp);
enum deserialize_error_code toku_deserialize_ftnode_from (int fd, BLOCKNUM off, u_int32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* bfe);
int toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe);
int toku_deserialize_bp_from_compressed(FTNODE node, int childnum, DESCRIPTOR desc, ft_compare_func cmp);
int toku_deserialize_ftnode_from (int fd, BLOCKNUM off, u_int32_t /*fullhash*/, FTNODE *ftnode, FTNODE_DISK_DATA* ndd, struct ftnode_fetch_extra* bfe);
//////////////// <CER> TODO: Move these function declarations
int
deserialize_ft_from_fd_into_rbuf(int fd,
toku_off_t offset_of_header,
struct rbuf *rb,
u_int64_t *checkpoint_count,
LSN *checkpoint_lsn,
u_int32_t * version_p,
enum deserialize_error_code *e);
enum deserialize_error_code
deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version);
int
read_block_from_fd_into_rbuf(
int fd,
BLOCKNUM blocknum,
FT h,
struct rbuf *rb
);
int
read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb);
int
verify_ftnode_sub_block (struct sub_block *sb);
void
just_decompress_sub_block(struct sub_block *sb);
/* Beginning of ft-node-deserialize.c helper functions. */
/*
 * These prototypes are declared WITHOUT `inline`.  The functions are
 * defined in a single .c file and called from other files that only
 * see these prototypes; C99 requires a function declared `inline` with
 * external linkage to be defined in the same translation unit, so an
 * `inline` prototype here would be invalid for every other includer.
 */
// Set the node fields that are known up front (not read from disk).
void
initialize_ftnode(FTNODE node, BLOCKNUM blocknum);
// Read the 8 magic bytes from rb; nonzero return means they did not match.
int
read_and_check_magic(struct rbuf *rb);
// Read the layout version from rb into node; nonzero return means the
// version is older than the minimum supported one.
int
read_and_check_version(FTNODE node, struct rbuf *rb);
// Read the remaining header fields (original layout version, build id,
// number of children) from rb into node.
void
read_node_info(FTNODE node, struct rbuf *rb, int version);
// Allocate node->bp and *ndd for node->n_children entries and read the
// (start, size) pair of every partition from rb.
void
allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd);
// Compare the checksum of the bytes consumed so far with the checksum
// stored next in rb; nonzero return means they differ.
int
check_node_info_checksum(struct rbuf *rb);
//////////////// <CER>
unsigned int toku_serialize_ftnode_size(FTNODE node); /* How much space will it take? */
int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
@ -983,6 +1042,6 @@ toku_ft_node_put_cmd (
void toku_flusher_thread_set_callback(void (*callback_f)(int, void*), void* extra);
enum deserialize_error_code toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h);
int toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h);
#endif

155
ft/ft-node-deserialize.c Normal file
View file

@ -0,0 +1,155 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
// vim: expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: ft-serialize.c 43686 2012-05-18 23:21:00Z leifwalsh $"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <ft-internal.h>
#include <db.h>
////////////////////////////////////////
// 1. Sets initial values
//
// Fills in the ftnode fields that are assigned directly rather than
// parsed out of the read buffer:
//   - fullhash is set to a placeholder value (0xDEADBEEF),
//   - thisnodename is set to the supplied blocknum,
//   - dirty is cleared,
//   - bp is set to NULL (no partitions allocated yet),
//   - layout_version_read_from_disk is zeroed until the version is read.
//
// Deliberately NOT declared `inline`: this function is called from
// other files (for example ftverify.c), so this file has to provide
// the external definition.  Under C99 inline semantics an `inline`
// definition here would not emit one.
//
void
initialize_ftnode(FTNODE node, BLOCKNUM blocknum)
{
    node->fullhash = 0xDEADBEEF; // <CER> Is this 'spoof' ok?
    node->thisnodename = blocknum;
    node->dirty = 0;
    node->bp = NULL;
    // <CER> Can we use this initialization as a correctness assert in
    // a later function?
    node->layout_version_read_from_disk = 0;
}
// TODO:
/************************
* In other deserialization code, we check the rb size member. We
* verify that it is greater than or equal to 24. Ignoring this magic
* number for a moment, should we put this check in its own function? *
*************************/
/////////////////////////////////////////////////////////////////////
// 2. Read and check the 'magic' bytes on disk.  Returns an error if
// the magic does not match.
//
// Consumes 8 bytes from rb and compares them against the two accepted
// magic strings, "tokuleaf" and "tokunode".
//
// Returns 0 if the bytes equal either string, DB_BADFORMAT otherwise.
// This function does not itself check that rb holds at least 8 bytes.
//
// Deliberately NOT declared `inline`: the function is called from
// other files (for example ftverify.c), so this file has to provide
// the external definition.
//
int
read_and_check_magic(struct rbuf *rb)
{
    int r = 0;
    bytevec magic;
    rbuf_literal_bytes(rb, &magic, 8);
    if (memcmp(magic, "tokuleaf", 8)!=0 &&
        memcmp(magic, "tokunode", 8)!=0) {
        r = DB_BADFORMAT; // TODO: Return more meaningful error.
    }
    return r;
}
////////////////////
// 3. Read and check the layout version.
//
// Reads one int from rb and stores it in
// node->layout_version_read_from_disk (the value is stored even when
// the check below fails, so the caller can still inspect it).
//
// Returns 0 if the version is at least FT_LAYOUT_MIN_SUPPORTED_VERSION,
// and 1 otherwise.
//
// Deliberately NOT declared `inline`: the function is called from
// other files (for example ftverify.c), so this file has to provide
// the external definition.
//
int
read_and_check_version(FTNODE node, struct rbuf *rb)
{
    int r = 0;
    int version = rbuf_int(rb);
    node->layout_version_read_from_disk = version;
    if (version < FT_LAYOUT_MIN_SUPPORTED_VERSION) {
        r = 1; // TODO: Better error reporting.
    }
    return r;
}
////////////////////
// 4. Read the remaining node header fields.
//
// Stores the caller-supplied version in node->layout_version, then
// reads three ints from rb, in this order: layout_version_original,
// build_id and n_children.  No validation is done on the values read.
//
// Deliberately NOT declared `inline`: the function is exported through
// ft-internal.h for use by other files, so this file has to provide
// the external definition.
//
void
read_node_info(FTNODE node, struct rbuf *rb, int version)
{
    node->layout_version = version;
    node->layout_version_original = rbuf_int(rb);
    node->build_id = rbuf_int(rb);
    node->n_children = rbuf_int(rb);
}
////////////////////
// 5. Allocate the partition arrays and read the partition locations.
//
// Allocates node->bp and *ndd with node->n_children entries each, then
// reads one (start, size) pair of ints per child from rb into *ndd.
// node->n_children must already be filled in; it is used as-is, without
// any range check.
//
// Ownership of both allocations passes to the caller through node->bp
// and *ndd.
//
// Deliberately NOT declared `inline`: the function is exported through
// ft-internal.h for use by other files, so this file has to provide
// the external definition.
//
// <CER> Should these be two separate functions?
void
allocate_and_read_partition_offsets(FTNODE node, struct rbuf *rb, FTNODE_DISK_DATA *ndd)
{
    XMALLOC_N(node->n_children, node->bp);
    // TODO: Fix this to use xmalloc_n
    *ndd = toku_xmalloc(node->n_children * sizeof(**ndd));
    // Read the partition locations.
    for (int i = 0; i < node->n_children; i++) {
        BP_START(*ndd, i) = rbuf_int(rb);
        BP_SIZE (*ndd, i) = rbuf_int(rb);
    }
}
////////////////////
// 6. Check the node info checksum.
//
// Computes the x1764 checksum over the bytes of rb consumed so far
// (rb->buf[0 .. rb->ndone)), then reads the stored checksum as the next
// int in rb.  The order matters: the checksum must be computed before
// rbuf_int() advances rb->ndone past the stored value.
//
// Returns 0 if the two checksums agree, TOKUDB_BAD_CHECKSUM otherwise.
//
// Deliberately NOT declared `inline`: the function is exported through
// ft-internal.h for use by other files, so this file has to provide
// the external definition.
//
int
check_node_info_checksum(struct rbuf *rb)
{
    int r = 0;
    // Verify checksum of header stored.
    u_int32_t checksum = x1764_memory(rb->buf, rb->ndone);
    u_int32_t stored_checksum = rbuf_int(rb);
    if (stored_checksum != checksum) {
        // TODO: dump_bad_block(rb->buf, rb->size);
        r = TOKUDB_BAD_CHECKSUM;
    }
    return r;
}
// Two functions, at this point, SHOULD be split into separate read and
// checksum check calls:
// 1. read_and_decompress_sub_block - this is generic, used elsewhere.
// So....
// a. call read_compressed_sub_block() directly
// then....
//
// 2. deserialize_ftnode_info() - this actually reads in the node
// 'info' fields, such as height, nodesize, etc.
/////////////////////////////////
// ?. ----
// setup_ftnode_partitions() -
// calls :
// a. update_bfe_using_ftnode
// b. setup_partitions_using_bfe()
/////////////////////////////////
// ?. partition from sub-block deserialization.
// A.decompress_and_deserialize_worker()
// calls:
// a. read_and_decompress_sub_block (SEE ABOVE) -
// -calls:
// -ii. read_compressed_sub_block() - Returns checksum error AND reads out buffer.
// b. deserialize_ftnode_partition()
// -calls:
// -ii. verify_ftnode_sub_block() - JUST verifies checksum.
//
// OR
//
// B. check_and_copy_compressed_sub_block_worker
// calls:
// a. read_compressed_sub_block() - Returns checksum AND reads out of buffer.
/////////////////////////////////
// 1. first calls verify_ftnode_sub_block() which must be refactored
// into two separate calls.

View file

@ -138,7 +138,7 @@ exit:
}
// We only deserialize brt header once and then share everything with all the brts.
static enum deserialize_error_code
enum deserialize_error_code
deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
{
enum deserialize_error_code e = DS_OK;
@ -423,7 +423,7 @@ serialize_ft_min_size (u_int32_t version) {
//
// TOKUDB_DICTIONARY_NO_HEADER means we can overwrite everything in the
// file AND the header is useless
static int
int
deserialize_ft_from_fd_into_rbuf(int fd,
toku_off_t offset_of_header,
struct rbuf *rb,

View file

@ -1108,8 +1108,7 @@ void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl)
toku_free(nl);
}
//
static int
int
read_block_from_fd_into_rbuf(
int fd,
BLOCKNUM blocknum,
@ -1163,10 +1162,10 @@ static void read_ftnode_header_from_fd_into_rbuf_if_small_enough (int fd, BLOCKN
// read the compressed partition into the sub_block,
// validate the checksum of the compressed data
//
static enum deserialize_error_code
int
read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb)
{
enum deserialize_error_code e = DS_OK;
int r = 0;
sb->compressed_size = rbuf_int(rb);
sb->uncompressed_size = rbuf_int(rb);
bytevec* cp = (bytevec*)&sb->compressed_ptr;
@ -1175,18 +1174,17 @@ read_compressed_sub_block(struct rbuf *rb, struct sub_block *sb)
// let's check the checksum
u_int32_t actual_xsum = x1764_memory((char *)sb->compressed_ptr-8, 8+sb->compressed_size);
if (sb->xsum != actual_xsum) {
e = DS_XSUM_FAIL;
r = TOKUDB_BAD_CHECKSUM;
}
return e;
return r;
}
static enum deserialize_error_code
static int
read_and_decompress_sub_block(struct rbuf *rb, struct sub_block *sb)
{
enum deserialize_error_code e = DS_OK;
e = read_compressed_sub_block(rb, sb);
if (e != DS_OK) {
int r = 0;
r = read_compressed_sub_block(rb, sb);
if (r != 0) {
goto exit;
}
@ -1200,27 +1198,44 @@ read_and_decompress_sub_block(struct rbuf *rb, struct sub_block *sb)
sb->compressed_size
);
exit:
return e;
return r;
}
// Decompress an already-read sub block.
//
// Allocates sb->uncompressed_size bytes for sb->uncompressed_ptr and
// fills that buffer by decompressing the sb->compressed_size bytes at
// sb->compressed_ptr.  No checksum is verified here.
void
just_decompress_sub_block(struct sub_block *sb)
{
    // <CER> TODO: Add assert that the subblock was read in.
    sb->uncompressed_ptr = toku_xmalloc(sb->uncompressed_size);
    assert(sb->uncompressed_ptr);

    // Destination buffer and size first, then source buffer and size.
    toku_decompress(sb->uncompressed_ptr, sb->uncompressed_size,
                    sb->compressed_ptr, sb->compressed_size);
}
// verify the checksum
static enum deserialize_error_code
int
verify_ftnode_sub_block (struct sub_block *sb)
{
enum deserialize_error_code e = DS_OK;
int r = 0;
// first verify the checksum
u_int32_t data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end
u_int32_t stored_xsum = toku_dtoh32(*((u_int32_t *)((char *)sb->uncompressed_ptr + data_size)));
u_int32_t actual_xsum = x1764_memory(sb->uncompressed_ptr, data_size);
if (stored_xsum != actual_xsum) {
dump_bad_block(sb->uncompressed_ptr, sb->uncompressed_size);
e = DS_XSUM_FAIL;
r = TOKUDB_BAD_CHECKSUM;
}
return e;
return r;
}
// This function deserializes the data stored by serialize_ftnode_info
static enum deserialize_error_code
static int
deserialize_ftnode_info(
struct sub_block *sb,
FTNODE node
@ -1230,9 +1245,9 @@ deserialize_ftnode_info(
// this function puts that information into node
// first verify the checksum
enum deserialize_error_code e = DS_OK;
e = verify_ftnode_sub_block(sb);
if (e != DS_OK) {
int r = 0;
r = verify_ftnode_sub_block(sb);
if (r != 0) {
goto exit;
}
@ -1292,7 +1307,7 @@ deserialize_ftnode_info(
assert(FALSE);
}
exit:
return e;
return r;
}
static void
@ -1398,7 +1413,7 @@ static void setup_ftnode_partitions(FTNODE node, struct ftnode_fetch_extra* bfe,
/* deserialize the partition from the sub-block's uncompressed buffer
* and destroy the uncompressed buffer
*/
static enum deserialize_error_code
static int
deserialize_ftnode_partition(
struct sub_block *sb,
FTNODE node,
@ -1407,9 +1422,9 @@ deserialize_ftnode_partition(
ft_compare_func cmp
)
{
enum deserialize_error_code e = DS_OK;
e = verify_ftnode_sub_block(sb);
if (e != DS_OK) {
int r = 0;
r = verify_ftnode_sub_block(sb);
if (r != 0) {
goto exit;
}
u_int32_t data_size = sb->uncompressed_size - 4; // checksum is 4 bytes at end
@ -1452,35 +1467,35 @@ deserialize_ftnode_partition(
// destroy old omt (bn.buffer) that was created by toku_create_empty_bn(), so we can create a new one
toku_omt_destroy(&BLB_BUFFER(node, childnum));
int r = toku_omt_create_steal_sorted_array(&BLB_BUFFER(node, childnum), &array, num_entries, num_entries);
r = toku_omt_create_steal_sorted_array(&BLB_BUFFER(node, childnum), &array, num_entries, num_entries);
invariant_zero(r);
}
assert(rb.ndone == rb.size);
toku_free(sb->uncompressed_ptr);
exit:
return e;
return r;
}
static enum deserialize_error_code
static int
decompress_and_deserialize_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child, DESCRIPTOR desc, ft_compare_func cmp)
{
enum deserialize_error_code e = DS_OK;
e = read_and_decompress_sub_block(&curr_rbuf, &curr_sb);
if (e != DS_OK) {
int r = 0;
r = read_and_decompress_sub_block(&curr_rbuf, &curr_sb);
if (r != 0) {
goto exit;
}
// at this point, sb->uncompressed_ptr stores the serialized node partition
e = deserialize_ftnode_partition(&curr_sb, node, child, desc, cmp);
r = deserialize_ftnode_partition(&curr_sb, node, child, desc, cmp);
exit:
return e;
return r;
}
static enum deserialize_error_code
static int
check_and_copy_compressed_sub_block_worker(struct rbuf curr_rbuf, struct sub_block curr_sb, FTNODE node, int child)
{
enum deserialize_error_code e = DS_OK;
e = read_compressed_sub_block(&curr_rbuf, &curr_sb);
if (e != DS_OK) {
int r = 0;
r = read_compressed_sub_block(&curr_rbuf, &curr_sb);
if (r != 0) {
goto exit;
}
@ -1490,10 +1505,10 @@ check_and_copy_compressed_sub_block_worker(struct rbuf curr_rbuf, struct sub_blo
bp_sb->compressed_ptr = toku_xmalloc(bp_sb->compressed_size);
memcpy(bp_sb->compressed_ptr, curr_sb.compressed_ptr, bp_sb->compressed_size);
exit:
return e;
return r;
}
static enum deserialize_error_code
static int
deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
FTNODE_DISK_DATA* ndd,
BLOCKNUM blocknum,
@ -1505,7 +1520,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
// Also fetch in the basement node if needed.
// Return 0 if it worked. If something goes wrong (including that we are looking at some old data format that doesn't have partitions) then return nonzero.
{
enum deserialize_error_code e = DS_OK;
int r = 0;
FTNODE node = toku_xmalloc(sizeof(*node));
// fill in values that are known and not stored in rb
@ -1515,7 +1530,9 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
node->bp = NULL; // fill this in so we can free without a leak.
if (rb->size < 24) {
e = DS_ERRNO;
// TODO: What error do we return here?
// Does it even matter?
r = toku_db_badformat();
goto cleanup;
}
@ -1523,16 +1540,14 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
rbuf_literal_bytes(rb, &magic, 8);
if (memcmp(magic, "tokuleaf", 8)!=0 &&
memcmp(magic, "tokunode", 8)!=0) {
// int r = toku_db_badformat();
// #define DB_BADFORMAT -30500
e = DS_ERRNO;
r = toku_db_badformat();
goto cleanup;
}
node->layout_version_read_from_disk = rbuf_int(rb);
if (node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) {
// This code path doesn't have to worry about upgrade.
e = DS_ERRNO;
r = toku_db_badformat();
goto cleanup;
}
@ -1555,7 +1570,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
unsigned int nhsize = serialize_node_header_size(node); // we can do this because n_children is filled in.
unsigned int needed_size = nhsize + 12; // we need 12 more so that we can read the compressed block size information that follows for the nodeinfo.
if (needed_size > rb->size) {
e = DS_ERRNO;
r = toku_db_badformat();
goto cleanup;
}
@ -1571,7 +1586,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
u_int32_t stored_checksum = rbuf_int(rb);
if (stored_checksum != checksum) {
dump_bad_block(rb->buf, rb->size);
e = DS_XSUM_FAIL;
r = TOKUDB_BAD_CHECKSUM;
goto cleanup;
}
@ -1581,8 +1596,8 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
sb_node_info.compressed_size = rbuf_int(rb); // we'll be able to read these because we checked the size earlier.
sb_node_info.uncompressed_size = rbuf_int(rb);
if (rb->size-rb->ndone < sb_node_info.compressed_size + 8) {
e = DS_ERRNO; // we won't
goto cleanup;
r = toku_db_badformat();
goto cleanup;
}
// We got the entire header and node info!
toku_ft_status_update_pivot_fetch_reason(bfe);
@ -1594,7 +1609,7 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
// let's check the checksum
u_int32_t actual_xsum = x1764_memory((char *)sb_node_info.compressed_ptr-8, 8+sb_node_info.compressed_size);
if (sb_node_info.xsum != actual_xsum) {
e = DS_XSUM_FAIL;
r = TOKUDB_BAD_CHECKSUM;
goto cleanup;
}
@ -1610,8 +1625,8 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
);
// at this point sb->uncompressed_ptr stores the serialized node info.
e = deserialize_ftnode_info(&sb_node_info, node);
if (e != DS_OK) {
r = deserialize_ftnode_info(&sb_node_info, node);
if (r != 0) {
goto cleanup;
}
@ -1631,9 +1646,9 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
if (bfe->type != ftnode_fetch_none) {
PAIR_ATTR attr;
int r = toku_ftnode_pf_callback(node, *ndd, bfe, fd, &attr);
r = toku_ftnode_pf_callback(node, *ndd, bfe, fd, &attr);
if (r != 0) {
e = DS_ERRNO;
goto cleanup;
}
}
@ -1645,17 +1660,17 @@ deserialize_ftnode_header_from_rbuf_if_small_enough (FTNODE *ftnode,
}
}
*ftnode = node;
e = DS_OK;
r = 0; // TODO: Why do we do this???
cleanup:
if (e != DS_OK) {
if (r != 0) {
if (node) {
toku_free(*ndd);
toku_free(node->bp);
toku_free(node);
}
}
return e;
return r;
}
// This function takes a deserialized version 13 or 14 buffer and
@ -2140,7 +2155,7 @@ deserialize_and_upgrade_ftnode(FTNODE node,
}
*ndd = toku_xmalloc(node->n_children*sizeof(**ndd));
// Initialize the partition locations to zero, becuse version 14
// Initialize the partition locations to zero, because version 14
// and below have no notion of partitions on disk.
for (int i=0; i<node->n_children; i++) {
BP_START(*ndd,i) = 0;
@ -2152,7 +2167,7 @@ exit:
return r;
}
static enum deserialize_error_code
static int
deserialize_ftnode_from_rbuf(
FTNODE *ftnode,
FTNODE_DISK_DATA* ndd,
@ -2166,7 +2181,6 @@ deserialize_ftnode_from_rbuf(
// Effect: deserializes a ftnode that is in rb (with pointer of rb just past the magic) into a FTNODE.
{
int r = 0;
enum deserialize_error_code e = DS_OK;
FTNODE node = toku_xmalloc(sizeof(*node));
struct sub_block sb_node_info;
// fill in values that are known and not stored in rb
@ -2237,14 +2251,14 @@ deserialize_ftnode_from_rbuf(
//now we read and decompress the pivot and child information
sub_block_init(&sb_node_info);
e = read_and_decompress_sub_block(rb, &sb_node_info);
if (e != DS_OK) {
r = read_and_decompress_sub_block(rb, &sb_node_info);
if (r != 0) {
goto cleanup;
}
// at this point, sb->uncompressed_ptr stores the serialized node info
e = deserialize_ftnode_info(&sb_node_info, node);
if (e != DS_OK) {
r = deserialize_ftnode_info(&sb_node_info, node);
if (r != 0) {
goto cleanup;
}
toku_free(sb_node_info.uncompressed_ptr);
@ -2294,15 +2308,15 @@ deserialize_ftnode_from_rbuf(
switch (BP_STATE(node,i)) {
case PT_AVAIL:
// case where we read and decompress the partition
e = decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
if (e != DS_OK) {
r = decompress_and_deserialize_worker(curr_rbuf, curr_sb, node, i, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
if (r != 0) {
goto cleanup;
}
continue;
case PT_COMPRESSED:
// case where we leave the partition in the compressed state
e = check_and_copy_compressed_sub_block_worker(curr_rbuf, curr_sb, node, i);
if (e != DS_OK) {
r = check_and_copy_compressed_sub_block_worker(curr_rbuf, curr_sb, node, i);
if (r != 0) {
goto cleanup;
}
continue;
@ -2317,7 +2331,6 @@ deserialize_ftnode_from_rbuf(
r = 0;
cleanup:
if (r != 0) {
e = DS_ERRNO;
// NOTE: Right now, callers higher in the stack will assert on
// failure, so this is OK for production. However, if we
// create tools that use this function to search for errors in
@ -2325,12 +2338,12 @@ cleanup:
if (node) toku_free(node);
}
return e;
return r;
}
enum deserialize_error_code
int
toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, int fd, struct ftnode_fetch_extra* bfe) {
enum deserialize_error_code e = DS_OK;
int r = 0;
assert(BP_STATE(node,childnum) == PT_ON_DISK);
assert(node->bp[childnum].ptr.tag == BCT_NULL);
@ -2368,24 +2381,24 @@ toku_deserialize_bp_from_disk(FTNODE node, FTNODE_DISK_DATA ndd, int childnum, i
struct sub_block curr_sb;
sub_block_init(&curr_sb);
e = read_and_decompress_sub_block(&rb, &curr_sb);
if (e != DS_OK) {
r = read_and_decompress_sub_block(&rb, &curr_sb);
if (r != 0) {
goto exit;
}
// at this point, sb->uncompressed_ptr stores the serialized node partition
e = deserialize_ftnode_partition(&curr_sb, node, childnum, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
r = deserialize_ftnode_partition(&curr_sb, node, childnum, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
exit:
toku_free(raw_block);
return e;
return r;
}
// Take a ftnode partition that is in the compressed state, and make it avail
enum deserialize_error_code
int
toku_deserialize_bp_from_compressed(FTNODE node, int childnum,
DESCRIPTOR desc, ft_compare_func cmp) {
enum deserialize_error_code e = DS_OK;
int r = 0;
assert(BP_STATE(node, childnum) == PT_COMPRESSED);
SUB_BLOCK curr_sb = BSB(node, childnum);
@ -2401,13 +2414,13 @@ toku_deserialize_bp_from_compressed(FTNODE node, int childnum,
curr_sb->compressed_ptr,
curr_sb->compressed_size
);
e = deserialize_ftnode_partition(curr_sb, node, childnum, desc, cmp);
r = deserialize_ftnode_partition(curr_sb, node, childnum, desc, cmp);
toku_free(curr_sb->compressed_ptr);
toku_free(curr_sb);
return e;
return r;
}
static enum deserialize_error_code
static int
deserialize_ftnode_from_fd(int fd,
BLOCKNUM blocknum,
u_int32_t fullhash,
@ -2416,26 +2429,24 @@ deserialize_ftnode_from_fd(int fd,
struct ftnode_fetch_extra *bfe,
STAT64INFO info)
{
enum deserialize_error_code e;
struct rbuf rb = RBUF_INITIALIZER;
int r = 0;
r = read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb);
if (r != 0) {
e = DS_ERRNO;
goto cleanup;
} // if we were successful, then we are done.
e = deserialize_ftnode_from_rbuf(ftnode, ndd, blocknum, fullhash, bfe, info, &rb, fd);
if (e != DS_OK) {
r = deserialize_ftnode_from_rbuf(ftnode, ndd, blocknum, fullhash, bfe, info, &rb, fd);
if (r != 0) {
dump_bad_block(rb.buf,rb.size);
}
cleanup:
toku_free(rb.buf);
return e;
return r;
}
// Read brt node from file into struct. Perform version upgrade if necessary.
enum deserialize_error_code
int
toku_deserialize_ftnode_from (int fd,
BLOCKNUM blocknum,
u_int32_t fullhash,
@ -2446,19 +2457,19 @@ toku_deserialize_ftnode_from (int fd,
// Effect: Read a node in. If possible, read just the header.
{
toku_trace("deserial start");
enum deserialize_error_code e = DS_OK;
int r = 0;
struct rbuf rb = RBUF_INITIALIZER;
read_ftnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb);
e = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd);
if (e != DS_OK) {
r = deserialize_ftnode_header_from_rbuf_if_small_enough(ftnode, ndd, blocknum, fullhash, bfe, &rb, fd);
if (r != 0) {
// Something went wrong, go back to doing it the old way.
e = deserialize_ftnode_from_fd(fd, blocknum, fullhash, ftnode, ndd, bfe, NULL);
r = deserialize_ftnode_from_fd(fd, blocknum, fullhash, ftnode, ndd, bfe, NULL);
}
toku_trace("deserial done");
toku_free(rb.buf);
return e;
return r;
}
void
@ -2729,11 +2740,10 @@ deserialize_rollback_log_from_rbuf_versioned (u_int32_t version, BLOCKNUM blockn
return r;
}
static enum deserialize_error_code
static int
decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
toku_trace("decompress");
int r = 0;
enum deserialize_error_code e = DS_OK;
// get the number of compressed sub blocks
int n_sub_blocks;
n_sub_blocks = toku_dtoh32(*(u_int32_t*)(&raw_block[node_header_overhead]));
@ -2747,7 +2757,7 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size,
u_int32_t xsum = x1764_memory(raw_block, header_length);
u_int32_t stored_xsum = toku_dtoh32(*(u_int32_t *)(raw_block + header_length));
if (xsum != stored_xsum) {
e = DS_XSUM_FAIL;
r = TOKUDB_BAD_CHECKSUM;
}
}
@ -2764,7 +2774,7 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size,
// This predicate needs to be here and instead of where it is set
// for the compiler.
if (e == DS_XSUM_FAIL) {
if (r == TOKUDB_BAD_CHECKSUM) {
goto exit;
}
@ -2773,7 +2783,6 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size,
u_int32_t compressed_size = sub_block[i].compressed_size;
if (compressed_size<=0 || compressed_size>(1<<30)) {
r = toku_db_badformat();
e = DS_ERRNO;
goto exit;
}
@ -2781,7 +2790,6 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size,
if (0) printf("Block %" PRId64 " Compressed size = %u, uncompressed size=%u\n", blocknum.b, compressed_size, uncompressed_size);
if (uncompressed_size<=0 || uncompressed_size>(1<<30)) {
r = toku_db_badformat();
e = DS_ERRNO;
goto exit;
}
}
@ -2809,7 +2817,6 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size,
if (r != 0) {
fprintf(stderr, "%s:%d block %"PRId64" failed %d at %p size %lu\n", __FUNCTION__, __LINE__, blocknum.b, r, raw_block, raw_block_size);
dump_bad_block(raw_block, raw_block_size);
e = DS_ERRNO;
goto exit;
}
lazy_assert_zero(r);
@ -2818,24 +2825,23 @@ decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, size_t raw_block_size,
rb->ndone=0;
exit:
return e;
return r;
}
static enum deserialize_error_code
static int
decompress_from_raw_block_into_rbuf_versioned(u_int32_t version, u_int8_t *raw_block, size_t raw_block_size, struct rbuf *rb, BLOCKNUM blocknum) {
// This function exists solely to accomodate future changes in compression.
enum deserialize_error_code e = DS_OK;
int r = 0;
switch (version) {
case FT_LAYOUT_VERSION_13:
case FT_LAYOUT_VERSION_14:
case FT_LAYOUT_VERSION:
e = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum);
r = decompress_from_raw_block_into_rbuf(raw_block, raw_block_size, rb, blocknum);
break;
default:
lazy_assert(FALSE);
}
return e;
return r;
}
static int
@ -2876,12 +2882,11 @@ read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
}
}
enum deserialize_error_code e = DS_OK;
e = decompress_from_raw_block_into_rbuf_versioned(layout_version, raw_block, size, rb, blocknum);
if (e != DS_OK) {
r = decompress_from_raw_block_into_rbuf_versioned(layout_version, raw_block, size, rb, blocknum);
if (r != 0) {
// We either failed the checksome, or there is a bad format in
// the buffer.
if (e == DS_XSUM_FAIL) {
if (r == TOKUDB_BAD_CHECKSUM) {
fprintf(stderr,
"Checksum failure while reading raw block in file %s.\n",
toku_cachefile_fname_in_env(h->cf));
@ -2898,7 +2903,9 @@ cleanup:
if (rb->buf) toku_free(rb->buf);
rb->buf = NULL;
}
if (raw_block) toku_free(raw_block);
if (raw_block) {
toku_free(raw_block);
}
return r;
}
@ -2933,10 +2940,10 @@ cleanup:
}
enum deserialize_error_code
int
toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h)
{
enum deserialize_error_code e;
int r = 0;
// 15 was the last version with subtree estimates
invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_15);
@ -2944,7 +2951,7 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h)
FTNODE_DISK_DATA unused_ndd = NULL;
struct ftnode_fetch_extra bfe;
fill_bfe_for_min_read(&bfe, h);
e = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &unused_node, &unused_ndd,
r = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &unused_node, &unused_ndd,
&bfe, &h->h->on_disk_stats);
h->in_memory_stats = h->h->on_disk_stats;
@ -2954,7 +2961,7 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h)
if (unused_ndd) {
toku_free(unused_ndd);
}
return e;
return r;
}
#undef UPGRADE_STATUS_VALUE

406
ft/ftverify.c Normal file
View file

@ -0,0 +1,406 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
// vim: expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: ft-serialize.c 43686 2012-05-18 23:21:00Z leifwalsh $"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
////////////////////////////////////////////////////////////////////
// ftverify - Command line tool that checks the validity of a given
// fractal tree file, one block at a time.
////////////////////////////////////////////////////////////////////
#include <toku_portability.h>
#include <toku_assert.h>
#include <fcntl.h>
#include <math.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include "fttypes.h"
#include "ft-internal.h"
#include "ft_layout_version.h"
#include "block_table.h"
#include "x1764.h"
#include "rbuf.h"
#include "sub_block.h"
#include "threadpool.h"
#include "toku_list.h"
// File-scope state for the ftverify tool.
static int num_cores = 0; // cache the number of cores for the parallelization
// Thread pool handle; starts out NULL (not set up in the code shown here).
static struct toku_thread_pool *ft_pool = NULL;
// NOTE(review): presumably the stream results are written to -- confirm
// where it is opened.
static FILE *outf;
// Progress-report step, as a percentage of the total block count; report()
// prints one line every (pct% of total_blocks) blocks.
static double pct = 0.5;
// Struct for reporting sub block stats.
// One instance describes the verification result of a single block; it is
// reset to defaults by init_verify_block_extra().
struct verify_block_extra {
// Number of the block being described.
BLOCKNUM b;
// NOTE(review): presumably the number of compressed sub blocks found in
// the block -- confirm against the code that fills it in.
int n_sub_blocks;
// NOTE(review): presumably the length of the block header that the
// checksums below cover -- confirm.
u_int32_t header_length;
// Checksum computed by the tool, and the checksum read from disk.
// NOTE(review): names suggest these are compared to set header_valid --
// confirm.
u_int32_t calc_xsum;
u_int32_t stored_xsum;
// Both flags default to true in init_verify_block_extra().
bool header_valid;
bool sub_blocks_valid;
// Defaults to NULL; NOTE(review): presumably a per-sub-block result
// array with n_sub_blocks entries -- confirm who allocates/frees it.
struct sub_block_info *sub_block_results;
};
// Reset *e to its default state for block number b:
// all counters and checksums zero, both validity flags true (a block is
// considered valid until a check fails), and no sub block results attached.
static void
init_verify_block_extra(BLOCKNUM b, struct verify_block_extra *e)
{
    e->b = b;

    e->n_sub_blocks = 0;
    e->header_length = 0;
    e->calc_xsum = 0;
    e->stored_xsum = 0;

    e->header_valid = true;
    e->sub_blocks_valid = true;

    e->sub_block_results = NULL;
}
// Print a progress line every `pct` percent of the blocks.
//
// blocks_done   - number of blocks checked so far
// blocks_failed - number of those blocks found to be bad
// total_blocks  - total number of blocks to be checked
//
// The reporting interval is pct% of total_blocks, rounded to the nearest
// integer and never less than one block.  A line is printed only when
// blocks_done is an exact multiple of that interval.
static void
report(int64_t blocks_done, int64_t blocks_failed, int64_t total_blocks)
{
    int64_t interval = llrint(pct * total_blocks / 100.0);
    if (interval < 1) {
        interval = 1;
    }

    // Not on a reporting boundary: stay quiet.
    if (blocks_done % interval != 0) {
        return;
    }

    double percent_complete = (100.0 * blocks_done) / total_blocks;
    printf("% 3.3lf%% | %"PRId64" blocks checked, %"PRId64" bad block(s) detected\n",
           percent_complete, blocks_done, blocks_failed);
    fflush(stdout);
}
// Helper function to deserialize one of the two headers for the ft
// we are checking.
static void
deserialize_headers(int fd, struct ft **h1p, struct ft **h2p)
{
struct rbuf rb_0;
struct rbuf rb_1;
u_int64_t checkpoint_count_0;
u_int64_t checkpoint_count_1;
LSN checkpoint_lsn_0;
LSN checkpoint_lsn_1;
u_int32_t version_0, version_1;
BOOL h0_acceptable = FALSE;
BOOL h1_acceptable = FALSE;
int r0, r1;
enum deserialize_error_code e = DS_OK;
{
toku_off_t header_0_off = 0;
r0 = deserialize_ft_from_fd_into_rbuf(
fd,
header_0_off,
&rb_0,
&checkpoint_count_0,
&checkpoint_lsn_0,
&version_0,
&e
);
if ((r0==0) && (checkpoint_lsn_0.lsn <= MAX_LSN.lsn)) {
h0_acceptable = TRUE;
}
}
{
toku_off_t header_1_off = BLOCK_ALLOCATOR_HEADER_RESERVE;
r1 = deserialize_ft_from_fd_into_rbuf(
fd,
header_1_off,
&rb_1,
&checkpoint_count_1,
&checkpoint_lsn_1,
&version_1,
&e
);
if ((r1==0) && (checkpoint_lsn_1.lsn <= MAX_LSN.lsn)) {
h1_acceptable = TRUE;
}
}
// If either header is too new, the dictionary is unreadable
if (r0 == TOKUDB_DICTIONARY_TOO_NEW || r1 == TOKUDB_DICTIONARY_TOO_NEW) {
fprintf(stderr, "This dictionary was created with too new a version of TokuDB. Aborting.\n");
abort();
}
if (h0_acceptable) {
printf("Found dictionary header 1 with LSN %"PRIu64"\n", checkpoint_lsn_0.lsn);
e = deserialize_ft_versioned(fd, &rb_0, h1p, version_0);
if (e != DS_OK) {
printf("---Header Error----\n");
}
} else {
*h1p = NULL;
}
if (h1_acceptable) {
printf("Found dictionary header 2 with LSN %"PRIu64"\n", checkpoint_lsn_1.lsn);
e = deserialize_ft_versioned(fd, &rb_1, h2p, version_1);
if (e != DS_OK) {
printf("---Header Error----\n");
}
} else {
*h2p = NULL;
}
if (rb_0.buf) toku_free(rb_0.buf);
if (rb_1.buf) toku_free(rb_1.buf);
}
// Progress bookkeeping shared between check_block_table() and the
// per-block callback check_block().
struct check_block_table_extra {
    int fd;                 // descriptor the blocks are read from
    int64_t blocks_done;    // blocks visited so far
    int64_t blocks_failed;  // visited blocks with at least one failed check
    int64_t total_blocks;   // number of blocks expected to be visited
    struct ft *h;           // ft whose block table is being walked
};
// Read, decompress, and check the given block.
//
// Callback handed to toku_blocktable_iterate().  extra must point at
// the struct check_block_table_extra describing the run.  Every failed
// check prints one diagnostic line; a block with at least one failed
// check bumps blocks_failed once, and blocks_done is bumped for every
// block visited.
//
// Once the read, the magic, the version or the node info checksum has
// failed, the rest of the block is not parsed: the fields that locate
// the partitions can no longer be trusted.
//
// Always returns 0 so that the iteration moves on to the next block.
static int
check_block(BLOCKNUM blocknum, int64_t UU(blocksize), int64_t UU(address), void *extra)
{
    int r = 0;
    int failure = 0;
    struct check_block_table_extra *cbte = extra;
    int fd = cbte->fd;
    FT ft = cbte->h;

    struct verify_block_extra be;
    init_verify_block_extra(blocknum, &be);

    // Everything released under the cleanup label starts out empty so
    // that an early exit never frees an indeterminate pointer.
    struct rbuf rb = RBUF_INITIALIZER;
    FTNODE node = NULL;
    FTNODE_DISK_DATA ndd = NULL;

    // Let's read the block off of disk and fill a buffer with that
    // block.
    r = read_block_from_fd_into_rbuf(fd, blocknum, ft, &rb);
    if (r != 0) {
        // There is no data to parse; count the block as bad and stop.
        printf(" Read block failed.\n");
        failure++;
        goto cleanup;
    }

    // Allocate the node.
    node = toku_xmalloc(sizeof(*node));
    initialize_ftnode(node, blocknum);

    r = read_and_check_magic(&rb);
    if (r == DB_BADFORMAT) {
        printf(" Magic failed.\n");
        failure++;
        goto cleanup;
    }

    r = read_and_check_version(node, &rb);
    if (r != 0) {
        printf(" Version check failed.\n");
        failure++;
        goto cleanup;
    }

    int version = node->layout_version_read_from_disk;

    ////////////////////////////
    // UPGRADE FORK GOES HERE //
    ////////////////////////////

    read_node_info(node, &rb, version);
    allocate_and_read_partition_offsets(node, &rb, &ndd);

    r = check_node_info_checksum(&rb);
    if (r == TOKUDB_BAD_CHECKSUM) {
        // The child count and partition offsets read above are covered
        // by this checksum, so they cannot be used to walk the block.
        printf(" Node info checksum failed.\n");
        failure++;
        goto cleanup;
    }

    // Get the partition info sub block.
    struct sub_block sb;
    sub_block_init(&sb);
    r = read_compressed_sub_block(&rb, &sb);
    if (r != 0) {
        printf(" Partition info checksum failed.\n");
        failure++;
    } else {
        // Only decompress data whose compressed form checked out.
        just_decompress_sub_block(&sb);
        // NOTE(review): assumes just_decompress_sub_block() heap
        // allocates sb.uncompressed_ptr and leaves ownership with the
        // caller -- confirm against its definition.
        toku_free(sb.uncompressed_ptr);
    }

    // If we want to inspect the data inside the partitions, we need
    // to call setup_ftnode_partitions(node, bfe, true)

    // <CER> TODO: Create function for this.
    // Using the node info, decompress all the keys and pivots to
    // detect any corruptions.
    for (int i = 0; i < node->n_children; ++i) {
        u_int32_t curr_offset = BP_START(ndd,i);
        u_int32_t curr_size   = BP_SIZE(ndd,i);
        struct rbuf curr_rbuf = {.buf = NULL, .size = 0, .ndone = 0};
        rbuf_init(&curr_rbuf, rb.buf + curr_offset, curr_size);
        struct sub_block curr_sb;
        sub_block_init(&curr_sb);

        // Check this partition through its own window and sub block
        // rather than the block-wide rb/sb used for the node info.
        r = read_compressed_sub_block(&curr_rbuf, &curr_sb);
        if (r != 0) {
            printf(" Compressed child partition %d checksum failed.\n", i);
            failure++;
            continue;
        }
        just_decompress_sub_block(&curr_sb);

        r = verify_ftnode_sub_block(&curr_sb);
        if (r != 0) {
            printf(" Uncompressed child partition %d checksum failed.\n", i);
            failure++;
        }

        // <CER> If needed, we can print row and/or pivot info at this
        // point.

        // NOTE(review): same ownership assumption as for sb above.
        toku_free(curr_sb.uncompressed_ptr);
    }

cleanup:
    // Cleanup and error incrementing.
    if (failure) {
        cbte->blocks_failed++;
    }
    cbte->blocks_done++;

    // NOTE(review): ndd is presumably heap allocated by
    // allocate_and_read_partition_offsets() -- confirm the allocator.
    if (ndd) {
        toku_free(ndd);
    }
    if (node) {
        toku_free(node);
    }
    if (rb.buf) {
        toku_free(rb.buf);
    }

    // Print the status of this block to the console.
    report(cbte->blocks_done, cbte->blocks_failed, cbte->total_blocks);

    // We need to ALWAYS return 0 if we want to continue iterating
    // through the nodes in the file.
    return 0;
}
// Walks block table bt with toku_blocktable_iterate(), handing every
// visited block to check_block().  A banner with the number of blocks
// in use is printed before the walk and a summary of checked and bad
// blocks after it.  Asserts that the number of blocks visited equals
// the number reported as in use.
static void
check_block_table(int fd, BLOCK_TABLE bt, struct ft *h)
{
    const int64_t blocks_in_use = toku_block_get_blocks_in_use_unlocked(bt);

    printf("Starting verification of checkpoint containing");
    printf(" %" PRId64 " blocks.\n", blocks_in_use);
    fflush(stdout);

    // Shared state updated by check_block() for each block.
    struct check_block_table_extra progress;
    progress.fd            = fd;
    progress.blocks_done   = 0;
    progress.blocks_failed = 0;
    progress.total_blocks  = blocks_in_use;
    progress.h             = h;

    int r = toku_blocktable_iterate(bt, TRANSLATION_CURRENT, check_block,
                                    &progress, true, true);
    if (r != 0) {
        // We can print more information here if necessary.
    }

    assert(progress.blocks_done == progress.total_blocks);

    printf("Finished verification. ");
    printf(" %" PRId64 " blocks checked,", progress.blocks_done);
    printf(" %" PRId64 " bad block(s) detected\n", progress.blocks_failed);
    fflush(stdout);
}
// Validate the argument count and print usage if it is incorrect.
//
//   argc - argument count as passed to main(); 3 or 4 is accepted
//          (program name, dictionary file, output file and an optional
//          percentage).
//
// Returns 0 when the count is acceptable and 1 otherwise.  On rejection
// an error line naming the actual problem (too few or too many
// arguments) and the usage text are written to stdout.
static int
check_args(int argc)
{
    if (argc >= 3 && argc <= 4) {
        return 0;
    }

    printf("ERROR: ");
    printf("%s\n", (argc < 3) ? "Too few arguments." : "Too many arguments.");
    printf("USAGE:\n");
    printf(" verify_block_checksum");
    printf(" DICTIONARY_FILE OUTPUT_FILE [PERCENTAGE]\n");
    printf(" [PERCENTAGE] is optional.\n");
    return 1;
}
// Main driver for verify_block_checksum.
//
//   argv[1] - dictionary file to verify (opened read-only).
//   argv[2] - output file (opened for writing).
//   argv[3] - optional reporting percentage, must be in (0, 100].
//
// Returns non-zero when the arguments are rejected by check_args() and
// 0 once a verification run has completed.  Failure to open either file
// aborts the process.
int
main(int argc, char *argv[])
{
    int r = 0;
    int dictfd;
    char *dictfname, *outfname;
    // NULL means "no usable header"; start that way so the checks below
    // never look at an indeterminate pointer.
    struct ft *h1 = NULL;
    struct ft *h2 = NULL;

    r = check_args(argc);
    if (r) {
        goto exit;
    }

    assert(argc == 3 || argc == 4);
    dictfname = argv[1];
    outfname = argv[2];
    if (argc == 4) {
        errno = 0;
        pct = strtod(argv[3], NULL);
        assert_zero(errno);
        assert(pct > 0.0 && pct <= 100.0);
    }

    // Open the file as read-only.
    dictfd = open(dictfname, O_RDONLY | O_BINARY, S_IRWXU | S_IRWXG | S_IRWXO);
    if (dictfd < 0) {
        perror(dictfname);
        fflush(stderr);
        abort();
    }
    outf = fopen(outfname, "w");
    if (!outf) {
        perror(outfname);
        fflush(stderr);
        abort();
    }

    // body of toku_ft_serialize_init();
    num_cores = toku_os_get_number_active_processors();
    r = toku_thread_pool_create(&ft_pool, num_cores);
    assert_zero(r);

    // deserialize the header(s)
    deserialize_headers(dictfd, &h1, &h2);

    // walk over the block table and check blocks
    if (h1) {
        printf("Checking dictionary from header 1.\n");
        check_block_table(dictfd, h1->blocktable, h1);
    }
    if (h2) {
        printf("Checking dictionary from header 2.\n");
        check_block_table(dictfd, h2->blocktable, h2);
    }
    if (h1 == NULL && h2 == NULL) {
        printf("Both headers have a corruption and could not be used.\n");
    }

    toku_thread_pool_destroy(&ft_pool);

exit:
    // Propagate the argument check result instead of always reporting
    // success.
    return r;
}