2007-07-13 19:37:47 +00:00
# define _XOPEN_SOURCE 500
//#include "pma.h"
# include "brt-internal.h"
2007-07-24 01:32:03 +00:00
# include "key.h"
2007-08-21 23:32:17 +00:00
# include "rbuf.h"
# include "wbuf.h"
2007-07-13 19:37:47 +00:00
# include <assert.h>
# include <unistd.h>
# include <stdio.h>
# include <arpa/inet.h>
2007-11-14 17:58:38 +00:00
2007-11-19 23:54:17 +00:00
/* Number of bytes every serialized brtnode occupies in addition to its
 * height-specific payload.  The fields are listed in the order in which
 * toku_seralize_brtnode_to() writes them. */
static const int brtnode_header_overhead =
      8   /* magic: "tokunode" or "tokuleaf" */
    + 4   /* layout version */
    + 8   /* lsn */
    + 4   /* total serialized size of the node */
    + 4   /* height */
    + 4   /* random multiplier for the fingerprint */
    + 4   /* local fingerprint */
    + 4;  /* crc32 at the end */
2007-11-14 17:58:38 +00:00
2007-11-19 23:54:17 +00:00
/*
 * Compute the serialized size of NODE by walking its contents instead of
 * trusting the cached byte counters.  Used as a cross-check for
 * toku_serialize_brtnode_size().
 *
 * While walking, asserts that the cached counters (n_bytes_in_hashtables,
 * totalchildkeylens, n_bytes_in_buffer) agree with what is actually stored.
 *
 * Returns the number of bytes the serialized node occupies.
 */
static unsigned int toku_serialize_brtnode_size_slow (BRTNODE node) {
    unsigned int size = brtnode_header_overhead;
    if (node->height > 0) {
        unsigned int hsize = 0;   /* bytes of buffered commands in all hashtables */
        unsigned int csize = 0;   /* bytes of pivot key data */
        int i;
        int n_hashtables = node->u.n.n_children;

        size += 4; /* n_children */
        size += 4; /* subtree fingerprint */
        for (i = 0; i < node->u.n.n_children - 1; i++) {
            size += 4; /* pivot key length */
            size += 1; /* pivotflags */
            csize += node->u.n.childkeylens[i];
        }
        for (i = 0; i < node->u.n.n_children; i++) {
            size += 8; /* diskoff */
            size += 4; /* child subtree fingerprint */
        }
        assert(0 <= n_hashtables && n_hashtables < TREE_FANOUT + 1);
        for (i = 0; i < n_hashtables; i++) {
            /* The serializer writes an entry count in front of every
             * hashtable, so it must be charged once per table (not once
             * per node) to agree with toku_serialize_brtnode_size(). */
            size += 4; /* n_entries */
            HASHTABLE_ITERATE(node->u.n.htables[i],
                              key __attribute__((__unused__)), keylen,
                              data __attribute__((__unused__)), datalen,
                              type __attribute__((__unused__)),
                              (hsize += BRT_CMD_OVERHEAD + KEY_VALUE_OVERHEAD + keylen + datalen));
        }
        assert(hsize == node->u.n.n_bytes_in_hashtables);
        assert(csize == node->u.n.totalchildkeylens);
        return size + hsize + csize;
    } else {
        unsigned int hsize = 0;   /* bytes of key/value pairs in the leaf buffer */
        PMA_ITERATE(node->u.l.buffer,
                    key __attribute__((__unused__)), keylen,
                    data __attribute__((__unused__)), datalen,
                    (hsize += KEY_VALUE_OVERHEAD + keylen + datalen));
        assert(hsize == node->u.l.n_bytes_in_buffer);
        hsize += 4; /* n_entries in the buffer table */
        return size + hsize;
    }
}
2007-11-19 23:54:17 +00:00
/*
 * Return the number of bytes NODE occupies when serialized, computed from
 * the node's cached byte counters.
 *
 * For leaf nodes, when memory_check is set, the answer is cross-checked
 * against toku_serialize_brtnode_size_slow() and the process aborts on a
 * mismatch.  Internal nodes are not cross-checked here.
 */
unsigned int toku_serialize_brtnode_size (BRTNODE node) {
    unsigned int result = brtnode_header_overhead;
    assert(sizeof(off_t) == 8);
    if (node->height > 0) {
        result += 4; /* n_children */
        result += 4; /* subtree fingerprint */
        result += (4 + 1) * (node->u.n.n_children - 1); /* per pivot: key length + pivotflags */
        result += node->u.n.totalchildkeylens; /* the pivot key bytes, without their length fields */
        result += (8 + 4 + 4) * (node->u.n.n_children); /* per child: disk offset, hashtable entry count, subtree fingerprint */
        result += node->u.n.n_bytes_in_hashtables;
    } else {
        result += 4; /* n_entries in buffer table */
        result += node->u.l.n_bytes_in_buffer;
        if (memory_check) {
            unsigned int slowresult = toku_serialize_brtnode_size_slow(node);
            /* Both values are unsigned, so print them with %u. */
            if (result != slowresult) printf("%s:%d result=%u slowresult=%u\n", __FILE__, __LINE__, result, slowresult);
            assert(result == slowresult);
        }
    }
    return result;
}
2007-11-19 23:54:17 +00:00
/*
 * Serialize NODE and write it to FD at byte offset OFF.
 *
 *   fd    file descriptor to pwrite() to
 *   off   file offset at which the node image starts
 *   size  capacity, in bytes, of the scratch buffer used for the image;
 *         the serialized node must fit in it
 *   node  the node to serialize
 *
 * Image layout: 8-byte magic ("tokuleaf" / "tokunode"), layout version,
 * lsn, total size, height, rand4fingerprint, local fingerprint, then the
 * height-specific payload, then (depending on CRC_ATEND / CRC_INCR) a
 * trailing crc32.
 *
 * There is no error return: any inconsistency or I/O failure aborts via
 * assert().
 */
void toku_seralize_brtnode_to (int fd, DISKOFF off, DISKOFF size, BRTNODE node) {
    struct wbuf w;
    int i;
    unsigned int calculated_size = toku_serialize_brtnode_size(node);

    /* Validate the sizes before allocating or writing, so an undersized
     * buffer is caught before anything is serialized into it. */
    assert(size > 0);
    assert(calculated_size <= size);
    char *MALLOC_N(size, buf);
    assert(buf != 0);
    wbuf_init(&w, buf, size);

    /* Fixed header.  The magic is exactly 8 bytes: "toku" + "leaf"/"node". */
    wbuf_literal_bytes(&w, "toku", 4);
    if (node->height == 0) wbuf_literal_bytes(&w, "leaf", 4);
    else                   wbuf_literal_bytes(&w, "node", 4);
    wbuf_int(&w, node->layout_version);
    wbuf_ulonglong(&w, node->lsn.lsn);
    wbuf_int(&w, calculated_size);
    wbuf_int(&w, node->height);
    wbuf_int(&w, node->rand4fingerprint);
    wbuf_int(&w, node->local_fingerprint);

    if (node->height > 0) {
        /* The subtree fingerprint is not kept in the node; it is the local
         * fingerprint plus the subtree fingerprints of all children. */
        {
            u_int32_t subtree_fingerprint = node->local_fingerprint;
            for (i = 0; i < node->u.n.n_children; i++) {
                subtree_fingerprint += node->u.n.child_subtree_fingerprints[i];
            }
            wbuf_int(&w, subtree_fingerprint);
        }
        wbuf_int(&w, node->u.n.n_children);
        for (i = 0; i < node->u.n.n_children; i++) {
            wbuf_int(&w, node->u.n.child_subtree_fingerprints[i]);
        }
        /* All pivot flags first, then all pivot keys. */
        for (i = 0; i < node->u.n.n_children - 1; i++) {
            wbuf_char(&w, node->u.n.pivotflags[i]);
        }
        for (i = 0; i < node->u.n.n_children - 1; i++) {
            wbuf_bytes(&w, node->u.n.childkeys[i], node->u.n.childkeylens[i]);
        }
        for (i = 0; i < node->u.n.n_children; i++) {
            wbuf_diskoff(&w, node->u.n.children[i]);
        }
        {
            int n_hash_tables = node->u.n.n_children;
            u_int32_t check_local_fingerprint = 0;
            for (i = 0; i < n_hash_tables; i++) {
                /* Each hashtable is preceded by its own entry count. */
                wbuf_int(&w, toku_hashtable_n_entries(node->u.n.htables[i]));
                HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
                                  ({
                                      wbuf_char(&w, type);
                                      wbuf_bytes(&w, key, keylen);
                                      wbuf_bytes(&w, data, datalen);
                                      check_local_fingerprint += node->rand4fingerprint * toku_calccrc32_cmd(type, key, keylen, data, datalen);
                                  }));
            }
            /* The fingerprint recomputed from the buffered commands must
             * match the one the node carries. */
            assert(check_local_fingerprint == node->local_fingerprint);
        }
    } else {
        wbuf_int(&w, toku_pma_n_entries(node->u.l.buffer));
        PMA_ITERATE(node->u.l.buffer, key, keylen, data, datalen,
                    (wbuf_bytes(&w, key, keylen),
                     wbuf_bytes(&w, data, datalen)));
    }
    assert(w.ndone <= w.size);
#ifdef CRC_ATEND
    wbuf_int(&w, crc32(toku_null_crc, w.buf, w.ndone));
#endif
#ifdef CRC_INCR
    wbuf_int(&w, w.crc32);
#endif
    {
        ssize_t r = pwrite(fd, w.buf, w.ndone, off);
        if (r < 0) printf("r=%ld errno=%d\n", (long)r, errno);
        assert((size_t)r == w.ndone);
    }
    /* What was written must be exactly what the size function predicted. */
    assert(calculated_size == w.ndone);
    assert(w.ndone <= size);
    toku_free(buf);
}
2007-11-19 23:54:17 +00:00
int toku_deserialize_brtnode_from ( int fd , DISKOFF off , BRTNODE * brtnode , int flags , int nodesize ,
2007-11-17 13:23:32 +00:00
int ( * bt_compare ) ( DB * , const DBT * , const DBT * ) ,
int ( * dup_compare ) ( DB * , const DBT * , const DBT * ) ) {
2007-07-13 19:37:47 +00:00
TAGMALLOC ( BRTNODE , result ) ;
2007-08-21 23:32:17 +00:00
struct rbuf rc ;
2007-07-13 19:37:47 +00:00
int i ;
2007-11-14 17:58:38 +00:00
u_int32_t datasize ;
2007-07-13 19:37:47 +00:00
int r ;
if ( errno ! = 0 ) {
r = errno ;
2007-07-20 18:00:14 +00:00
if ( 0 ) { died0 : toku_free ( result ) ; }
2007-07-13 19:37:47 +00:00
return r ;
}
{
2007-11-14 17:58:38 +00:00
u_int32_t datasize_n ;
r = pread ( fd , & datasize_n , sizeof ( datasize_n ) , off + 8 + 4 + 8 ) ;
2007-07-13 19:37:47 +00:00
//printf("%s:%d r=%d the datasize=%d\n", __FILE__, __LINE__, r, ntohl(datasize_n));
if ( r ! = sizeof ( datasize_n ) ) {
if ( r = = - 1 ) r = errno ;
else r = DB_BADFORMAT ;
goto died0 ;
}
datasize = ntohl ( datasize_n ) ;
if ( datasize < = 0 | | datasize > ( 1 < < 30 ) ) { r = DB_BADFORMAT ; goto died0 ; }
}
2007-07-20 18:00:14 +00:00
rc . buf = toku_malloc ( datasize ) ;
2007-11-14 17:58:38 +00:00
//printf("%s:%d errno=%d\n", __FILE__, __LINE__, errno);
2007-07-13 19:37:47 +00:00
if ( errno ! = 0 ) {
2007-07-20 18:00:14 +00:00
if ( 0 ) { died1 : toku_free ( rc . buf ) ; }
2007-07-13 19:37:47 +00:00
r = errno ;
goto died0 ;
}
rc . size = datasize ;
assert ( rc . size > 0 ) ;
rc . ndone = 0 ;
//printf("Deserializing %lld datasize=%d\n", off, datasize);
{
2007-11-14 17:58:38 +00:00
ssize_t rlen = pread ( fd , rc . buf , datasize , off ) ;
//printf("%s:%d pread->%d datasize=%d\n", __FILE__, __LINE__, r, datasize);
if ( ( size_t ) rlen ! = datasize ) {
//printf("%s:%d size messed up\n", __FILE__, __LINE__);
r = errno ;
goto died1 ;
}
2007-07-13 19:37:47 +00:00
//printf("Got %d %d %d %d\n", rc.buf[0], rc.buf[1], rc.buf[2], rc.buf[3]);
}
2007-11-14 17:58:38 +00:00
{
bytevec tmp ;
rbuf_literal_bytes ( & rc , & tmp , 8 ) ;
if ( memcmp ( tmp , " tokuleaf " , 8 ) ! = 0
& & memcmp ( tmp , " tokunode " , 8 ) ! = 0 ) {
r = DB_BADFORMAT ;
goto died1 ;
}
}
result - > layout_version = rbuf_int ( & rc ) ;
if ( result - > layout_version ! = 0 ) {
r = DB_BADFORMAT ;
goto died1 ;
}
result - > lsn . lsn = rbuf_ulonglong ( & rc ) ;
2007-07-13 19:37:47 +00:00
{
unsigned int stored_size = rbuf_int ( & rc ) ;
if ( stored_size ! = datasize ) { r = DB_BADFORMAT ; goto died1 ; }
}
result - > nodesize = nodesize ; // How to compute the nodesize?
result - > thisnodename = off ;
result - > height = rbuf_int ( & rc ) ;
2007-11-14 17:58:38 +00:00
result - > rand4fingerprint = rbuf_int ( & rc ) ;
result - > local_fingerprint = rbuf_int ( & rc ) ;
2007-09-18 16:09:55 +00:00
result - > dirty = 0 ;
2007-07-13 19:37:47 +00:00
//printf("height==%d\n", result->height);
if ( result - > height > 0 ) {
result - > u . n . totalchildkeylens = 0 ;
2007-08-23 18:07:18 +00:00
for ( i = 0 ; i < TREE_FANOUT ; i + + ) {
2007-11-14 17:58:38 +00:00
result - > u . n . child_subtree_fingerprints [ i ] = 0 ;
2007-08-23 18:07:18 +00:00
result - > u . n . childkeys [ i ] = 0 ;
result - > u . n . childkeylens [ i ] = 0 ;
}
for ( i = 0 ; i < TREE_FANOUT + 1 ; i + + ) {
result - > u . n . children [ i ] = 0 ;
result - > u . n . htables [ i ] = 0 ;
result - > u . n . n_bytes_in_hashtable [ i ] = 0 ;
result - > u . n . n_cursors [ i ] = 0 ;
}
2007-11-14 17:58:38 +00:00
u_int32_t subtree_fingerprint = rbuf_int ( & rc ) ;
u_int32_t check_subtree_fingerprint = 0 ;
2007-07-13 19:37:47 +00:00
result - > u . n . n_children = rbuf_int ( & rc ) ;
//printf("n_children=%d\n", result->n_children);
assert ( result - > u . n . n_children > = 0 & & result - > u . n . n_children < = TREE_FANOUT ) ;
2007-11-14 17:58:38 +00:00
for ( i = 0 ; i < result - > u . n . n_children ; i + + ) {
u_int32_t childfp = rbuf_int ( & rc ) ;
result - > u . n . child_subtree_fingerprints [ i ] = childfp ;
check_subtree_fingerprint + = childfp ;
}
2007-11-17 13:23:32 +00:00
for ( i = 0 ; i < result - > u . n . n_children - 1 ; i + + )
result - > u . n . pivotflags [ i ] = rbuf_char ( & rc ) ;
2007-07-13 19:37:47 +00:00
for ( i = 0 ; i < result - > u . n . n_children - 1 ; i + + ) {
bytevec childkeyptr ;
rbuf_bytes ( & rc , & childkeyptr , & result - > u . n . childkeylens [ i ] ) ; /* Returns a pointer into the rbuf. */
result - > u . n . childkeys [ i ] = memdup ( childkeyptr , result - > u . n . childkeylens [ i ] ) ;
//printf(" key %d length=%d data=%s\n", i, result->childkeylens[i], result->childkeys[i]);
result - > u . n . totalchildkeylens + = result - > u . n . childkeylens [ i ] ;
}
for ( i = 0 ; i < result - > u . n . n_children ; i + + ) {
result - > u . n . children [ i ] = rbuf_diskoff ( & rc ) ;
//printf("Child %d at %lld\n", i, result->children[i]);
}
for ( i = 0 ; i < TREE_FANOUT + 1 ; i + + ) {
result - > u . n . n_bytes_in_hashtable [ i ] = 0 ;
}
result - > u . n . n_bytes_in_hashtables = 0 ;
2007-07-24 01:32:03 +00:00
for ( i = 0 ; i < result - > u . n . n_children ; i + + ) {
2007-11-14 17:58:38 +00:00
r = toku_hashtable_create ( & result - > u . n . htables [ i ] ) ;
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) {
int j ;
2007-07-24 01:32:03 +00:00
if ( 0 ) { died_12 : j = result - > u . n . n_bytes_in_hashtables ; }
2007-07-24 15:08:05 +00:00
for ( j = 0 ; j < i ; j + + ) toku_hashtable_free ( & result - > u . n . htables [ j ] ) ;
2007-07-13 19:37:47 +00:00
goto died1 ;
}
}
{
2007-07-24 01:32:03 +00:00
int cnum ;
2007-11-14 17:58:38 +00:00
u_int32_t check_local_fingerprint = 0 ;
2007-07-24 01:32:03 +00:00
for ( cnum = 0 ; cnum < result - > u . n . n_children ; cnum + + ) {
int n_in_this_hash = rbuf_int ( & rc ) ;
//printf("%d in hash\n", n_in_hash);
for ( i = 0 ; i < n_in_this_hash ; i + + ) {
int diff ;
2007-09-06 21:36:45 +00:00
int type ;
2007-07-24 01:32:03 +00:00
bytevec key ; ITEMLEN keylen ;
bytevec val ; ITEMLEN vallen ;
2007-11-19 23:54:17 +00:00
toku_verify_counts ( result ) ;
2007-09-06 21:36:45 +00:00
type = rbuf_char ( & rc ) ;
2007-07-24 01:32:03 +00:00
rbuf_bytes ( & rc , & key , & keylen ) ; /* Returns a pointer into the rbuf. */
rbuf_bytes ( & rc , & val , & vallen ) ;
2007-11-14 17:58:38 +00:00
check_local_fingerprint + = result - > rand4fingerprint * toku_calccrc32_cmd ( type , key , keylen , val , vallen ) ;
2007-09-06 21:36:45 +00:00
//printf("Found %s,%s\n", (char*)key, (char*)val);
2007-07-24 01:32:03 +00:00
{
2007-11-14 17:58:38 +00:00
r = toku_hash_insert ( result - > u . n . htables [ cnum ] , key , keylen , val , vallen , type ) ; /* Copies the data into the hash table. */
2007-07-24 01:32:03 +00:00
if ( r ! = 0 ) { goto died_12 ; }
}
2007-09-06 21:36:45 +00:00
diff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD ;
2007-07-24 01:32:03 +00:00
result - > u . n . n_bytes_in_hashtables + = diff ;
result - > u . n . n_bytes_in_hashtable [ cnum ] + = diff ;
//printf("Inserted\n");
2007-07-13 19:37:47 +00:00
}
}
2007-11-14 17:58:38 +00:00
if ( check_local_fingerprint ! = result - > local_fingerprint ) {
fprintf ( stderr , " %s:%d local fingerprint is wrong (found %8x calcualted %8x \n " , __FILE__ , __LINE__ , result - > local_fingerprint , check_local_fingerprint ) ;
return DB_BADFORMAT ;
}
if ( check_subtree_fingerprint + check_local_fingerprint ! = subtree_fingerprint ) {
fprintf ( stderr , " %s:%d subtree fingerprint is wrong \n " , __FILE__ , __LINE__ ) ;
return DB_BADFORMAT ;
}
2007-07-13 19:37:47 +00:00
}
} else {
int n_in_buf = rbuf_int ( & rc ) ;
result - > u . l . n_bytes_in_buffer = 0 ;
2007-11-20 00:32:25 +00:00
r = toku_pma_create ( & result - > u . l . buffer , bt_compare , nodesize ) ;
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) {
2007-11-20 00:32:25 +00:00
if ( 0 ) { died_21 : toku_pma_free ( & result - > u . l . buffer ) ; }
2007-07-13 19:37:47 +00:00
goto died1 ;
}
2007-11-20 00:32:25 +00:00
toku_pma_set_dup_mode ( result - > u . l . buffer , flags ) ;
if ( flags & TOKU_DB_DUPSORT ) toku_pma_set_dup_compare ( result - > u . l . buffer , dup_compare ) ;
2007-07-13 19:37:47 +00:00
//printf("%s:%d r PMA= %p\n", __FILE__, __LINE__, result->u.l.buffer);
2007-08-08 20:58:25 +00:00
# define BRT_USE_PMA_BULK_INSERT 1
# if BRT_USE_PMA_BULK_INSERT
{
DBT keys [ n_in_buf ] , vals [ n_in_buf ] ;
for ( i = 0 ; i < n_in_buf ; i + + ) {
bytevec key ; ITEMLEN keylen ;
bytevec val ; ITEMLEN vallen ;
2007-11-19 23:54:17 +00:00
toku_verify_counts ( result ) ;
2007-08-08 20:58:25 +00:00
rbuf_bytes ( & rc , & key , & keylen ) ; /* Returns a pointer into the rbuf. */
fill_dbt ( & keys [ i ] , key , keylen ) ;
rbuf_bytes ( & rc , & val , & vallen ) ;
fill_dbt ( & vals [ i ] , val , vallen ) ;
result - > u . l . n_bytes_in_buffer + = keylen + vallen + KEY_VALUE_OVERHEAD ;
}
if ( n_in_buf > 0 ) {
2007-11-14 17:58:38 +00:00
u_int32_t actual_sum = 0 ;
2007-11-20 00:32:25 +00:00
r = toku_pma_bulk_insert ( result - > u . l . buffer , keys , vals , n_in_buf , result - > rand4fingerprint , & actual_sum ) ;
2007-08-08 20:58:25 +00:00
if ( r ! = 0 ) goto died_21 ;
2007-11-14 17:58:38 +00:00
if ( actual_sum ! = result - > local_fingerprint ) {
//fprintf(stderr, "%s:%d Corrupted checksum stored=%08x rand=%08x actual=%08x height=%d n_keys=%d\n", __FILE__, __LINE__, result->rand4fingerprint, result->local_fingerprint, actual_sum, result->height, n_in_buf);
return DB_BADFORMAT ;
goto died_21 ;
} else {
//fprintf(stderr, "%s:%d Good checksum=%08x height=%d\n", __FILE__, __LINE__, actual_sum, result->height);
}
2007-08-08 20:58:25 +00:00
}
}
# else
2007-07-13 19:37:47 +00:00
for ( i = 0 ; i < n_in_buf ; i + + ) {
bytevec key ; ITEMLEN keylen ;
bytevec val ; ITEMLEN vallen ;
2007-11-19 23:54:17 +00:00
toku_verify_counts ( result ) ;
2007-07-13 19:37:47 +00:00
rbuf_bytes ( & rc , & key , & keylen ) ; /* Returns a pointer into the rbuf. */
rbuf_bytes ( & rc , & val , & vallen ) ;
{
2007-07-24 01:32:03 +00:00
DBT k , v ;
2007-11-20 00:32:25 +00:00
r = toku_pma_insert ( result - > u . l . buffer , fill_dbt ( & k , key , keylen ) , fill_dbt ( & v , val , vallen ) , 0 ) ;
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) goto died_21 ;
}
result - > u . l . n_bytes_in_buffer + = keylen + vallen + KEY_VALUE_OVERHEAD ;
}
2007-08-08 20:58:25 +00:00
# endif
2007-07-13 19:37:47 +00:00
}
2007-11-14 17:58:38 +00:00
{
unsigned int n_read_so_far = rc . ndone ;
if ( n_read_so_far + 4 ! = rc . size ) {
r = DB_BADFORMAT ; goto died_21 ;
}
uint32_t crc = toku_crc32 ( toku_null_crc , rc . buf , n_read_so_far ) ;
uint32_t storedcrc = rbuf_int ( & rc ) ;
if ( crc ! = storedcrc ) {
printf ( " Bad CRC \n " ) ;
assert ( 0 ) ; //this is wrong!!!
r = DB_BADFORMAT ;
goto died_21 ;
}
}
2007-07-13 19:37:47 +00:00
//printf("%s:%d Ok got %lld n_children=%d\n", __FILE__, __LINE__, result->thisnodename, result->n_children);
2007-07-20 18:00:14 +00:00
toku_free ( rc . buf ) ;
2007-07-13 19:37:47 +00:00
* brtnode = result ;
2007-11-19 23:54:17 +00:00
toku_verify_counts ( result ) ;
2007-07-13 19:37:47 +00:00
return 0 ;
}
2007-11-19 23:54:17 +00:00
/*
 * Sanity-check the byte counters of NODE, aborting via assert() on failure.
 *
 * Leaf (height 0): the buffer pointer must be set.
 * Internal node: the per-child hashtable byte counts must add up to
 * n_bytes_in_hashtables, and the counts in the slots past n_children
 * must all be zero.
 */
void toku_verify_counts (BRTNODE node) {
    unsigned int total = 0;
    int slot = 0;

    if (node->height == 0) {
        assert(node->u.l.buffer);
        return;
    }

    /* Add up the slots that belong to actual children. */
    while (slot < node->u.n.n_children) {
        total += node->u.n.n_bytes_in_hashtable[slot];
        slot++;
    }
    /* The remaining slots are expected to be zero, since the code
     * currently always initializes them. */
    while (slot < TREE_FANOUT + 1) {
        assert(node->u.n.n_bytes_in_hashtable[slot] == 0);
        slot++;
    }
    assert(total == node->u.n.n_bytes_in_hashtables);
}
2007-11-19 23:54:17 +00:00
int toku_serialize_brt_header_to ( int fd , struct brt_header * h ) {
2007-08-21 23:32:17 +00:00
struct wbuf w ;
2007-07-13 19:37:47 +00:00
int i ;
unsigned int size = 0 ; /* I don't want to mess around calculating it exactly. */
2007-11-14 17:58:38 +00:00
size + = 4 + 4 + 4 + 8 + 8 + 4 ; /* this size, flags, the tree's nodesize, freelist, unused_memory, nnamed_rootse. */
2007-07-13 19:37:47 +00:00
if ( h - > n_named_roots < 0 ) {
size + = 8 ;
} else {
for ( i = 0 ; i < h - > n_named_roots ; i + + ) {
size + = 12 + 1 + strlen ( h - > names [ i ] ) ;
}
}
2007-11-14 17:58:38 +00:00
wbuf_init ( & w , toku_malloc ( size ) , size ) ;
2007-07-13 19:37:47 +00:00
wbuf_int ( & w , size ) ;
2007-11-14 17:58:38 +00:00
wbuf_int ( & w , h - > flags ) ;
2007-07-13 19:37:47 +00:00
wbuf_int ( & w , h - > nodesize ) ;
wbuf_diskoff ( & w , h - > freelist ) ;
wbuf_diskoff ( & w , h - > unused_memory ) ;
wbuf_int ( & w , h - > n_named_roots ) ;
if ( h - > n_named_roots > 0 ) {
for ( i = 0 ; i < h - > n_named_roots ; i + + ) {
char * s = h - > names [ i ] ;
unsigned int l = 1 + strlen ( s ) ;
wbuf_diskoff ( & w , h - > roots [ i ] ) ;
wbuf_bytes ( & w , s , l ) ;
assert ( l > 0 & & s [ l - 1 ] = = 0 ) ;
}
} else {
wbuf_diskoff ( & w , h - > unnamed_root ) ;
}
assert ( w . ndone = = size ) ;
{
ssize_t r = pwrite ( fd , w . buf , w . ndone , 0 ) ;
2007-08-13 22:51:05 +00:00
if ( r < 0 ) perror ( " pwrite " ) ;
2007-07-13 19:37:47 +00:00
assert ( ( size_t ) r = = w . ndone ) ;
}
2007-07-20 18:00:14 +00:00
toku_free ( w . buf ) ;
2007-07-13 19:37:47 +00:00
return 0 ;
}
2007-11-19 23:54:17 +00:00
/*
 * Read the tree header stored at offset OFF (which must be 0) of FD.
 *
 *   fd    file descriptor to pread() from
 *   off   file offset of the header; asserted to be 0
 *   brth  out: receives the newly allocated header on success
 *
 * Returns 0 on success and -1 if the file is empty (the first pread()
 * returns 0).  Any other short read or malformed content aborts via
 * assert().
 */
int toku_deserialize_brtheader_from (int fd, DISKOFF off, struct brt_header **brth) {
    struct brt_header *MALLOC(h);
    struct rbuf rc;
    int size;
    int sizeagain;
    assert(off == 0);
    {
        uint32_t size_n;
        ssize_t r = pread(fd, &size_n, sizeof(size_n), off);
        if (r == 0) { toku_free(h); return -1; }
        assert(r == sizeof(size_n));
        size = ntohl(size_n);
    }
    /* Reject a nonsensical size before it is used to allocate and read. */
    assert(size > 0);
    rc.buf = toku_malloc(size);
    rc.size = size;
    rc.ndone = 0;
    {
        ssize_t r = pread(fd, rc.buf, size, off);
        assert(r == size);
    }
    h->dirty = 0;
    sizeagain = rbuf_int(&rc);
    assert(sizeagain == size);
    h->flags = rbuf_int(&rc);
    h->nodesize = rbuf_int(&rc);
    h->freelist = rbuf_diskoff(&rc);
    h->unused_memory = rbuf_diskoff(&rc);
    h->n_named_roots = rbuf_int(&rc);
    if (h->n_named_roots >= 0) {
        int i;
        MALLOC_N(h->n_named_roots, h->roots);
        MALLOC_N(h->n_named_roots, h->names);
        for (i = 0; i < h->n_named_roots; i++) {
            bytevec nameptr;
            unsigned int len;
            h->roots[i] = rbuf_diskoff(&rc);
            rbuf_bytes(&rc, &nameptr, &len);
            /* Confirm there is a terminator inside the bytes that were
             * actually read before letting strlen() scan them; otherwise
             * strlen() could run past the end of the name. */
            assert(len > 0 && ((const char *)nameptr)[len - 1] == 0);
            assert(strlen(nameptr) + 1 == len);
            h->names[i] = memdup(nameptr, len);
        }
        h->unnamed_root = -1;
    } else {
        h->roots = 0;
        h->names = 0;
        h->unnamed_root = rbuf_diskoff(&rc);
    }
    /* The whole header must have been consumed. */
    assert(rc.ndone == rc.size);
    toku_free(rc.buf);
    *brth = h;
    return 0;
}
2007-11-14 17:58:38 +00:00