Add some information needed for recovery. Changed the size of the serialization, and caused a new cursor bug to show up. Addresses #27.

git-svn-id: file:///svn/tokudb@927 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Bradley C. Kuszmaul 2007-12-04 22:18:21 +00:00
parent 3ca8f1e01d
commit accc92e4b0
5 changed files with 37 additions and 26 deletions

View file

@ -14,6 +14,7 @@
#endif
enum { TREE_FANOUT = BRT_FANOUT };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { PMA_ITEM_OVERHEAD = 4 };
enum { BRT_CMD_OVERHEAD = 1 };
enum { BRT_DEFAULT_NODE_SIZE = 1 << 20 };
@ -59,7 +60,7 @@ struct brtnode {
} n;
struct leaf {
PMA buffer;
unsigned int n_bytes_in_buffer;
unsigned int n_bytes_in_buffer; /* How many bytes to represent the PMA (including the per-key overheads, but not including the overheads for the node. */
} l;
} u;
};

View file

@ -61,8 +61,9 @@ static unsigned int toku_serialize_brtnode_size_slow(BRTNODE node) {
PMA_ITERATE(node->u.l.buffer,
key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen,
(hsize+=KEY_VALUE_OVERHEAD+keylen+datalen));
(hsize+=PMA_ITEM_OVERHEAD+KEY_VALUE_OVERHEAD+keylen+datalen));
assert(hsize==node->u.l.n_bytes_in_buffer);
hsize+=4; /* the PMA size */
hsize+=4; /* add n entries in buffer table. */
return size+hsize;
}
@ -80,7 +81,8 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) {
result+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */
result+=node->u.n.n_bytes_in_hashtables;
} else {
result+=4; /* n_entries in buffer table. */
result+=(4 /* n_entries in buffer table. */
+4); /* the pma size */
result+=node->u.l.n_bytes_in_buffer;
if (toku_memory_check) {
unsigned int slowresult = toku_serialize_brtnode_size_slow(node);
@ -166,9 +168,14 @@ void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node)
} else {
//printf(" n_entries=%d\n", toku_pma_n_entries(node->u.l.buffer));
wbuf_int(&w, toku_pma_n_entries(node->u.l.buffer));
PMA_ITERATE(node->u.l.buffer, key, keylen, data, datalen,
(wbuf_bytes(&w, key, keylen),
wbuf_bytes(&w, data, datalen)));
wbuf_int(&w, toku_pma_index_limit(node->u.l.buffer));
PMA_ITERATE_IDX(node->u.l.buffer, idx,
key, keylen, data, datalen,
({
wbuf_int(&w, idx);
wbuf_bytes(&w, key, keylen);
wbuf_bytes(&w, data, datalen);
}));
}
assert(w.ndone<=w.size);
#ifdef CRC_ATEND
@ -377,16 +384,17 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
#if BRT_USE_PMA_BULK_INSERT
{
DBT keys[n_in_buf], vals[n_in_buf];
int index_limit __attribute__((__unused__))= rbuf_int(&rc);
for (i=0; i<n_in_buf; i++) {
bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen;
toku_verify_counts(result);
int idx __attribute__((__unused__)) = rbuf_int(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
toku_fill_dbt(&keys[i], key, keylen);
rbuf_bytes(&rc, &val, &vallen);
toku_fill_dbt(&vals[i], val, vallen);
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD;
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
}
if (n_in_buf > 0) {
u_int32_t actual_sum = 0;
@ -413,7 +421,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
r = toku_pma_insert(result->u.l.buffer, toku_fill_dbt(&k, key, keylen), toku_fill_dbt(&v, val, vallen), 0);
if (r!=0) goto died_21;
}
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD;
result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
}
#endif
}

View file

@ -565,7 +565,7 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
assert(node->height>0); /* Not a leaf. */
DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val;
int to_child=toku_serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD <= child->nodesize;
int to_child=toku_serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD+BRT_CMD_OVERHEAD <= child->nodesize;
if (toku_brt_debug_mode) {
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
if (childnum_of_node+1<node->u.n.n_children) {
@ -944,7 +944,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
if (replaced_v_size>=0) {
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
} else {
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD + PMA_ITEM_OVERHEAD;
}
node->dirty = 1;
@ -2912,7 +2912,7 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags __attribute__((__unused_
int kvsize;
r = toku_pma_cursor_delete_under(cursor->pmacurs, &kvsize);
if (r == 0) {
node->u.l.n_bytes_in_buffer -= KEY_VALUE_OVERHEAD + kvsize;
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + kvsize;
node->dirty = 1;
}
} else

View file

@ -1083,7 +1083,7 @@ static int pma_delete_dup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *finge
struct kv_pair *kv = pma->pairs[righthere];
if (kv_pair_valid(kv)) {
/* mark the pair as deleted */
*deleted_size += KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*deleted_size += PMA_ITEM_OVERHEAD+ KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv));
pma->pairs[righthere] = kv_pair_set_deleted(kv);
if (__pma_count_cursor_refs(pma, righthere) == 0) {
@ -1109,7 +1109,7 @@ static int pma_delete_nodup (PMA pma, DBT *k, u_int32_t rand4sem, u_int32_t *fin
if (0) printf("%s:%d l=%d r=%d\n", __FILE__, __LINE__, idx, DB_NOTFOUND);
return DB_NOTFOUND;
}
*deleted_size = KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*deleted_size = PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + kv_pair_keylen(kv) + kv_pair_vallen(kv);
*fingerprint -= rand4sem*toku_calccrc32_kvpair (kv_pair_key_const(kv), kv_pair_keylen(kv), kv_pair_val_const(kv), kv_pair_vallen(kv));
pma->pairs[idx] = kv_pair_set_deleted(kv);
if (__pma_count_cursor_refs(pma, idx) == 0)
@ -1430,14 +1430,14 @@ int toku_pma_split(TOKUTXN txn, FILENUM filenum,
/* debug check the kv length sum */
sumlen = 0;
for (i=0; i<npairs; i++)
sumlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + KEY_VALUE_OVERHEAD;
sumlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD;
if (origpma_size)
assert(*(int *)origpma_size == sumlen);
runlen = 0;
for (i=0; i<npairs;) {
runlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + KEY_VALUE_OVERHEAD;
runlen += kv_pair_keylen(pairs[i].pair) + kv_pair_vallen(pairs[i].pair) + PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD;
i++;
if (2*runlen >= sumlen)
break;

View file

@ -122,7 +122,7 @@ int toku_pma_cursor_delete_under(PMA_CURSOR c, int *kvsize);
int toku_pma_random_pick(PMA, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
int toku_pma_index_limit(PMA);
int toku_pma_index_limit(PMA); // How many slots are in the PMA right now?
int toku_pmanode_valid(PMA,int);
bytevec toku_pmanode_key(PMA,int);
ITEMLEN toku_pmanode_keylen(PMA,int);
@ -131,16 +131,18 @@ ITEMLEN toku_pmanode_vallen(PMA,int);
void toku_pma_iterate (PMA, void(*)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*);
#define PMA_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
int __i; \
for (__i=0; __i<toku_pma_index_limit(table); __i++) { \
if (toku_pmanode_valid(table,__i)) { \
bytevec keyvar = toku_pmanode_key(table,__i); \
ITEMLEN keylenvar = toku_pmanode_keylen(table,__i); \
bytevec datavar = toku_pmanode_val(table, __i); \
ITEMLEN datalenvar = toku_pmanode_vallen(table, __i); \
#define PMA_ITERATE_IDX(table,idx,keyvar,keylenvar,datavar,datalenvar,body) ({ \
int idx; \
for (idx=0; idx<toku_pma_index_limit(table); idx++) { \
if (toku_pmanode_valid(table,idx)) { \
bytevec keyvar = toku_pmanode_key(table,idx); \
ITEMLEN keylenvar = toku_pmanode_keylen(table,idx); \
bytevec datavar = toku_pmanode_val(table, idx); \
ITEMLEN datalenvar = toku_pmanode_vallen(table, idx); \
body; \
} } })
} } })
#define PMA_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) PMA_ITERATE_IDX(table, __i, keyvar, keylenvar, datavar, datalenvar, body)
void toku_pma_verify_fingerprint (PMA pma, u_int32_t rand4fingerprint, u_int32_t fingerprint);