Add XID (the transaction id) to BRT commands (Addresses #242), and also fix the problem where deletes were sneaking around inserts (Fixes #332).

git-svn-id: file:///svn/tokudb@2098 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Bradley C. Kuszmaul 2008-02-05 18:25:23 +00:00
parent 54fe4c77d7
commit eeaef7284a
12 changed files with 126 additions and 117 deletions

View file

@ -8,7 +8,7 @@
# GCOV_FLAGS = -fprofile-arcs -ftest-coverage
# PROF_FLAGS = -pg
OPTFLAGS = -O2
# OPTFLAGS = -O2
ifeq ($(VERBOSE),2)
VERBVERBOSE=-v

View file

@ -16,7 +16,9 @@
enum { TREE_FANOUT = BRT_FANOUT };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { PMA_ITEM_OVERHEAD = 4 };
enum { BRT_CMD_OVERHEAD = 1 };
enum { BRT_CMD_OVERHEAD = (1 // the type
+ 8) // the xid
};
enum { BRT_DEFAULT_NODE_SIZE = 1 << 20 };
struct nodeheader_in_file {
@ -56,7 +58,7 @@ struct brtnode {
// When we checkpoint: Create a checkpoint record, and cause every dirty node to be written to disk. The new checkpoint record is *not* incorporated into the disk_lsn of the written nodes.
// While we are checkpointing, someone may modify a dirty node that has not yet been written. In that case, when we unpin the node, we make the new copy (because the disk_lsn<checkpoint_lsn), just as we would usually.
//
int layout_version; // What version of the data structure?
int layout_version; // What version of the data structure? (version 2 adds the xid to the brt cmds)
int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
u_int32_t rand4fingerprint;
u_int32_t local_fingerprint; /* For leaves this is everything in the buffer. For nonleaves, this is everything in the buffers, but does not include child subtree fingerprints. */
@ -159,6 +161,7 @@ enum brt_cmd_type {
/* tree commands */
struct brt_cmd {
enum brt_cmd_type type;
TXNID xid;
union {
/* insert or delete */
struct brt_cmd_insert_delete {
@ -185,8 +188,8 @@ extern CACHEKEY* toku_calculate_root_offset_pointer (BRT brt);
static const BRTNODE null_brtnode=0;
extern u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, int vallen);
extern u_int32_t toku_calccrc32_cmd (int type, const void *key, int keylen, const void *val, int vallen);
extern u_int32_t toku_calccrc32_cmdstruct (BRT_CMD_S *cmd);
extern u_int32_t toku_calccrc32_cmd (int type, TXNID xid, const void *key, int keylen, const void *val, int vallen);
extern u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd);
// How long is the pivot key?
unsigned int toku_brt_pivot_key_len (BRT, struct kv_pair *); // Given the tree

View file

@ -26,7 +26,7 @@ static void test_serialize(void) {
sn.thisnodename = sn.nodesize*20;
sn.disk_lsn.lsn = 789;
sn.log_lsn.lsn = 123456;
sn.layout_version = 1;
sn.layout_version = 2;
sn.height = 1;
sn.rand4fingerprint = randval;
sn.local_fingerprint = 0;
@ -40,9 +40,9 @@ static void test_serialize(void) {
BNC_SUBTREE_FINGERPRINT(&sn, 1) = random();
r = toku_fifo_create(&BNC_BUFFER(&sn,0)); assert(r==0);
r = toku_fifo_create(&BNC_BUFFER(&sn,1)); assert(r==0);
r = toku_fifo_enq(BNC_BUFFER(&sn,0), "a", 2, "aval", 5, BRT_NONE); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, "a", 2, "aval", 5);
r = toku_fifo_enq(BNC_BUFFER(&sn,0), "b", 2, "bval", 5, BRT_NONE); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, "b", 2, "bval", 5);
r = toku_fifo_enq(BNC_BUFFER(&sn,1), "x", 2, "xval", 5, BRT_NONE); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, "x", 2, "xval", 5);
r = toku_fifo_enq(BNC_BUFFER(&sn,0), "a", 2, "aval", 5, BRT_NONE, (TXNID)0); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, (TXNID)0, "a", 2, "aval", 5);
r = toku_fifo_enq(BNC_BUFFER(&sn,0), "b", 2, "bval", 5, BRT_NONE, (TXNID)123); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, (TXNID)123, "b", 2, "bval", 5);
r = toku_fifo_enq(BNC_BUFFER(&sn,1), "x", 2, "xval", 5, BRT_NONE, (TXNID)234); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, (TXNID)234, "x", 2, "xval", 5);
BNC_NBYTESINBUF(&sn, 0) = 2*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
BNC_NBYTESINBUF(&sn, 1) = 1*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
{
@ -59,7 +59,7 @@ static void test_serialize(void) {
assert(dn->thisnodename==nodesize*20);
assert(dn->disk_lsn.lsn==123456);
assert(dn->layout_version ==1);
assert(dn->layout_version ==2);
assert(dn->height == 1);
assert(dn->rand4fingerprint==randval);
assert(dn->u.n.n_children==2);

View file

@ -49,7 +49,7 @@ static unsigned int toku_serialize_brtnode_size_slow(BRTNODE node) {
FIFO_ITERATE(BNC_BUFFER(node,i),
key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen,
type __attribute__((__unused__)),
type __attribute__((__unused__)), xid __attribute__((__unused__)),
(hsize+=BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+keylen+datalen));
}
assert(hsize==node->u.n.n_bytes_in_buffers);
@ -154,12 +154,13 @@ void toku_serialize_brtnode_to(int fd, DISKOFF off, DISKOFF size, BRTNODE node)
for (i=0; i< n_buffers; i++) {
//printf("%s:%d p%d=%p n_entries=%d\n", __FILE__, __LINE__, i, node->mdicts[i], mdict_n_entries(node->mdicts[i]));
wbuf_int(&w, toku_fifo_n_entries(BNC_BUFFER(node,i)));
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type,
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid,
({
wbuf_char(&w, type);
wbuf_TXNID(&w, xid);
wbuf_bytes(&w, key, keylen);
wbuf_bytes(&w, data, datalen);
check_local_fingerprint+=node->rand4fingerprint*toku_calccrc32_cmd(type, key, keylen, data, datalen);
check_local_fingerprint+=node->rand4fingerprint*toku_calccrc32_cmd(type, xid, key, keylen, data, datalen);
}));
}
//printf("%s:%d check_local_fingerprint=%8x\n", __FILE__, __LINE__, check_local_fingerprint);
@ -257,7 +258,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
}
}
result->layout_version = rbuf_int(&rc);
if (result->layout_version!=1) {
if (result->layout_version!=2) {
r=DB_BADFORMAT;
goto died1;
}
@ -337,17 +338,17 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode, int fl
//printf("%d in hash\n", n_in_hash);
for (i=0; i<n_in_this_hash; i++) {
int diff;
int type;
bytevec key; ITEMLEN keylen;
bytevec val; ITEMLEN vallen;
toku_verify_counts(result);
type = rbuf_char(&rc);
int type = rbuf_char(&rc);
TXNID xid = rbuf_ulonglong(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
rbuf_bytes(&rc, &val, &vallen);
check_local_fingerprint += result->rand4fingerprint * toku_calccrc32_cmd(type, key, keylen, val, vallen);
check_local_fingerprint += result->rand4fingerprint * toku_calccrc32_cmd(type, xid, key, keylen, val, vallen);
//printf("Found %s,%s\n", (char*)key, (char*)val);
{
r=toku_fifo_enq(BNC_BUFFER(result, cnum), key, keylen, val, vallen, type); /* Copies the data into the hash table. */
r=toku_fifo_enq(BNC_BUFFER(result, cnum), key, keylen, val, vallen, type, xid); /* Copies the data into the hash table. */
if (r!=0) { goto died_12; }
}
diff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;

View file

@ -2287,6 +2287,7 @@ static void test_brt_delete() {
test_brt_delete_cursor_first(100); toku_memory_check_all_free();
test_brt_delete_cursor_first(500); toku_memory_check_all_free();
test_brt_delete_cursor_first(10000); toku_memory_check_all_free();
test_insert_delete_lookup(2); toku_memory_check_all_free();
test_insert_delete_lookup(512); toku_memory_check_all_free();
}
@ -2831,6 +2832,7 @@ static void brt_blackbox_test (void) {
int main (int argc , const char *argv[]) {
default_parse_args(argc, argv);
brt_blackbox_test();
toku_malloc_cleanup();
if (verbose) printf("test ok\n");

View file

@ -24,9 +24,9 @@ static void verify_local_fingerprint (BRTNODE node) {
int i;
if (node->height>0) {
for (i=0; i<node->u.n.n_children; i++)
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type,
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid,
({
fp += node->rand4fingerprint * toku_calccrc32_cmd(type, key, keylen, data, datalen);
fp += node->rand4fingerprint * toku_calccrc32_cmd(type, xid, key, keylen, data, datalen);
}));
assert(fp==node->local_fingerprint);
} else {
@ -69,6 +69,7 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b
bytevec data __attribute__((__unused__)),
unsigned int datalen __attribute__((__unused__)),
int type __attribute__((__unused__)),
TXNID xid __attribute__((__unused__)),
void *ignore __attribute__((__unused__))) {
if (thislorange) assert(toku_keycompare(thislorange,thislolen,key,keylen)<0);
if (thishirange && toku_keycompare(key,keylen,thishirange,thishilen)>0) {

View file

@ -24,6 +24,7 @@
*
*/
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
@ -254,7 +255,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height)
n->thisnodename = nodename;
n->disk_lsn.lsn = 0; // a new one can always be 0.
n->log_lsn = n->disk_lsn;
n->layout_version = 1;
n->layout_version = 2;
n->height = height;
n->rand4fingerprint = random();
n->local_fingerprint = 0;
@ -308,11 +309,11 @@ static void create_new_brtnode (BRT t, BRTNODE *result, int height, TOKUTXN txn)
toku_update_brtnode_lsn(n, txn);
}
static int insert_to_buffer_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v, int type) {
static int insert_to_buffer_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v, int type, TXNID xid) {
unsigned int n_bytes_added = BRT_CMD_OVERHEAD + KEY_VALUE_OVERHEAD + k->size + v->size;
int r = toku_fifo_enq(BNC_BUFFER(node,childnum), k->data, k->size, v->data, v->size, type);
int r = toku_fifo_enq(BNC_BUFFER(node,childnum), k->data, k->size, v->data, v->size, type, xid);
if (r!=0) return r;
node->local_fingerprint += node->rand4fingerprint*toku_calccrc32_cmd(type, k->data, k->size, v->data, v->size);
node->local_fingerprint += node->rand4fingerprint*toku_calccrc32_cmd(type, xid, k->data, k->size, v->data, v->size);
BNC_NBYTESINBUF(node,childnum) += n_bytes_added;
node->u.n.n_bytes_in_buffers += n_bytes_added;
node->dirty = 1;
@ -392,21 +393,22 @@ static int brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *node
bytevec key, data;
unsigned int keylen, datalen;
int type;
int fr = toku_fifo_peek(from_htab, &key, &keylen, &data, &datalen, &type);
TXNID xid;
int fr = toku_fifo_peek(from_htab, &key, &keylen, &data, &datalen, &type, &xid);
if (fr!=0) break;
int n_bytes_moved = keylen+datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
BYTESTRING keybs = { .len = keylen, .data = (char*)key };
BYTESTRING databs = { .len = datalen, .data = (char*)data };
u_int32_t old_from_fingerprint = node->local_fingerprint;
u_int32_t old_to_fingerprint = B->local_fingerprint;
u_int32_t delta = toku_calccrc32_cmd(type, key, keylen, data, datalen);
u_int32_t delta = toku_calccrc32_cmd(type, xid, key, keylen, data, datalen);
u_int32_t new_from_fingerprint = old_from_fingerprint - node->rand4fingerprint*delta;
u_int32_t new_to_fingerprint = old_to_fingerprint + B->rand4fingerprint *delta;
if (r!=0) return r;
r = toku_log_brtdeq(txn, txnid, fnum, node->thisnodename, n_children_in_a, type, keybs, databs, old_from_fingerprint, new_from_fingerprint);
r = toku_log_brtdeq(txn, xid, fnum, node->thisnodename, n_children_in_a, type, keybs, databs, old_from_fingerprint, new_from_fingerprint);
if (r!=0) return r;
r = toku_log_brtenq(txn, txnid, fnum, B->thisnodename, targchild, type, keybs, databs, old_to_fingerprint, new_to_fingerprint);
r = toku_fifo_enq(to_htab, key, keylen, data, datalen, type);
r = toku_log_brtenq(txn, xid, fnum, B->thisnodename, targchild, type, keybs, databs, old_to_fingerprint, new_to_fingerprint);
r = toku_fifo_enq(to_htab, key, keylen, data, datalen, type, xid);
if (r!=0) return r;
toku_fifo_deq(from_htab);
// key and data will no longer be valid
@ -525,7 +527,7 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
if (r!=0) return r;
assert(again_split==0); /* I only did the insert if I knew it wouldn't push down, and hence wouldn't split. */
} else {
r=insert_to_buffer_in_nonleaf(node, childnum_of_node, k, v, cmd->type);
r=insert_to_buffer_in_nonleaf(node, childnum_of_node, k, v, cmd->type, cmd->xid);
}
fixup_child_fingerprint(node, childnum_of_node, child, t, txn);
return r;
@ -576,7 +578,7 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE
static int split_count=0;
/* NODE is a node with a child.
* childnum was split into two nodes childa, and childb.
* childnum was split into two nodes childa, and childb. childa is the same as the original child. childb is a new child.
* We must slide things around, & move things from the old table to the new tables.
* We also move things to the new children as much as we can without doing any pushdowns or splitting of the child.
* We must delete the old buffer (but the old child is already deleted.)
@ -623,8 +625,8 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
BNC_NBYTESINBUF(node, childnum+1) = 0;
// Remove all the cmds from the local fingerprint. Some may get added in again when we try to push to the child.
FIFO_ITERATE(old_h, skey, skeylen, sval, svallen, type,
node->local_fingerprint -= node->rand4fingerprint*toku_calccrc32_cmd(type, skey, skeylen, sval, svallen));
FIFO_ITERATE(old_h, skey, skeylen, sval, svallen, type, xid,
node->local_fingerprint -= node->rand4fingerprint*toku_calccrc32_cmd(type, xid, skey, skeylen, sval, svallen));
// Slide the keys over
{
@ -653,40 +655,44 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
node->u.n.n_bytes_in_buffers -= old_count; /* By default, they are all removed. We might add them back in. */
/* Keep pushing to the children, but not if the children would require a pushdown */
FIFO_ITERATE(old_h, skey, skeylen, sval, svallen, type, ({
FIFO_ITERATE(old_h, skey, skeylen, sval, svallen, type, xid, ({
DBT skd, svd;
toku_fill_dbt(&skd, skey, skeylen);
toku_fill_dbt(&svd, sval, svallen);
BRT_CMD_S brtcmd;
brtcmd.type = type; brtcmd.u.id.key = &skd; brtcmd.u.id.val = &svd;
BRT_CMD_S brtcmd = { type, xid, .u.id= {toku_fill_dbt(&skd, skey, skeylen),
toku_fill_dbt(&svd, sval, svallen)} };
//verify_local_fingerprint_nonleaf(childa); verify_local_fingerprint_nonleaf(childb);
int tochildnum = childnum;
BRTNODE tochild = childa;
if (type == BRT_INSERT || type == BRT_DELETE_BOTH) {
int cmp = brt_compare_pivot(t, &skd, &svd, childsplitk->data);
if (cmp > 0) {
tochildnum = childnum+1; tochild = childb;
}
}
r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, tochild, &brtcmd, tochildnum, txn);
//verify_local_fingerprint_nonleaf(childa); verify_local_fingerprint_nonleaf(childb);
if (type == BRT_DELETE) {
int r2 = push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, childb, &brtcmd, childnum+1, txn);
//verify_local_fingerprint_nonleaf(childa); verify_local_fingerprint_nonleaf(childb);
if (r2!=0) {
// In this case we must put things from old_h into the new buffers.
// This code is wrong, so I'll abort.
abort();
return r2;
int tochildnum;
BRTNODE tochild;
switch (type) {
case BRT_INSERT:
case BRT_DELETE_BOTH:
case BRT_DELETE:
//case BRT_DELETE:
{
int cmp = brt_compare_pivot(t, &skd, &svd, childsplitk->data);
if (cmp > 0) {
tochildnum = childnum+1; tochild = childb;
} else {
tochildnum = childnum; tochild = childa;
}
}
}
if (r!=0) {
// In this case we must put things from old_h into the new buffers.
// This code is wrong, so I'll abort.
abort();
return r;
goto ok;
case BRT_NONE:
// Don't have to do anything in this case, can just drop the command
goto ok;
}
}));
printf("Bad type %d\n", type); // Don't use default: because I want a compiler warning if I forget a enum case, and I want a runtime error if the type isn't one of the expected ones.
assert(0);
ok:
// If we already have something in the buffer, we must add the new command to the buffer so that commands don't get out of order.
if (toku_fifo_n_entries(BNC_BUFFER(node,tochildnum))==0) {
r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, tochild, &brtcmd, tochildnum, txn);
} else {
r=insert_to_buffer_in_nonleaf(node, tochildnum, &skd, &svd, type, xid);
}
//verify_local_fingerprint_nonleaf(childa); verify_local_fingerprint_nonleaf(childb);
if (r!=0) printf("r=%d\n", r);
assert(r==0);
}));
toku_fifo_free(&old_h);
@ -769,17 +775,14 @@ static int push_some_brt_cmds_down (BRT t, BRTNODE node, int childnum,
//printf("%s:%d Try random_pick, weight=%d \n", __FILE__, __LINE__, BNC_NBYTESINBUF(node, childnum));
assert(toku_fifo_n_entries(BNC_BUFFER(node,childnum))>0);
int type;
while(0==toku_fifo_peek(BNC_BUFFER(node,childnum), &key, &keylen, &val, &vallen, &type)) {
TXNID xid;
while(0==toku_fifo_peek(BNC_BUFFER(node,childnum), &key, &keylen, &val, &vallen, &type, &xid)) {
int child_did_split=0; BRTNODE childa, childb;
DBT hk,hv;
DBT childsplitk;
BRT_CMD_S brtcmd;
toku_fill_dbt(&hk, key, keylen);
toku_fill_dbt(&hv, val, vallen);
brtcmd.type = type;
brtcmd.u.id.key = &hk;
brtcmd.u.id.val = &hv;
BRT_CMD_S brtcmd = { type, xid, .u.id= {toku_fill_dbt(&hk, key, keylen),
toku_fill_dbt(&hv, val, vallen)} };
//printf("%s:%d random_picked\n", __FILE__, __LINE__);
toku_init_dbt(&childsplitk);
@ -792,7 +795,7 @@ static int push_some_brt_cmds_down (BRT t, BRTNODE node, int childnum,
if (0){
unsigned int sum=0;
FIFO_ITERATE(BNC_BUFFER(node,childnum), subhk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl, subtype __attribute__((__unused__)),
FIFO_ITERATE(BNC_BUFFER(node,childnum), subhk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl, subtype __attribute__((__unused__)), subxid __attribute__((__unused__)),
sum+=hkl+hdl+KEY_VALUE_OVERHEAD+BRT_CMD_OVERHEAD);
printf("%s:%d sum=%d\n", __FILE__, __LINE__, sum);
assert(sum==BNC_NBYTESINBUF(node, childnum));
@ -1030,9 +1033,9 @@ static int brt_nonleaf_put_cmd_child (BRT t, BRTNODE node, BRT_CMD cmd,
DBT *v = cmd->u.id.val;
int diff = k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
int r=toku_fifo_enq(BNC_BUFFER(node,childnum), k->data, k->size, v->data, v->size, type);
int r=toku_fifo_enq(BNC_BUFFER(node,childnum), k->data, k->size, v->data, v->size, type, cmd->xid);
assert(r==0);
node->local_fingerprint += node->rand4fingerprint * toku_calccrc32_cmd(type, k->data, k->size, v->data, v->size);
node->local_fingerprint += node->rand4fingerprint * toku_calccrc32_cmd(type, cmd->xid, k->data, k->size, v->data, v->size);
node->u.n.n_bytes_in_buffers += diff;
BNC_NBYTESINBUF(node, childnum) += diff;
node->dirty = 1;
@ -1174,9 +1177,9 @@ static void verify_local_fingerprint_nonleaf (BRTNODE node) {
int i;
if (node->height==0) return;
for (i=0; i<node->u.n.n_children; i++)
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type,
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid,
({
fp += node->rand4fingerprint * toku_calccrc32_cmd(type, key, keylen, data, datalen);
fp += node->rand4fingerprint * toku_calccrc32_cmd(type, xid, key, keylen, data, datalen);
}));
assert(fp==node->local_fingerprint);
}
@ -1682,11 +1685,8 @@ static int brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKUTXN txn) {
int toku_brt_insert (BRT brt, DBT *key, DBT *val, TOKUTXN txn) {
int r;
BRT_CMD_S brtcmd;
BRT_CMD_S brtcmd = { BRT_INSERT, toku_txn_get_txnid(txn), .u.id={key,val}};
brtcmd.type = BRT_INSERT;
brtcmd.u.id.key = key;
brtcmd.u.id.val = val;
r = brt_root_put_cmd(brt, &brtcmd, txn);
return r;
}
@ -1708,25 +1708,15 @@ int toku_brt_lookup (BRT brt, DBT *k, DBT *v) {
int toku_brt_delete(BRT brt, DBT *key, TOKUTXN txn) {
int r;
BRT_CMD_S brtcmd;
DBT val;
toku_init_dbt(&val);
val.size = 0;
brtcmd.type = BRT_DELETE;
brtcmd.u.id.key = key;
brtcmd.u.id.val = &val;
BRT_CMD_S brtcmd = { BRT_DELETE, toku_txn_get_txnid(txn), .u.id={key, toku_init_dbt(&val)}};
r = brt_root_put_cmd(brt, &brtcmd, txn);
return r;
}
int toku_brt_delete_both(BRT brt, DBT *key, DBT *val, TOKUTXN txn) {
int r;
BRT_CMD_S brtcmd;
brtcmd.type = BRT_DELETE_BOTH;
brtcmd.u.id.key = key;
brtcmd.u.id.val = val;
BRT_CMD_S brtcmd = { BRT_DELETE_BOTH, toku_txn_get_txnid(txn), .u.id={key,val}};
r = brt_root_put_cmd(brt, &brtcmd, txn);
return r;
}
@ -1750,19 +1740,20 @@ int toku_dump_brtnode (BRT brt, DISKOFF off, int depth, bytevec lorange, ITEMLEN
//printf("%s %s\n", lorange ? lorange : "NULL", hirange ? hirange : "NULL");
{
int i;
for (i=0; i< node->u.n.n_children-1; i++) {
for (i=0; i< node->u.n.n_children; i++) {
printf("%*schild %d buffered (%d entries):\n", depth+1, "", i, toku_fifo_n_entries(BNC_BUFFER(node,i)));
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type,
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid,
({
printf("%*s %s %s %d\n", depth+2, "", (char*)key, (char*)data, type);
assert(strlen((char*)key)+1==keylen);
assert(strlen((char*)data)+1==datalen);
data=data; datalen=datalen; keylen=keylen;
printf("%*s xid=%lld %d (type=%d)\n", depth+2, "", xid, ntohl(*(int*)key), type);
//assert(strlen((char*)key)+1==keylen);
//assert(strlen((char*)data)+1==datalen);
}));
}
for (i=0; i<node->u.n.n_children; i++) {
printf("%*schild %d\n", depth, "", i);
if (i>0) {
printf("%*spivot %d=%s\n", depth+1, "", i-1, (char*)node->u.n.childkeys[i-1]);
printf("%*spivot %d len=%d %d\n", depth+1, "", i-1, node->u.n.childkeys[i-1]->keylen, ntohl(*(int*)&node->u.n.childkeys[i-1]->key));
}
toku_dump_brtnode(brt, BNC_DISKOFF(node, i), depth+4,
(i==0) ? lorange : node->u.n.childkeys[i-1],
@ -1774,10 +1765,10 @@ int toku_dump_brtnode (BRT brt, DISKOFF off, int depth, bytevec lorange, ITEMLEN
}
}
} else {
printf("%*sNode %lld nodesize=%d height=%d n_bytes_in_buffer=%d keyrange=%s %s\n",
depth, "", off, node->nodesize, node->height, node->u.l.n_bytes_in_buffer, (char*)lorange, (char*)hirange);
PMA_ITERATE(node->u.l.buffer, key, keylen, val, vallen,
( keylen=keylen, vallen=vallen, printf(" %s:%s", (char*)key, (char*)val)));
printf("%*sNode %lld nodesize=%d height=%d n_bytes_in_buffer=%d keyrange=%d %d\n",
depth, "", off, node->nodesize, node->height, node->u.l.n_bytes_in_buffer, lorange ? ntohl(*(int*)lorange) : 0, hirange ? ntohl(*(int*)hirange) : 0);
PMA_ITERATE(node->u.l.buffer, key, keylen, val __attribute__((__unused__)), vallen,
( keylen=keylen, vallen=vallen, printf(" (%d)%d ", keylen, ntohl(*(int*)key))));
printf("\n");
}
r = toku_cachetable_unpin(brt->cf, off, 0, 0);
@ -1788,6 +1779,7 @@ int toku_dump_brtnode (BRT brt, DISKOFF off, int depth, bytevec lorange, ITEMLEN
int toku_dump_brt (BRT brt) {
int r;
CACHEKEY *rootp;
struct brt_header *prev_header = brt->h;
if ((r = toku_read_and_pin_brt_header(brt->cf, &brt->h))) {
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
@ -1796,6 +1788,7 @@ int toku_dump_brt (BRT brt) {
printf("split_count=%d\n", split_count);
if ((r = toku_dump_brtnode(brt, *rootp, 0, 0, 0, 0, 0, null_brtnode))) goto died0;
if ((r = toku_unpin_brt_header(brt))!=0) return r;
brt->h = prev_header;
return 0;
}

View file

@ -46,10 +46,10 @@ void test_fifo_enq(int n) {
for (i=0; i<n; i++) {
buildkey(i);
buildval(i);
r = toku_fifo_enq(f, thekey, thekeylen, theval, thevallen, i); assert(r == 0);
r = toku_fifo_enq(f, thekey, thekeylen, theval, thevallen, i, (TXNID)i); assert(r == 0);
}
void checkit(bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen, int type, void *arg) {
void checkit(bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen, int type, TXNID xid, void *arg) {
if (verbose) printf("checkit %d %d\n", i, type);
assert(arg == 0);
buildkey(i);
@ -57,6 +57,7 @@ void test_fifo_enq(int n) {
assert((int) keylen == thekeylen); assert(memcmp(key, thekey, keylen) == 0);
assert((int) vallen == thevallen); assert(memcmp(val, theval, vallen) == 0);
assert(i % 256 == type);
assert((TXNID)i==xid);
i += 1;
}

View file

@ -65,10 +65,11 @@ int toku_fifo_n_entries(FIFO fifo) {
return fifo->n;
}
int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *data, unsigned int datalen, int type) {
int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *data, unsigned int datalen, int type, TXNID xid) {
struct fifo_entry *entry = toku_malloc(sizeof (struct fifo_entry) + keylen + datalen);
if (entry == 0) return ENOMEM;
entry->type = type;
entry->xid = xid;
entry->keylen = keylen;
memcpy(entry->key, key, keylen);
entry->vallen = datalen;
@ -78,7 +79,7 @@ int toku_fifo_enq(FIFO fifo, const void *key, unsigned int keylen, const void *d
}
/* peek at the head (the oldest entry) of the fifo */
int toku_fifo_peek(FIFO fifo, bytevec *key, unsigned int *keylen, bytevec *data, unsigned int *datalen, int *type) {
int toku_fifo_peek(FIFO fifo, bytevec *key, unsigned int *keylen, bytevec *data, unsigned int *datalen, int *type, TXNID *xid) {
struct fifo_entry *entry = fifo_peek(fifo);
if (entry == 0) return -1;
*key = entry->key;
@ -86,6 +87,7 @@ int toku_fifo_peek(FIFO fifo, bytevec *key, unsigned int *keylen, bytevec *data,
*data = entry->key + entry->keylen;
*datalen = entry->vallen;
*type = entry->type;
*xid = entry->xid;
return 0;
}
@ -96,10 +98,10 @@ int toku_fifo_deq(FIFO fifo) {
return 0;
}
void toku_fifo_iterate (FIFO fifo, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, void*), void *arg) {
void toku_fifo_iterate (FIFO fifo, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, TXNID xid, void*), void *arg) {
struct fifo_entry *entry;
for (entry = fifo_peek(fifo); entry; entry = entry->next)
f(entry->key, entry->keylen, entry->key + entry->keylen, entry->vallen, entry->type, arg);
f(entry->key, entry->keylen, entry->key + entry->keylen, entry->vallen, entry->type, entry->xid, arg);
}

View file

@ -5,6 +5,7 @@ struct fifo_entry {
unsigned int keylen;
unsigned int vallen;
unsigned char type;
TXNID xid;
unsigned char key[];
};
@ -18,20 +19,21 @@ typedef struct fifo *FIFO;
int toku_fifo_create(FIFO *);
void toku_fifo_free(FIFO *);
int toku_fifo_n_entries(FIFO);
int toku_fifo_enq (FIFO, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, int type);
int toku_fifo_peek (FIFO, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type);
int toku_fifo_enq (FIFO, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, int type, TXNID xid);
int toku_fifo_peek (FIFO, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, TXNID *xid);
int toku_fifo_deq(FIFO);
int toku_fifo_peek_deq (FIFO, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type);
void toku_fifo_iterate (FIFO, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, void*), void*);
void toku_fifo_iterate (FIFO, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, TXNID xid, void*), void*);
#define FIFO_ITERATE(fifo,keyvar,keylenvar,datavar,datalenvar,typevar,body) ({ \
#define FIFO_ITERATE(fifo,keyvar,keylenvar,datavar,datalenvar,typevar,xidvar,body) ({ \
struct fifo_entry *entry; \
for (entry = fifo->head; entry; entry = entry->next) { \
unsigned int keylenvar = entry->keylen; \
void *keyvar = entry->key; \
unsigned int datalenvar = entry->vallen; \
void *datavar = entry->key + entry->keylen; \
unsigned int typevar = entry->type; \
enum brt_cmd_type typevar = entry->type; \
TXNID xidvar = entry->xid; \
body; \
} \
})

View file

@ -21,16 +21,20 @@ u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, i
return toku_calc_more_crc32_kvpair(toku_null_crc, key, keylen, val, vallen);
}
u_int32_t toku_calccrc32_cmd (int type, const void *key, int keylen, const void *val, int vallen) {
u_int32_t toku_calccrc32_cmd (int type, TXNID xid, const void *key, int keylen, const void *val, int vallen) {
unsigned char type_c = type;
return toku_calc_more_crc32_kvpair(toku_crc32(toku_null_crc,
&type_c, 1),
unsigned int a = htonl(xid>>32);
unsigned int b = htonl(xid&0xffffffff);
return toku_calc_more_crc32_kvpair(toku_crc32(toku_crc32(toku_crc32(toku_null_crc,
&type_c, 1),
&a, 4),
&b, 4),
key, keylen, val, vallen);
}
u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd) {
if (cmd->type <= BRT_DELETE_BOTH)
return toku_calccrc32_cmd (cmd->type, cmd->u.id.key->data, cmd->u.id.key->size, cmd->u.id.val->data, cmd->u.id.val->size);
return toku_calccrc32_cmd (cmd->type, cmd->xid, cmd->u.id.key->data, cmd->u.id.key->size, cmd->u.id.val->data, cmd->u.id.val->size);
else
assert(0); /* Should not have come here. */
}

View file

@ -171,7 +171,7 @@ void toku_recover_newbrtnode (struct logtype_newbrtnode *c) {
n->thisnodename = c->diskoff;
n->log_lsn = n->disk_lsn = c->lsn;
//printf("%s:%d %p->disk_lsn=%"PRId64"\n", __FILE__, __LINE__, n, n->disk_lsn.lsn);
n->layout_version = 1;
n->layout_version = 2;
n->height = c->height;
n->rand4fingerprint = c->rand4fingerprint;
n->flags = c->is_dup_sort ? TOKU_DB_DUPSORT : 0; // Don't have TOKU_DB_DUP ???