mirror of
https://github.com/MariaDB/server.git
synced 2025-02-02 12:01:42 +01:00
Implement delete by key. A BRT_DELETE command is injected into the tree.
At every interior node it replaces any INSERT or DELETE command with the same key; at a leaf node it is translated into a PMA delete operation. The database file format was changed: each entry in an interior node's buffer now includes a 1-byte type field. There are currently 2 types: BRT_INSERT and BRT_DELETE. git-svn-id: file:///svn/tokudb@278 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
parent
de47452c10
commit
c10712dee6
9 changed files with 535 additions and 181 deletions
|
@ -11,9 +11,10 @@ typedef long long diskoff; /* Offset in a disk. -1 is the NULL pointer. */
|
|||
#endif
|
||||
enum { TREE_FANOUT = BRT_FANOUT }; //, NODESIZE=1<<20 };
|
||||
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
|
||||
enum { BRT_CMD_OVERHEAD = 1 };
|
||||
|
||||
struct nodeheader_in_file {
|
||||
int n_in_buffer;
|
||||
|
||||
};
|
||||
enum { BUFFER_HEADER_SIZE = (4 // height//
|
||||
+ 4 // n_children
|
||||
|
@ -140,3 +141,23 @@ void brt_update_cursors_new_root(BRT t, BRTNODE newroot, BRTNODE left, BRTNODE r
|
|||
void brt_update_cursors_leaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right);
|
||||
void brt_update_cursors_nonleaf_expand(BRT t, BRTNODE oldnode, int childnum, BRTNODE left, BRTNODE right);
|
||||
void brt_update_cursors_nonleaf_split(BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right);
|
||||
|
||||
enum brt_cmd_type {
|
||||
BRT_NONE = 0,
|
||||
BRT_INSERT = 1,
|
||||
BRT_DELETE = 2,
|
||||
};
|
||||
|
||||
struct brt_cmd {
|
||||
enum brt_cmd_type type;
|
||||
union {
|
||||
/* insert or delete */
|
||||
struct brt_cmd_insert_delete {
|
||||
DBT *key;
|
||||
DBT *val;
|
||||
DB *db;
|
||||
} id;
|
||||
} u;
|
||||
};
|
||||
typedef struct brt_cmd BRT_CMD;
|
||||
|
||||
|
|
|
@ -27,10 +27,10 @@ void test_serialize(void) {
|
|||
sn.u.n.children[1] = sn.nodesize*35;
|
||||
r = toku_hashtable_create(&sn.u.n.htables[0]); assert(r==0);
|
||||
r = toku_hashtable_create(&sn.u.n.htables[1]); assert(r==0);
|
||||
r = toku_hash_insert(sn.u.n.htables[0], "a", 2, "aval", 5); assert(r==0);
|
||||
r = toku_hash_insert(sn.u.n.htables[0], "b", 2, "bval", 5); assert(r==0);
|
||||
r = toku_hash_insert(sn.u.n.htables[1], "x", 2, "xval", 5); assert(r==0);
|
||||
sn.u.n.n_bytes_in_hashtables = 3*(KEY_VALUE_OVERHEAD+2+5);
|
||||
r = toku_hash_insert(sn.u.n.htables[0], "a", 2, "aval", 5, BRT_NONE); assert(r==0);
|
||||
r = toku_hash_insert(sn.u.n.htables[0], "b", 2, "bval", 5, BRT_NONE); assert(r==0);
|
||||
r = toku_hash_insert(sn.u.n.htables[1], "x", 2, "xval", 5, BRT_NONE); assert(r==0);
|
||||
sn.u.n.n_bytes_in_hashtables = 3*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
|
||||
|
||||
deserialize_brtnode_from(fd, nodesize*20, &dn, nodesize);
|
||||
|
||||
|
@ -46,24 +46,26 @@ void test_serialize(void) {
|
|||
assert(dn->u.n.children[0]==nodesize*30);
|
||||
assert(dn->u.n.children[1]==nodesize*35);
|
||||
{
|
||||
bytevec data; ITEMLEN datalen;
|
||||
int r = toku_hash_find(dn->u.n.htables[0], "a", 2, &data, &datalen);
|
||||
bytevec data; ITEMLEN datalen; int type;
|
||||
int r = toku_hash_find(dn->u.n.htables[0], "a", 2, &data, &datalen, &type);
|
||||
assert(r==0);
|
||||
assert(strcmp(data,"aval")==0);
|
||||
assert(datalen==5);
|
||||
assert(type == BRT_NONE);
|
||||
|
||||
r=toku_hash_find(dn->u.n.htables[0], "b", 2, &data, &datalen);
|
||||
r=toku_hash_find(dn->u.n.htables[0], "b", 2, &data, &datalen, &type);
|
||||
assert(r==0);
|
||||
assert(strcmp(data,"bval")==0);
|
||||
assert(datalen==5);
|
||||
assert(type == BRT_NONE);
|
||||
|
||||
r=toku_hash_find(dn->u.n.htables[1], "x", 2, &data, &datalen);
|
||||
r=toku_hash_find(dn->u.n.htables[1], "x", 2, &data, &datalen, &type);
|
||||
assert(r==0);
|
||||
assert(strcmp(data,"xval")==0);
|
||||
assert(datalen==5);
|
||||
|
||||
assert(type == BRT_NONE);
|
||||
}
|
||||
brtnode_free(&dn);
|
||||
// brtnode_free(&dn);
|
||||
|
||||
toku_free(hello_string);
|
||||
toku_hashtable_free(&sn.u.n.htables[0]);
|
||||
|
|
|
@ -34,7 +34,8 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) {
|
|||
HASHTABLE_ITERATE(node->u.n.htables[i],
|
||||
key __attribute__((__unused__)), keylen,
|
||||
data __attribute__((__unused__)), datalen,
|
||||
(hsize+=8+keylen+datalen));
|
||||
type __attribute__((__unused__)),
|
||||
(hsize+=BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+keylen+datalen));
|
||||
}
|
||||
assert(hsize==node->u.n.n_bytes_in_hashtables);
|
||||
assert(csize==node->u.n.totalchildkeylens);
|
||||
|
@ -44,12 +45,11 @@ static unsigned int serialize_brtnode_size_slow(BRTNODE node) {
|
|||
PMA_ITERATE(node->u.l.buffer,
|
||||
key __attribute__((__unused__)), keylen,
|
||||
data __attribute__((__unused__)), datalen,
|
||||
(hsize+=8+keylen+datalen));
|
||||
(hsize+=KEY_VALUE_OVERHEAD+keylen+datalen));
|
||||
assert(hsize==node->u.l.n_bytes_in_buffer);
|
||||
hsize+=4; /* add n entries in buffer table. */
|
||||
return size+hsize;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
unsigned int serialize_brtnode_size (BRTNODE node) {
|
||||
|
@ -101,8 +101,8 @@ int serialize_brtnode_to(int fd, diskoff off, diskoff size, BRTNODE node) {
|
|||
for (i=0; i< n_hash_tables; i++) {
|
||||
//printf("%s:%d p%d=%p n_entries=%d\n", __FILE__, __LINE__, i, node->mdicts[i], mdict_n_entries(node->mdicts[i]));
|
||||
wbuf_int(&w, toku_hashtable_n_entries(node->u.n.htables[i]));
|
||||
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen,
|
||||
(wbuf_bytes(&w, key, keylen),
|
||||
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
|
||||
(wbuf_char(&w, type), wbuf_bytes(&w, key, keylen),
|
||||
wbuf_bytes(&w, data, datalen)));
|
||||
}
|
||||
}
|
||||
|
@ -220,17 +220,19 @@ int deserialize_brtnode_from (int fd, diskoff off, BRTNODE *brtnode, int nodesiz
|
|||
//printf("%d in hash\n", n_in_hash);
|
||||
for (i=0; i<n_in_this_hash; i++) {
|
||||
int diff;
|
||||
int type;
|
||||
bytevec key; ITEMLEN keylen;
|
||||
bytevec val; ITEMLEN vallen;
|
||||
verify_counts(result);
|
||||
type = rbuf_char(&rc);
|
||||
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
|
||||
rbuf_bytes(&rc, &val, &vallen);
|
||||
//printf("Found %s,%s\n", key, val);
|
||||
//printf("Found %s,%s\n", (char*)key, (char*)val);
|
||||
{
|
||||
int r=toku_hash_insert(result->u.n.htables[cnum], key, keylen, val, vallen); /* Copies the data into the hash table. */
|
||||
int r=toku_hash_insert(result->u.n.htables[cnum], key, keylen, val, vallen, type); /* Copies the data into the hash table. */
|
||||
if (r!=0) { goto died_12; }
|
||||
}
|
||||
diff = keylen + vallen + KEY_VALUE_OVERHEAD;
|
||||
diff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
|
||||
result->u.n.n_bytes_in_hashtables += diff;
|
||||
result->u.n.n_bytes_in_hashtable[cnum] += diff;
|
||||
//printf("Inserted\n");
|
||||
|
|
|
@ -1328,8 +1328,264 @@ void test_brt_cursor() {
|
|||
}
|
||||
}
|
||||
|
||||
void test_large_kv(int bsize, int ksize, int vsize) {
|
||||
BRT t;
|
||||
int r;
|
||||
CACHETABLE ct;
|
||||
char fname[]="testbrt.brt";
|
||||
|
||||
printf("test_large_kv: %d %d %d\n", bsize, ksize, vsize);
|
||||
|
||||
r = brt_create_cachetable(&ct, 0);
|
||||
assert(r==0);
|
||||
unlink(fname);
|
||||
r = open_brt(fname, 0, 1, &t, bsize, ct, default_compare_fun);
|
||||
assert(r==0);
|
||||
|
||||
DBT key, val;
|
||||
char *k, *v;
|
||||
k = toku_malloc(ksize); assert(k); memset(k, 0, ksize);
|
||||
v = toku_malloc(vsize); assert(v); memset(v, 0, vsize);
|
||||
fill_dbt(&key, k, ksize);
|
||||
fill_dbt(&val, v, vsize);
|
||||
|
||||
r = brt_insert(t, &key, &val, 0);
|
||||
assert(r == 0);
|
||||
|
||||
toku_free(k);
|
||||
toku_free(v);
|
||||
|
||||
r = close_brt(t); assert(r==0);
|
||||
r = cachetable_close(&ct); assert(r==0);
|
||||
}
|
||||
|
||||
/*
|
||||
* test the key and value limits
|
||||
* the current implementation crashes, rather than returning an error, when kvsize == bsize/2
|
||||
*/
|
||||
void test_brt_limits() {
|
||||
int bsize = 1024;
|
||||
int kvsize = 4;
|
||||
while (kvsize < bsize/2) {
|
||||
test_large_kv(bsize, kvsize, kvsize); memory_check_all_free();
|
||||
kvsize *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* verify that a delete on an empty tree fails
|
||||
*/
|
||||
void test_brt_delete_empty() {
|
||||
printf("test_brt_delete_empty\n");
|
||||
|
||||
BRT t;
|
||||
int r;
|
||||
CACHETABLE ct;
|
||||
char fname[]="testbrt.brt";
|
||||
|
||||
r = brt_create_cachetable(&ct, 0);
|
||||
assert(r==0);
|
||||
unlink(fname);
|
||||
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
|
||||
assert(r==0);
|
||||
|
||||
DBT key;
|
||||
int k = 1;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
r = brt_delete(t, &key, 0);
|
||||
assert(r != 0);
|
||||
|
||||
r = close_brt(t); assert(r==0);
|
||||
r = cachetable_close(&ct); assert(r==0);
|
||||
}
|
||||
|
||||
/*
|
||||
* insert n keys, delete all n keys, verify that lookups for all the keys fail,
|
||||
* verify that a cursor walk of the tree finds nothing
|
||||
*/
|
||||
void test_brt_delete_present(int n) {
|
||||
printf("test_brt_delete_present:%d\n", n);
|
||||
|
||||
BRT t;
|
||||
int r;
|
||||
CACHETABLE ct;
|
||||
char fname[]="testbrt.brt";
|
||||
int i;
|
||||
|
||||
r = brt_create_cachetable(&ct, 0);
|
||||
assert(r==0);
|
||||
unlink(fname);
|
||||
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
|
||||
assert(r==0);
|
||||
|
||||
DBT key, val;
|
||||
int k, v;
|
||||
|
||||
for (i=0; i<n; i++) {
|
||||
k = i; v = n + i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
fill_dbt(&val, &v, sizeof v);
|
||||
r = brt_insert(t, &key, &val, 0);
|
||||
assert(r == 0);
|
||||
}
|
||||
|
||||
for (i=0; i<n; i++) {
|
||||
k = i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
r = brt_delete(t, &key, 0);
|
||||
assert(r == 0);
|
||||
}
|
||||
|
||||
/* lookups should all fail */
|
||||
for (i=0; i<n; i++) {
|
||||
k = i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
init_dbt(&val); val.flags = DB_DBT_MALLOC;
|
||||
r = brt_lookup(t, &key, &val, 0);
|
||||
assert(r == DB_NOTFOUND);
|
||||
}
|
||||
|
||||
/* cursor should not find anything */
|
||||
BRT_CURSOR cursor;
|
||||
|
||||
r = brt_cursor(t, &cursor);
|
||||
assert(r == 0);
|
||||
|
||||
init_dbt(&key); key.flags = DB_DBT_MALLOC;
|
||||
init_dbt(&val); val.flags = DB_DBT_MALLOC;
|
||||
r = brt_c_get(cursor, &key, &val, DB_FIRST);
|
||||
assert(r != 0);
|
||||
|
||||
r = brt_cursor_close(cursor);
|
||||
assert(r == 0);
|
||||
|
||||
r = close_brt(t); assert(r==0);
|
||||
r = cachetable_close(&ct); assert(r==0);
|
||||
}
|
||||
|
||||
void test_brt_delete_not_present(int n) {
|
||||
printf("test_brt_delete_not_present:%d\n", n);
|
||||
|
||||
BRT t;
|
||||
int r;
|
||||
CACHETABLE ct;
|
||||
char fname[]="testbrt.brt";
|
||||
int i;
|
||||
|
||||
r = brt_create_cachetable(&ct, 0);
|
||||
assert(r==0);
|
||||
unlink(fname);
|
||||
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
|
||||
assert(r==0);
|
||||
|
||||
DBT key, val;
|
||||
int k, v;
|
||||
|
||||
for (i=0; i<n; i++) {
|
||||
k = i; v = n + i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
fill_dbt(&val, &v, sizeof v);
|
||||
r = brt_insert(t, &key, &val, 0);
|
||||
assert(r == 0);
|
||||
}
|
||||
|
||||
for (i=0; i<n; i++) {
|
||||
k = i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
r = brt_delete(t, &key, 0);
|
||||
assert(r == 0);
|
||||
}
|
||||
|
||||
k = n+1;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
r = brt_delete(t, &key, 0);
|
||||
printf("brt_delete k=%d %d\n", k, r);
|
||||
|
||||
r = close_brt(t); assert(r==0);
|
||||
r = cachetable_close(&ct); assert(r==0);
|
||||
}
|
||||
|
||||
void test_brt_delete_cursor_first(int n) {
|
||||
printf("test_brt_delete_cursor_first:%d\n", n);
|
||||
|
||||
BRT t;
|
||||
int r;
|
||||
CACHETABLE ct;
|
||||
char fname[]="testbrt.brt";
|
||||
int i;
|
||||
|
||||
r = brt_create_cachetable(&ct, 0);
|
||||
assert(r==0);
|
||||
unlink(fname);
|
||||
r = open_brt(fname, 0, 1, &t, 4096, ct, default_compare_fun);
|
||||
assert(r==0);
|
||||
|
||||
DBT key, val;
|
||||
int k, v;
|
||||
|
||||
for (i=0; i<n; i++) {
|
||||
k = i; v = ~i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
fill_dbt(&val, &v, sizeof v);
|
||||
r = brt_insert(t, &key, &val, 0);
|
||||
assert(r == 0);
|
||||
}
|
||||
|
||||
for (i=0; i<n-1; i++) {
|
||||
k = i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
r = brt_delete(t, &key, 0);
|
||||
assert(r == 0);
|
||||
}
|
||||
|
||||
/* lookups should all fail */
|
||||
for (i=0; i<n-1; i++) {
|
||||
k = i;
|
||||
fill_dbt(&key, &k, sizeof k);
|
||||
init_dbt(&val); val.flags = DB_DBT_MALLOC;
|
||||
r = brt_lookup(t, &key, &val, 0);
|
||||
assert(r == DB_NOTFOUND);
|
||||
}
|
||||
|
||||
/* cursor should find the last key */
|
||||
BRT_CURSOR cursor;
|
||||
|
||||
r = brt_cursor(t, &cursor);
|
||||
assert(r == 0);
|
||||
|
||||
init_dbt(&key); key.flags = DB_DBT_MALLOC;
|
||||
init_dbt(&val); val.flags = DB_DBT_MALLOC;
|
||||
r = brt_c_get(cursor, &key, &val, DB_FIRST);
|
||||
assert(r == 0);
|
||||
int vv;
|
||||
assert(val.size == sizeof vv);
|
||||
memcpy(&vv, val.data, val.size);
|
||||
assert(vv == ~(n-1));
|
||||
toku_free(key.data);
|
||||
toku_free(val.data);
|
||||
|
||||
r = brt_cursor_close(cursor);
|
||||
assert(r == 0);
|
||||
|
||||
r = close_brt(t); assert(r==0);
|
||||
r = cachetable_close(&ct); assert(r==0);
|
||||
}
|
||||
|
||||
void test_brt_delete() {
|
||||
test_brt_delete_empty(); memory_check_all_free();
|
||||
test_brt_delete_present(1); memory_check_all_free();
|
||||
test_brt_delete_present(100); memory_check_all_free();
|
||||
test_brt_delete_present(500); memory_check_all_free();
|
||||
test_brt_delete_not_present(1); memory_check_all_free();
|
||||
test_brt_delete_not_present(100); memory_check_all_free();
|
||||
test_brt_delete_not_present(500); memory_check_all_free();
|
||||
test_brt_delete_cursor_first(1); memory_check_all_free();
|
||||
test_brt_delete_cursor_first(100); memory_check_all_free();
|
||||
test_brt_delete_cursor_first(500); memory_check_all_free();
|
||||
}
|
||||
|
||||
static void brt_blackbox_test (void) {
|
||||
test_brt_cursor();
|
||||
memory_check = 1;
|
||||
test_wrongendian_compare(0, 2); memory_check_all_free();
|
||||
test_wrongendian_compare(1, 2); memory_check_all_free();
|
||||
test_wrongendian_compare(1, 257); memory_check_all_free();
|
||||
|
@ -1377,7 +1633,11 @@ static void brt_blackbox_test (void) {
|
|||
// Once upon a time srandom(8) caused this test to fail.
|
||||
srandom(8); test4(2048, 1<<15, 1);
|
||||
|
||||
memory_check = 1;
|
||||
|
||||
test_brt_limits();
|
||||
test_brt_cursor();
|
||||
test_brt_delete();
|
||||
|
||||
// test3(1<<19, 1<<20, 0);
|
||||
// test4(1<<19, 1<<20, 0);
|
||||
|
|
345
newbrt/brt.c
345
newbrt/brt.c
|
@ -277,9 +277,9 @@ static void insert_to_buffer_in_leaf (BRTNODE node, DBT *k, DBT *v, DB *db) {
|
|||
}
|
||||
#endif
|
||||
|
||||
static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v) {
|
||||
unsigned int n_bytes_added = KEY_VALUE_OVERHEAD + k->size + v->size;
|
||||
int r = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size);
|
||||
static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v, int type) {
|
||||
unsigned int n_bytes_added = BRT_CMD_OVERHEAD + KEY_VALUE_OVERHEAD + k->size + v->size;
|
||||
int r = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
|
||||
if (r!=0) return r;
|
||||
node->u.n.n_bytes_in_hashtable[childnum] += n_bytes_added;
|
||||
node->u.n.n_bytes_in_hashtables += n_bytes_added;
|
||||
|
@ -463,7 +463,7 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int
|
|||
if (keycompare(key, keylen, node->childkeys[cnum], node->childkeylens[cnum])<=0)
|
||||
break;
|
||||
}
|
||||
child_weights[cnum] += keylen + datalen + KEY_VALUE_OVERHEAD;
|
||||
child_weights[cnum] += keylen + datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
|
||||
child_counts[cnum]++;
|
||||
}));
|
||||
{
|
||||
|
@ -504,25 +504,25 @@ void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int
|
|||
}
|
||||
#endif
|
||||
|
||||
static int brtnode_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
||||
static int brtnode_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
|
||||
int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
|
||||
DBT *split,
|
||||
int debug,
|
||||
DB *db);
|
||||
int debug);
|
||||
|
||||
/* key is not in the hashtable in node. Either put the key-value pair in the child, or put it in the node. */
|
||||
static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTNODE node, BRTNODE child,
|
||||
DBT *k, DBT *v,
|
||||
int childnum_of_node,
|
||||
DB *db) {
|
||||
static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTNODE node, BRTNODE child,
|
||||
BRT_CMD *cmd,
|
||||
int childnum_of_node) {
|
||||
assert(node->height>0); /* Not a leaf. */
|
||||
DBT *k = cmd->u.id.key;
|
||||
DBT *v = cmd->u.id.val;
|
||||
int to_child=serialize_brtnode_size(child)+k->size+v->size+KEY_VALUE_OVERHEAD <= child->nodesize;
|
||||
if (brt_debug_mode) {
|
||||
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
|
||||
if (childnum_of_node+1<node->u.n.n_children) {
|
||||
DBT k2;
|
||||
printf(" nextsplitkey=%s\n", (char*)node->u.n.childkeys[childnum_of_node]);
|
||||
assert(t->compare_fun(db, k, fill_dbt(&k2, node->u.n.childkeys[childnum_of_node], node->u.n.childkeylens[childnum_of_node]))<=0);
|
||||
assert(t->compare_fun(cmd->u.id.db, k, fill_dbt(&k2, node->u.n.childkeys[childnum_of_node], node->u.n.childkeylens[childnum_of_node]))<=0);
|
||||
} else {
|
||||
printf("\n");
|
||||
}
|
||||
|
@ -532,36 +532,35 @@ static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTN
|
|||
DBT againk;
|
||||
init_dbt(&againk);
|
||||
//printf("%s:%d hello!\n", __FILE__, __LINE__);
|
||||
int r = brtnode_insert(t, child, k, v,
|
||||
int r = brtnode_put_cmd(t, child, cmd,
|
||||
&again_split, &againa, &againb, &againk,
|
||||
0,
|
||||
db);
|
||||
0);
|
||||
if (r!=0) return r;
|
||||
assert(again_split==0); /* I only did the insert if I knew it wouldn't push down, and hence wouldn't split. */
|
||||
return r;
|
||||
} else {
|
||||
int r=insert_to_hash_in_nonleaf(node, childnum_of_node, k, v);
|
||||
int r=insert_to_hash_in_nonleaf(node, childnum_of_node, k, v, cmd->type);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
|
||||
DBT *k, DBT *v,
|
||||
static int push_a_brt_cmd_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
|
||||
BRT_CMD *cmd,
|
||||
int *child_did_split, BRTNODE *childa, BRTNODE *childb,
|
||||
DBT *childsplitk,
|
||||
DB *db) {
|
||||
DBT *childsplitk) {
|
||||
//if (debug) printf("%s:%d %*sinserting down\n", __FILE__, __LINE__, debug, "");
|
||||
//printf("%s:%d hello!\n", __FILE__, __LINE__);
|
||||
assert(node->height>0);
|
||||
|
||||
{
|
||||
int r = brtnode_insert(t, child, k, v,
|
||||
int r = brtnode_put_cmd(t, child, cmd,
|
||||
child_did_split, childa, childb, childsplitk,
|
||||
0,
|
||||
db);
|
||||
0);
|
||||
if (r!=0) return r;
|
||||
}
|
||||
|
||||
DBT *k = cmd->u.id.key;
|
||||
DBT *v = cmd->u.id.val;
|
||||
//if (debug) printf("%s:%d %*sinserted down child_did_split=%d\n", __FILE__, __LINE__, debug, "", child_did_split);
|
||||
{
|
||||
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size); // Must delete after doing the insert, to avoid operating on freed' key
|
||||
|
@ -569,7 +568,7 @@ static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
|
|||
if (r!=0) return r;
|
||||
}
|
||||
{
|
||||
int n_bytes_removed = (k->size + v->size + KEY_VALUE_OVERHEAD);
|
||||
int n_bytes_removed = (k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD);
|
||||
node->u.n.n_bytes_in_hashtables -= n_bytes_removed;
|
||||
node->u.n.n_bytes_in_hashtable[childnum] -= n_bytes_removed;
|
||||
}
|
||||
|
@ -643,17 +642,20 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
|
|||
|
||||
node->u.n.n_bytes_in_hashtables -= old_count; /* By default, they are all removed. We might add them back in. */
|
||||
/* Keep pushing to the children, but not if the children would require a pushdown */
|
||||
HASHTABLE_ITERATE(old_h, skey, skeylen, sval, svallen, ({
|
||||
HASHTABLE_ITERATE(old_h, skey, skeylen, sval, svallen, type, ({
|
||||
DBT skd, svd;
|
||||
fill_dbt_ap(&skd, skey, skeylen, app_private);
|
||||
fill_dbt(&svd, sval, svallen);
|
||||
BRT_CMD brtcmd;
|
||||
brtcmd.type = type; brtcmd.u.id.key = &skd; brtcmd.u.id.val = &svd; brtcmd.u.id.db = db;
|
||||
if (t->compare_fun(db, &skd, childsplitk)<=0) {
|
||||
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childa, &skd, &svd, childnum, db);
|
||||
r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, childa, &brtcmd, childnum);
|
||||
} else {
|
||||
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childb, &skd, &svd, childnum+1, db);
|
||||
r=push_brt_cmd_down_only_if_it_wont_push_more_else_put_here(t, node, childb, &brtcmd, childnum+1);
|
||||
}
|
||||
if (r!=0) return r;
|
||||
}));
|
||||
|
||||
toku_hashtable_free(&old_h);
|
||||
|
||||
r=cachetable_unpin(t->cf, childa->thisnodename, 1);
|
||||
|
@ -687,7 +689,7 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
|
||||
static int push_some_brt_cmds_down (BRT t, BRTNODE node, int childnum,
|
||||
int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
|
||||
DBT *splitk,
|
||||
int debug,
|
||||
|
@ -706,7 +708,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
|
|||
verify_counts(child);
|
||||
//printf("%s:%d height=%d n_bytes_in_hashtable = {%d, %d, %d, ...}\n", __FILE__, __LINE__, child->height, child->n_bytes_in_hashtable[0], child->n_bytes_in_hashtable[1], child->n_bytes_in_hashtable[2]);
|
||||
if (child->height>0 && child->u.n.n_children>0) assert(child->u.n.children[child->u.n.n_children-1]!=0);
|
||||
if (debug) printf("%s:%d %*spush_some_kvpairs_down to %lld\n", __FILE__, __LINE__, debug, "", child->thisnodename);
|
||||
if (debug) printf("%s:%d %*spush_some_brt_cmds_down to %lld\n", __FILE__, __LINE__, debug, "", child->thisnodename);
|
||||
/* I am exposing the internals of the hash table here, mostly because I am not thinking of a really
|
||||
* good way to do it otherwise. I want to loop over the elements of the hash table, deleting some as I
|
||||
* go. The HASHTABLE_ITERATE macro will break if I delete something from the hash table. */
|
||||
|
@ -722,30 +724,39 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
|
|||
long int randomnumber = random();
|
||||
//printf("%s:%d Try random_pick, weight=%d \n", __FILE__, __LINE__, node->u.n.n_bytes_in_hashtable[childnum]);
|
||||
assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0);
|
||||
while(0==toku_hashtable_random_pick(node->u.n.htables[childnum], &key, &keylen, &val, &vallen, &randomnumber)) {
|
||||
int type;
|
||||
while(0==toku_hashtable_random_pick(node->u.n.htables[childnum], &key, &keylen, &val, &vallen, &type, &randomnumber)) {
|
||||
int child_did_split=0; BRTNODE childa, childb;
|
||||
DBT hk,hv;
|
||||
DBT childsplitk;
|
||||
BRT_CMD brtcmd;
|
||||
|
||||
fill_dbt_ap(&hk, key, keylen, app_private);
|
||||
fill_dbt(&hv, val, vallen);
|
||||
brtcmd.type = type;
|
||||
brtcmd.u.id.key = &hk;
|
||||
brtcmd.u.id.val = &hv;
|
||||
brtcmd.u.id.db = db;
|
||||
|
||||
//printf("%s:%d random_picked\n", __FILE__, __LINE__);
|
||||
init_dbt(&childsplitk);
|
||||
childsplitk.app_private = splitk->app_private;
|
||||
|
||||
if (debug) printf("%s:%d %*spush down %s\n", __FILE__, __LINE__, debug, "", (char*)key);
|
||||
r = push_a_kvpair_down (t, node, child, childnum,
|
||||
fill_dbt_ap(&hk, key, keylen, app_private), fill_dbt(&hv, val, vallen),
|
||||
r = push_a_brt_cmd_down (t, node, child, childnum,
|
||||
&brtcmd,
|
||||
&child_did_split, &childa, &childb,
|
||||
&childsplitk,
|
||||
db);
|
||||
&childsplitk);
|
||||
|
||||
if (0){
|
||||
unsigned int sum=0;
|
||||
HASHTABLE_ITERATE(node->u.n.htables[childnum], hk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl,
|
||||
sum+=hkl+hdl+KEY_VALUE_OVERHEAD);
|
||||
HASHTABLE_ITERATE(node->u.n.htables[childnum], hk __attribute__((__unused__)), hkl, hd __attribute__((__unused__)), hdl, type __attribute__((__unused__)),
|
||||
sum+=hkl+hdl+KEY_VALUE_OVERHEAD+BRT_CMD_OVERHEAD);
|
||||
printf("%s:%d sum=%d\n", __FILE__, __LINE__, sum);
|
||||
assert(sum==node->u.n.n_bytes_in_hashtable[childnum]);
|
||||
}
|
||||
if (node->u.n.n_bytes_in_hashtable[childnum]>0) assert(toku_hashtable_n_entries(node->u.n.htables[childnum])>0);
|
||||
//printf("%s:%d %d=push_a_kvpair_down=(); child_did_split=%d (weight=%d)\n", __FILE__, __LINE__, r, child_did_split, node->u.n.n_bytes_in_hashtable[childnum]);
|
||||
//printf("%s:%d %d=push_a_brt_cmd_down=(); child_did_split=%d (weight=%d)\n", __FILE__, __LINE__, r, child_did_split, node->u.n.n_bytes_in_hashtable[childnum]);
|
||||
if (r!=0) return r;
|
||||
if (child_did_split) {
|
||||
// If the child splits, we don't push down any further.
|
||||
|
@ -759,7 +770,7 @@ static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
|
|||
}
|
||||
if (0) printf("%s:%d done random picking\n", __FILE__, __LINE__);
|
||||
}
|
||||
if (debug) printf("%s:%d %*sdone push_some_kvpairs_down, unpinning %lld\n", __FILE__, __LINE__, debug, "", targetchild);
|
||||
if (debug) printf("%s:%d %*sdone push_some_brt_cmds_down, unpinning %lld\n", __FILE__, __LINE__, debug, "", targetchild);
|
||||
r=cachetable_unpin(t->cf, targetchild, 1);
|
||||
if (r!=0) return r;
|
||||
*did_split=0;
|
||||
|
@ -787,10 +798,10 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE
|
|||
find_heaviest_child(node, &childnum);
|
||||
if (0) printf("%s:%d %*spush some down from %lld into %lld (child %d)\n", __FILE__, __LINE__, debug, "", node->thisnodename, node->u.n.children[childnum], childnum);
|
||||
assert(node->u.n.children[childnum]!=0);
|
||||
int r = push_some_kvpairs_down(t, node, childnum, did_split, nodea, nodeb, splitk, debugp1(debug), app_private, db);
|
||||
int r = push_some_brt_cmds_down(t, node, childnum, did_split, nodea, nodeb, splitk, debugp1(debug), app_private, db);
|
||||
if (r!=0) return r;
|
||||
assert(*did_split==0 || *did_split==1);
|
||||
if (debug) printf("%s:%d %*sdid push_some_kvpairs_down did_split=%d\n", __FILE__, __LINE__, debug, "", *did_split);
|
||||
if (debug) printf("%s:%d %*sdid push_some_brt_cmds_down did_split=%d\n", __FILE__, __LINE__, debug, "", *did_split);
|
||||
if (*did_split) {
|
||||
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
|
||||
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
|
||||
|
@ -811,45 +822,69 @@ static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE
|
|||
|
||||
#define INSERT_ALL_AT_ONCE
|
||||
|
||||
static int brt_leaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
||||
static int brt_leaf_insertm (BRT t, BRTNODE node, BRT_CMD *cmd,
|
||||
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
|
||||
int debug,
|
||||
DB *db) {
|
||||
int debug) {
|
||||
if (cmd->type == BRT_INSERT) {
|
||||
DBT *k = cmd->u.id.key;
|
||||
DBT *v = cmd->u.id.val;
|
||||
DB *db = cmd->u.id.db;
|
||||
#ifdef INSERT_ALL_AT_ONCE
|
||||
int replaced_v_size;
|
||||
enum pma_errors pma_status = pma_insert_or_replace(node->u.l.buffer, k, v, db, &replaced_v_size);
|
||||
assert(pma_status==BRT_OK);
|
||||
//printf("replaced_v_size=%d\n", replaced_v_size);
|
||||
if (replaced_v_size>=0) {
|
||||
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
|
||||
} else {
|
||||
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
|
||||
}
|
||||
int replaced_v_size;
|
||||
enum pma_errors pma_status = pma_insert_or_replace(node->u.l.buffer, k, v, db, &replaced_v_size);
|
||||
assert(pma_status==BRT_OK);
|
||||
//printf("replaced_v_size=%d\n", replaced_v_size);
|
||||
if (replaced_v_size>=0) {
|
||||
node->u.l.n_bytes_in_buffer += v->size - replaced_v_size;
|
||||
} else {
|
||||
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
|
||||
}
|
||||
#else
|
||||
DBT v2;
|
||||
enum pma_errors pma_status = pma_lookup(node->u.l.buffer, k, init_dbt(&v2), db);
|
||||
if (pma_status==BRT_OK) {
|
||||
pma_status = pma_delete(node->u.l.buffer, k, db);
|
||||
assert(pma_status==BRT_OK);
|
||||
node->u.l.n_bytes_in_buffer -= k->size + v2.size + KEY_VALUE_OVERHEAD;
|
||||
}
|
||||
pma_status = pma_insert(node->u.l.buffer, k, v, db);
|
||||
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
|
||||
DBT v2;
|
||||
enum pma_errors pma_status = pma_lookup(node->u.l.buffer, k, init_dbt(&v2), db);
|
||||
if (pma_status==BRT_OK) {
|
||||
pma_status = pma_delete(node->u.l.buffer, k, db);
|
||||
assert(pma_status==BRT_OK);
|
||||
node->u.l.n_bytes_in_buffer -= k->size + v2.size + KEY_VALUE_OVERHEAD;
|
||||
}
|
||||
pma_status = pma_insert(node->u.l.buffer, k, v, db);
|
||||
node->u.l.n_bytes_in_buffer += k->size + v->size + KEY_VALUE_OVERHEAD;
|
||||
#endif
|
||||
// If it doesn't fit, then split the leaf.
|
||||
if (serialize_brtnode_size(node) > node->nodesize) {
|
||||
int r = brtleaf_split (t, node, nodea, nodeb, splitk, k->app_private, db);
|
||||
if (r!=0) return r;
|
||||
//printf("%s:%d splitkey=%s\n", __FILE__, __LINE__, (char*)*splitkey);
|
||||
split_count++;
|
||||
*did_split = 1;
|
||||
verify_counts(*nodea); verify_counts(*nodeb);
|
||||
if (debug) printf("%s:%d %*snodeb->thisnodename=%lld nodeb->size=%d\n", __FILE__, __LINE__, debug, "", (*nodeb)->thisnodename, (*nodeb)->nodesize);
|
||||
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
|
||||
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
|
||||
} else {
|
||||
*did_split = 0;
|
||||
// If it doesn't fit, then split the leaf.
|
||||
if (serialize_brtnode_size(node) > node->nodesize) {
|
||||
int r = brtleaf_split (t, node, nodea, nodeb, splitk, k->app_private, db);
|
||||
if (r!=0) return r;
|
||||
//printf("%s:%d splitkey=%s\n", __FILE__, __LINE__, (char*)*splitkey);
|
||||
split_count++;
|
||||
*did_split = 1;
|
||||
verify_counts(*nodea); verify_counts(*nodeb);
|
||||
if (debug) printf("%s:%d %*snodeb->thisnodename=%lld nodeb->size=%d\n", __FILE__, __LINE__, debug, "", (*nodeb)->thisnodename, (*nodeb)->nodesize);
|
||||
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
|
||||
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
|
||||
} else {
|
||||
*did_split = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (cmd->type == BRT_DELETE) {
|
||||
int r;
|
||||
DBT val;
|
||||
|
||||
/* TODO combine lookup and delete */
|
||||
init_dbt(&val);
|
||||
r = pma_lookup(node->u.l.buffer, cmd->u.id.key, &val, cmd->u.id.db);
|
||||
if (r == 0) {
|
||||
r = pma_delete(node->u.l.buffer, cmd->u.id.key, cmd->u.id.db);
|
||||
assert(r == BRT_OK);
|
||||
node->u.l.n_bytes_in_buffer -= cmd->u.id.key->size + val.size + KEY_VALUE_OVERHEAD;
|
||||
}
|
||||
*did_split = 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
/* unknown message */
|
||||
assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -866,16 +901,20 @@ static unsigned int brtnode_which_child (BRTNODE node , DBT *k, BRT t, DB *db) {
|
|||
}
|
||||
|
||||
|
||||
static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
||||
static int brt_nonleaf_insertm (BRT t, BRTNODE node, BRT_CMD *cmd,
|
||||
int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
|
||||
DBT *splitk,
|
||||
int debug,
|
||||
DB *db) {
|
||||
int debug) {
|
||||
bytevec olddata;
|
||||
ITEMLEN olddatalen;
|
||||
unsigned int childnum;
|
||||
int found;
|
||||
|
||||
int type = cmd->type;
|
||||
DBT *k = cmd->u.id.key;
|
||||
DBT *v = cmd->u.id.val;
|
||||
DB *db = cmd->u.id.db;
|
||||
|
||||
childnum = brtnode_which_child(node, k, t, db);
|
||||
|
||||
/* non-buffering mode when cursors are open on this child */
|
||||
|
@ -895,8 +934,9 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
|||
assert(r == 0);
|
||||
child = child_v;
|
||||
|
||||
r = brtnode_insert(t, child, k, v,
|
||||
&child_did_split, &childa, &childb, &childsplitk, 0, db);
|
||||
child_did_split = 0;
|
||||
r = brtnode_put_cmd(t, child, cmd,
|
||||
&child_did_split, &childa, &childb, &childsplitk, 0);
|
||||
assert(r == 0);
|
||||
if (child_did_split) {
|
||||
if (0) printf("brt_nonleaf_insert child_split %p\n", child);
|
||||
|
@ -913,7 +953,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
|||
return r;
|
||||
}
|
||||
|
||||
found = !toku_hash_find(node->u.n.htables[childnum], k->data, k->size, &olddata, &olddatalen);
|
||||
found = !toku_hash_find(node->u.n.htables[childnum], k->data, k->size, &olddata, &olddatalen, &type);
|
||||
|
||||
if (0) { // It is faster to do this, except on yobiduck where things grind to a halt.
|
||||
void *child_v;
|
||||
|
@ -922,7 +962,7 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
|||
/* If the child is in memory, then go ahead and put it in the child. */
|
||||
BRTNODE child = child_v;
|
||||
if (found) {
|
||||
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD;
|
||||
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
|
||||
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size);
|
||||
assert(r==0);
|
||||
node->u.n.n_bytes_in_hashtables -= diff;
|
||||
|
@ -932,8 +972,8 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
|||
int child_did_split;
|
||||
BRTNODE childa, childb;
|
||||
DBT childsplitk;
|
||||
int r = brtnode_insert(t, child, k, v,
|
||||
&child_did_split, &childa, &childb, &childsplitk, 0, db);
|
||||
int r = brtnode_put_cmd(t, child, cmd,
|
||||
&child_did_split, &childa, &childb, &childsplitk, 0);
|
||||
if (r!=0) return r;
|
||||
if (child_did_split) {
|
||||
r=handle_split_of_child(t, node, childnum,
|
||||
|
@ -954,19 +994,18 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
|||
verify_counts(node);
|
||||
if (found) {
|
||||
int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size);
|
||||
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD;
|
||||
int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
|
||||
assert(r==0);
|
||||
node->u.n.n_bytes_in_hashtables -= diff;
|
||||
node->u.n.n_bytes_in_hashtable[childnum] -= diff;
|
||||
//printf("%s:%d deleted %d bytes\n", __FILE__, __LINE__, diff);
|
||||
}
|
||||
{
|
||||
int diff = k->size + v->size + KEY_VALUE_OVERHEAD;
|
||||
int r=toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size);
|
||||
int diff = k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
|
||||
int r=toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
|
||||
assert(r==0);
|
||||
node->u.n.n_bytes_in_hashtables += diff;
|
||||
node->u.n.n_bytes_in_hashtable[childnum] += diff;
|
||||
|
||||
}
|
||||
if (debug) printf("%s:%d %*sDoing maybe_push_down\n", __FILE__, __LINE__, debug, "");
|
||||
int r = brtnode_maybe_push_down(t, node, did_split, nodea, nodeb, splitk, debugp1(debug), k->app_private, db);
|
||||
|
@ -989,20 +1028,17 @@ static int brt_nonleaf_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
|||
}
|
||||
|
||||
|
||||
static int brtnode_insert (BRT t, BRTNODE node, DBT *k, DBT *v,
|
||||
static int brtnode_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
|
||||
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
|
||||
int debug,
|
||||
DB *db) {
|
||||
int debug) {
|
||||
if (node->height==0) {
|
||||
return brt_leaf_insert(t, node, k, v,
|
||||
return brt_leaf_insertm(t, node, cmd,
|
||||
did_split, nodea, nodeb, splitk,
|
||||
debug,
|
||||
db);
|
||||
debug);
|
||||
} else {
|
||||
return brt_nonleaf_insert(t, node, k, v,
|
||||
return brt_nonleaf_insertm(t, node, cmd,
|
||||
did_split, nodea, nodeb, splitk,
|
||||
debug,
|
||||
db);
|
||||
debug);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1218,10 +1254,11 @@ int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKE
|
|||
return 0;
|
||||
}
|
||||
|
||||
int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
|
||||
int brt_root_put_cmd(BRT brt, BRT_CMD *cmd) {
|
||||
void *node_v;
|
||||
BRTNODE node;
|
||||
CACHEKEY *rootp;
|
||||
int result;
|
||||
int r;
|
||||
int did_split; BRTNODE nodea=0, nodeb=0;
|
||||
DBT splitk;
|
||||
|
@ -1239,10 +1276,10 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
|
|||
}
|
||||
node=node_v;
|
||||
if (debug) printf("%s:%d node inserting\n", __FILE__, __LINE__);
|
||||
r = brtnode_insert(brt, node, k, v,
|
||||
did_split = 0;
|
||||
result = brtnode_put_cmd(brt, node, cmd,
|
||||
&did_split, &nodea, &nodeb, &splitk,
|
||||
debug, db);
|
||||
if (r!=0) return r;
|
||||
debug);
|
||||
if (debug) printf("%s:%d did_insert\n", __FILE__, __LINE__);
|
||||
if (did_split) {
|
||||
//printf("%s:%d did_split=%d nodeb=%p nodeb->thisnodename=%lld nodeb->nodesize=%d\n", __FILE__, __LINE__, did_split, nodeb, nodeb->thisnodename, nodeb->nodesize);
|
||||
|
@ -1252,65 +1289,76 @@ int brt_insert (BRT brt, DBT *k, DBT *v, DB* db) {
|
|||
}
|
||||
if (did_split) {
|
||||
r = brt_init_new_root(brt, nodea, nodeb, splitk, rootp);
|
||||
if (r != 0)
|
||||
return r;
|
||||
assert(r == 0);
|
||||
} else {
|
||||
if (node->height>0)
|
||||
assert(node->u.n.n_children<=TREE_FANOUT);
|
||||
}
|
||||
cachetable_unpin(brt->cf, *rootp, 1);
|
||||
if ((r = unpin_brt_header(brt))!=0) return r;
|
||||
r = unpin_brt_header(brt);
|
||||
assert(r == 0);
|
||||
//assert(0==cachetable_assert_all_unpinned(brt->cachetable));
|
||||
return 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Insert the pair (key, val) into the tree.
 *
 * The pair is wrapped in a BRT_INSERT command and handed to
 * brt_root_put_cmd(), which injects it at the root of the tree.
 *
 *   brt  - tree to insert into
 *   key  - key of the pair
 *   val  - value of the pair
 *   db   - DB handle carried along in the command
 *
 * Returns the result of brt_root_put_cmd() unchanged.
 */
int brt_insert (BRT brt, DBT *key, DBT *val, DB* db) {
    BRT_CMD insert_cmd = {
        .type = BRT_INSERT,
        .u = { .id = { .key = key, .val = val, .db = db } },
    };

    return brt_root_put_cmd(brt, &insert_cmd);
}
|
||||
|
||||
int brt_lookup_node (BRT brt, diskoff off, DBT *k, DBT *v, DB *db) {
|
||||
int result;
|
||||
void *node_v;
|
||||
int r = cachetable_get_and_pin(brt->cf, off, &node_v,
|
||||
brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
|
||||
BRTNODE node;
|
||||
int childnum;
|
||||
if (r!=0) {
|
||||
int r2;
|
||||
died0:
|
||||
// printf("%s:%d r=%d\n", __FILE__, __LINE__, r);
|
||||
r2 = cachetable_unpin(brt->cf, off, 0);
|
||||
return r;
|
||||
}
|
||||
|
||||
if (r!=0)
|
||||
return r;
|
||||
|
||||
node=node_v;
|
||||
// Leaves have a single mdict, where the data is found.
|
||||
if (node->height==0) {
|
||||
r = pma_lookup(node->u.l.buffer, k, v, db);
|
||||
result = pma_lookup(node->u.l.buffer, k, v, db);
|
||||
//printf("%s:%d looked up something, got answerlen=%d\n", __FILE__, __LINE__, answerlen);
|
||||
if (r!=0) goto died0;
|
||||
r = cachetable_unpin(brt->cf, off, 0);
|
||||
return r;
|
||||
assert(r == 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
childnum = brtnode_which_child(node, k, brt, db);
|
||||
// Leaves have a single mdict, where the data is found.
|
||||
{
|
||||
bytevec hanswer;
|
||||
ITEMLEN hanswerlen;
|
||||
if (toku_hash_find (node->u.n.htables[childnum], k->data, k->size, &hanswer, &hanswerlen)==0) {
|
||||
//printf("Found %d bytes\n", *vallen);
|
||||
ybt_set_value(v, hanswer, hanswerlen, &brt->sval);
|
||||
//printf("%s:%d Returning %p\n", __FILE__, __LINE__, v->data);
|
||||
r = cachetable_unpin(brt->cf, off, 0);
|
||||
assert(r==0);
|
||||
return 0;
|
||||
int type;
|
||||
if (toku_hash_find (node->u.n.htables[childnum], k->data, k->size, &hanswer, &hanswerlen, &type)==0) {
|
||||
if (type == BRT_INSERT) {
|
||||
//printf("Found %d bytes\n", *vallen);
|
||||
ybt_set_value(v, hanswer, hanswerlen, &brt->sval);
|
||||
//printf("%s:%d Returning %p\n", __FILE__, __LINE__, v->data);
|
||||
result = 0;
|
||||
} else if (type == BRT_DELETE) {
|
||||
result = DB_NOTFOUND;
|
||||
} else
|
||||
assert(0);
|
||||
r = cachetable_unpin(brt->cf, off, 0);
|
||||
assert(r == 0);
|
||||
return result;
|
||||
}
|
||||
}
|
||||
if (node->height==0) {
|
||||
r = cachetable_unpin(brt->cf, off, 0);
|
||||
if (r==0) return DB_NOTFOUND;
|
||||
else return r;
|
||||
}
|
||||
{
|
||||
int result = brt_lookup_node(brt, node->u.n.children[childnum], k, v, db);
|
||||
r = cachetable_unpin(brt->cf, off, 0);
|
||||
if (r!=0) return r;
|
||||
return result;
|
||||
}
|
||||
|
||||
result = brt_lookup_node(brt, node->u.n.children[childnum], k, v, db);
|
||||
r = cachetable_unpin(brt->cf, off, 0);
|
||||
assert(r == 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1336,6 +1384,22 @@ int brt_lookup (BRT brt, DBT *k, DBT *v, DB *db) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* Delete by key.
 *
 * Builds a BRT_DELETE command for `key` and hands it to
 * brt_root_put_cmd(), which injects it at the root of the tree.
 *
 *   brt  - tree to delete from
 *   key  - key to delete
 *   db   - DB handle carried along in the command
 *
 * Returns the result of brt_root_put_cmd() unchanged.
 */
int brt_delete(BRT brt, DBT *key, DB *db) {
    /* A delete has no payload, but the command layout shared with insert
     * still has a value slot, so point it at a size-0 DBT. */
    DBT empty_val;
    init_dbt(&empty_val);
    empty_val.size = 0;

    BRT_CMD delete_cmd = {
        .type = BRT_DELETE,
        .u = { .id = { .key = key, .val = &empty_val, .db = db } },
    };

    return brt_root_put_cmd(brt, &delete_cmd);
}
|
||||
|
||||
int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse);
|
||||
|
||||
int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen) {
|
||||
|
@ -1356,9 +1420,9 @@ int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lole
|
|||
int i;
|
||||
for (i=0; i< node->u.n.n_children-1; i++) {
|
||||
printf("%*schild %d buffered (%d entries):\n", depth+1, "", i, toku_hashtable_n_entries(node->u.n.htables[i]));
|
||||
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen,
|
||||
HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
|
||||
({
|
||||
printf("%*s %s %s\n", depth+2, "", (char*)key, (char*)data);
|
||||
printf("%*s %s %s %d\n", depth+2, "", (char*)key, (char*)data, type);
|
||||
assert(strlen((char*)key)+1==keylen);
|
||||
assert(strlen((char*)data)+1==datalen);
|
||||
}));
|
||||
|
@ -1468,7 +1532,9 @@ int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, byteve
|
|||
}
|
||||
{
|
||||
void verify_pair (bytevec key, unsigned int keylen,
|
||||
bytevec data __attribute__((__unused__)), unsigned int datalen __attribute__((__unused__)),
|
||||
bytevec data __attribute__((__unused__)),
|
||||
unsigned int datalen __attribute__((__unused__)),
|
||||
int type __attribute__((__unused__)),
|
||||
void *ignore __attribute__((__unused__))) {
|
||||
if (thislorange) assert(keycompare(thislorange,thislolen,key,keylen)<0);
|
||||
if (thishirange && keycompare(key,keylen,thishirange,thishilen)>0) {
|
||||
|
@ -1525,11 +1591,6 @@ void brt_flush (BRT brt) {
|
|||
}
|
||||
#endif
|
||||
|
||||
/* Unimplemented placeholder: calling it aborts the process.
 *
 * Intended algorithm: for each key in the cnum'th mdict, insert it into the
 * child node.  That may cause a split.
 */
int brtnode_flush_child (BRT brt, BRTNODE node, int cnum) {
    /* None of the parameters is used yet; touch each one explicitly. */
    (void)brt;
    (void)node;
    (void)cnum;

    abort();
}
|
||||
|
||||
int brt_flush_debug = 0;
|
||||
|
||||
/*
|
||||
|
@ -1550,7 +1611,7 @@ void brt_flush_child(BRT t, BRTNODE node, int childnum, BRT_CURSOR cursor) {
|
|||
}
|
||||
|
||||
init_dbt(&child_splitk);
|
||||
r = push_some_kvpairs_down(t, node, childnum,
|
||||
r = push_some_brt_cmds_down(t, node, childnum,
|
||||
&child_did_split, &childa, &childb, &child_splitk, brt_flush_debug, 0, 0);
|
||||
assert(r == 0);
|
||||
if (brt_flush_debug) {
|
||||
|
|
|
@ -14,6 +14,7 @@ int open_brt (const char *fname, const char *dbname, int is_create, BRT *, int n
|
|||
//int brt_open (BRT *, char *fname, char *dbname);
|
||||
int brt_insert (BRT brt, DBT *k, DBT *v, DB*db);
|
||||
int brt_lookup (BRT brt, DBT *k, DBT *v, DB*db);
|
||||
int brt_delete (BRT brt, DBT *k, DB *db);
|
||||
int close_brt (BRT);
|
||||
int dump_brt (BRT brt);
|
||||
void brt_fsync (BRT); /* fsync, but don't clear the caches. */
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
#include "hashtable.h"
|
||||
#include "memory.h"
|
||||
#include "primes.h"
|
||||
#include "../include/db.h"
|
||||
// #include "../include/ydb-constants.h"
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -41,7 +41,7 @@ static void hash_find_internal (HASHTABLE tab, unsigned int hash, const unsigned
|
|||
*hashelt = 0;
|
||||
}
|
||||
|
||||
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen) {
|
||||
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen, int *type) {
|
||||
HASHELT he, *prev_ptr;
|
||||
hash_find_internal(tab, hash_key (key, keylen), key, keylen, &he, &prev_ptr);
|
||||
if (he==0) {
|
||||
|
@ -49,6 +49,7 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I
|
|||
} else {
|
||||
*data = &he->keyval[he->keylen];
|
||||
*datalen = he->vallen;
|
||||
*type = he->type;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
@ -82,7 +83,7 @@ int toku_hash_rehash_everything (HASHTABLE tab, unsigned int primeindexdelta) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *val, ITEMLEN vallen)
|
||||
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *val, ITEMLEN vallen, int type)
|
||||
{
|
||||
unsigned int hk = hash_key (key,keylen);
|
||||
unsigned int h = hk%tab->arraysize;
|
||||
|
@ -97,6 +98,7 @@ int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void
|
|||
/* Otherwise the key is not already present, so we need to add it. */
|
||||
HASHELT he=toku_malloc(sizeof(*he)+keylen+vallen);
|
||||
assert(he); // ?????
|
||||
he->type = type;
|
||||
he->keylen = keylen;
|
||||
he->vallen = vallen;
|
||||
memmove(&he->keyval[0], key, keylen);
|
||||
|
@ -134,7 +136,7 @@ int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen) {
|
|||
}
|
||||
|
||||
|
||||
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, long int *randomnumber) {
|
||||
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, long int *randomnumber) {
|
||||
unsigned int i;
|
||||
unsigned int usei = (*randomnumber)%h->arraysize;
|
||||
for (i=0; i<h->arraysize; i++, usei++) {
|
||||
|
@ -145,6 +147,7 @@ int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytev
|
|||
*keylen = he->keylen;
|
||||
*data = &he->keyval[he->keylen];
|
||||
*datalen = he->vallen;
|
||||
*type = he->type;
|
||||
*randomnumber = usei;
|
||||
return 0;
|
||||
}
|
||||
|
@ -177,7 +180,7 @@ int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *dat
|
|||
}
|
||||
#endif
|
||||
|
||||
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen, void*args), void* args) {
|
||||
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen, int type, void*args), void* args) {
|
||||
/*
|
||||
int i;
|
||||
for (i=0; i<tab->arraysize; i++) {
|
||||
|
@ -187,7 +190,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen
|
|||
}
|
||||
}
|
||||
*/
|
||||
HASHTABLE_ITERATE(tab, key, keylen, val, vallen, f(key,keylen,val,vallen,args));
|
||||
HASHTABLE_ITERATE(tab, key, keylen, val, vallen, type, f(key,keylen,val,vallen,type,args));
|
||||
}
|
||||
|
||||
int toku_hashtable_n_entries(HASHTABLE tab) {
|
||||
|
|
|
@ -12,10 +12,10 @@ int toku_hashtable_create (HASHTABLE*);
|
|||
|
||||
/* Return 0 if the key is found in the hashtable, -1 otherwise. */
|
||||
/* Warning: The data returned points to the internals of the hashtable. It is set to "const" to try to prevent you from messing it up. */
|
||||
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen);
|
||||
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen, int *type);
|
||||
|
||||
/* Replace the key if it was already there. */
|
||||
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen);
|
||||
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, int type);
|
||||
|
||||
/* It is OK to delete something that isn't there. */
|
||||
int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen);
|
||||
|
@ -24,15 +24,16 @@ int toku_hashtable_n_entries(HASHTABLE);
|
|||
|
||||
void toku_hashtable_clear(HASHTABLE);
|
||||
|
||||
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, long int *randomnumber);
|
||||
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, long int *randomnumber);
|
||||
//int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
|
||||
|
||||
typedef struct hashelt *HASHELT;
|
||||
struct hashelt {
|
||||
ITEMLEN keylen;
|
||||
ITEMLEN vallen;
|
||||
unsigned int hash;
|
||||
HASHELT next;
|
||||
int type;
|
||||
ITEMLEN keylen;
|
||||
ITEMLEN vallen;
|
||||
char keyval[]; /* the first KEYLEN bytes are the key. The next bytes are the value. */
|
||||
};
|
||||
|
||||
|
@ -44,9 +45,9 @@ struct hashtable {
|
|||
};
|
||||
|
||||
/* You cannot add or delete elements from the hashtable while iterating. */
|
||||
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,void*), void*);
|
||||
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, void*), void*);
|
||||
// If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar.
|
||||
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
|
||||
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,typevar,body) ({ \
|
||||
unsigned int hi_counter; \
|
||||
for (hi_counter=0; hi_counter<table->arraysize; hi_counter++) { \
|
||||
HASHELT hi_he; \
|
||||
|
@ -55,6 +56,7 @@ void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,
|
|||
ITEMLEN keylenvar = hi_he->keylen; \
|
||||
const char *datavar = &hi_he->keyval[hi_he->keylen]; \
|
||||
ITEMLEN datalenvar = hi_he->vallen; \
|
||||
int typevar = hi_he->type; \
|
||||
body; \
|
||||
}}})
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ void verify_hash_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
|
|||
fprintf(stderr, "%s isn't there\n", kv); abort();
|
||||
}
|
||||
|
||||
void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
|
||||
void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl, int type,
|
||||
int N, int *data, char *saw) {
|
||||
char *kv = (char*)kv_v;
|
||||
char *dv = (char*)dv_v;
|
||||
|
@ -38,6 +38,7 @@ void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
|
|||
assert(strcmp(kv+1, dv+1)==0);
|
||||
assert(strlen(kv)+1==kl);
|
||||
assert(strlen(dv)+1==dl);
|
||||
assert(type == 0);
|
||||
num = atoi(kv+1);
|
||||
for (k=0; k<N; k++) {
|
||||
if (data[k]==num) {
|
||||
|
@ -54,8 +55,8 @@ void verify_htable (HASHTABLE htable, int N, int *data, char *saw) {
|
|||
for (j=0; j<N; j++) {
|
||||
saw[j]=0;
|
||||
}
|
||||
HASHTABLE_ITERATE(htable, kv, kl, dv, dl,
|
||||
verify_htable_instance (kv, kl, dv, dl,
|
||||
HASHTABLE_ITERATE(htable, kv, kl, dv, dl, type,
|
||||
verify_htable_instance (kv, kl, dv, dl, type,
|
||||
N, data, saw));
|
||||
for (j=0; j<N; j++) {
|
||||
assert(saw[j]);
|
||||
|
@ -99,7 +100,7 @@ void test0 (void) {
|
|||
}
|
||||
snprintf(kv, 99, "k%d", ra);
|
||||
snprintf(dv, 99, "d%d", ra);
|
||||
toku_hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1);
|
||||
toku_hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1, 0);
|
||||
data[data_n++]=ra;
|
||||
}
|
||||
} else {
|
||||
|
@ -122,13 +123,14 @@ void test1(void) {
|
|||
for (j=0; j<4; j++) {
|
||||
snprintf(keys[j], 100, "k%ld", (long)(random()));
|
||||
snprintf(vals[j], 100, "v%d", j);
|
||||
toku_hash_insert(table, keys[j], strlen(keys[j])+1, vals[j], strlen(vals[j])+1);
|
||||
toku_hash_insert(table, keys[j], strlen(keys[j])+1, vals[j], strlen(vals[j])+1, 0);
|
||||
}
|
||||
for (j=0; j<4; j++) {
|
||||
bytevec key, val;
|
||||
ITEMLEN keylen, vallen;
|
||||
int type;
|
||||
long int randnum=random();
|
||||
r = toku_hashtable_random_pick(table, &key, &keylen, &val, &vallen, &randnum);
|
||||
r = toku_hashtable_random_pick(table, &key, &keylen, &val, &vallen, &type, &randnum);
|
||||
assert(r==0);
|
||||
r = toku_hash_delete(table, key, keylen);
|
||||
assert(r==0);
|
||||
|
|
Loading…
Add table
Reference in a new issue