[t:3884] fixed the problem in brtleaf_split, added back the assert in move_leafentries, and added a test (test3884.c). this required exporting brtleaf_split in brt-internal.h

git-svn-id: file:///svn/toku/tokudb@34127 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Leif Walsh 2013-04-16 23:59:47 -04:00 committed by Yoni Fogel
parent bab840d771
commit 1893af748e
3 changed files with 219 additions and 96 deletions

View file

@ -759,6 +759,9 @@ typedef struct brt_status {
void toku_brt_get_status(BRT_STATUS);
void
brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, BOOL create_new_node);
void
brt_leaf_apply_cmd_once (
BASEMENTNODE bn,

View file

@ -1217,6 +1217,7 @@ move_leafentries(
)
//Effect: move leafentries in the range [lbi, ube) from src_omt to newly created dest_omt
{
assert(lbi < ube);
OMTVALUE *MALLOC_N(ube-lbi, new_le);
u_int32_t i = 0;
*num_bytes_moved = 0;
@ -1245,7 +1246,7 @@ move_leafentries(
}
}
static void
void
brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, BOOL create_new_node)
// Effect: Split a leaf node.
{
@ -1261,122 +1262,133 @@ brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
//printf("%s:%d B is at %lld nodesize=%d\n", __FILE__, __LINE__, B->thisnodename, B->nodesize);
// variables that say where we will do the split. We do it in the basement node indexed at
// variables that say where we will do the split. We do it in the basement node indexed at
// at split_node, and at the index split_at_in_node within that basement node.
int split_node = 0;
int split_at_in_node = 0;
{
{
// TODO: (Zardosht) see if we can/should make this faster, we iterate over the rows twice
u_int64_t sumlesizes=0;
sumlesizes = brtleaf_disk_size(node);
// TODO: (Zardosht) #3537, figure out serial insertion optimization again later
// split in half
brtleaf_get_split_loc(
node,
sumlesizes,
&split_node,
&split_at_in_node
);
}
// Now we know where we are going to break it
// the two nodes will have a total of n_children+1 basement nodes
// and n_children-1 pivots
// the left node, node, will have split_node+1 basement nodes
// the right node, B, will have n_children-split_node basement nodes
// the pivots of node will be the first split_node pivots that originally exist
// the pivots of B will be the last (n_children - 1 - split_node) pivots that originally exist
{
// TODO: (Zardosht) see if we can/should make this faster, we iterate over the rows twice
u_int64_t sumlesizes=0;
sumlesizes = brtleaf_disk_size(node);
// TODO: (Zardosht) #3537, figure out serial insertion optimization again later
// split in half
brtleaf_get_split_loc(
node,
sumlesizes,
&split_node,
&split_at_in_node
);
}
// did we split right on the boundary between basement nodes?
BOOL split_on_boundary = (split_at_in_node == ((int) toku_omt_size(BLB_BUFFER(node, split_node)) - 1));
// Now we know where we are going to break it
// the two nodes will have a total of n_children+1 basement nodes
// and n_children-1 pivots
// the left node, node, will have split_node+1 basement nodes
// the right node, B, will have n_children-split_node basement nodes
// the pivots of node will be the first split_node pivots that originally exist
// the pivots of B will be the last (n_children - 1 - split_node) pivots that originally exist
//set up the basement nodes in the new node
int num_children_in_node = split_node + 1;
int num_children_in_b = node->n_children - split_node;
if (create_new_node) {
toku_create_new_brtnode(
t,
&B,
0,
num_children_in_b
);
assert(B->nodesize>0);
}
else {
B = *nodeb;
REALLOC_N(num_children_in_b-1, B->childkeys);
REALLOC_N(num_children_in_b, B->bp);
B->n_children = num_children_in_b;
//set up the basement nodes in the new node
int num_children_in_node = split_node + 1;
int num_children_in_b = node->n_children - split_node - (split_on_boundary ? 1 : 0);
if (create_new_node) {
toku_create_new_brtnode(
t,
&B,
0,
num_children_in_b
);
assert(B->nodesize>0);
}
else {
B = *nodeb;
REALLOC_N(num_children_in_b-1, B->childkeys);
REALLOC_N(num_children_in_b, B->bp);
B->n_children = num_children_in_b;
for (int i = 0; i < num_children_in_b; i++) {
BP_STATE(B,i) = PT_AVAIL;
BP_OFFSET(B,i) = 0;
BP_BLOCKNUM(B,i).b = 0;
BP_SUBTREE_EST(B,i)= zero_estimates;
BP_WORKDONE(B,i) = 0;
set_BLB(B, i, toku_create_empty_bn());
BP_WORKDONE(B,i) = 0;
set_BLB(B, i, toku_create_empty_bn());
}
}
//
// first move all the data
//
}
//
// first move all the data
//
// handle the move of a subset of data in split_node from node to B
int curr_src_bn_index = split_node;
int curr_dest_bn_index = 0;
BP_STATE(B,0) = PT_AVAIL;
struct subtree_estimates se_diff = zero_estimates;
u_int32_t diff_size = 0;
destroy_basement_node (BLB(B, 0)); // Destroy B's empty OMT, so I can rebuild it from an array
set_BNULL(B, 0);
set_BLB(B, 0, toku_create_empty_bn_no_buffer());
move_leafentries(
&BLB_BUFFER(B, 0),
BLB_BUFFER(node, split_node),
split_at_in_node+1,
toku_omt_size(BLB_BUFFER(node, split_node)),
&se_diff,
&diff_size
);
BLB_NBYTESINBUF(node, split_node) -= diff_size;
BLB_NBYTESINBUF(B, 0) += diff_size;
subtract_estimates(&BP_SUBTREE_EST(node,split_node), &se_diff);
add_estimates(&BP_SUBTREE_EST(B,0), &se_diff);
// handle the move of a subset of data in split_node from node to B
if (!split_on_boundary) {
BP_STATE(B,curr_dest_bn_index) = PT_AVAIL;
struct subtree_estimates se_diff = zero_estimates;
u_int32_t diff_size = 0;
destroy_basement_node (BLB(B, curr_dest_bn_index)); // Destroy B's empty OMT, so I can rebuild it from an array
set_BNULL(B, curr_dest_bn_index);
set_BLB(B, curr_dest_bn_index, toku_create_empty_bn_no_buffer());
move_leafentries(
&BLB_BUFFER(B, curr_dest_bn_index),
BLB_BUFFER(node, curr_src_bn_index),
split_at_in_node+1,
toku_omt_size(BLB_BUFFER(node, curr_src_bn_index)),
&se_diff,
&diff_size
);
BLB_NBYTESINBUF(node, curr_src_bn_index) -= diff_size;
BLB_NBYTESINBUF(B, curr_dest_bn_index) += diff_size;
subtract_estimates(&BP_SUBTREE_EST(node,curr_src_bn_index), &se_diff);
add_estimates(&BP_SUBTREE_EST(B,curr_dest_bn_index), &se_diff);
curr_dest_bn_index++;
} else {
curr_src_bn_index++;
}
// move the rest of the basement nodes
int curr_dest_bn_index = 1;
for (int i = num_children_in_node; i < node->n_children; i++, curr_dest_bn_index++) {
destroy_basement_node(BLB(B, curr_dest_bn_index));
set_BNULL(B, curr_dest_bn_index);
B->bp[curr_dest_bn_index] = node->bp[i];
}
node->n_children = num_children_in_node;
// move the rest of the basement nodes
for ( ; curr_src_bn_index < node->n_children; curr_src_bn_index++, curr_dest_bn_index++) {
destroy_basement_node(BLB(B, curr_dest_bn_index));
set_BNULL(B, curr_dest_bn_index);
B->bp[curr_dest_bn_index] = node->bp[curr_src_bn_index];
}
node->n_children = num_children_in_node;
//
// now handle the pivots
//
//
// now handle the pivots
//
// make pivots in B
for (int i=0; i < num_children_in_b-1; i++) {
B->childkeys[i] = node->childkeys[i+split_node];
B->totalchildkeylens += toku_brt_pivot_key_len(node->childkeys[i+split_node]);
node->totalchildkeylens -= toku_brt_pivot_key_len(node->childkeys[i+split_node]);
node->childkeys[i+split_node] = NULL;
}
REALLOC_N(num_children_in_node, node->bp);
REALLOC_N(num_children_in_node-1, node->childkeys);
// the child index in the original node that corresponds to the
// first node in the right node of the split
int base_index = (split_on_boundary ? split_node + 1 : split_node);
// make pivots in B
for (int i=0; i < num_children_in_b-1; i++) {
B->childkeys[i] = node->childkeys[i+base_index];
B->totalchildkeylens += toku_brt_pivot_key_len(node->childkeys[i+base_index]);
node->totalchildkeylens -= toku_brt_pivot_key_len(node->childkeys[i+base_index]);
node->childkeys[i+base_index] = NULL;
}
REALLOC_N(num_children_in_node, node->bp);
REALLOC_N(num_children_in_node-1, node->childkeys);
toku_brt_leaf_reset_calc_leaf_stats(node);
toku_brt_leaf_reset_calc_leaf_stats(B);
toku_brt_leaf_reset_calc_leaf_stats(node);
toku_brt_leaf_reset_calc_leaf_stats(B);
}
if (splitk) {
memset(splitk, 0, sizeof *splitk);
OMTVALUE lev = 0;
int r=toku_omt_fetch(BLB_BUFFER(node, split_node), toku_omt_size(BLB_BUFFER(node, split_node))-1, &lev);
assert_zero(r); // that fetch should have worked.
LEAFENTRY le=lev;
splitk->size = le_keylen(le);
splitk->data = kv_pair_malloc(le_key(le), le_keylen(le), 0, 0);
splitk->flags=0;
memset(splitk, 0, sizeof *splitk);
OMTVALUE lev = 0;
int r=toku_omt_fetch(BLB_BUFFER(node, split_node), toku_omt_size(BLB_BUFFER(node, split_node))-1, &lev);
assert_zero(r); // that fetch should have worked.
LEAFENTRY le=lev;
splitk->size = le_keylen(le);
splitk->data = kv_pair_malloc(le_key(le), le_keylen(le), 0, 0);
splitk->flags=0;
}
node->max_msn_applied_to_node_on_disk= max_msn_applied_to_node;
B ->max_msn_applied_to_node_on_disk = max_msn_applied_to_node;
node->max_msn_applied_to_node_on_disk = max_msn_applied_to_node;
B->max_msn_applied_to_node_on_disk = max_msn_applied_to_node;
node->dirty = 1;
B->dirty = 1;

108
newbrt/tests/test3884.c Normal file
View file

@ -0,0 +1,108 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "$Id: test3856.c 33984 2011-08-17 03:03:54Z leifwalsh $"
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
// Regression test for #3884: build a leaf node by hand out of equally sized
// basement nodes and call brtleaf_split on it.  If brtleaf_split mishandles
// a split point that falls on a basement-node boundary, the assert at the
// top of move_leafentries fires.
#include "test.h"
#include "includes.h"
#include <unistd.h>
static TOKUTXN const null_txn = 0;
static DB * const null_db = 0;
static const char fname[]= __FILE__ ".brt";
// Comparison callback passed to toku_omt_insert: orders two leafentries by
// their keys, interpreted as native longs.
//   p: OMT value, treated as a LEAFENTRY.
//   q: extra pointer, also treated as a LEAFENTRY (this test passes the
//      entry being inserted).
// Both keys must be exactly sizeof(long) bytes; this is asserted.
// Returns -1, 0 or +1 when p's key is less than, equal to, or greater than
// q's key.
static int omt_long_cmp(OMTVALUE p, void *q)
{
    LEAFENTRY a = p, b = q;
    u_int32_t al, bl;
    void *ak = le_key_and_len(a, &al);
    void *bk = le_key_and_len(b, &bl);
    assert(al == sizeof(long) && bl == sizeof(long));
    // The key lives inside the leafentry's byte buffer and nothing visible
    // here guarantees it is aligned for a long, so copy the bytes out
    // instead of dereferencing a cast long pointer.
    long ai, bi;
    memcpy(&ai, ak, sizeof ai);
    memcpy(&bi, bk, sizeof bi);
    return (ai > bi) - (ai < bi);
}
// Allocate and fill in an LE_CLEAN leafentry carrying the given key and value.
//   key/keylen: key bytes and their length.
//   val/vallen: value bytes and their length.
// The allocation is sized as the three header fields (type, keylen,
// u.clean.vallen) plus keylen + vallen payload bytes; the key is stored
// first in u.clean.key_val, immediately followed by the value.
// Allocation failure trips resource_assert.  The caller owns the result.
static LEAFENTRY
le_fastmalloc(char *key, int keylen, char *val, int vallen)
{
    LEAFENTRY le;
    size_t nbytes = sizeof(le->type) + sizeof(le->keylen) + sizeof(le->u.clean.vallen);
    nbytes += keylen;
    nbytes += vallen;

    le = toku_malloc(nbytes);
    resource_assert(le);

    le->type = LE_CLEAN;
    le->keylen = keylen;
    le->u.clean.vallen = vallen;

    // Payload layout: [key bytes][value bytes]
    memcpy(le->u.clean.key_val, key, keylen);
    memcpy(le->u.clean.key_val + keylen, val, vallen);
    return le;
}
// Test entry point.  Hand-builds a leaf node `sn` and splits it with
// brtleaf_split.
//
// With nodesize=1024, eltsize=64 and bnsize=256 the node gets
// nelts = 2*nodesize/eltsize = 32 leafentries spread over
// n_children = nelts*eltsize/bnsize = 8 basement nodes of
// eltsperbn = bnsize/eltsize = 4 entries each.  Entry k has the long k as its
// key and a value filled with the byte k.  Every basement node except the
// last gets a pivot key equal to the last key inserted into it.
//
// Since all basement nodes are the same size, a split "in half" is expected
// to land on a basement-node boundary (NOTE(review): this depends on
// brtleaf_get_split_loc, which is not visible here).  If brtleaf_split gets
// that case wrong, the assert at the top of move_leafentries fires.
//
// Returns 0; failures are reported through assert.
int
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
    toku_memory_check = 1;

    const int nodesize = 1024, eltsize = 64, bnsize = 256;
    const int keylen = sizeof(long), vallen = eltsize - keylen - (sizeof(((LEAFENTRY)NULL)->type) // overhead from LE_CLEAN_MEMSIZE
                                                                    +sizeof(((LEAFENTRY)NULL)->keylen)
                                                                    +sizeof(((LEAFENTRY)NULL)->u.clean.vallen));
    const int eltsperbn = bnsize / eltsize;
    struct brtnode sn;
    int r;

    // Make sure the backing file exists.  The descriptor itself is never
    // used afterwards, so close it right away instead of leaking it.
    int fd = open(fname, O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0);
    r = close(fd); assert(r == 0);

    // Fill in the node header fields by hand.
    sn.max_msn_applied_to_node_on_disk.msn = 0;
    sn.nodesize = nodesize;
    sn.flags = 0x11223344;
    sn.thisnodename.b = 20;
    sn.layout_version = BRT_LAYOUT_VERSION;
    sn.layout_version_original = BRT_LAYOUT_VERSION;
    sn.height = 0;  // leaf
    const int nelts = 2 * nodesize / eltsize;
    sn.n_children = nelts * eltsize / bnsize;
    sn.dirty = 1;
    LEAFENTRY elts[nelts];
    MALLOC_N(sn.n_children, sn.bp);
    MALLOC_N(sn.n_children - 1, sn.childkeys);
    sn.totalchildkeylens = 0;

    // Populate every basement node with eltsperbn consecutive keys.
    for (int bn = 0; bn < sn.n_children; ++bn) {
        BP_SUBTREE_EST(&sn,bn).ndata = random() + (((long long)random())<<32);
        BP_SUBTREE_EST(&sn,bn).nkeys = random() + (((long long)random())<<32);
        BP_SUBTREE_EST(&sn,bn).dsize = random() + (((long long)random())<<32);
        BP_SUBTREE_EST(&sn,bn).exact = (BOOL)(random()%2 != 0);
        BP_STATE(&sn,bn) = PT_AVAIL;
        set_BLB(&sn, bn, toku_create_empty_bn());
        BLB_NBYTESINBUF(&sn,bn) = 0;
        BLB_OPTIMIZEDFORUPGRADE(&sn, bn) = BRT_LAYOUT_VERSION;
        long k;
        for (int i = 0; i < eltsperbn; ++i) {
            k = bn * eltsperbn + i;
            char val[vallen];
            memset(val, k, sizeof val);
            elts[k] = le_fastmalloc((char *) &k, keylen, val, vallen);
            r = toku_omt_insert(BLB_BUFFER(&sn, bn), elts[k], omt_long_cmp, elts[k], NULL); assert(r == 0);
            BLB_NBYTESINBUF(&sn, bn) += OMT_ITEM_OVERHEAD + leafentry_disksize(elts[k]);
        }
        // k now holds the last key of this basement node; use it as the pivot
        // separating this basement node from the next one.
        if (bn < sn.n_children - 1) {
            sn.childkeys[bn] = kv_pair_malloc(&k, sizeof k, 0, 0);
            sn.totalchildkeylens += (sizeof k);
        }
    }

    CACHETABLE ct;
    r = toku_brt_create_cachetable(&ct, 0, ZERO_LSN, NULL_LOGGER); assert(r==0);
    BRT brt;
    r = toku_open_brt(fname, 1, &brt, nodesize, bnsize, ct, null_txn, toku_builtin_compare_fun, null_db); assert(r==0);

    BRTNODE nodea, nodeb;
    DBT splitk;
    // if we haven't done it right, we should hit the assert in the top of move_leafentries
    brtleaf_split(brt, &sn, &nodea, &nodeb, &splitk, TRUE);

    return 0;
}