2007-07-13 19:37:47 +00:00
/* -*- mode: C; c-basic-offset: 4 -*- */
/* Buffered repository tree.
* Observation: The in-memory representation of a node doesn't have to be the same as the on-disk representation.
*  Goal for the in-memory representation: fast
*  Goal for on-disk: small
*
* So to get this running fast, I'll make a version that doesn't do range queries:
*   use a hash table for in-memory
*   simply write the strings on disk.
* Later I'll do a PMA or a skiplist for the in-memory version.
* Also, later I'll convert the format to network order from host order.
* Later, for on disk, I'll compress it (perhaps with gzip, perhaps with the bzip2 algorithm).
*
* The collection of nodes forms a data structure like a B-tree.  The complexities of keeping it balanced apply.
*
* We always write nodes to a new location on disk.
* The nodes themselves contain the information about the tree structure.
* Q: During recovery, how do we find the root node without looking at every block on disk?
* A: The root node is the designated root near the front of the freelist.
*    The freelist is updated infrequently.  Before updating the stable copy of the freelist, we make sure that
*    the root is up-to-date.  We can make the freelist-and-root update be an arbitrarily small fraction of disk bandwidth.
*
*/
# include "brttypes.h"
# include "brt.h"
# include "memory.h"
# include "brt-internal.h"
# include "cachetable.h"
# include <stdlib.h>
# include <assert.h>
# include <string.h>
# include <unistd.h>
# include <stdio.h>
# include <errno.h>
2007-10-03 19:34:31 +00:00
/* A BRTNODE constant initialized to 0, i.e. the "no node" value. */
const BRTNODE null_brtnode = 0 ;
2007-07-13 19:37:47 +00:00
/* Declared here, defined in another translation unit.
 * NOTE(review): presumably the allocator's count of outstanding allocations -- confirm against its definition. */
extern long long n_items_malloced ;
/* Frees a node, including all the stuff in the hash table. */
2007-08-01 02:37:21 +00:00
void brtnode_free ( BRTNODE * nodep ) {
BRTNODE node = * nodep ;
2007-07-13 19:37:47 +00:00
int i ;
//printf("%s:%d %p->mdict[0]=%p\n", __FILE__, __LINE__, node, node->mdicts[0]);
if ( node - > height > 0 ) {
for ( i = 0 ; i < node - > u . n . n_children - 1 ; i + + ) {
2007-07-20 18:00:14 +00:00
toku_free ( ( void * ) node - > u . n . childkeys [ i ] ) ;
2007-07-13 19:37:47 +00:00
}
for ( i = 0 ; i < node - > u . n . n_children ; i + + ) {
if ( node - > u . n . htables [ i ] ) {
2007-07-24 15:08:05 +00:00
toku_hashtable_free ( & node - > u . n . htables [ i ] ) ;
2007-07-13 19:37:47 +00:00
}
2007-08-23 18:07:18 +00:00
assert ( node - > u . n . n_cursors [ i ] = = 0 ) ;
2007-07-13 19:37:47 +00:00
}
} else {
if ( node - > u . l . buffer ) // The buffer may have been freed already, in some cases.
pma_free ( & node - > u . l . buffer ) ;
}
2007-07-20 18:00:14 +00:00
toku_free ( node ) ;
2007-08-01 02:37:21 +00:00
* nodep = 0 ;
2007-07-13 19:37:47 +00:00
}
2007-09-21 17:55:49 +00:00
/* Return the number of buffered bytes a node is carrying.
 *
 * For a nonleaf (height > 0) this is the total across its hashtables
 * (u.n.n_bytes_in_hashtables); otherwise it is the leaf buffer's byte count
 * (u.l.n_bytes_in_buffer).  The node's tag is asserted to be TYP_BRTNODE. */
long brtnode_size (BRTNODE node) {
    assert(node->tag == TYP_BRTNODE);
    if (node->height > 0) {
        return node->u.n.n_bytes_in_hashtables;
    }
    return node->u.l.n_bytes_in_buffer;
}
2007-10-03 19:34:31 +00:00
/* Make every child of NODE that can be pinned right now point back at NODE.
 *
 * NODE must be a nonleaf (asserted).  For each child slot we ask the
 * cachetable for the child via cachetable_maybe_get_and_pin(); a nonzero
 * result means we skip that child (presumably it is not resident -- confirm
 * against the cachetable).  Otherwise its parent_brtnode is set to NODE and the
 * child is unpinned again with its current dirty flag and size.
 *
 * NOTE(review): the result of cachetable_unpin() is stored but never checked. */
void fix_up_parent_pointers_of_children (BRT t, BRTNODE node) {
    int childnum;
    assert(node->height > 0);
    for (childnum = 0; childnum < node->u.n.n_children; childnum++) {
        void *pinned;
        int rc = cachetable_maybe_get_and_pin(t->cf, node->u.n.children[childnum], &pinned);
        if (rc != 0) {
            continue;
        }
        BRTNODE child = pinned;
        child->parent_brtnode = node;
        rc = cachetable_unpin(t->cf, node->u.n.children[childnum], child->dirty, brtnode_size(child));
    }
}
/* Clear the parent pointer of every child of NODE that can be pinned right now.
 *
 * Leaves (height == 0) have no children, so nothing is done for them.  For a
 * nonleaf, each child for which cachetable_maybe_get_and_pin() returns 0 gets
 * parent_brtnode set to 0 and is then unpinned with its current dirty flag and
 * size; children for which it returns nonzero are left alone.
 *
 * NOTE(review): the result of cachetable_unpin() is stored but never checked. */
void fix_up_parent_pointers_of_children_now_that_parent_is_gone (CACHEFILE cf, BRTNODE node) {
    int childnum;
    if (node->height == 0) {
        return;
    }
    for (childnum = 0; childnum < node->u.n.n_children; childnum++) {
        void *pinned;
        int rc = cachetable_maybe_get_and_pin(cf, node->u.n.children[childnum], &pinned);
        if (rc != 0) {
            continue;
        }
        BRTNODE orphan = pinned;
        orphan->parent_brtnode = 0;
        rc = cachetable_unpin(cf, node->u.n.children[childnum], orphan->dirty, brtnode_size(orphan));
    }
}
2007-09-21 17:55:49 +00:00
/* Cachetable flush callback for brt nodes.
 *
 * cachefile: file the node belongs to.
 * nodename:  disk offset naming the node; must equal the node's thisnodename (asserted).
 * brtnode_v: the node itself (a BRTNODE passed as void*).
 * size:      unused.
 * write_me:  if nonzero, the node is serialized to the cachefile's fd.
 * keep_me:   if zero, the node is freed after any write.
 *
 * Before anything else the parent pointers of the node's children are cleared
 * (this happens regardless of keep_me).  If the node records a parent, we also
 * check that the parent really lists NODENAME among its children; if not, a
 * diagnostic is printed and assert(0) fires. */
void brtnode_flush_callback (CACHEFILE cachefile, diskoff nodename, void *brtnode_v, long size __attribute((unused)), int write_me, int keep_me) {
    BRTNODE flushed = brtnode_v;

    if (0) {
        /* Disabled debug trace. */
        printf(" %s:%d brtnode_flush_callback %p keep_me=%d height=%d ", __FILE__, __LINE__, flushed, keep_me, flushed->height);
        if (flushed->height == 0) {
            printf(" pma=%p ", flushed->u.l.buffer);
        }
        printf(" \n ");
    }

    fix_up_parent_pointers_of_children_now_that_parent_is_gone(cachefile, flushed);
    assert(flushed->thisnodename == nodename);

    BRTNODE parent = flushed->parent_brtnode;
    if (parent != 0) {
        /* Make sure we are one of the children of the parent. */
        int listed = 0;
        int slot;
        assert(parent->u.n.n_children <= TREE_FANOUT + 1);
        for (slot = 0; slot < parent->u.n.n_children && !listed; slot++) {
            if (parent->u.n.children[slot] == nodename) {
                listed = 1;
            }
        }
        if (!listed) {
            printf(" %s:%d Whoops, the parent of %p (%p) isn't right \n ", __FILE__, __LINE__, flushed, parent);
            assert(0);
        }
    }

    if (write_me) {
        serialize_brtnode_to(cachefile_fd(cachefile), flushed->thisnodename, flushed->nodesize, flushed);
    }
    if (!keep_me) {
        brtnode_free(&flushed);
    }
}
2007-09-21 17:55:49 +00:00
/* Cachetable fetch callback for brt nodes.
 *
 * Deserializes the node named NODENAME from the cachefile's fd into
 * *brtnode_pv.  EXTRAARGS carries the node size, encoded as a long cast to
 * void*.  On success (deserialize returned 0) *sizep receives brtnode_size()
 * of the new node and 0 is returned; otherwise the deserializer's nonzero
 * result is returned and *sizep is not written.
 *
 * NOTE(review): sizep is marked unused in the signature but is in fact
 * written on success. */
int brtnode_fetch_callback (CACHEFILE cachefile, diskoff nodename, void **brtnode_pv, long *sizep __attribute__((unused)), void *extraargs) {
    BRTNODE *nodeslot = (BRTNODE *) brtnode_pv;
    long nodesize = (long) extraargs;
    int status = deserialize_brtnode_from(cachefile_fd(cachefile), nodename, nodeslot, nodesize);
    if (status != 0) {
        return status;
    }
    *sizep = brtnode_size(*nodeslot);
    return 0;
}
2007-09-21 17:55:49 +00:00
/* Cachetable flush callback for the brt header.
 *
 * The header always lives at disk offset 0 (asserted) and must not be dirty
 * by the time it is flushed (asserted).
 *
 * write_me: if nonzero, the header is serialized to the cachefile's fd.
 * keep_me:  if zero, the header is freed; when it has named roots
 *           (n_named_roots > 0) each name, the names array and the roots
 *           array are freed first.
 * size:     unused. */
void brtheader_flush_callback (CACHEFILE cachefile, diskoff nodename, void *header_v, long size __attribute((unused)), int write_me, int keep_me) {
    struct brt_header *hdr = header_v;
    assert(nodename == 0);
    assert(!hdr->dirty); // shouldn't be dirty once it is unpinned.
    if (write_me) {
        serialize_brt_header_to(cachefile_fd(cachefile), hdr);
    }
    if (keep_me) {
        return;
    }
    if (hdr->n_named_roots > 0) {
        int which;
        for (which = 0; which < hdr->n_named_roots; which++) {
            toku_free(hdr->names[which]);
        }
        toku_free(hdr->names);
        toku_free(hdr->roots);
    }
    toku_free(hdr);
}
2007-09-21 17:55:49 +00:00
/* Cachetable fetch callback for the brt header.
 *
 * The header is always at disk offset 0 (asserted).  Deserializes it from the
 * cachefile's fd into *headerp_v and returns the deserializer's result.
 * sizep and extraargs are not used. */
int brtheader_fetch_callback (CACHEFILE cachefile, diskoff nodename, void **headerp_v, long *sizep __attribute__((unused)), void *extraargs __attribute__((__unused__))) {
    assert(nodename == 0);
    return deserialize_brtheader_from(cachefile_fd(cachefile), nodename, (struct brt_header **) headerp_v);
}
/* Pin the brt header (the cachetable entry at offset 0) and hand it back.
 *
 * Returns the result of cachetable_get_and_pin(); *header is written only
 * when that result is 0. */
int read_and_pin_brt_header (CACHEFILE cf, struct brt_header **header) {
    void *pinned;
    int status = cachetable_get_and_pin(cf, 0, &pinned, NULL,
                                        brtheader_flush_callback, brtheader_fetch_callback, 0);
    if (status == 0) {
        *header = pinned;
    }
    return status;
}
/* Unpin the header held by BRT and drop BRT's reference to it.
 *
 * The header's current dirty flag is passed to cachetable_unpin(); afterwards
 * the flag is cleared and brt->h is set to 0.  Returns whatever
 * cachetable_unpin() returned. */
int unpin_brt_header (BRT brt) {
    int status = cachetable_unpin(brt->cf, 0, brt->h->dirty, 0);
    brt->h->dirty = 0;
    brt->h = 0;
    return status;
}
/* A key/value pair described by two (pointer, length) fields.
 * Note that KVPAIR is a typedef for a POINTER to struct kvpair. */
typedef struct kvpair {
bytevec key ; /* key bytes */
unsigned int keylen ; /* presumably the length of key in bytes -- confirm */
bytevec val ; /* value bytes */
unsigned int vallen ; /* presumably the length of val in bytes -- confirm */
} * KVPAIR ;
int kvpair_compare ( const void * av , const void * bv ) {
const KVPAIR a = ( const KVPAIR ) av ;
const KVPAIR b = ( const KVPAIR ) bv ;
int r = keycompare ( a - > key , a - > keylen , b - > key , b - > keylen ) ;
//printf("keycompare(%s,\n %s)-->%d\n", a->key, b->key, r);
return r ;
}
/* Allocate SIZE bytes of disk space by bumping the header's unused_memory
 * offset; the header must already be in memory (brt->h is dereferenced).
 * Returns the offset at which the new block starts.
 *
 * Forgot to handle the case where there is something in the freelist. */
diskoff malloc_diskblock_header_is_in_memory (BRT brt, int size) {
    const diskoff block_start = brt->h->unused_memory;
    brt->h->unused_memory += size;
    return block_start;
}
/* Allocate SIZE bytes of disk space for BRT and return the block's offset.
 *
 * The compiled-in path simply delegates to
 * malloc_diskblock_header_is_in_memory().  The disabled (#if 0) path is an
 * alternative that would pin the header, allocate, and write the header back. */
diskoff malloc_diskblock (BRT brt, int size) {
#if 0
    int r = read_and_pin_brt_header(brt->fd, &brt->h);
    assert(r == 0);
    {
        diskoff result = malloc_diskblock_header_is_in_memory(brt, size);
        r = write_brt_header(brt->fd, &brt->h);
        assert(r == 0);
        return result;
    }
#else
    diskoff block_start = malloc_diskblock_header_is_in_memory(brt, size);
    return block_start;
#endif
}
/* Fill in a freshly allocated node N so that it is an empty node of the given
 * height, named NODENAME, sized according to the tree header, and dirty.
 *
 * height > 0: a nonleaf with zero children; all TREE_FANOUT key slots and all
 *             TREE_FANOUT+1 child slots (child offset, hashtable, byte count,
 *             cursor count) are zeroed.
 * otherwise:  a leaf; a new PMA buffer is created with the tree's comparison
 *             function (creation must succeed -- asserted) and the buffer
 *             byte count is zeroed.
 *
 * HEIGHT must be >= 0 (asserted). */
static void initialize_brtnode (BRT t, BRTNODE n, diskoff nodename, int height) {
    n->tag          = TYP_BRTNODE;
    n->nodesize     = t->h->nodesize;
    n->thisnodename = nodename;
    n->height       = height;
    brtnode_set_dirty(n);
    assert(height >= 0);

    if (height > 0) {
        int slot;
        n->u.n.n_children = 0;
        n->u.n.totalchildkeylens = 0;
        n->u.n.n_bytes_in_hashtables = 0;
        /* There is one more child slot than key slot. */
        for (slot = 0; slot < TREE_FANOUT + 1; slot++) {
            if (slot < TREE_FANOUT) {
                n->u.n.childkeys[slot] = 0;
                n->u.n.childkeylens[slot] = 0;
            }
            n->u.n.children[slot] = 0;
            n->u.n.htables[slot] = 0;
            n->u.n.n_bytes_in_hashtable[slot] = 0;
            n->u.n.n_cursors[slot] = 0;
        }
    } else {
        /* Counts leaf initializations; it is only incremented, never read here. */
        static int rcount = 0;
        int r = pma_create(&n->u.l.buffer, t->compare_fun, n->nodesize);
        assert(r == 0);
        rcount++;
        n->u.l.n_bytes_in_buffer = 0;
    }
}
2007-10-03 19:34:31 +00:00
/* Allocate a brand-new node of the given height, give it a fresh disk block,
 * record PARENT_BRTNODE as its parent, and insert it into the cachetable.
 *
 * t:              the tree; its header supplies the node size.
 * result:         receives the new node.
 * height:         0 for a leaf, > 0 for a nonleaf.
 * parent_brtnode: stored in the new node's parent_brtnode field.
 *
 * Allocation failure, a non-positive node size, and a failing
 * cachetable_put() are all treated as fatal (asserted). */
static void create_new_brtnode (BRT t, BRTNODE *result, int height, BRTNODE parent_brtnode) {
    TAGMALLOC(BRTNODE, n);
    int put_status;
    diskoff diskblock = malloc_diskblock(t, t->h->nodesize);
    assert(n);
    assert(t->h->nodesize > 0);

    initialize_brtnode(t, n, diskblock, height);
    n->parent_brtnode = parent_brtnode;
    *result = n;
    assert(n->nodesize > 0);

    /* The node size rides along as the fetch callback's extra argument. */
    put_status = cachetable_put(t->cf, n->thisnodename, n, brtnode_size(n),
                                brtnode_flush_callback, brtnode_fetch_callback, (void *) (long) t->h->nodesize);
    assert(put_status == 0);
}
/* Empty out NODE and remove it from the cachetable without writing it back.
 *
 * Leaf (height == 0): the PMA buffer is freed if present and the buffer byte
 * count is zeroed.
 * Nonleaf: every attached per-child hashtable is freed, every per-child byte
 * count is zeroed, each child slot must have no cursors (asserted), the
 * totals and child count are zeroed, and the node is then relabelled as an
 * empty leaf (height 0, buffer 0).  Child keys are NOT freed here.
 *
 * NOTE(review): the node is presumably left in this empty-leaf state so that
 * whatever cachetable_remove() triggers can free it safely -- confirm against
 * the cachetable.  The result of cachetable_remove() is not checked.
 *
 * Fix: the per-child byte count is now cleared for slot i; previously slot 0
 * was cleared on every iteration and the other slots were left untouched. */
void delete_node (BRT t, BRTNODE node) {
    int i;
    assert(node->height >= 0);
    if (node->height == 0) {
        if (node->u.l.buffer) {
            pma_free(&node->u.l.buffer);
        }
        node->u.l.n_bytes_in_buffer = 0;
    } else {
        for (i = 0; i < node->u.n.n_children; i++) {
            if (node->u.n.htables[i]) {
                toku_hashtable_free(&node->u.n.htables[i]);
            }
            node->u.n.n_bytes_in_hashtable[i] = 0;
            assert(node->u.n.n_cursors[i] == 0);
        }
        node->u.n.n_bytes_in_hashtables = 0;
        node->u.n.totalchildkeylens = 0;
        node->u.n.n_children = 0;
        node->height = 0;
        node->u.l.buffer = 0; /* It's a leaf now (height==0) so set the buffer to NULL. */
    }
    cachetable_remove(t->cf, node->thisnodename, 0); /* Don't write it back to disk. */
}
2007-08-09 18:54:58 +00:00
/* When nonzero, leaf splits use pma_split(); the helper below is only needed
 * (and only compiled) for the alternative, iterate-and-reinsert split. */
#define USE_PMA_SPLIT 1
#if !USE_PMA_SPLIT
/* Insert the pair (k, v) into the PMA buffer of leaf NODE and add
 * KEY_VALUE_OVERHEAD + key size + value size to the leaf's byte count.
 * The insert must succeed (asserted). */
static void insert_to_buffer_in_leaf (BRTNODE node, DBT *k, DBT *v, DB *db) {
    unsigned int growth = KEY_VALUE_OVERHEAD + k->size + v->size;
    int status = pma_insert(node->u.l.buffer, k, v, db);
    assert(status == 0);
    node->u.l.n_bytes_in_buffer += growth;
}
#endif
2007-07-13 19:37:47 +00:00
2007-09-06 21:36:45 +00:00
/* Buffer the command (k, v, type) in the hashtable that NODE keeps for child
 * CHILDNUM.
 *
 * On success the per-child and total byte counts grow by
 * BRT_CMD_OVERHEAD + KEY_VALUE_OVERHEAD + key size + value size, the node is
 * marked dirty, and 0 is returned.  If toku_hash_insert() fails its nonzero
 * result is returned and the node's accounting is left unchanged. */
static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT *v, int type) {
    unsigned int growth = BRT_CMD_OVERHEAD + KEY_VALUE_OVERHEAD + k->size + v->size;
    int status = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
    if (status == 0) {
        node->u.n.n_bytes_in_hashtable[childnum] += growth;
        node->u.n.n_bytes_in_hashtables += growth;
        brtnode_set_dirty(node);
    }
    return status;
}
2007-07-24 11:13:42 +00:00
/* Split leaf NODE into two new leaves and delete NODE.
 *
 * t:           the tree.
 * node:        the leaf to split (height must be 0 -- asserted).
 * nodea/nodeb: receive the two new leaves; both inherit NODE's parent pointer.
 * splitk:      receives the split key.  With USE_PMA_SPLIT it is filled by
 *              pma_get_last() on the first leaf's buffer (presumably that
 *              leaf's greatest key -- confirm against the PMA); otherwise it
 *              is a memdup'd copy of the key at which the first leaf reached
 *              half of the original byte count.
 * app_private, db: only used by the non-PMA-split variant.
 *
 * After the contents are divided, cursors are updated for the split, NODE is
 * deleted, and both halves are asserted to serialize to less than their node
 * size.  Always returns 0; every failure is an assertion. */
int brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, void *app_private, DB *db) {
    BRTNODE left, right;
    assert(node->height == 0);
    assert(t->h->nodesize >= node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
    create_new_brtnode(t, &left, 0, node->parent_brtnode);
    create_new_brtnode(t, &right, 0, node->parent_brtnode);
    assert(left->nodesize > 0);
    assert(right->nodesize > 0);
    assert(node->nodesize > 0);
    assert(node->height > 0 || node->u.l.buffer != 0);
#if USE_PMA_SPLIT
    {
        int status = pma_split(node->u.l.buffer,  &node->u.l.n_bytes_in_buffer,
                               left->u.l.buffer,  &left->u.l.n_bytes_in_buffer,
                               right->u.l.buffer, &right->u.l.n_bytes_in_buffer);
        assert(status == 0);
        status = pma_get_last(left->u.l.buffer, splitk, 0);
        assert(status == 0);
        /* Not needed on this path. */
        (void) app_private;
        (void) db;
    }
#else
    {
        int did_split = 0;
        PMA_ITERATE(node->u.l.buffer, key, keylen, val, vallen,
                    ({
                        DBT kd, vd;
                        if (did_split) {
                            insert_to_buffer_in_leaf(right, fill_dbt_ap(&kd, key, keylen, app_private), fill_dbt(&vd, val, vallen), db);
                        } else {
                            insert_to_buffer_in_leaf(left, fill_dbt_ap(&kd, key, keylen, app_private), fill_dbt(&vd, val, vallen), db);
                            if (left->u.l.n_bytes_in_buffer * 2 >= node->u.l.n_bytes_in_buffer) {
                                fill_dbt(splitk, memdup(key, keylen), keylen);
                                did_split = 1;
                            }
                        }
                    }));
        assert(did_split == 1);
    }
#endif
    assert(node->height > 0 || node->u.l.buffer != 0);
    /* Remove it from the cache table, and free its storage. */
    brt_update_cursors_leaf_split(t, node, left, right);
    delete_node(t, node);
    *nodea = left;
    *nodeb = right;
    assert(serialize_brtnode_size(left) < left->nodesize);
    assert(serialize_brtnode_size(right) < right->nodesize);
    return 0;
}
2007-07-24 01:32:03 +00:00
/* Split nonleaf NODE into two new nonleaves of the same height and delete NODE.
 *
 * The first n_children/2 children (with their hashtables and byte counts) move
 * to the first new node, the rest to the second.  Of the n_children-1 keys,
 * those before index n_children/2 - 1 go to the first node, the key AT that
 * index becomes the split key, and the remaining keys go to the second node.
 *
 * Side effect: splitk->data is set to the split key's pointer taken out of
 * NODE (the original comment describes it as a malloc'd value) and
 * splitk->size to its length; NODE no longer refers to that key.
 *
 * nodea/nodeb receive the two new nodes.  NODE must be a nonleaf with at
 * least two children (asserted).  After the move NODE must hold no
 * hashtables, bytes or key lengths (asserted); cached children get their
 * parent pointers redirected, cursors are updated, and NODE is deleted. */
void brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk) {
    int n_children_in_a = node->u.n.n_children / 2;
    BRTNODE left, right;
    int i;
    assert(node->height > 0);
    assert(node->u.n.n_children >= 2); /* Otherwise, how do we split?  We need at least two children to split. */
    assert(t->h->nodesize >= node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
    create_new_brtnode(t, &left, node->height, node->parent_brtnode);
    create_new_brtnode(t, &right, node->height, node->parent_brtnode);
    left->u.n.n_children = n_children_in_a;
    right->u.n.n_children = node->u.n.n_children - n_children_in_a;

    /* Hand each child slot (offset, hashtable, byte count) to its new owner. */
    for (i = 0; i < node->u.n.n_children; i++) {
        BRTNODE dest;
        int slot;
        if (i < n_children_in_a) {
            dest = left;
            slot = i;
        } else {
            dest = right;
            slot = i - n_children_in_a;
        }
        dest->u.n.children[slot] = node->u.n.children[i];
        dest->u.n.htables[slot] = node->u.n.htables[i];
        dest->u.n.n_bytes_in_hashtable[slot] = node->u.n.n_bytes_in_hashtable[i];
        dest->u.n.n_bytes_in_hashtables += dest->u.n.n_bytes_in_hashtable[slot];
        node->u.n.htables[i] = 0;
        node->u.n.n_bytes_in_hashtables -= node->u.n.n_bytes_in_hashtable[i];
        node->u.n.n_bytes_in_hashtable[i] = 0;
    }

    /* Keys below the pivot go to the first node. */
    for (i = 0; i < n_children_in_a - 1; i++) {
        left->u.n.childkeys[i] = node->u.n.childkeys[i];
        left->u.n.childkeylens[i] = node->u.n.childkeylens[i];
        left->u.n.totalchildkeylens += node->u.n.childkeylens[i];
        node->u.n.totalchildkeylens -= node->u.n.childkeylens[i];
        node->u.n.childkeys[i] = 0;
        node->u.n.childkeylens[i] = 0;
    }

    /* The pivot key itself is handed to the caller as the splitter. */
    {
        const int pivot = n_children_in_a - 1;
        splitk->data = (void *) (node->u.n.childkeys[pivot]);
        splitk->size = node->u.n.childkeylens[pivot];
        node->u.n.totalchildkeylens -= node->u.n.childkeylens[pivot];
        node->u.n.childkeys[pivot] = 0;
        node->u.n.childkeylens[pivot] = 0;
    }

    /* Keys above the pivot go to the second node. */
    for (i = n_children_in_a; i < node->u.n.n_children - 1; i++) {
        const int slot = i - n_children_in_a;
        right->u.n.childkeys[slot] = node->u.n.childkeys[i];
        right->u.n.childkeylens[slot] = node->u.n.childkeylens[i];
        right->u.n.totalchildkeylens += node->u.n.childkeylens[i];
        node->u.n.totalchildkeylens -= node->u.n.childkeylens[i];
        node->u.n.childkeys[i] = 0;
        node->u.n.childkeylens[i] = 0;
    }
    assert(node->u.n.totalchildkeylens == 0);

    fix_up_parent_pointers_of_children(t, left);
    fix_up_parent_pointers_of_children(t, right);

    /* The buffer is all divided up between them, since we just moved the hashtables over. */
    for (i = 0; i < TREE_FANOUT + 1; i++) {
        assert(node->u.n.htables[i] == 0);
        assert(node->u.n.n_bytes_in_hashtable[i] == 0);
    }
    assert(node->u.n.n_bytes_in_hashtables == 0);

    *nodea = left;
    *nodeb = right;
    /* Remove it from the cache table, and free its storage. */
    brt_update_cursors_nonleaf_split(t, node, left, right);
    delete_node(t, node);
    assert(serialize_brtnode_size(left) < left->nodesize);
    assert(serialize_brtnode_size(right) < right->nodesize);
}
/* Find the child of nonleaf NODE whose hashtable holds the most bytes.
 *
 * *childnum receives the index of the child with the largest
 * n_bytes_in_hashtable entry; on ties the lowest index wins.  NODE must have
 * at least one child (asserted).  The `if (0)` prints are disabled debug
 * output. */
void find_heaviest_child (BRTNODE node, int *childnum) {
    int heaviest = 0;
    int heaviest_weight = node->u.n.n_bytes_in_hashtable[0];
    int candidate;
    if (0) printf(" %s:%d weights: %d ", __FILE__, __LINE__, heaviest_weight);
    assert(node->u.n.n_children > 0);
    for (candidate = 1; candidate < node->u.n.n_children; candidate++) {
        int weight = node->u.n.n_bytes_in_hashtable[candidate];
        if (0) printf(" %d ", weight);
        if (weight > heaviest_weight) {
            heaviest_weight = weight;
            heaviest = candidate;
        }
    }
    *childnum = heaviest;
    if (0) printf(" \n ");
}
2007-09-06 21:36:45 +00:00
/* Forward declaration: apply CMD to NODE.  The definition appears later in
 * this file; the push-down helpers below need it before then.
 * NOTE(review): did_split/nodea/nodeb/split look like outputs describing a
 * split of NODE -- confirm against the definition. */
static int brtnode_put_cmd (BRT t, BRTNODE node, BRT_CMD *cmd,
                            int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
                            DBT *split,
                            int debug,
                            TOKUTXN txn);
2007-07-13 19:37:47 +00:00
/* key is not in the hashtable in node. Either put the key-value pair in the child, or put it in the node. */
2007-09-06 21:36:45 +00:00
static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here ( BRT t , BRTNODE node , BRTNODE child ,
2007-09-28 17:11:22 +00:00
BRT_CMD * cmd ,
int childnum_of_node ,
TOKUTXN txn ) {
2007-07-13 19:37:47 +00:00
assert ( node - > height > 0 ) ; /* Not a leaf. */
2007-09-06 21:36:45 +00:00
DBT * k = cmd - > u . id . key ;
DBT * v = cmd - > u . id . val ;
2007-07-24 01:32:03 +00:00
int to_child = serialize_brtnode_size ( child ) + k - > size + v - > size + KEY_VALUE_OVERHEAD < = child - > nodesize ;
2007-07-13 19:37:47 +00:00
if ( brt_debug_mode ) {
2007-07-24 01:32:03 +00:00
printf ( " %s:%d pushing %s to %s %d " , __FILE__ , __LINE__ , ( char * ) k - > data , to_child ? " child " : " hash " , childnum_of_node ) ;
2007-07-13 19:37:47 +00:00
if ( childnum_of_node + 1 < node - > u . n . n_children ) {
2007-07-24 01:32:03 +00:00
DBT k2 ;
2007-07-13 19:37:47 +00:00
printf ( " nextsplitkey=%s \n " , ( char * ) node - > u . n . childkeys [ childnum_of_node ] ) ;
2007-09-06 21:36:45 +00:00
assert ( t - > compare_fun ( cmd - > u . id . db , k , fill_dbt ( & k2 , node - > u . n . childkeys [ childnum_of_node ] , node - > u . n . childkeylens [ childnum_of_node ] ) ) < = 0 ) ;
2007-07-13 19:37:47 +00:00
} else {
printf ( " \n " ) ;
}
}
if ( to_child ) {
2007-07-24 01:32:03 +00:00
int again_split = - 1 ; BRTNODE againa , againb ;
DBT againk ;
init_dbt ( & againk ) ;
2007-07-13 19:37:47 +00:00
//printf("%s:%d hello!\n", __FILE__, __LINE__);
2007-09-06 21:36:45 +00:00
int r = brtnode_put_cmd ( t , child , cmd ,
2007-09-28 17:11:22 +00:00
& again_split , & againa , & againb , & againk ,
0 ,
txn ) ;
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) return r ;
assert ( again_split = = 0 ) ; /* I only did the insert if I knew it wouldn't push down, and hence wouldn't split. */
return r ;
} else {
2007-09-06 21:36:45 +00:00
int r = insert_to_hash_in_nonleaf ( node , childnum_of_node , k , v , cmd - > type ) ;
2007-07-13 19:37:47 +00:00
return r ;
}
}
2007-09-06 21:36:45 +00:00
static int push_a_brt_cmd_down ( BRT t , BRTNODE node , BRTNODE child , int childnum ,
2007-09-28 17:11:22 +00:00
BRT_CMD * cmd ,
int * child_did_split , BRTNODE * childa , BRTNODE * childb ,
DBT * childsplitk ,
TOKUTXN txn ) {
2007-07-13 19:37:47 +00:00
//if (debug) printf("%s:%d %*sinserting down\n", __FILE__, __LINE__, debug, "");
//printf("%s:%d hello!\n", __FILE__, __LINE__);
assert ( node - > height > 0 ) ;
{
2007-09-06 21:36:45 +00:00
int r = brtnode_put_cmd ( t , child , cmd ,
2007-09-28 17:11:22 +00:00
child_did_split , childa , childb , childsplitk ,
0 ,
txn ) ;
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) return r ;
}
2007-07-24 02:36:00 +00:00
2007-09-06 21:36:45 +00:00
DBT * k = cmd - > u . id . key ;
DBT * v = cmd - > u . id . val ;
2007-07-13 19:37:47 +00:00
//if (debug) printf("%s:%d %*sinserted down child_did_split=%d\n", __FILE__, __LINE__, debug, "", child_did_split);
{
2007-07-24 15:08:05 +00:00
int r = toku_hash_delete ( node - > u . n . htables [ childnum ] , k - > data , k - > size ) ; // Must delete after doing the insert, to avoid operating on freed' key
2007-07-24 02:36:00 +00:00
//printf("%s:%d deleted status=%d\n", __FILE__, __LINE__, r);
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) return r ;
}
{
2007-09-06 21:36:45 +00:00
int n_bytes_removed = ( k - > size + v - > size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD ) ;
2007-07-13 19:37:47 +00:00
node - > u . n . n_bytes_in_hashtables - = n_bytes_removed ;
node - > u . n . n_bytes_in_hashtable [ childnum ] - = n_bytes_removed ;
2007-09-18 16:09:55 +00:00
brtnode_set_dirty ( node ) ;
2007-07-13 19:37:47 +00:00
}
2007-07-24 02:36:00 +00:00
2007-07-13 19:37:47 +00:00
return 0 ;
}
/* Running count of nonleaf splits; incremented by handle_split_of_child each time it splits a node. */
int split_count = 0 ;
/* NODE is a node with a child.
* childnum was split into two nodes childa , and childb .
* We must slide things around , & move things from the old table to the new tables .
* We also move things to the new children as much as we an without doing any pushdowns or splitting of the child .
* We must delete the old hashtable ( but the old child is already deleted . )
* We also unpin the new children .
*/
static int handle_split_of_child ( BRT t , BRTNODE node , int childnum ,
2007-07-24 01:32:03 +00:00
BRTNODE childa , BRTNODE childb ,
DBT * childsplitk , /* the data in the childsplitk is alloc'd and is consumed by this call. */
int * did_split , BRTNODE * nodea , BRTNODE * nodeb ,
DBT * splitk ,
2007-07-24 11:13:42 +00:00
void * app_private ,
2007-09-28 17:11:22 +00:00
DB * db ,
TOKUTXN txn ) {
2007-07-13 19:37:47 +00:00
assert ( node - > height > 0 ) ;
2007-08-23 18:07:18 +00:00
assert ( 0 < = childnum & & childnum < node - > u . n . n_children ) ;
2007-07-13 19:37:47 +00:00
HASHTABLE old_h = node - > u . n . htables [ childnum ] ;
int old_count = node - > u . n . n_bytes_in_hashtable [ childnum ] ;
int cnum ;
int r ;
assert ( node - > u . n . n_children < = TREE_FANOUT ) ;
if ( brt_debug_mode ) {
int i ;
2007-07-24 01:32:03 +00:00
printf ( " %s:%d Child %d did split on %s \n " , __FILE__ , __LINE__ , childnum , ( char * ) childsplitk - > data ) ;
2007-07-13 19:37:47 +00:00
printf ( " %s:%d oldsplitkeys: " , __FILE__ , __LINE__ ) ;
for ( i = 0 ; i < node - > u . n . n_children - 1 ; i + + ) printf ( " %s " , ( char * ) node - > u . n . childkeys [ i ] ) ;
printf ( " \n " ) ;
}
2007-09-18 16:09:55 +00:00
brtnode_set_dirty ( node ) ;
2007-07-13 19:37:47 +00:00
// Slide the children over.
for ( cnum = node - > u . n . n_children ; cnum > childnum + 1 ; cnum - - ) {
node - > u . n . children [ cnum ] = node - > u . n . children [ cnum - 1 ] ;
node - > u . n . htables [ cnum ] = node - > u . n . htables [ cnum - 1 ] ;
node - > u . n . n_bytes_in_hashtable [ cnum ] = node - > u . n . n_bytes_in_hashtable [ cnum - 1 ] ;
2007-08-23 18:07:18 +00:00
node - > u . n . n_cursors [ cnum ] = node - > u . n . n_cursors [ cnum - 1 ] ;
2007-07-13 19:37:47 +00:00
}
node - > u . n . children [ childnum ] = childa - > thisnodename ;
node - > u . n . children [ childnum + 1 ] = childb - > thisnodename ;
2007-07-24 15:08:05 +00:00
toku_hashtable_create ( & node - > u . n . htables [ childnum ] ) ;
toku_hashtable_create ( & node - > u . n . htables [ childnum + 1 ] ) ;
2007-07-13 19:37:47 +00:00
node - > u . n . n_bytes_in_hashtable [ childnum ] = 0 ;
node - > u . n . n_bytes_in_hashtable [ childnum + 1 ] = 0 ;
// Slide the keys over
for ( cnum = node - > u . n . n_children - 1 ; cnum > childnum ; cnum - - ) {
node - > u . n . childkeys [ cnum ] = node - > u . n . childkeys [ cnum - 1 ] ;
node - > u . n . childkeylens [ cnum ] = node - > u . n . childkeylens [ cnum - 1 ] ;
}
2007-07-24 01:32:03 +00:00
node - > u . n . childkeys [ childnum ] = ( char * ) childsplitk - > data ;
node - > u . n . childkeylens [ childnum ] = childsplitk - > size ;
node - > u . n . totalchildkeylens + = childsplitk - > size ;
2007-07-13 19:37:47 +00:00
node - > u . n . n_children + + ;
2007-08-23 18:07:18 +00:00
brt_update_cursors_nonleaf_expand ( t , node , childnum , childa , childb ) ;
2007-07-13 19:37:47 +00:00
if ( brt_debug_mode ) {
int i ;
printf ( " %s:%d splitkeys: " , __FILE__ , __LINE__ ) ;
for ( i = 0 ; i < node - > u . n . n_children - 1 ; i + + ) printf ( " %s " , ( char * ) node - > u . n . childkeys [ i ] ) ;
printf ( " \n " ) ;
}
node - > u . n . n_bytes_in_hashtables - = old_count ; /* By default, they are all removed. We might add them back in. */
/* Keep pushing to the children, but not if the children would require a pushdown */
2007-09-06 21:36:45 +00:00
HASHTABLE_ITERATE ( old_h , skey , skeylen , sval , svallen , type , ( {
2007-07-24 01:32:03 +00:00
DBT skd , svd ;
2007-07-24 11:13:42 +00:00
fill_dbt_ap ( & skd , skey , skeylen , app_private ) ;
2007-07-24 01:32:03 +00:00
fill_dbt ( & svd , sval , svallen ) ;
2007-09-06 21:36:45 +00:00
BRT_CMD brtcmd ;
brtcmd . type = type ; brtcmd . u . id . key = & skd ; brtcmd . u . id . val = & svd ; brtcmd . u . id . db = db ;
2007-07-24 01:32:03 +00:00
if ( t - > compare_fun ( db , & skd , childsplitk ) < = 0 ) {
2007-09-28 17:11:22 +00:00
r = push_brt_cmd_down_only_if_it_wont_push_more_else_put_here ( t , node , childa , & brtcmd , childnum , txn ) ;
2007-07-13 19:37:47 +00:00
} else {
2007-09-28 17:11:22 +00:00
r = push_brt_cmd_down_only_if_it_wont_push_more_else_put_here ( t , node , childb , & brtcmd , childnum + 1 , txn ) ;
2007-07-13 19:37:47 +00:00
}
if ( r ! = 0 ) return r ;
} ) ) ;
2007-09-06 21:36:45 +00:00
2007-07-24 15:08:05 +00:00
toku_hashtable_free ( & old_h ) ;
2007-07-13 19:37:47 +00:00
2007-10-19 14:07:41 +00:00
r = cachetable_unpin ( t - > cf , childa - > thisnodename , childa - > dirty , brtnode_size ( childa ) ) ;
2007-07-13 19:37:47 +00:00
assert ( r = = 0 ) ;
2007-10-19 14:07:41 +00:00
r = cachetable_unpin ( t - > cf , childb - > thisnodename , childb - > dirty , brtnode_size ( childb ) ) ;
2007-07-13 19:37:47 +00:00
assert ( r = = 0 ) ;
verify_counts ( node ) ;
verify_counts ( childa ) ;
verify_counts ( childb ) ;
if ( node - > u . n . n_children > TREE_FANOUT ) {
//printf("%s:%d about to split having pushed %d out of %d keys\n", __FILE__, __LINE__, i, n_pairs);
2007-07-24 01:32:03 +00:00
brt_nonleaf_split ( t , node , nodea , nodeb , splitk ) ;
2007-07-13 19:37:47 +00:00
//printf("%s:%d did split\n", __FILE__, __LINE__);
split_count + + ;
* did_split = 1 ;
assert ( ( * nodea ) - > height > 0 ) ;
assert ( ( * nodeb ) - > height > 0 ) ;
assert ( ( * nodea ) - > u . n . n_children > 0 ) ;
assert ( ( * nodeb ) - > u . n . n_children > 0 ) ;
assert ( ( * nodea ) - > u . n . children [ ( * nodea ) - > u . n . n_children - 1 ] ! = 0 ) ;
assert ( ( * nodeb ) - > u . n . children [ ( * nodeb ) - > u . n . n_children - 1 ] ! = 0 ) ;
assert ( serialize_brtnode_size ( * nodea ) < = ( * nodea ) - > nodesize ) ;
assert ( serialize_brtnode_size ( * nodeb ) < = ( * nodeb ) - > nodesize ) ;
} else {
* did_split = 0 ;
assert ( serialize_brtnode_size ( node ) < = node - > nodesize ) ;
}
return 0 ;
}
2007-09-06 21:36:45 +00:00
/* Move buffered commands destined for child number CHILDNUM out of NODE's hashtable and into that child.
 *
 * The child is pinned through the cachetable, then commands are taken one at a time from
 * node->u.n.htables[childnum] with toku_hashtable_random_pick and handed to push_a_brt_cmd_down,
 * until the pick stops returning 0.
 *
 * If pushing a command makes the child split, the function stops pushing and returns whatever
 * handle_split_of_child returns (that call receives did_split/nodea/nodeb/splitk).
 * Otherwise the child is unpinned, *did_split is set to 0 and 0 is returned.
 * A nonzero result from the pin, from push_a_brt_cmd_down, or from the final unpin is returned as-is.
 *
 * debug is used both as an on/off flag and as the "%*s" field width of the debug printfs.
 * app_private is copied into each key DBT via fill_dbt_ap. */
static int push_some_brt_cmds_down ( BRT t , BRTNODE node , int childnum ,
2007-09-28 17:11:22 +00:00
int * did_split , BRTNODE * nodea , BRTNODE * nodeb ,
DBT * splitk ,
int debug ,
void * app_private ,
DB * db ,
TOKUTXN txn ) {
2007-07-13 19:37:47 +00:00
void * childnode_v ;
BRTNODE child ;
int r ;
assert ( node - > height > 0 ) ;
diskoff targetchild = node - > u . n . children [ childnum ] ;
assert ( targetchild > = 0 & & targetchild < t - > h - > unused_memory ) ; // This assertion could fail in a concurrent setting since another process might have bumped unused memory.
2007-10-19 14:07:41 +00:00
r = cachetable_get_and_pin ( t - > cf , targetchild , & childnode_v , NULL ,
2007-07-31 21:20:46 +00:00
brtnode_flush_callback , brtnode_fetch_callback , ( void * ) ( long ) t - > h - > nodesize ) ;
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) return r ;
2007-10-03 19:34:31 +00:00
//printf("%s:%d pin %p\n", __FILE__, __LINE__, childnode_v);
2007-07-13 19:37:47 +00:00
child = childnode_v ;
2007-10-03 19:34:31 +00:00
/* Record the parent on the freshly pinned child. */
child - > parent_brtnode = node ;
2007-07-13 19:37:47 +00:00
verify_counts ( child ) ;
//printf("%s:%d height=%d n_bytes_in_hashtable = {%d, %d, %d, ...}\n", __FILE__, __LINE__, child->height, child->n_bytes_in_hashtable[0], child->n_bytes_in_hashtable[1], child->n_bytes_in_hashtable[2]);
if ( child - > height > 0 & & child - > u . n . n_children > 0 ) assert ( child - > u . n . children [ child - > u . n . n_children - 1 ] ! = 0 ) ;
2007-09-06 21:36:45 +00:00
if ( debug ) printf ( " %s:%d %*spush_some_brt_cmds_down to %lld \n " , __FILE__ , __LINE__ , debug , " " , child - > thisnodename ) ;
2007-07-13 19:37:47 +00:00
/* I am exposing the internals of the hash table here, mostly because I am not thinking of a really
* good way to do it otherwise . I want to loop over the elements of the hash table , deleting some as I
* go . The HASHTABLE_ITERATE macro will break if I delete something from the hash table . */
2007-07-30 21:44:27 +00:00
/* Disabled debugging aid: counts and prints invocations. */
if ( 0 ) {
static int count = 0 ;
count + + ;
printf ( " %s:%d pushing %d count=%d \n " , __FILE__ , __LINE__ , childnum , count ) ;
}
2007-07-13 19:37:47 +00:00
{
bytevec key , val ;
ITEMLEN keylen , vallen ;
2007-08-14 18:50:52 +00:00
/* randomnumber is passed by address to every toku_hashtable_random_pick call below. */
long int randomnumber = random ( ) ;
2007-07-24 02:36:00 +00:00
//printf("%s:%d Try random_pick, weight=%d \n", __FILE__, __LINE__, node->u.n.n_bytes_in_hashtable[childnum]);
2007-07-24 15:08:05 +00:00
assert ( toku_hashtable_n_entries ( node - > u . n . htables [ childnum ] ) > 0 ) ;
2007-09-06 21:36:45 +00:00
int type ;
/* Keep picking while the pick succeeds (returns 0); each picked entry becomes one BRT_CMD for the child. */
while ( 0 = = toku_hashtable_random_pick ( node - > u . n . htables [ childnum ] , & key , & keylen , & val , & vallen , & type , & randomnumber ) ) {
2007-07-24 01:32:03 +00:00
int child_did_split = 0 ; BRTNODE childa , childb ;
DBT hk , hv ;
DBT childsplitk ;
2007-09-06 21:36:45 +00:00
BRT_CMD brtcmd ;
fill_dbt_ap ( & hk , key , keylen , app_private ) ;
fill_dbt ( & hv , val , vallen ) ;
brtcmd . type = type ;
brtcmd . u . id . key = & hk ;
brtcmd . u . id . val = & hv ;
brtcmd . u . id . db = db ;
2007-07-24 02:36:00 +00:00
//printf("%s:%d random_picked\n", __FILE__, __LINE__);
2007-07-24 01:32:03 +00:00
/* childsplitk receives the child's split key if the child splits; it inherits the caller's app_private. */
init_dbt ( & childsplitk ) ;
childsplitk . app_private = splitk - > app_private ;
2007-07-24 02:36:00 +00:00
2007-07-13 19:37:47 +00:00
if ( debug ) printf ( " %s:%d %*spush down %s \n " , __FILE__ , __LINE__ , debug , " " , ( char * ) key ) ;
2007-09-06 21:36:45 +00:00
r = push_a_brt_cmd_down ( t , node , child , childnum ,
2007-09-28 17:11:22 +00:00
& brtcmd ,
& child_did_split , & childa , & childb ,
& childsplitk ,
txn ) ;
2007-07-24 02:36:00 +00:00
/* Disabled consistency check: recompute the byte count of this child's hashtable and compare it with the cached counter. */
if ( 0 ) {
unsigned int sum = 0 ;
2007-09-06 21:36:45 +00:00
HASHTABLE_ITERATE ( node - > u . n . htables [ childnum ] , hk __attribute__ ( ( __unused__ ) ) , hkl , hd __attribute__ ( ( __unused__ ) ) , hdl , type __attribute__ ( ( __unused__ ) ) ,
sum + = hkl + hdl + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD ) ;
2007-07-24 02:36:00 +00:00
printf ( " %s:%d sum=%d \n " , __FILE__ , __LINE__ , sum ) ;
assert ( sum = = node - > u . n . n_bytes_in_hashtable [ childnum ] ) ;
}
2007-07-24 15:08:05 +00:00
if ( node - > u . n . n_bytes_in_hashtable [ childnum ] > 0 ) assert ( toku_hashtable_n_entries ( node - > u . n . htables [ childnum ] ) > 0 ) ;
2007-09-06 21:36:45 +00:00
//printf("%s:%d %d=push_a_brt_cmd_down=(); child_did_split=%d (weight=%d)\n", __FILE__, __LINE__, r, child_did_split, node->u.n.n_bytes_in_hashtable[childnum]);
2007-07-13 19:37:47 +00:00
/* NOTE(review): this early return does not unpin the child pinned above within this function -- confirm whether push_a_brt_cmd_down releases it on failure. */
if ( r ! = 0 ) return r ;
if ( child_did_split ) {
// If the child splits, we don't push down any further.
2007-07-24 01:32:03 +00:00
if ( debug ) printf ( " %s:%d %*shandle split splitkey=%s \n " , __FILE__ , __LINE__ , debug , " " , ( char * ) childsplitk . data ) ;
2007-07-13 19:37:47 +00:00
r = handle_split_of_child ( t , node , childnum ,
2007-07-24 01:32:03 +00:00
childa , childb , & childsplitk ,
2007-07-24 11:13:42 +00:00
did_split , nodea , nodeb , splitk ,
2007-09-28 17:11:22 +00:00
app_private , db , txn ) ;
2007-07-13 19:37:47 +00:00
return r ; /* Don't do any more pushing if the child splits. */
}
}
2007-07-24 02:36:00 +00:00
if ( 0 ) printf ( " %s:%d done random picking \n " , __FILE__ , __LINE__ ) ;
2007-07-13 19:37:47 +00:00
}
2007-09-06 21:36:45 +00:00
if ( debug ) printf ( " %s:%d %*sdone push_some_brt_cmds_down, unpinning %lld \n " , __FILE__ , __LINE__ , debug , " " , targetchild ) ;
2007-10-19 14:07:41 +00:00
/* No split happened: release the child, passing along its dirty flag and current size. */
r = cachetable_unpin ( t - > cf , targetchild , child - > dirty , brtnode_size ( child ) ) ;
2007-07-13 19:37:47 +00:00
if ( r ! = 0 ) return r ;
* did_split = 0 ;
2007-07-24 02:36:00 +00:00
assert ( serialize_brtnode_size ( node ) < = node - > nodesize ) ;
2007-07-13 19:37:47 +00:00
return 0 ;
}
/* Compute the debug level to hand to a nested call.
 * A level of 0 (debugging off) stays 0; any other level is increased by one. */
int debugp1(int debug) {
    if (!debug) {
        return 0;
    }
    return debug + 1;
}
2007-09-28 17:11:22 +00:00
/* If the serialized size of NODE exceeds node->nodesize, pick the heaviest child
 * (find_heaviest_child) and push buffered commands down to it with push_some_brt_cmds_down.
 * That push may split the child, which in turn may split NODE; in that case *did_split is 1
 * and *nodea / *nodeb describe the halves.  When NODE already fits, *did_split is set to 0.
 *
 * Returns 0 on success, or the nonzero result of push_some_brt_cmds_down.
 * debug doubles as the "%*s" field width of the debug printfs. */
static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk, int debug, void *app_private, DB *db, TOKUTXN txn)
{
    assert(node->height > 0);
    if (debug) {
        printf(" %s:%d %*sIn maybe_push_down in_buffer=%d childkeylens=%d size=%d \n ", __FILE__, __LINE__, debug, " ", node->u.n.n_bytes_in_hashtables, node->u.n.totalchildkeylens, serialize_brtnode_size(node));
    }

    /* Nothing to do when the node still fits. */
    if (serialize_brtnode_size(node) <= node->nodesize) {
        *did_split = 0;
        assert(serialize_brtnode_size(node) <= node->nodesize);
        return 0;
    }

    if (debug) {
        printf(" %s:%d %*stoo full, height=%d \n ", __FILE__, __LINE__, debug, " ", node->height);
    }

    /* Too full: push to the heaviest child. */
    int heaviest;
    if (0) printf(" %s:%d %*sfind_heaviest_data \n ", __FILE__, __LINE__, debug, " ");
    find_heaviest_child(node, &heaviest);
    if (0) printf(" %s:%d %*spush some down from %lld into %lld (child %d) \n ", __FILE__, __LINE__, debug, " ", node->thisnodename, node->u.n.children[heaviest], heaviest);
    assert(node->u.n.children[heaviest] != 0);

    int push_r = push_some_brt_cmds_down(t, node, heaviest, did_split, nodea, nodeb, splitk, debugp1(debug), app_private, db, txn);
    if (push_r != 0) {
        return push_r;
    }
    assert(*did_split == 0 || *did_split == 1);
    if (debug) {
        printf(" %s:%d %*sdid push_some_brt_cmds_down did_split=%d \n ", __FILE__, __LINE__, debug, " ", *did_split);
    }

    if (!*did_split) {
        assert(serialize_brtnode_size(node) <= node->nodesize);
        return 0;
    }

    /* We split: both halves must fit and must have a valid last child. */
    assert(serialize_brtnode_size(*nodea) <= (*nodea)->nodesize);
    assert(serialize_brtnode_size(*nodeb) <= (*nodeb)->nodesize);
    assert((*nodea)->u.n.n_children > 0);
    assert((*nodeb)->u.n.n_children > 0);
    assert((*nodea)->u.n.children[(*nodea)->u.n.n_children - 1] != 0);
    assert((*nodeb)->u.n.children[(*nodeb)->u.n.n_children - 1] != 0);
    return 0;
}
2007-08-13 18:19:57 +00:00
/* When defined, brt_leaf_put_cmd handles BRT_INSERT with a single pma_insert_or_replace call;
 * otherwise it compiles the pma_lookup / pma_delete / pma_insert sequence in its #else branch. */
# define INSERT_ALL_AT_ONCE
2007-08-13 18:01:09 +00:00
2007-09-07 13:51:47 +00:00
/* Apply one command to a leaf node.
 *
 * BRT_INSERT: store the key/value pair in the leaf's PMA (node->u.l.buffer), adjust
 *   n_bytes_in_buffer, mark the node dirty, and split the leaf with brtleaf_split if its
 *   serialized size now exceeds node->nodesize.  *did_split reports whether a split happened;
 *   on a split *nodea / *nodeb / *splitk are filled in by brtleaf_split.
 * BRT_DELETE: look the key up; if the lookup returns 0, delete it, shrink n_bytes_in_buffer and
 *   mark the node dirty.  *did_split is 0 and the lookup/delete result is returned.
 * Any other command type trips assert(0).
 *
 * Returns 0 on a successful insert, the brtleaf_split error if splitting fails, or the
 * pma_lookup / pma_delete result for a delete. */
static int brt_leaf_put_cmd(BRT t, BRTNODE node, BRT_CMD *cmd,
                            int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
                            int debug,
                            TOKUTXN txn) {
    if (cmd->type == BRT_INSERT) {
        DBT *key = cmd->u.id.key;
        DBT *val = cmd->u.id.val;
        DB *db = cmd->u.id.db;
#ifdef INSERT_ALL_AT_ONCE
        int old_val_size;
        enum pma_errors status = pma_insert_or_replace(node->u.l.buffer, key, val, &old_val_size, db, txn, node->thisnodename);
        assert(status == BRT_OK);
        if (old_val_size < 0) {
            /* Negative size: account for a whole new pair. */
            node->u.l.n_bytes_in_buffer += key->size + val->size + KEY_VALUE_OVERHEAD;
        } else {
            /* A value was replaced: only the value size changed. */
            node->u.l.n_bytes_in_buffer += val->size - old_val_size;
        }
#else
        DBT old_val;
        enum pma_errors status = pma_lookup(node->u.l.buffer, key, init_dbt(&old_val), db);
        if (status == BRT_OK) {
            status = pma_delete(node->u.l.buffer, key, db);
            assert(status == BRT_OK);
            node->u.l.n_bytes_in_buffer -= key->size + old_val.size + KEY_VALUE_OVERHEAD;
        }
        status = pma_insert(node->u.l.buffer, key, val, db);
        node->u.l.n_bytes_in_buffer += key->size + val->size + KEY_VALUE_OVERHEAD;
#endif
        brtnode_set_dirty(node);

        /* Still fits: no split needed. */
        if (serialize_brtnode_size(node) <= node->nodesize) {
            *did_split = 0;
            return 0;
        }

        /* It doesn't fit, so split the leaf. */
        int split_r = brtleaf_split(t, node, nodea, nodeb, splitk, key->app_private, db);
        if (split_r != 0) {
            return split_r;
        }
        split_count++;
        *did_split = 1;
        verify_counts(*nodea);
        verify_counts(*nodeb);
        if (debug) {
            printf(" %s:%d %*snodeb->thisnodename=%lld nodeb->size=%d \n ", __FILE__, __LINE__, debug, " ", (*nodeb)->thisnodename, (*nodeb)->nodesize);
        }
        assert(serialize_brtnode_size(*nodea) <= (*nodea)->nodesize);
        assert(serialize_brtnode_size(*nodeb) <= (*nodeb)->nodesize);
        return 0;
    }

    if (cmd->type == BRT_DELETE) {
        DBT *key = cmd->u.id.key;
        DB *db = cmd->u.id.db;
        DBT found_val;
        /* TODO combine lookup and delete */
        init_dbt(&found_val);
        int r = pma_lookup(node->u.l.buffer, key, &found_val, db);
        if (r == 0) {
            r = pma_delete(node->u.l.buffer, key, db);
            assert(r == BRT_OK);
            node->u.l.n_bytes_in_buffer -= key->size + found_val.size + KEY_VALUE_OVERHEAD;
            brtnode_set_dirty(node);
        }
        *did_split = 0;
        return r;
    }

    /* unknown message */
    assert(0);
    return 0;
}
2007-07-24 01:32:03 +00:00
/* Return the index of the child of nonleaf NODE that key K belongs to:
 * the first index i whose pivot key childkeys[i] satisfies compare_fun(db, k, pivot) <= 0,
 * or n_children-1 when K compares greater than every pivot. */
static unsigned int brtnode_which_child(BRTNODE node, DBT *k, BRT t, DB *db) {
    int idx = 0;
    assert(node->height > 0);
    while (idx < node->u.n.n_children - 1) {
        DBT pivot;
        int cmp = t->compare_fun(db, k, fill_dbt(&pivot, node->u.n.childkeys[idx], node->u.n.childkeylens[idx]));
        if (cmp <= 0) {
            return idx;
        }
        idx++;
    }
    return node->u.n.n_children - 1;
}
2007-10-02 16:17:44 +00:00
/* Deliver CMD directly to child number CHILDNUM of NODE.
 *
 * The child is pinned first: with cachetable_maybe_get_and_pin when MAYBE is nonzero,
 * otherwise with cachetable_get_and_pin.  A nonzero pin result is returned unchanged.
 * The command is then applied with brtnode_put_cmd.  If that fails, or succeeds without
 * splitting the child, the child is unpinned and the put result is returned.  If the child
 * split, handle_split_of_child absorbs the split into NODE (possibly splitting NODE, reported
 * through did_split / nodea / nodeb / splitk).
 *
 * *did_split is cleared on entry. */
static int brt_nonleaf_put_cmd_child(BRT t, BRTNODE node, BRT_CMD *cmd,
                                     int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
                                     DBT *splitk, int debug, TOKUTXN txn, int childnum, int maybe) {
    void *pinned_v;
    int r;

    *did_split = 0;
    if (!maybe) {
        r = cachetable_get_and_pin(t->cf, node->u.n.children[childnum], &pinned_v, NULL,
                                   brtnode_flush_callback, brtnode_fetch_callback, (void *)(long)t->h->nodesize);
    } else {
        r = cachetable_maybe_get_and_pin(t->cf, node->u.n.children[childnum], &pinned_v);
    }
    if (r != 0) {
        return r;
    }

    BRTNODE child = pinned_v;
    child->parent_brtnode = node;

    int child_did_split = 0;
    BRTNODE childa, childb;
    DBT childsplitk;
    r = brtnode_put_cmd(t, child, cmd,
                        &child_did_split, &childa, &childb, &childsplitk, debug, txn);

    /* Put failed, or succeeded without a split: just release the child and report the put result. */
    if (r != 0 || !child_did_split) {
        int unpin_r = cachetable_unpin(t->cf, child->thisnodename, child->dirty, brtnode_size(child));
        assert(unpin_r == 0);
        return r;
    }

    if (0) printf(" brt_nonleaf_insert child_split %p \n ", child);
    assert(cmd->type == BRT_INSERT || cmd->type == BRT_DELETE);
    DBT *key = cmd->u.id.key;
    DB *db = cmd->u.id.db;
    r = handle_split_of_child(t, node, childnum,
                              childa, childb, &childsplitk,
                              did_split, nodea, nodeb, splitk,
                              key->app_private, db, txn);
    assert(r == 0);
    return r;
}
2007-07-13 19:37:47 +00:00
2007-10-03 14:51:23 +00:00
/* When nonzero, brt_nonleaf_put_cmd first tries to hand a command straight to the child
 * (brt_nonleaf_put_cmd_child with maybe=1) and only buffers it in the node if that call
 * returns nonzero.  When zero, commands are always buffered in the node. */
int brt_do_push_cmd = 1 ;
2007-09-07 13:51:47 +00:00
/* Apply one command to a nonleaf node.
 *
 * The target child is chosen with brtnode_which_child.
 *  - If cursors are open on that child (n_cursors[childnum] > 0) the node does not buffer:
 *    the command goes straight to the child via brt_nonleaf_put_cmd_child (maybe=0).
 *  - Otherwise any command already buffered for the same key is removed from the child's
 *    hashtable.  If brt_do_push_cmd is set, the command is offered to the child with maybe=1
 *    and we are done when that returns 0.  Failing that, the command is buffered in the
 *    hashtable and brtnode_maybe_push_down is called in case the node is now too full.
 *
 * *did_split / *nodea / *nodeb / *splitk report a split of NODE.
 * Returns 0 on success or the nonzero result of the child put / push-down.
 *
 * Fix: the type of a previously buffered entry used to be read into the same variable that
 * held cmd->type, so the replacement entry was inserted with the OLD entry's type (a DELETE
 * arriving on top of a buffered INSERT was stored as INSERT).  The found entry's type now goes
 * into its own variable and the insert always uses cmd->type. */
static int brt_nonleaf_put_cmd(BRT t, BRTNODE node, BRT_CMD *cmd,
                               int *did_split, BRTNODE *nodea, BRTNODE *nodeb,
                               DBT *splitk,
                               int debug,
                               TOKUTXN txn) {
    bytevec olddata;
    ITEMLEN olddatalen;
    unsigned int childnum;
    int found;
    int type = cmd->type;
    DBT *k = cmd->u.id.key;
    DBT *v = cmd->u.id.val;
    DB *db = cmd->u.id.db;

    childnum = brtnode_which_child(node, k, t, db);

    /* non-buffering mode when cursors are open on this child */
    if (node->u.n.n_cursors[childnum] > 0) {
        assert(node->u.n.n_bytes_in_hashtable[childnum] == 0);
        return brt_nonleaf_put_cmd_child(t, node, cmd, did_split, nodea, nodeb, splitk, debug, txn, childnum, 0);
    }

    {
        /* The buffered entry's type is irrelevant here; keep it away from `type`. */
        int oldtype;
        found = !toku_hash_find(node->u.n.htables[childnum], k->data, k->size, &olddata, &olddatalen, &oldtype);
    }

    if (debug) printf(" %s:%d %*sDoing hash_insert \n ", __FILE__, __LINE__, debug, " ");
    verify_counts(node);
    if (found) {
        /* Drop the superseded entry and give its bytes back to both counters. */
        int r = toku_hash_delete(node->u.n.htables[childnum], k->data, k->size);
        int diff = k->size + olddatalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
        assert(r == 0);
        node->u.n.n_bytes_in_hashtables -= diff;
        node->u.n.n_bytes_in_hashtable[childnum] -= diff;
        brtnode_set_dirty(node);
        //printf("%s:%d deleted %d bytes\n", __FILE__, __LINE__, diff);
    }

    /* if the child is in the cache table then push the cmd to it
       otherwise just put it into this node's buffer */
    if (brt_do_push_cmd) {
        /* NOTE(review): any nonzero result (not only "child not cached") falls through to buffering -- confirm that is intended. */
        int r = brt_nonleaf_put_cmd_child(t, node, cmd, did_split, nodea, nodeb, splitk, debug, txn, childnum, 1);
        if (r == 0)
            return r;
    }

    {
        int diff = k->size + v->size + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
        int r = toku_hash_insert(node->u.n.htables[childnum], k->data, k->size, v->data, v->size, type);
        assert(r == 0);
        node->u.n.n_bytes_in_hashtables += diff;
        node->u.n.n_bytes_in_hashtable[childnum] += diff;
        brtnode_set_dirty(node);
    }

    if (debug) printf(" %s:%d %*sDoing maybe_push_down \n ", __FILE__, __LINE__, debug, " ");
    int r = brtnode_maybe_push_down(t, node, did_split, nodea, nodeb, splitk, debugp1(debug), k->app_private, db, txn);
    if (r != 0) return r;
    if (debug) printf(" %s:%d %*sDid maybe_push_down \n ", __FILE__, __LINE__, debug, " ");
    if (*did_split) {
        assert(serialize_brtnode_size(*nodea) <= (*nodea)->nodesize);
        assert(serialize_brtnode_size(*nodeb) <= (*nodeb)->nodesize);
        assert((*nodea)->u.n.n_children > 0);
        assert((*nodeb)->u.n.n_children > 0);
        assert((*nodea)->u.n.children[(*nodea)->u.n.n_children - 1] != 0);
        assert((*nodeb)->u.n.children[(*nodeb)->u.n.n_children - 1] != 0);
        verify_counts(*nodea);
        verify_counts(*nodeb);
    } else {
        assert(serialize_brtnode_size(node) <= node->nodesize);
        verify_counts(node);
    }
    return 0;
}
2007-09-06 21:36:45 +00:00
/* Dispatch CMD to the leaf or nonleaf handler depending on node->height
 * (height 0 means leaf).  All arguments and the result are passed through unchanged. */
static int brtnode_put_cmd(BRT t, BRTNODE node, BRT_CMD *cmd,
                           int *did_split, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk,
                           int debug,
                           TOKUTXN txn) {
    if (node->height != 0) {
        return brt_nonleaf_put_cmd(t, node, cmd,
                                   did_split, nodea, nodeb, splitk,
                                   debug, txn);
    }
    return brt_leaf_put_cmd(t, node, cmd,
                            did_split, nodea, nodeb, splitk,
                            debug, txn);
}
2007-09-21 17:55:49 +00:00
/* Create a cachetable with the given hash size and cache size.
 * Forwards its arguments to create_cachetable and returns that call's result. */
int brt_create_cachetable_size(CACHETABLE *ct, int hashsize, long cachesize) {
    int r = create_cachetable(ct, hashsize, cachesize);
    return r;
}
2007-08-13 20:26:23 +00:00
//enum {n_nodes_in_cache =64};
2007-08-13 21:23:11 +00:00
/* Default number of cache lines: brt_create_cachetable substitutes this value when it is called with cachelines == 0. */
enum { n_nodes_in_cache = 127 } ;
2007-07-13 19:37:47 +00:00
/* Create a cachetable sized for CACHELINES entries.
 * A cachelines of 0 selects the default n_nodes_in_cache; the value must otherwise be positive.
 * The byte budget passed on is (cachelines + 1) MiB.
 * Returns the result of brt_create_cachetable_size.
 *
 * Fix: the byte budget is now computed in `long`.  The previous int expression
 * (cachelines+1)*1024*1024 overflowed (undefined behaviour) once cachelines reached 2047. */
int brt_create_cachetable(CACHETABLE *ct, int cachelines) {
    if (cachelines == 0) cachelines = n_nodes_in_cache;
    assert(cachelines > 0);
    long cachesize = ((long)cachelines + 1) * 1024 * 1024;
    return brt_create_cachetable_size(ct, cachelines, cachesize);
}
/* Allocate a new height-0 node for tree T at disk offset OFFSET, register it with the
 * cachetable (cachetable_put) and then unpin it.
 * Returns 0 on success; if the put or the unpin fails the node is released with toku_free
 * and the failing call's result is returned.
 *
 * NOTE(review): when the put succeeded but the unpin fails, the node is freed even though it
 * was just handed to the cachetable -- confirm the cachetable does not keep the pointer. */
static int setup_brt_root_node(BRT t, diskoff offset) {
    TAGMALLOC(BRTNODE, node);
    assert(node);
    //printf("%s:%d\n", __FILE__, __LINE__);
    initialize_brtnode(t, node, offset, 0);
    node->parent_brtnode = 0;
    if (0) {
        printf(" %s:%d for tree %p node %p mdict_create--> %p \n ", __FILE__, __LINE__, t, node, node->u.l.buffer);
        printf(" %s:%d put root at %lld \n ", __FILE__, __LINE__, offset);
    }
    //printf("%s:%d putting %p (%lld)\n", __FILE__, __LINE__, node, node->thisnodename);
    int r = cachetable_put(t->cf, offset, node, brtnode_size(node),
                           brtnode_flush_callback, brtnode_fetch_callback, (void *)(long)t->h->nodesize);
    if (r == 0) {
        //printf("%s:%d created %lld\n", __FILE__, __LINE__, node->thisnodename);
        verify_counts(node);
        r = cachetable_unpin(t->cf, node->thisnodename, node->dirty, brtnode_size(node));
    }
    if (r != 0) {
        /* Either step failed: drop the node and report the error. */
        toku_free(node);
    }
    return r;
}
2007-07-20 14:20:58 +00:00
//#define BRT_TRACE
2007-07-13 19:37:47 +00:00
/* WHEN_BRTTRACE(x) expands to x when BRT_TRACE is defined and to the no-op ((void)0) otherwise.
 * open_brt wraps its trace fprintf calls in it. */
# ifdef BRT_TRACE
# define WHEN_BRTTRACE(x) x
# else
# define WHEN_BRTTRACE(x) ((void)0)
# endif
2007-07-24 01:32:03 +00:00
/* Open (and, when is_create is nonzero, possibly create) a BRT stored in file FNAME.
 *
 * dbname == NULL selects the single unnamed tree of the file; otherwise dbname names one of
 * several trees kept in the header's names[] / roots[] arrays.
 * On success 0 is returned and the new handle is stored in *newbrt.
 * On failure a nonzero code is returned: ENOMEM, EEXIST (create of a name that already exists),
 * ENOENT (open of a name that does not exist), -2 (unnamed open of a file with named roots),
 * or whatever cachetable_openf / read_and_pin_brt_header / setup_brt_root_node /
 * cachetable_put / unpin_brt_header returned.
 *
 * Error cleanup: each `if ( 0 ) { diedN : ... }` block is skipped in normal flow and entered
 * only by `goto diedN`.  After running its cleanup statement, control falls out of the block
 * onto the `goto` for the previous level, so the labels chain
 * died5 -> died4 -> died3 -> died2 -> died1 -> died0a -> died0 -> return r. */
int open_brt ( const char * fname , const char * dbname , int is_create , BRT * newbrt , int nodesize , CACHETABLE cachetable ,
2007-07-24 14:34:05 +00:00
int ( * compare_fun ) ( DB * , const DBT * , const DBT * ) ) {
2007-07-13 19:37:47 +00:00
/* If dbname is NULL then we setup to hold a single tree. Otherwise we setup an array. */
int r ;
BRT t ;
char * malloced_name = 0 ;
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, get_n_items_malloced()); print_malloced_items();
2007-07-20 14:20:58 +00:00
WHEN_BRTTRACE ( fprintf ( stderr , " BRTTRACE: %s:%d open_brt(%s, \" %s \" , %d, %p, %d, %p) \n " ,
__FILE__ , __LINE__ , fname , dbname , is_create , newbrt , nodesize , cachetable ) ) ;
2007-07-13 19:37:47 +00:00
if ( ( MALLOC ( t ) ) = = 0 ) {
assert ( errno = = ENOMEM ) ;
r = ENOMEM ;
2007-07-20 18:00:14 +00:00
if ( 0 ) { died0 : toku_free ( t ) ; }
2007-07-13 19:37:47 +00:00
return r ;
}
2007-07-24 01:32:03 +00:00
t - > compare_fun = compare_fun ;
t - > skey = t - > sval = 0 ;
2007-07-13 19:37:47 +00:00
/* Keep a private copy of the database name (stays 0 for the unnamed tree). */
if ( dbname ) {
2007-08-01 02:37:21 +00:00
malloced_name = toku_strdup ( dbname ) ;
2007-07-13 19:37:47 +00:00
if ( malloced_name = = 0 ) {
r = ENOMEM ;
2007-07-20 18:00:14 +00:00
if ( 0 ) { died0a : if ( malloced_name ) toku_free ( malloced_name ) ; }
2007-07-13 19:37:47 +00:00
goto died0 ;
}
}
t - > database_name = malloced_name ;
r = cachetable_openf ( & t - > cf , cachetable , fname , O_RDWR | ( is_create ? O_CREAT : 0 ) , 0777 ) ;
if ( r ! = 0 ) {
2007-08-01 02:37:21 +00:00
if ( 0 ) { died1 : cachefile_close ( & t - > cf ) ; }
2007-07-13 19:37:47 +00:00
goto died0a ;
}
assert ( nodesize > 0 ) ;
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, get_n_items_malloced()); print_malloced_items();
if ( is_create ) {
r = read_and_pin_brt_header ( t - > cf , & t - > h ) ;
/* r == -1 is handled as "no header yet": build a fresh one.  Any other result is asserted to be 0 in the else branch. */
if ( r = = - 1 ) {
/* construct a new header. */
if ( ( MALLOC ( t - > h ) ) = = 0 ) {
assert ( errno = = ENOMEM ) ;
r = ENOMEM ;
2007-07-20 18:00:14 +00:00
if ( 0 ) { died2 : toku_free ( t - > h ) ; }
2007-07-13 19:37:47 +00:00
goto died1 ;
}
2007-08-01 16:01:52 +00:00
t - > h - > dirty = 1 ;
2007-07-13 19:37:47 +00:00
t - > h - > nodesize = nodesize ;
t - > h - > freelist = - 1 ;
/* The first root is placed at offset nodesize (see roots[0] / unnamed_root below); unused space starts at 2*nodesize. */
t - > h - > unused_memory = 2 * nodesize ;
if ( dbname ) {
t - > h - > unnamed_root = - 1 ;
t - > h - > n_named_roots = 1 ;
2007-08-01 02:37:21 +00:00
if ( ( MALLOC_N ( 1 , t - > h - > names ) ) = = 0 ) { assert ( errno = = ENOMEM ) ; r = ENOMEM ; if ( 0 ) { died3 : toku_free ( t - > h - > names ) ; } goto died2 ; }
if ( ( MALLOC_N ( 1 , t - > h - > roots ) ) = = 0 ) { assert ( errno = = ENOMEM ) ; r = ENOMEM ; if ( 0 ) { died4 : toku_free ( t - > h - > roots ) ; } goto died3 ; }
if ( ( t - > h - > names [ 0 ] = toku_strdup ( dbname ) ) = = 0 ) { assert ( errno = = ENOMEM ) ; r = ENOMEM ; if ( 0 ) { died5 : toku_free ( t - > h - > names [ 0 ] ) ; } goto died4 ; }
2007-07-13 19:37:47 +00:00
t - > h - > roots [ 0 ] = nodesize ;
} else {
t - > h - > unnamed_root = nodesize ;
t - > h - > n_named_roots = - 1 ;
t - > h - > names = 0 ;
t - > h - > roots = 0 ;
}
if ( ( r = setup_brt_root_node ( t , nodesize ) ) ! = 0 ) { if ( dbname ) goto died5 ; else goto died2 ; }
2007-10-19 14:07:41 +00:00
if ( ( r = cachetable_put ( t - > cf , 0 , t - > h , 0 , brtheader_flush_callback , brtheader_fetch_callback , 0 ) ) ) { if ( dbname ) goto died5 ; else goto died2 ; }
2007-07-13 19:37:47 +00:00
} else {
/* A header already exists: add dbname as one more named root. */
int i ;
assert ( r = = 0 ) ;
assert ( t - > h - > unnamed_root = = - 1 ) ;
assert ( t - > h - > n_named_roots > = 0 ) ;
/* NOTE(review): dbname is passed to strcmp / toku_strdup below without a NULL check -- confirm callers never create an unnamed tree in a file that already has a header. */
for ( i = 0 ; i < t - > h - > n_named_roots ; i + + ) {
if ( strcmp ( t - > h - > names [ i ] , dbname ) = = 0 ) {
r = EEXIST ;
goto died1 ; /* deallocate everything. */
}
}
2007-07-20 18:00:14 +00:00
/* NOTE(review): the toku_realloc result overwrites the only copy of the old pointer; if toku_realloc behaves like realloc, the old array is lost on failure -- confirm. */
if ( ( t - > h - > names = toku_realloc ( t - > h - > names , ( 1 + t - > h - > n_named_roots ) * sizeof ( * t - > h - > names ) ) ) = = 0 ) { assert ( errno = = ENOMEM ) ; r = ENOMEM ; goto died1 ; }
if ( ( t - > h - > roots = toku_realloc ( t - > h - > roots , ( 1 + t - > h - > n_named_roots ) * sizeof ( * t - > h - > roots ) ) ) = = 0 ) { assert ( errno = = ENOMEM ) ; r = ENOMEM ; goto died1 ; }
2007-07-13 19:37:47 +00:00
t - > h - > n_named_roots + + ;
2007-08-01 02:37:21 +00:00
if ( ( t - > h - > names [ t - > h - > n_named_roots - 1 ] = toku_strdup ( dbname ) ) = = 0 ) { assert ( errno = = ENOMEM ) ; r = ENOMEM ; goto died1 ; }
2007-07-13 19:37:47 +00:00
/* NOTE(review): unconditional printf to stdout; looks like leftover debugging output. */
printf ( " %s:%d t=%p \n " , __FILE__ , __LINE__ , t ) ;
t - > h - > roots [ t - > h - > n_named_roots - 1 ] = malloc_diskblock_header_is_in_memory ( t , t - > h - > nodesize ) ;
2007-10-17 13:39:08 +00:00
t - > h - > dirty = 1 ;
2007-07-13 19:37:47 +00:00
if ( ( r = setup_brt_root_node ( t , t - > h - > roots [ t - > h - > n_named_roots - 1 ] ) ) ! = 0 ) goto died1 ;
}
} else {
/* Plain open: the header must be readable and, for a named open, must contain dbname. */
if ( ( r = read_and_pin_brt_header ( t - > cf , & t - > h ) ) ! = 0 ) goto died1 ;
if ( ! dbname ) {
if ( t - > h - > n_named_roots ! = - 1 ) { r = - 2 ; /* invalid args??? */ ; goto died1 ; }
} else {
int i ;
2007-10-17 13:39:08 +00:00
/* NOTE(review): unconditional printf to stdout; looks like leftover debugging output. */
printf ( " %s:%d n_roots=%d \n " , __FILE__ , __LINE__ , t - > h - > n_named_roots ) ;
2007-07-13 19:37:47 +00:00
for ( i = 0 ; i < t - > h - > n_named_roots ; i + + ) {
if ( strcmp ( t - > h - > names [ i ] , dbname ) = = 0 ) {
goto found_it ;
}
}
r = ENOENT ; /* the database doesn't exist */
goto died1 ;
}
found_it : ;
}
assert ( t - > h ) ;
if ( ( r = unpin_brt_header ( t ) ) ! = 0 ) goto died1 ;
/* The assert below expects unpin_brt_header to have reset t->h to 0. */
assert ( t - > h = = 0 ) ;
WHEN_BRTTRACE ( fprintf ( stderr , " BRTTRACE -> %p \n " , t ) ) ;
t - > cursors_head = t - > cursors_tail = 0 ;
* newbrt = t ;
return 0 ;
}
/* Close a BRT handle.
 * Every cursor still on the tree's cursor list is closed first, then the cachefile is closed
 * and the handle's own allocations (database_name, skey, sval, the handle itself) are freed.
 * Asserts that cachefile_count_pinned(brt->cf, 1) reports 0 before closing.
 * Returns 0 on success; a nonzero result from brt_cursor_close or cachefile_close is returned
 * immediately and the handle is left allocated. */
int close_brt(BRT brt) {
    int r;
    for (BRT_CURSOR cursor = brt->cursors_head; cursor != 0; cursor = brt->cursors_head) {
        r = brt_cursor_close(cursor);
        if (r != 0) {
            return r;
        }
    }
    assert(0 == cachefile_count_pinned(brt->cf, 1));
    //printf("%s:%d closing cachetable\n", __FILE__, __LINE__);
    r = cachefile_close(&brt->cf);
    if (r != 0) {
        return r;
    }
    if (brt->database_name) {
        toku_free(brt->database_name);
    }
    if (brt->skey) {
        toku_free(brt->skey);
    }
    if (brt->sval) {
        toku_free(brt->sval);
    }
    toku_free(brt);
    return 0;
}
/* Debug level that brt_root_put_cmd copies into its local `debug` and passes down the tree; 0 turns the debug printfs off. */
int brt_debug_mode = 0 ; //strcmp(key,"hello387")==0;
/* Return a pointer to the header slot that holds this tree's root offset.
 * For the unnamed tree (database_name == 0) that is &h->unnamed_root; for a named tree it is
 * the roots[] entry whose names[] entry equals database_name.
 * Calls abort() when the name is not present in the header.  brt->h must be set. */
CACHEKEY *calculate_root_offset_pointer(BRT brt) {
    if (brt->database_name == 0) {
        return &brt->h->unnamed_root;
    }
    for (int idx = 0; idx < brt->h->n_named_roots; idx++) {
        if (strcmp(brt->database_name, brt->h->names[idx]) != 0) {
            continue;
        }
        return &brt->h->roots[idx];
    }
    abort();
}
2007-08-23 18:07:18 +00:00
/* Build a new root above NODEA and NODEB after the old root split.
 *
 * A fresh disk block is allocated, *rootp is pointed at it, and the header is marked dirty.
 * The new root has height nodea->height + 1, two children (nodea, nodeb) and SPLITK as its
 * single pivot key; the pivot's data pointer is stored in the node, not copied.
 * Both children are unpinned, the new root is put into the cachetable, and open cursors are
 * told about the new root via brt_update_cursors_new_root.
 *
 * Returns 0 on success or the first nonzero result from toku_hashtable_create,
 * cachetable_unpin or cachetable_put.
 *
 * Fix: the result of cachetable_put used to be discarded, so a failed put was reported as
 * success and the cursors were updated to a root the cachetable never accepted.
 *
 * NOTE(review): the early error returns leave `newroot` allocated -- confirm who owns it on failure. */
int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, CACHEKEY *rootp) {
    TAGMALLOC(BRTNODE, newroot);
    int r;
    assert(newroot);
    diskoff newroot_diskoff = malloc_diskblock(brt, brt->h->nodesize);
    *rootp = newroot_diskoff;
    brt->h->dirty = 1;
    // printf("new_root %lld\n", newroot_diskoff);
    initialize_brtnode(brt, newroot, newroot_diskoff, nodea->height + 1);
    newroot->parent_brtnode = 0;
    newroot->u.n.n_children = 2;
    //printf("%s:%d Splitkey=%p %s\n", __FILE__, __LINE__, splitkey, splitkey);
    newroot->u.n.childkeys[0] = splitk.data;
    newroot->u.n.childkeylens[0] = splitk.size;
    newroot->u.n.totalchildkeylens = splitk.size;
    newroot->u.n.children[0] = nodea->thisnodename;
    newroot->u.n.children[1] = nodeb->thisnodename;
    r = toku_hashtable_create(&newroot->u.n.htables[0]);
    if (r != 0) return r;
    r = toku_hashtable_create(&newroot->u.n.htables[1]);
    if (r != 0) return r;
    verify_counts(newroot);
    r = cachetable_unpin(brt->cf, nodea->thisnodename, nodea->dirty, brtnode_size(nodea));
    if (r != 0) return r;
    r = cachetable_unpin(brt->cf, nodeb->thisnodename, nodeb->dirty, brtnode_size(nodeb));
    if (r != 0) return r;
    //printf("%s:%d put %lld\n", __FILE__, __LINE__, brt->root);
    r = cachetable_put(brt->cf, newroot_diskoff, newroot, brtnode_size(newroot),
                       brtnode_flush_callback, brtnode_fetch_callback, (void *)(long)brt->h->nodesize);
    if (r != 0) return r;
    brt_update_cursors_new_root(brt, newroot, nodea, nodeb);
    return 0;
}
2007-09-28 17:11:22 +00:00
/* Apply CMD to the tree, starting at its root.
 *
 * The header is pinned, the root is located (calculate_root_offset_pointer) and pinned, and the
 * command is applied with brtnode_put_cmd.  If the root split, brt_init_new_root installs a new
 * root (updating *rootp).  Finally the current root and the header are unpinned.
 *
 * Returns the nonzero result of read_and_pin_brt_header or cachetable_get_and_pin if pinning
 * fails; otherwise the result of brtnode_put_cmd, or -- when that was 0 -- the result of
 * unpinning the root.
 *
 * Fix: the result of the final cachetable_unpin on the root was silently discarded; it is now
 * reported to the caller when the put itself succeeded. */
int brt_root_put_cmd(BRT brt, BRT_CMD *cmd, TOKUTXN txn) {
    void *node_v;
    BRTNODE node;
    CACHEKEY *rootp;
    int result;
    int r;
    int did_split = 0;
    BRTNODE nodea = 0, nodeb = 0;
    DBT splitk;
    int debug = brt_debug_mode; //strcmp(key,"hello387")==0;

    //assert(0==cachetable_assert_all_unpinned(brt->cachetable));
    if ((r = read_and_pin_brt_header(brt->cf, &brt->h))) {
        return r;
    }
    rootp = calculate_root_offset_pointer(brt);
    if (debug) printf(" %s:%d Getting %lld \n ", __FILE__, __LINE__, *rootp);
    if ((r = cachetable_get_and_pin(brt->cf, *rootp, &node_v, NULL,
                                    brtnode_flush_callback, brtnode_fetch_callback, (void *)(long)brt->h->nodesize))) {
        /* Could not pin the root: release the header (best effort) and report the pin error. */
        unpin_brt_header(brt);
        return r;
    }
    //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v);
    node = node_v;
    node->parent_brtnode = 0;
    if (debug) printf(" %s:%d node inserting \n ", __FILE__, __LINE__);
    result = brtnode_put_cmd(brt, node, cmd,
                             &did_split, &nodea, &nodeb, &splitk,
                             debug,
                             txn);
    if (debug) printf(" %s:%d did_insert \n ", __FILE__, __LINE__);

    int dirty;
    long size;
    if (did_split) {
        //printf("%s:%d did_split=%d nodeb=%p nodeb->thisnodename=%lld nodeb->nodesize=%d\n", __FILE__, __LINE__, did_split, nodeb, nodeb->thisnodename, nodeb->nodesize);
        //printf("Did split, splitkey=%s\n", splitkey);
        if (nodeb->height > 0) assert(nodeb->u.n.children[nodeb->u.n.n_children - 1] != 0);
        assert(nodeb->nodesize > 0);
        /* *rootp now names the new root, which is what gets unpinned below. */
        r = brt_init_new_root(brt, nodea, nodeb, splitk, rootp);
        assert(r == 0);
        dirty = 1;
        size = 0;
    } else {
        if (node->height > 0)
            assert(node->u.n.n_children <= TREE_FANOUT);
        dirty = node->dirty;
        size = brtnode_size(node);
    }

    int unpin_r = cachetable_unpin(brt->cf, *rootp, dirty, size);
    r = unpin_brt_header(brt);
    assert(r == 0);
    //assert(0==cachetable_assert_all_unpinned(brt->cachetable));
    if (result == 0) {
        /* The put succeeded; surface a failed root unpin instead of hiding it. */
        result = unpin_r;
    }
    return result;
}
2007-09-28 17:11:22 +00:00
/* Insert the pair (KEY, VAL) by packaging it as a BRT_INSERT command and
 * submitting that command at the root.  Returns brt_root_put_cmd's result. */
int brt_insert (BRT brt, DBT *key, DBT *val, DB *db, TOKUTXN txn) {
    BRT_CMD insert_cmd;

    insert_cmd.type = BRT_INSERT;
    insert_cmd.u.id.key = key;
    insert_cmd.u.id.val = val;
    insert_cmd.u.id.db = db;
    return brt_root_put_cmd(brt, &insert_cmd, txn);
}
2007-10-03 19:34:31 +00:00
/* Look up key K in the subtree whose root is stored at OFF.
 *
 * The node is pinned for the duration of the call and unpinned (clean,
 * size 0) before returning.  In a leaf the answer comes from pma_lookup.
 * In an interior node the buffer of the child chosen by brtnode_which_child
 * is consulted first: a buffered BRT_INSERT supplies the value through
 * ybt_set_value, a buffered BRT_DELETE yields DB_NOTFOUND, and only when the
 * key is not buffered does the search recurse into that child.
 *
 * Returns the pin error if the node cannot be pinned, otherwise the lookup
 * result described above.
 */
int brt_lookup_node (BRT brt, diskoff off, DBT *k, DBT *v, DB *db, BRTNODE parent_brtnode) {
    void *pinned;
    int rc = cachetable_get_and_pin(brt->cf, off, &pinned, NULL,
                                    brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
    if (rc != 0)
        return rc;

    BRTNODE node = pinned;
    assert(node->tag == TYP_BRTNODE);
    //printf("%s:%d pin %p height=%d children=%d\n", __FILE__, __LINE__, node_v, node->height, node->u.n.n_children);
    node->parent_brtnode = parent_brtnode;

    int answer;
    if (node->height == 0) {
        answer = pma_lookup(node->u.l.buffer, k, v, db);
    } else {
        int which = brtnode_which_child(node, k, brt, db);
        bytevec buffered;
        ITEMLEN bufferedlen;
        int cmdtype;
        int found = toku_hash_find(node->u.n.htables[which], k->data, k->size,
                                   &buffered, &bufferedlen, &cmdtype);
        if (found != 0) {
            /* Nothing buffered for this key: the answer is further down. */
            answer = brt_lookup_node(brt, node->u.n.children[which], k, v, db, node);
        } else if (cmdtype == BRT_INSERT) {
            ybt_set_value(v, buffered, bufferedlen, &brt->sval);
            answer = 0;
        } else if (cmdtype == BRT_DELETE) {
            answer = DB_NOTFOUND;
        } else {
            assert(0);
            answer = -1; // some versions of gcc complain
        }
    }

    rc = cachetable_unpin(brt->cf, off, 0, 0);
    assert(rc == 0);
    return answer;
}
2007-07-24 01:32:03 +00:00
/* Look up key K in the whole tree, storing the value through V.
 *
 * Asserts that nothing in the cachefile is pinned on entry and again on
 * every exit except the one where the final header unpin itself fails.
 * Returns 0 on success, or the first nonzero code from pinning the header,
 * from brt_lookup_node, or from unpinning the header.
 */
int brt_lookup (BRT brt, DBT *k, DBT *v, DB *db) {
    assert(0 == cachefile_count_pinned(brt->cf, 1));

    int rc = read_and_pin_brt_header(brt->cf, &brt->h);
    if (rc != 0) {
        printf(" %s:%d \n ", __FILE__, __LINE__);
        assert(0 == cachefile_count_pinned(brt->cf, 1));
        return rc;
    }

    CACHEKEY *root = calculate_root_offset_pointer(brt);
    rc = brt_lookup_node(brt, *root, k, v, db, 0);
    if (rc != 0) {
        /* Lookup failed (this includes "not found"): drop the header pin and report. */
        unpin_brt_header(brt);
        assert(0 == cachefile_count_pinned(brt->cf, 1));
        return rc;
    }

    rc = unpin_brt_header(brt);
    if (rc != 0)
        return rc;
    assert(0 == cachefile_count_pinned(brt->cf, 1));
    return 0;
}
2007-09-06 21:36:45 +00:00
/* Delete KEY by submitting a BRT_DELETE command, carrying an empty value
 * (size 0), at the root.  No transaction is passed (txn argument is 0).
 * Returns brt_root_put_cmd's result. */
int brt_delete (BRT brt, DBT *key, DB *db) {
    DBT empty_val;
    BRT_CMD delete_cmd;

    init_dbt(&empty_val);
    empty_val.size = 0;

    delete_cmd.type = BRT_DELETE;
    delete_cmd.u.id.key = key;
    delete_cmd.u.id.val = &empty_val;
    delete_cmd.u.id.db = db;
    return brt_root_put_cmd(brt, &delete_cmd, 0);
}
2007-10-03 19:34:31 +00:00
/* Forward declaration: dump_brtnode below calls verify_brtnode before its definition appears in this file. */
int verify_brtnode ( BRT brt , diskoff off , bytevec lorange , ITEMLEN lolen , bytevec hirange , ITEMLEN hilen , int recurse , BRTNODE parent_brtnode ) ;
2007-07-13 19:37:47 +00:00
2007-10-03 19:34:31 +00:00
/* Print the node stored at OFF, and recursively its subtree, to stdout.
 *
 * DEPTH is the printf field width used for indentation.  LORANGE/HIRANGE
 * (with lengths LOLEN/HILEN) are the key bounds inherited from the parent;
 * either may be NULL, which is how dump_brt calls this for the root.  Keys
 * and values are printed with %s, and buffered entries are asserted to be
 * NUL-terminated strings of the recorded length.
 *
 * The node is first checked with a non-recursive verify_brtnode.  Returns
 * that result OR-ed with the results of dumping every child.
 */
int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, BRTNODE parent_brtnode) {
    int result = 0;
    BRTNODE node;
    void *node_v;
    /* Passing NULL to %s is undefined, so substitute a printable placeholder
     * (the same text glibc happens to print) for a missing bound. */
    const char *lostr = lorange ? (const char*)lorange : "(null)";
    const char *histr = hirange ? (const char*)hirange : "(null)";
    int r = cachetable_get_and_pin(brt->cf, off, &node_v, NULL,
                                   brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
    assert(r == 0);
    printf(" %s:%d pin %p \n ", __FILE__, __LINE__, node_v);
    node = node_v;
    node->parent_brtnode = parent_brtnode;
    result = verify_brtnode(brt, off, lorange, lolen, hirange, hilen, 0, parent_brtnode);
    printf(" %*sNode=%p \n ", depth, " ", node);
    if (node->height > 0) {
        printf(" %*sNode %lld nodesize=%d height=%d n_children=%d n_bytes_in_hashtables=%d keyrange=%s %s \n ",
               depth, " ", off, node->nodesize, node->height, node->u.n.n_children, node->u.n.n_bytes_in_hashtables, lostr, histr);
        //printf("%s %s\n", lorange ? lorange : "NULL", hirange ? hirange : "NULL");
        {
            int i;
            /* One buffer per child: iterate over all n_children of them.
             * (The bound used to be n_children-1, which left the last
             * child's buffered commands out of the dump.) */
            for (i = 0; i < node->u.n.n_children; i++) {
                printf(" %*schild %d buffered (%d entries): \n ", depth+1, " ", i, toku_hashtable_n_entries(node->u.n.htables[i]));
                HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen, type,
                                  ({
                                      printf(" %*s %s %s %d \n ", depth+2, " ", (char*)key, (char*)data, type);
                                      assert(strlen((char*)key)+1 == keylen);
                                      assert(strlen((char*)data)+1 == datalen);
                                  }));
            }
            for (i = 0; i < node->u.n.n_children; i++) {
                printf(" %*schild %d \n ", depth, " ", i);
                if (i > 0) {
                    printf(" %*spivot %d=%s \n ", depth+1, " ", i-1, (char*)node->u.n.childkeys[i-1]);
                }
                /* Child i is bounded below by pivot i-1 and above by pivot i;
                 * the outermost children inherit this node's own bounds.
                 * Fold the child's status in instead of discarding it. */
                result |= dump_brtnode(brt, node->u.n.children[i], depth+4,
                                       (i == 0) ? lorange : node->u.n.childkeys[i-1],
                                       (i == 0) ? lolen : node->u.n.childkeylens[i-1],
                                       (i == node->u.n.n_children-1) ? hirange : node->u.n.childkeys[i],
                                       (i == node->u.n.n_children-1) ? hilen : node->u.n.childkeylens[i],
                                       node
                                       );
            }
        }
    } else {
        printf(" %*sNode %lld nodesize=%d height=%d n_bytes_in_buffer=%d keyrange=%s %s \n ",
               depth, " ", off, node->nodesize, node->height, node->u.l.n_bytes_in_buffer, lostr, histr);
        PMA_ITERATE(node->u.l.buffer, key, keylen, val, vallen,
                    (keylen = keylen, vallen = vallen, printf(" %s:%s ", (char*)key, (char*)val)));
        printf(" \n ");
    }
    r = cachetable_unpin(brt->cf, off, 0, 0);
    assert(r == 0);
    return result;
}
/* Print the global split_count and then the whole tree, starting at the
 * root, via dump_brtnode.  The header is pinned for the duration.
 * Returns the first nonzero code from pinning the header, from dump_brtnode,
 * or from unpinning the header; 0 otherwise. */
int dump_brt (BRT brt) {
    int rc = read_and_pin_brt_header(brt->cf, &brt->h);
    if (rc != 0)
        return rc;

    CACHEKEY *root = calculate_root_offset_pointer(brt);
    printf(" split_count=%d \n ", split_count);

    rc = dump_brtnode(brt, *root, 0, 0, 0, 0, 0, null_brtnode);
    if (rc != 0) {
        unpin_brt_header(brt);
        return rc;
    }
    return unpin_brt_header(brt);
}
2007-10-03 19:34:31 +00:00
/* Print the block number (OFF divided by the header's nodesize) of the node
 * at OFF, then do the same for each of its children in order.  OFF must be
 * a multiple of nodesize.  The node stays pinned while its children are
 * visited and is unpinned clean afterwards.
 * Returns the pin error, the first child error, or the final unpin result. */
int show_brtnode_blocknumbers (BRT brt, diskoff off, BRTNODE parent_brtnode) {
    void *pinned;

    assert(off % brt->h->nodesize == 0);
    int rc = cachetable_get_and_pin(brt->cf, off, &pinned, NULL,
                                    brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
    if (rc != 0)
        return rc;

    printf(" %s:%d pin %p \n ", __FILE__, __LINE__, pinned);
    BRTNODE node = pinned;
    node->parent_brtnode = parent_brtnode;
    printf(" %lld ", off / brt->h->nodesize);

    if (node->height > 0) {
        int c;
        for (c = 0; c < node->u.n.n_children; c++) {
            rc = show_brtnode_blocknumbers(brt, node->u.n.children[c], node);
            if (rc != 0) {
                /* A child failed: release this node and pass the error up. */
                cachetable_unpin(brt->cf, off, 0, 0);
                return rc;
            }
        }
    }
    return cachetable_unpin(brt->cf, off, 0, 0);
}
/* Print the block numbers of every node in the tree on one line, using
 * show_brtnode_blocknumbers from the root.  The header is pinned for the
 * duration.  Returns the first nonzero code from pinning the header, from
 * the traversal, or from unpinning the header; 0 otherwise. */
int show_brt_blocknumbers (BRT brt) {
    int rc = read_and_pin_brt_header(brt->cf, &brt->h);
    if (rc != 0)
        return rc;

    CACHEKEY *root = calculate_root_offset_pointer(brt);
    printf(" BRT %p has blocks: ", brt);

    rc = show_brtnode_blocknumbers(brt, *root, 0);
    if (rc != 0) {
        unpin_brt_header(brt);
        return rc;
    }
    printf(" \n ");
    return unpin_brt_header(brt);
}
2007-10-03 19:34:31 +00:00
/* Check the ordering invariants of the node stored at OFF.
 *
 * LORANGE/HIRANGE (lengths LOLEN/HILEN) are the key bounds inherited from
 * the parent; a NULL bound means "unbounded" and disables that check.
 * For an interior node:
 *   - every key buffered for child i must be strictly above that child's
 *     lower bound (asserted) and not above its upper bound (reported with a
 *     printf and a result of 1);
 *   - every pivot must lie within (LORANGE, HIRANGE] (asserted);
 *   - if RECURSE is nonzero each child is verified with its narrowed bounds.
 * Leaves are only pinned and unpinned.
 *
 * Returns the pin/unpin error if either fails, otherwise the OR of the
 * local result and the children's results (0 when nothing was reported).
 */
int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse, BRTNODE parent_brtnode) {
    int result = 0;
    BRTNODE node;
    void *node_v;
    int r;
    if ((r = cachetable_get_and_pin(brt->cf, off, &node_v, NULL,
                                    brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize)))
        return r;
    //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v);
    node = node_v;
    node->parent_brtnode = parent_brtnode;
    if (node->height > 0) {
        int i;
        /* There is one buffer per child, so visit all n_children of them.
         * (The bound used to be n_children-1, which never checked the last
         * child's buffer and made the i+1>=n_children case below dead.) */
        for (i = 0; i < node->u.n.n_children; i++) {
            bytevec thislorange, thishirange;
            ITEMLEN thislolen, thishilen;
            /* Child i lives between pivot i-1 and pivot i; the first and
             * last children inherit this node's own bounds. */
            if (i == 0) {
                thislorange = lorange;
                thislolen = lolen;
            } else {
                thislorange = node->u.n.childkeys[i-1];
                thislolen = node->u.n.childkeylens[i-1];
            }
            if (i+1 >= node->u.n.n_children) {
                thishirange = hirange;
                thishilen = hilen;
            } else {
                thishirange = node->u.n.childkeys[i];
                thishilen = node->u.n.childkeylens[i];
            }
            {
                /* GCC nested function: it reads the bounds computed above
                 * and records a violation in the enclosing `result`. */
                void verify_pair (bytevec key, unsigned int keylen,
                                  bytevec data __attribute__((__unused__)),
                                  unsigned int datalen __attribute__((__unused__)),
                                  int type __attribute__((__unused__)),
                                  void *ignore __attribute__((__unused__))) {
                    if (thislorange) assert(keycompare(thislorange, thislolen, key, keylen) < 0);
                    if (thishirange && keycompare(key, keylen, thishirange, thishilen) > 0) {
                        printf(" %s:%d in buffer %d key %s is bigger than %s \n ", __FILE__, __LINE__, i, (char*)key, (char*)thishirange);
                        result = 1;
                    }
                }
                toku_hashtable_iterate(node->u.n.htables[i], verify_pair, 0);
            }
        }
        for (i = 0; i < node->u.n.n_children; i++) {
            if (i > 0) {
                if (lorange) assert(keycompare(lorange, lolen, node->u.n.childkeys[i-1], node->u.n.childkeylens[i-1]) < 0);
                if (hirange) assert(keycompare(node->u.n.childkeys[i-1], node->u.n.childkeylens[i-1], hirange, hilen) <= 0);
            }
            if (recurse) {
                result |= verify_brtnode(brt, node->u.n.children[i],
                                         (i == 0) ? lorange : node->u.n.childkeys[i-1],
                                         (i == 0) ? lolen : node->u.n.childkeylens[i-1],
                                         (i == node->u.n.n_children-1) ? hirange : node->u.n.childkeys[i],
                                         (i == node->u.n.n_children-1) ? hilen : node->u.n.childkeylens[i],
                                         recurse,
                                         node);
            }
        }
    }
    if ((r = cachetable_unpin(brt->cf, off, 0, 0))) return r;
    return result;
}
/* Verify the whole tree: run verify_brtnode on the root with no key bounds
 * and recursion enabled.  The header is pinned for the duration.
 * Returns the first nonzero code from pinning the header, from the
 * verification, or from unpinning the header; 0 otherwise. */
int verify_brt (BRT brt) {
    int rc = read_and_pin_brt_header(brt->cf, &brt->h);
    if (rc != 0)
        return rc;

    CACHEKEY *root = calculate_root_offset_pointer(brt);
    rc = verify_brtnode(brt, *root, 0, 0, 0, 0, 1, null_brtnode);
    if (rc != 0) {
        unpin_brt_header(brt);
        return rc;
    }
    return unpin_brt_header(brt);
}
2007-08-24 12:10:49 +00:00
/* Debug switch read by brt_flush_child: when nonzero it prints the node name, child number and cursor path
 * before and after the flush, and the value is also passed on to push_some_brt_cmds_down. */
int brt_flush_debug = 0 ;
2007-08-23 18:07:18 +00:00
/*
2007-08-24 12:10:49 +00:00
* Flush the buffer for a child of a node .
* If the node split when pushing kvpairs to a child of the node
* then reflect the node split up the cursor path towards the tree root .
* If the root is reached then create a new root
2007-08-23 18:07:18 +00:00
*/
2007-10-03 14:51:23 +00:00
void brt_flush_child ( BRT t , BRTNODE node , int childnum , BRT_CURSOR cursor , void * app_private , DB * db , TOKUTXN txn ) {
2007-08-23 18:07:18 +00:00
int r ;
int child_did_split ;
BRTNODE childa , childb ;
DBT child_splitk ;
2007-08-24 12:10:49 +00:00
if ( brt_flush_debug ) {
printf ( " brt_flush_child %lld %d \n " , node - > thisnodename , childnum ) ;
brt_cursor_print ( cursor ) ;
}
2007-08-23 18:07:18 +00:00
init_dbt ( & child_splitk ) ;
2007-09-06 21:36:45 +00:00
r = push_some_brt_cmds_down ( t , node , childnum ,
2007-10-03 14:51:23 +00:00
& child_did_split , & childa , & childb , & child_splitk , brt_flush_debug , app_private , db , txn ) ;
2007-08-23 18:07:18 +00:00
assert ( r = = 0 ) ;
2007-08-24 12:10:49 +00:00
if ( brt_flush_debug ) {
printf ( " brt_flush_child done %lld %d \n " , node - > thisnodename , childnum ) ;
brt_cursor_print ( cursor ) ;
}
2007-08-23 18:07:18 +00:00
if ( child_did_split ) {
int i ;
for ( i = cursor - > path_len - 1 ; i > = 0 ; i - - ) {
if ( cursor - > path [ i ] = = childa | | cursor - > path [ i ] = = childb )
break ;
}
assert ( i = = cursor - > path_len - 1 ) ;
while ( child_did_split ) {
child_did_split = 0 ;
2007-08-24 12:10:49 +00:00
if ( 0 ) printf ( " child_did_split %lld %lld \n " , childa - > thisnodename , childb - > thisnodename ) ;
2007-08-23 18:07:18 +00:00
if ( i = = 0 ) {
CACHEKEY * rootp = calculate_root_offset_pointer ( t ) ;
r = brt_init_new_root ( t , childa , childb , child_splitk , rootp ) ;
assert ( r = = 0 ) ;
2007-10-19 14:07:41 +00:00
r = cachetable_unpin ( t - > cf , * rootp , CACHETABLE_DIRTY , 0 ) ;
2007-08-23 18:07:18 +00:00
assert ( r = = 0 ) ;
} else {
BRTNODE upnode ;
assert ( i > 0 ) ;
i = i - 1 ;
upnode = cursor - > path [ i ] ;
childnum = cursor - > pathcnum [ i ] ;
r = handle_split_of_child ( t , upnode , childnum ,
2007-09-28 17:11:22 +00:00
childa , childb , & child_splitk ,
& child_did_split , & childa , & childb , & child_splitk ,
2007-10-03 14:51:23 +00:00
app_private , db , txn ) ;
2007-08-23 18:07:18 +00:00
assert ( r = = 0 ) ;
}
}
}
}
/*
 * Record that a cursor now points at child CHILDNUM of NODE by incrementing
 * that child's cursor count.  Leaf nodes (height 0) are left untouched.
 * CURSOR is only referenced by the disabled debug print.
 */
void brt_node_add_cursor (BRTNODE node, int childnum, BRT_CURSOR cursor) {
    if (node->height <= 0)
        return;
    if (0) printf(" brt_node_add_cursor %lld %d %p \n ", node->thisnodename, childnum, cursor);
    node->u.n.n_cursors[childnum] += 1;
}
/*
 * Record that a cursor no longer points at child CHILDNUM of NODE by
 * decrementing that child's cursor count, which must be positive.
 * Leaf nodes (height 0) are left untouched.
 */
void brt_node_remove_cursor (BRTNODE node, int childnum, BRT_CURSOR cursor __attribute__((unused))) {
    if (node->height <= 0)
        return;
    if (0) printf(" brt_node_remove_cursor %lld %d %p \n ", node->thisnodename, childnum, cursor);
    assert(node->u.n.n_cursors[childnum] > 0);
    node->u.n.n_cursors[childnum] -= 1;
}
2007-08-24 12:10:49 +00:00
/* Debug switch read by the brt_update_cursors_* functions below: when nonzero each of them prints one trace line naming the nodes involved. */
int brt_update_debug = 0 ;
2007-08-23 18:07:18 +00:00
/* Tell every active cursor on tree T that NEWROOT now sits above LEFT and
 * RIGHT, by calling brt_cursor_new_root on each of them. */
void brt_update_cursors_new_root (BRT t, BRTNODE newroot, BRTNODE left, BRTNODE right) {
    BRT_CURSOR c;

    if (brt_update_debug)
        printf(" brt_update_cursors_new_root %lld %lld %lld \n ",
               newroot->thisnodename, left->thisnodename, right->thisnodename);

    c = t->cursors_head;
    while (c) {
        if (brt_cursor_active(c))
            brt_cursor_new_root(c, t, newroot, left, right);
        c = c->next;
    }
}
/* Tell every active cursor on tree T that leaf OLDNODE was split into LEFT
 * and RIGHT, by calling brt_cursor_leaf_split on each of them. */
void brt_update_cursors_leaf_split (BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right) {
    BRT_CURSOR c;

    if (brt_update_debug)
        printf(" brt_update_cursors_leaf_split %lld %lld %lld \n ",
               oldnode->thisnodename, left->thisnodename, right->thisnodename);

    c = t->cursors_head;
    while (c) {
        if (brt_cursor_active(c))
            brt_cursor_leaf_split(c, t, oldnode, left, right);
        c = c->next;
    }
}
/* Tell every active cursor on tree T that interior NODE gained a child at
 * CHILDNUM (the old child became LEFT and RIGHT), by calling
 * brt_cursor_nonleaf_expand on each of them. */
void brt_update_cursors_nonleaf_expand (BRT t, BRTNODE node, int childnum, BRTNODE left, BRTNODE right) {
    BRT_CURSOR c;

    if (brt_update_debug)
        printf(" brt_update_cursors_nonleaf_expand %lld h=%d c=%d nc=%d %lld %lld \n ",
               node->thisnodename, node->height, childnum,
               node->u.n.n_children, left->thisnodename, right->thisnodename);

    c = t->cursors_head;
    while (c) {
        if (brt_cursor_active(c))
            brt_cursor_nonleaf_expand(c, t, node, childnum, left, right);
        c = c->next;
    }
}
/* Tell every active cursor on tree T that interior OLDNODE was split into
 * LEFT and RIGHT, by calling brt_cursor_nonleaf_split on each of them. */
void brt_update_cursors_nonleaf_split (BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right) {
    BRT_CURSOR c;

    if (brt_update_debug)
        printf(" brt_update_cursors_nonleaf_split %lld %lld %lld \n ",
               oldnode->thisnodename, left->thisnodename, right->thisnodename);

    c = t->cursors_head;
    while (c) {
        if (brt_cursor_active(c))
            brt_cursor_nonleaf_split(c, t, oldnode, left, right);
        c = c->next;
    }
}
/* Adjust CURSOR's path after a new root was placed above LEFT and RIGHT.
 *
 * The path must have room for one more entry and its current base must be
 * LEFT or RIGHT.  Every entry is shifted up by one slot, NEWROOT becomes
 * entry 0 with child number 0 (old base was LEFT) or 1 (otherwise), NEWROOT
 * is pinned through cachetable_maybe_get_and_pin, and the cursor is counted
 * on the chosen child of NEWROOT.
 */
void brt_cursor_new_root (BRT_CURSOR cursor, BRT t, BRTNODE newroot, BRTNODE left, BRTNODE right) {
    int slot;
    int which;
    int rc;
    void *pinned;

    assert(!brt_cursor_path_full(cursor));
    if (0) printf(" brt_cursor_new_root %p %lld newroot %lld \n ", cursor, cursor->path[0]->thisnodename, newroot->thisnodename);
    assert(cursor->path[0] == left || cursor->path[0] == right);

    /* open up slot 0 by sliding the existing path one position deeper */
    slot = cursor->path_len;
    while (slot > 0) {
        cursor->path[slot] = cursor->path[slot-1];
        cursor->pathcnum[slot] = cursor->pathcnum[slot-1];
        slot--;
    }
    cursor->path_len++;

    /* install the new root at the base */
    cursor->path[0] = newroot;
    if (cursor->path[1] == left)
        which = 0;
    else
        which = 1;
    cursor->pathcnum[0] = which;

    rc = cachetable_maybe_get_and_pin(t->cf, newroot->thisnodename, &pinned);
    assert(rc == 0 && pinned == newroot);
    brt_node_add_cursor(newroot, which, cursor);
}
/* Adjust CURSOR after leaf OLDNODE was split into leaves LEFT and RIGHT.
 *
 * Nothing happens unless the last entry of the cursor's path is OLDNODE.
 * Otherwise the half whose buffer is the PMA the cursor's pma cursor refers
 * to (one of the two is required) replaces OLDNODE in the path: OLDNODE is
 * unpinned and the chosen half is pinned.
 */
void brt_cursor_leaf_split (BRT_CURSOR cursor, BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right) {
    int rc;
    BRTNODE target;
    PMA cur_pma;
    void *pinned;

    assert(oldnode->height == 0);
    if (cursor->path[cursor->path_len-1] != oldnode)
        return;

    assert(left->height == 0 && right->height == 0);
    rc = pma_cursor_get_pma(cursor->pmacurs, &cur_pma);
    assert(rc == 0);

    target = (cur_pma == left->u.l.buffer)  ? left
           : (cur_pma == right->u.l.buffer) ? right
           : 0;
    assert(target);

    if (0) printf(" brt_cursor_leaf_split %p oldnode %lld newnode %lld \n ", cursor,
                  oldnode->thisnodename, target->thisnodename);

    rc = cachetable_unpin(t->cf, oldnode->thisnodename, oldnode->dirty, brtnode_size(oldnode));
    assert(rc == 0);
    rc = cachetable_maybe_get_and_pin(t->cf, target->thisnodename, &pinned);
    assert(rc == 0 && pinned == target);
    cursor->path[cursor->path_len-1] = target;
}
/* Adjust CURSOR after interior NODE gained a child: the child at CHILDNUM
 * was replaced by LEFT (at CHILDNUM) and RIGHT (at CHILDNUM+1).
 *
 * If NODE is on the cursor's path, the cursor's child number in NODE is
 * advanced by one (and the per-child cursor counts moved with it) when
 *   - it pointed past CHILDNUM, or
 *   - it pointed at CHILDNUM, NODE is the last path entry, and the cursor's
 *     op is DB_PREV or DB_LAST, or
 *   - it pointed at CHILDNUM and the next path entry is RIGHT.
 * In every other case the cursor is left as it is.
 */
void brt_cursor_nonleaf_expand (BRT_CURSOR cursor, BRT t __attribute__((unused)), BRTNODE node, int childnum, BRTNODE left, BRTNODE right) {
    int idx;
    int oldchildnum, newchildnum;
    int shift_right = 0;

    assert(node->height > 0);

    if (0) brt_cursor_print(cursor);

    /* locate NODE on the path */
    idx = 0;
    while (idx < cursor->path_len && cursor->path[idx] != node)
        idx++;
    if (idx >= cursor->path_len)
        return;

    if (cursor->pathcnum[idx] < childnum)
        return;

    if (cursor->pathcnum[idx] > childnum) {
        shift_right = 1;
    } else if (idx == cursor->path_len-1 && (cursor->op == DB_PREV || cursor->op == DB_LAST)) {
        shift_right = 1;
    } else if (idx+1 < cursor->path_len) {
        assert(cursor->path[idx+1] == left || cursor->path[idx+1] == right);
        if (cursor->path[idx+1] == right)
            shift_right = 1;
    }

    if (!shift_right)
        return;

    oldchildnum = cursor->pathcnum[idx];
    newchildnum = oldchildnum + 1;
    brt_node_remove_cursor(node, oldchildnum, cursor);
    brt_node_add_cursor(node, newchildnum, cursor);
    cursor->pathcnum[idx] = newchildnum;
}
/* Adjust CURSOR after interior OLDNODE was split into interior nodes LEFT
 * and RIGHT.
 *
 * If OLDNODE is on the cursor's path, the cursor's child number decides
 * which half takes its place: numbers below LEFT's child count stay in
 * LEFT, the rest move to RIGHT with the number reduced by LEFT's child
 * count.  OLDNODE is unpinned, the chosen half is pinned, and the per-child
 * cursor counts are moved accordingly.
 */
void brt_cursor_nonleaf_split (BRT_CURSOR cursor, BRT t, BRTNODE oldnode, BRTNODE left, BRTNODE right) {
    int idx;
    BRTNODE target;
    int rc;
    void *pinned;
    int cnum;

    assert(oldnode->height > 0 && left->height > 0 && right->height > 0);

    /* locate OLDNODE on the path */
    idx = 0;
    while (idx < cursor->path_len && cursor->path[idx] != oldnode)
        idx++;
    if (idx >= cursor->path_len)
        return;

    cnum = cursor->pathcnum[idx];
    brt_node_remove_cursor(oldnode, cnum, cursor);
    if (cnum >= left->u.n.n_children) {
        target = right;
        cnum -= left->u.n.n_children;
    } else {
        target = left;
    }

    if (0) printf(" brt_cursor_nonleaf_split %p oldnode %lld newnode %lld \n ",
                  cursor, oldnode->thisnodename, target->thisnodename);

    rc = cachetable_unpin(t->cf, oldnode->thisnodename, oldnode->dirty, brtnode_size(oldnode));
    assert(rc == 0);
    rc = cachetable_maybe_get_and_pin(t->cf, target->thisnodename, &pinned);
    assert(rc == 0 && pinned == target);
    brt_node_add_cursor(target, cnum, cursor);
    cursor->path[idx] = target;
    cursor->pathcnum[idx] = cnum;
}
2007-07-13 19:37:47 +00:00
/* Allocate a new cursor on BRT with an empty path and no pma cursor, push it
 * on the front of the tree's cursor list, and store it through CURSOR.
 * Allocation failure is caught by an assert.  Always returns 0. */
int brt_cursor (BRT brt, BRT_CURSOR *cursor) {
    BRT_CURSOR MALLOC(result);
    assert(result);

    result->brt = brt;
    result->path_len = 0;
    result->pmacurs = 0;

    /* link in at the head of the doubly linked list */
    if (brt->cursors_head == 0) {
        brt->cursors_tail = result;
    } else {
        brt->cursors_head->prev = result;
    }
    result->next = brt->cursors_head;
    result->prev = 0;
    brt->cursors_head = result;

    *cursor = result;
    return 0;
}
2007-08-23 18:07:18 +00:00
/* Forward declaration: brt_cursor_close below calls unpin_cursor; its definition is not in this part of the file. */
static int unpin_cursor ( BRT_CURSOR ) ;
2007-07-13 19:37:47 +00:00
/* Close CURS: unpin whatever it holds (unpin_cursor), unlink it from its
 * tree's cursor list, free its pma cursor if it has one, and free the
 * cursor itself.  Returns the unpin_cursor result if nonzero, otherwise the
 * pma_cursor_free result (0 when there was no pma cursor). */
int brt_cursor_close (BRT_CURSOR curs) {
    BRT brt = curs->brt;
    int r = unpin_cursor(curs);

    /* detach from the predecessor side */
    if (curs->prev) {
        curs->prev->next = curs->next;
    } else {
        assert(brt->cursors_head == curs);
        brt->cursors_head = curs->next;
    }

    /* detach from the successor side */
    if (curs->next) {
        curs->next->prev = curs->prev;
    } else {
        assert(brt->cursors_tail == curs);
        brt->cursors_tail = curs->prev;
    }

    if (curs->pmacurs) {
        int r2 = pma_cursor_free(&curs->pmacurs);
        if (r == 0) r = r2;
    }
    toku_free(curs);
    return r;
}
2007-08-23 18:07:18 +00:00
/*
 * Print the path of a cursor on one line: the cursor's address, then for
 * every path entry the node name, followed for interior nodes by the
 * cursor's child number and the node's child count.
 */
void brt_cursor_print (BRT_CURSOR cursor) {
    int depth = 0;

    printf(" cursor %p: ", cursor);
    while (depth < cursor->path_len) {
        BRTNODE entry = cursor->path[depth];
        printf(" %lld ", entry->thisnodename);
        if (entry->height > 0) {
            printf(" ,%d:%d ", cursor->pathcnum[depth], entry->u.n.n_children);
        } else {
            printf(" ");
        }
        depth++;
    }
    printf(" \n ");
}
2007-10-04 15:23:41 +00:00
/* Position CURSOR on the last entry of the subtree stored at OFF.
 *
 * The node is pinned and pushed on the cursor's path.
 *  - Leaf: a pma cursor is created and moved to the last position.
 *  - Interior node: children are tried from the last one backwards.  Before
 *    descending, a child whose buffer is non-empty is flushed with
 *    brt_flush_child; since the flush may change the path, the node is
 *    re-read from the path and the scan restarts at its last child.  After a
 *    child reports DB_NOTFOUND the previous child is tried.
 *
 * On success returns 0 with the path extended down to a leaf.  On failure
 * the node is popped from the path and unpinned, and the error (DB_NOTFOUND
 * once the children are exhausted) is returned.
 */
int brtcurs_set_position_last (BRT_CURSOR cursor, diskoff off, DBT *key, DB *db, TOKUTXN txn, BRTNODE parent_brtnode) {
    BRT brt = cursor->brt;
    void *pinned;

    int r = cachetable_get_and_pin(brt->cf, off, &pinned, NULL,
                                   brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
    if (r != 0)
        return r;

    BRTNODE node = pinned;
    node->parent_brtnode = parent_brtnode;
    assert(cursor->path_len < CURSOR_PATHLEN_LIMIT);
    cursor->path[cursor->path_len++] = node;

    if (node->height == 0) {
        r = pma_cursor(node->u.l.buffer, &cursor->pmacurs);
        if (r == 0) {
            r = pma_cursor_set_position_last(cursor->pmacurs);
            if (r == 0)
                return 0;
            /* we'll deallocate this cursor, and unpin this node, and go back up. */
            pma_cursor_free(&cursor->pmacurs);
        }
    } else {
        int childnum = node->u.n.n_children - 1;
        for (;;) {
            cursor->pathcnum[cursor->path_len-1] = childnum;
            brt_node_add_cursor(node, childnum, cursor);

            if (node->u.n.n_bytes_in_hashtable[childnum] > 0) {
                brt_flush_child(cursor->brt, node, childnum, cursor, key->app_private, db, txn);
                /*
                 * the flush may have been partially successful.  it may also
                 * have changed the tree such that the current node has
                 * expanded or been replaced.  start over from the last child
                 * of whatever node the path now names.
                 */
                node = cursor->path[cursor->path_len-1];
                childnum = cursor->pathcnum[cursor->path_len-1];
                brt_node_remove_cursor(node, childnum, cursor);
                childnum = node->u.n.n_children - 1;
                continue;
            }

            r = brtcurs_set_position_last(cursor, node->u.n.children[childnum], key, db, txn, node);
            if (r == 0)
                return 0;
            assert(node == cursor->path[cursor->path_len-1]);
            brt_node_remove_cursor(node, childnum, cursor);
            if (r == DB_NOTFOUND && childnum > 0) {
                childnum--;
                continue;
            }
            /* we ran out of children without finding anything, or had some other trouble. */
            break;
        }
    }

    /* failure: take this node back off the path and release it */
    cursor->path_len--;
    cachetable_unpin(brt->cf, off, 1, 0);
    return r;
}
2007-10-04 15:23:41 +00:00
/* Position CURSOR on the first entry of the subtree stored at OFF.
 *
 * The node is pinned and pushed on the cursor's path.
 *  - Leaf: a pma cursor is created and moved to the first position.
 *  - Interior node: children are tried from the first one forwards.  Before
 *    descending, a child whose buffer is non-empty is flushed with
 *    brt_flush_child; since the flush may change the path, the node is
 *    re-read from the path and the scan restarts at its first child.  After
 *    a child reports DB_NOTFOUND the next child is tried.
 *
 * On success returns 0 with the path extended down to a leaf.  On failure
 * the node is popped from the path and unpinned, and the error (DB_NOTFOUND
 * once the children are exhausted) is returned.
 */
int brtcurs_set_position_first (BRT_CURSOR cursor, diskoff off, DBT *key, DB *db, TOKUTXN txn, BRTNODE parent_brtnode) {
    BRT brt = cursor->brt;
    void *pinned;

    int r = cachetable_get_and_pin(brt->cf, off, &pinned, NULL,
                                   brtnode_flush_callback, brtnode_fetch_callback, (void*)(long)brt->h->nodesize);
    if (r != 0)
        return r;

    BRTNODE node = pinned;
    node->parent_brtnode = parent_brtnode;
    assert(cursor->path_len < CURSOR_PATHLEN_LIMIT);
    cursor->path[cursor->path_len++] = node;

    if (node->height == 0) {
        r = pma_cursor(node->u.l.buffer, &cursor->pmacurs);
        if (r == 0) {
            r = pma_cursor_set_position_first(cursor->pmacurs);
            if (r == 0)
                return 0;
            /* we'll deallocate this cursor, and unpin this node, and go back up. */
            pma_cursor_free(&cursor->pmacurs);
        }
    } else {
        int childnum = 0;
        for (;;) {
            cursor->pathcnum[cursor->path_len-1] = childnum;
            brt_node_add_cursor(node, childnum, cursor);

            if (node->u.n.n_bytes_in_hashtable[childnum] > 0) {
                brt_flush_child(cursor->brt, node, childnum, cursor, key->app_private, db, txn);
                /*
                 * the flush may have been partially successful.  it may also
                 * have changed the tree such that the current node has
                 * expanded or been replaced.  start over from the first child
                 * of whatever node the path now names.
                 */
                node = cursor->path[cursor->path_len-1];
                childnum = cursor->pathcnum[cursor->path_len-1];
                brt_node_remove_cursor(node, childnum, cursor);
                childnum = 0;
                continue;
            }

            r = brtcurs_set_position_first(cursor, node->u.n.children[childnum], key, db, txn, node);
            if (r == 0)
                return r;
            assert(node == cursor->path[cursor->path_len-1]);
            brt_node_remove_cursor(node, childnum, cursor);
            if (r == DB_NOTFOUND && childnum + 1 < node->u.n.n_children) {
                childnum++;
                continue;
            }
            /* we ran out of children without finding anything, or had some other trouble. */
            break;
        }
    }

    /* failure: take this node back off the path and release it */
    cursor->path_len--;
    cachetable_unpin(brt->cf, off, 1, 0);
    return r;
}
2007-10-04 15:23:41 +00:00
/*
 * Move the cursor to the first entry of the next subtree to the right.
 *
 * Each round pops (and unpins) the node on top of the cursor path.  In the
 * node that is then on top, the cursor registration for the previously
 * recorded child is dropped and every child to its right is tried in turn:
 * buffered bytes for that child are flushed until none remain, then
 * brtcurs_set_position_first() descends into it.  If no child yields a
 * position, that node is popped as well and the search continues one level up.
 *
 * Returns 0 as soon as brtcurs_set_position_first() succeeds, or DB_NOTFOUND
 * once brt_cursor_path_empty() reports that the path has been emptied.
 *
 * NOTE(review): the result of cachetable_unpin() is ignored, and any nonzero
 * result from brtcurs_set_position_first() simply moves on to the next child.
 */
int brtcurs_set_position_next2(BRT_CURSOR cursor, DBT *key, DB *db, TOKUTXN txn) {
    for (;;) {
        assert(cursor->path_len > 0);

        /* Pop the top of the path and release its pin. */
        BRTNODE popped = cursor->path[cursor->path_len - 1];
        cursor->path_len--;
        cachetable_unpin(cursor->brt->cf, popped->thisnodename, popped->dirty, brtnode_size(popped));
        if (brt_cursor_path_empty(cursor))
            return DB_NOTFOUND;

        /* Continue with the children to the right of the recorded one. */
        BRTNODE parent = cursor->path[cursor->path_len - 1];
        int idx = cursor->pathcnum[cursor->path_len - 1];
        assert(parent->height > 0);
        brt_node_remove_cursor(parent, idx, cursor);

        for (idx++; idx < parent->u.n.n_children; idx++) {
            cursor->pathcnum[cursor->path_len - 1] = idx;
            brt_node_add_cursor(parent, idx, cursor);

            /* Flush until nothing is buffered for this child; the flush may
             * rewrite the path entry, so re-read it every time. */
            for (;;) {
                int pending = parent->u.n.n_bytes_in_hashtable[idx];
                if (pending == 0)
                    break;
                brt_flush_child(cursor->brt, parent, idx, cursor, key->app_private, db, txn);
                parent = cursor->path[cursor->path_len - 1];
                idx = cursor->pathcnum[cursor->path_len - 1];
            }

            if (brtcurs_set_position_first(cursor, parent->u.n.children[idx], key, db, txn, parent) == 0)
                return 0;
            assert(parent == cursor->path[cursor->path_len - 1]);
            brt_node_remove_cursor(parent, idx, cursor);
        }
        /* Ran out of children here: go around again and pop this node too. */
    }
}
2007-07-20 14:20:58 +00:00
/* requires that the cursor is initialized. */
2007-10-04 15:23:41 +00:00
int brtcurs_set_position_next ( BRT_CURSOR cursor , DBT * key , DB * db , TOKUTXN txn ) {
2007-07-20 12:41:23 +00:00
int r = pma_cursor_set_position_next ( cursor - > pmacurs ) ;
if ( r = = DB_NOTFOUND ) {
/* We fell off the end of the pma. */
2007-07-20 14:20:58 +00:00
if ( cursor - > path_len = = 1 ) return DB_NOTFOUND ;
2007-08-23 18:07:18 +00:00
/* Part of the trickyness is we need to leave the cursor pointing at the current (possibly deleted) value if there is no next value. */
r = pma_cursor_free ( & cursor - > pmacurs ) ;
assert ( r = = 0 ) ;
2007-10-04 15:23:41 +00:00
return brtcurs_set_position_next2 ( cursor , key , db , txn ) ;
2007-07-20 12:41:23 +00:00
}
return 0 ;
}
2007-10-04 15:23:41 +00:00
/*
 * Move the cursor to the last entry of the next subtree to the left.
 *
 * Each round pops (and unpins) the node on top of the cursor path.  In the
 * node that is then on top, the cursor registration for the previously
 * recorded child is dropped and every child to its left is tried in turn:
 * buffered bytes for that child are flushed until none remain, then
 * brtcurs_set_position_last() descends into it.  If no child yields a
 * position, that node is popped as well and the search continues one level up.
 *
 * Returns 0 as soon as brtcurs_set_position_last() succeeds, or DB_NOTFOUND
 * once brt_cursor_path_empty() reports that the path has been emptied.
 *
 * NOTE(review): the result of cachetable_unpin() is ignored, and any nonzero
 * result from brtcurs_set_position_last() simply moves on to the next child.
 */
int brtcurs_set_position_prev2(BRT_CURSOR cursor, DBT *key, DB *db, TOKUTXN txn) {
    for (;;) {
        assert(cursor->path_len > 0);

        /* Pop the top of the path and release its pin. */
        BRTNODE popped = cursor->path[cursor->path_len - 1];
        cursor->path_len--;
        cachetable_unpin(cursor->brt->cf, popped->thisnodename, popped->dirty, brtnode_size(popped));
        if (brt_cursor_path_empty(cursor))
            return DB_NOTFOUND;

        /* Continue with the children to the left of the recorded one. */
        BRTNODE parent = cursor->path[cursor->path_len - 1];
        int idx = cursor->pathcnum[cursor->path_len - 1];
        assert(parent->height > 0);
        brt_node_remove_cursor(parent, idx, cursor);

        for (idx--; idx >= 0; idx--) {
            cursor->pathcnum[cursor->path_len - 1] = idx;
            brt_node_add_cursor(parent, idx, cursor);

            /* Flush until nothing is buffered for this child; the flush may
             * rewrite the path entry, so re-read it every time. */
            for (;;) {
                int pending = parent->u.n.n_bytes_in_hashtable[idx];
                if (pending == 0)
                    break;
                brt_flush_child(cursor->brt, parent, idx, cursor, key->app_private, db, txn);
                parent = cursor->path[cursor->path_len - 1];
                idx = cursor->pathcnum[cursor->path_len - 1];
            }

            if (brtcurs_set_position_last(cursor, parent->u.n.children[idx], key, db, txn, parent) == 0)
                return 0;
            assert(parent == cursor->path[cursor->path_len - 1]);
            brt_node_remove_cursor(parent, idx, cursor);
        }
        /* Ran out of children here: go around again and pop this node too. */
    }
}
2007-10-04 15:23:41 +00:00
/*
 * Step the cursor back by one entry.
 *
 * First tries to step the PMA cursor inside the current leaf.  If that
 * reports DB_NOTFOUND and the path holds more than one node, the PMA cursor
 * is freed and brtcurs_set_position_prev2() looks for the next subtree to
 * the left.
 *
 * Returns 0 on success, DB_NOTFOUND when there is no previous entry, or any
 * other nonzero code reported by pma_cursor_set_position_prev().
 */
int brtcurs_set_position_prev(BRT_CURSOR cursor, DBT *key, DB *db, TOKUTXN txn) {
    int r = pma_cursor_set_position_prev(cursor->pmacurs);
    if (r != DB_NOTFOUND) {
        /* 0 on success.  Other failures are passed through instead of being
         * reported as success. */
        return r;
    }

    /* We fell off the front of the pma. */
    if (cursor->path_len == 1)
        return DB_NOTFOUND;

    r = pma_cursor_free(&cursor->pmacurs);
    assert(r == 0);
    return brtcurs_set_position_prev2(cursor, key, db, txn);
}
2007-10-03 19:34:31 +00:00
/*
 * Descend from the node stored at `off` and position the cursor on `key`
 * (flag == DB_SET) or on the `key`/`val` pair (flag == DB_GET_BOTH).
 *
 * The node is pinned and pushed onto the cursor path.  For an interior node
 * the child is chosen with brtnode_which_child(); while that child still has
 * buffered bytes they are flushed and the child is chosen again, then the
 * function recurses into it.  For a leaf a PMA cursor is created and
 * positioned according to `flag`.
 *
 * Returns 0 with the node left pinned on the path, or a nonzero code after
 * undoing this level (cursor registration removed, path entry popped, node
 * unpinned).
 *
 * NOTE(review): the error path unpins by `off`, while other cursor code in
 * this file unpins by node->thisnodename.  If brt_flush_child() can replace
 * the path entry with a different node these may differ -- confirm.
 */
int brtcurs_set_key(BRT_CURSOR cursor, diskoff off, DBT *key, DBT *val, int flag, DB *db, TOKUTXN txn, BRTNODE parent_brtnode) {
    BRT brt = cursor->brt;
    void *pinned;
    int r = cachetable_get_and_pin(brt->cf, off, &pinned, NULL, brtnode_flush_callback,
                                   brtnode_fetch_callback, (void *)(long)brt->h->nodesize);
    if (r != 0)
        return r;

    BRTNODE node = pinned;
    node->parent_brtnode = parent_brtnode;

    /* Either kind of node takes one new slot on the cursor path. */
    cursor->path_len++;

    if (node->height <= 0) {
        /* Leaf: open a PMA cursor and position it. */
        cursor->path[cursor->path_len - 1] = node;
        r = pma_cursor(node->u.l.buffer, &cursor->pmacurs);
        if (r == 0) {
            switch (flag) {
            case DB_SET:
                r = pma_cursor_set_key(cursor->pmacurs, key, db);
                break;
            case DB_GET_BOTH:
                r = pma_cursor_set_both(cursor->pmacurs, key, val, db);
                break;
            default:
                assert(0);
                r = DB_NOTFOUND;
                break;
            }
            if (r != 0) {
                int rr = pma_cursor_free(&cursor->pmacurs);
                assert(rr == 0);
            }
        }
    } else {
        int childnum;
        for (;;) {
            childnum = brtnode_which_child(node, key, brt, db);
            cursor->path[cursor->path_len - 1] = node;
            cursor->pathcnum[cursor->path_len - 1] = childnum;
            brt_node_add_cursor(node, childnum, cursor);

            int pending = node->u.n.n_bytes_in_hashtable[childnum];
            if (!(pending > 0))
                break;

            brt_flush_child(cursor->brt, node, childnum, cursor, key->app_private, db, txn);
            node = cursor->path[cursor->path_len - 1];
            childnum = cursor->pathcnum[cursor->path_len - 1];
            brt_node_remove_cursor(node, childnum, cursor);
            /* the node may have split. search the node keys again */
        }
        r = brtcurs_set_key(cursor, node->u.n.children[childnum], key, val, flag, db, txn, node);
        if (r != 0)
            brt_node_remove_cursor(node, childnum, cursor);
    }

    if (r != 0) {
        /* Undo this level: pop the path entry and drop the pin. */
        cursor->path_len--;
        cachetable_unpin(brt->cf, off, node->dirty, brtnode_size(node));
    }
    return r;
}
2007-10-03 19:34:31 +00:00
/*
 * Descend from the node stored at `off` and position the cursor with
 * pma_cursor_set_range() in a leaf (per the in-code comment: the first entry
 * whose key is >= `key`).
 *
 * The node is pinned and pushed onto the cursor path.  For an interior node
 * the starting child is chosen with brtnode_which_child(); each candidate
 * child has its buffered bytes flushed before the function recurses into it.
 * If the recursion fails, the next child to the right is tried until the
 * node runs out of children.  For a leaf a PMA cursor is created and
 * positioned with pma_cursor_set_range().
 *
 * Returns 0 with the node left pinned on the path, or a nonzero code after
 * popping the path entry and unpinning `off`.
 */
int brtcurs_set_range(BRT_CURSOR cursor, diskoff off, DBT *key, DB *db, TOKUTXN txn, BRTNODE parent_brtnode) {
    BRT brt = cursor->brt;
    void *pinned;
    int r = cachetable_get_and_pin(brt->cf, off, &pinned, NULL, brtnode_flush_callback,
                                   brtnode_fetch_callback, (void *)(long)brt->h->nodesize);
    if (r != 0)
        return r;

    BRTNODE node = pinned;
    node->parent_brtnode = parent_brtnode;

    /* Either kind of node takes one new slot on the cursor path. */
    cursor->path_len++;

    if (node->height <= 0) {
        /* Leaf: open a PMA cursor and position it. */
        cursor->path[cursor->path_len - 1] = node;
        r = pma_cursor(node->u.l.buffer, &cursor->pmacurs);
        if (r == 0) {
            r = pma_cursor_set_range(cursor->pmacurs, key, db);
            if (r != 0) {
                int rr = pma_cursor_free(&cursor->pmacurs);
                assert(rr == 0);
            }
        }
    } else {
        /* select a subtree by key */
        int childnum = brtnode_which_child(node, key, brt, db);
        do {
            /* Register on the candidate child, flushing until it has no
             * buffered bytes left. */
            for (;;) {
                cursor->path[cursor->path_len - 1] = node;
                cursor->pathcnum[cursor->path_len - 1] = childnum;
                brt_node_add_cursor(node, childnum, cursor);

                int pending = node->u.n.n_bytes_in_hashtable[childnum];
                if (!(pending > 0))
                    break;

                brt_flush_child(cursor->brt, node, childnum, cursor, key->app_private, db, txn);
                node = cursor->path[cursor->path_len - 1];
                childnum = cursor->pathcnum[cursor->path_len - 1];
                brt_node_remove_cursor(node, childnum, cursor);
            }

            r = brtcurs_set_range(cursor, node->u.n.children[childnum], key, db, txn, node);
            if (r == 0)
                break;

            node = cursor->path[cursor->path_len - 1];
            childnum = cursor->pathcnum[cursor->path_len - 1];
            brt_node_remove_cursor(node, childnum, cursor);
            /* no key in the child subtree is >= key, need to search the next child */
            childnum += 1;
            if (0) printf(" set_range %d %d \n ", childnum, node->u.n.n_children);
        } while (childnum < node->u.n.n_children);
    }

    if (r != 0) {
        /* Undo this level: pop the path entry and drop the pin. */
        cursor->path_len--;
        cachetable_unpin(brt->cf, off, node->dirty, brtnode_size(node));
    }
    return r;
}
2007-07-13 19:37:47 +00:00
/*
 * Release everything the cursor currently holds.
 *
 * For every node on the cursor path the cursor registration is removed and
 * the node is unpinned; the PMA cursor, if any, is then freed and the path
 * is reset to empty.
 *
 * Returns 0 on success, otherwise the first nonzero code encountered.  The
 * PMA-cursor free result is kept in its own variable so that it cannot
 * overwrite an earlier cachetable_unpin() failure.
 */
static int unpin_cursor(BRT_CURSOR cursor) {
    BRT brt = cursor->brt;
    int i;
    int r = 0;

    for (i = 0; i < cursor->path_len; i++) {
        BRTNODE node = cursor->path[i];
        brt_node_remove_cursor(node, cursor->pathcnum[i], cursor);
        int r2 = cachetable_unpin(brt->cf, node->thisnodename, node->dirty, brtnode_size(node));
        if (r == 0)
            r = r2;   /* remember only the first failure */
    }

    if (cursor->pmacurs) {
        int rfree = pma_cursor_free(&cursor->pmacurs);
        assert(rfree == 0);
        if (r == 0)
            r = rfree;
    }

    cursor->path_len = 0;
    return r;
}
2007-08-24 12:10:49 +00:00
/*
 * Debug check of the cursor path invariants.  Does nothing for an empty path.
 *
 * Every entry except the last must be an interior node (height > 0) whose
 * recorded child has no buffered bytes and at least one registered cursor;
 * the last entry must be a leaf (height == 0).
 */
static void assert_cursor_path(BRT_CURSOR cursor) {
    if (cursor->path_len <= 0)
        return;

    int level = 0;
    while (level < cursor->path_len - 1) {
        BRTNODE interior = cursor->path[level];
        int slot = cursor->pathcnum[level];
        assert(interior->height > 0);
        assert(interior->u.n.n_bytes_in_hashtable[slot] == 0);
        assert(interior->u.n.n_cursors[slot] > 0);
        level++;
    }

    /* `level` now indexes the final path entry. */
    BRTNODE leaf = cursor->path[level];
    assert(leaf->height == 0);
}
2007-09-28 17:11:22 +00:00
/*
 * Position the cursor according to `flags` and fetch the entry it lands on.
 *
 * Supported operations: DB_FIRST, DB_LAST, DB_NEXT, DB_PREV, DB_SET,
 * DB_GET_BOTH and DB_SET_RANGE.  DB_RMW is accepted and stripped from
 * `flags` but not otherwise acted upon here.  Any other value prints a
 * message to stderr and aborts.  DB_NEXT / DB_PREV on a cursor with an empty
 * path behave like DB_FIRST / DB_LAST.
 *
 * kbt/vbt: key and value DBTs.  They are passed to the positioning routines
 * and, for every operation except DB_GET_BOTH, to pma_cursor_get_current().
 *
 * The brt header is pinned for the duration of the call and unpinned on
 * every return path.
 *
 * Returns 0 on success; otherwise the first nonzero code from positioning,
 * from pma_cursor_get_current(), or from unpinning the header.  A failing
 * pma_cursor_get_current() is reported for DB_FIRST and DB_LAST as well,
 * rather than being overwritten by the header unpin result.
 */
int brt_cursor_get(BRT_CURSOR cursor, DBT *kbt, DBT *vbt, int flags, DB *db, TOKUTXN txn) {
    int r;
    CACHEKEY *rootp;

    //dump_brt(cursor->brt);
    //fprintf(stderr, "%s:%d in brt_c_get(...)\n", __FILE__, __LINE__);
    r = read_and_pin_brt_header(cursor->brt->cf, &cursor->brt->h);
    if (r != 0)
        return r;
    rootp = calculate_root_offset_pointer(cursor->brt);

    flags &= ~DB_RMW;
    cursor->op = flags;

    switch (flags) {
    case DB_LAST:
    do_db_last:
        r = unpin_cursor(cursor);
        if (r != 0) goto died0;
        assert(cursor->pmacurs == 0);
        r = brtcurs_set_position_last(cursor, *rootp, kbt, db, txn, null_brtnode);
        if (r != 0) goto died0;
        r = pma_cursor_get_current(cursor->pmacurs, kbt, vbt);
        if (r != 0) goto died0;
        assert_cursor_path(cursor);
        break;

    case DB_FIRST:
    do_db_first:
        r = unpin_cursor(cursor);
        if (r != 0) goto died0;
        assert(cursor->pmacurs == 0);
        r = brtcurs_set_position_first(cursor, *rootp, kbt, db, txn, null_brtnode);
        if (r != 0) goto died0;
        r = pma_cursor_get_current(cursor->pmacurs, kbt, vbt);
        if (r != 0) goto died0;
        assert_cursor_path(cursor);
        break;

    case DB_NEXT:
        if (cursor->path_len <= 0)
            goto do_db_first;
        r = brtcurs_set_position_next(cursor, kbt, db, txn);
        if (r != 0) goto died0;
        r = pma_cursor_get_current(cursor->pmacurs, kbt, vbt);
        if (r != 0) goto died0;
        assert_cursor_path(cursor);
        break;

    case DB_PREV:
        if (cursor->path_len <= 0)
            goto do_db_last;
        r = brtcurs_set_position_prev(cursor, kbt, db, txn);
        if (r != 0) goto died0;
        r = pma_cursor_get_current(cursor->pmacurs, kbt, vbt);
        if (r != 0) goto died0;
        assert_cursor_path(cursor);
        break;

    case DB_SET:
        r = unpin_cursor(cursor);
        assert(r == 0);
        r = brtcurs_set_key(cursor, *rootp, kbt, vbt, DB_SET, db, txn, null_brtnode);
        if (r != 0) goto died0;
        r = pma_cursor_get_current(cursor->pmacurs, kbt, vbt);
        if (r != 0) goto died0;
        break;

    case DB_GET_BOTH:
        /* No pma_cursor_get_current() call is made for this operation. */
        r = unpin_cursor(cursor);
        assert(r == 0);
        r = brtcurs_set_key(cursor, *rootp, kbt, vbt, DB_GET_BOTH, db, txn, null_brtnode);
        if (r != 0) goto died0;
        break;

    case DB_SET_RANGE:
        r = unpin_cursor(cursor);
        assert(r == 0);
        r = brtcurs_set_range(cursor, *rootp, kbt, db, txn, null_brtnode);
        if (r != 0) goto died0;
        r = pma_cursor_get_current(cursor->pmacurs, kbt, vbt);
        if (r != 0) goto died0;
        break;

    default:
        fprintf(stderr, " %s:%d c_get(...,%d) not ready \n ", __FILE__, __LINE__, flags);
        abort();
    }

    //printf("%s:%d unpinning header\n", __FILE__, __LINE__);
    return unpin_brt_header(cursor->brt);

died0:
    /* Failure: release the header but report the original error. */
    unpin_brt_header(cursor->brt);
    return r;
}
2007-09-11 18:32:10 +00:00
2007-09-12 20:30:36 +00:00
/* delete the key and value under the cursor */
2007-09-11 18:32:10 +00:00
int brt_cursor_delete ( BRT_CURSOR cursor , int flags __attribute__ ( ( __unused__ ) ) ) {
int r ;
if ( cursor - > path_len > 0 ) {
BRTNODE node = cursor - > path [ cursor - > path_len - 1 ] ;
assert ( node - > height = = 0 ) ;
int kvsize ;
r = pma_cursor_delete_under ( cursor - > pmacurs , & kvsize ) ;
2007-09-18 16:09:55 +00:00
if ( r = = 0 ) {
2007-09-11 18:32:10 +00:00
node - > u . l . n_bytes_in_buffer - = KEY_VALUE_OVERHEAD + kvsize ;
2007-09-18 16:09:55 +00:00
brtnode_set_dirty ( node ) ;
}
2007-09-11 18:32:10 +00:00
} else
r = DB_NOTFOUND ;
return r ;
}