2007-11-29 14:18:54 +00:00
/* -*- mode: C; c-basic-offset: 4 -*- */
2007-07-13 19:37:47 +00:00
# ifndef BRT_H
# define BRT_H
2013-04-16 23:57:48 -04:00
# ident "$Id$"
2013-04-16 23:57:48 -04:00
# ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved."
# ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11 / 760379 and to the patents and / or patent applications resulting from it."
2007-07-13 19:37:47 +00:00
// This must be first to make the 64-bit file mode work right in Linux
# define _FILE_OFFSET_BITS 64
# include "brttypes.h"
# include "ybt.h"
2013-04-16 23:57:48 -04:00
# include <db.h>
2007-07-13 19:37:47 +00:00
# include "cachetable.h"
2007-09-28 17:11:22 +00:00
# include "log.h"
2008-01-25 15:43:37 +00:00
# include "brt-search.h"
2007-11-21 13:07:49 +00:00
2013-04-16 23:57:41 -04:00
// A callback function is invoked with the key, and the data.
// The pointers (to the bytevecs) must not be modified. The data must be copied out before the callback function returns.
// Note: In the thread-safe version, the brt node remains locked while the callback function runs. So return soon, and don't call the BRT code from the callback function.
// If the callback function returns a nonzero value (an error code), then that error code is returned from the get function itself.
// The cursor object will have been updated (so that if result==0 the current value is the value being passed)
// (If r!=0 then the cursor won't have been updated.)
// If r!=0, it's up to the callback function to return that value of r.
//A 'key' bytevec of NULL means that element is not found (effectively infinity or
//-infinity depending on direction)
// NOTE(review): the argument order is presumably (keylen, key, vallen, val, extra) -- confirm against the callers.
typedef int ( * BRT_GET_CALLBACK_FUNCTION ) ( ITEMLEN , bytevec , ITEMLEN , bytevec , void * ) ;
//Same as BRT_GET_CALLBACK_FUNCTION but returns both the answer to the query and
//the element on the other side of the border (as in heaviside function).
// NOTE(review): the eight leading arguments are presumably two (keylen, key, vallen, val) groups --
// one for the answer and one for the element across the border -- followed by the caller's
// extra pointer. Confirm which group comes first.
typedef int ( * BRT_GET_STRADDLE_CALLBACK_FUNCTION ) ( ITEMLEN , bytevec , ITEMLEN , bytevec , ITEMLEN , bytevec , ITEMLEN , bytevec , void * ) ;
2013-04-16 23:57:47 -04:00
int toku_open_brt ( const char * fname , int is_create , BRT * , int nodesize , CACHETABLE , TOKUTXN , int ( * ) ( DB * , const DBT * , const DBT * ) , DB * ) ;
2007-11-14 17:58:38 +00:00
2013-04-16 23:57:48 -04:00
u_int32_t toku_serialize_descriptor_size ( struct descriptor * desc ) ;
2007-11-29 14:44:03 +00:00
int toku_brt_create ( BRT * ) ;
2007-11-29 19:32:53 +00:00
int toku_brt_set_flags ( BRT , unsigned int flags ) ;
2013-04-16 23:57:48 -04:00
int toku_brt_set_descriptor ( BRT t , u_int32_t version , const DBT * descriptor , toku_dbt_upgradef dbt_userformat_upgrade ) ;
2007-11-29 19:32:53 +00:00
// Accessors for a brt's flags and node size.
// NOTE(review): the getters presumably store their result through the pointer argument, and the
// int result is presumably 0 on success like the other calls in this header -- confirm.
int toku_brt_get_flags ( BRT , unsigned int * flags ) ;
int toku_brt_set_nodesize ( BRT , unsigned int nodesize ) ;
int toku_brt_get_nodesize ( BRT , unsigned int * nodesize ) ;
2013-04-16 23:57:59 -04:00
// Install comparison functions on a brt.
// NOTE(review): judging by the names, bt_compare orders keys and dup_compare orders duplicate
// values -- confirm against the implementation.
int toku_brt_set_bt_compare ( BRT , brt_compare_func ) ;
int toku_brt_set_dup_compare ( BRT , brt_compare_func ) ;
2013-04-16 23:58:01 -04:00
int toku_brt_set_filenum ( BRT brt , FILENUM filenum ) ;
2007-11-14 17:58:38 +00:00
int brt_set_cachetable ( BRT , CACHETABLE ) ;
2013-04-16 23:57:47 -04:00
int toku_brt_open ( BRT , const char * fname , const char * fname_in_env , int is_create , int only_create , CACHETABLE ct , TOKUTXN txn , DB * db ) ;
2013-04-16 23:58:52 -04:00
// Takes the same parameters as toku_brt_open plus recovery_force_fcreate.
// NOTE(review): presumably the variant used by recovery, with the extra flag forcing file creation -- confirm.
int toku_brt_open_recovery ( BRT , const char * fname , const char * fname_in_env , int is_create , int only_create , CACHETABLE ct , TOKUTXN txn , DB * db , int recovery_force_fcreate ) ;
2008-07-21 02:34:13 +00:00
2007-11-29 15:09:14 +00:00
int toku_brt_remove_subdb ( BRT brt , const char * dbname , u_int32_t flags ) ;
2013-04-16 23:58:00 -04:00
int toku_brt_broadcast_commit_all ( BRT brt ) ;
2013-04-16 23:57:41 -04:00
int toku_brt_lookup ( BRT brt , DBT * k , DBT * v , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
2013-04-16 23:58:01 -04:00
// Effect: Insert a key and data pair into a brt
2013-04-16 23:58:01 -04:00
// Returns 0 if successful
2013-04-16 23:58:01 -04:00
int toku_brt_insert ( BRT brt , DBT * k , DBT * v , TOKUTXN txn ) ;
// Effect: Insert a key and data pair into a brt if the oplsn is newer than the brt lsn. This function is called during recovery.
2013-04-16 23:58:01 -04:00
// Returns 0 if successful
2013-04-16 23:58:58 -04:00
int toku_brt_maybe_insert ( BRT brt , DBT * k , DBT * v , TOKUTXN txn , BOOL oplsn_valid , LSN oplsn , int do_logging ) ;
// NOTE(review): presumably logs one put of `row` covering the `num_brts` trees in `brts` -- confirm.
int toku_brt_log_put_multiple ( TOKUTXN txn , BRT * brts , int num_brts , DBT * row ) ;
// NOTE(review): presumably the delete counterpart of toku_brt_log_put_multiple -- confirm.
int toku_brt_log_del_multiple ( TOKUTXN txn , BRT * brts , int num_brts , DBT * row ) ;
2013-04-16 23:58:01 -04:00
// Effect: Delete a key from a brt
2013-04-16 23:58:01 -04:00
// Returns 0 if successful
2013-04-16 23:58:01 -04:00
int toku_brt_delete ( BRT brt , DBT * k , TOKUTXN txn ) ;
// Effect: Delete a key from a brt if the oplsn is newer than the brt lsn. This function is called during recovery.
2013-04-16 23:58:01 -04:00
// Returns 0 if successful
2013-04-16 23:58:58 -04:00
int toku_brt_maybe_delete ( BRT brt , DBT * k , TOKUTXN txn , BOOL oplsn_valid , LSN oplsn , int do_logging ) ;
2013-04-16 23:58:01 -04:00
// Effect: Delete a pair only if both k and v are equal according to the comparison function.
2013-04-16 23:58:01 -04:00
// Returns 0 if successful
2013-04-16 23:58:01 -04:00
int toku_brt_delete_both ( BRT brt , DBT * k , DBT * v , TOKUTXN txn ) ;
// Effect: Delete a pair only if both k and v are equal according to the comparison function and the
// oplsn is newer than the brt lsn. This function is called by recovery.
2013-04-16 23:58:01 -04:00
// Returns 0 if successful
2013-04-16 23:58:02 -04:00
int toku_brt_maybe_delete_both ( BRT brt , DBT * k , DBT * v , TOKUTXN txn , BOOL oplsn_valid , LSN oplsn ) ;
2013-04-16 23:58:01 -04:00
2013-04-16 23:57:48 -04:00
int toku_brt_db_delay_closed ( BRT brt , DB * db , int ( * close_db ) ( DB * , u_int32_t ) , u_int32_t close_flags ) ;
2013-04-16 23:58:06 -04:00
// Close a brt.
// NOTE(review): error_string is presumably an out-parameter describing a failure -- confirm who owns/frees it.
int toku_close_brt ( BRT , char * * error_string ) ;
// Takes the same parameters as toku_close_brt plus an (oplsn_valid, oplsn) pair. The *_maybe_* calls in this
// header use that pair during recovery, so this is presumably the recovery-aware close -- confirm.
int toku_close_brt_lsn ( BRT brt , char * * error_string , BOOL oplsn_valid , LSN oplsn ) ;
2008-07-21 02:34:13 +00:00
2013-04-16 23:58:03 -04:00
int toku_brt_set_panic ( BRT brt , int panic , char * panic_string ) ;
2013-04-16 23:57:24 -04:00
int toku_dump_brt ( FILE * , BRT brt ) ;
2007-07-13 19:37:47 +00:00
2008-07-21 02:34:13 +00:00
void brt_fsync ( BRT ) ; /* fsync, but don't clear the caches. */
2013-04-16 23:57:41 -04:00
void brt_flush ( BRT ) ; /* fsync and clear the caches. */
2007-07-13 19:37:47 +00:00
2008-07-21 02:34:13 +00:00
int toku_brt_get_cursor_count ( BRT brt ) ;
// get the number of cursors in the tree
2013-04-16 23:57:41 -04:00
// returns: the number of cursors.
2008-07-21 02:34:13 +00:00
// asserts: the number of cursors >= 0.
int toku_brt_flush ( BRT brt ) ;
// effect: the tree's cachefile is flushed
// returns: 0 if success
2013-04-16 23:57:27 -04:00
int toku_brt_truncate ( BRT brt ) ;
// effect: remove everything from the tree
// returns: 0 if success
2007-11-14 17:58:38 +00:00
2013-04-16 23:58:01 -04:00
LSN toku_brt_checkpoint_lsn ( BRT brt ) ;
2013-04-16 23:57:27 -04:00
// create and initialize a cache table
// cachesize is the upper limit on the size of the values in the table
// pass 0 if you want the default
2007-11-29 14:44:03 +00:00
int toku_brt_create_cachetable ( CACHETABLE * t , long cachesize , LSN initial_lsn , TOKULOGGER ) ;
2007-09-21 17:55:49 +00:00
2007-11-29 14:44:03 +00:00
extern int toku_brt_debug_mode ;
2007-11-20 00:35:31 +00:00
int toku_verify_brt ( BRT brt ) ;
2007-07-13 19:37:47 +00:00
2007-11-28 19:00:21 +00:00
//int show_brt_blocknumbers(BRT);
2007-07-13 19:37:47 +00:00
// Handle to a cursor over a brt. struct brt_cursor is only named here, not defined, so users of
// this header treat the handle as opaque.
typedef struct brt_cursor * BRT_CURSOR ;
2013-04-16 23:57:56 -04:00
int toku_brt_cursor ( BRT , BRT_CURSOR * , TOKULOGGER ) ;
2013-04-16 23:57:41 -04:00
// get is deprecated in favor of the individual functions below
2013-04-16 23:58:00 -04:00
int toku_brt_cursor_get ( BRT_CURSOR cursor , DBT * key , DBT * val , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v , int get_flags ) ;
2013-04-16 23:57:41 -04:00
2013-04-16 23:57:59 -04:00
int toku_brt_flatten ( BRT , TOKULOGGER logger ) ;
2013-04-16 23:58:00 -04:00
// Cursor positioning operations. Each one delivers its result through getf (a
// BRT_GET_CALLBACK_FUNCTION) instead of filling in caller-supplied DBTs.
// NOTE(review): getf_v is presumably handed to getf as its trailing void* argument -- confirm.
// NOTE(review): the names follow the usual first/last/next/prev/current/set/set_range cursor
// vocabulary, with _dup/_nodup and _reverse variants; the exact semantics of each variant are
// not visible from this header.
int toku_brt_cursor_first ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_last ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_next ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_next_nodup ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_next_dup ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_prev ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_prev_nodup ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_prev_dup ( BRT_CURSOR cursor , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_current ( BRT_CURSOR cursor , int op , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_set ( BRT_CURSOR cursor , DBT * key , DBT * val , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_set_range ( BRT_CURSOR cursor , DBT * key , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_set_range_reverse ( BRT_CURSOR cursor , DBT * key , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_get_both_range ( BRT_CURSOR cursor , DBT * key , DBT * val , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
int toku_brt_cursor_get_both_range_reverse ( BRT_CURSOR cursor , DBT * key , DBT * val , BRT_GET_CALLBACK_FUNCTION getf , void * getf_v ) ;
2013-04-16 23:57:41 -04:00
// Bundles a heaviside function with the state that travels with it; passed to
// toku_brt_cursor_heaviside as its `wrapper` argument.
// HEAVI_WRAPPER_S is the struct type itself, HEAVI_WRAPPER is a pointer to it.
// NOTE(review): the per-field notes below are inferred from the field names only -- confirm
// against the implementation before relying on them.
typedef struct {
    YDB_HEAVISIDE_FUNCTION h;   // the heaviside function
    void *extra_h;              // presumably the caller's extra argument for h
    int r_h;                    // presumably a result value of h
    int direction;              // presumably the search direction
} *HEAVI_WRAPPER, HEAVI_WRAPPER_S;
2013-04-16 23:58:00 -04:00
// Heaviside-function query through a cursor. Results are delivered through getf, a
// BRT_GET_STRADDLE_CALLBACK_FUNCTION, which receives both the answer and the element on the
// other side of the border. `wrapper` carries the heaviside function and its state.
// NOTE(review): getf_v is presumably passed to getf as its trailing void* argument -- confirm.
int toku_brt_cursor_heaviside ( BRT_CURSOR cursor , BRT_GET_STRADDLE_CALLBACK_FUNCTION getf , void * getf_v , HEAVI_WRAPPER wrapper ) ;
2008-01-28 20:49:10 +00:00
int toku_brt_cursor_delete ( BRT_CURSOR cursor , int flags , TOKUTXN ) ;
2007-11-29 14:44:03 +00:00
int toku_brt_cursor_close ( BRT_CURSOR curs ) ;
2008-02-11 20:00:19 +00:00
BOOL toku_brt_cursor_uninitialized ( BRT_CURSOR c ) ;
2007-07-13 19:37:47 +00:00
2013-04-16 23:57:41 -04:00
// NOTE(review): presumably exposes the cursor's current key and value through *pkey / *pval
// without copying them -- confirm, and confirm how long the returned pointers stay valid.
void toku_brt_cursor_peek ( BRT_CURSOR cursor , const DBT * * pkey , const DBT * * pval ) ;
2008-06-12 20:32:22 +00:00
2007-11-14 17:58:38 +00:00
// Handle to a brt environment. struct brtenv is only named here, not defined, so users of this
// header treat the handle as opaque.
typedef struct brtenv * BRTENV ;
// NOTE(review): presumably checkpoints the given environment and returns 0 on success -- confirm.
int brtenv_checkpoint ( BRTENV env ) ;
2007-11-28 19:00:21 +00:00
extern int toku_brt_do_push_cmd ; // control whether push occurs eagerly.
2013-04-16 23:57:41 -04:00
// TODO: Get rid of this
2008-05-07 20:03:13 +00:00
int toku_brt_dbt_set ( DBT * key , DBT * key_source ) ;
2013-04-16 23:57:41 -04:00
2008-01-11 14:38:49 +00:00
int toku_brt_get_fd ( BRT , int * ) ;
2008-02-25 22:46:48 +00:00
int toku_brt_height_of_root ( BRT , int * height ) ; // for an open brt, return the current height.
2008-04-02 23:40:36 +00:00
// Bit flags for a brt header. Each constant occupies its own bit, so values can be OR-ed together.
// NOTE(review): presumably these are the values exchanged through toku_brt_set_flags /
// toku_brt_get_flags -- confirm.
enum brt_header_flags {
    TOKU_DB_DUP            = (1 << 0),
    TOKU_DB_DUPSORT        = (1 << 1),
    TOKU_DB_KEYCMP_BUILTIN = (1 << 2),
    TOKU_DB_VALCMP_BUILTIN = (1 << 3),
};
2008-05-13 12:14:38 +00:00
int toku_brt_keyrange ( BRT brt , DBT * key , u_int64_t * less , u_int64_t * equal , u_int64_t * greater ) ;
2013-04-16 23:57:59 -04:00
/* 64-bit statistics about a brt and its underlying file; the result type of toku_brt_stat64. */
struct brtstat64_s {
    /* Estimated number of unique keys (may still be an estimate even when flattened). */
    u_int64_t nkeys;
    /* Estimated number of pairs (exact when flattened and committed). */
    u_int64_t ndata;
    /* Estimated sum of the sizes of the pairs (exact when flattened and committed). */
    u_int64_t dsize;
    /* Size of the underlying file. */
    u_int64_t fsize;
    /* Number of free bytes in the underlying file. */
    u_int64_t ffree;
};
2013-04-16 23:57:47 -04:00
int toku_brt_stat64 ( BRT , TOKUTXN ,
2013-04-16 23:57:59 -04:00
struct brtstat64_s * stat
2013-04-16 23:57:47 -04:00
) ;
2008-05-13 12:14:38 +00:00
2013-04-16 23:57:53 -04:00
// Global setup/teardown for the brt layer. toku_brt_init is given two callbacks which, per their
// parameter names, take and release the ydb lock.
// NOTE(review): return values are presumably 0 on success -- confirm.
int toku_brt_init ( void ( * ydb_lock_callback ) ( void ) , void ( * ydb_unlock_callback ) ( void ) ) ;
int toku_brt_destroy ( void ) ;
// Setup/teardown of the pwrite lock.
// NOTE(review): what the lock protects is not visible from this header.
int toku_pwrite_lock_init ( void ) ;
int toku_pwrite_lock_destroy ( void ) ;
2013-04-16 23:57:30 -04:00
2013-04-16 23:57:47 -04:00
void toku_maybe_truncate_cachefile ( CACHEFILE cf , u_int64_t size_used ) ;
// Effect: truncate file if overallocated by at least 32MiB
2013-04-16 23:57:38 -04:00
int maybe_preallocate_in_file ( int fd , u_int64_t size ) ;
2013-04-16 23:57:30 -04:00
// Effect: If file size is less than SIZE, make it bigger by either doubling it or growing by 16MB, whichever is less.
2013-04-16 23:57:38 -04:00
int toku_brt_note_table_lock ( BRT brt , TOKUTXN txn ) ;
// Effect: Record the fact that the BRT has a table lock (and thus no other txn will modify it until this txn completes). As a result, we can limit the amount of information in the rollback data structure.
2013-04-16 23:58:06 -04:00
int toku_brt_zombie_needed ( BRT brt ) ;
2013-04-16 23:57:56 -04:00
//TODO: #1485 once we have multiple main threads, restore this code, analyze performance.
// Default TOKU_MULTIPLE_MAIN_THREADS to 0 unless it has already been defined
// (for example on the compiler command line or by an earlier header).
# ifndef TOKU_MULTIPLE_MAIN_THREADS
# define TOKU_MULTIPLE_MAIN_THREADS 0
# endif
2007-07-13 19:37:47 +00:00
# endif