2013-04-16 23:59:09 -04:00
/* -*- mode: C; c-basic-offset: 4 -*- */
# ifndef _TOKU_BRTLOADER_INTERNAL_H
# define _TOKU_BRTLOADER_INTERNAL_H
2013-04-16 23:59:13 -04:00
# ident "$Id$"
2013-04-16 23:59:09 -04:00
# ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
2013-04-16 23:59:01 -04:00
# include <db.h>
# include "brttypes.h"
# include "brtloader.h"
2013-04-16 23:59:09 -04:00
# include "queue.h"
# include "toku_pthread.h"
2013-04-16 23:59:13 -04:00
# include "dbufio.h"
# include "c_dialects.h"
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:13 -04:00
C_BEGIN
2013-04-16 23:59:01 -04:00
/* These functions are exported to allow the tests to compile. */
2013-04-16 23:59:03 -04:00
/* These structures maintain a collection of all the open temporary files used by the loader. */
struct file_info {
BOOL is_open ;
BOOL is_extant ; // if true, the file must be unlinked.
char * fname ;
FILE * file ;
2013-04-16 23:59:04 -04:00
u_int64_t n_rows ; // how many rows were written into that file
2013-04-16 23:59:09 -04:00
size_t buffer_size ;
void * buffer ;
2013-04-16 23:59:03 -04:00
} ;
struct file_infos {
int n_files ;
int n_files_limit ;
struct file_info * file_infos ;
int n_files_open , n_files_extant ;
2013-04-16 23:59:11 -04:00
toku_pthread_mutex_t lock ; // must protect this data structure because current activity performs a REALLOC(fi->file_infos).
2013-04-16 23:59:03 -04:00
} ;
/* A file index: identifies a temporary file by its position in the loader's
 * file_infos array.  Wrapped in a struct so it cannot be confused with a
 * plain int. */
typedef struct fidx { int idx; } FIDX;

/* The "no file" value: an index of -1. */
static const FIDX FIDX_NULL __attribute__((__unused__)) = { -1 };

/* Return nonzero iff f is the null file index (idx == -1), zero otherwise.
 * Forward-declared separately so the unused attribute can be attached. */
static int fidx_is_null(const FIDX f) __attribute__((__unused__));
static int fidx_is_null(const FIDX f) { return f.idx == -1; }
2013-04-16 23:59:10 -04:00
/* Map file index i of loader bl to its stdio stream.
 * NOTE(review): behaviour for a closed or null index is defined in brtloader.c, not here. */
FILE *toku_bl_fidx2file(BRTLOADER bl, FIDX i);

/* Open a temporary file for loader bl; the index of the new file is reported
 * through *file_idx.  Returns an int status.
 * NOTE(review): presumably 0 on success and an errno-style code otherwise -- confirm. */
int brtloader_open_temp_file(BRTLOADER bl, FIDX *file_idx);
2013-04-16 23:59:09 -04:00
/* These data structures are used for manipulating a collection of rows in main memory. */
/* Descriptor of one in-memory row.  The row's bytes are not stored here;
 * `off` locates them in the data array of the rowset that owns the row. */
struct row {
    size_t off;   // the offset in the data array.
    int    klen;  // NOTE(review): presumably the key length
    int    vlen;  // NOTE(review): presumably the value length
};
/* A collection of rows held in main memory: an array of row descriptors plus
 * one data array that the descriptors index into (see struct row's `off`). */
struct rowset {
    uint64_t    memory_budget;  // NOTE(review): presumably the memory this rowset may use; set via init_rowset()
    size_t      n_rows;         // NOTE(review): presumably the number of entries of rows[] in use
    size_t      n_rows_limit;   // NOTE(review): presumably the allocated capacity of rows[]
    struct row *rows;
    size_t      n_bytes;        // NOTE(review): presumably the number of bytes of data[] in use
    size_t      n_bytes_limit;  // NOTE(review): presumably the allocated capacity of data[]
    char       *data;
};
2013-04-16 23:59:12 -04:00
int init_rowset ( struct rowset * rows , uint64_t memory_budget ) ;
2013-04-16 23:59:09 -04:00
void destroy_rowset ( struct rowset * rows ) ;
2013-04-16 23:59:13 -04:00
int add_row ( struct rowset * rows , DBT * key , DBT * val ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:10 -04:00
int loader_write_row ( DBT * key , DBT * val , FIDX data , FILE * , u_int64_t * dataoff , BRTLOADER bl ) ;
2013-04-16 23:59:11 -04:00
int loader_read_row ( FILE * f , DBT * key , DBT * val ) ;
2013-04-16 23:59:09 -04:00
struct merge_fileset {
2013-04-16 23:59:20 -04:00
BOOL have_sorted_output ; // Is there an previous key?
FIDX sorted_output ; // this points to one of the data_fidxs. If output_is_sorted then this is the file containing sorted data. It's still open
DBT prev_key ; // What is it? If it's here, its the last output in the merge fileset
2013-04-16 23:59:09 -04:00
int n_temp_files , n_temp_files_limit ;
FIDX * data_fidxs ;
} ;
/* Set up *fs before first use.  NOTE(review): implementation is in brtloader.c. */
void init_merge_fileset(struct merge_fileset *fs);

/* Tear down *fs.  NOTE(review): whether this closes or unlinks the underlying files is not visible here. */
void destroy_merge_fileset(struct merge_fileset *fs);
/* A user-supplied progress-poll function together with the opaque argument
 * that accompanies it. */
struct poll_callback_s {
    brt_loader_poll_func poll_function;
    void                *poll_extra;
};

/* Handle type taken by the brt_loader_*_poll_* functions.
 * Note that it is a pointer to struct poll_callback_s. */
typedef struct poll_callback_s *brtloader_poll_callback;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_init_poll_callback ( brtloader_poll_callback ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
void brt_loader_destroy_poll_callback ( brtloader_poll_callback ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
void brt_loader_set_poll_function ( brtloader_poll_callback , brt_loader_poll_func poll_function , void * poll_extra ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_call_poll_function ( brtloader_poll_callback , float progress ) ;
2013-04-16 23:59:09 -04:00
struct error_callback_s {
2013-04-16 23:59:15 -04:00
int error ;
2013-04-16 23:59:09 -04:00
brt_loader_error_func error_callback ;
void * extra ;
DB * db ;
int which_db ;
DBT key ;
DBT val ;
2013-04-16 23:59:15 -04:00
BOOL did_callback ;
2013-04-16 23:59:09 -04:00
toku_pthread_mutex_t mutex ;
} ;
2013-04-16 23:59:09 -04:00
typedef struct error_callback_s * brtloader_error_callback ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_init_error_callback ( brtloader_error_callback ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
void brt_loader_destroy_error_callback ( brtloader_error_callback ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_get_error ( brtloader_error_callback ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
void brt_loader_set_error_function ( brtloader_error_callback , brt_loader_error_func error_function , void * extra ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_set_error ( brtloader_error_callback , int error , DB * db , int which_db , DBT * key , DBT * val ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_call_error_function ( brtloader_error_callback ) ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_set_error_and_callback ( brtloader_error_callback , int error , DB * db , int which_db , DBT * key , DBT * val ) ;
2013-04-16 23:59:01 -04:00
struct brtloader_s {
2013-04-16 23:59:15 -04:00
// These two are set in the close function, and used while running close
struct error_callback_s error_callback ;
struct poll_callback_s poll_callback ;
2013-04-16 23:59:01 -04:00
generate_row_for_put_func generate_row_for_put ;
brt_compare_func * bt_compare_funs ;
DB * src_db ;
int N ;
2013-04-16 23:59:14 -04:00
DB * * dbs ; // N of these
2013-04-16 23:59:17 -04:00
DESCRIPTOR * descriptors ; // N of these.
2013-04-16 23:59:14 -04:00
const char * * new_fnames_in_env ; // N of these. The file names that the final data will be written to (relative to env).
uint64_t * extracted_datasizes ; // N of these.
2013-04-16 23:59:01 -04:00
2013-04-16 23:59:09 -04:00
struct rowset primary_rowset ; // the primary rows that have been put, but the secondary rows haven't been generated.
struct rowset primary_rowset_temp ; // the primary rows that are being worked on by the extractor_thread.
QUEUE primary_rowset_queue ; // main thread enqueues rowsets in this queue (in maybe 64MB chunks). The extractor thread removes them, sorts them, adn writes to file.
toku_pthread_t extractor_thread ; // the thread that takes primary rowset and does extraction and the first level sort and write to file.
2013-04-16 23:59:10 -04:00
BOOL extractor_live ;
2013-04-16 23:59:11 -04:00
2013-04-16 23:59:15 -04:00
DBT * last_key ; // for each rowset, remember the most recently output key. The system may choose not to keep this up-to-date when a rowset is unsorted. These keys are malloced and ulen maintains the size of the malloced block.
2013-04-16 23:59:09 -04:00
struct rowset * rows ; // secondary rows that have been put, but haven't been sorted and written to a file.
2013-04-16 23:59:04 -04:00
u_int64_t n_rows ; // how many rows have been put?
2013-04-16 23:59:09 -04:00
struct merge_fileset * fs ;
2013-04-16 23:59:04 -04:00
2013-04-16 23:59:01 -04:00
const char * temp_file_template ;
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:03 -04:00
CACHETABLE cachetable ;
2013-04-16 23:59:12 -04:00
uint64_t reserved_memory ; // how much memory are we allowed to use?
2013-04-16 23:59:03 -04:00
/* To make it easier to recover from errors, we don't use FILE*, instead we use an index into the file_infos. */
struct file_infos file_infos ;
2013-04-16 23:59:04 -04:00
# define PROGRESS_MAX (1<<16)
int progress ; // Progress runs from 0 to PROGRESS_MAX. When we call the poll function we convert to a float from 0.0 to 1.0
// We use an integer so that we can add to the progress using a fetch-and-add instruction.
2013-04-16 23:59:19 -04:00
int progress_callback_result ; // initially zero, if any call to the poll function callback returns nonzero, we save the result here (and don't call the poll callback function again).
2013-04-16 23:59:05 -04:00
LSN load_lsn ; //LSN of the fsynced 'load' log entry. Write this LSN (as checkpoint_lsn) in brt headers made by this loader.
2013-04-16 23:59:01 -04:00
2013-04-16 23:59:09 -04:00
QUEUE * fractal_queues ; // an array of work queues, one for each secondary index.
2013-04-16 23:59:10 -04:00
toku_pthread_t * fractal_threads ;
2013-04-16 23:59:09 -04:00
BOOL * fractal_threads_live ; // an array of bools indicating that fractal_threads[i] is a live thread. (There is no NULL for a pthread_t, so we have to maintain this separately).
2013-04-16 23:59:11 -04:00
2013-04-16 23:59:11 -04:00
toku_pthread_mutex_t mutex ;
2013-04-16 23:59:15 -04:00
BOOL mutex_init ;
2013-04-16 23:59:01 -04:00
} ;
2013-04-16 23:59:09 -04:00
// Set the number of rows in the loader.  Used for test.
void toku_brt_loader_set_n_rows(BRTLOADER bl, u_int64_t n_rows);

// Get the number of rows in the loader.  Used for test.
u_int64_t toku_brt_loader_get_n_rows(BRTLOADER bl);
2013-04-16 23:59:03 -04:00
2013-04-16 23:59:09 -04:00
// The data passed into a fractal_thread via pthread_create.
struct fractal_thread_args {
2013-04-16 23:59:14 -04:00
BRTLOADER bl ;
2013-04-16 23:59:17 -04:00
const DESCRIPTOR descriptor ;
2013-04-16 23:59:14 -04:00
int fd ; // write the brt into tfd.
int progress_allocation ;
QUEUE q ;
uint64_t total_disksize_estimate ;
int errno_result ; // the final result.
2013-04-16 23:59:03 -04:00
} ;
2013-04-16 23:59:01 -04:00
2013-04-16 23:59:09 -04:00
/* NOTE(review): these two prototypes repeat the declarations that follow
 * struct brtloader_s earlier in this header.  Repeating a compatible
 * declaration is legal C; they are kept here unchanged. */
void toku_brt_loader_set_n_rows(BRTLOADER bl, u_int64_t n_rows);
u_int64_t toku_brt_loader_get_n_rows(BRTLOADER bl);
2013-04-16 23:59:01 -04:00
2013-04-16 23:59:09 -04:00
/* Merge the row arrays a[0..an) and b[0..bn) into dest, which must have room
 * for an+bn rows.  Returns an int status.
 * NOTE(review): the roles of the trailing BRTLOADER and rowset arguments are
 * inferred from their types; confirm against brtloader.c. */
int merge_row_arrays_base(struct row dest[/*an+bn*/],
                          struct row a[/*an*/], int an,
                          struct row b[/*bn*/], int bn,
                          int which_db, DB *dest_db, brt_compare_func compare,
                          BRTLOADER bl,
                          struct rowset *rowset);

/* Merge the temporary files of *fs for index which_db.  Returns an int status.
 * NOTE(review): the QUEUE is presumably where the merged output goes -- confirm. */
int merge_files(struct merge_fileset *fs, BRTLOADER bl, int which_db, DB *dest_db,
                brt_compare_func compare, int progress_allocation, QUEUE output_q);
2013-04-16 23:59:13 -04:00
CILK_BEGIN
2013-04-16 23:59:14 -04:00
int sort_and_write_rows ( struct rowset rows , struct merge_fileset * fs , BRTLOADER bl , int which_db , DB * dest_db , brt_compare_func ) ;
2013-04-16 23:59:09 -04:00
int mergesort_row_array ( struct row rows [ /*n*/ ] , int n , int which_db , DB * dest_db , brt_compare_func , BRTLOADER , struct rowset * ) ;
2013-04-16 23:59:13 -04:00
CILK_END
2013-04-16 23:59:13 -04:00
2013-04-16 23:59:17 -04:00
//int write_file_to_dbfile (int outfile, FIDX infile, BRTLOADER bl, const DESCRIPTOR descriptor, int progress_allocation);
2013-04-16 23:59:13 -04:00
/* Merge n_sources source files, read through the dbufio fileset bfs, for index which_db.
 * NOTE(review): to_q presumably selects whether output goes to queue q or to the file dest_data -- confirm.
 * Returns an int status. */
int toku_merge_some_files_using_dbufio(const BOOL to_q, FIDX dest_data, QUEUE q, int n_sources, DBUFIO_FILESET bfs, FIDX srcs_fidxs[/*n_sources*/], BRTLOADER bl, int which_db, DB *dest_db, brt_compare_func compare, int progress_allocation);

/* Variant of sort_and_write_rows() that takes the rowset by pointer.
 * NOTE(review): presumably a wrapper callable from plain C code -- confirm.  Returns an int status. */
int brt_loader_sort_and_write_rows(struct rowset *rows, struct merge_fileset *fs, BRTLOADER bl, int which_db, DB *dest_db, brt_compare_func compare);
2013-04-16 23:59:01 -04:00
2013-04-16 23:59:14 -04:00
// This is probably only for testing.
int toku_loader_write_brt_from_q_in_C ( BRTLOADER bl ,
2013-04-16 23:59:17 -04:00
const DESCRIPTOR descriptor ,
2013-04-16 23:59:14 -04:00
int fd , // write to here
int progress_allocation ,
QUEUE q ,
uint64_t total_disksize_estimate ) ;
2013-04-16 23:59:03 -04:00
2013-04-16 23:59:09 -04:00
int brt_loader_mergesort_row_array ( struct row rows [ /*n*/ ] , int n , int which_db , DB * dest_db , brt_compare_func , BRTLOADER , struct rowset * ) ;
2013-04-16 23:59:17 -04:00
int brt_loader_write_file_to_dbfile ( int outfile , FIDX infile , BRTLOADER bl , const DESCRIPTOR descriptor , int progress_allocation ) ;
2013-04-16 23:59:01 -04:00
2013-04-16 23:59:03 -04:00
/* Operations on the loader's table of temporary files (struct file_infos).
 * Descriptions are inferred from names and signatures; confirm against the
 * implementation.  The int-returning functions return a status code. */

/* Initialize *fi. */
int brtloader_init_file_infos(struct file_infos *fi);

/* Destroy *fi.  NOTE(review): is_error presumably relaxes the checks made on files still open or extant -- confirm. */
void brtloader_fi_destroy(struct file_infos *fi, BOOL is_error);

/* Close the file with index idx. */
int brtloader_fi_close(struct file_infos *fi, FIDX idx);

/* Close every file in *fi. */
void brtloader_fi_close_all(struct file_infos *fi);

/* Reopen the file with index idx using the given mode string.
 * NOTE(review): presumably an fopen()-style mode -- confirm. */
int brtloader_fi_reopen(struct file_infos *fi, FIDX idx, const char *mode);

/* Unlink the file with index idx. */
int brtloader_fi_unlink(struct file_infos *fi, FIDX idx);
2013-04-16 23:59:09 -04:00
2013-04-16 23:59:13 -04:00
int toku_brt_loader_internal_init ( /* out */ BRTLOADER * blp ,
CACHETABLE cachetable ,
generate_row_for_put_func g ,
DB * src_db ,
int N , DB * dbs [ /*N*/ ] ,
2013-04-16 23:59:17 -04:00
const DESCRIPTOR descriptors [ /*N*/ ] ,
2013-04-16 23:59:13 -04:00
const char * new_fnames_in_env [ /*N*/ ] ,
brt_compare_func bt_compare_functions [ /*N*/ ] ,
const char * temp_file_template ,
LSN load_lsn ) ;
2013-04-16 23:59:14 -04:00
void toku_brtloader_internal_destroy ( BRTLOADER bl , BOOL is_error ) ;
2013-04-16 23:59:14 -04:00
/* how much overhead for a row in the fractal tree */
enum {
    disksize_row_overhead = 9
};
2013-04-16 23:59:15 -04:00
// For test purposes only.  (In production, the rowset size is determined by negotiation with the cachetable for some memory.  See #2613.)
uint64_t toku_brtloader_get_rowset_budget_for_testing(void);

/* NOTE(review): presumably waits for the extractor thread of bl to finish -- confirm.  Returns an int status. */
int toku_brt_loader_finish_extractor(BRTLOADER bl);

/* Report the loader's recorded error through *loader_errno.  Returns an int status.
 * NOTE(review): inferred from the name; confirm. */
int toku_brt_loader_get_error(BRTLOADER bl, int *loader_errno);
2013-04-16 23:59:13 -04:00
C_END
2013-04-16 23:59:09 -04:00
# endif