#4503 #4504 support concurrent queries on the mainline refs[t:4503] refs[t:4504]

git-svn-id: file:///svn/toku/tokudb@40068 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Rich Prohaska 2013-04-17 00:00:10 -04:00 committed by Yoni Fogel
parent 4ecc1f49c7
commit 0ae8e7e249
89 changed files with 4208 additions and 6679 deletions

View file

@ -324,7 +324,6 @@ struct __toku_db {
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
@ -335,7 +334,7 @@ struct __toku_db {
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy0[11];
void* __toku_dummy0[12];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=236 size=4, 64=bit offset=376 size=8 */
void* __toku_dummy2[5];

View file

@ -333,7 +333,6 @@ struct __toku_db {
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
@ -344,7 +343,7 @@ struct __toku_db {
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy0[14];
void* __toku_dummy0[15];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=248 size=4, 64=bit offset=400 size=8 */
void* __toku_dummy2[5];

View file

@ -9,478 +9,3 @@
#if defined(__cplusplus)
extern "C" {
#endif
/* Identification and version macros for this header. */
#define TOKUDB 1
#define TOKUDB_NATIVE_H 0
/* Version triple; DB_VERSION_STRING below spells out the same 4.4.20. */
#define DB_VERSION_MAJOR 4
#define DB_VERSION_MINOR 4
#define DB_VERSION_PATCH 20
/* When _TOKUDB_WRAP_H is defined, only the _ydb-suffixed string is provided
   and DB_VERSION_STRING is left undefined by this header. */
#ifndef _TOKUDB_WRAP_H
#define DB_VERSION_STRING "Tokutek: TokuDB 4.4.20"
#else
#define DB_VERSION_STRING_ydb "Tokutek: TokuDB (wrapped bdb)"
#endif
/* 64-bit file-offset type. TOKU_OFF_T_DEFINED guards against a second
   definition when another header has already provided the typedef. */
#ifndef TOKU_OFF_T_DEFINED
#define TOKU_OFF_T_DEFINED
typedef int64_t toku_off_t;
#endif
/* Length in bytes of the gid array in DB_PREPLIST and of the gid argument of DB_TXN->prepare. */
#define DB_GID_SIZE 128
/* Short handle names for the __toku_* structures defined later in this header. */
typedef struct __toku_db_env DB_ENV;
typedef struct __toku_db_key_range DB_KEY_RANGE;
typedef struct __toku_db_lsn DB_LSN;
typedef struct __toku_db DB;
typedef struct __toku_db_txn DB_TXN;
typedef struct __toku_db_txn_active DB_TXN_ACTIVE;
typedef struct __toku_db_txn_stat DB_TXN_STAT;
typedef struct __toku_dbc DBC;
typedef struct __toku_dbt DBT;
/* Element type of the preplist array taken by DB_ENV->txn_recover: a transaction handle plus its gid. */
typedef struct __toku_db_preplist { DB_TXN *txn; uint8_t gid[DB_GID_SIZE]; } DB_PREPLIST;
/* Count type written by DBC->c_count. */
typedef u_int32_t db_recno_t;
/* Callback type taken by the getf-style accessors (DB->getf_set, DBC->c_getf_*).
   Receives two read-only DBTs and the caller's extra pointer.
   NOTE(review): presumably the DBTs are key then value, and a nonzero return is an error -- confirm. */
typedef int(*YDB_CALLBACK_FUNCTION)(DBT const*, DBT const*, void*);
#include <tdb-internal.h>
/* Fallback value for compilers that do not predefine __BIGGEST_ALIGNMENT__. */
#ifndef __BIGGEST_ALIGNMENT__
#define __BIGGEST_ALIGNMENT__ 16
#endif
/* Dictionary statistics record; this is the type taken by DB->stat64.
   All counters are 64-bit; the first three are estimates as described per field. */
typedef struct __toku_db_btree_stat64 {
u_int64_t bt_nkeys; /* how many unique keys (guaranteed only to be an estimate, even when flattened) */
u_int64_t bt_ndata; /* how many key-value pairs (an estimate, but exact when flattened) */
u_int64_t bt_dsize; /* how big are the keys+values (not counting the lengths) (an estimate, unless flattened) */
u_int64_t bt_fsize; /* how big is the underlying file */
u_int64_t bt_create_time_sec; /* Creation time, in seconds */
u_int64_t bt_modify_time_sec; /* Time of last serialization, in seconds */
u_int64_t bt_verify_time_sec; /* Time of last verification, in seconds */
} DB_BTREE_STAT64;
/* Loader handle, produced through DB_ENV->create_loader.
   The object is a table of function pointers; each takes the loader itself as first argument. */
typedef struct __toku_loader DB_LOADER;
/* Private state; only forward-declared in this header. */
struct __toku_loader_internal;
struct __toku_loader {
struct __toku_loader_internal *i;
int (*set_error_callback)(DB_LOADER *loader, void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra), void *error_extra); /* set the error callback */
int (*set_poll_function)(DB_LOADER *loader, int (*poll_func)(void *extra, float progress), void *poll_extra); /* set the polling function */
int (*put)(DB_LOADER *loader, DBT *key, DBT* val); /* give a row to the loader */
int (*close)(DB_LOADER *loader); /* finish loading, free memory */
int (*abort)(DB_LOADER *loader); /* abort loading, free memory */
};
/* Indexer handle, produced through DB_ENV->create_indexer.
   Same shape as DB_LOADER, except that rows are not fed in with put(); build() does the work. */
typedef struct __toku_indexer DB_INDEXER;
/* Private state; only forward-declared in this header. */
struct __toku_indexer_internal;
struct __toku_indexer {
struct __toku_indexer_internal *i;
int (*set_error_callback)(DB_INDEXER *indexer, void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra), void *error_extra); /* set the error callback */
int (*set_poll_function)(DB_INDEXER *indexer, int (*poll_func)(void *extra, float progress), void *poll_extra); /* set the polling function */
int (*build)(DB_INDEXER *indexer); /* build the indexes */
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
/* File-system free-space zones, numbered 0..3 in increasing severity. */
typedef enum {
    FS_GREEN,   /* 0: green zone, lots of space available */
    FS_YELLOW,  /* 1: yellow zone, a warning is issued but operations are allowed */
    FS_RED,     /* 2: red zone, insert operations are prevented */
    FS_BLOCKED  /* 3: completely blocked; used when reporting engine status */
} fs_redzone_state;
/* Tells a reader of an engine-status row how its value is to be interpreted. */
typedef enum {
    FS_STATE = 0,  /* value is a file system state (redzone) enum */
    UINT64   = 1,  /* value is a uint64_t */
    CHARSTR  = 2,  /* value is a char * */
    UNIXTIME = 3,  /* value is a time_t */
    TOKUTIME = 4   /* value is a tokutime_t */
} toku_engine_status_display_type;
/* One row of engine status: a key, a display legend, and a value tagged by `type`.
   TOKU_ENGINE_STATUS_ROW is the pointer form (the type DB_ENV->get_engine_status takes);
   TOKU_ENGINE_STATUS_ROW_S is the struct itself. */
typedef struct __toku_engine_status_row {
char * keyname; // info schema key, should not change across revisions without good reason
char * legend; // the text that will appear at user interface
toku_engine_status_display_type type; // how to interpret the value
// NOTE(review): presumably `str` is the active member for CHARSTR and `num` for every other type -- confirm.
union {
uint64_t num;
char * str;
} value;
} * TOKU_ENGINE_STATUS_ROW, TOKU_ENGINE_STATUS_ROW_S;
/* Access-method selector passed to DB->open. The values are sparse (1 and 5). */
typedef enum {
    DB_BTREE   = 1,
    DB_UNKNOWN = 5
} DBTYPE;
/* Flag bits, operation codes and return codes.
   The whole group is compiled out when _TOKUDB_WRAP_H is defined.
   Values are not unique across the group (for example DB_CREATE, DB_VERB_DEADLOCK,
   DB_ARCH_ABS and DB_TXN_SNAPSHOT are all 1), so each constant is only meaningful
   in the argument it was designed for. */
#ifndef _TOKUDB_WRAP_H
/* DB_VERB_* */
#define DB_VERB_DEADLOCK 1
#define DB_VERB_RECOVERY 2
#define DB_VERB_REPLICATION 8
#define DB_VERB_WAITSFOR 16
/* DB_ARCH_* */
#define DB_ARCH_ABS 1
#define DB_ARCH_LOG 4
#define DB_CREATE 1
#define DB_CXX_NO_EXCEPTIONS 1
#define DB_EXCL 8192
#define DB_PRIVATE 1048576
#define DB_RDONLY 16
#define DB_RECOVER 32
#define DB_RUNRECOVERY -30974
#define DB_THREAD 64
#define DB_TXN_NOSYNC 256
/* DB_LOCK_* */
#define DB_LOCK_DEFAULT 1
#define DB_LOCK_OLDEST 7
#define DB_LOCK_RANDOM 8
#define DB_KEYFIRST 15
#define DB_KEYLAST 16
#define DB_NOOVERWRITE 22
#define DB_NODUPDATA 21
#define DB_NOOVERWRITE_NO_ERROR 1
#define DB_OPFLAGS_MASK 255
#define DB_AUTO_COMMIT 16777216
/* DB_INIT_* */
#define DB_INIT_LOCK 16384
#define DB_INIT_LOG 32768
#define DB_INIT_MPOOL 65536
#define DB_INIT_TXN 262144
/* Negative values in the -30xxx range. NOTE(review): presumably returned as error codes -- confirm. */
#define DB_KEYEXIST -30996
#define DB_LOCK_DEADLOCK -30995
#define DB_LOCK_NOTGRANTED -30994
#define DB_NOTFOUND -30989
#define DB_SECONDARY_BAD -30973
#define DB_DONOTINDEX -30998
#define DB_BUFFER_SMALL -30999
#define DB_BADFORMAT -30500
#define DB_DELETE_ANY 65536
#define DB_TRUNCATE_WITHCURSORS 131072
#define DB_FIRST 9
#define DB_LAST 17
#define DB_CURRENT 7
#define DB_NEXT 18
#define DB_NEXT_NODUP 20
#define DB_PREV 25
#define DB_PREV_NODUP 26
#define DB_SET 28
#define DB_SET_RANGE 30
#define DB_CURRENT_BINDING 253
#define DB_SET_RANGE_REVERSE 252
#define DB_RMW 536870912
/* Constants written in hexadecimal in this header. */
#define DB_IS_RESETTING_OP 0x01000000
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
/* DB_DBT_* */
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 64
#define DB_DBT_MALLOC 4
#define DB_DBT_REALLOC 16
#define DB_DBT_USERMEM 32
#define DB_LOG_AUTOREMOVE 262144
#define DB_TXN_WRITE_NOSYNC 1024
#define DB_TXN_NOWAIT 8192
#define DB_TXN_SYNC 16384
#define DB_READ_UNCOMMITTED 67108864
#define DB_READ_COMMITTED 33554432
#define DB_TXN_SNAPSHOT 1
#define DB_INHERIT_ISOLATION 2
#define DB_SERIALIZABLE 4
#endif
/* TOKUDB specific error codes.
   Values run consecutively downward from -100000 to -100014.
   Unlike the DB_* constants, these are defined whether or not _TOKUDB_WRAP_H is set. */
#define TOKUDB_OUT_OF_LOCKS -100000
#define TOKUDB_SUCCEEDED_EARLY -100001
#define TOKUDB_FOUND_BUT_REJECTED -100002
#define TOKUDB_USER_CALLBACK_ERROR -100003
#define TOKUDB_DICTIONARY_TOO_OLD -100004
#define TOKUDB_DICTIONARY_TOO_NEW -100005
#define TOKUDB_DICTIONARY_NO_HEADER -100006
#define TOKUDB_CANCELED -100007
#define TOKUDB_NO_DATA -100008
#define TOKUDB_ACCEPT -100009
#define TOKUDB_MVCC_DICTIONARY_TOO_NEW -100010
#define TOKUDB_UPGRADE_FAILURE -100011
#define TOKUDB_TRY_AGAIN -100012
#define TOKUDB_NEEDS_REPAIR -100013
#define TOKUDB_CURSOR_CONTINUE -100014
/* LOADER flags (the loader_flags argument of DB_ENV->create_loader) */
#define LOADER_USE_PUTS 1
/* Row-generator callback installed with DB_ENV->set_generate_row_callback_for_put:
   given a source key/value, fills in the key and value for dest_db. */
typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val);
/* Row-generator callback installed with DB_ENV->set_generate_row_callback_for_del:
   given a source key/value, fills in only the key for dest_db. */
typedef int (*generate_row_for_del_func)(DB *dest_db, DB *src_db, DBT *dest_key, const DBT *src_key, const DBT *src_val);
/* In wrap mode the top-level function txn_begin is renamed by a macro, but the
   DB_ENV member of the same name must not be renamed. The macro is therefore
   removed here, before the structures are declared; it is defined again after
   the struct definitions. */
#ifdef _TOKUDB_WRAP_H
#undef txn_begin
#endif
/* Environment handle (DB_ENV): a table of function pointers plus padding.
 *
 * Layout:
 *   - `i` and the members without an offset comment come first.
 *   - Every member carrying a "32-bit offset=... 64=bit offset=..." comment sits at
 *     that fixed byte offset; the __toku_dummyN arrays between them are padding that
 *     keeps those offsets in place. Do not add, remove or reorder members without
 *     re-deriving the padding sizes.
 *     NOTE(review): the fixed offsets presumably mirror another library's DB_ENV
 *     layout (DB_VERSION_STRING_ydb mentions "wrapped bdb") -- confirm against the
 *     generator of this header.
 *
 * Every operation takes the DB_ENV itself as its first argument.
 */
struct __toku_db_env {
struct __toku_db_env_internal *i;
/* Accessor for the private-state pointer. */
#define db_env_struct_i(x) ((x)->i)
int (*checkpointing_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic checkpoints. 0 means disabled. */;
int (*checkpointing_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */;
int (*cleaner_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */;
int (*cleaner_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */;
int (*cleaner_set_iterations) (DB_ENV*, u_int32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */;
int (*cleaner_get_iterations) (DB_ENV*, u_int32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */;
int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */;
int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */;
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
void *app_private; /* 32-bit offset=44 size=4, 64=bit offset=88 size=8 */
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
int (*create_loader) (DB_ENV *env, DB_TXN *txn, DB_LOADER **blp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t dbt_flags[/*N*/], uint32_t loader_flags);
int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags);
int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
const DBT *src_key, const DBT *src_val,
uint32_t num_dbs, DB **db_array, DBT *keys, DBT *vals, uint32_t *flags_array) /* insert into multiple DBs */;
int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put);
int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
const DBT *src_key, const DBT *src_val,
uint32_t num_dbs, DB **db_array, DBT *keys, uint32_t *flags_array) /* delete from multiple DBs */;
int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del);
int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
DBT *old_src_key, DBT *old_src_data,
DBT *new_src_key, DBT *new_src_data,
uint32_t num_dbs, DB **db_array, uint32_t *flags_array,
uint32_t num_keys, DBT *keys,
uint32_t num_vals, DBT *vals) /* update multiple DBs */;
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
/* From here on, members are pinned to the offsets given in their comments. */
void* __toku_dummy0[20];
char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[7];
int (*close) (DB_ENV *, u_int32_t); /* 32-bit offset=368 size=4, 64=bit offset=608 size=8 */
int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t); /* 32-bit offset=372 size=4, 64=bit offset=616 size=8 */
int (*dbrename) (DB_ENV *, DB_TXN *, const char *, const char *, const char *, u_int32_t); /* 32-bit offset=376 size=4, 64=bit offset=624 size=8 */
void (*err) (const DB_ENV *, int, const char *, ...); /* 32-bit offset=380 size=4, 64=bit offset=632 size=8 */
void* __toku_dummy3[3];
int (*get_cachesize) (DB_ENV *, u_int32_t *, u_int32_t *, int *); /* 32-bit offset=396 size=4, 64=bit offset=664 size=8 */
void* __toku_dummy4[4];
int (*get_flags) (DB_ENV *, u_int32_t *); /* 32-bit offset=416 size=4, 64=bit offset=704 size=8 */
void* __toku_dummy5[4];
int (*get_lg_max) (DB_ENV *, u_int32_t*); /* 32-bit offset=436 size=4, 64=bit offset=744 size=8 */
void* __toku_dummy6[4];
int (*get_lk_max_locks) (DB_ENV *, u_int32_t *); /* 32-bit offset=456 size=4, 64=bit offset=784 size=8 */
void* __toku_dummy7[22];
int (*log_archive) (DB_ENV *, char **[], u_int32_t); /* 32-bit offset=548 size=4, 64=bit offset=968 size=8 */
void* __toku_dummy8[2];
int (*log_flush) (DB_ENV *, const DB_LSN *); /* 32-bit offset=560 size=4, 64=bit offset=992 size=8 */
void* __toku_dummy9[25];
int (*open) (DB_ENV *, const char *, u_int32_t, int); /* 32-bit offset=664 size=4, 64=bit offset=1200 size=8 */
void* __toku_dummy10[12];
int (*set_cachesize) (DB_ENV *, u_int32_t, u_int32_t, int); /* 32-bit offset=716 size=4, 64=bit offset=1304 size=8 */
int (*set_data_dir) (DB_ENV *, const char *); /* 32-bit offset=720 size=4, 64=bit offset=1312 size=8 */
void* __toku_dummy11[1];
void (*set_errcall) (DB_ENV *, void (*)(const DB_ENV *, const char *, const char *)); /* 32-bit offset=728 size=4, 64=bit offset=1328 size=8 */
void (*set_errfile) (DB_ENV *, FILE*); /* 32-bit offset=732 size=4, 64=bit offset=1336 size=8 */
void (*set_errpfx) (DB_ENV *, const char *); /* 32-bit offset=736 size=4, 64=bit offset=1344 size=8 */
void* __toku_dummy12[1];
int (*set_flags) (DB_ENV *, u_int32_t, int); /* 32-bit offset=744 size=4, 64=bit offset=1360 size=8 */
void* __toku_dummy13[2];
int (*set_lg_bsize) (DB_ENV *, u_int32_t); /* 32-bit offset=756 size=4, 64=bit offset=1384 size=8 */
int (*set_lg_dir) (DB_ENV *, const char *); /* 32-bit offset=760 size=4, 64=bit offset=1392 size=8 */
void* __toku_dummy14[1];
int (*set_lg_max) (DB_ENV *, u_int32_t); /* 32-bit offset=768 size=4, 64=bit offset=1408 size=8 */
void* __toku_dummy15[2];
int (*set_lk_detect) (DB_ENV *, u_int32_t); /* 32-bit offset=780 size=4, 64=bit offset=1432 size=8 */
int (*set_lk_max) (DB_ENV *, u_int32_t); /* 32-bit offset=784 size=4, 64=bit offset=1440 size=8 */
void* __toku_dummy16[1];
int (*set_lk_max_locks) (DB_ENV *, u_int32_t); /* 32-bit offset=792 size=4, 64=bit offset=1456 size=8 */
void* __toku_dummy17[16];
int (*set_tmp_dir) (DB_ENV *, const char *); /* 32-bit offset=860 size=4, 64=bit offset=1592 size=8 */
void* __toku_dummy18[2];
int (*set_verbose) (DB_ENV *, u_int32_t, int); /* 32-bit offset=872 size=4, 64=bit offset=1616 size=8 */
void* __toku_dummy19[1];
int (*txn_begin) (DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t); /* 32-bit offset=880 size=4, 64=bit offset=1632 size=8 */
int (*txn_checkpoint) (DB_ENV *, u_int32_t, u_int32_t, u_int32_t); /* 32-bit offset=884 size=4, 64=bit offset=1640 size=8 */
int (*txn_recover) (DB_ENV *, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags); /* 32-bit offset=888 size=4, 64=bit offset=1648 size=8 */
int (*txn_stat) (DB_ENV *, DB_TXN_STAT **, u_int32_t); /* 32-bit offset=892 size=4, 64=bit offset=1656 size=8 */
void* __toku_dummy20[2]; /* Padding at the end */
char __toku_dummy21[16]; /* Padding at the end */
};
/* Result record of DB->key_range (DB_KEY_RANGE): three doubles followed by padding.
   NOTE(review): presumably the doubles are the fractions of keys less than, equal to
   and greater than the probed key -- confirm. */
struct __toku_db_key_range {
double less; /* 32-bit offset=0 size=8, 64=bit offset=0 size=8 */
double equal; /* 32-bit offset=8 size=8, 64=bit offset=8 size=8 */
double greater; /* 32-bit offset=16 size=8, 64=bit offset=16 size=8 */
void* __toku_dummy0[194]; /* Padding at the end */
char __toku_dummy1[120]; /* Padding at the end */
};
/* DB_LSN: exposes no named fields in this header, only 8 bytes of storage. */
struct __toku_db_lsn {
char __toku_dummy0[8]; /* Padding at the end */
};
/* DBT: a data pointer with two 32-bit lengths and a flags word.
   The 8 bytes of padding between ulen and flags keep flags at the offset given in its comment. */
struct __toku_dbt {
void*data; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
u_int32_t size; /* 32-bit offset=4 size=4, 64=bit offset=8 size=4 */
u_int32_t ulen; /* 32-bit offset=8 size=4, 64=bit offset=12 size=4 */
char __toku_dummy0[8];
u_int32_t flags; /* 32-bit offset=20 size=4, 64=bit offset=24 size=4 */
/* 4 more bytes of alignment in the 64-bit case. */
};
/* Descriptor: a wrapper around a single DBT.
   DESCRIPTOR is the pointer form (the type of the DB `descriptor` member); DESCRIPTOR_S is the struct. */
typedef struct __toku_descriptor {
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
// Fragmentation report; this is the type taken by DB->get_fragmentation.
// TOKU_DB_FRAGMENTATION is the pointer form, TOKU_DB_FRAGMENTATION_S the struct itself.
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
uint64_t file_size_bytes; //Total file size in bytes
uint64_t data_bytes; //Compressed User Data in bytes
uint64_t data_blocks; //Number of blocks of compressed User Data
uint64_t checkpoint_bytes_additional; //Additional bytes used for checkpoint system
uint64_t checkpoint_blocks_additional; //Additional blocks used for checkpoint system
uint64_t unused_bytes; //Unused space in file
uint64_t unused_blocks; //Number of contiguous regions of unused space
uint64_t largest_unused_block; //Size of largest contiguous unused space
} *TOKU_DB_FRAGMENTATION, TOKU_DB_FRAGMENTATION_S;
/* Dictionary handle (DB): a table of function pointers plus padding.
 *
 * Members without an offset comment come first. Members carrying a
 * "32-bit offset=... 64=bit offset=..." comment sit at that fixed byte offset, and
 * the __toku_dummyN arrays are padding that keeps those offsets in place; the size of
 * __toku_dummy0 therefore has to change whenever a member is added to or removed from
 * the leading group.
 */
struct __toku_db {
struct __toku_db_internal *i;
/* Accessor for the private-state pointer. */
#define db_struct_i(x) ((x)->i)
int (*key_range64)(DB*, DB_TXN *, DBT *, u_int64_t *less, u_int64_t *equal, u_int64_t *greater, int *is_exact);
int (*stat64)(DB *, DB_TXN *, DB_BTREE_STAT64 *);
int (*pre_acquire_table_lock)(DB*, DB_TXN*);
void *app_private; /* 32-bit offset=16 size=4, 64=bit offset=32 size=8 */
DB_ENV *dbenv; /* 32-bit offset=20 size=4, 64=bit offset=40 size=8 */
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
void (*get_max_row_size) (DB*, u_int32_t *max_key_size, u_int32_t *max_row_size);
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set (without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collection and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
/* From here on, members are pinned to the offsets given in their comments. */
void* __toku_dummy0[16];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=256 size=4, 64=bit offset=416 size=8 */
void* __toku_dummy2[5];
int (*close) (DB*, u_int32_t); /* 32-bit offset=280 size=4, 64=bit offset=464 size=8 */
void* __toku_dummy3[1];
int (*cursor) (DB *, DB_TXN *, DBC **, u_int32_t); /* 32-bit offset=288 size=4, 64=bit offset=480 size=8 */
int (*del) (DB *, DB_TXN *, DBT *, u_int32_t); /* 32-bit offset=292 size=4, 64=bit offset=488 size=8 */
void* __toku_dummy4[2];
int (*fd) (DB *, int *); /* 32-bit offset=304 size=4, 64=bit offset=512 size=8 */
int (*get) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t); /* 32-bit offset=308 size=4, 64=bit offset=520 size=8 */
void* __toku_dummy5[8];
int (*get_flags) (DB *, u_int32_t *); /* 32-bit offset=344 size=4, 64=bit offset=592 size=8 */
void* __toku_dummy6[6];
int (*get_pagesize) (DB *, u_int32_t *); /* 32-bit offset=372 size=4, 64=bit offset=648 size=8 */
void* __toku_dummy7[8];
int (*key_range) (DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t); /* 32-bit offset=408 size=4, 64=bit offset=720 size=8 */
int (*open) (DB *, DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int); /* 32-bit offset=412 size=4, 64=bit offset=728 size=8 */
void* __toku_dummy8[1];
int (*put) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t); /* 32-bit offset=420 size=4, 64=bit offset=744 size=8 */
int (*remove) (DB *, const char *, const char *, u_int32_t); /* 32-bit offset=424 size=4, 64=bit offset=752 size=8 */
int (*rename) (DB *, const char *, const char *, const char *, u_int32_t); /* 32-bit offset=428 size=4, 64=bit offset=760 size=8 */
void* __toku_dummy9[9];
void (*set_errfile) (DB *, FILE*); /* 32-bit offset=468 size=4, 64=bit offset=840 size=8 */
void* __toku_dummy10[2];
int (*set_flags) (DB *, u_int32_t); /* 32-bit offset=480 size=4, 64=bit offset=864 size=8 */
void* __toku_dummy11[6];
int (*set_pagesize) (DB *, u_int32_t); /* 32-bit offset=508 size=4, 64=bit offset=920 size=8 */
void* __toku_dummy12[6];
int (*stat) (DB *, void *, u_int32_t); /* 32-bit offset=536 size=4, 64=bit offset=976 size=8 */
void* __toku_dummy13[2];
int (*truncate) (DB *, DB_TXN *, u_int32_t *, u_int32_t); /* 32-bit offset=548 size=4, 64=bit offset=1000 size=8 */
void* __toku_dummy14[1];
int (*verify) (DB *, const char *, const char *, FILE *, u_int32_t); /* 32-bit offset=556 size=4, 64=bit offset=1016 size=8 */
void* __toku_dummy15[5]; /* Padding at the end */
char __toku_dummy16[16]; /* Padding at the end */
};
/* DB_TXN_ACTIVE: element type of DB_TXN_STAT.st_txnarray.
   Only txnid and lsn are named; everything else is padding that fixes their offsets and the total size. */
struct __toku_db_txn_active {
u_int32_t txnid; /* 32-bit offset=0 size=4, 64=bit offset=0 size=4 */
void* __toku_dummy0[2];
char __toku_dummy1[4];
DB_LSN lsn; /* 32-bit offset=16 size=8, 64=bit offset=24 size=8 */
char __toku_dummy2[184]; /* Padding at the end */
};
/* Progress record handed to a TXN_PROGRESS_POLL_FUNCTION.
   TOKU_TXN_PROGRESS is the pointer form, TOKU_TXN_PROGRESS_S the struct itself. */
typedef struct __toku_txn_progress {
uint64_t entries_total;
uint64_t entries_processed;
/* NOTE(review): the two uint8_t members look like booleans (commit vs. abort; waiting on a checkpoint) -- confirm. */
uint8_t is_commit;
uint8_t stalled_on_checkpoint;
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
/* Poll callback type taken by DB_TXN->commit_with_progress and DB_TXN->abort_with_progress;
   receives the progress record and the caller's extra pointer. */
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
/* Per-transaction statistics; a pointer to one is written through the out-parameter of DB_TXN->txn_stat.
   Distinct from DB_TXN_STAT (struct __toku_db_txn_stat), which is the type used by DB_ENV->txn_stat. */
struct txn_stat {
u_int64_t rollback_raw_count;
};
/* Transaction handle (DB_TXN).
   mgrp and parent lead the structure; the members with offset comments further down are
   pinned in place by the __toku_dummyN padding arrays.
   `struct toku_list` is not defined in this header; it has to come from elsewhere
   (NOTE(review): presumably <tdb-internal.h>, included above -- confirm). */
struct __toku_db_txn {
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
DB_TXN *parent; /* 32-bit offset=4 size=4, 64=bit offset=8 size=8 */
int (*txn_stat)(DB_TXN *, struct txn_stat **);
struct toku_list open_txns;
int (*commit_with_progress)(DB_TXN*, uint32_t, TXN_PROGRESS_POLL_FUNCTION, void*);
int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*);
void* __toku_dummy0[13];
char __toku_dummy1[8];
void *api_internal; /* 32-bit offset=84 size=4, 64=bit offset=160 size=8 */
void* __toku_dummy2[2];
int (*abort) (DB_TXN *); /* 32-bit offset=96 size=4, 64=bit offset=184 size=8 */
int (*commit) (DB_TXN*, u_int32_t); /* 32-bit offset=100 size=4, 64=bit offset=192 size=8 */
void* __toku_dummy3[2];
u_int32_t (*id) (DB_TXN *); /* 32-bit offset=112 size=4, 64=bit offset=216 size=8 */
int (*prepare) (DB_TXN*, u_int8_t gid[DB_GID_SIZE]); /* 32-bit offset=116 size=4, 64=bit offset=224 size=8 */
void* __toku_dummy4[4]; /* Padding at the end */
};
/* DB_TXN_STAT: the record type used by DB_ENV->txn_stat.
   Only st_nactive and st_txnarray are named; the rest is padding.
   NOTE(review): presumably st_nactive is the element count of st_txnarray -- confirm. */
struct __toku_db_txn_stat {
void* __toku_dummy0[1];
char __toku_dummy1[28];
u_int32_t st_nactive; /* 32-bit offset=32 size=4, 64=bit offset=36 size=4 */
char __toku_dummy2[8];
DB_TXN_ACTIVE *st_txnarray; /* 32-bit offset=44 size=4, 64=bit offset=48 size=8 */
void* __toku_dummy3[1]; /* Padding at the end */
char __toku_dummy4[8]; /* Padding at the end */
};
/* Cursor handle (DBC).
   dbp comes first, then the c_getf_* accessors, which hand their result to a
   YDB_CALLBACK_FUNCTION together with the trailing void* argument. The members with
   offset comments are pinned in place by the __toku_dummyN padding arrays. */
struct __toku_dbc {
DB *dbp; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
/* Accessors that take no key. */
int (*c_getf_first)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_last)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_next)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_prev)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_current)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_current_binding)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
/* Accessors that additionally take a DBT (NOTE(review): presumably the key to position on -- confirm). */
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[10];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=188 size=4, 64=bit offset=272 size=8 */
int (*c_count) (DBC *, db_recno_t *, u_int32_t); /* 32-bit offset=192 size=4, 64=bit offset=280 size=8 */
int (*c_del) (DBC *, u_int32_t); /* 32-bit offset=196 size=4, 64=bit offset=288 size=8 */
void* __toku_dummy2[1];
int (*c_get) (DBC *, DBT *, DBT *, u_int32_t); /* 32-bit offset=204 size=4, 64=bit offset=304 size=8 */
void* __toku_dummy3[10]; /* Padding at the end */
};
/* Wrap mode only: now that the structures (including the DB_ENV txn_begin member)
   have been declared, rename the identifier txn_begin to txn_begin_tokudb from here on. */
#ifdef _TOKUDB_WRAP_H
#define txn_begin txn_begin_tokudb
#endif
/* Exported entry points. Every declaration below carries default symbol visibility. */
/* Handle constructors: the new handle is written through the first argument. */
int db_env_create(DB_ENV **, u_int32_t) __attribute__((__visibility__("default")));
int db_create(DB **, DB_ENV *, u_int32_t) __attribute__((__visibility__("default")));
char *db_strerror(int) __attribute__((__visibility__("default")));
const char *db_version(int*,int *,int *) __attribute__((__visibility__("default")));
int log_compare (const DB_LSN*, const DB_LSN *) __attribute__((__visibility__("default")));
int db_env_set_func_fsync (int (*)(int)) __attribute__((__visibility__("default")));
int toku_set_trace_file (char *fname) __attribute__((__visibility__("default")));
int toku_close_trace_file (void) __attribute__((__visibility__("default")));
/* db_env_set_func_*: each takes a function pointer whose signature matches the C library / OS
   call named in the suffix. NOTE(review): presumably these replace the function the library
   calls internally -- confirm. */
int db_env_set_func_free (void (*)(void*)) __attribute__((__visibility__("default")));
int db_env_set_func_malloc (void *(*)(size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_realloc (void *(*)(void*, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) __attribute__((__visibility__("default")));
int db_env_set_func_full_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) __attribute__((__visibility__("default")));
int db_env_set_func_write (ssize_t (*)(int, const void *, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_full_write (ssize_t (*)(int, const void *, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_fdopen (FILE* (*)(int, const char *)) __attribute__((__visibility__("default")));
int db_env_set_func_fopen (FILE* (*)(const char *, const char *)) __attribute__((__visibility__("default")));
int db_env_set_func_open (int (*)(const char *, int, int)) __attribute__((__visibility__("default")));
int db_env_set_func_fclose (int (*)(FILE*)) __attribute__((__visibility__("default")));
/* NOTE(review): the pread hook takes off_t while the pwrite hooks above take toku_off_t (int64_t);
   the two differ where off_t is not 64 bits wide -- confirm this asymmetry is intended. */
int db_env_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)) __attribute__((__visibility__("default")));
void db_env_set_func_loader_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) __attribute__((__visibility__("default")));
/* Callback registration: a function taking void* plus the void* registered alongside it. */
void db_env_set_checkpoint_callback (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_checkpoint_callback2 (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_recover_callback (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_recover_callback2 (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_loader_size_factor (uint32_t) __attribute__((__visibility__("default")));
void db_env_set_mvcc_garbage_collection_verification(u_int32_t) __attribute__((__visibility__("default")));
void db_env_enable_engine_status(u_int32_t) __attribute__((__visibility__("default")));
void db_env_set_flusher_thread_callback (void (*)(int, void*), void*) __attribute__((__visibility__("default")));
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -9,478 +9,3 @@
#if defined(__cplusplus)
extern "C" {
#endif
/* Identification and version macros for this header. */
#define TOKUDB 1
#define TOKUDB_NATIVE_H 0
/* Version triple; DB_VERSION_STRING below spells out the same 4.5.20. */
#define DB_VERSION_MAJOR 4
#define DB_VERSION_MINOR 5
#define DB_VERSION_PATCH 20
/* When _TOKUDB_WRAP_H is defined, only the _ydb-suffixed string is provided
   and DB_VERSION_STRING is left undefined by this header. */
#ifndef _TOKUDB_WRAP_H
#define DB_VERSION_STRING "Tokutek: TokuDB 4.5.20"
#else
#define DB_VERSION_STRING_ydb "Tokutek: TokuDB (wrapped bdb)"
#endif
/* 64-bit file-offset type. TOKU_OFF_T_DEFINED guards against a second
   definition when another header has already provided the typedef. */
#ifndef TOKU_OFF_T_DEFINED
#define TOKU_OFF_T_DEFINED
typedef int64_t toku_off_t;
#endif
/* Length in bytes of the gid array in DB_PREPLIST. */
#define DB_GID_SIZE 128
/* Short handle names for the __toku_* structure tags. */
typedef struct __toku_db_env DB_ENV;
typedef struct __toku_db_key_range DB_KEY_RANGE;
typedef struct __toku_db_lsn DB_LSN;
typedef struct __toku_db DB;
typedef struct __toku_db_txn DB_TXN;
typedef struct __toku_db_txn_active DB_TXN_ACTIVE;
typedef struct __toku_db_txn_stat DB_TXN_STAT;
typedef struct __toku_dbc DBC;
typedef struct __toku_dbt DBT;
/* A transaction handle paired with its DB_GID_SIZE-byte gid. */
typedef struct __toku_db_preplist { DB_TXN *txn; uint8_t gid[DB_GID_SIZE]; } DB_PREPLIST;
typedef u_int32_t db_recno_t;
/* Callback type: receives two read-only DBTs and an extra pointer.
   NOTE(review): presumably the DBTs are key then value -- confirm. */
typedef int(*YDB_CALLBACK_FUNCTION)(DBT const*, DBT const*, void*);
#include <tdb-internal.h>
/* Fallback value for compilers that do not predefine __BIGGEST_ALIGNMENT__. */
#ifndef __BIGGEST_ALIGNMENT__
#define __BIGGEST_ALIGNMENT__ 16
#endif
/* Dictionary statistics record.
   All counters are 64-bit; the first three are estimates as described per field. */
typedef struct __toku_db_btree_stat64 {
u_int64_t bt_nkeys; /* how many unique keys (guaranteed only to be an estimate, even when flattened) */
u_int64_t bt_ndata; /* how many key-value pairs (an estimate, but exact when flattened) */
u_int64_t bt_dsize; /* how big are the keys+values (not counting the lengths) (an estimate, unless flattened) */
u_int64_t bt_fsize; /* how big is the underlying file */
u_int64_t bt_create_time_sec; /* Creation time, in seconds */
u_int64_t bt_modify_time_sec; /* Time of last serialization, in seconds */
u_int64_t bt_verify_time_sec; /* Time of last verification, in seconds */
} DB_BTREE_STAT64;
/* Loader handle: a table of function pointers, each taking the loader itself as first argument. */
typedef struct __toku_loader DB_LOADER;
/* Private state; only forward-declared here. */
struct __toku_loader_internal;
struct __toku_loader {
struct __toku_loader_internal *i;
int (*set_error_callback)(DB_LOADER *loader, void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra), void *error_extra); /* set the error callback */
int (*set_poll_function)(DB_LOADER *loader, int (*poll_func)(void *extra, float progress), void *poll_extra); /* set the polling function */
int (*put)(DB_LOADER *loader, DBT *key, DBT* val); /* give a row to the loader */
int (*close)(DB_LOADER *loader); /* finish loading, free memory */
int (*abort)(DB_LOADER *loader); /* abort loading, free memory */
};
/* Indexer handle: an opaque implementation pointer followed by its operations. */
struct __toku_indexer_internal;
typedef struct __toku_indexer DB_INDEXER;
struct __toku_indexer {
    struct __toku_indexer_internal *i;

    /* set the error callback */
    int (*set_error_callback)(DB_INDEXER *indexer,
                              void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra),
                              void *error_extra);
    /* set the polling function */
    int (*set_poll_function)(DB_INDEXER *indexer,
                             int (*poll_func)(void *extra, float progress),
                             void *poll_extra);
    /* build the indexes */
    int (*build)(DB_INDEXER *indexer);
    /* finish indexing, free memory */
    int (*close)(DB_INDEXER *indexer);
    /* abort indexing, free memory */
    int (*abort)(DB_INDEXER *indexer);
};
/* File-system free-space zones; enumerators take the values 0..3 in order. */
typedef enum {
    FS_GREEN,    /* 0: green zone, lots of space available */
    FS_YELLOW,   /* 1: yellow zone, warn but still allow operations */
    FS_RED,      /* 2: red zone, insert operations are prevented */
    FS_BLOCKED   /* 3: completely blocked; used when reporting engine status */
} fs_redzone_state;
/* Tells a reader of an engine-status row how its value is to be interpreted. */
typedef enum {
    FS_STATE = 0,   /* file system state (redzone) enum */
    UINT64   = 1,   /* uint64_t */
    CHARSTR  = 2,   /* char * */
    UNIXTIME = 3,   /* time_t */
    TOKUTIME = 4    /* tokutime_t */
} toku_engine_status_display_type;
/* One row of engine status: two labels, a display type, and the value itself. */
typedef struct __toku_engine_status_row {
    /* info schema key; should not change across revisions without good reason */
    char *keyname;
    /* text that will appear at the user interface */
    char *legend;
    /* selects how the value below is interpreted */
    toku_engine_status_display_type type;
    union {
        uint64_t num;
        char *str;
    } value;
} TOKU_ENGINE_STATUS_ROW_S, *TOKU_ENGINE_STATUS_ROW;
/* Access-method selector; only these two enumerators are declared here. */
typedef enum {
    DB_BTREE   = 1,
    DB_UNKNOWN = 5
} DBTYPE;
/*
 * Flag bits, operation selectors and return codes, defined only when this
 * header is not used in wrap mode (_TOKUDB_WRAP_H undefined). The values
 * presumably mirror the Berkeley DB release named by DB_VERSION_* - confirm
 * before changing any of them.
 *
 * Every negative constant is parenthesized so that the macro always expands
 * to a single operand, whatever expression it is pasted into.
 */
#ifndef _TOKUDB_WRAP_H
#define DB_VERB_DEADLOCK 1
#define DB_VERB_RECOVERY 2
#define DB_VERB_REPLICATION 8
#define DB_VERB_WAITSFOR 16
#define DB_ARCH_ABS 1
#define DB_ARCH_LOG 4
#define DB_CREATE 1
#define DB_CXX_NO_EXCEPTIONS 1
#define DB_EXCL 16384
#define DB_PRIVATE 2097152
#define DB_RDONLY 32
#define DB_RECOVER 64
#define DB_RUNRECOVERY (-30975)
#define DB_THREAD 128
#define DB_TXN_NOSYNC 512
#define DB_LOCK_DEFAULT 1
#define DB_LOCK_OLDEST 7
#define DB_LOCK_RANDOM 8
#define DB_KEYFIRST 13
#define DB_KEYLAST 14
#define DB_NOOVERWRITE 20
#define DB_NODUPDATA 19
#define DB_NOOVERWRITE_NO_ERROR 1
#define DB_OPFLAGS_MASK 255
#define DB_AUTO_COMMIT 33554432
#define DB_INIT_LOCK 32768
#define DB_INIT_LOG 65536
#define DB_INIT_MPOOL 131072
#define DB_INIT_TXN 524288
#define DB_KEYEXIST (-30996)
#define DB_LOCK_DEADLOCK (-30995)
#define DB_LOCK_NOTGRANTED (-30994)
#define DB_NOTFOUND (-30989)
#define DB_SECONDARY_BAD (-30974)
#define DB_DONOTINDEX (-30998)
#define DB_BUFFER_SMALL (-30999)
#define DB_BADFORMAT (-30500)
#define DB_DELETE_ANY 65536
#define DB_TRUNCATE_WITHCURSORS 131072
#define DB_FIRST 7
#define DB_LAST 15
#define DB_CURRENT 6
#define DB_NEXT 16
#define DB_NEXT_NODUP 18
#define DB_PREV 23
#define DB_PREV_NODUP 24
#define DB_SET 25
#define DB_SET_RANGE 27
#define DB_CURRENT_BINDING 253
#define DB_SET_RANGE_REVERSE 252
#define DB_RMW 1073741824
#define DB_IS_RESETTING_OP 0x01000000
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 128
#define DB_DBT_MALLOC 4
#define DB_DBT_REALLOC 16
#define DB_DBT_USERMEM 64
#define DB_LOG_AUTOREMOVE 524288
#define DB_TXN_WRITE_NOSYNC 2048
#define DB_TXN_NOWAIT 16384
#define DB_TXN_SYNC 32768
#define DB_TXN_SNAPSHOT 268435456
#define DB_READ_UNCOMMITTED 134217728
#define DB_READ_COMMITTED 67108864
#define DB_INHERIT_ISOLATION 1
#define DB_SERIALIZABLE 2
#endif
/* TOKUDB specific error codes.
 * All values are negative and numbered consecutively downward from -100000.
 * Each is parenthesized so the macro expands to a single operand in any
 * surrounding expression. */
#define TOKUDB_OUT_OF_LOCKS (-100000)
#define TOKUDB_SUCCEEDED_EARLY (-100001)
#define TOKUDB_FOUND_BUT_REJECTED (-100002)
#define TOKUDB_USER_CALLBACK_ERROR (-100003)
#define TOKUDB_DICTIONARY_TOO_OLD (-100004)
#define TOKUDB_DICTIONARY_TOO_NEW (-100005)
#define TOKUDB_DICTIONARY_NO_HEADER (-100006)
#define TOKUDB_CANCELED (-100007)
#define TOKUDB_NO_DATA (-100008)
#define TOKUDB_ACCEPT (-100009)
#define TOKUDB_MVCC_DICTIONARY_TOO_NEW (-100010)
#define TOKUDB_UPGRADE_FAILURE (-100011)
#define TOKUDB_TRY_AGAIN (-100012)
#define TOKUDB_NEEDS_REPAIR (-100013)
#define TOKUDB_CURSOR_CONTINUE (-100014)
/* LOADER flags */
#define LOADER_USE_PUTS 1
/* Row-generator callback types; DB_ENV's set_generate_row_callback_for_put and
 * set_generate_row_callback_for_del take them. The put variant receives
 * destination key and value DBTs, the del variant only a destination key. */
typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val);
typedef int (*generate_row_for_del_func)(DB *dest_db, DB *src_db, DBT *dest_key, const DBT *src_key, const DBT *src_val);
/* In wrap mode the top-level function txn_begin is renamed by a macro, but the
 * struct member of the same name is not, so the macro is dropped here before
 * the struct definitions and re-established after them. */
#ifdef _TOKUDB_WRAP_H
#undef txn_begin
#endif
/*
 * Environment handle (DB_ENV).
 *
 * Layout: the opaque implementation pointer `i`, then the TokuDB-specific
 * operations, then Berkeley-DB-named operations. The __toku_dummyN arrays are
 * padding; judging by the trailing offset comments they keep the named members
 * at fixed 32-bit / 64-bit offsets, so members must not be reordered or
 * resized.
 *
 * Change from the previous revision: the stray second ';' after `crash` was
 * removed (an empty member declaration is not valid ISO C).
 */
struct __toku_db_env {
    struct __toku_db_env_internal *i;
#define db_env_struct_i(x) ((x)->i)
    int (*checkpointing_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic checkpoints. 0 means disabled. */;
    int (*checkpointing_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */;
    int (*cleaner_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */;
    int (*cleaner_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */;
    int (*cleaner_set_iterations) (DB_ENV*, u_int32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */;
    int (*cleaner_get_iterations) (DB_ENV*, u_int32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */;
    int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */;
    int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */;
    int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
    int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
    int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
    int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
    void *app_private; /* 32-bit offset=52 size=4, 64-bit offset=104 size=8 */
    int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
    int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
    int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);
    int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
    int (*create_loader) (DB_ENV *env, DB_TXN *txn, DB_LOADER **blp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t dbt_flags[/*N*/], uint32_t loader_flags);
    int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags);
    int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
                         const DBT *src_key, const DBT *src_val,
                         uint32_t num_dbs, DB **db_array, DBT *keys, DBT *vals, uint32_t *flags_array) /* insert into multiple DBs */;
    int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put);
    int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
                         const DBT *src_key, const DBT *src_val,
                         uint32_t num_dbs, DB **db_array, DBT *keys, uint32_t *flags_array) /* delete from multiple DBs */;
    int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del);
    int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
                            DBT *old_src_key, DBT *old_src_data,
                            DBT *new_src_key, DBT *new_src_data,
                            uint32_t num_dbs, DB **db_array, uint32_t *flags_array,
                            uint32_t num_keys, DBT *keys,
                            uint32_t num_vals, DBT *vals) /* update multiple DBs */;
    int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
    int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
    int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
    int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
    void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
    int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
    int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
    void* __toku_dummy0[20];
    char __toku_dummy1[128];
    void *api1_internal; /* 32-bit offset=336 size=4, 64-bit offset=544 size=8 */
    void* __toku_dummy2[8];
    int (*close) (DB_ENV *, u_int32_t); /* 32-bit offset=372 size=4, 64-bit offset=616 size=8 */
    int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t); /* 32-bit offset=376 size=4, 64-bit offset=624 size=8 */
    int (*dbrename) (DB_ENV *, DB_TXN *, const char *, const char *, const char *, u_int32_t); /* 32-bit offset=380 size=4, 64-bit offset=632 size=8 */
    void (*err) (const DB_ENV *, int, const char *, ...); /* 32-bit offset=384 size=4, 64-bit offset=640 size=8 */
    void* __toku_dummy3[3];
    int (*get_cachesize) (DB_ENV *, u_int32_t *, u_int32_t *, int *); /* 32-bit offset=400 size=4, 64-bit offset=672 size=8 */
    void* __toku_dummy4[4];
    int (*get_flags) (DB_ENV *, u_int32_t *); /* 32-bit offset=420 size=4, 64-bit offset=712 size=8 */
    void* __toku_dummy5[4];
    int (*get_lg_max) (DB_ENV *, u_int32_t*); /* 32-bit offset=440 size=4, 64-bit offset=752 size=8 */
    void* __toku_dummy6[4];
    int (*get_lk_max_locks) (DB_ENV *, u_int32_t *); /* 32-bit offset=460 size=4, 64-bit offset=792 size=8 */
    void* __toku_dummy7[21];
    int (*log_archive) (DB_ENV *, char **[], u_int32_t); /* 32-bit offset=548 size=4, 64-bit offset=968 size=8 */
    void* __toku_dummy8[2];
    int (*log_flush) (DB_ENV *, const DB_LSN *); /* 32-bit offset=560 size=4, 64-bit offset=992 size=8 */
    void* __toku_dummy9[25];
    int (*open) (DB_ENV *, const char *, u_int32_t, int); /* 32-bit offset=664 size=4, 64-bit offset=1200 size=8 */
    void* __toku_dummy10[27];
    int (*set_cachesize) (DB_ENV *, u_int32_t, u_int32_t, int); /* 32-bit offset=776 size=4, 64-bit offset=1424 size=8 */
    int (*set_data_dir) (DB_ENV *, const char *); /* 32-bit offset=780 size=4, 64-bit offset=1432 size=8 */
    void* __toku_dummy11[1];
    void (*set_errcall) (DB_ENV *, void (*)(const DB_ENV *, const char *, const char *)); /* 32-bit offset=788 size=4, 64-bit offset=1448 size=8 */
    void (*set_errfile) (DB_ENV *, FILE*); /* 32-bit offset=792 size=4, 64-bit offset=1456 size=8 */
    void (*set_errpfx) (DB_ENV *, const char *); /* 32-bit offset=796 size=4, 64-bit offset=1464 size=8 */
    void* __toku_dummy12[2];
    int (*set_flags) (DB_ENV *, u_int32_t, int); /* 32-bit offset=808 size=4, 64-bit offset=1488 size=8 */
    void* __toku_dummy13[2];
    int (*set_lg_bsize) (DB_ENV *, u_int32_t); /* 32-bit offset=820 size=4, 64-bit offset=1512 size=8 */
    int (*set_lg_dir) (DB_ENV *, const char *); /* 32-bit offset=824 size=4, 64-bit offset=1520 size=8 */
    void* __toku_dummy14[1];
    int (*set_lg_max) (DB_ENV *, u_int32_t); /* 32-bit offset=832 size=4, 64-bit offset=1536 size=8 */
    void* __toku_dummy15[2];
    int (*set_lk_detect) (DB_ENV *, u_int32_t); /* 32-bit offset=844 size=4, 64-bit offset=1560 size=8 */
    void* __toku_dummy16[1];
    int (*set_lk_max_locks) (DB_ENV *, u_int32_t); /* 32-bit offset=852 size=4, 64-bit offset=1576 size=8 */
    void* __toku_dummy17[14];
    int (*set_tmp_dir) (DB_ENV *, const char *); /* 32-bit offset=912 size=4, 64-bit offset=1696 size=8 */
    void* __toku_dummy18[2];
    int (*set_verbose) (DB_ENV *, u_int32_t, int); /* 32-bit offset=924 size=4, 64-bit offset=1720 size=8 */
    void* __toku_dummy19[1];
    int (*txn_begin) (DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t); /* 32-bit offset=932 size=4, 64-bit offset=1736 size=8 */
    int (*txn_checkpoint) (DB_ENV *, u_int32_t, u_int32_t, u_int32_t); /* 32-bit offset=936 size=4, 64-bit offset=1744 size=8 */
    int (*txn_recover) (DB_ENV *, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags); /* 32-bit offset=940 size=4, 64-bit offset=1752 size=8 */
    int (*txn_stat) (DB_ENV *, DB_TXN_STAT **, u_int32_t); /* 32-bit offset=944 size=4, 64-bit offset=1760 size=8 */
    void* __toku_dummy20[2]; /* Padding at the end */
    char __toku_dummy21[16]; /* Padding at the end */
};
/* Key-range estimate: three doubles at byte offsets 0, 8 and 16 on both
 * 32-bit and 64-bit builds, followed by trailing padding. */
struct __toku_db_key_range {
    double less;     /* offset 0,  size 8 */
    double equal;    /* offset 8,  size 8 */
    double greater;  /* offset 16, size 8 */

    /* padding at the end */
    void *__toku_dummy0[207];
    char  __toku_dummy1[120];
};
/* Log sequence number placeholder: eight opaque bytes with no named fields. */
struct __toku_db_lsn {
    char __toku_dummy0[8];  /* padding at the end */
};
/* DBT: a data pointer, two 32-bit lengths, padding, and a flags word.
 * Offsets below are given as (32-bit build / 64-bit build). */
struct __toku_dbt {
    void *data;              /* offset 0 / 0,   size 4 / 8 */
    u_int32_t size;          /* offset 4 / 8,   size 4 */
    u_int32_t ulen;          /* offset 8 / 12,  size 4 */
    void *__toku_dummy0[1];
    char __toku_dummy1[8];
    u_int32_t flags;         /* offset 24 / 32, size 4 */
    /* the 64-bit build has 4 further bytes of alignment padding here */
};
/* Descriptor: a thin wrapper around a single DBT. DESCRIPTOR is the pointer
 * form, DESCRIPTOR_S the struct itself. */
typedef struct __toku_descriptor {
    DBT dbt;
} DESCRIPTOR_S, *DESCRIPTOR;
/*
 * File fragmentation report (eight 64-bit counters).
 * One header is included in 'data'.
 * One header is included in 'additional for checkpoint'.
 */
typedef struct __toku_db_fragmentation {
    uint64_t file_size_bytes;               /* total file size in bytes */
    uint64_t data_bytes;                    /* compressed user data, in bytes */
    uint64_t data_blocks;                   /* number of blocks of compressed user data */
    uint64_t checkpoint_bytes_additional;   /* additional bytes used for the checkpoint system */
    uint64_t checkpoint_blocks_additional;  /* additional blocks used for the checkpoint system */
    uint64_t unused_bytes;                  /* unused space in the file */
    uint64_t unused_blocks;                 /* number of contiguous regions of unused space */
    uint64_t largest_unused_block;          /* size of the largest contiguous unused space */
} TOKU_DB_FRAGMENTATION_S, *TOKU_DB_FRAGMENTATION;
/*
 * Dictionary handle (DB).
 *
 * Layout: the opaque implementation pointer `i`, TokuDB-specific operations
 * (with app_private and dbenv in between), then Berkeley-DB-named operations.
 * The __toku_dummyN arrays are padding; judging by the trailing offset
 * comments they keep the named members at fixed 32-bit / 64-bit offsets, so
 * members must not be reordered or resized.
 */
struct __toku_db {
struct __toku_db_internal *i;
#define db_struct_i(x) ((x)->i)
int (*key_range64)(DB*, DB_TXN *, DBT *, u_int64_t *less, u_int64_t *equal, u_int64_t *greater, int *is_exact);
int (*stat64)(DB *, DB_TXN *, DB_BTREE_STAT64 *);
int (*pre_acquire_table_lock)(DB*, DB_TXN*);
void *app_private; /* 32-bit offset=16 size=4, 64-bit offset=32 size=8 */
DB_ENV *dbenv; /* 32-bit offset=20 size=4, 64-bit offset=40 size=8 */
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
void (*get_max_row_size) (DB*, u_int32_t *max_key_size, u_int32_t *max_row_size);
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set, but without a persistent cursor */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collection and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy0[19];
char __toku_dummy1[96];
void *api_internal; /* 32-bit offset=268 size=4, 64-bit offset=440 size=8 */
void* __toku_dummy2[5];
int (*close) (DB*, u_int32_t); /* 32-bit offset=292 size=4, 64-bit offset=488 size=8 */
void* __toku_dummy3[1];
int (*cursor) (DB *, DB_TXN *, DBC **, u_int32_t); /* 32-bit offset=300 size=4, 64-bit offset=504 size=8 */
int (*del) (DB *, DB_TXN *, DBT *, u_int32_t); /* 32-bit offset=304 size=4, 64-bit offset=512 size=8 */
void* __toku_dummy4[2];
int (*fd) (DB *, int *); /* 32-bit offset=316 size=4, 64-bit offset=536 size=8 */
int (*get) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t); /* 32-bit offset=320 size=4, 64-bit offset=544 size=8 */
void* __toku_dummy5[8];
int (*get_flags) (DB *, u_int32_t *); /* 32-bit offset=356 size=4, 64-bit offset=616 size=8 */
void* __toku_dummy6[6];
int (*get_pagesize) (DB *, u_int32_t *); /* 32-bit offset=384 size=4, 64-bit offset=672 size=8 */
void* __toku_dummy7[8];
int (*key_range) (DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t); /* 32-bit offset=420 size=4, 64-bit offset=744 size=8 */
int (*open) (DB *, DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int); /* 32-bit offset=424 size=4, 64-bit offset=752 size=8 */
void* __toku_dummy8[1];
int (*put) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t); /* 32-bit offset=432 size=4, 64-bit offset=768 size=8 */
int (*remove) (DB *, const char *, const char *, u_int32_t); /* 32-bit offset=436 size=4, 64-bit offset=776 size=8 */
int (*rename) (DB *, const char *, const char *, const char *, u_int32_t); /* 32-bit offset=440 size=4, 64-bit offset=784 size=8 */
void* __toku_dummy9[9];
void (*set_errfile) (DB *, FILE*); /* 32-bit offset=480 size=4, 64-bit offset=864 size=8 */
void* __toku_dummy10[2];
int (*set_flags) (DB *, u_int32_t); /* 32-bit offset=492 size=4, 64-bit offset=888 size=8 */
void* __toku_dummy11[6];
int (*set_pagesize) (DB *, u_int32_t); /* 32-bit offset=520 size=4, 64-bit offset=944 size=8 */
void* __toku_dummy12[6];
int (*stat) (DB *, void *, u_int32_t); /* 32-bit offset=548 size=4, 64-bit offset=1000 size=8 */
void* __toku_dummy13[2];
int (*truncate) (DB *, DB_TXN *, u_int32_t *, u_int32_t); /* 32-bit offset=560 size=4, 64-bit offset=1024 size=8 */
void* __toku_dummy14[1];
int (*verify) (DB *, const char *, const char *, FILE *, u_int32_t); /* 32-bit offset=568 size=4, 64-bit offset=1040 size=8 */
void* __toku_dummy15[5]; /* Padding at the end */
char __toku_dummy16[16]; /* Padding at the end */
};
/* Active-transaction record: a 32-bit id and an LSN separated by padding.
 * Offsets below are given as (32-bit build / 64-bit build). */
struct __toku_db_txn_active {
    u_int32_t txnid;            /* offset 0 / 0,   size 4 */
    void *__toku_dummy0[2];
    char __toku_dummy1[4];
    DB_LSN lsn;                 /* offset 16 / 24, size 8 */
    char __toku_dummy2[200];    /* padding at the end */
};
/* Progress record passed (by pointer) to a TXN_PROGRESS_POLL_FUNCTION:
 * two 64-bit counters followed by two one-byte flags. */
typedef struct __toku_txn_progress {
    uint64_t entries_total;
    uint64_t entries_processed;
    uint8_t  is_commit;
    uint8_t  stalled_on_checkpoint;
} TOKU_TXN_PROGRESS_S, *TOKU_TXN_PROGRESS;

/* Poll callback: receives the progress record and a caller-supplied pointer. */
typedef void (*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void *);
/* Per-transaction statistics; currently a single 64-bit counter. */
struct txn_stat {
    u_int64_t rollback_raw_count;
};
/*
 * Transaction handle (DB_TXN).
 *
 * Starts with the owning environment and the parent transaction, followed by
 * TokuDB-specific members and then Berkeley-DB-named operations. The
 * __toku_dummyN arrays are padding; judging by the trailing offset comments
 * they keep the named members at fixed 32-bit / 64-bit offsets, so members
 * must not be reordered or resized.
 */
struct __toku_db_txn {
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64-bit offset=0 size=8 */
DB_TXN *parent; /* 32-bit offset=4 size=4, 64-bit offset=8 size=8 */
int (*txn_stat)(DB_TXN *, struct txn_stat **);
/* struct toku_list is not declared in this chunk; presumably it comes from
 * the tdb-internal.h include above - confirm. */
struct toku_list open_txns;
/* Variants of commit/abort that also take a TXN_PROGRESS_POLL_FUNCTION and a
 * caller-supplied pointer. */
int (*commit_with_progress)(DB_TXN*, uint32_t, TXN_PROGRESS_POLL_FUNCTION, void*);
int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*);
void* __toku_dummy0[13];
char __toku_dummy1[8];
void *api_internal; /* 32-bit offset=84 size=4, 64-bit offset=160 size=8 */
void* __toku_dummy2[2];
int (*abort) (DB_TXN *); /* 32-bit offset=96 size=4, 64-bit offset=184 size=8 */
int (*commit) (DB_TXN*, u_int32_t); /* 32-bit offset=100 size=4, 64-bit offset=192 size=8 */
void* __toku_dummy3[2];
u_int32_t (*id) (DB_TXN *); /* 32-bit offset=112 size=4, 64-bit offset=216 size=8 */
int (*prepare) (DB_TXN*, u_int8_t gid[DB_GID_SIZE]); /* 32-bit offset=116 size=4, 64-bit offset=224 size=8 */
void* __toku_dummy4[4]; /* Padding at the end */
};
/* Transaction statistics: only st_nactive and st_txnarray are named, the rest
 * is padding. Offsets below are given as (32-bit build / 64-bit build). */
struct __toku_db_txn_stat {
    void *__toku_dummy0[1];
    char __toku_dummy1[28];
    u_int32_t st_nactive;          /* offset 32 / 36, size 4 */
    char __toku_dummy2[16];
    DB_TXN_ACTIVE *st_txnarray;    /* offset 52 / 56, size 4 / 8 */
    void *__toku_dummy3[1];        /* padding at the end */
    char __toku_dummy4[8];         /* padding at the end */
};
/*
 * Cursor handle (DBC).
 *
 * Starts with the owning DB, followed by the callback-based c_getf_* family
 * (each takes a YDB_CALLBACK_FUNCTION plus a caller-supplied pointer; the
 * c_getf_set* variants additionally take a DBT), then Berkeley-DB-named
 * operations. The __toku_dummyN arrays are padding; judging by the trailing
 * offset comments they keep the named members at fixed 32-bit / 64-bit
 * offsets, so members must not be reordered or resized.
 */
struct __toku_dbc {
DB *dbp; /* 32-bit offset=0 size=4, 64-bit offset=0 size=8 */
int (*c_getf_first)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_last)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_next)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_prev)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_current)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_current_binding)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[14];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=204 size=4, 64-bit offset=304 size=8 */
int (*c_count) (DBC *, db_recno_t *, u_int32_t); /* 32-bit offset=208 size=4, 64-bit offset=312 size=8 */
int (*c_del) (DBC *, u_int32_t); /* 32-bit offset=212 size=4, 64-bit offset=320 size=8 */
void* __toku_dummy2[1];
int (*c_get) (DBC *, DBT *, DBT *, u_int32_t); /* 32-bit offset=220 size=4, 64-bit offset=336 size=8 */
void* __toku_dummy3[10]; /* Padding at the end */
};
/* In wrap mode, re-establish the txn_begin rename that was dropped before the
 * struct definitions. */
#ifdef _TOKUDB_WRAP_H
#define txn_begin txn_begin_tokudb
#endif
/* Library entry points. Every one is exported with default symbol visibility.
 * Only the signatures are visible here; behaviour is defined by the
 * implementation. */
int db_env_create(DB_ENV **, u_int32_t) __attribute__((__visibility__("default")));
int db_create(DB **, DB_ENV *, u_int32_t) __attribute__((__visibility__("default")));
char *db_strerror(int) __attribute__((__visibility__("default")));
const char *db_version(int*,int *,int *) __attribute__((__visibility__("default")));
int log_compare (const DB_LSN*, const DB_LSN *) __attribute__((__visibility__("default")));
/* db_env_set_func_*: each takes a function pointer whose signature matches the
 * libc routine named in the suffix (presumably installing a replacement for
 * it - confirm against the implementation). */
int db_env_set_func_fsync (int (*)(int)) __attribute__((__visibility__("default")));
int toku_set_trace_file (char *fname) __attribute__((__visibility__("default")));
int toku_close_trace_file (void) __attribute__((__visibility__("default")));
int db_env_set_func_free (void (*)(void*)) __attribute__((__visibility__("default")));
int db_env_set_func_malloc (void *(*)(size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_realloc (void *(*)(void*, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) __attribute__((__visibility__("default")));
int db_env_set_func_full_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) __attribute__((__visibility__("default")));
int db_env_set_func_write (ssize_t (*)(int, const void *, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_full_write (ssize_t (*)(int, const void *, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_fdopen (FILE* (*)(int, const char *)) __attribute__((__visibility__("default")));
int db_env_set_func_fopen (FILE* (*)(const char *, const char *)) __attribute__((__visibility__("default")));
int db_env_set_func_open (int (*)(const char *, int, int)) __attribute__((__visibility__("default")));
int db_env_set_func_fclose (int (*)(FILE*)) __attribute__((__visibility__("default")));
/* NOTE(review): the pread hook uses off_t while the pwrite hooks above use
 * toku_off_t - confirm that the difference is intended. */
int db_env_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)) __attribute__((__visibility__("default")));
void db_env_set_func_loader_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) __attribute__((__visibility__("default")));
/* Callback registration: each takes a void(void*) callback and a void* that is
 * presumably passed back to it. */
void db_env_set_checkpoint_callback (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_checkpoint_callback2 (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_recover_callback (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_recover_callback2 (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_loader_size_factor (uint32_t) __attribute__((__visibility__("default")));
void db_env_set_mvcc_garbage_collection_verification(u_int32_t) __attribute__((__visibility__("default")));
void db_env_enable_engine_status(u_int32_t) __attribute__((__visibility__("default")));
void db_env_set_flusher_thread_callback (void (*)(int, void*), void*) __attribute__((__visibility__("default")));
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -9,482 +9,3 @@
/* Give every declaration in this header C linkage when compiled as C++. */
#if defined(__cplusplus)
extern "C" {
#endif
/* Identification macros for this header flavour. */
#define TOKUDB 1
#define TOKUDB_NATIVE_H 0
/* Version triple; DB_VERSION_STRING below spells out the same 4.6.19. */
#define DB_VERSION_MAJOR 4
#define DB_VERSION_MINOR 6
#define DB_VERSION_PATCH 19
/* In wrap mode (_TOKUDB_WRAP_H defined) only the _ydb-suffixed string is
 * defined, so DB_VERSION_STRING itself is left for another header to provide. */
#ifndef _TOKUDB_WRAP_H
#define DB_VERSION_STRING "Tokutek: TokuDB 4.6.19"
#else
#define DB_VERSION_STRING_ydb "Tokutek: TokuDB (wrapped bdb)"
#endif
/* 64-bit file-offset type, guarded so that it is only declared once. */
#ifndef TOKU_OFF_T_DEFINED
#define TOKU_OFF_T_DEFINED
typedef int64_t toku_off_t;
#endif
/* Length in bytes of the gid arrays used elsewhere in this header. */
#define DB_GID_SIZE 128
/* Short public aliases for the handle structs; the structs themselves are
 * defined further down in this header. */
typedef struct __toku_db_env DB_ENV;
typedef struct __toku_db_key_range DB_KEY_RANGE;
typedef struct __toku_db_lsn DB_LSN;
typedef struct __toku_db DB;
typedef struct __toku_db_txn DB_TXN;
typedef struct __toku_db_txn_active DB_TXN_ACTIVE;
typedef struct __toku_db_txn_stat DB_TXN_STAT;
typedef struct __toku_dbc DBC;
typedef struct __toku_dbt DBT;
/* One entry of a prepared-transaction list: the transaction plus its gid. */
typedef struct __toku_db_preplist { DB_TXN *txn; uint8_t gid[DB_GID_SIZE]; } DB_PREPLIST;
typedef u_int32_t db_recno_t;
/* Callback taking two read-only DBTs and a caller-supplied pointer.
 * (Presumably the DBTs are key and value - confirm against the implementation.) */
typedef int(*YDB_CALLBACK_FUNCTION)(DBT const*, DBT const*, void*);
#include <tdb-internal.h>
/* Fallback for compilers that do not predefine __BIGGEST_ALIGNMENT__. */
#ifndef __BIGGEST_ALIGNMENT__
#define __BIGGEST_ALIGNMENT__ 16
#endif
/* 64-bit dictionary statistics record: seven consecutive u_int64_t counters. */
typedef struct __toku_db_btree_stat64 {
    /* number of unique keys; only ever an estimate, even when flattened */
    u_int64_t bt_nkeys;
    /* number of key-value pairs; an estimate, exact when flattened */
    u_int64_t bt_ndata;
    /* total size of keys+values, lengths not counted; an estimate unless flattened */
    u_int64_t bt_dsize;
    /* size of the underlying file */
    u_int64_t bt_fsize;
    /* creation time, in seconds */
    u_int64_t bt_create_time_sec;
    /* time of last serialization, in seconds */
    u_int64_t bt_modify_time_sec;
    /* time of last verification, in seconds */
    u_int64_t bt_verify_time_sec;
} DB_BTREE_STAT64;
/* Loader handle: an opaque implementation pointer followed by its operations. */
struct __toku_loader_internal;
typedef struct __toku_loader DB_LOADER;
struct __toku_loader {
    struct __toku_loader_internal *i;

    /* set the error callback */
    int (*set_error_callback)(DB_LOADER *loader,
                              void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra),
                              void *error_extra);
    /* set the polling function */
    int (*set_poll_function)(DB_LOADER *loader,
                             int (*poll_func)(void *extra, float progress),
                             void *poll_extra);
    /* give a row to the loader */
    int (*put)(DB_LOADER *loader, DBT *key, DBT *val);
    /* finish loading, free memory */
    int (*close)(DB_LOADER *loader);
    /* abort loading, free memory */
    int (*abort)(DB_LOADER *loader);
};
/* Indexer handle: an opaque implementation pointer followed by its operations. */
struct __toku_indexer_internal;
typedef struct __toku_indexer DB_INDEXER;
struct __toku_indexer {
    struct __toku_indexer_internal *i;

    /* set the error callback */
    int (*set_error_callback)(DB_INDEXER *indexer,
                              void (*error_cb)(DB *db, int i, int err, DBT *key, DBT *val, void *error_extra),
                              void *error_extra);
    /* set the polling function */
    int (*set_poll_function)(DB_INDEXER *indexer,
                             int (*poll_func)(void *extra, float progress),
                             void *poll_extra);
    /* build the indexes */
    int (*build)(DB_INDEXER *indexer);
    /* finish indexing, free memory */
    int (*close)(DB_INDEXER *indexer);
    /* abort indexing, free memory */
    int (*abort)(DB_INDEXER *indexer);
};
/* File-system free-space zones; enumerators take the values 0..3 in order. */
typedef enum {
    FS_GREEN,    /* 0: green zone, lots of space available */
    FS_YELLOW,   /* 1: yellow zone, warn but still allow operations */
    FS_RED,      /* 2: red zone, insert operations are prevented */
    FS_BLOCKED   /* 3: completely blocked; used when reporting engine status */
} fs_redzone_state;
/* Tells a reader of an engine-status row how its value is to be interpreted. */
typedef enum {
    FS_STATE = 0,   /* file system state (redzone) enum */
    UINT64   = 1,   /* uint64_t */
    CHARSTR  = 2,   /* char * */
    UNIXTIME = 3,   /* time_t */
    TOKUTIME = 4    /* tokutime_t */
} toku_engine_status_display_type;
/* One row of engine status: two labels, a display type, and the value itself. */
typedef struct __toku_engine_status_row {
    /* info schema key; should not change across revisions without good reason */
    char *keyname;
    /* text that will appear at the user interface */
    char *legend;
    /* selects how the value below is interpreted */
    toku_engine_status_display_type type;
    union {
        uint64_t num;
        char *str;
    } value;
} TOKU_ENGINE_STATUS_ROW_S, *TOKU_ENGINE_STATUS_ROW;
/* Access-method selector; only these two enumerators are declared here. */
typedef enum {
    DB_BTREE   = 1,
    DB_UNKNOWN = 5
} DBTYPE;
/*
 * Flag bits, operation selectors and return codes, defined only when this
 * header is not used in wrap mode (_TOKUDB_WRAP_H undefined). The values
 * presumably mirror the Berkeley DB release named by DB_VERSION_* - confirm
 * before changing any of them.
 *
 * Every negative constant is parenthesized so that the macro always expands
 * to a single operand, whatever expression it is pasted into.
 */
#ifndef _TOKUDB_WRAP_H
#define DB_VERB_DEADLOCK 1
#define DB_VERB_RECOVERY 8
#define DB_VERB_REPLICATION 32
#define DB_VERB_WAITSFOR 64
#define DB_ARCH_ABS 1
#define DB_ARCH_LOG 4
#define DB_CREATE 1
#define DB_CXX_NO_EXCEPTIONS 1
#define DB_EXCL 16384
#define DB_PRIVATE 8388608
#define DB_RDONLY 32
#define DB_RECOVER 64
#define DB_RUNRECOVERY (-30975)
#define DB_THREAD 128
#define DB_TXN_NOSYNC 512
#define DB_LOCK_DEFAULT 1
#define DB_LOCK_OLDEST 7
#define DB_LOCK_RANDOM 8
#define DB_KEYFIRST 13
#define DB_KEYLAST 14
#define DB_NOOVERWRITE 20
#define DB_NODUPDATA 19
#define DB_NOOVERWRITE_NO_ERROR 1
#define DB_OPFLAGS_MASK 255
#define DB_AUTO_COMMIT 33554432
#define DB_INIT_LOCK 131072
#define DB_INIT_LOG 262144
#define DB_INIT_MPOOL 524288
#define DB_INIT_TXN 2097152
#define DB_KEYEXIST (-30996)
#define DB_LOCK_DEADLOCK (-30995)
#define DB_LOCK_NOTGRANTED (-30994)
#define DB_NOTFOUND (-30989)
#define DB_SECONDARY_BAD (-30974)
#define DB_DONOTINDEX (-30998)
#define DB_BUFFER_SMALL (-30999)
#define DB_BADFORMAT (-30500)
#define DB_DELETE_ANY 65536
#define DB_TRUNCATE_WITHCURSORS 131072
#define DB_FIRST 7
#define DB_LAST 15
#define DB_CURRENT 6
#define DB_NEXT 16
#define DB_NEXT_NODUP 18
#define DB_PREV 23
#define DB_PREV_NODUP 25
#define DB_SET 26
#define DB_SET_RANGE 27
#define DB_CURRENT_BINDING 253
#define DB_SET_RANGE_REVERSE 252
#define DB_RMW 1073741824
#define DB_IS_RESETTING_OP 0x01000000
#define DB_PRELOCKED 0x00800000
#define DB_PRELOCKED_WRITE 0x00400000
#define DB_IS_HOT_INDEX 0x00100000
#define DBC_DISABLE_PREFETCHING 0x20000000
#define DB_DBT_APPMALLOC 1
#define DB_DBT_DUPOK 2
#define DB_DBT_MALLOC 8
#define DB_DBT_MULTIPLE 16
#define DB_DBT_REALLOC 64
#define DB_DBT_USERMEM 256
#define DB_LOG_AUTOREMOVE 524288
#define DB_TXN_WRITE_NOSYNC 4096
#define DB_TXN_NOWAIT 1024
#define DB_TXN_SYNC 16384
#define DB_TXN_SNAPSHOT 268435456
#define DB_READ_UNCOMMITTED 134217728
#define DB_READ_COMMITTED 67108864
#define DB_INHERIT_ISOLATION 1
#define DB_SERIALIZABLE 2
#endif
/* TOKUDB specific error codes.
 * All values are negative and numbered consecutively downward from -100000.
 * Each is parenthesized so the macro expands to a single operand in any
 * surrounding expression. */
#define TOKUDB_OUT_OF_LOCKS (-100000)
#define TOKUDB_SUCCEEDED_EARLY (-100001)
#define TOKUDB_FOUND_BUT_REJECTED (-100002)
#define TOKUDB_USER_CALLBACK_ERROR (-100003)
#define TOKUDB_DICTIONARY_TOO_OLD (-100004)
#define TOKUDB_DICTIONARY_TOO_NEW (-100005)
#define TOKUDB_DICTIONARY_NO_HEADER (-100006)
#define TOKUDB_CANCELED (-100007)
#define TOKUDB_NO_DATA (-100008)
#define TOKUDB_ACCEPT (-100009)
#define TOKUDB_MVCC_DICTIONARY_TOO_NEW (-100010)
#define TOKUDB_UPGRADE_FAILURE (-100011)
#define TOKUDB_TRY_AGAIN (-100012)
#define TOKUDB_NEEDS_REPAIR (-100013)
#define TOKUDB_CURSOR_CONTINUE (-100014)
/* LOADER flags */
#define LOADER_USE_PUTS 1
/* Callback type accepted by DB_ENV->set_generate_row_callback_for_put.
 * src_key/src_val are read-only (const); dest_key/dest_val are writable and
 * are presumably filled in with the row for dest_db -- confirm against the implementation. */
typedef int (*generate_row_for_put_func)(DB *dest_db, DB *src_db, DBT *dest_key, DBT *dest_val, const DBT *src_key, const DBT *src_val);
/* Callback type accepted by DB_ENV->set_generate_row_callback_for_del.
 * Same shape as the put variant, but with no dest_val parameter. */
typedef int (*generate_row_for_del_func)(DB *dest_db, DB *src_db, DBT *dest_key, const DBT *src_key, const DBT *src_val);
/* in wrap mode, top-level function txn_begin is renamed, but the field isn't renamed, so we have to hack it here.*/
#ifdef _TOKUDB_WRAP_H
#undef txn_begin
#endif
/*
 * Environment handle.  Apart from the private state pointer `i` and
 * `app_private`, every named member is a function pointer whose first
 * argument is a DB_ENV *.
 *
 * The TokuDB-specific entries come first.  The remaining named members carry
 * trailing comments that record their byte offset and size on 32-bit and
 * 64-bit builds, and the __toku_dummyN arrays pad the space between them.
 * NOTE(review): the padding presumably keeps this layout compatible with
 * another DB_ENV definition -- confirm against the header generator before
 * adding, removing, or reordering any member.
 */
struct __toku_db_env {
struct __toku_db_env_internal *i;
/* Accessor for the private state pointer. */
#define db_env_struct_i(x) ((x)->i)
int (*checkpointing_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic checkpoints. 0 means disabled. */;
int (*checkpointing_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */;
int (*cleaner_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */;
int (*cleaner_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */;
int (*cleaner_set_iterations) (DB_ENV*, u_int32_t) /* Change the number of attempts on each cleaner invocation. 0 means disabled. */;
int (*cleaner_get_iterations) (DB_ENV*, u_int32_t*) /* Retrieve the number of attempts on each cleaner invocation. 0 means disabled. */;
int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */;
int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */;
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
void *app_private; /* 32-bit offset=52 size=4, 64=bit offset=104 size=8 */
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
/* The inline comments name the arguments: expression text, function, file, line, errno. */
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
int (*create_loader) (DB_ENV *env, DB_TXN *txn, DB_LOADER **blp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t dbt_flags[/*N*/], uint32_t loader_flags);
int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags);
int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
const DBT *src_key, const DBT *src_val,
uint32_t num_dbs, DB **db_array, DBT *keys, DBT *vals, uint32_t *flags_array) /* insert into multiple DBs */;
int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put);
int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
const DBT *src_key, const DBT *src_val,
uint32_t num_dbs, DB **db_array, DBT *keys, uint32_t *flags_array) /* delete from multiple DBs */;
int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del);
int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,
DBT *old_src_key, DBT *old_src_data,
DBT *new_src_key, DBT *new_src_data,
uint32_t num_dbs, DB **db_array, uint32_t *flags_array,
uint32_t num_keys, DBT *keys,
uint32_t num_vals, DBT *vals) /* update multiple DBs */;
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
int (*set_lk_max_memory) (DB_ENV *env, uint64_t max);
int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max);
void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
void* __toku_dummy0[21];
char __toku_dummy1[144];
void *api1_internal; /* 32-bit offset=356 size=4, 64=bit offset=568 size=8 */
void* __toku_dummy2[8];
int (*close) (DB_ENV *, u_int32_t); /* 32-bit offset=392 size=4, 64=bit offset=640 size=8 */
int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t); /* 32-bit offset=396 size=4, 64=bit offset=648 size=8 */
int (*dbrename) (DB_ENV *, DB_TXN *, const char *, const char *, const char *, u_int32_t); /* 32-bit offset=400 size=4, 64=bit offset=656 size=8 */
void (*err) (const DB_ENV *, int, const char *, ...); /* 32-bit offset=404 size=4, 64=bit offset=664 size=8 */
void* __toku_dummy3[3];
int (*get_cachesize) (DB_ENV *, u_int32_t *, u_int32_t *, int *); /* 32-bit offset=420 size=4, 64=bit offset=696 size=8 */
void* __toku_dummy4[5];
int (*get_flags) (DB_ENV *, u_int32_t *); /* 32-bit offset=444 size=4, 64=bit offset=744 size=8 */
void* __toku_dummy5[4];
int (*get_lg_max) (DB_ENV *, u_int32_t*); /* 32-bit offset=464 size=4, 64=bit offset=784 size=8 */
void* __toku_dummy6[4];
int (*get_lk_max_locks) (DB_ENV *, u_int32_t *); /* 32-bit offset=484 size=4, 64=bit offset=824 size=8 */
void* __toku_dummy7[22];
int (*log_archive) (DB_ENV *, char **[], u_int32_t); /* 32-bit offset=576 size=4, 64=bit offset=1008 size=8 */
void* __toku_dummy8[2];
int (*log_flush) (DB_ENV *, const DB_LSN *); /* 32-bit offset=588 size=4, 64=bit offset=1032 size=8 */
void* __toku_dummy9[25];
int (*open) (DB_ENV *, const char *, u_int32_t, int); /* 32-bit offset=692 size=4, 64=bit offset=1240 size=8 */
void* __toku_dummy10[30];
int (*set_cachesize) (DB_ENV *, u_int32_t, u_int32_t, int); /* 32-bit offset=816 size=4, 64=bit offset=1488 size=8 */
void* __toku_dummy11[1];
int (*set_data_dir) (DB_ENV *, const char *); /* 32-bit offset=824 size=4, 64=bit offset=1504 size=8 */
void* __toku_dummy12[1];
void (*set_errcall) (DB_ENV *, void (*)(const DB_ENV *, const char *, const char *)); /* 32-bit offset=832 size=4, 64=bit offset=1520 size=8 */
void (*set_errfile) (DB_ENV *, FILE*); /* 32-bit offset=836 size=4, 64=bit offset=1528 size=8 */
void (*set_errpfx) (DB_ENV *, const char *); /* 32-bit offset=840 size=4, 64=bit offset=1536 size=8 */
void* __toku_dummy13[2];
int (*set_flags) (DB_ENV *, u_int32_t, int); /* 32-bit offset=852 size=4, 64=bit offset=1560 size=8 */
void* __toku_dummy14[2];
int (*set_lg_bsize) (DB_ENV *, u_int32_t); /* 32-bit offset=864 size=4, 64=bit offset=1584 size=8 */
int (*set_lg_dir) (DB_ENV *, const char *); /* 32-bit offset=868 size=4, 64=bit offset=1592 size=8 */
void* __toku_dummy15[1];
int (*set_lg_max) (DB_ENV *, u_int32_t); /* 32-bit offset=876 size=4, 64=bit offset=1608 size=8 */
void* __toku_dummy16[2];
int (*set_lk_detect) (DB_ENV *, u_int32_t); /* 32-bit offset=888 size=4, 64=bit offset=1632 size=8 */
void* __toku_dummy17[1];
int (*set_lk_max_locks) (DB_ENV *, u_int32_t); /* 32-bit offset=896 size=4, 64=bit offset=1648 size=8 */
void* __toku_dummy18[14];
int (*set_tmp_dir) (DB_ENV *, const char *); /* 32-bit offset=956 size=4, 64=bit offset=1768 size=8 */
void* __toku_dummy19[2];
int (*set_verbose) (DB_ENV *, u_int32_t, int); /* 32-bit offset=968 size=4, 64=bit offset=1792 size=8 */
void* __toku_dummy20[1];
int (*txn_begin) (DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t); /* 32-bit offset=976 size=4, 64=bit offset=1808 size=8 */
int (*txn_checkpoint) (DB_ENV *, u_int32_t, u_int32_t, u_int32_t); /* 32-bit offset=980 size=4, 64=bit offset=1816 size=8 */
int (*txn_recover) (DB_ENV *, DB_PREPLIST preplist[/*count*/], long count, /*out*/ long *retp, u_int32_t flags); /* 32-bit offset=984 size=4, 64=bit offset=1824 size=8 */
int (*txn_stat) (DB_ENV *, DB_TXN_STAT **, u_int32_t); /* 32-bit offset=988 size=4, 64=bit offset=1832 size=8 */
void* __toku_dummy21[2]; /* Padding at the end */
char __toku_dummy22[16]; /* Padding at the end */
};
/* Three double-valued fields (less, equal, greater) followed by padding.
 * NOTE(review): presumably the output record of DB->key_range, which takes a
 * DB_KEY_RANGE * -- confirm that DB_KEY_RANGE is a typedef of this struct. */
struct __toku_db_key_range {
double less; /* 32-bit offset=0 size=8, 64=bit offset=0 size=8 */
double equal; /* 32-bit offset=8 size=8, 64=bit offset=8 size=8 */
double greater; /* 32-bit offset=16 size=8, 64=bit offset=16 size=8 */
void* __toku_dummy0[214]; /* Padding at the end */
char __toku_dummy1[136]; /* Padding at the end */
};
/* Opaque 8-byte placeholder: the struct exposes no named fields, only padding. */
struct __toku_db_lsn {
char __toku_dummy0[8]; /* Padding at the end */
};
/* Describes a byte buffer: `data` points at the bytes, `size` and `ulen` are
 * 32-bit lengths, and `flags` is a 32-bit mask.
 * NOTE(review): presumably `size` is the number of bytes in use, `ulen` the
 * capacity of a caller-supplied buffer, and `flags` takes the DB_DBT_* values
 * -- confirm against the implementation. */
struct __toku_dbt {
void*data; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
u_int32_t size; /* 32-bit offset=4 size=4, 64=bit offset=8 size=4 */
u_int32_t ulen; /* 32-bit offset=8 size=4, 64=bit offset=12 size=4 */
void* __toku_dummy0[1];
char __toku_dummy1[8];
u_int32_t flags; /* 32-bit offset=24 size=4, 64=bit offset=32 size=4 */
/* 4 more bytes of alignment in the 64-bit case. */
};
/* A descriptor is a single DBT.  DESCRIPTOR is the pointer type and
 * DESCRIPTOR_S is the struct type. */
typedef struct __toku_descriptor {
DBT dbt;
} *DESCRIPTOR, DESCRIPTOR_S;
// File-space accounting record, passed by pointer (TOKU_DB_FRAGMENTATION) to DB->get_fragmentation.
// TOKU_DB_FRAGMENTATION is the pointer type; TOKU_DB_FRAGMENTATION_S is the struct type.
//One header is included in 'data'
//One header is included in 'additional for checkpoint'
typedef struct __toku_db_fragmentation {
uint64_t file_size_bytes; //Total file size in bytes
uint64_t data_bytes; //Compressed User Data in bytes
uint64_t data_blocks; //Number of blocks of compressed User Data
uint64_t checkpoint_bytes_additional; //Additional bytes used for checkpoint system
uint64_t checkpoint_blocks_additional; //Additional blocks used for checkpoint system
uint64_t unused_bytes; //Unused space in file
uint64_t unused_blocks; //Number of contiguous regions of unused space
uint64_t largest_unused_block; //Size of largest contiguous unused space
} *TOKU_DB_FRAGMENTATION, TOKU_DB_FRAGMENTATION_S;
/*
 * Database (dictionary) handle.  Apart from the private state pointer `i`,
 * `app_private`, `dbenv`, `descriptor` and `api_internal`, every named member
 * is a function pointer.
 *
 * Members with a trailing offset comment sit at the recorded byte offsets on
 * 32-bit and 64-bit builds; the __toku_dummyN arrays pad the space between
 * them.  NOTE(review): the padding presumably keeps this layout compatible
 * with another DB definition -- confirm against the header generator before
 * adding, removing, or reordering any member.
 */
struct __toku_db {
struct __toku_db_internal *i;
/* Accessor for the private state pointer. */
#define db_struct_i(x) ((x)->i)
int (*key_range64)(DB*, DB_TXN *, DBT *, u_int64_t *less, u_int64_t *equal, u_int64_t *greater, int *is_exact);
int (*stat64)(DB *, DB_TXN *, DB_BTREE_STAT64 *);
char __toku_dummy0[8];
void *app_private; /* 32-bit offset=20 size=4, 64=bit offset=32 size=8 */
DB_ENV *dbenv; /* 32-bit offset=24 size=4, 64=bit offset=40 size=8 */
int (*pre_acquire_table_lock)(DB*, DB_TXN*);
int (*pre_acquire_fileops_lock)(DB*, DB_TXN*);
const DBT* (*dbt_pos_infty)(void) /* Return the special DBT that refers to positive infinity in the lock table.*/;
const DBT* (*dbt_neg_infty)(void)/* Return the special DBT that refers to negative infinity in the lock table.*/;
void (*get_max_row_size) (DB*, u_int32_t *max_key_size, u_int32_t *max_row_size);
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set, but without a persistent cursor */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collection and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);
int (*get_readpagesize)(DB*,u_int32_t*);
int (*set_readpagesize)(DB*,u_int32_t);
int (*set_indexer)(DB*, DB_INDEXER*);
void (*get_indexer)(DB*, DB_INDEXER**);
int (*verify_with_progress)(DB *, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra, int verbose, int keep_going);
int (*update)(DB *, DB_TXN*, const DBT *key, const DBT *extra, u_int32_t flags);
int (*update_broadcast)(DB *, DB_TXN*, const DBT *extra, u_int32_t flags);
void* __toku_dummy1[23];
char __toku_dummy2[80];
void *api_internal; /* 32-bit offset=276 size=4, 64=bit offset=464 size=8 */
void* __toku_dummy3[5];
int (*close) (DB*, u_int32_t); /* 32-bit offset=300 size=4, 64=bit offset=512 size=8 */
void* __toku_dummy4[1];
int (*cursor) (DB *, DB_TXN *, DBC **, u_int32_t); /* 32-bit offset=308 size=4, 64=bit offset=528 size=8 */
int (*del) (DB *, DB_TXN *, DBT *, u_int32_t); /* 32-bit offset=312 size=4, 64=bit offset=536 size=8 */
void* __toku_dummy5[3];
int (*fd) (DB *, int *); /* 32-bit offset=328 size=4, 64=bit offset=568 size=8 */
int (*get) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t); /* 32-bit offset=332 size=4, 64=bit offset=576 size=8 */
void* __toku_dummy6[8];
int (*get_flags) (DB *, u_int32_t *); /* 32-bit offset=368 size=4, 64=bit offset=648 size=8 */
void* __toku_dummy7[7];
int (*get_pagesize) (DB *, u_int32_t *); /* 32-bit offset=400 size=4, 64=bit offset=712 size=8 */
void* __toku_dummy8[9];
int (*key_range) (DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t); /* 32-bit offset=440 size=4, 64=bit offset=792 size=8 */
int (*open) (DB *, DB_TXN *, const char *, const char *, DBTYPE, u_int32_t, int); /* 32-bit offset=444 size=4, 64=bit offset=800 size=8 */
void* __toku_dummy9[1];
int (*put) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t); /* 32-bit offset=452 size=4, 64=bit offset=816 size=8 */
int (*remove) (DB *, const char *, const char *, u_int32_t); /* 32-bit offset=456 size=4, 64=bit offset=824 size=8 */
int (*rename) (DB *, const char *, const char *, const char *, u_int32_t); /* 32-bit offset=460 size=4, 64=bit offset=832 size=8 */
void* __toku_dummy10[9];
void (*set_errfile) (DB *, FILE*); /* 32-bit offset=500 size=4, 64=bit offset=912 size=8 */
void* __toku_dummy11[2];
int (*set_flags) (DB *, u_int32_t); /* 32-bit offset=512 size=4, 64=bit offset=936 size=8 */
void* __toku_dummy12[7];
int (*set_pagesize) (DB *, u_int32_t); /* 32-bit offset=544 size=4, 64=bit offset=1000 size=8 */
void* __toku_dummy13[7];
int (*stat) (DB *, void *, u_int32_t); /* 32-bit offset=576 size=4, 64=bit offset=1064 size=8 */
void* __toku_dummy14[2];
int (*truncate) (DB *, DB_TXN *, u_int32_t *, u_int32_t); /* 32-bit offset=588 size=4, 64=bit offset=1088 size=8 */
void* __toku_dummy15[1];
int (*verify) (DB *, const char *, const char *, FILE *, u_int32_t); /* 32-bit offset=596 size=4, 64=bit offset=1104 size=8 */
void* __toku_dummy16[5]; /* Padding at the end */
char __toku_dummy17[16]; /* Padding at the end */
};
/* Record holding a 32-bit transaction id and a DB_LSN, with padding between
 * and after them.  DB_TXN_STAT.st_txnarray points at DB_TXN_ACTIVE values,
 * presumably a typedef of this struct -- TODO confirm. */
struct __toku_db_txn_active {
u_int32_t txnid; /* 32-bit offset=0 size=4, 64=bit offset=0 size=4 */
void* __toku_dummy0[2];
char __toku_dummy1[4];
DB_LSN lsn; /* 32-bit offset=16 size=8, 64=bit offset=24 size=8 */
char __toku_dummy2[200]; /* Padding at the end */
};
/* Progress record handed to a TXN_PROGRESS_POLL_FUNCTION.
 * TOKU_TXN_PROGRESS is the pointer type; TOKU_TXN_PROGRESS_S is the struct type.
 * NOTE(review): field meanings below are read from the names only -- confirm. */
typedef struct __toku_txn_progress {
uint64_t entries_total;        /* presumably the total number of entries to process */
uint64_t entries_processed;    /* presumably the number of entries processed so far */
uint8_t is_commit;             /* presumably non-zero for commit, zero for abort */
uint8_t stalled_on_checkpoint; /* presumably non-zero while waiting on a checkpoint */
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
/* Poll callback type taken by DB_TXN->commit_with_progress and
 * DB_TXN->abort_with_progress; the void * is the caller's extra argument. */
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
/* Per-transaction statistics returned through DB_TXN->txn_stat (struct txn_stat **). */
struct txn_stat {
u_int64_t rollback_raw_count; /* presumably the raw size or count of rollback data -- TODO confirm units */
};
/*
 * Transaction handle: the owning environment (mgrp), the parent transaction,
 * a toku_list link (open_txns), and function pointers for stat, commit,
 * abort, id and prepare.  Members with a trailing offset comment sit at the
 * recorded byte offsets; the __toku_dummyN arrays pad the space between them,
 * so do not add, remove, or reorder members without checking the generator.
 */
struct __toku_db_txn {
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
DB_TXN *parent; /* 32-bit offset=4 size=4, 64=bit offset=8 size=8 */
int (*txn_stat)(DB_TXN *, struct txn_stat **);
struct toku_list open_txns;
int (*commit_with_progress)(DB_TXN*, uint32_t, TXN_PROGRESS_POLL_FUNCTION, void*);
int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*);
void* __toku_dummy0[14];
char __toku_dummy1[8];
void *api_internal; /* 32-bit offset=88 size=4, 64=bit offset=168 size=8 */
void* __toku_dummy2[2];
int (*abort) (DB_TXN *); /* 32-bit offset=100 size=4, 64=bit offset=192 size=8 */
int (*commit) (DB_TXN*, u_int32_t); /* 32-bit offset=104 size=4, 64=bit offset=200 size=8 */
void* __toku_dummy3[2];
u_int32_t (*id) (DB_TXN *); /* 32-bit offset=116 size=4, 64=bit offset=224 size=8 */
int (*prepare) (DB_TXN*, u_int8_t gid[DB_GID_SIZE]); /* 32-bit offset=120 size=4, 64=bit offset=232 size=8 */
void* __toku_dummy4[4]; /* Padding at the end */
};
/* Transaction statistics record exposing two named fields, st_nactive and
 * st_txnarray, surrounded by padding.
 * NOTE(review): presumably st_nactive is the number of DB_TXN_ACTIVE entries
 * that st_txnarray points at -- confirm against DB_ENV->txn_stat. */
struct __toku_db_txn_stat {
void* __toku_dummy0[2];
char __toku_dummy1[28];
u_int32_t st_nactive; /* 32-bit offset=36 size=4, 64=bit offset=44 size=4 */
void* __toku_dummy2[1];
char __toku_dummy3[8];
DB_TXN_ACTIVE *st_txnarray; /* 32-bit offset=52 size=4, 64=bit offset=64 size=8 */
void* __toku_dummy4[1]; /* Padding at the end */
char __toku_dummy5[8]; /* Padding at the end */
};
/*
 * Cursor handle: the database it belongs to (dbp) plus function pointers.
 * The c_getf_* members each take a YDB_CALLBACK_FUNCTION and a void * extra
 * argument; the set/set_range variants also take a DBT *.
 * Members with a trailing offset comment sit at the recorded byte offsets;
 * the __toku_dummyN arrays pad the space between them, so do not add,
 * remove, or reorder members without checking the generator.
 */
struct __toku_dbc {
DB *dbp; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
int (*c_getf_first)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_last)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_next)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_prev)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_current)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_current_binding)(DBC *, u_int32_t, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_getf_set_range_reverse)(DBC *, u_int32_t, DBT *, YDB_CALLBACK_FUNCTION, void *);
int (*c_pre_acquire_range_lock)(DBC*, const DBT*, const DBT*);
void* __toku_dummy0[24];
char __toku_dummy1[104];
int (*c_close) (DBC *); /* 32-bit offset=244 size=4, 64=bit offset=384 size=8 */
int (*c_count) (DBC *, db_recno_t *, u_int32_t); /* 32-bit offset=248 size=4, 64=bit offset=392 size=8 */
int (*c_del) (DBC *, u_int32_t); /* 32-bit offset=252 size=4, 64=bit offset=400 size=8 */
void* __toku_dummy2[1];
int (*c_get) (DBC *, DBT *, DBT *, u_int32_t); /* 32-bit offset=260 size=4, 64=bit offset=416 size=8 */
void* __toku_dummy3[10]; /* Padding at the end */
};
#ifdef _TOKUDB_WRAP_H
#define txn_begin txn_begin_tokudb
#endif
/* Library entry points.  Every declaration below is marked with default symbol visibility. */
/* Handle creation, error strings, version and LSN comparison. */
int db_env_create(DB_ENV **, u_int32_t) __attribute__((__visibility__("default")));
int db_create(DB **, DB_ENV *, u_int32_t) __attribute__((__visibility__("default")));
char *db_strerror(int) __attribute__((__visibility__("default")));
const char *db_version(int*,int *,int *) __attribute__((__visibility__("default")));
int log_compare (const DB_LSN*, const DB_LSN *) __attribute__((__visibility__("default")));
int db_env_set_func_fsync (int (*)(int)) __attribute__((__visibility__("default")));
int toku_set_trace_file (char *fname) __attribute__((__visibility__("default")));
int toku_close_trace_file (void) __attribute__((__visibility__("default")));
/* db_env_set_func_*: each takes a function pointer shaped like the named
 * libc/OS routine.  NOTE(review): presumably these install replacements used
 * by the library in place of the real routine -- confirm against the implementation. */
int db_env_set_func_free (void (*)(void*)) __attribute__((__visibility__("default")));
int db_env_set_func_malloc (void *(*)(size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_realloc (void *(*)(void*, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) __attribute__((__visibility__("default")));
int db_env_set_func_full_pwrite (ssize_t (*)(int, const void *, size_t, toku_off_t)) __attribute__((__visibility__("default")));
int db_env_set_func_write (ssize_t (*)(int, const void *, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_full_write (ssize_t (*)(int, const void *, size_t)) __attribute__((__visibility__("default")));
int db_env_set_func_fdopen (FILE* (*)(int, const char *)) __attribute__((__visibility__("default")));
int db_env_set_func_fopen (FILE* (*)(const char *, const char *)) __attribute__((__visibility__("default")));
int db_env_set_func_open (int (*)(const char *, int, int)) __attribute__((__visibility__("default")));
int db_env_set_func_fclose (int (*)(FILE*)) __attribute__((__visibility__("default")));
int db_env_set_func_pread (ssize_t (*)(int, void *, size_t, off_t)) __attribute__((__visibility__("default")));
void db_env_set_func_loader_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*)) __attribute__((__visibility__("default")));
/* Callback registration: each takes a callback and a void * extra argument. */
void db_env_set_checkpoint_callback (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_checkpoint_callback2 (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_recover_callback (void (*)(void*), void*) __attribute__((__visibility__("default")));
void db_env_set_recover_callback2 (void (*)(void*), void*) __attribute__((__visibility__("default")));
/* Setters taking a single 32-bit value. */
void db_env_set_loader_size_factor (uint32_t) __attribute__((__visibility__("default")));
void db_env_set_mvcc_garbage_collection_verification(u_int32_t) __attribute__((__visibility__("default")));
void db_env_enable_engine_status(u_int32_t) __attribute__((__visibility__("default")));
void db_env_set_flusher_thread_callback (void (*)(int, void*), void*) __attribute__((__visibility__("default")));
#if defined(__cplusplus)
}
#endif
#endif

View file

@ -599,7 +599,6 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
"DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */",
"int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */",
"int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */",
"int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */",
"int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */",
"int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra)",
"int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION)",

View file

@ -308,7 +308,6 @@ struct __toku_db {
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);

View file

@ -308,7 +308,6 @@ struct __toku_db {
DESCRIPTOR descriptor /* saved row/dictionary descriptor for aiding in comparisons */;
int (*change_descriptor) (DB*, DB_TXN*, const DBT* descriptor, u_int32_t) /* change row/dictionary descriptor for a db. Available only while db is open */;
int (*getf_set)(DB*, DB_TXN*, u_int32_t, DBT*, YDB_CALLBACK_FUNCTION, void*) /* same as DBC->c_getf_set without a persistent cursor) */;
int (*flatten)(DB*, DB_TXN*) /* Flatten a dictionary, similar to (but faster than) a table scan */;
int (*optimize)(DB*) /* Run garbage collecion and promote all transactions older than oldest. Amortized (happens during flattening) */;
int (*hot_optimize)(DB*, int (*progress_callback)(void *progress_extra, float progress), void *progress_extra);
int (*get_fragmentation)(DB*,TOKU_DB_FRAGMENTATION);

View file

@ -5276,13 +5276,6 @@ brt_search_node(
return r;
}
// Unlocker callback: releases the tree lock of the brt header passed as v.
// The cachetable lock is held whenever this runs.
static void
unlock_root_tree_lock (void *v)
{
    toku_brtheader_release_treelock((struct brt_header *) v);
}
static int
toku_brt_search (BRT brt, brt_search_t *search, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v, BRT_CURSOR brtcursor, BOOL can_bulk_fetch)
// Effect: Perform a search. Associate cursor with a leaf if possible.
@ -5332,7 +5325,6 @@ try_again:
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
fill_bfe_for_subset_read(
&bfe,
brt->h,
@ -5343,22 +5335,15 @@ try_again:
brtcursor->right_is_pos_infty,
brtcursor->disable_prefetching
);
struct unlockers root_unlockers = {
.locked = TRUE,
.f = unlock_root_tree_lock,
.extra = brt->h,
.next = NULL
};
r = toku_pin_brtnode(brt, *rootp, fullhash,&root_unlockers,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node);
assert(r==0 || r== TOKUDB_TRY_AGAIN);
if (r == TOKUDB_TRY_AGAIN) {
// unlock_root_tree_lock will have released tree_lock of header
assert(!root_unlockers.locked);
root_tries++;
goto try_again;
}
assert(root_unlockers.locked);
toku_pin_brtnode_off_client_thread(
brt->h,
*rootp,
fullhash,
&bfe,
0,
NULL,
&node
);
toku_brtheader_release_treelock(brt->h);
}
@ -5500,31 +5485,6 @@ toku_brt_cursor_current(BRT_CURSOR cursor, int op, BRT_GET_CALLBACK_FUNCTION get
return getf(cursor->key.size, cursor->key.data, cursor->val.size, cursor->val.data, getf_v, false); // brt_cursor_copyout(cursor, outkey, outval);
}
// Search callback used by toku_brt_flatten.
// Ignores every argument and always reports DB_NOTFOUND.
static int
brt_flatten_getf(ITEMLEN UU(keylen),
                 bytevec UU(key),
                 ITEMLEN UU(vallen),
                 bytevec UU(val),
                 void *UU(v),
                 bool UU(lock_only))
{
    const int always_not_found = DB_NOTFOUND;
    return always_not_found;
}
// Flatten a dictionary by running one leftward search over brt with a
// temporary cursor under transaction ttxn.
// Returns the cursor-creation error if the cursor cannot be opened.
// Otherwise returns the search result, with DB_NOTFOUND treated as success;
// if the search succeeded, the cursor-close result is returned instead.
int
toku_brt_flatten(BRT brt, TOKUTXN ttxn)
{
    BRT_CURSOR scan_cursor;
    int rc = toku_brt_cursor(brt, &scan_cursor, ttxn, FALSE, FALSE);
    if (rc != 0) {
        return rc;
    }

    brt_search_t leftmost;
    brt_search_init(&leftmost, brt_cursor_compare_one, BRT_SEARCH_LEFT, 0, scan_cursor->brt);
    rc = brt_cursor_search(scan_cursor, &leftmost, brt_flatten_getf, NULL, FALSE);
    brt_search_finish(&leftmost);

    // brt_flatten_getf always answers DB_NOTFOUND, so that result is not an error here.
    if (rc == DB_NOTFOUND) {
        rc = 0;
    }

    // Always close the temporary cursor; an earlier error takes precedence.
    int close_rc = toku_brt_cursor_close(scan_cursor);
    return (rc != 0) ? rc : close_rc;
}
int
toku_brt_cursor_first(BRT_CURSOR cursor, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v)
{
@ -5939,24 +5899,15 @@ toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less_p, u_int64_t *equal_p, u_i
u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt->h, &fullhash);
{
struct unlockers root_unlockers = {
.locked = TRUE,
.f = unlock_root_tree_lock,
.extra = brt->h,
.next = NULL
};
int r = toku_pin_brtnode(brt, *rootp, fullhash, &root_unlockers,(ANCESTORS)NULL, &infinite_bounds, &bfe, FALSE, &node);
assert(r == 0 || r == TOKUDB_TRY_AGAIN);
if (r == TOKUDB_TRY_AGAIN) {
assert(!root_unlockers.locked);
goto try_again;
}
assert(root_unlockers.locked);
}
toku_pin_brtnode_off_client_thread(
brt->h,
*rootp,
fullhash,
&bfe,
0,
NULL,
&node
);
toku_brtheader_release_treelock(brt->h);
}

View file

@ -197,7 +197,6 @@ void toku_brt_cursor_set_range_lock(BRT_CURSOR, const DBT *, const DBT *, BOOL,
// get is deprecated in favor of the individual functions below
int toku_brt_cursor_get (BRT_CURSOR cursor, DBT *key, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags) __attribute__ ((warn_unused_result));
int toku_brt_flatten(BRT, TOKUTXN ttxn) __attribute__ ((warn_unused_result));
int toku_brt_cursor_first(BRT_CURSOR cursor, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
int toku_brt_cursor_last(BRT_CURSOR cursor, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));
int toku_brt_cursor_next(BRT_CURSOR cursor, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v) __attribute__ ((warn_unused_result));

View file

@ -2492,10 +2492,10 @@ int toku_cachetable_get_and_pin_nonblocking (
note_hash_count(count);
//
// In Dr. No, the ydb lock ensures that only one client may be successfully
// doing a query on a dictionary at any given time. This function
// is called with the ydb lock held. This implies that only ONE client can ever be
// in get_and_pin_nonblocking while the ydb lock is held.
// In Doofenshmirts, we keep the root-to-leaf path pinned
// as we perform a query on a dictionary at any given time.
// This implies that only ONE query client can ever be
// in get_and_pin_nonblocking for this dictionary.
// So, if there is a write lock grabbed
// on the PAIR that we want to lock, then some expensive operation
// MUST be happening (read from disk, write to disk, flush, etc...),
@ -2516,13 +2516,9 @@ int toku_cachetable_get_and_pin_nonblocking (
if (partial_fetch_required) {
p->state = CTPAIR_READING;
run_unlockers(unlockers); // The contract says the unlockers are run with the ct lock being held.
if (ct->ydb_unlock_callback) ct->ydb_unlock_callback();
// Now wait for the I/O to occur.
do_partial_fetch(ct, cf, p, pf_callback, read_extraargs, FALSE);
cachetable_unlock(ct);
if (ct->ydb_lock_callback) ct->ydb_lock_callback();
return TOKUDB_TRY_AGAIN;
}
pair_touch(p);
@ -2535,7 +2531,6 @@ int toku_cachetable_get_and_pin_nonblocking (
}
else {
run_unlockers(unlockers); // The contract says the unlockers are run with the ct lock being held.
if (ct->ydb_unlock_callback) ct->ydb_unlock_callback();
// Now wait for the I/O to occur.
// We need to obtain the lock (waiting for the write to finish), but then we only waited so we could wake up again
if (p->checkpoint_pending) {
@ -2578,7 +2573,6 @@ int toku_cachetable_get_and_pin_nonblocking (
}
}
cachetable_unlock(ct);
if (ct->ydb_lock_callback) ct->ydb_lock_callback();
return TOKUDB_TRY_AGAIN;
}
}
@ -2600,13 +2594,11 @@ int toku_cachetable_get_and_pin_nonblocking (
assert(p);
nb_mutex_write_lock(&p->nb_mutex, ct->mutex);
run_unlockers(unlockers); // we hold the ct mutex.
if (ct->ydb_unlock_callback) ct->ydb_unlock_callback();
u_int64_t t0 = get_tnow();
cachetable_fetch_pair(ct, cf, p, fetch_callback, read_extraargs, FALSE);
cachetable_miss++;
cachetable_misstime += get_tnow() - t0;
cachetable_unlock(ct);
if (ct->ydb_lock_callback) ct->ydb_lock_callback();
return TOKUDB_TRY_AGAIN;
}

View file

@ -21,6 +21,11 @@ endif
OBJS_RAW = \
ydb_lib \
ydb \
ydb_cursor \
ydb_row_lock \
ydb_env_func \
ydb_write \
ydb_db \
errors \
dlmalloc \
loader \

View file

@ -131,9 +131,4 @@ toku_ydb_unlock_and_yield(unsigned long useconds) {
ydb_unlock_internal(useconds);
}
// Returns the address of the mutex embedded in ydb_big_lock.
toku_pthread_mutex_t *
toku_ydb_mutex(void)
{
    toku_pthread_mutex_t *big_lock_mutex = &ydb_big_lock.lock;
    return big_lock_mutex;
}
#undef STATUS_VALUE

View file

@ -20,6 +20,7 @@
#include "leafentry.h"
#include "ule.h"
#include "xids.h"
#include "ydb_row_lock.h"
#include "indexer-internal.h"

View file

@ -20,6 +20,7 @@
#include "ydb_load.h"
#include "checkpoint.h"
#include "brt-internal.h"
#include "ydb_db.h"
#define lazy_assert(a) assert(a) // indicates code is incomplete
@ -239,7 +240,7 @@ int toku_loader_create_loader(DB_ENV *env,
}
// time to open the big kahuna
if ( loader->i->loader_flags & LOADER_USE_PUTS ) {
if ( FALSE && (loader->i->loader_flags & LOADER_USE_PUTS) ) {
XCALLOC_N(loader->i->N, loader->i->ekeys);
XCALLOC_N(loader->i->N, loader->i->evals);
for (int i=0; i<N; i++) {
@ -338,7 +339,7 @@ int toku_loader_put(DB_LOADER *loader, DBT *key, DBT *val)
goto cleanup;
}
if ( loader->i->loader_flags & LOADER_USE_PUTS ) {
if ( FALSE && (loader->i->loader_flags & LOADER_USE_PUTS) ) {
r = loader->i->env->put_multiple(loader->i->env,
loader->i->src_db, // src_db
loader->i->txn,
@ -389,7 +390,7 @@ int toku_loader_close(DB_LOADER *loader)
if ( loader->i->error_callback != NULL ) {
loader->i->error_callback(loader->i->dbs[loader->i->err_i], loader->i->err_i, loader->i->err_errno, &loader->i->err_key, &loader->i->err_val, loader->i->error_extra);
}
if ( !(loader->i->loader_flags & LOADER_USE_PUTS ) ) {
if (TRUE || !(loader->i->loader_flags & LOADER_USE_PUTS ) ) {
r = toku_brt_loader_abort(loader->i->brt_loader, TRUE);
}
else {
@ -397,7 +398,7 @@ int toku_loader_close(DB_LOADER *loader)
}
}
else { // no error outstanding
if ( !(loader->i->loader_flags & LOADER_USE_PUTS ) ) {
if (TRUE || !(loader->i->loader_flags & LOADER_USE_PUTS ) ) {
// use the bulk loader
// in case you've been looking - here is where the real work is done!
r = toku_brt_loader_close(loader->i->brt_loader,
@ -436,7 +437,7 @@ int toku_loader_abort(DB_LOADER *loader)
}
}
if ( !(loader->i->loader_flags & LOADER_USE_PUTS) ) {
if (TRUE || !(loader->i->loader_flags & LOADER_USE_PUTS) ) {
r = toku_brt_loader_abort(loader->i->brt_loader, TRUE);
}
toku_ydb_lock();

View file

@ -14,6 +14,7 @@
#include <ydb-internal.h>
#include <brt-internal.h>
#include <toku_stdint.h>
#include <valgrind/drd.h>
/* TODO: Yoni should check that all asserts make sense instead of panic,
and all early returns make sense instead of panic,
@ -86,12 +87,17 @@ toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS statp) {
*statp = mgr->status;
}
static inline int lt_panic(toku_lock_tree *tree, int r) {
return tree->panic(tree->db, r);
return tree->mgr->panic(tree->db, r);
}
// forward defs of lock request tree functions
static void toku_lock_request_tree_init(toku_lock_tree *tree);
static void toku_lock_request_tree_destroy(toku_lock_tree *tree);
static void toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request);
static void toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request);
static toku_lock_request *toku_lock_request_tree_find(toku_lock_tree *tree, TXNID id);
const uint32_t __toku_default_buflen = 2;
static const DBT __toku_lt_infinity;
@ -100,40 +106,6 @@ static const DBT __toku_lt_neg_infinity;
const DBT* const toku_lt_infinity = &__toku_lt_infinity;
const DBT* const toku_lt_neg_infinity = &__toku_lt_neg_infinity;
static toku_pthread_mutex_t *
toku_ltm_get_mutex(toku_ltm *ltm) {
toku_pthread_mutex_t *lock = ltm->use_lock;
if (lock == NULL)
lock = &ltm->lock;
return lock;
}
void
toku_ltm_set_mutex(toku_ltm *ltm, toku_pthread_mutex_t *use_lock) {
ltm->use_lock = use_lock;
}
static void
toku_ltm_init_mutex(toku_ltm *ltm) {
int r = toku_pthread_mutex_init(&ltm->lock, NULL); assert_zero(r);
ltm->use_lock = NULL;
}
static void
toku_ltm_destroy_mutex(toku_ltm *ltm) {
int r = toku_pthread_mutex_destroy(&ltm->lock); assert_zero(r);
}
void
toku_ltm_lock_mutex(toku_ltm *ltm) {
int r = toku_pthread_mutex_lock(toku_ltm_get_mutex(ltm)); assert_zero(r);
}
void
toku_ltm_unlock_mutex(toku_ltm *ltm) {
int r = toku_pthread_mutex_unlock(toku_ltm_get_mutex(ltm)); assert_zero(r);
}
char*
toku_lt_strerror(TOKU_LT_ERROR r) {
if (r >= 0)
@ -143,6 +115,7 @@ toku_lt_strerror(TOKU_LT_ERROR r) {
}
return "Unknown error in locking data structures.\n";
}
/* Compare two payloads assuming that at least one of them is infinite */
static inline int
infinite_compare(const DBT* a, const DBT* b) {
@ -233,60 +206,70 @@ int
toku_ltm_create(toku_ltm** pmgr,
uint32_t locks_limit,
uint64_t lock_memory_limit,
int (*panic)(DB*, int),
toku_dbt_cmp (*get_compare_fun_from_db)(DB*)) {
int (*panic)(DB*, int)) {
int r = ENOSYS;
toku_ltm* tmp_mgr = NULL;
toku_ltm* mgr = NULL;
if (!pmgr || !locks_limit) {
r = EINVAL; goto cleanup;
}
assert(panic && get_compare_fun_from_db);
assert(panic);
tmp_mgr = (toku_ltm*) toku_malloc(sizeof(*tmp_mgr));
if (!tmp_mgr) {
mgr = (toku_ltm*) toku_malloc(sizeof(*mgr));
if (!mgr) {
r = ENOMEM; goto cleanup;
}
memset(tmp_mgr, 0, sizeof(toku_ltm));
memset(mgr, 0, sizeof(toku_ltm));
r = toku_ltm_set_max_locks(tmp_mgr, locks_limit);
r = toku_ltm_set_max_locks(mgr, locks_limit);
if (r != 0)
goto cleanup;
r = toku_ltm_set_max_lock_memory(tmp_mgr, lock_memory_limit);
r = toku_ltm_set_max_lock_memory(mgr, lock_memory_limit);
if (r != 0)
goto cleanup;
tmp_mgr->panic = panic;
tmp_mgr->get_compare_fun_from_db = get_compare_fun_from_db;
mgr->panic = panic;
r = toku_lth_create(&tmp_mgr->lth);
r = toku_lth_create(&mgr->lth);
if (r != 0)
goto cleanup;
if (!tmp_mgr->lth) {
if (!mgr->lth) {
r = ENOMEM; goto cleanup;
}
r = toku_idlth_create(&tmp_mgr->idlth);
r = toku_idlth_create(&mgr->idlth);
if (r != 0)
goto cleanup;
if (!tmp_mgr->idlth) {
if (!mgr->idlth) {
r = ENOMEM; goto cleanup;
}
toku_ltm_init_mutex(tmp_mgr);
toku_mutex_init(&mgr->mutex, NULL);
DRD_IGNORE_VAR(mgr->status);
r = 0;
*pmgr = tmp_mgr;
*pmgr = mgr;
cleanup:
if (r != 0) {
if (tmp_mgr) {
if (tmp_mgr->lth)
toku_lth_close(tmp_mgr->lth);
if (tmp_mgr->idlth)
toku_idlth_close(tmp_mgr->idlth);
toku_free(tmp_mgr);
if (mgr) {
if (mgr->lth)
toku_lth_close(mgr->lth);
if (mgr->idlth)
toku_idlth_close(mgr->idlth);
toku_free(mgr);
}
}
return r;
}
// Open the lock tree manager.
// There is no work to do at open time yet; the call only validates its
// argument.
// Returns EINVAL when mgr is NULL, 0 otherwise.
int
toku_ltm_open(toku_ltm *mgr) {
    return (mgr == NULL) ? EINVAL : 0;
}
int
toku_ltm_close(toku_ltm* mgr) {
int r = ENOSYS;
@ -305,7 +288,8 @@ toku_ltm_close(toku_ltm* mgr) {
}
toku_lth_close(mgr->lth);
toku_idlth_close(mgr->idlth);
toku_ltm_destroy_mutex(mgr);
toku_mutex_destroy(&mgr->mutex);
DRD_STOP_IGNORING_VAR(mgr->status);
assert(mgr->curr_locks == 0 && mgr->curr_lock_memory == 0);
toku_free(mgr);
@ -314,7 +298,6 @@ cleanup:
return r;
}
int
toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit) {
if (!mgr || !locks_limit)
@ -354,15 +337,15 @@ toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit) {
static inline void
ltm_incr_locks(toku_ltm* tree_mgr, uint32_t replace_locks) {
assert(replace_locks <= tree_mgr->curr_locks);
tree_mgr->curr_locks -= replace_locks;
tree_mgr->curr_locks += 1;
(void) __sync_fetch_and_sub(&tree_mgr->curr_locks, replace_locks);
(void) __sync_fetch_and_add(&tree_mgr->curr_locks, 1);
}
static inline void
ltm_decr_locks(toku_ltm* tree_mgr, uint32_t locks) {
assert(tree_mgr);
assert(tree_mgr->curr_locks >= locks);
tree_mgr->curr_locks -= locks;
(void) __sync_fetch_and_sub(&tree_mgr->curr_locks, locks);
}
static int
@ -375,7 +358,7 @@ ltm_out_of_locks(toku_ltm *mgr) {
static void
ltm_incr_lock_memory(toku_ltm *mgr, size_t s) {
mgr->curr_lock_memory += s;
(void) __sync_add_and_fetch(&mgr->curr_lock_memory, s);
}
void
@ -387,7 +370,7 @@ toku_ltm_incr_lock_memory(void *extra, size_t s) {
static void
ltm_decr_lock_memory(toku_ltm *mgr, size_t s) {
assert(mgr->curr_lock_memory >= s);
mgr->curr_lock_memory -= s;
(void) __sync_sub_and_fetch(&mgr->curr_lock_memory, s);
}
void
@ -468,21 +451,8 @@ toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn) {
static inline int
lt_add_locked_txn(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS;
bool half_done = FALSE;
/* Neither selfread nor selfwrite exist. */
r = toku_rth_insert(tree->rth, txn);
if (r != 0)
goto cleanup;
r = toku_rth_insert(tree->txns_still_locked, txn);
if (r != 0) {
half_done = TRUE; goto cleanup;
}
r = 0;
cleanup:
if (half_done)
toku_rth_delete(tree->rth, txn);
int r = toku_rth_insert(tree->rth, txn);
return r;
}
@ -1128,21 +1098,16 @@ r_backwards(toku_interval* range) {
(toku_lt_point_cmp(left, right) > 0));
}
static inline int lt_unlock_deferred_txns(toku_lock_tree* tree);
static inline void
lt_set_comparison_functions(toku_lock_tree* tree, DB* db) {
assert(!tree->db && !tree->compare_fun);
assert(!tree->db);
tree->db = db;
tree->compare_fun = tree->get_compare_fun_from_db(tree->db);
assert(tree->compare_fun);
}
static inline void
lt_clear_comparison_functions(toku_lock_tree* tree) {
assert(tree);
tree->db = NULL;
tree->compare_fun = NULL;
}
/* Preprocess step for acquire functions. */
@ -1155,10 +1120,6 @@ lt_preprocess(toku_lock_tree* tree, DB* db,
toku_interval* query) {
int r = ENOSYS;
if (!tree || !db || !key_left || !key_right) {
r = EINVAL; goto cleanup;
}
/* Verify that NULL keys have payload and size that are mutually
consistent*/
if ((r = lt_verify_null_key(key_left)) != 0)
@ -1179,11 +1140,7 @@ lt_preprocess(toku_lock_tree* tree, DB* db,
r = 0;
cleanup:
if (r == 0) {
assert(tree->db && tree->compare_fun);
/* Cleanup all existing deleted transactions */
if (!toku_rth_is_empty(tree->txns_to_unlock)) {
r = lt_unlock_deferred_txns(tree);
}
assert(tree->db);
}
return r;
}
@ -1359,22 +1316,19 @@ lt_borderwrite_insert(toku_lock_tree* tree, toku_interval* query, toku_range* to
/* TODO: Investigate better way of passing comparison functions. */
int
toku_lt_create(toku_lock_tree** ptree,
int (*panic)(DB*, int),
toku_ltm* mgr,
toku_dbt_cmp (*get_compare_fun_from_db)(DB*)) {
toku_dbt_cmp compare_fun) {
int r = ENOSYS;
toku_lock_tree* tmp_tree = NULL;
if (!ptree || !mgr ||
!get_compare_fun_from_db || !panic) {
if (!ptree || !mgr || !compare_fun) {
r = EINVAL; goto cleanup;
}
tmp_tree = (toku_lock_tree*)toku_malloc(sizeof(*tmp_tree));
if (!tmp_tree) { r = ENOMEM; goto cleanup; }
memset(tmp_tree, 0, sizeof(toku_lock_tree));
tmp_tree->panic = panic;
tmp_tree->mgr = mgr;
tmp_tree->get_compare_fun_from_db = get_compare_fun_from_db;
tmp_tree->compare_fun = compare_fun;
tmp_tree->lock_escalation_allowed = TRUE;
r = toku_rt_create(&tmp_tree->borderwrite, toku_lt_point_cmp, lt_txn_cmp, FALSE,
toku_ltm_incr_lock_memory, toku_ltm_decr_lock_memory, mgr);
@ -1384,12 +1338,7 @@ toku_lt_create(toku_lock_tree** ptree,
r = toku_rth_create(&tmp_tree->rth);
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->txns_to_unlock);
if (r != 0)
goto cleanup;
r = toku_rth_create(&tmp_tree->txns_still_locked);
if (r != 0)
goto cleanup;
tmp_tree->buflen = __toku_default_buflen;
tmp_tree->buf = (toku_range*)
toku_malloc(tmp_tree->buflen * sizeof(toku_range));
@ -1404,7 +1353,7 @@ toku_lt_create(toku_lock_tree** ptree,
if (r != 0)
goto cleanup;
toku_lock_request_tree_init(tmp_tree);
toku_mutex_init(&tmp_tree->mutex, NULL);
tmp_tree->ref_count = 1;
*ptree = tmp_tree;
r = 0;
@ -1415,8 +1364,6 @@ cleanup:
toku_rt_close(tmp_tree->borderwrite);
if (tmp_tree->rth)
toku_rth_close(tmp_tree->rth);
if (tmp_tree->txns_to_unlock)
toku_rth_close(tmp_tree->txns_to_unlock);
if (tmp_tree->buf)
toku_free(tmp_tree->buf);
if (tmp_tree->bw_buf)
@ -1433,13 +1380,14 @@ void
toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id) {
assert(mgr && dict_id.dictid != DICTIONARY_ID_NONE.dictid);
toku_lt_map* map = NULL;
toku_mutex_lock(&mgr->mutex);
map = toku_idlth_find(mgr->idlth, dict_id);
if (map) {
toku_idlth_delete(mgr->idlth, dict_id);
}
toku_mutex_unlock(&mgr->mutex);
}
static inline void
toku_lt_set_dict_id(toku_lock_tree* lt, DICTIONARY_ID dict_id) {
assert(lt && dict_id.dictid != DICTIONARY_ID_NONE.dictid);
@ -1450,8 +1398,7 @@ static void lt_add_db(toku_lock_tree* tree, DB *db);
static void lt_remove_db(toku_lock_tree* tree, DB *db);
int
toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
DICTIONARY_ID dict_id, DB *db) {
toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB *db, toku_dbt_cmp compare_fun) {
/* first look in hash table to see if lock tree exists for that db,
if so return it */
int r = ENOSYS;
@ -1461,6 +1408,7 @@ toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
bool added_to_idlth = FALSE;
bool added_extant_db = FALSE;
toku_mutex_lock(&mgr->mutex);
map = toku_idlth_find(mgr->idlth, dict_id);
if (map != NULL) {
/* Load already existing lock tree. */
@ -1473,7 +1421,7 @@ toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
goto cleanup;
}
/* Must create new lock tree for this dict_id*/
r = toku_lt_create(&tree, mgr->panic, mgr, mgr->get_compare_fun_from_db);
r = toku_lt_create(&tree, mgr, compare_fun);
if (r != 0)
goto cleanup;
toku_lt_set_dict_id(tree, dict_id);
@ -1500,6 +1448,7 @@ toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
*ptree = tree;
r = 0;
cleanup:
toku_mutex_unlock(&mgr->mutex);
if (r == 0) {
mgr->STATUS_VALUE(LTM_LT_CREATE)++;
mgr->STATUS_VALUE(LTM_LT_NUM)++;
@ -1508,6 +1457,7 @@ cleanup:
}
else {
if (tree != NULL) {
toku_mutex_lock(&mgr->mutex);
if (added_to_ltm)
toku_ltm_remove_lt(mgr, tree);
if (added_to_idlth)
@ -1515,6 +1465,7 @@ cleanup:
if (added_extant_db)
lt_remove_db(tree, db);
toku_lt_close(tree);
toku_mutex_unlock(&mgr->mutex);
}
mgr->STATUS_VALUE(LTM_LT_CREATE_FAIL)++;
}
@ -1547,10 +1498,8 @@ toku_lt_close(toku_lock_tree* tree) {
first_error = r;
}
toku_rth_close(tree->rth);
toku_rth_close(tree->txns_to_unlock);
toku_rth_close(tree->txns_still_locked);
toku_omt_destroy(&tree->dbs);
toku_mutex_destroy(&tree->mutex);
toku_free(tree->buf);
toku_free(tree->bw_buf);
toku_free(tree->verify_buf);
@ -1560,13 +1509,6 @@ cleanup:
return r;
}
// toku_lt_acquire_read_lock() used only by test programs
int
toku_lt_acquire_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key) {
return toku_lt_acquire_range_read_lock(tree, db, txn, key, key);
}
static int
lt_try_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT* key_right) {
int r;
@ -1871,8 +1813,8 @@ cleanup:
return r;
}
int
toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT* key_right) {
static int
toku_lt_acquire_range_read_lock_internal(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT* key_right) {
int r = ENOSYS;
r = lt_try_acquire_range_read_lock(tree, db, txn,
@ -1904,6 +1846,24 @@ toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, const D
return r;
}
// Public entry point for acquiring a range read lock on behalf of txn.
// Rejects NULL arguments, then runs the internal acquire routine while
// holding the lock tree's mutex.
// Returns EINVAL if tree, db, key_left or key_right is NULL; otherwise
// whatever toku_lt_acquire_range_read_lock_internal() returns.
int
toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT *key_right) {
    if (tree == NULL || db == NULL || key_left == NULL || key_right == NULL)
        return EINVAL;

    toku_mutex_lock(&tree->mutex);
    int result = toku_lt_acquire_range_read_lock_internal(tree, db, txn, key_left, key_right);
    toku_mutex_unlock(&tree->mutex);
    return result;
}
// Single-key form of toku_lt_acquire_range_read_lock(): requests a read
// lock on the range whose left and right endpoints are both `key`.
// Returns whatever the range variant returns.
int
toku_lt_acquire_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key) {
return toku_lt_acquire_range_read_lock(tree, db, txn, key, key);
}
static int
lt_try_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT* key_right) {
int r;
@ -1990,8 +1950,8 @@ cleanup:
return r;
}
int
toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT* key_right) {
static int
toku_lt_acquire_range_write_lock_internal(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT* key_right) {
int r = ENOSYS;
r = lt_try_acquire_range_write_lock(tree, db, txn, key_left, key_right);
@ -2021,6 +1981,19 @@ toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const
return r;
}
// Public entry point for acquiring a range write lock on behalf of txn.
// Rejects NULL arguments, then runs the internal acquire routine while
// holding the lock tree's mutex.
// Returns EINVAL if tree, db, key_left or key_right is NULL; otherwise
// whatever toku_lt_acquire_range_write_lock_internal() returns.
int
toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key_left, const DBT* key_right) {
    if (tree == NULL || db == NULL || key_left == NULL || key_right == NULL)
        return EINVAL;

    toku_mutex_lock(&tree->mutex);
    int result = toku_lt_acquire_range_write_lock_internal(tree, db, txn, key_left, key_right);
    toku_mutex_unlock(&tree->mutex);
    return result;
}
int
toku_lt_acquire_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const DBT* key) {
return toku_lt_acquire_range_write_lock(tree, db, txn, key, key);
@ -2130,24 +2103,6 @@ lt_border_delete(toku_lock_tree* tree, toku_range_tree* rt) {
return 0;
}
static inline int
lt_defer_unlocking_txn(toku_lock_tree* tree, TXNID txnid) {
int r = ENOSYS;
rt_forest* forest = toku_rth_find(tree->txns_to_unlock, txnid);
/* Should not be unlocking a transaction twice. */
assert(!forest);
r = toku_rth_insert(tree->txns_to_unlock, txnid);
if (r != 0)
goto cleanup;
if (toku_rth_find(tree->txns_still_locked, txnid) != NULL) {
toku_rth_delete(tree->txns_still_locked, txnid);
}
r = 0;
cleanup:
return r;
}
static inline int
lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
if (!tree)
@ -2169,7 +2124,17 @@ lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
if (selfwrite) {
uint32_t size = toku_rt_get_size(selfwrite);
ranges += size;
DB *db = NULL;
if (toku_omt_size(tree->dbs) > 0) {
OMTVALUE dbv;
r = toku_omt_fetch(tree->dbs, 0, &dbv);
assert_zero(r);
db = dbv;
}
lt_set_comparison_functions(tree, db);
r = lt_border_delete(tree, selfwrite);
lt_clear_comparison_functions(tree);
if (r != 0)
return lt_panic(tree, r);
r = lt_free_contents(tree, selfwrite);
@ -2185,69 +2150,22 @@ lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
return 0;
}
static inline int
lt_unlock_deferred_txns(toku_lock_tree* tree) {
int r = ENOSYS;
toku_rth_start_scan(tree->txns_to_unlock);
rt_forest* forest = NULL;
while ((forest = toku_rth_next(tree->txns_to_unlock)) != NULL) {
/* This can only fail with a panic so it is fine to quit immediately. */
r = lt_unlock_txn(tree, forest->hash_key);
if (r != 0)
goto cleanup;
}
toku_rth_clear(tree->txns_to_unlock);
r = 0;
cleanup:
return r;
}
static inline void
lt_clear(toku_lock_tree* tree) {
int r;
assert(tree);
toku_rt_clear(tree->borderwrite);
toku_rth_start_scan(tree->rth);
rt_forest* forest;
uint32_t ranges = 0;
while ((forest = toku_rth_next(tree->rth)) != NULL) {
if (forest->self_read) {
ranges += toku_rt_get_size(forest->self_read);
r = lt_free_contents(tree, forest->self_read);
assert_zero(r);
}
if (forest->self_write) {
ranges += toku_rt_get_size(forest->self_write);
r = lt_free_contents(tree, forest->self_write);
assert_zero(r);
}
}
toku_rth_clear(tree->rth);
toku_rth_clear(tree->txns_to_unlock);
/* tree->txns_still_locked is already empty, so we do not clear it. */
ltm_decr_locks(tree->mgr, ranges);
}
static void lt_retry_lock_requests(toku_lock_tree *tree);
int
toku_lt_unlock(toku_lock_tree* tree, TXNID txn) {
int r = ENOSYS;
if (!tree) {
r = EINVAL; goto cleanup;
}
toku_lt_unlock_txn(toku_lock_tree* tree, TXNID txn) {
#if TOKU_LT_DEBUG
if (toku_lt_debug)
printf("%s:%u %lu\n", __FUNCTION__, __LINE__, txn);
#endif
r = lt_defer_unlocking_txn(tree, txn);
if (r != 0)
goto cleanup;
if (toku_rth_is_empty(tree->txns_still_locked))
lt_clear(tree);
toku_lt_retry_lock_requests_locked(tree);
r = 0;
int r = 0;
if (!tree) {
r = EINVAL; goto cleanup;
}
toku_mutex_lock(&tree->mutex);
lt_unlock_txn(tree, txn);
lt_retry_lock_requests(tree);
toku_mutex_unlock(&tree->mutex);
cleanup:
return r;
}
@ -2256,16 +2174,18 @@ void
toku_lt_add_ref(toku_lock_tree* tree) {
assert(tree);
assert(tree->ref_count > 0);
tree->ref_count++;
(void) __sync_add_and_fetch(&tree->ref_count, 1);
}
// Detach a lock tree from its manager.
// Under the manager's mutex: remove the tree from the manager, and drop the
// dict_id -> tree mapping only if that mapping still refers to this tree.
static void
toku_ltm_stop_managing_lt(toku_ltm* mgr, toku_lock_tree* tree) {
    toku_mutex_lock(&mgr->mutex);

    toku_ltm_remove_lt(mgr, tree);

    // The id table may hold no entry, or an entry for a different tree;
    // in either case it must be left alone.
    toku_lt_map* entry = toku_idlth_find(mgr->idlth, tree->dict_id);
    if (entry != NULL && entry->tree == tree) {
        toku_idlth_delete(mgr->idlth, tree->dict_id);
    }

    toku_mutex_unlock(&mgr->mutex);
}
int
@ -2273,8 +2193,8 @@ toku_lt_remove_ref(toku_lock_tree* tree) {
int r = ENOSYS;
assert(tree);
assert(tree->ref_count > 0);
tree->ref_count--;
if (tree->ref_count > 0) {
uint32_t ref_count = __sync_sub_and_fetch(&tree->ref_count, 1);
if (ref_count > 0) {
r = 0; goto cleanup;
}
assert(tree->dict_id.dictid != DICTIONARY_ID_NONE.dictid);
@ -2302,6 +2222,7 @@ find_db (OMTVALUE v, void *dbv) {
static void
lt_add_db(toku_lock_tree* tree, DB *db) {
toku_mutex_lock(&tree->mutex);
if (db != NULL) {
int r;
OMTVALUE get_dbv = NULL;
@ -2311,10 +2232,12 @@ lt_add_db(toku_lock_tree* tree, DB *db) {
r = toku_omt_insert_at(tree->dbs, db, index);
assert_zero(r);
}
toku_mutex_unlock(&tree->mutex);
}
static void
lt_remove_db(toku_lock_tree* tree, DB *db) {
toku_mutex_lock(&tree->mutex);
if (db != NULL) {
int r;
OMTVALUE get_dbv = NULL;
@ -2325,13 +2248,13 @@ lt_remove_db(toku_lock_tree* tree, DB *db) {
r = toku_omt_delete_at(tree->dbs, index);
assert_zero(r);
}
toku_mutex_unlock(&tree->mutex);
}
void
toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db) {
int r;
lt_remove_db(tree, db);
r = toku_lt_remove_ref(tree);
int r = toku_lt_remove_ref(tree);
assert_zero(r);
}
@ -2384,8 +2307,12 @@ toku_lock_request_init(toku_lock_request *lock_request, DB *db, TXNID txnid, con
void
toku_lock_request_destroy(toku_lock_request *lock_request) {
if (lock_request->state == LOCK_REQUEST_PENDING)
if (lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_tree *tree = lock_request->tree;
toku_mutex_lock(&tree->mutex);
toku_lock_request_tree_delete(lock_request->tree, lock_request);
toku_mutex_unlock(&tree->mutex);
}
toku_lock_request_destroy_wait(lock_request);
toku_free(lock_request->key_left_copy.data);
toku_free(lock_request->key_right_copy.data);
@ -2399,8 +2326,8 @@ toku_lock_request_complete(toku_lock_request *lock_request, int complete_r) {
static const struct timeval max_timeval = { ~0, 0 };
int
toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time) {
static int
toku_lock_request_wait_internal(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time, bool tree_locked) {
#if TOKU_LT_DEBUG
if (toku_lt_debug)
printf("%s:%u %lu\n", __FUNCTION__, __LINE__, lock_request->txnid);
@ -2414,28 +2341,37 @@ toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, st
long int d_sec = usec / 1000000;
long int d_usec = usec % 1000000;
struct timespec ts = { sec + d_sec, d_usec * 1000 };
if (!tree_locked) toku_mutex_lock(&tree->mutex);
while (lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_request_init_wait(lock_request);
r = pthread_cond_timedwait(&lock_request->wait, toku_ltm_get_mutex(tree->mgr), &ts);
r = pthread_cond_timedwait(&lock_request->wait, &tree->mutex, &ts);
assert(r == 0 || r == ETIMEDOUT);
if (r == ETIMEDOUT && lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_request_tree_delete(tree, lock_request);
toku_lock_request_complete(lock_request, DB_LOCK_NOTGRANTED);
}
}
if (!tree_locked) toku_mutex_unlock(&tree->mutex);
} else {
if (!tree_locked) toku_mutex_lock(&tree->mutex);
while (lock_request->state == LOCK_REQUEST_PENDING) {
toku_lock_request_init_wait(lock_request);
r = toku_pthread_cond_wait(&lock_request->wait, toku_ltm_get_mutex(tree->mgr)); assert_zero(r);
r = toku_pthread_cond_wait(&lock_request->wait, &tree->mutex); assert_zero(r);
}
if (!tree_locked) toku_mutex_unlock(&tree->mutex);
}
assert(lock_request->state == LOCK_REQUEST_COMPLETE);
return lock_request->complete_r;
}
// Wait on a lock request; wait_time is forwarded unchanged to
// toku_lock_request_wait_internal().
// tree_locked is passed as false, so the internal routine takes and releases
// tree->mutex itself -- presumably the caller must not already hold it.
// Returns the value produced by the internal routine.
int
toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time) {
return toku_lock_request_wait_internal(lock_request, tree, wait_time, false);
}
int
toku_lock_request_wait_with_default_timeout(toku_lock_request *lock_request, toku_lock_tree *tree) {
return toku_lock_request_wait(lock_request, tree, &tree->mgr->lock_wait_time);
return toku_lock_request_wait_internal(lock_request, tree, &tree->mgr->lock_wait_time, false);
}
void
@ -2445,12 +2381,16 @@ toku_lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *tree U
}
}
void
// a lock request tree contains pending lock requests.
// initialize a lock request tree.
static void
toku_lock_request_tree_init(toku_lock_tree *tree) {
int r = toku_omt_create(&tree->lock_requests); assert_zero(r);
}
void
// destroy a lock request tree.
// the tree must be empty when destroyed.
static void
toku_lock_request_tree_destroy(toku_lock_tree *tree) {
assert(toku_omt_size(tree->lock_requests) == 0);
toku_omt_destroy(&tree->lock_requests);
@ -2467,7 +2407,8 @@ compare_lock_request(OMTVALUE a, void *b) {
return 0;
}
void
// insert a lock request into the tree.
static void
toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request) {
lock_request->tree = tree;
int r;
@ -2477,7 +2418,8 @@ toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_requ
r = toku_omt_insert_at(tree->lock_requests, lock_request, idx); assert_zero(r);
}
void
// delete a lock request from the tree.
static void
toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request) {
int r;
OMTVALUE v;
@ -2488,7 +2430,8 @@ toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_requ
}
}
toku_lock_request *
// find a lock request for a given transaction id.
static toku_lock_request *
toku_lock_request_tree_find(toku_lock_tree *tree, TXNID id) {
int r;
OMTVALUE v;
@ -2531,14 +2474,16 @@ static void print_key(const char *sp, const DBT *k) {
}
#endif
int
static void toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request);
static int
toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted) {
int r;
assert(lock_request->state == LOCK_REQUEST_INIT);
if (lock_request->type == LOCK_REQUEST_READ) {
r = toku_lt_acquire_range_read_lock(tree, lock_request->db, lock_request->txnid, lock_request->key_left, lock_request->key_right);
r = toku_lt_acquire_range_read_lock_internal(tree, lock_request->db, lock_request->txnid, lock_request->key_left, lock_request->key_right);
} else if (lock_request->type == LOCK_REQUEST_WRITE) {
r = toku_lt_acquire_range_write_lock(tree, lock_request->db, lock_request->txnid, lock_request->key_left, lock_request->key_right);
r = toku_lt_acquire_range_write_lock_internal(tree, lock_request->db, lock_request->txnid, lock_request->key_left, lock_request->key_right);
} else
assert(0);
#if TOKU_LT_DEBUG
@ -2572,43 +2517,36 @@ toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *
int
toku_lock_request_start(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted) {
toku_ltm_lock_mutex(tree->mgr);
toku_mutex_lock(&tree->mutex);
int r = toku_lock_request_start_locked(lock_request, tree, copy_keys_if_not_granted);
toku_ltm_unlock_mutex(tree->mgr);
toku_mutex_unlock(&tree->mutex);
return r;
}
int
static int
toku_lt_acquire_lock_request_with_timeout_locked(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
int r = toku_lock_request_start_locked(lock_request, tree, false);
if (r == DB_LOCK_NOTGRANTED)
r = toku_lock_request_wait(lock_request, tree, wait_time);
r = toku_lock_request_wait_internal(lock_request, tree, wait_time, true);
return r;
}
int
toku_lt_acquire_lock_request_with_timeout(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time) {
toku_ltm_lock_mutex(tree->mgr);
toku_mutex_lock(&tree->mutex);
int r = toku_lt_acquire_lock_request_with_timeout_locked(tree, lock_request, wait_time);
toku_ltm_unlock_mutex(tree->mgr);
toku_mutex_unlock(&tree->mutex);
return r;
}
int
toku_lt_acquire_lock_request_with_default_timeout_locked(toku_lock_tree *tree, toku_lock_request *lock_request) {
return toku_lt_acquire_lock_request_with_timeout_locked(tree, lock_request, &tree->mgr->lock_wait_time);
}
int
toku_lt_acquire_lock_request_with_default_timeout(toku_lock_tree *tree, toku_lock_request *lock_request) {
toku_ltm_lock_mutex(tree->mgr);
int r = toku_lt_acquire_lock_request_with_timeout_locked(tree, lock_request, &tree->mgr->lock_wait_time);
toku_ltm_unlock_mutex(tree->mgr);
int r = toku_lt_acquire_lock_request_with_timeout(tree, lock_request, &tree->mgr->lock_wait_time);
return r;
}
void
toku_lt_retry_lock_requests_locked(toku_lock_tree *tree) {
static void
lt_retry_lock_requests(toku_lock_tree *tree) {
int r;
for (uint32_t i = 0; i < toku_omt_size(tree->lock_requests); ) {
OMTVALUE v;
@ -2627,6 +2565,13 @@ toku_lt_retry_lock_requests_locked(toku_lock_tree *tree) {
}
}
// Public entry point for retrying the tree's lock requests:
// runs lt_retry_lock_requests() while holding tree->mutex.
void
toku_lt_retry_lock_requests(toku_lock_tree *tree) {
toku_mutex_lock(&tree->mutex);
lt_retry_lock_requests(tree);
toku_mutex_unlock(&tree->mutex);
}
#include <stdbool.h>
#include "wfg.h"
@ -2653,7 +2598,7 @@ build_wfg_for_a_lock_request(toku_lock_tree *tree, struct wfg *wfg, toku_lock_re
}
// check if a given lock request could deadlock with any granted locks.
void
static void
toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *a_lock_request) {
// init the wfg
struct wfg wfg_static;
@ -2821,9 +2766,11 @@ lt_verify(toku_lock_tree *lt) {
// Run lt_verify() on a lock tree while holding its mutex.
// db is installed as lt->db for the duration of the check and cleared again
// before the mutex is released.
void
toku_lt_verify(toku_lock_tree *lt, DB *db) {
toku_mutex_lock(&lt->mutex);
// lt_set_comparison_functions() asserts that lt->db is not already set.
lt_set_comparison_functions(lt, db);
lt_verify(lt);
lt_clear_comparison_functions(lt);
toku_mutex_unlock(&lt->mutex);
}
#undef STATUS_VALUE

View file

@ -59,26 +59,26 @@ typedef struct __toku_ltm toku_ltm;
/** \brief The lock tree structure */
struct __toku_lock_tree {
/** Lock tree manager */
toku_ltm* mgr;
/** The database for which this locktree will be handling locks */
DB* db;
toku_range_tree* borderwrite; /**< See design document */
toku_rth* rth; /**< Stores local(read|write)set tables */
/**
Stores a list of transactions to unlock when it is safe.
When we get a PUT or a GET, the comparison function is valid
and we can delete locks held in txns_to_unlock, even if txns_still_locked
is nonempty.
*/
toku_rth* txns_to_unlock;
/** Stores a list of transactions that hold locks. txns_still_locked = rth - txns_to_unlock
rth != txns_still_locked + txns_to_unlock, we may get an unlock call for a txn that has
no locks in rth.
When txns_still_locked becomes empty, we can throw away the contents of the lock tree
quickly. */
toku_rth* txns_still_locked;
/** Whether lock escalation is allowed. */
bool lock_escalation_allowed;
/** The key compare function */
toku_dbt_cmp compare_fun;
/** The number of references held by DB instances and transactions to this lock tree*/
uint32_t ref_count;
/** DICTIONARY_ID associated with the lock tree */
DICTIONARY_ID dict_id;
OMT dbs; //The extant dbs using this lock tree.
OMT lock_requests;
toku_pthread_mutex_t mutex;
/** A temporary area where we store the results of various find on
the range trees that this lock tree owns
Memory ownership:
- tree->buf is an array of toku_range's, which the lt owns
The contents of tree->buf are volatile (this is a buffer space
@ -98,26 +98,8 @@ struct __toku_lock_tree {
uint32_t bw_buflen;
toku_range* verify_buf;
uint32_t verify_buflen;
/** Whether lock escalation is allowed. */
bool lock_escalation_allowed;
/** Lock tree manager */
toku_ltm* mgr;
/** Function to retrieve the key compare function from the database. */
toku_dbt_cmp (*get_compare_fun_from_db)(DB*);
/** The key compare function */
toku_dbt_cmp compare_fun;
/** The panic function */
int (*panic)(DB*, int);
/** The number of references held by DB instances and transactions to this lock tree*/
uint32_t ref_count;
/** DICTIONARY_ID associated with the lock tree */
DICTIONARY_ID dict_id;
OMT dbs; //The extant dbs using this lock tree.
OMT lock_requests;
};
typedef enum {
LTM_LOCKS_LIMIT, // number of locks allowed (obsolete)
LTM_LOCKS_CURR, // number of locks in existence
@ -162,13 +144,10 @@ struct __toku_ltm {
is retrieved from this list, otherwise, a new lock tree is created
and the new mapping of DB and Lock tree is stored here */
toku_idlth* idlth;
/** Function to retrieve the key compare function from the database. */
toku_dbt_cmp (*get_compare_fun_from_db)(DB*);
/** The panic function */
int (*panic)(DB*, int);
toku_pthread_mutex_t lock;
toku_pthread_mutex_t *use_lock;
toku_pthread_mutex_t mutex;
struct timeval lock_wait_time;
};
@ -203,10 +182,6 @@ typedef struct __toku_point toku_point;
Create a lock tree. Should be called only inside DB->open.
\param ptree We set *ptree to the newly allocated tree.
\param get_compare_fun_from_db Accessor for the key compare function.
\param panic The function to cause the db to panic.
i.e., godzilla_rampage()
\param payload_capacity The maximum amount of memory to use for dbt payloads.
\return
- 0 Success
@ -222,17 +197,8 @@ typedef struct __toku_point toku_point;
instead.
*/
int toku_lt_create(toku_lock_tree** ptree,
int (*panic)(DB*, int),
toku_ltm* mgr,
toku_dbt_cmp (*get_compare_fun_from_db)(DB*));
/**
Gets a lock tree for a given DB with id dict_id
*/
int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
DICTIONARY_ID dict_id, DB *db);
void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id);
toku_dbt_cmp compare_fun);
/**
Closes and frees a lock tree.
@ -398,11 +364,28 @@ int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn,
- EINVAL If (tree == NULL || txn == NULL).
- EINVAL If panicking.
*/
int toku_lt_unlock(toku_lock_tree* tree, TXNID txn);
int toku_lt_unlock_txn(toku_lock_tree* tree, TXNID txn);
void toku_lt_retry_lock_requests(toku_lock_tree *tree);
void toku_lt_add_ref(toku_lock_tree* tree);
int toku_lt_remove_ref(toku_lock_tree* tree);
void toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db);
toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
void toku_lt_verify(toku_lock_tree *tree, DB *db);
int toku_lt_point_cmp(const toku_point* x, const toku_point* y);
/* Lock tree manager functions begin here */
/**
Creates a lock tree manager..
Creates a lock tree manager.
\param pmgr A buffer for the new lock tree manager.
\param locks_limit The maximum number of locks.
@ -415,8 +398,10 @@ int toku_lt_unlock(toku_lock_tree* tree, TXNID txn);
int toku_ltm_create(toku_ltm** pmgr,
uint32_t locks_limit,
uint64_t lock_memory_limit,
int (*panic)(DB*, int),
toku_dbt_cmp (*get_compare_fun_from_db)(DB*));
int (*panic)(DB*, int));
/** Open the lock tree manager */
int toku_ltm_open(toku_ltm *mgr);
/**
Closes and frees a lock tree manager.
@ -444,31 +429,30 @@ int toku_ltm_close(toku_ltm* mgr);
*/
int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t locks_limit);
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit);
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit);
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit);
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit);
void toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS s);
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit);
// set the default lock timeout. units are milliseconds
void toku_ltm_set_lock_wait_time(toku_ltm *mgr, uint64_t lock_wait_time_msec);
// get the default lock timeout
void toku_ltm_get_lock_wait_time(toku_ltm *mgr, uint64_t *lock_wait_time_msec);
/**
Gets a lock tree for a given DB with id dict_id
*/
int toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree, DICTIONARY_ID dict_id, DB *dbp, toku_dbt_cmp compare_fun);
void toku_ltm_invalidate_lt(toku_ltm* mgr, DICTIONARY_ID dict_id);
void toku_ltm_incr_lock_memory(void *extra, size_t s);
void toku_ltm_decr_lock_memory(void *extra, size_t s);
void toku_lt_add_ref(toku_lock_tree* tree);
int toku_lt_remove_ref(toku_lock_tree* tree);
void toku_lt_remove_db_ref(toku_lock_tree* tree, DB *db);
int toku_lt_point_cmp(const toku_point* x, const toku_point* y);
toku_range_tree* toku_lt_ifexist_selfread(toku_lock_tree* tree, TXNID txn);
toku_range_tree* toku_lt_ifexist_selfwrite(toku_lock_tree* tree, TXNID txn);
void toku_lt_verify(toku_lock_tree *tree, DB *db);
typedef enum {
LOCK_REQUEST_INIT = 0,
LOCK_REQUEST_PENDING = 1,
@ -524,17 +508,10 @@ void toku_lock_request_destroy(toku_lock_request *lock_request);
// returns 0 (success), DB_LOCK_NOTGRANTED, DB_LOCK_DEADLOCK
int toku_lock_request_start(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted);
// try to acquire a lock described by a lock request.
// if the lock is not granted and copy_keys_if_not_granted is true, then make a copy of the keys in the key range.
// this is necessary when used in the ydb cursor callbacks where the keys are only valid when in the callback function.
// called with the lock tree already locked.
int toku_lock_request_start_locked(toku_lock_request *lock_request, toku_lock_tree *tree, bool copy_keys_if_not_granted);
// sleep on the lock request until it is resolved or the wait time expires.
// if the wait time is not specified, then wait for as long as it takes.
int toku_lock_request_wait(toku_lock_request *lock_request, toku_lock_tree *tree, struct timeval *wait_time);
// use the default timeouts set in the ltm
int toku_lock_request_wait_with_default_timeout(toku_lock_request *lock_request, toku_lock_tree *tree);
// wakeup any threads that are waiting on a lock request.
@ -543,46 +520,14 @@ void toku_lock_request_wakeup(toku_lock_request *lock_request, toku_lock_tree *t
// returns the lock request state
toku_lock_request_state toku_lock_request_get_state(toku_lock_request *lock_request);
// a lock request tree contains pending lock requests.
// initialize a lock request tree.
void toku_lock_request_tree_init(toku_lock_tree *tree);
// destroy a lock request tree.
// the tree must be empty when destroyed.
void toku_lock_request_tree_destroy(toku_lock_tree *tree);
// insert a lock request into the tree.
void toku_lock_request_tree_insert(toku_lock_tree *tree, toku_lock_request *lock_request);
// delete a lock request from the tree.
void toku_lock_request_tree_delete(toku_lock_tree *tree, toku_lock_request *lock_request);
// find a lock request for a given transaction id.
toku_lock_request *toku_lock_request_tree_find(toku_lock_tree *tree, TXNID id);
// retry all pending lock requests.
// for all lock requests, if the lock request is resolved, then wakeup any threads waiting on the lock request.
// called with the lock tree already locked.
void toku_lt_retry_lock_requests_locked(toku_lock_tree *tree);
// try to acquire a lock described by a lock request. if the lock is granted then return success.
// otherwise wait on the lock request until the lock request is resolved (either granted or
// deadlocks), or the given timer has expired.
// returns 0 (success), DB_LOCK_NOTGRANTED
int toku_lt_acquire_lock_request_with_timeout(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time);
// called with the lock tree already locked
int toku_lt_acquire_lock_request_with_timeout_locked(toku_lock_tree *tree, toku_lock_request *lock_request, struct timeval *wait_time);
// call acquire_lock_request_with_timeout with the default lock wait timeout
int toku_lt_acquire_lock_request_with_default_timeout(toku_lock_tree *tree, toku_lock_request *lock_request);
// called with the lock tree already locked
int toku_lt_acquire_lock_request_with_default_timeout_locked (toku_lock_tree *tree, toku_lock_request *lock_request);
// check if a given lock request could deadlock with any granted locks.
void toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *lock_request);
#include "txnid_set.h"
// internal function that finds all transactions that conflict with a given lock request
@ -595,23 +540,6 @@ void toku_lt_check_deadlock(toku_lock_tree *tree, toku_lock_request *lock_reques
// returns an error code (0 == success)
int toku_lt_get_lock_request_conflicts(toku_lock_tree *tree, toku_lock_request *lock_request, txnid_set *conflicts);
// set the ltm mutex (used to override the internal mutex) and use a user supplied mutex instead to protect the
// lock tree). the first use is to use the ydb mutex to protect the lock tree. eventually, the ydb code will
// be refactored to use the ltm mutex instead.
void toku_ltm_set_mutex(toku_ltm *ltm, toku_pthread_mutex_t *use_lock);
// lock the lock tree
void toku_ltm_lock_mutex(toku_ltm *mgr);
// unlock the lock tree
void toku_ltm_unlock_mutex(toku_ltm *mgr);
// set the default lock timeout. units are milliseconds
void toku_ltm_set_lock_wait_time(toku_ltm *mgr, uint64_t lock_wait_time_msec);
// get the default lock timeout
void toku_ltm_get_lock_wait_time(toku_ltm *mgr, uint64_t *lock_wait_time_msec);
#if defined(__cplusplus)
}
#endif

View file

@ -38,11 +38,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DB *db_a = (DB *) 2;
@ -55,7 +55,7 @@ int main(int argc, const char *argv[]) {
}
// release the locks
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -47,7 +47,7 @@ struct test_arg {
uint64_t iterations;
};
static void runtest(DB *db, TXNID txn, toku_ltm *ltm, toku_lock_tree *lt, uint64_t locks_per_txn, uint64_t nrows, uint64_t iterations) {
static void runtest(DB *db, TXNID txn, toku_ltm *ltm UU(), toku_lock_tree *lt, uint64_t locks_per_txn, uint64_t nrows, uint64_t iterations) {
int r;
uint64_t notgranted = 0, deadlocked = 0;
@ -60,9 +60,7 @@ static void runtest(DB *db, TXNID txn, toku_ltm *ltm, toku_lock_tree *lt, uint64
DBT key = { .data = &keys[i], .size = sizeof keys[i] };
toku_lock_request lr;
toku_lock_request_init(&lr, db, txn, &key, &key, LOCK_REQUEST_WRITE);
toku_ltm_lock_mutex(ltm);
r = toku_lt_acquire_lock_request_with_default_timeout_locked(lt, &lr);
toku_ltm_unlock_mutex(ltm);
r = toku_lt_acquire_lock_request_with_default_timeout(lt, &lr);
if (r == 0) {
get_lock(keys[i], txn);
continue;
@ -80,9 +78,7 @@ static void runtest(DB *db, TXNID txn, toku_ltm *ltm, toku_lock_tree *lt, uint64
// usleep(random() % 1000);
release_locks(keys, i, txn);
toku_ltm_lock_mutex(ltm);
r = toku_lt_unlock(lt, txn); assert(r == 0);
toku_ltm_unlock_mutex(ltm);
r = toku_lt_unlock_txn(lt, txn); assert(r == 0);
if ((iter % 10000) == 0)
printf("%lu %lu %lu\n", (long unsigned) iter, (long unsigned) notgranted, (long unsigned) deadlocked);
@ -143,11 +139,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DB *fake_db = (DB *) 1;

View file

@ -33,12 +33,6 @@ static inline int dbcmp (DB *db __attribute__((__unused__)), const DBT *a, const
return toku_keycompare(a->data, a->size, b->data, b->size);
}
toku_dbt_cmp compare_fun = dbcmp;
static inline toku_dbt_cmp get_compare_fun_from_db(__attribute__((unused)) DB* db) {
return compare_fun;
}
bool panicked = false;
static inline int dbpanic(DB* db, int r) {

View file

@ -7,13 +7,11 @@ int main(void) {
uint32_t max_locks = 1000;
uint64_t max_lock_memory = max_locks*64;
r = toku_ltm_create(&mgr, max_locks, max_lock_memory, dbpanic,
get_compare_fun_from_db);
r = toku_ltm_create(&mgr, max_locks, max_lock_memory, dbpanic);
CKERR(r);
{
r = toku_lt_create(&lt, dbpanic, mgr,
get_compare_fun_from_db);
r = toku_lt_create(&lt, mgr, dbcmp);
CKERR(r);
assert(lt);
r = toku_lt_close(lt);

View file

@ -22,7 +22,7 @@ static void do_range_test(int (*acquire)(toku_lock_tree*, DB*, TXNID,
DBT* key_l = &_key_l;
DBT* key_r = &_key_r;
{
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
@ -64,11 +64,11 @@ static void do_point_test(int (*acquire)(toku_lock_tree*, DB*, TXNID,
/* Point read tests. */
key = &_key;
{
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
r = toku_lt_unlock(NULL, (TXNID)1);
r = toku_lt_unlock_txn(NULL, (TXNID)1);
CKERR2(r, EINVAL);
r = acquire(NULL, db, txn, key);
@ -91,18 +91,18 @@ int main(int argc, const char *argv[]) {
int r;
toku_lock_tree* lt = NULL;
r = toku_ltm_create(NULL, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(NULL, max_locks, max_lock_memory, dbpanic);
CKERR2(r, EINVAL);
assert(ltm == NULL);
r = toku_ltm_create(&ltm, 0, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, 0, max_lock_memory, dbpanic);
CKERR2(r, EINVAL);
assert(ltm == NULL);
r = toku_ltm_create(&ltm, max_locks, 0, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, 0, dbpanic);
CKERR2(r, EINVAL);
assert(ltm == NULL);
/* Actually create it. */
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
@ -144,17 +144,12 @@ int main(int argc, const char *argv[]) {
/* create tests. */
{
r = toku_lt_create(NULL, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(NULL, ltm, dbcmp);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, NULL, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, NULL, dbcmp);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, NULL, get_compare_fun_from_db);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm, NULL);
CKERR2(r, EINVAL);
}
/* Close tests. */

View file

@ -42,17 +42,17 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
init_query();
}
static void close_tree(void) {
r = toku_lt_unlock(lt, txn); CKERR(r);
r = toku_lt_unlock_txn(lt, txn); CKERR(r);
assert(lt && ltm);
r = toku_lt_close(lt); CKERR(r);
r = toku_ltm_close(ltm); CKERR(r);
@ -103,22 +103,9 @@ static void setup_payload_len(void** payload, uint32_t* len, int val) {
}
}
static void temporarily_fake_comparison_functions(void) {
assert(!lt->db && !lt->compare_fun);
lt->db = db;
lt->compare_fun = get_compare_fun_from_db(db);
}
static void stop_fake_comparison_functions(void) {
assert(lt->db && lt->compare_fun);
lt->db = NULL;
lt->compare_fun = NULL;
}
static void lt_find(toku_range_tree* rt,
unsigned k, int key_l, int key_r,
TXNID find_txn) {
temporarily_fake_comparison_functions();
r = toku_rt_find(rt, &query, 0, &buf, &buflen, &numfound);
CKERR(r);
assert(numfound==k);
@ -136,10 +123,9 @@ temporarily_fake_comparison_functions();
}
assert(false); //Crash since we didn't find it.
cleanup:
stop_fake_comparison_functions();
return;
}
static void insert_1(int key_l, int key_r,
const void* kl, const void* kr) {
DBT _key_left;

View file

@ -36,10 +36,10 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
init_query();
@ -102,7 +102,7 @@ static void lt_insert_write(int r_expect, char txn, int key_l) {
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
retval = toku_lt_unlock_txn(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}

View file

@ -37,12 +37,12 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
//ask ltm for lock tree
DICTIONARY_ID dict_id = {0x1234};
r = toku_ltm_get_lt(ltm, &lt, dict_id, db);
r = toku_ltm_get_lt(ltm, &lt, dict_id, db, intcmp);
CKERR(r);
assert(lt);
@ -108,7 +108,7 @@ static void lt_insert_write(int r_expect, char txn, int key_l) {
}
static void lt_unlock(char ctxn) {
int retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn); CKERR(retval);
int retval = toku_lt_unlock_txn(lt, (TXNID) (size_t) ctxn); CKERR(retval);
}
static void run_escalation_test(void) {
@ -370,7 +370,6 @@ static void init_test(void) {
buflen = 64;
buf = (toku_range*) toku_malloc(buflen*sizeof(toku_range));
compare_fun = intcmp;
}
static void close_test(void) {

View file

@ -17,14 +17,14 @@ int nums[10000];
static void setup_ltm(void) {
assert(!ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
}
static void setup_tree(size_t index, DICTIONARY_ID dict_id) {
assert(!lt[index] && ltm);
r = toku_ltm_get_lt(ltm, &lt[index], dict_id, NULL);
r = toku_ltm_get_lt(ltm, &lt[index], dict_id, NULL, intcmp);
CKERR(r);
assert(lt[index]);
}
@ -67,7 +67,6 @@ static void run_test(void) {
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
compare_fun = intcmp;
r = system("rm -rf " TESTDIR);
CKERR(r);

View file

@ -20,7 +20,7 @@ int nums[10000];
static void setup_ltm(void) {
assert(!ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
}
@ -30,7 +30,7 @@ static void db_open_tree(size_t index, size_t db_id_index) {
(lt_refs[index] > 0 && lts[index]));
assert(ltm);
lt_refs[index]++;
r = toku_ltm_get_lt(ltm, &lts[index], dict_ids[db_id_index], NULL);
r = toku_ltm_get_lt(ltm, &lts[index], dict_ids[db_id_index], NULL, intcmp);
CKERR(r);
assert(lts[index]);
}
@ -136,7 +136,6 @@ static void close_test(void) {
int main(int argc, const char *argv[]) {
parse_args(argc, argv);
compare_fun = intcmp;
r = system("rm -rf " TESTDIR);
CKERR(r);

View file

@ -36,10 +36,10 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
init_query();
@ -87,7 +87,7 @@ static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r)
}
static void lt_unlock(TXNID txnid) {
r= toku_lt_unlock(lt, txnid); CKERR(r);
r= toku_lt_unlock_txn(lt, txnid); CKERR(r);
}
static void runtest(void) {

View file

@ -43,11 +43,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DBT key_l; dbt_init(&key_l, "L", 1);
@ -90,7 +90,7 @@ int main(int argc, const char *argv[]) {
assert(txnid_set_get(&conflicts, 1) == txn_b);
txnid_set_destroy(&conflicts);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
assert(c_w_l.state == LOCK_REQUEST_PENDING);
txnid_set_init(&conflicts);
r = toku_lt_get_lock_request_conflicts(lt, &c_w_l, &conflicts);
@ -99,10 +99,10 @@ int main(int argc, const char *argv[]) {
assert(txnid_set_get(&conflicts, 0) == txn_b);
txnid_set_destroy(&conflicts);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
assert(c_w_l.state == LOCK_REQUEST_COMPLETE && c_w_l.complete_r == 0);
toku_lock_request_destroy(&c_w_l);
r = toku_lt_unlock(lt, txn_c); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_c); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -43,11 +43,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DBT key_l; dbt_init(&key_l, "L", 1);
@ -91,7 +91,7 @@ int main(int argc, const char *argv[]) {
assert(txnid_set_get(&conflicts, 1) == txn_b);
txnid_set_destroy(&conflicts);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
assert(c_w_l.state == LOCK_REQUEST_PENDING);
txnid_set_init(&conflicts);
r = toku_lt_get_lock_request_conflicts(lt, &c_w_l, &conflicts);
@ -100,10 +100,10 @@ int main(int argc, const char *argv[]) {
assert(txnid_set_get(&conflicts, 0) == txn_b);
txnid_set_destroy(&conflicts);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
assert(c_w_l.state == LOCK_REQUEST_COMPLETE && c_w_l.complete_r == 0);
toku_lock_request_destroy(&c_w_l);
r = toku_lt_unlock(lt, txn_c); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_c); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -33,11 +33,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -68,10 +68,10 @@ int main(int argc, const char *argv[]) {
assert(txnid_set_get(&conflicts, 0) == txn_a);
txnid_set_destroy(&conflicts);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
assert(b_r_l.state == LOCK_REQUEST_COMPLETE && b_r_l.complete_r == 0);
toku_lock_request_destroy(&b_r_l);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -33,11 +33,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DBT key_l; dbt_init(&key_l, "L", 1);
@ -70,8 +70,8 @@ int main(int argc, const char *argv[]) {
txnid_set_destroy(&conflicts);
toku_lock_request_destroy(&b_r_l);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -33,11 +33,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DBT key_l; dbt_init(&key_l, "L", 1);
@ -71,7 +71,7 @@ int main(int argc, const char *argv[]) {
toku_lock_request_destroy(&b_w_l);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -50,11 +50,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -68,7 +68,7 @@ int main(int argc, const char *argv[]) {
r = write_lock(lt, txn_b, "L");
assert(r == DB_LOCK_NOTGRANTED);
}
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -30,7 +30,7 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
uint64_t target_wait_time, the_wait_time;

View file

@ -93,7 +93,7 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
struct my_ltm_status s;
@ -104,7 +104,7 @@ int main(int argc, const char *argv[]) {
assert(s.curr_lock_memory == 0);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DB *db_a = (DB *) 2;
@ -135,7 +135,7 @@ int main(int argc, const char *argv[]) {
// release the locks
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
my_ltm_get_status(ltm, &s);
assert(s.curr_locks == 0);

View file

@ -95,7 +95,7 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
struct my_ltm_status s;
@ -106,7 +106,7 @@ int main(int argc, const char *argv[]) {
assert(s.curr_lock_memory == 0);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
DB *db_a = (DB *) 2;
@ -139,7 +139,7 @@ int main(int argc, const char *argv[]) {
// release the locks
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
my_ltm_get_status(ltm, &s);
assert(s.curr_locks == 0);

View file

@ -36,10 +36,10 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
init_query();
@ -103,7 +103,7 @@ static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r)
}
static void lt_unlock(TXNID txnid) {
r = toku_lt_unlock(lt, txnid); CKERR(r);
r = toku_lt_unlock_txn(lt, txnid); CKERR(r);
}
static void runtest(void) {

View file

@ -50,11 +50,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -68,7 +68,7 @@ int main(int argc, const char *argv[]) {
r = write_lock(lt, txn_b, "L", &wait_time);
assert(r == DB_LOCK_NOTGRANTED);
}
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -20,7 +20,7 @@ int main(int argc, const char *argv[]) {
int r;
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, MAX_LOCKS, MAX_LOCK_MEMORY, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, MAX_LOCKS, MAX_LOCK_MEMORY, dbpanic);
CKERR(r);
do_ltm_status(ltm);
#if 0
@ -63,17 +63,17 @@ int main(int argc, const char *argv[]) {
/* create tests. */
{
r = toku_lt_create(NULL, dbpanic, ltm,
get_compare_fun_from_db,
dbcmp,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, NULL, ltm,
get_compare_fun_from_db,
dbcmp,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, NULL,
get_compare_fun_from_db,
dbcmp,
toku_malloc, toku_free, toku_realloc);
CKERR2(r, EINVAL);
@ -83,15 +83,15 @@ int main(int argc, const char *argv[]) {
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
dbcmp,
NULL, toku_free, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
dbcmp,
toku_malloc, NULL, toku_realloc);
CKERR2(r, EINVAL);
r = toku_lt_create(&lt, dbpanic, ltm,
get_compare_fun_from_db,
dbcmp,
toku_malloc, toku_free, NULL);
CKERR2(r, EINVAL);
}

View file

@ -49,11 +49,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -65,11 +65,11 @@ int main(int argc, const char *argv[]) {
const TXNID txn_c = 3;
r = read_lock(lt, txn_c, "L"); assert(r == DB_LOCK_NOTGRANTED);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
r = read_lock(lt, txn_b, "L"); assert(r == 0);
r = read_lock(lt, txn_c, "L"); assert(r == 0);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock(lt, txn_c); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_c); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -38,11 +38,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -62,13 +62,13 @@ int main(int argc, const char *argv[]) {
r = toku_lock_request_start(&c_w_l, lt, false); assert(r != 0);
assert(c_w_l.state == LOCK_REQUEST_PENDING);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
assert(b_w_l.state == LOCK_REQUEST_COMPLETE && b_w_l.complete_r == 0);
assert(c_w_l.state == LOCK_REQUEST_COMPLETE && c_w_l.complete_r == TOKUDB_OUT_OF_LOCKS);
toku_lock_request_destroy(&b_w_l);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
toku_lock_request_destroy(&c_w_l);
r = toku_lt_unlock(lt, txn_c); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_c); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -37,11 +37,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -61,13 +61,13 @@ int main(int argc, const char *argv[]) {
r = toku_lock_request_start(&c_w_l, lt, false); assert(r != 0);
assert(c_w_l.state == LOCK_REQUEST_PENDING);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
assert(b_w_l.state == LOCK_REQUEST_COMPLETE && b_w_l.complete_r == 0);
assert(c_w_l.state == LOCK_REQUEST_COMPLETE && c_w_l.complete_r == 0);;
toku_lock_request_destroy(&b_w_l);
toku_lock_request_destroy(&c_w_l);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock(lt, txn_c); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_c); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -1,84 +0,0 @@
// verify that a user supplied mutex works
// T(A) gets W(L)
// T(B) tries W(L), gets lock request blocked
// T(B) lock request W(L) times out
// T(A) releases locks
// T(B) releases locks
#include "test.h"
// Test driver: installs a caller-owned mutex on the lock tree manager with
// toku_ltm_set_mutex, then runs two conflicting write lock requests on the same
// key through the *_locked entry points while holding that mutex.
// Command line: -v/--verbose, -q/--quiet, --max_locks N, --max_lock_memory N.
// Returns 0; every expectation is enforced with assert.
int
main(int argc, const char *argv[]) {
int r;
uint32_t max_locks = 2;
uint64_t max_lock_memory = 4096;
// parse the command line; any unrecognized argument trips assert(0)
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
// verbosity is only raised when it is already positive
if (verbose > 0) verbose++;
continue;
}
if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) {
// verbosity is only lowered when it is positive, so it never goes below 0 here
if (verbose > 0) verbose--;
continue;
}
if (strcmp(argv[i], "--max_locks") == 0 && i+1 < argc) {
// NOTE(review): atoi reports no errors and its int result is stored in a uint32_t
max_locks = atoi(argv[++i]);
continue;
}
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
// NOTE(review): atoi reports no errors and its int result is stored in a uint64_t
max_lock_memory = atoi(argv[++i]);
continue;
}
assert(0);
}
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
assert(r == 0 && ltm);
// default lock wait time used by the *_with_default_timeout call below
// (presumably milliseconds, i.e. 5 seconds -- confirm against the lock tree header)
toku_ltm_set_lock_wait_time(ltm, 5000);
// hand the manager a mutex owned by this test
toku_pthread_mutex_t my_mutex = TOKU_PTHREAD_MUTEX_INITIALIZER;
toku_ltm_set_mutex(ltm, &my_mutex);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
assert(r == 0 && lt);
const TXNID txn_a = 1;
const TXNID txn_b = 2;
// both transactions request a write lock on the same one-byte key "L"
DBT key_l; dbt_init(&key_l, "L", 1);
// T(A) requests W(L): expected to complete immediately with result 0
toku_lock_request a_w_l; toku_lock_request_init(&a_w_l, (DB *)1, txn_a, &key_l, &key_l, LOCK_REQUEST_WRITE);
toku_ltm_lock_mutex(ltm);
r = toku_lock_request_start_locked(&a_w_l, lt, false); assert(r == 0);
toku_ltm_unlock_mutex(ltm);
assert(a_w_l.state == LOCK_REQUEST_COMPLETE && a_w_l.complete_r == 0);
// T(B) requests W(L): expected to return nonzero and leave the request pending
toku_lock_request b_w_l; toku_lock_request_init(&b_w_l, (DB *)1, txn_b, &key_l, &key_l, LOCK_REQUEST_WRITE);
toku_ltm_lock_mutex(ltm);
r = toku_lock_request_start_locked(&b_w_l, lt, false); assert(r != 0);
toku_ltm_unlock_mutex(ltm);
assert(b_w_l.state == LOCK_REQUEST_PENDING);
// T(B) waits with the default timeout while T(A) still holds its lock:
// expected result is DB_LOCK_NOTGRANTED with the request marked complete
toku_ltm_lock_mutex(ltm);
r = toku_lock_request_wait_with_default_timeout(&b_w_l, lt);
toku_ltm_unlock_mutex(ltm);
assert(r == DB_LOCK_NOTGRANTED);
assert(b_w_l.state == LOCK_REQUEST_COMPLETE);
toku_lock_request_destroy(&a_w_l);
toku_lock_request_destroy(&b_w_l);
// release the locks of both transactions
// NOTE(review): unlike the calls above, these are made without toku_ltm_lock_mutex
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);
r = toku_ltm_close(ltm); assert(r == 0);
return 0;
}

View file

@ -1,87 +0,0 @@
// T(A) gets W(L)
// T(B) tries W(L) with timeout, gets DB_LOCK_NOTGRANTED
// T(B) releases locks
#include "test.h"
// Request a read lock for txnid with the string k used as both key
// endpoints of the lock request, waiting with the default timeout.
// The lock tree manager mutex is held only around the acquire call.
// Returns the result of the acquire call unchanged.
static int read_lock(toku_ltm *ltm, toku_lock_tree *lt, TXNID txnid, char *k) {
    DBT point;
    dbt_init(&point, k, strlen(k));

    // (DB*)1 is presumably just a non-NULL placeholder handle -- confirm
    toku_lock_request request;
    toku_lock_request_init(&request, (DB*)1, txnid, &point, &point, LOCK_REQUEST_READ);

    toku_ltm_lock_mutex(ltm);
    int result = toku_lt_acquire_lock_request_with_default_timeout_locked(lt, &request);
    toku_ltm_unlock_mutex(ltm);

    toku_lock_request_destroy(&request);
    return result;
}
// Request a write lock for txnid with the string k used as both key
// endpoints of the lock request, waiting with the default timeout.
// The lock tree manager mutex is held only around the acquire call.
// Returns the result of the acquire call unchanged.
static int write_lock(toku_ltm *ltm, toku_lock_tree *lt, TXNID txnid, char *k) {
    DBT point;
    dbt_init(&point, k, strlen(k));

    // (DB*)1 is presumably just a non-NULL placeholder handle -- confirm
    toku_lock_request request;
    toku_lock_request_init(&request, (DB*)1, txnid, &point, &point, LOCK_REQUEST_WRITE);

    toku_ltm_lock_mutex(ltm);
    int result = toku_lt_acquire_lock_request_with_default_timeout_locked(lt, &request);
    toku_ltm_unlock_mutex(ltm);

    toku_lock_request_destroy(&request);
    return result;
}
// Test driver: T(A) holds W(L) while T(B) repeatedly asks for R(L) and W(L)
// under a growing lock wait time; every T(B) request is expected to come
// back DB_LOCK_NOTGRANTED.
// Options: -v/--verbose, -q/--quiet, --max_locks N, --max_lock_memory N.
// Any other argument trips assert(0).  Returns 0 when every check passes.
int main(int argc, const char *argv[]) {
int r;
uint32_t max_locks = 1;
uint64_t max_lock_memory = 4096;
// parse the command line
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) {
// never let the verbosity level drop below zero
if (verbose > 0) verbose--;
continue;
}
if (strcmp(argv[i], "--max_locks") == 0 && i+1 < argc) {
max_locks = atoi(argv[++i]);
continue;
}
if (strcmp(argv[i], "--max_lock_memory") == 0 && i+1 < argc) {
max_lock_memory = atoi(argv[++i]);
continue;
}
// unknown argument
assert(0);
}
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
assert(r == 0 && ltm);
// the lock tree manager uses a mutex owned by this test
toku_pthread_mutex_t my_mutex = TOKU_PTHREAD_MUTEX_INITIALIZER;
toku_ltm_set_mutex(ltm, &my_mutex);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
assert(r == 0 && lt);
const TXNID txn_a = 1;
const TXNID txn_b = 2;
// T(A) gets W(L)
r = write_lock(ltm, lt, txn_a, "L"); assert(r == 0);
// T(B) is refused both lock types for wait times 1000, 2000, ..., 9000
// NOTE(review): the wait time unit is presumably milliseconds -- confirm
for (int t = 1; t < 10; t++) {
toku_ltm_set_lock_wait_time(ltm, t * 1000);
r = read_lock(ltm, lt, txn_b, "L");
assert(r == DB_LOCK_NOTGRANTED);
r = write_lock(ltm, lt, txn_b, "L");
assert(r == DB_LOCK_NOTGRANTED);
}
// T(A) releases its locks while holding the manager mutex
toku_ltm_lock_mutex(ltm);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
toku_ltm_unlock_mutex(ltm);
// shutdown
r = toku_lt_close(lt); assert(r == 0);
r = toku_ltm_close(ltm); assert(r == 0);
return 0;
}

View file

@ -36,11 +36,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -65,14 +65,14 @@ int main(int argc, const char *argv[]) {
r = toku_lock_request_start(&b_w_l, lt, false); assert(r == DB_LOCK_DEADLOCK);
assert(b_w_l.state == LOCK_REQUEST_COMPLETE && b_w_l.complete_r == DB_LOCK_DEADLOCK);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
toku_lock_request_destroy(&b_w_l);
assert(a_w_m.state == LOCK_REQUEST_COMPLETE && a_w_m.complete_r == 0
);
toku_lock_request_destroy(&a_w_m);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -36,11 +36,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -64,13 +64,13 @@ int main(int argc, const char *argv[]) {
r = toku_lock_request_start(&b_w_l, lt, false); assert(r == DB_LOCK_DEADLOCK);
assert(b_w_l.state == LOCK_REQUEST_COMPLETE && b_w_l.complete_r == DB_LOCK_DEADLOCK);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
toku_lock_request_destroy(&b_w_l);
assert(a_w_l.state == LOCK_REQUEST_COMPLETE && a_w_l.complete_r == 0);
toku_lock_request_destroy(&a_w_l);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -36,11 +36,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -64,13 +64,13 @@ int main(int argc, const char *argv[]) {
r = toku_lock_request_start(&b_w_l, lt, true); assert(r == DB_LOCK_DEADLOCK);
assert(b_w_l.state == LOCK_REQUEST_COMPLETE && b_w_l.complete_r == DB_LOCK_DEADLOCK);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
toku_lock_request_destroy(&b_w_l);
assert(a_w_l.state == LOCK_REQUEST_COMPLETE && a_w_l.complete_r == 0);
toku_lock_request_destroy(&a_w_l);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -27,7 +27,7 @@ static void *writer_thread(void *arg) {
int r = write_lock(writer_arg->lt, writer_arg->id, writer_arg->name); assert(r == 0);
printf("%lu locked\n", writer_arg->id);
sleep(1);
toku_lt_unlock(writer_arg->lt, writer_arg->id);
toku_lt_unlock_txn(writer_arg->lt, writer_arg->id);
printf("%lu unlocked\n", writer_arg->id);
return arg;
}
@ -65,11 +65,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -84,7 +84,7 @@ int main(int argc, const char *argv[]) {
r = toku_pthread_create(&tids[i], NULL, writer_thread, writer_arg); assert(r == 0);
}
sleep(10);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
printf("main unlocked\n");
for (int i = 0; i < max_threads; i++) {

View file

@ -40,11 +40,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -52,9 +52,9 @@ int main(int argc, const char *argv[]) {
r = write_lock(lt, txn_a, "L"); assert(r == 0);
r = write_lock(lt, txn_b, "L"); assert(r == DB_LOCK_NOTGRANTED);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
r = write_lock(lt, txn_b, "L"); assert(r == 0);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -36,10 +36,10 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
init_query();
@ -125,7 +125,7 @@ static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r)
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
retval = toku_lt_unlock_txn(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}

View file

@ -36,10 +36,10 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
init_query();
@ -101,7 +101,7 @@ static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r)
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
retval = toku_lt_unlock_txn(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}

View file

@ -36,10 +36,10 @@ static void init_query(void) {
static void setup_tree(void) {
assert(!lt && !ltm);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
CKERR(r);
assert(ltm);
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
CKERR(r);
assert(lt);
init_query();
@ -102,7 +102,7 @@ static void lt_insert_write_range(int r_expect, char txn, int key_l, int key_r)
static void lt_unlock(char ctxn) UU();
static void lt_unlock(char ctxn) {
int retval;
retval = toku_lt_unlock(lt, (TXNID) (size_t) ctxn);
retval = toku_lt_unlock_txn(lt, (TXNID) (size_t) ctxn);
CKERR(retval);
}

View file

@ -34,11 +34,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -54,10 +54,10 @@ int main(int argc, const char *argv[]) {
r = toku_lock_request_start(&b_w_l, lt, false); assert(r != 0);
assert(b_w_l.state == LOCK_REQUEST_PENDING);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
assert(b_w_l.state == LOCK_REQUEST_COMPLETE && b_w_l.complete_r == 0);
toku_lock_request_destroy(&b_w_l);
r = toku_lt_unlock(lt, txn_b); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_b); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -33,11 +33,11 @@ int main(int argc, const char *argv[]) {
// setup
toku_ltm *ltm = NULL;
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic, get_compare_fun_from_db);
r = toku_ltm_create(&ltm, max_locks, max_lock_memory, dbpanic);
assert(r == 0 && ltm);
toku_lock_tree *lt = NULL;
r = toku_lt_create(&lt, dbpanic, ltm, get_compare_fun_from_db);
r = toku_lt_create(&lt, ltm, dbcmp);
assert(r == 0 && lt);
const TXNID txn_a = 1;
@ -57,7 +57,7 @@ int main(int argc, const char *argv[]) {
assert(a_w_l_2.state == LOCK_REQUEST_COMPLETE && a_w_l_2.complete_r == 0);
toku_lock_request_destroy(&a_w_l_2);
r = toku_lt_unlock(lt, txn_a); assert(r == 0);
r = toku_lt_unlock_txn(lt, txn_a); assert(r == 0);
// shutdown
r = toku_lt_close(lt); assert(r == 0);

View file

@ -215,8 +215,6 @@ toku_rt_insert(toku_range_tree* tree, toku_range* range) {
insert_range = toku_xmalloc(sizeof *insert_range);
*insert_range = *range;
size_t start_omt_size = toku_omt_memory_size(tree->i.omt);
static int count = 0;
count++;
r = toku_omt_insert_at(tree->i.omt, insert_range, index);
assert_zero(r);
size_t end_omt_size = toku_omt_memory_size(tree->i.omt);

View file

@ -178,6 +178,7 @@ BDB_DONTRUN_TESTS = \
perf_malloc_free \
perf_nop \
perf_ptquery \
perf_ptquery2 \
perf_xmalloc_free \
prelock-read-read \
prelock-read-write \

View file

@ -1,215 +0,0 @@
// verify that cursor deletes without write locks can detect deadlocks.
#include "test.h"
#include "toku_pthread.h"
// Load nrows rows into db inside a single transaction and commit it.
// Row i is stored with key htonl(i) and value i, each held in a uint64_t.
// Any failing database call aborts the test through assert.
static void populate(DB_ENV *db_env, DB *db, uint64_t nrows) {
    DB_TXN *load_txn = NULL;
    int rc = db_env->txn_begin(db_env, NULL, &load_txn, 0);
    assert(rc == 0);

    uint64_t row = 0;
    while (row < nrows) {
        uint64_t key_bits = htonl(row);
        uint64_t val_bits = row;
        DBT key = { .data = &key_bits, .size = sizeof key_bits };
        DBT val = { .data = &val_bits, .size = sizeof val_bits };
        rc = db->put(db, load_txn, &key, &val, 0);
        assert(rc == 0);
        row++;
    }

    rc = load_txn->commit(load_txn, 0);
    assert(rc == 0);
}
// Output slot for blocking_c_del_callback: receives copies of the key and
// value handed to the callback.
struct my_callback_context {
DBT key;
DBT val;
};
#if TOKUDB
// Copy the payload of src into dest, resizing dest's buffer with
// toku_xrealloc.  dest must carry the DB_DBT_REALLOC flag (asserted).
static void copy_dbt(DBT *dest, DBT const *src) {
    assert(dest->flags == DB_DBT_REALLOC);

    // resize the destination buffer first, then record the new length
    dest->data = toku_xrealloc(dest->data, src->size);
    dest->size = src->size;

    memcpy(dest->data, src->data, src->size);
}
// Callback handed to c_getf_set: saves copies of the found key (a) and
// value (b) into the struct my_callback_context passed as e.
// Always returns 0.
static int blocking_c_del_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) {
    struct my_callback_context *saved = (struct my_callback_context *) e;

    copy_dbt(&saved->key, a);
    copy_dbt(&saved->val, b);
    return 0;
}
#endif
// For each i in [0, nrows): begin a transaction, open a cursor, position it
// on key htonl(i) without asking for a write lock, sleep for sleeptime
// (passed to usleep), then delete the row through the cursor.
// The positioning call may return DB_NOTFOUND and the delete may return
// DB_LOCK_DEADLOCK; any other failure aborts the test through assert.
// The transaction is committed when the last result was 0, otherwise it is
// aborted.
static void blocking_c_del(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime) {
int r;
// buffers reused across iterations; released at the end of the function
struct my_callback_context context;
context.key = (DBT) { .data = NULL, .size = 0, .flags = DB_DBT_REALLOC };
context.val = (DBT) { .data = NULL, .size = 0, .flags = DB_DBT_REALLOC };
for (uint64_t i = 0; i < nrows; i++) {
DB_TXN *txn = NULL;
r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0);
DBC *cursor = NULL;
r = db->cursor(db, txn, &cursor, 0); assert(r == 0);
uint64_t k = htonl(i);
DBT key = { .data = &k, .size = sizeof k };
// position the cursor on the key; flags 0 / plain DB_SET, i.e. no DB_RMW
#if TOKUDB
r = cursor->c_getf_set(cursor, 0, &key, blocking_c_del_callback, &context);
#else
r = cursor->c_get(cursor, &key, &context.val, DB_SET);
#endif
assert(r == 0 || r == DB_NOTFOUND);
if (r == 0) {
// pause between the read and the delete
usleep(sleeptime);
if (verbose) {
uint64_t kk;
#if TOKUDB
assert(context.key.size == sizeof kk);
memcpy(&kk, context.key.data, sizeof kk);
#else
assert(key.size == sizeof kk);
memcpy(&kk, key.data, sizeof kk);
#endif
printf("%lu deleting %lu\n", toku_pthread_self(), (long unsigned) htonl(kk));
}
r = cursor->c_del(cursor, 0);
assert(r == 0 || r == DB_LOCK_DEADLOCK);
}
// the cursor is closed before the transaction is resolved; rr keeps r intact
{ int rr = cursor->c_close(cursor); assert(rr == 0); }
// r is nonzero here after DB_NOTFOUND or DB_LOCK_DEADLOCK
if (r == 0) {
if (verbose) printf("%lu commit\n", toku_pthread_self());
r = txn->commit(txn, 0);
} else {
if (verbose) printf("%lu abort\n", toku_pthread_self());
r = txn->abort(txn);
}
assert(r == 0);
if (verbose)
printf("%lu %lu\n", toku_pthread_self(), i);
}
toku_free(context.key.data);
toku_free(context.val.data);
}
// Argument bundle for blocking_c_del_thread; the fields mirror the
// parameters of blocking_c_del.
struct blocking_c_del_args {
DB_ENV *db_env;
DB *db;
uint64_t nrows;
long sleeptime;
};
// Thread entry point: unpack the struct blocking_c_del_args passed as arg,
// run blocking_c_del with its fields, and hand arg back as the thread result.
static void *blocking_c_del_thread(void *arg) {
    struct blocking_c_del_args *job = (struct blocking_c_del_args *) arg;

    blocking_c_del(job->db_env, job->db, job->nrows, job->sleeptime);

    return arg;
}
// Run blocking_c_del concurrently: nthreads-1 helper threads plus the
// calling thread all work on the same db with the same arguments, then the
// helpers are joined.
static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime) {
    toku_pthread_t tids[nthreads];
    struct blocking_c_del_args a = { db_env, db, nrows, sleeptime };
    int rc;

    // start the helpers; all of them share the one argument bundle
    int started = 0;
    while (started < nthreads-1) {
        rc = toku_pthread_create(&tids[started], NULL, blocking_c_del_thread, &a);
        assert(rc == 0);
        started++;
    }

    // the calling thread does the same work itself
    blocking_c_del(db_env, db, nrows, sleeptime);

    // wait for every helper to finish
    for (int joined = 0; joined < nthreads-1; joined++) {
        void *ret;
        rc = toku_pthread_join(tids[joined], &ret);
        assert(rc == 0);
    }
}
// Test entry point: parse the options, recreate the environment directory,
// open an environment and a btree db, populate it with nrows rows, run the
// concurrent cursor-delete workload, and close everything.
// Options: -v/--verbose, -q/--quiet, --nrows N, --nthreads N, --sleeptime N.
// Any other argument trips assert(0).  Returns 0 when every check passes.
int test_main(int argc, char * const argv[]) {
// cachesize and pagesize are never changed from 0 in this function, so the
// set_cachesize / set_pagesize branches below are not taken
uint64_t cachesize = 0;
uint32_t pagesize = 0;
uint64_t nrows = 10;
int nthreads = 2;
long sleeptime = 100000;
// the environment directory name depends on which library the test is built for
#if defined(USE_TDB)
char *db_env_dir = "dir." __FILE__ ".tokudb";
#elif defined(USE_BDB)
char *db_env_dir = "dir." __FILE__ ".bdb";
#else
#error
#endif
char *db_filename = "test.db";
int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_THREAD;
// parse_args(argc, argv);
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) {
if (verbose > 0)
verbose--;
continue;
}
if (strcmp(argv[i], "--nrows") == 0 && i+1 < argc) {
nrows = atoll(argv[++i]);
continue;
}
if (strcmp(argv[i], "--nthreads") == 0 && i+1 < argc) {
nthreads = atoi(argv[++i]);
continue;
}
if (strcmp(argv[i], "--sleeptime") == 0 && i+1 < argc) {
sleeptime = atol(argv[++i]);
continue;
}
// unknown argument
assert(0);
}
// setup env
int r;
// buffer holds "rm -rf " + directory name + terminating NUL
char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir);
r = system(rm_cmd); assert(r == 0);
r = toku_os_mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0);
DB_ENV *db_env = NULL;
r = db_env_create(&db_env, 0); assert(r == 0);
if (cachesize) {
const u_int64_t gig = 1 << 30;
r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0);
}
r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0);
// lock conflict policy: a lock timeout under TOKUDB, deadlock detection otherwise
// NOTE(review): 30 * 1000 is presumably milliseconds -- confirm
#if TOKUDB
r = db_env->set_lock_timeout(db_env, 30 * 1000); assert(r == 0);
#else
r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0);
#endif
// create the db
DB *db = NULL;
r = db_create(&db, db_env, 0); assert(r == 0);
if (pagesize) {
r = db->set_pagesize(db, pagesize); assert(r == 0);
}
r = db->open(db, NULL, db_filename, NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT|DB_THREAD, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0);
// populate the db
populate(db_env, db, nrows);
run_test(db_env, db, nthreads, nrows, sleeptime);
// close env
r = db->close(db, 0); assert(r == 0); db = NULL;
r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL;
return 0;
}

View file

@ -1,213 +0,0 @@
// verify that cursor deletes with write locking cause transactions with lock conflicts to
// suspend the conflicting threads.
#include "test.h"
#include "toku_pthread.h"
// Fill db with nrows rows under one transaction, then commit.
// Row i uses htonl(i) as its key and i as its value, both kept in uint64_t
// buffers.  A failing database call aborts the test through assert.
static void populate(DB_ENV *db_env, DB *db, uint64_t nrows) {
    DB_TXN *fill_txn = NULL;
    int status = db_env->txn_begin(db_env, NULL, &fill_txn, 0);
    assert(status == 0);

    for (uint64_t n = 0; n != nrows; ++n) {
        uint64_t key_word = htonl(n);
        uint64_t val_word = n;
        DBT key_dbt = { .data = &key_word, .size = sizeof key_word };
        DBT val_dbt = { .data = &val_word, .size = sizeof val_word };

        status = db->put(db, fill_txn, &key_dbt, &val_dbt, 0);
        assert(status == 0);
    }

    status = fill_txn->commit(fill_txn, 0);
    assert(status == 0);
}
// Output slot for blocking_c_del_callback: receives copies of the key and
// value handed to the callback.
struct my_callback_context {
DBT key;
DBT val;
};
#if TOKUDB
// Duplicate src's bytes into dest.  dest's buffer is resized through
// toku_xrealloc, so dest must be flagged DB_DBT_REALLOC (asserted).
static void copy_dbt(DBT *dest, DBT const *src) {
    assert(dest->flags == DB_DBT_REALLOC);

    // grow or shrink the destination buffer, then record its new length
    dest->data = toku_xrealloc(dest->data, src->size);
    dest->size = src->size;

    memcpy(dest->data, src->data, src->size);
}
// Callback handed to c_getf_set: stores copies of the found key (a) and
// value (b) in the struct my_callback_context passed as e.
// Always returns 0.
static int blocking_c_del_callback(DBT const *a UU(), DBT const *b UU(), void *e UU()) {
    struct my_callback_context *out = (struct my_callback_context *) e;

    copy_dbt(&out->key, a);
    copy_dbt(&out->val, b);
    return 0;
}
#endif
// For each i in [0, nrows): begin a transaction, open a cursor, position it
// on key htonl(i) with the DB_RMW flag, sleep for sleeptime (passed to
// usleep), then delete the row through the cursor.
// The positioning call may return DB_NOTFOUND and the delete may return
// DB_LOCK_DEADLOCK; any other failure aborts the test through assert.
// The transaction is committed when the last result was 0, otherwise it is
// aborted.
static void blocking_c_del(DB_ENV *db_env, DB *db, uint64_t nrows, long sleeptime) {
int r;
// buffers reused across iterations; released at the end of the function
struct my_callback_context context;
context.key = (DBT) { .data = NULL, .size = 0, .flags = DB_DBT_REALLOC };
context.val = (DBT) { .data = NULL, .size = 0, .flags = DB_DBT_REALLOC };
for (uint64_t i = 0; i < nrows; i++) {
DB_TXN *txn = NULL;
r = db_env->txn_begin(db_env, NULL, &txn, 0); assert(r == 0);
DBC *cursor = NULL;
r = db->cursor(db, txn, &cursor, 0); assert(r == 0);
uint64_t k = htonl(i);
DBT key = { .data = &k, .size = sizeof k };
// position the cursor on the key, passing DB_RMW in both build variants
#if TOKUDB
r = cursor->c_getf_set(cursor, DB_RMW, &key, blocking_c_del_callback, &context);
#else
r = cursor->c_get(cursor, &key, &context.val, DB_SET + DB_RMW);
#endif
assert(r == 0 || r == DB_NOTFOUND);
if (r == 0) {
// pause between the read and the delete
usleep(sleeptime);
if (verbose) {
uint64_t kk;
#if TOKUDB
assert(context.key.size == sizeof kk);
memcpy(&kk, context.key.data, sizeof kk);
#else
assert(key.size == sizeof kk);
memcpy(&kk, key.data, sizeof kk);
#endif
printf("%lu deleting %lu\n", toku_pthread_self(), (long unsigned) htonl(kk));
}
r = cursor->c_del(cursor, 0);
assert(r == 0 || r == DB_LOCK_DEADLOCK);
}
// the cursor is closed before the transaction is resolved; rr keeps r intact
{ int rr = cursor->c_close(cursor); assert(rr == 0); }
// r is nonzero here after DB_NOTFOUND or DB_LOCK_DEADLOCK
if (r == 0)
r = txn->commit(txn, 0);
else
r = txn->abort(txn);
assert(r == 0);
if (verbose)
printf("%lu %lu\n", toku_pthread_self(), i);
}
toku_free(context.key.data);
toku_free(context.val.data);
}
// Argument bundle for blocking_c_del_thread; the fields mirror the
// parameters of blocking_c_del.
struct blocking_c_del_args {
DB_ENV *db_env;
DB *db;
uint64_t nrows;
long sleeptime;
};
// Thread entry point: interpret arg as a struct blocking_c_del_args, run
// blocking_c_del with its fields, and return arg as the thread result.
static void *blocking_c_del_thread(void *arg) {
    struct blocking_c_del_args *work = (struct blocking_c_del_args *) arg;

    blocking_c_del(work->db_env, work->db, work->nrows, work->sleeptime);

    return arg;
}
// Run blocking_c_del on nthreads-1 helper threads and on the calling thread
// at the same time, all against the same db, then join the helpers.
static void run_test(DB_ENV *db_env, DB *db, int nthreads, uint64_t nrows, long sleeptime) {
    toku_pthread_t tids[nthreads];
    struct blocking_c_del_args a = { db_env, db, nrows, sleeptime };
    int status;

    // launch the helpers; they all read the same argument bundle
    int t = 0;
    while (t < nthreads-1) {
        status = toku_pthread_create(&tids[t], NULL, blocking_c_del_thread, &a);
        assert(status == 0);
        t++;
    }

    // the calling thread takes part in the workload too
    blocking_c_del(db_env, db, nrows, sleeptime);

    // collect the helpers
    for (t = 0; t < nthreads-1; t++) {
        void *ret;
        status = toku_pthread_join(tids[t], &ret);
        assert(status == 0);
    }
}
// Test entry point: parse the options, recreate the environment directory,
// open an environment and a btree db, populate it with nrows rows, run the
// concurrent cursor-delete workload, and close everything.
// Options: -v/--verbose, -q/--quiet, --nrows N, --nthreads N, --sleeptime N.
// Any other argument trips assert(0).  Returns 0 when every check passes.
int test_main(int argc, char * const argv[]) {
// cachesize and pagesize are never changed from 0 in this function, so the
// set_cachesize / set_pagesize branches below are not taken
uint64_t cachesize = 0;
uint32_t pagesize = 0;
uint64_t nrows = 10;
int nthreads = 2;
long sleeptime = 100000;
// the environment directory name depends on which library the test is built for
#if defined(USE_TDB)
char *db_env_dir = "dir." __FILE__ ".tokudb";
#elif defined(USE_BDB)
char *db_env_dir = "dir." __FILE__ ".bdb";
#else
#error
#endif
char *db_filename = "test.db";
int db_env_open_flags = DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL | DB_INIT_TXN | DB_INIT_LOCK | DB_INIT_LOG | DB_THREAD;
// parse_args(argc, argv);
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-v") == 0 || strcmp(argv[i], "--verbose") == 0) {
verbose++;
continue;
}
if (strcmp(argv[i], "-q") == 0 || strcmp(argv[i], "--quiet") == 0) {
if (verbose > 0)
verbose--;
continue;
}
if (strcmp(argv[i], "--nrows") == 0 && i+1 < argc) {
nrows = atoll(argv[++i]);
continue;
}
if (strcmp(argv[i], "--nthreads") == 0 && i+1 < argc) {
nthreads = atoi(argv[++i]);
continue;
}
if (strcmp(argv[i], "--sleeptime") == 0 && i+1 < argc) {
sleeptime = atol(argv[++i]);
continue;
}
// unknown argument
assert(0);
}
// setup env
int r;
// buffer holds "rm -rf " + directory name + terminating NUL
char rm_cmd[strlen(db_env_dir) + strlen("rm -rf ") + 1];
snprintf(rm_cmd, sizeof(rm_cmd), "rm -rf %s", db_env_dir);
r = system(rm_cmd); assert(r == 0);
r = toku_os_mkdir(db_env_dir, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); assert(r == 0);
DB_ENV *db_env = NULL;
r = db_env_create(&db_env, 0); assert(r == 0);
if (cachesize) {
const u_int64_t gig = 1 << 30;
r = db_env->set_cachesize(db_env, cachesize / gig, cachesize % gig, 1); assert(r == 0);
}
r = db_env->open(db_env, db_env_dir, db_env_open_flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0);
// lock conflict policy: a lock timeout under TOKUDB, deadlock detection otherwise
// NOTE(review): 30 * 1000 is presumably milliseconds -- confirm
#if TOKUDB
r = db_env->set_lock_timeout(db_env, 30 * 1000); assert(r == 0);
#else
r = db_env->set_lk_detect(db_env, DB_LOCK_YOUNGEST); assert(r == 0);
#endif
// create the db
DB *db = NULL;
r = db_create(&db, db_env, 0); assert(r == 0);
if (pagesize) {
r = db->set_pagesize(db, pagesize); assert(r == 0);
}
r = db->open(db, NULL, db_filename, NULL, DB_BTREE, DB_CREATE|DB_AUTO_COMMIT|DB_THREAD, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); assert(r == 0);
// populate the db
populate(db_env, db, nrows);
run_test(db_env, db, nthreads, nrows, sleeptime);
// close env
r = db->close(db, 0); assert(r == 0); db = NULL;
r = db_env->close(db_env, 0); assert(r == 0); db_env = NULL;
return 0;
}

View file

@ -1,75 +0,0 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "test.h"
/* See #627. */
#include <sys/stat.h>
#include <memory.h>
/*
 * Regression scenario for #627.
 *
 * Creates a fresh environment with one row ("a" -> "b"), then opens two
 * transactions that each position a cursor on that row.  Deleting through
 * the first cursor is expected to fail with DB_LOCK_NOTGRANTED (presumably
 * because the second transaction has read the same row -- confirm).  After
 * the first transaction commits, the delete through the second cursor is
 * expected to succeed.  Any unexpected result aborts the test.
 */
static void
do_627 (void) {
    int r;
    DB_ENV *env;
    DB *db;

    // start from an empty environment directory
    r = system("rm -rf " ENVDIR);
    CKERR(r);
    r=toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO); assert(r==0);
    r=db_env_create(&env, 0); assert(r==0);
    env->set_errfile(env, stderr);
    r=env->open(env, ENVDIR, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r=db_create(&db, env, 0); CKERR(r);

    DB_TXN *t1, *t2;
    DBT a,b;

    // create the db and insert the single row in one committed transaction
    r=env->txn_begin(env, 0, &t1, 0); assert(r==0);
    r=db->open(db, t1, "foo.db", 0, DB_BTREE, DB_CREATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
    r=db->put(db, t1, dbt_init(&a, "a", 2), dbt_init(&b, "b", 2), 0);
    // the put result used to be ignored; the rest of the scenario relies on the row
    CKERR(r);
    r=t1->commit(t1, 0); assert(r==0);

    // two concurrent transactions, one cursor each, both positioned on "a"
    r=env->txn_begin(env, 0, &t1, 0); assert(r==0);
    r=env->txn_begin(env, 0, &t2, 0); assert(r==0);

    DBC *c1,*c2;

    r=db->cursor(db, t1, &c1, 0); CKERR(r);
    r=db->cursor(db, t2, &c2, 0); CKERR(r);

    r=c1->c_get(c1, dbt_init(&a, "a", 2), dbt_init_malloc(&b), DB_SET); CKERR(r);
    toku_free(b.data);

    r=c2->c_get(c2, dbt_init(&a, "a", 2), dbt_init_malloc(&b), DB_SET); CKERR(r);
    toku_free(b.data);

    // This causes all hell to break loose in BDB 4.6, so we just cannot run this under BDB.
    //     PANIC: Invalid argument
    //     Expected DB_LOCK_NOTGRANTED, got DB_RUNRECOVERY: Fatal error, run database recovery
    //     bug627.bdb: bug627.c:44: do_627: Assertion `r==(-30994)' failed.
    //     Aborted
    r=c1->c_del(c1, 0);
    if (r!=DB_LOCK_NOTGRANTED) {
        // report what came back before the assert below fires
        fprintf(stderr, "Expected DB_LOCK_NOTGRANTED, got %s\n", db_strerror(r));
    }
    assert(r==DB_LOCK_NOTGRANTED);

    r=c1->c_close(c1); CKERR(r);
    r=t1->commit(t1, 0); assert(r==0);

    // with t1 resolved, the delete through c2 must succeed
    r=c2->c_del(c2, 0); CKERR(r);
    r=c2->c_close(c2); CKERR(r);
    r=t2->commit(t2, 0); assert(r==0);

    r=db->close(db, 0); CKERR(r);
    r=env->close(env, 0); CKERR(r);
}
// Test entry point: hand the command line to parse_args, then run the
// #627 scenario once.  Always returns 0; failures abort inside do_627.
int test_main (int argc, char * const argv[]) {
    parse_args(argc, argv);

    do_627();

    return 0;
}

View file

@ -66,7 +66,7 @@ doit (BOOL committed_provdels) {
r = dbc->c_get(dbc, &key, &data, DB_NEXT); CKERR(r);
assert(*(int*)key.data == i);
assert(*(int*)data.data == j);
r = dbc->c_del(dbc, 0); CKERR(r);
r = db->del(db, txn, &key, DB_DELETE_ANY); CKERR(r);
}
r = dbc->c_get(dbc, &key, &data, DB_NEXT); CKERR2(r, DB_NOTFOUND);
r = dbc->c_get(dbc, &key, &data, DB_FIRST); CKERR2(r, DB_NOTFOUND);

View file

@ -1004,8 +1004,8 @@ static void do_args(int argc, char * const argv[]) {
} else if (strcmp(argv[0], "-c")==0) {
CHECK_RESULTS = 1;
} else if (strcmp(argv[0], "-p")==0) {
USE_PUTS = LOADER_USE_PUTS;
printf("Using puts\n");
USE_PUTS = 0;
printf("DISABLED Using puts as part of #4503\n");
} else if (strcmp(argv[0], "-k")==0) {
test_only_abort_via_poll = 1;
printf("Perform only abort_via_poll test\n");

80
src/tests/perf_ptquery2.c Normal file
View file

@ -0,0 +1,80 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#ident "$Id: test_stress1.c 39258 2012-01-27 13:51:58Z zardosht $"
#include "test.h"
#include <stdio.h>
#include <stdlib.h>
#include <toku_pthread.h>
#include <unistd.h>
#include <memory.h>
#include <sys/stat.h>
#include <db.h>
#include "threaded_stress_test_helpers.h"
// Worker operation: operation_extra points at an int holding an index into
// arg->dbp; run ptquery_and_maybe_check_op against that DB with the final
// argument set to TRUE and return its result.
static int ptquery_op2(DB_TXN *txn, ARG arg, void* operation_extra) {
    const int *which_db = (int *)operation_extra;
    DB *target = arg->dbp[*which_db];

    return ptquery_and_maybe_check_op(target, txn, arg, TRUE);
}
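//
// NOTE(review): the description below appears to be carried over from the
// stress test this file was derived from. stress_table() in this file only
// starts point-query workers (ptquery_op2), each bound to DB index
// i % num_DBs -- confirm and trim the list below accordingly.
//
// This test is a form of stress that does operations on a single dictionary: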
// We create a dictionary bigger than the cachetable (around 4x greater).
// Then, we spawn a bunch of pthreads that do the following:
// - scan dictionary forward with bulk fetch
// - scan dictionary forward slowly
// - scan dictionary backward with bulk fetch
// - scan dictionary backward slowly
// - Grow the dictionary with insertions
// - do random point queries into the dictionary
// With the small cachetable, this should produce quite a bit of churn in reading in and evicting nodes.
// If the test runs to completion without crashing, we consider it a success. It also tests that snapshots
// work correctly by verifying that table scans sum their vals to 0.
//
// This does NOT test:
// - splits and merges
// - multiple DBs
//
// Variables that are interesting to tweak and run:
// - small cachetable
// - number of elements
//
// Start cli_args->num_ptquery_threads workers that all run ptquery_op2.
// Worker i is bound to DB index i % cli_args->num_DBs through its
// operation_extra pointer.  The workers are run via run_workers for
// cli_args->time_of_test.
static void
stress_table(DB_ENV* env, DB** dbp, struct cli_args *cli_args) {
    int num_elements = cli_args->num_elements;

    if (verbose) printf("starting creation of pthreads\n");

    const int num_workers = cli_args->num_ptquery_threads;
    struct arg worker_args[num_workers];
    // storage for the DB index each worker reads through operation_extra;
    // it must stay alive until run_workers returns
    int db_for_worker[num_workers];

    // give every worker the common defaults first
    for (int w = 0; w < num_workers; w++) {
        arg_init(&worker_args[w], num_elements, dbp, env, cli_args);
    }

    // then turn each one into a point-query worker on its own DB index
    for (int w = 0; w < num_workers; w++) {
        db_for_worker[w] = w % cli_args->num_DBs;
        worker_args[w].operation = ptquery_op2;
        worker_args[w].operation_extra = &db_for_worker[w];
    }

    run_workers(worker_args, num_workers, cli_args->time_of_test, false, cli_args);
}
int
test_main(int argc, char *const argv[]) {
struct cli_args args = get_default_args_for_perf();
parse_stress_test_args(argc, argv, &args);
stress_test_main(&args);
return 0;
}

View file

@ -18,7 +18,11 @@ int test_main (int argc, char * const argv[]) {
env->set_errfile(env, stderr);
// set a cachetable size of 10K
u_int32_t cachesize = 10*1024;
r = env->set_cachesize(env, 0, cachesize, 1); CKERR(r);
// as part of #4503, arbitrarily increasing the size of the cachetable:
// the idea is to make it small enough such that all data
// cannot fit in the cachetable, but big enough such that
// we don't have cachetable pressure
r = env->set_cachesize(env, 0, 4*cachesize, 1); CKERR(r);
r = env->set_default_bt_compare(env, int64_dbt_cmp); CKERR(r);
r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);

View file

@ -84,7 +84,7 @@ run (int choice) {
i=0;
while (0==(r=(c->c_get(c, &kdbt, &vdbt, DB_FIRST)))) {
i++;
r=c->c_del(c, 0);
r = db->del(db, txn, &kdbt, DB_DELETE_ANY);
CKERR(r);
}
assert(r==DB_NOTFOUND);

View file

@ -86,7 +86,7 @@ test_789(void) {
r = db->cursor(db, txn, &cursor, 0); assert(r == 0);
DBT key, val;
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&val), DB_NEXT); assert(r == 0);
r = cursor->c_del(cursor, 0); assert(r == 0);
r = db->del(db, txn, &key, DB_DELETE_ANY); assert(r == 0);
r = cursor->c_close(cursor); assert(r == 0);
toku_free(key.data); toku_free(val.data);
r = txn->commit(txn, 0); assert(r == 0);
@ -121,7 +121,7 @@ test_789(void) {
r = db->cursor(db, txn, &cursor, 0); assert(r == 0);
DBT key, val;
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&val), DB_NEXT); assert(r == 0);
r = cursor->c_del(cursor, 0); assert(r == 0);
r = db->del(db, txn, &key, DB_DELETE_ANY); assert(r == 0);
r = cursor->c_close(cursor); assert(r == 0);
toku_free(key.data); toku_free(val.data);
r = txn->commit(txn, 0); assert(r == 0);

View file

@ -102,7 +102,10 @@ test_bulk_fetch (u_int64_t n, BOOL prelock, BOOL disable_prefetching) {
DB_ENV *env;
r = db_env_create(&env, 0); assert(r == 0);
r=env->set_default_bt_compare(env, int64_dbt_cmp); CKERR(r);
r = env->set_cachesize(env, 0, (u_int32_t)n, 1); assert(r == 0);
// arbitrarily have cachetable size be 4*n
// goal is to make it small enough such that all of data
// does not fit in cachetable, but not so small that we get thrashing
r = env->set_cachesize(env, 0, (u_int32_t)4*n, 1); assert(r == 0);
r = env->open(env, ENVDIR, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0);
DB *db;

View file

@ -46,9 +46,6 @@ test_cursor_current (void) {
DBT key, data; int kk, vv;
r = cursor->c_del(cursor, 0);
assert(r == EINVAL);
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&data), DB_CURRENT);
assert(r == EINVAL);
@ -70,17 +67,12 @@ test_cursor_current (void) {
assert(data.size == sizeof vv);
memcpy(&vv, data.data, data.size);
assert(vv == v);
r = db->del(db, null_txn, &key, DB_DELETE_ANY);
toku_free(key.data); toku_free(data.data);
r = cursor->c_del(cursor, 0);
CKERR(r);
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&data), DB_CURRENT);
CKERR2(r,DB_KEYEMPTY);
r = cursor->c_del(cursor, 0);
CKERR2(r,DB_KEYEMPTY);
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&data), DB_CURRENT);
CKERR2(r,DB_KEYEMPTY);

View file

@ -1,103 +0,0 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "test.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <errno.h>
#include <sys/stat.h>
#include <db.h>
// Advance `cursor` with operation `op` and assert that the row it returns
// has exactly the int key `k` and the int value `v`.  On a mismatch the
// expected and actual values are printed before the assertion fires.
// The buffers malloc'ed by the cursor for the row are freed before returning.
static void
cursor_expect (DBC *cursor, int k, int v, int op) {
    DBT found_key, found_val;
    int got_k;
    int got_v;

    int r = cursor->c_get(cursor, dbt_init_malloc(&found_key), dbt_init_malloc(&found_val), op);
    assert(r == 0);

    // Copy the payloads out; they must be exactly int-sized.
    assert(found_key.size == sizeof k);
    memcpy(&got_k, found_key.data, found_key.size);
    assert(found_val.size == sizeof v);
    memcpy(&got_v, found_val.data, found_val.size);

    int mismatch = (got_k != k) || (got_v != v);
    if (mismatch) {
        printf("expect key %u got %u - %u %u\n", (uint32_t)htonl(k), (uint32_t)htonl(got_k), (uint32_t)htonl(v), (uint32_t)htonl(got_v));
    }
    assert(got_k == k);
    assert(got_v == v);

    toku_free(found_key.data);
    toku_free(found_val.data);
}
/* generate a multi-level tree and delete all entries with a cursor
verify that the pivot flags are toggled (currently by inspection)
dup_mode is passed unchanged to db->set_flags (0, or DB_DUP under BDB). */
static void
test_cursor_delete (int dup_mode) {
if (verbose) printf("test_cursor_delete:%d\n", dup_mode);
// Sizing: 4096/32 = 128 assumed entries per page, 16 pages' worth = 2048 rows.
int pagesize = 4096;
int elementsize = 32;
int npp = pagesize/elementsize;
int n = 16*npp; /* build a 2 level tree */
DB_TXN * const null_txn = 0;
const char * const fname = "test.cursor.delete.brt";
int r;
// Start from an empty environment directory.
r = system("rm -rf " ENVDIR); assert(r == 0);
r = toku_os_mkdir(ENVDIR, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
/* create the dup database file */
DB_ENV *env;
r = db_env_create(&env, 0); assert(r == 0);
#ifdef USE_TDB
r = env->set_redzone(env, 0); CKERR(r);
#endif
r = env->open(env, ENVDIR, DB_CREATE+DB_PRIVATE+DB_INIT_MPOOL, 0); assert(r == 0);
DB *db;
r = db_create(&db, env, 0); assert(r == 0);
db->set_errfile(db,0); // Turn off those annoying errors
r = db->set_flags(db, dup_mode); assert(r == 0);
r = db->set_pagesize(db, pagesize); assert(r == 0);
r = db->open(db, null_txn, fname, "main", DB_BTREE, DB_CREATE, 0666); assert(r == 0);
// Insert rows (htonl(i), htonl(i)) for i in [0, n).
// NOTE(review): network byte order presumably makes the byte-wise key order
// match numeric order -- confirm against the default comparator.
int i;
for (i=0; i<n; i++) {
int k = htonl(i);
int v = htonl(i);
DBT key, val;
r = db->put(db, null_txn, dbt_init(&key, &k, sizeof k), dbt_init(&val, &v, sizeof v), 0); assert(r == 0);
}
/* verify the sort order with a cursor, deleting each row right after it is read */
DBC *cursor;
r = db->cursor(db, null_txn, &cursor, 0); assert(r == 0);
for (i=0; i<n; i++) {
cursor_expect(cursor, htonl(i), htonl(i), DB_NEXT);
r = cursor->c_del(cursor, 0); assert(r == 0);
}
// Tear down in reverse order of creation: cursor, db, env.
r = cursor->c_close(cursor); assert(r == 0);
r = db->close(db, 0); assert(r == 0);
r = env->close(env, 0); assert(r == 0);
}
// Test entry point: run the cursor-delete test without duplicates, and
// additionally with DB_DUP when built against BDB.
int
test_main(int argc, char *const argv[]) {
    const int no_dup_flags = 0;

    parse_args(argc, argv);
    test_cursor_delete(no_dup_flags);
#ifdef USE_BDB
    test_cursor_delete(DB_DUP);
#endif
    return 0;
}

View file

@ -1,76 +0,0 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "test.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <errno.h>
#include <sys/stat.h>
#include <db.h>
static DB_ENV *dbenv;
static DB *db;
static DB_TXN * txn;
static DBC *cursor;
// Scenario: insert ("a","b") and commit, delete "a" and commit, then in a
// still-open transaction re-insert ("a","c") and delete it through a cursor.
// A second c_del on the same position must report DB_KEYEMPTY and a
// following DB_NEXT must report DB_NOTFOUND.
// Keys and values are stored with size 2, i.e. including the NUL terminator,
// which is what allows the strcmp() checks below.
static void
test_cursor_delete2 (void) {
int r;
DBT key,val;
r = db_env_create(&dbenv, 0); CKERR(r);
r = dbenv->open(dbenv, ENVDIR, DB_PRIVATE|DB_INIT_MPOOL|DB_CREATE|DB_INIT_TXN, 0); CKERR(r);
r = db_create(&db, dbenv, 0); CKERR(r);
// txn 1: create the dictionary
r = dbenv->txn_begin(dbenv, 0, &txn, 0); CKERR(r);
r = db->open(db, txn, "primary.db", NULL, DB_BTREE, DB_CREATE, 0600); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
// txn 2: insert a -> b
r = dbenv->txn_begin(dbenv, 0, &txn, 0); CKERR(r);
r = db->put(db, txn, dbt_init(&key, "a", 2), dbt_init(&val, "b", 2), 0); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
// txn 3: delete a
r = dbenv->txn_begin(dbenv, 0, &txn, 0); CKERR(r);
r = db->del(db, txn, dbt_init(&key, "a", 2), 0); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
// txn 4: re-insert a -> c, then exercise the cursor inside the same txn
r = dbenv->txn_begin(dbenv, 0, &txn, 0); CKERR(r);
r = db->put(db, txn, dbt_init(&key, "a", 2), dbt_init(&val, "c", 2), 0); CKERR(r);
// Self-assignment has no effect; NOTE(review): presumably kept to silence a compiler warning.
cursor=cursor;
r = db->cursor(db, txn, &cursor, 0); CKERR(r);
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&val), DB_FIRST); CKERR(r);
assert(strcmp(key.data, "a")==0); toku_free(key.data);
assert(strcmp(val.data, "c")==0); toku_free(val.data);
// First delete succeeds; deleting the same position again must fail with DB_KEYEMPTY.
r = cursor->c_del(cursor, 0); CKERR(r);
r = cursor->c_del(cursor, 0); assert(r==DB_KEYEMPTY);
// Nothing is left after the deleted row.
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&val), DB_NEXT); assert(r==DB_NOTFOUND);
r = cursor->c_close(cursor); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
r = db->close(db, 0); CKERR(r);
r = dbenv->close(dbenv, 0); CKERR(r);
}
// Test entry point: recreate an empty environment directory, then run
// test_cursor_delete2.  Both the directory removal and its re-creation are
// checked, so a failed mkdir is reported here rather than as a confusing
// failure inside the environment open.
int
test_main(int argc, char *const argv[]) {
    parse_args(argc, argv);
    int r;
    r = system("rm -rf " ENVDIR);
    CKERR(r);
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);
    test_cursor_delete2();
    return 0;
}

View file

@ -1,89 +0,0 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "test.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <memory.h>
#include <errno.h>
#include <sys/stat.h>
#include <db.h>
static DB_ENV *dbenv;
static DB *db;
static DB_TXN * txn;
static DBC *cursor;
// Same insert / delete / re-insert / cursor-delete scenario as a plain
// cursor delete test, parameterized by:
//   c_del_flags          - flags passed to both cursor c_del calls
//   txn_isolation_flags  - flags passed to every txn_begin
// The environment directory is wiped and recreated on every call.
static void
test_cursor_delete_2119 (u_int32_t c_del_flags, u_int32_t txn_isolation_flags) {
int r;
r = system("rm -rf " ENVDIR);
CKERR(r);
r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
CKERR(r);
DBT key,val;
r = db_env_create(&dbenv, 0); CKERR(r);
r = dbenv->open(dbenv, ENVDIR, DB_PRIVATE|DB_INIT_MPOOL|DB_CREATE|DB_INIT_TXN|DB_INIT_LOCK, 0); CKERR(r);
r = db_create(&db, dbenv, 0); CKERR(r);
// txn 1: create the dictionary
r = dbenv->txn_begin(dbenv, 0, &txn, txn_isolation_flags); CKERR(r);
r = db->open(db, txn, "primary.db", NULL, DB_BTREE, DB_CREATE, 0600); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
// txn 2: insert a -> b
r = dbenv->txn_begin(dbenv, 0, &txn, txn_isolation_flags); CKERR(r);
r = db->put(db, txn, dbt_init(&key, "a", 2), dbt_init(&val, "b", 2), 0); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
// txn 3: delete a
r = dbenv->txn_begin(dbenv, 0, &txn, txn_isolation_flags); CKERR(r);
r = db->del(db, txn, dbt_init(&key, "a", 2), 0); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
// txn 4: re-insert a -> c and delete it through a cursor in the same txn
r = dbenv->txn_begin(dbenv, 0, &txn, txn_isolation_flags); CKERR(r);
r = db->put(db, txn, dbt_init(&key, "a", 2), dbt_init(&val, "c", 2), 0); CKERR(r);
// Self-assignment has no effect; NOTE(review): presumably kept to silence a compiler warning.
cursor=cursor;
r = db->cursor(db, txn, &cursor, 0); CKERR(r);
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&val), DB_FIRST); CKERR(r);
assert(strcmp(key.data, "a")==0); toku_free(key.data);
assert(strcmp(val.data, "c")==0); toku_free(val.data);
// First delete succeeds; a repeated delete of the same position must return DB_KEYEMPTY.
r = cursor->c_del(cursor, c_del_flags); CKERR(r);
r = cursor->c_del(cursor, c_del_flags); assert(r==DB_KEYEMPTY);
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&val), DB_NEXT); assert(r==DB_NOTFOUND);
r = cursor->c_close(cursor); CKERR(r);
r = txn->commit(txn, 0); CKERR(r);
r = db->close(db, 0); CKERR(r);
r = dbenv->close(dbenv, 0); CKERR(r);
}
// Test entry point: run test_cursor_delete_2119 for every combination of
// {default, DB_READ_UNCOMMITTED} isolation x {no, DB_PRELOCKED} x
// {no, DB_PRELOCKED_WRITE}.  The three bits of `combo` select the options;
// isolation is the most significant bit and write-prelock the least, so
// write-prelock varies fastest and isolation slowest.
int
test_main(int argc, char *const argv[]) {
    parse_args(argc, argv);

    int combo;
    for (combo = 0; combo < 8; combo++) {
        u_int32_t isolation_flag       = (combo & 4) ? DB_READ_UNCOMMITTED : 0;
        u_int32_t read_prelocked_flag  = (combo & 2) ? DB_PRELOCKED : 0;
        u_int32_t write_prelocked_flag = (combo & 1) ? DB_PRELOCKED_WRITE : 0;

        test_cursor_delete_2119(read_prelocked_flag | write_prelocked_flag,
                                isolation_flag);
    }
    return 0;
}

View file

@ -1,82 +0,0 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "test.h"
#include <memory.h>
#include <db.h>
#include <errno.h>
#include <sys/stat.h>
// ENVDIR is defined in the Makefile
DB *db;
DB_ENV *env;
DBT key;
DBT value;
DBC *dbc;
DB_TXN *const null_txn = 0;
// Recreate an empty environment directory, open the global `env` in it and
// open the global `db` (sub-database "main", 4096-byte pages) in file `name`.
// Every step is checked with CKERR, including the directory creation, so a
// failed mkdir is reported at its source.
static void
setup_db (char* name) {
    int r;
    r = system("rm -rf " ENVDIR);
    CKERR(r);
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);
    r = db_env_create(&env, 0); CKERR(r);
#ifdef USE_TDB
    r = env->set_redzone(env, 0); CKERR(r);
#endif
    r = env->open(env, ENVDIR, DB_CREATE | DB_PRIVATE | DB_INIT_MPOOL, 0666); CKERR(r);
    r = db_create(&db, env, 0); CKERR(r);
    r = db->set_pagesize(db, 4096); CKERR(r);
    r = db->open(db, null_txn, name, "main", DB_BTREE, DB_CREATE, 0666); CKERR(r);
}
static void
close_db (void) {
int r;
r = db->close(db, 0); CKERR(r);
r = env->close(env, 0); CKERR(r);
}
// Insert two rows, ("key","value1") and ("key2","value2"), into the global
// db using the global key/value DBTs.  Sizes include the NUL terminator.
static void
insert (void) {
    int r;

    dbt_init(&key,   "key",    sizeof("key"));
    dbt_init(&value, "value1", sizeof("value1"));
    r = db->put(db, null_txn, &key, &value, 0);
    CKERR(r);

    dbt_init(&key,   "key2",   sizeof("key2"));
    dbt_init(&value, "value2", sizeof("value2"));
    r = db->put(db, null_txn, &key, &value, 0);
    CKERR(r);
}
// Open a cursor on the global db, read and delete the first row, then move
// with the cursor operation `flag`, and delete the row found there too.
// Every call is expected to succeed.
static void
cursor_range_with_delete (u_int32_t flag) {
int r;
r = db->cursor(db, null_txn, &dbc, 0); CKERR(r);
// Position on the first row and delete it.
r = dbc->c_get(dbc, &key, &value, DB_FIRST); CKERR(r);
r = dbc->c_del(dbc, 0); CKERR(r);
// Moving from a just-deleted position must still work.
r = dbc->c_get(dbc, &key, &value, flag); CKERR(r);
r = dbc->c_del(dbc, 0); CKERR(r);
r = dbc->c_close(dbc); CKERR(r);
}
// Test entry point: build a two-row database, delete both rows through a
// cursor that advances with DB_NEXT, then close everything.
int
test_main(int argc, char *const argv[]) {
    const u_int32_t advance_op = DB_NEXT;

    parse_args(argc, argv);

    setup_db("next.db");
    insert();
    cursor_range_with_delete(advance_op);
    close_db();

    return 0;
}

View file

@ -60,7 +60,7 @@ test_main (int UU(argc), char UU(*const argv[])) {
assert(*(int*)val.data == v1); // Will bring up valgrind error.
r = cursor->c_del(cursor, 0);
r = db->del(db, null_txn, &ckey, DB_DELETE_ANY); assert(r == 0);
CKERR(r);
assert(*(int*)val.data == v1); // Will bring up valgrind error.

View file

@ -59,18 +59,6 @@ cget(BOOL success, BOOL find, char txn, int _key, int _data,
else CKERR2s(r, DB_LOCK_DEADLOCK, DB_LOCK_NOTGRANTED);
}
// Delete at the current position of the cursor belonging to transaction
// `txn` and check the result:
//   success && find   -> the delete must succeed
//   success && !find  -> the delete must return DB_NOTFOUND
//   !success          -> the delete must fail with DB_LOCK_DEADLOCK or
//                        DB_LOCK_NOTGRANTED
static void
cdel (BOOL success, BOOL find, char txn) {
    int idx = (int)txn;
    int r = cursors[idx]->c_del(cursors[idx], 0);

    if (!success) {
        CKERR2s(r, DB_LOCK_DEADLOCK, DB_LOCK_NOTGRANTED);
        return;
    }
    if (find) {
        CKERR(r);
    } else {
        CKERR2(r, DB_NOTFOUND);
    }
}
static void
dbdel (BOOL success, BOOL find, char txn, int _key) {
int r;
@ -457,30 +445,6 @@ test_prev (u_int32_t next_type) {
close_dbs();
}
// Lock-conflict tests for cursor deletes.  Two independent scenarios, each
// run against freshly set up databases.
// NOTE(review): the first two arguments of cget/cdel are (success, find);
// the exact meaning of the remaining cget arguments and of put/early_commit
// is defined elsewhere in the file -- confirm there.
static void
test_cdel (void) {
/* ********************************************************************** */
// Scenario 1: 'a' reads and cursor-deletes key 1; afterwards 'b' is expected
// to fail on lookups of key 1 and to succeed (not found) on keys 0 and 2.
setup_dbs();
put(TRUE, 'c', 1, 1);
early_commit('c');
cget(TRUE, TRUE, 'a', 1, 1, 1, 1, DB_SET);
cdel(TRUE, TRUE, 'a');
cget(FALSE, TRUE, 'b', 1, 1, 1, 1, DB_SET);
cget(FALSE, FALSE, 'b', 1, 2, 1, 2, DB_SET);
cget(FALSE, FALSE, 'b', 1, 0, 1, 0, DB_SET);
cget(TRUE, FALSE, 'b', 0, 0, 0, 0, DB_SET);
cget(TRUE, FALSE, 'b', 2, 10, 2, 10, DB_SET);
close_dbs();
/* ********************************************************************** */
// Scenario 2: both 'a' and 'b' read key 1; the cursor delete by 'a' is then
// expected to fail.
setup_dbs();
put(TRUE, 'c', 1, 1);
early_commit('c');
cget(TRUE, TRUE, 'a', 1, 1, 1, 1, DB_SET);
cget(TRUE, TRUE, 'b', 1, 1, 1, 1, DB_SET);
cdel(FALSE, TRUE, 'a');
close_dbs();
}
static void
test_dbdel (void) {
/* If DB_DELETE_ANY changes to 0, then find is meaningful and
@ -524,8 +488,6 @@ test_current (void) {
early_commit('a');
cget(TRUE, TRUE, 'b', 1, 1, 1, 1, DB_SET);
cget(TRUE, TRUE, 'b', 1, 1, 1, 1, DB_CURRENT);
cdel(TRUE, TRUE, 'b');
cget(TRUE, FALSE, 'b', 1, 1, 1, 1, DB_CURRENT);
close_dbs();
}
@ -582,8 +544,6 @@ test (void) {
test_prev( DB_PREV);
test_prev( DB_PREV_NODUP);
/* ********************************************************************** */
test_cdel();
/* ********************************************************************** */
test_dbdel();
/* ********************************************************************** */
test_current();

View file

@ -45,7 +45,7 @@ test_insert_delete_insert (void) {
assert(r == 0);
toku_free(val.data);
r = cursor->c_del(cursor, 0);
r = db->del(db, null_txn, &key, DB_DELETE_ANY); assert(r == 0);
assert(r == 0);
r = cursor->c_get(cursor, dbt_init_malloc(&key), dbt_init_malloc(&val), DB_CURRENT);

View file

@ -1234,12 +1234,10 @@ do_warm_cache(DB_ENV *env, DB **dbs, struct cli_args *args)
scan_arg.operation_extra = &soe;
scan_arg.operation = scan_op_no_check;
scan_arg.lock_type = STRESS_LOCK_NONE;
struct worker_extra we;
we.thread_arg = &scan_arg;
we.operation_lock = NULL;
we.operation_lock_mutex = NULL;
we.num_operations_completed = 0;
worker(&we);
DB_TXN* txn = NULL;
int r = env->txn_begin(env, 0, &txn, 0); CKERR(r);
scan_op_no_check(txn, &scan_arg, &soe);
r = txn->commit(txn,0); CKERR(r);
}
static void

View file

@ -67,10 +67,12 @@ struct __toku_db_env_internal {
generate_row_for_put_func generate_row_for_put;
generate_row_for_del_func generate_row_for_del;
//void (*noticecall)(DB_ENV *, db_notices);
unsigned long cachetable_size;
CACHETABLE cachetable;
TOKULOGGER logger;
toku_ltm* ltm;
int open_txns; // Number of open transactions
DB *directory; // Maps dnames to inames
DB *persistent_environment; // Stores environment settings, can be used for upgrade
@ -127,7 +129,6 @@ int toku_ydb_lock_destroy(void);
void toku_ydb_lock(void);
void toku_ydb_unlock(void);
void toku_ydb_unlock_and_yield(unsigned long useconds);
toku_pthread_mutex_t *toku_ydb_mutex(void);
void toku_ydb_lock_get_status(YDB_LOCK_STATUS statp);
@ -240,10 +241,28 @@ struct __toku_dbc_external {
#define dbc_struct_i(x) (&((struct __toku_dbc_external *)x)->internal_part)
// needed in ydb_db.c
#define DB_ISOLATION_FLAGS (DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT | DB_SERIALIZABLE | DB_INHERIT_ISOLATION)
int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL just_lock);
int toku_grab_write_lock(DB *db, DBT *key, TOKUTXN tokutxn);
// Returns nonzero when the environment's cachetable has been set,
// which this header uses as the "environment is open" test.
static inline int
env_opened(DB_ENV *env) {
    int has_cachetable = (env->i->cachetable != 0);
    return has_cachetable;
}
void env_note_zombie_db(DB_ENV *env, DB *db);
void env_panic(DB_ENV * env, int cause, char * msg);
void env_note_db_opened(DB_ENV *env, DB *db);
void env_note_db_closed(DB_ENV *env, DB *db);
void env_note_zombie_db_closed(DB_ENV *env, DB *db);
int toku_env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, u_int32_t flags);
int toku_env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, const char *newname, u_int32_t flags);
int toku_txn_begin_internal(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, u_int32_t flags, bool internal, bool holds_ydb_lock);
int toku_txn_commit(DB_TXN * txn, u_int32_t flags, TXN_PROGRESS_POLL_FUNCTION, void*, bool release_multi_operation_client_lock);
int toku_txn_abort(DB_TXN * txn, TXN_PROGRESS_POLL_FUNCTION, void*, bool release_multi_operation_client_lock);
int locked_txn_commit(DB_TXN *txn, u_int32_t flags);
int locked_txn_abort(DB_TXN *txn);
#if defined(__cplusplus)
}

3696
src/ydb.c

File diff suppressed because it is too large Load diff

View file

@ -14,9 +14,6 @@ int toku_ydb_init(void);
// Called when the ydb library is unloaded.
int toku_ydb_destroy(void);
// Called to use dlmalloc functions.
void setup_dlmalloc(void) __attribute__((__visibility__("default")));
// db_env_create for the trace library
int db_env_create_toku10(DB_ENV **, u_int32_t) __attribute__((__visibility__("default")));

916
src/ydb_cursor.c Executable file
View file

@ -0,0 +1,916 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "Copyright (c) 2007-2009 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <db.h>
#include "toku_assert.h"
#include "ydb-internal.h"
#include "ydb_cursor.h"
#include "ydb_row_lock.h"
static YDB_C_LAYER_STATUS_S ydb_c_layer_status;
#ifdef STATUS_VALUE
#undef STATUS_VALUE
#endif
#define STATUS_VALUE(x) ydb_c_layer_status.status[x].value.num
// Fill in the descriptive fields (keyname, type, legend) of status row k.
// Wrapped in do { } while (0) so the expansion is a single statement and
// remains safe inside an unbraced if/else body.
#define STATUS_INIT(k,t,l) do { \
        ydb_c_layer_status.status[k].keyname = #k; \
        ydb_c_layer_status.status[k].type    = t;  \
        ydb_c_layer_status.status[k].legend  = l;  \
    } while (0)

// Set up the cursor-layer status table and mark it initialized.
static void
ydb_c_layer_status_init (void) {
    // Note, this function initializes the keyname, type, and legend fields.
    // Value fields are initialized to zero by compiler.
    STATUS_INIT(YDB_C_LAYER_NUM_POINT_QUERIES,      UINT64, "dictionary point queries");
    STATUS_INIT(YDB_C_LAYER_NUM_SEQUENTIAL_QUERIES, UINT64, "dictionary sequential queries");
    ydb_c_layer_status.initialized = true;
}
#undef STATUS_INIT
// Copy the cursor-layer status table into *statp.  The table's descriptive
// fields are filled in lazily, the first time this is called.
void
ydb_c_layer_get_status(YDB_C_LAYER_STATUS statp) {
    if (!ydb_c_layer_status.initialized) {
        ydb_c_layer_status_init();
    }
    *statp = ydb_c_layer_status;
}
/* lightweight cursor methods. */
static int toku_c_getf_current_binding(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra);
// Return the operation part of a cursor flag word, i.e. the bits selected
// by DB_OPFLAGS_MASK, with any bitwise-or'd modifier bits removed.
static int
get_main_cursor_flag(u_int32_t flags) {
    u_int32_t op_bits = flags & DB_OPFLAGS_MASK;
    return op_bits;
}
// Return the modifier part of a cursor flag word, i.e. every bit that is
// NOT selected by DB_OPFLAGS_MASK.
static int
get_nonmain_cursor_flags(u_int32_t flags) {
    u_int32_t modifier_bits = flags & ~(DB_OPFLAGS_MASK);
    return modifier_bits;
}
// Report whether the brt cursor underlying `c` is still uninitialized,
// as determined by toku_brt_cursor_uninitialized.
static inline BOOL
toku_c_uninitialized(DBC* c) {
    BOOL uninitialized = toku_brt_cursor_uninitialized(dbc_struct_i(c)->c);
    return uninitialized;
}
// Context handed to c_get_wrapper_callback: the caller's output DBTs plus
// the cursor's simple_dbt buffers, all passed on to toku_dbt_set.
typedef struct query_context_wrapped_t {
DBT *key;                 // caller-supplied DBT that receives the found key
DBT *val;                 // caller-supplied DBT that receives the found value
struct simple_dbt *skey;  // the cursor's skey buffer (dbc_struct_i(c)->skey)
struct simple_dbt *sval;  // the cursor's sval buffer (dbc_struct_i(c)->sval)
} *QUERY_CONTEXT_WRAPPED, QUERY_CONTEXT_WRAPPED_S;
// Populate a wrapped query context: remember the caller's output DBTs and
// pick up the cursor's own skey/sval buffers.
static inline void
query_context_wrapped_init(QUERY_CONTEXT_WRAPPED context, DBC *c, DBT *key, DBT *val) {
    // caller-owned outputs
    context->key = key;
    context->val = val;

    // cursor-owned buffers
    context->skey = dbc_struct_i(c)->skey;
    context->sval = dbc_struct_i(c)->sval;
}
// YDB callback that copies the found key and value into the DBTs recorded
// in the QUERY_CONTEXT_WRAPPED passed as `extra`.  The value is only copied
// when the key copy succeeded; the first nonzero toku_dbt_set result is
// returned.
static int
c_get_wrapper_callback(DBT const *key, DBT const *val, void *extra) {
    QUERY_CONTEXT_WRAPPED wrapped = extra;

    int r = toku_dbt_set(key->size, key->data, wrapped->key, wrapped->skey);
    if (r != 0) {
        return r;
    }
    return toku_dbt_set(val->size, val->data, wrapped->val, wrapped->sval);
}
// Fetch the row at the cursor's current binding into `key`/`val` by running
// toku_c_getf_current_binding with the copying wrapper callback.
static int
toku_c_get_current_unconditional(DBC* c, u_int32_t flags, DBT* key, DBT* val) {
    QUERY_CONTEXT_WRAPPED_S wrapped;

    query_context_wrapped_init(&wrapped, c, key, val);
    return toku_c_getf_current_binding(c, flags, c_get_wrapper_callback, &wrapped);
}
// Extract the DB_PRELOCKED / DB_PRELOCKED_WRITE bits from `flags`.
// A cursor whose isolation level is anything other than
// TOKU_ISO_SERIALIZABLE is treated as already holding its read locks, so
// DB_PRELOCKED is forced on for it.
static inline u_int32_t
get_cursor_prelocked_flags(u_int32_t flags, DBC* dbc) {
    const u_int32_t prelock_mask = DB_PRELOCKED | DB_PRELOCKED_WRITE;
    u_int32_t requested = flags & prelock_mask;

    if (dbc_struct_i(dbc)->iso == TOKU_ISO_SERIALIZABLE) {
        return requested;
    }
    return requested | DB_PRELOCKED;
}
// State shared by all cursor query callbacks in this file (for example
// c_getf_first_callback).  It is filled in by query_context_base_init and
// passed to the brt layer as the callback's `extra` argument.
typedef struct query_context_base_t {
BRT_CURSOR c;                     // brt cursor underlying the DBC
DB_TXN *txn;                      // transaction the cursor was opened in
DB *db;                           // database the cursor belongs to
YDB_CALLBACK_FUNCTION f;          // user callback invoked with the found row
void *f_extra;                    // opaque argument forwarded to f
int r_user_callback;              // value returned by f, reported when the brt layer returns TOKUDB_USER_CALLBACK_ERROR
BOOL do_locking;                  // TRUE when the callback must take a range lock itself
BOOL is_write_op;                 // TRUE -> request write locks, FALSE -> read locks
toku_lock_request lock_request;   // lock request reused across retries of one query
} *QUERY_CONTEXT_BASE, QUERY_CONTEXT_BASE_S;
// Query context for operations that take no input key (first, last, next,
// prev, current); it carries only the shared base state.
typedef struct query_context_t {
QUERY_CONTEXT_BASE_S base;
} *QUERY_CONTEXT, QUERY_CONTEXT_S;
// Query context for operations that search from a caller-supplied key
// (and optionally value), such as toku_c_getf_set.
typedef struct query_context_with_input_t {
QUERY_CONTEXT_BASE_S base;
DBT *input_key;   // key the caller asked for
DBT *input_val;   // value the caller asked for; NULL when unused (toku_c_getf_set passes NULL)
} *QUERY_CONTEXT_WITH_INPUT, QUERY_CONTEXT_WITH_INPUT_S;
// Initialize the shared part of a query context from cursor `c`.
//   flag        - cursor flags; only the prelock bits are examined here
//   is_write_op - TRUE when the query should take write locks
//   f, extra    - user callback and its opaque argument
// do_locking ends up TRUE only when the database has a lock tree and the
// relevant range is not already covered by a prelock flag.
static void
query_context_base_init(QUERY_CONTEXT_BASE context, DBC *c, u_int32_t flag, BOOL is_write_op, YDB_CALLBACK_FUNCTION f, void *extra) {
    context->c               = dbc_struct_i(c)->c;
    context->txn             = dbc_struct_i(c)->txn;
    context->db              = c->dbp;
    context->f               = f;
    context->f_extra         = extra;
    context->is_write_op     = is_write_op;

    u_int32_t prelocked = get_cursor_prelocked_flags(flag, c);
    if (context->is_write_op) {
        // Only care about whether already locked for write
        prelocked &= DB_PRELOCKED_WRITE;
    }

    int has_lock_tree  = (context->db->i->lt != NULL);
    int already_locked = ((prelocked & (DB_PRELOCKED|DB_PRELOCKED_WRITE)) != 0);
    context->do_locking = (BOOL)(has_lock_tree && !already_locked);

    context->r_user_callback = 0;
    toku_lock_request_default_init(&context->lock_request);
}
// Release the lock request embedded in a query context base.
// Counterpart of query_context_base_init.
static void
query_context_base_destroy(QUERY_CONTEXT_BASE context) {
    toku_lock_request *request = &context->lock_request;
    toku_lock_request_destroy(request);
}
// Initialize `context` for a query that takes read locks.
static void
query_context_init_read(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    query_context_base_init(&context->base, c, flag, FALSE, f, extra);
}
// Initialize `context` for a query that takes write locks.
static void
query_context_init_write(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    query_context_base_init(&context->base, c, flag, TRUE, f, extra);
}
// Initialize a query context that carries the caller's search key/value.
// Write locks are requested when DB_RMW is present in `flag` or when the
// cursor itself was created with DB_RMW; otherwise read locks are used.
static void
query_context_with_input_init(QUERY_CONTEXT_WITH_INPUT context, DBC *c, u_int32_t flag, DBT *key, DBT *val, YDB_CALLBACK_FUNCTION f, void *extra) {
    BOOL wants_write_lock = FALSE;
    if ((flag & DB_RMW) != 0 || dbc_struct_i(c)->rmw) {
        wants_write_lock = TRUE;
    }

    query_context_base_init(&context->base, c, flag, wants_write_lock, f, extra);
    context->input_key = key;
    context->input_val = val;
}
static int c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool);
// Initialize a plain query context, choosing write locking when DB_RMW is
// set in `flag` or the cursor was created with DB_RMW, and read locking
// otherwise.
static void
c_query_context_init(QUERY_CONTEXT context, DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    if ((flag & DB_RMW) || dbc_struct_i(c)->rmw) {
        query_context_init_write(context, c, flag, f, extra);
    } else {
        query_context_init_read(context, c, flag, f, extra);
    }
}
// Tear down a plain query context by destroying its base part.
static void
c_query_context_destroy(QUERY_CONTEXT context) {
    QUERY_CONTEXT_BASE base = &context->base;
    query_context_base_destroy(base);
}
// Position the cursor on the first row and hand it to `f`.
// The brt search is retried whenever the callback's range lock could not be
// granted immediately (DB_LOCK_NOTGRANTED) and the subsequent wait on the
// lock request succeeds.  Counted as a point query.
// Returns 0 on success, the user callback's own error when it failed, or
// the error from the brt layer / lock wait otherwise.
static int
toku_c_getf_first(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);
    STATUS_VALUE(YDB_C_LAYER_NUM_POINT_QUERIES)++;

    QUERY_CONTEXT_S ctx; //Describes the context of this query.
    c_query_context_init(&ctx, c, flag, f, extra);

    int r;
    do {
        //toku_brt_cursor_first will call c_getf_first_callback(..., ctx) (if query is successful)
        r = toku_brt_cursor_first(dbc_struct_i(c)->c, c_getf_first_callback, &ctx);
        if (r != DB_LOCK_NOTGRANTED) {
            if (r == TOKUDB_USER_CALLBACK_ERROR) {
                r = ctx.base.r_user_callback;
            }
            break;
        }
        // Lock is pending: wait for it, then retry the search if it was granted.
        r = toku_lock_request_wait_with_default_timeout(&ctx.base.lock_request, c->dbp->i->lt);
    } while (r == 0);

    c_query_context_destroy(&ctx);
    return r;
}
// brt-layer callback for toku_c_getf_first.
// key == NULL means no row was found.  When locking is required, the range
// from -infinity up to the found key (or +infinity when nothing was found)
// is locked first.  The user callback runs only if the lock step returned 0,
// a row was found, and lock_only is false.
// Returns the lock error, the user callback's result, or 0.
static int
c_getf_first_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;
    DBT found_key = { .data = (void *) key, .size = keylen };
    int r = 0;

    if (base->do_locking) {
        const DBT *range_left  = toku_lt_neg_infinity;
        const DBT *range_right = (key != NULL) ? &found_key : toku_lt_infinity;
        r = start_range_lock(base->db, base->txn, range_left, range_right,
                             base->is_write_op ? LOCK_REQUEST_WRITE : LOCK_REQUEST_READ, &base->lock_request);
    }

    //Give brt-layer an error (if any) to return from toku_brt_cursor_first
    if (r != 0 || key == NULL || lock_only) {
        return r;
    }

    //Found and locked: call the application-layer callback.
    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);
    return base->r_user_callback;
}
static int c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool);
// Position the cursor on the last row and hand it to `f`.
// The brt search is retried whenever the callback's range lock could not be
// granted immediately (DB_LOCK_NOTGRANTED) and the subsequent wait on the
// lock request succeeds.  Counted as a point query.
// Returns 0 on success, the user callback's own error when it failed, or
// the error from the brt layer / lock wait otherwise.
static int
toku_c_getf_last(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);
    STATUS_VALUE(YDB_C_LAYER_NUM_POINT_QUERIES)++;

    QUERY_CONTEXT_S ctx; //Describes the context of this query.
    c_query_context_init(&ctx, c, flag, f, extra);

    int r;
    do {
        //toku_brt_cursor_last will call c_getf_last_callback(..., ctx) (if query is successful)
        r = toku_brt_cursor_last(dbc_struct_i(c)->c, c_getf_last_callback, &ctx);
        if (r != DB_LOCK_NOTGRANTED) {
            if (r == TOKUDB_USER_CALLBACK_ERROR) {
                r = ctx.base.r_user_callback;
            }
            break;
        }
        // Lock is pending: wait for it, then retry the search if it was granted.
        r = toku_lock_request_wait_with_default_timeout(&ctx.base.lock_request, c->dbp->i->lt);
    } while (r == 0);

    c_query_context_destroy(&ctx);
    return r;
}
// brt-layer callback for toku_c_getf_last.
// key == NULL means no row was found.  When locking is required, the range
// from the found key (or -infinity when nothing was found) up to +infinity
// is locked first.  The user callback runs only if the lock step returned 0,
// a row was found, and lock_only is false.
// Returns the lock error, the user callback's result, or 0.
static int
c_getf_last_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;
    DBT found_key = { .data = (void *) key, .size = keylen };
    int r = 0;

    if (base->do_locking) {
        const DBT *range_left  = (key != NULL) ? &found_key : toku_lt_neg_infinity;
        const DBT *range_right = toku_lt_infinity;
        r = start_range_lock(base->db, base->txn, range_left, range_right,
                             base->is_write_op ? LOCK_REQUEST_WRITE : LOCK_REQUEST_READ, &base->lock_request);
    }

    //Give brt-layer an error (if any) to return from toku_brt_cursor_last
    if (r != 0 || key == NULL || lock_only) {
        return r;
    }

    //Found and locked: call the application-layer callback.
    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);
    return base->r_user_callback;
}
static int c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool);
// Advance the cursor to the next row and hand it to `f`.
// An uninitialized cursor is treated as a request for the first row.
// Otherwise the brt search is retried whenever the callback's range lock
// could not be granted immediately (DB_LOCK_NOTGRANTED) and the subsequent
// wait on the lock request succeeds.
// Returns 0 on success, the user callback's own error when it failed, or
// the error from the brt layer / lock wait otherwise.
static int
toku_c_getf_next(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    int r;
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    if (toku_c_uninitialized(c)) {
        return toku_c_getf_first(c, flag, f, extra);
    }

    QUERY_CONTEXT_S ctx; //Describes the context of this query.
    c_query_context_init(&ctx, c, flag, f, extra);
    do {
        //toku_brt_cursor_next will call c_getf_next_callback(..., ctx) (if query is successful)
        r = toku_brt_cursor_next(dbc_struct_i(c)->c, c_getf_next_callback, &ctx);
        if (r != DB_LOCK_NOTGRANTED) {
            if (r == TOKUDB_USER_CALLBACK_ERROR) {
                r = ctx.base.r_user_callback;
            }
            break;
        }
        // Lock is pending: wait for it, then retry the search if it was granted.
        r = toku_lock_request_wait_with_default_timeout(&ctx.base.lock_request, c->dbp->i->lt);
    } while (r == 0);
    c_query_context_destroy(&ctx);
    return r;
}
// brt-layer callback for toku_c_getf_next.
// key == NULL means no further row was found.  When locking is required,
// the range from the cursor's current key (obtained with
// toku_brt_cursor_peek) up to the found key, or +infinity when nothing was
// found, is locked first.  The user callback runs only if the lock step
// returned 0, a row was found, and lock_only is false; each such delivery
// is counted as a sequential query.
// Returns the lock error, the user callback's result, or 0.
static int
c_getf_next_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;
    DBT found_key = { .data = (void *) key, .size = keylen };
    int r = 0;

    if (base->do_locking) {
        const DBT *current_key, *current_val;
        toku_brt_cursor_peek(base->c, &current_key, &current_val);

        const DBT *range_left  = current_key;
        const DBT *range_right = (key != NULL) ? &found_key : toku_lt_infinity;
        r = start_range_lock(base->db, base->txn, range_left, range_right,
                             base->is_write_op ? LOCK_REQUEST_WRITE : LOCK_REQUEST_READ, &base->lock_request);
    }

    //Give brt-layer an error (if any) to return from toku_brt_cursor_next
    if (r != 0 || key == NULL || lock_only) {
        return r;
    }

    //Found and locked: call the application-layer callback.
    STATUS_VALUE(YDB_C_LAYER_NUM_SEQUENTIAL_QUERIES)++; // accountability
    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);
    return base->r_user_callback;
}
static int c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool);
// Move the cursor to the previous row and hand it to `f`.
// An uninitialized cursor is treated as a request for the last row.
// Otherwise the brt search is retried whenever the callback's range lock
// could not be granted immediately (DB_LOCK_NOTGRANTED) and the subsequent
// wait on the lock request succeeds.
// Returns 0 on success, the user callback's own error when it failed, or
// the error from the brt layer / lock wait otherwise.
static int
toku_c_getf_prev(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    int r;
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    if (toku_c_uninitialized(c)) {
        return toku_c_getf_last(c, flag, f, extra);
    }

    QUERY_CONTEXT_S ctx; //Describes the context of this query.
    c_query_context_init(&ctx, c, flag, f, extra);
    do {
        //toku_brt_cursor_prev will call c_getf_prev_callback(..., ctx) (if query is successful)
        r = toku_brt_cursor_prev(dbc_struct_i(c)->c, c_getf_prev_callback, &ctx);
        if (r != DB_LOCK_NOTGRANTED) {
            if (r == TOKUDB_USER_CALLBACK_ERROR) {
                r = ctx.base.r_user_callback;
            }
            break;
        }
        // Lock is pending: wait for it, then retry the search if it was granted.
        r = toku_lock_request_wait_with_default_timeout(&ctx.base.lock_request, c->dbp->i->lt);
    } while (r == 0);
    c_query_context_destroy(&ctx);
    return r;
}
// brt-layer callback for toku_c_getf_prev.
// key == NULL means no earlier row was found.  When locking is required,
// the range from the found key (or -infinity when nothing was found) up to
// the cursor's current key (obtained with toku_brt_cursor_peek) is locked
// first.  The user callback runs only if the lock step returned 0, a row
// was found, and lock_only is false; each such delivery is counted as a
// sequential query.
// Returns the lock error, the user callback's result, or 0.
static int
c_getf_prev_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;
    DBT found_key = { .data = (void *) key, .size = keylen };
    int r = 0;

    if (base->do_locking) {
        const DBT *current_key, *current_val;
        toku_brt_cursor_peek(base->c, &current_key, &current_val);

        const DBT *range_left  = (key != NULL) ? &found_key : toku_lt_neg_infinity;
        const DBT *range_right = current_key;
        r = start_range_lock(base->db, base->txn, range_left, range_right,
                             base->is_write_op ? LOCK_REQUEST_WRITE : LOCK_REQUEST_READ, &base->lock_request);
    }

    //Give brt-layer an error (if any) to return from toku_brt_cursor_prev
    if (r != 0 || key == NULL || lock_only) {
        return r;
    }

    //Found and locked: call the application-layer callback.
    STATUS_VALUE(YDB_C_LAYER_NUM_SEQUENTIAL_QUERIES)++; // accountability
    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);
    return base->r_user_callback;
}
static int c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool);
// Hand the row at the cursor's current position to `f`, using the brt
// layer's DB_CURRENT operation.  No lock retry loop is needed because the
// callback for this operation takes no locks.  Counted as a sequential query.
// Returns the user callback's own error when it failed, otherwise the brt
// layer's result.
static int
toku_c_getf_current(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    QUERY_CONTEXT_S ctx; //Describes the context of this query.
    STATUS_VALUE(YDB_C_LAYER_NUM_SEQUENTIAL_QUERIES)++; // accountability
    c_query_context_init(&ctx, c, flag, f, extra);

    //toku_brt_cursor_current will call c_getf_current_callback(..., ctx) (if query is successful)
    int r = toku_brt_cursor_current(dbc_struct_i(c)->c, DB_CURRENT, c_getf_current_callback, &ctx);
    if (r == TOKUDB_USER_CALLBACK_ERROR) {
        r = ctx.base.r_user_callback;
    }

    c_query_context_destroy(&ctx);
    return r;
}
// brt-layer callback for the "current" cursor operations.  Takes no locks.
// When a row is present (key != NULL) and lock_only is false, the row is
// passed to the user callback and its result is recorded and returned;
// otherwise 0 is returned.
static int
c_getf_current_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;

    //Nothing to deliver: report success to the brt layer.
    if (key == NULL || lock_only) {
        return 0;
    }

    DBT found_key = { .data = (void *) key, .size = keylen };
    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);

    //Give brt-layer an error (if any) to return from toku_brt_cursor_current
    return base->r_user_callback;
}
// Same as toku_c_getf_current, except that the brt layer is asked for
// DB_CURRENT_BINDING instead of DB_CURRENT.  Counted as a sequential query.
// Returns the user callback's own error when it failed, otherwise the brt
// layer's result.
static int
toku_c_getf_current_binding(DBC *c, u_int32_t flag, YDB_CALLBACK_FUNCTION f, void *extra) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    QUERY_CONTEXT_S ctx; //Describes the context of this query.
    STATUS_VALUE(YDB_C_LAYER_NUM_SEQUENTIAL_QUERIES)++; // accountability
    c_query_context_init(&ctx, c, flag, f, extra);

    //toku_brt_cursor_current will call c_getf_current_callback(..., ctx) (if query is successful)
    int r = toku_brt_cursor_current(dbc_struct_i(c)->c, DB_CURRENT_BINDING, c_getf_current_callback, &ctx);
    if (r == TOKUDB_USER_CALLBACK_ERROR) {
        r = ctx.base.r_user_callback;
    }

    c_query_context_destroy(&ctx);
    return r;
}
static int c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only);

// Point query: run toku_brt_cursor_set for `key`; on success the brt layer
// invokes c_getf_set_callback, which takes the row lock (if locking is
// enabled for this query) and forwards the pair to f.
// If the query reports DB_LOCK_NOTGRANTED, wait on the pending lock request
// with the default timeout and retry the query when the wait succeeds.
// Returns the final query/wait result; TOKUDB_USER_CALLBACK_ERROR is replaced
// by the value f returned.
int
toku_c_getf_set(DBC *c, u_int32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    int rc;
    QUERY_CONTEXT_WITH_INPUT_S query; // per-call query state shared with the callback
    STATUS_VALUE(YDB_C_LAYER_NUM_POINT_QUERIES)++;
    query_context_with_input_init(&query, c, flag, key, NULL, f, extra);

    for (;;) {
        rc = toku_brt_cursor_set(dbc_struct_i(c)->c, key, c_getf_set_callback, &query);
        if (rc != DB_LOCK_NOTGRANTED) {
            if (rc == TOKUDB_USER_CALLBACK_ERROR) {
                rc = query.base.r_user_callback;
            }
            break;
        }
        // The lock is held by someone else: block until it is granted or the wait fails.
        rc = toku_lock_request_wait_with_default_timeout(&query.base.lock_request, c->dbp->i->lt);
        if (rc != 0) {
            break;
        }
    }

    query_context_base_destroy(&query.base);
    return rc;
}
// Callback passed to toku_brt_cursor_set.
// When locking is enabled for the query, starts a lock request covering the
// range [input_key, input_key] (write lock for write operations, read lock
// otherwise). If that succeeds, a pair was found (key != NULL) and this is not
// a lock-only invocation, the pair is forwarded to the application callback
// and its return value is recorded in r_user_callback.
// Returns the lock error, the application callback's result, or 0.
static int
c_getf_set_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT_WITH_INPUT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;

    int rc = 0;
    if (base->do_locking) {
        rc = start_range_lock(base->db, base->txn,
                              query->input_key, query->input_key,
                              base->is_write_op ? LOCK_REQUEST_WRITE : LOCK_REQUEST_READ,
                              &base->lock_request);
    }
    if (rc != 0 || key == NULL || lock_only) {
        return rc;
    }

    DBT found_key = { .data = (void *) key, .size = keylen };
    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);
    // A non-zero value is returned to the brt layer, which reports it from toku_brt_cursor_set.
    return base->r_user_callback;
}
static int c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only);

// Range query: run toku_brt_cursor_set_range for `key`; on success the brt
// layer invokes c_getf_set_range_callback, which takes the range lock (if
// locking is enabled for this query) and forwards the pair to f.
// If the query reports DB_LOCK_NOTGRANTED, wait on the pending lock request
// with the default timeout and retry the query when the wait succeeds.
// Returns the final query/wait result; TOKUDB_USER_CALLBACK_ERROR is replaced
// by the value f returned.
static int
toku_c_getf_set_range(DBC *c, u_int32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    int rc;
    QUERY_CONTEXT_WITH_INPUT_S query; // per-call query state shared with the callback
    STATUS_VALUE(YDB_C_LAYER_NUM_POINT_QUERIES)++;
    query_context_with_input_init(&query, c, flag, key, NULL, f, extra);

    for (;;) {
        rc = toku_brt_cursor_set_range(dbc_struct_i(c)->c, key, c_getf_set_range_callback, &query);
        if (rc != DB_LOCK_NOTGRANTED) {
            if (rc == TOKUDB_USER_CALLBACK_ERROR) {
                rc = query.base.r_user_callback;
            }
            break;
        }
        // The lock is held by someone else: block until it is granted or the wait fails.
        rc = toku_lock_request_wait_with_default_timeout(&query.base.lock_request, c->dbp->i->lt);
        if (rc != 0) {
            break;
        }
    }

    query_context_base_destroy(&query.base);
    return rc;
}
// Callback passed to toku_brt_cursor_set_range.
// When locking is enabled for the query, starts a lock request on the range
//   left  = input_key
//   right = found key, or toku_lt_infinity when nothing was found (key == NULL)
// (write lock for write operations, read lock otherwise). If that succeeds, a
// pair was found and this is not a lock-only invocation, the pair is forwarded
// to the application callback and its return value is recorded in r_user_callback.
// Returns the lock error, the application callback's result, or 0.
static int
c_getf_set_range_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT_WITH_INPUT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;
    DBT found_key = { .data = (void *) key, .size = keylen };

    int rc = 0;
    if (base->do_locking) {
        rc = start_range_lock(base->db, base->txn,
                              query->input_key,
                              key != NULL ? &found_key : toku_lt_infinity,
                              base->is_write_op ? LOCK_REQUEST_WRITE : LOCK_REQUEST_READ,
                              &base->lock_request);
    }
    if (rc != 0 || key == NULL || lock_only) {
        return rc;
    }

    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);
    // A non-zero value is returned to the brt layer, which reports it from toku_brt_cursor_set_range.
    return base->r_user_callback;
}
static int c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only);

// Reverse range query: run toku_brt_cursor_set_range_reverse for `key`; on
// success the brt layer invokes c_getf_set_range_reverse_callback, which takes
// the range lock (if locking is enabled for this query) and forwards the pair to f.
// If the query reports DB_LOCK_NOTGRANTED, wait on the pending lock request
// with the default timeout and retry the query when the wait succeeds.
// Returns the final query/wait result; TOKUDB_USER_CALLBACK_ERROR is replaced
// by the value f returned.
static int
toku_c_getf_set_range_reverse(DBC *c, u_int32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    int rc;
    QUERY_CONTEXT_WITH_INPUT_S query; // per-call query state shared with the callback
    STATUS_VALUE(YDB_C_LAYER_NUM_POINT_QUERIES)++;
    query_context_with_input_init(&query, c, flag, key, NULL, f, extra);

    for (;;) {
        rc = toku_brt_cursor_set_range_reverse(dbc_struct_i(c)->c, key, c_getf_set_range_reverse_callback, &query);
        if (rc != DB_LOCK_NOTGRANTED) {
            if (rc == TOKUDB_USER_CALLBACK_ERROR) {
                rc = query.base.r_user_callback;
            }
            break;
        }
        // The lock is held by someone else: block until it is granted or the wait fails.
        rc = toku_lock_request_wait_with_default_timeout(&query.base.lock_request, c->dbp->i->lt);
        if (rc != 0) {
            break;
        }
    }

    query_context_base_destroy(&query.base);
    return rc;
}
// Callback passed to toku_brt_cursor_set_range_reverse.
// When locking is enabled for the query, starts a lock request on the range
//   left  = found key, or toku_lt_neg_infinity when nothing was found (key == NULL)
//   right = input_key
// (write lock for write operations, read lock otherwise). If that succeeds, a
// pair was found and this is not a lock-only invocation, the pair is forwarded
// to the application callback and its return value is recorded in r_user_callback.
// Returns the lock error, the application callback's result, or 0.
static int
c_getf_set_range_reverse_callback(ITEMLEN keylen, bytevec key, ITEMLEN vallen, bytevec val, void *extra, bool lock_only) {
    QUERY_CONTEXT_WITH_INPUT query = extra;
    QUERY_CONTEXT_BASE base = &query->base;
    DBT found_key = { .data = (void *) key, .size = keylen };

    int rc = 0;
    if (base->do_locking) {
        rc = start_range_lock(base->db, base->txn,
                              key != NULL ? &found_key : toku_lt_neg_infinity,
                              query->input_key,
                              base->is_write_op ? LOCK_REQUEST_WRITE : LOCK_REQUEST_READ,
                              &base->lock_request);
    }
    if (rc != 0 || key == NULL || lock_only) {
        return rc;
    }

    DBT found_val = { .data = (void *) val, .size = vallen };
    base->r_user_callback = base->f(&found_key, &found_val, base->f_extra);
    // A non-zero value is returned to the brt layer, which reports it from toku_brt_cursor_set_range_reverse.
    return base->r_user_callback;
}
// Close cursor c and release its memory.
// The ydb lock does not need to be held by the caller.
// Closes the underlying brt cursor, cleans up the cursor's private key/value
// buffers (skey_s / sval_s), then frees the cursor itself.
// Returns the result of toku_brt_cursor_close.
int
toku_c_close(DBC * c) {
    HANDLE_PANICKED_DB(c->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);

    int close_rc = toku_brt_cursor_close(dbc_struct_i(c)->c);

    toku_sdbt_cleanup(&dbc_struct_i(c)->skey_s);
    toku_sdbt_cleanup(&dbc_struct_i(c)->sval_s);

#if !TOKUDB_NATIVE_H
    // In this configuration the internal part is a separate allocation.
    toku_free(dbc_struct_i(c));
#endif
    toku_free(c);

    return close_rc;
}
// The next two static helpers are duplicated here and in ydb.c;
// they should eventually move to a shared location.

// No-op query callback: ignores the pair and always reports success (0).
static int ydb_getf_do_nothing(DBT const* UU(key), DBT const* UU(val), void* UU(extra))
{
    return 0;
}
// Zero *dbt and set its flags to DB_DBT_REALLOC.
// Returns dbt so the call can be used inline.
static inline DBT*
init_dbt_realloc(DBT *dbt) {
    DBT *target = dbt;
    memset(target, 0, sizeof *target);
    target->flags = DB_DBT_REALLOC;
    return target;
}
// Return the number of entries whose key matches the key currently
// pointed to by the brt cursor.
// Since duplicate keys are no longer supported the answer is 0 or 1.
//
//  cursor  the cursor whose current key is counted
//  count   out: 1 if the key is found by a fresh point query, 0 if that query
//          reports DB_NOTFOUND; not written on error
//  flags   only the prelocked flags extracted by get_cursor_prelocked_flags
//          are accepted; any other bit yields EINVAL
//
// Returns 0 on success, otherwise the first error encountered. Errors from the
// point query other than DB_NOTFOUND (for example a lock that could not be
// granted) are returned to the caller instead of being reported as "count 0".
static int
toku_c_count(DBC *cursor, db_recno_t *count, u_int32_t flags) {
    HANDLE_PANICKED_DB(cursor->dbp);
    HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(cursor);
    int r;
    DBC *count_cursor = 0;
    DBT currentkey;

    init_dbt_realloc(&currentkey);
    u_int32_t lock_flags = get_cursor_prelocked_flags(flags, cursor);
    flags &= ~lock_flags;
    if (flags != 0) {
        r = EINVAL; goto finish;
    }

    // Copy out the key the cursor is positioned on (buffer owned by currentkey).
    r = toku_c_get_current_unconditional(cursor, lock_flags, &currentkey, NULL);
    if (r != 0) goto finish;

    //TODO: Optimization
    //if (do_locking) {
    //   do a lock from currentkey,-infinity to currentkey,infinity
    //   lock_flags |= DB_PRELOCKED
    //}

    // Look the key up again with a separate cursor so `cursor` is not moved.
    r = toku_db_cursor_internal(cursor->dbp, dbc_struct_i(cursor)->txn, &count_cursor, DBC_DISABLE_PREFETCHING, 0);
    if (r != 0) goto finish;

    r = toku_c_getf_set(count_cursor, lock_flags, &currentkey, ydb_getf_do_nothing, NULL);
    if (r == 0) {
        *count = 1; // there is a key, so the count is one
    } else if (r == DB_NOTFOUND) {
        *count = 0; // the key is gone: a valid answer, not an error
        r = 0;
    }
    // Any other r is a real failure and falls through to the caller unchanged.

finish:
    if (currentkey.data) toku_free(currentkey.data);
    if (count_cursor) {
        int rr = toku_c_close(count_cursor); assert(rr == 0);
    }
    return r;
}
// Record the range [key_left, key_right] on the brt cursor and, when needed,
// acquire a range lock on it for the cursor's transaction.
// No lock is taken when the db has no lock tree or the cursor has no txn.
// A write lock is taken for read-modify-write (rmw) cursors; otherwise a read
// lock is taken only for serializable cursors, because READ_UNCOMMITTED and
// READ_COMMITTED transactions do not need read locks.
// Returns 0 when no lock is needed, else the result of get_range_lock.
static int
toku_c_pre_acquire_range_lock(DBC *dbc, const DBT *key_left, const DBT *key_right) {
    DB *db = dbc->dbp;
    DB_TXN *txn = dbc_struct_i(dbc)->txn;
    HANDLE_PANICKED_DB(db);

    toku_brt_cursor_set_range_lock(dbc_struct_i(dbc)->c, key_left, key_right,
                                   (key_left == toku_lt_neg_infinity),
                                   (key_right == toku_lt_infinity));

    int rc = 0;
    if (db->i->lt && txn) {
        if (dbc_struct_i(dbc)->rmw) {
            rc = get_range_lock(db, txn, key_left, key_right, LOCK_REQUEST_WRITE);
        } else if (dbc_struct_i(dbc)->iso == TOKU_ISO_SERIALIZABLE) {
            rc = get_range_lock(db, txn, key_left, key_right, LOCK_REQUEST_READ);
        }
    }
    return rc;
}
// DBC->c_get: flag-driven cursor read that copies results into key/val.
// The main operation is extracted from `flag` with get_main_cursor_flag and
// dispatched to the matching toku_c_getf_* function; the remaining flag bits
// are passed through to that function. Results are delivered through
// c_get_wrapper_callback using a QUERY_CONTEXT_WRAPPED_S.
// Returns the result of the dispatched function, or EINVAL for an
// unrecognized main flag.
int
toku_c_get(DBC* c, DBT* key, DBT* val, u_int32_t flag) {
//This function exists for legacy (test compatibility) purposes/parity with bdb.
HANDLE_PANICKED_DB(c->dbp);
HANDLE_CURSOR_ILLEGAL_WORKING_PARENT_TXN(c);
u_int32_t main_flag = get_main_cursor_flag(flag);
u_int32_t remaining_flags = get_nonmain_cursor_flags(flag);
int r;
QUERY_CONTEXT_WRAPPED_S context;
//Passing in NULL for a key or val means that it is NOT an output.
// Both key and val are output:
// query_context_wrapped_init(&context, c, key, val);
// Val is output, key is not:
// query_context_wrapped_init(&context, c, NULL, val);
// Neither key nor val are output:
// query_context_wrapped_init(&context, c, NULL, NULL); // Used for DB_GET_BOTH
switch (main_flag) {
case (DB_FIRST):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_first(c, remaining_flags, c_get_wrapper_callback, &context);
break;
case (DB_LAST):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_last(c, remaining_flags, c_get_wrapper_callback, &context);
break;
// The NODUP variants are handled exactly like their plain counterparts.
case (DB_NEXT):
case (DB_NEXT_NODUP):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_next(c, remaining_flags, c_get_wrapper_callback, &context);
break;
case (DB_PREV):
case (DB_PREV_NODUP):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_prev(c, remaining_flags, c_get_wrapper_callback, &context);
break;
#ifdef DB_PREV_DUP
case (DB_PREV_DUP):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_prev_dup(c, remaining_flags, c_get_wrapper_callback, &context);
break;
#endif
case (DB_CURRENT):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_current(c, remaining_flags, c_get_wrapper_callback, &context);
break;
case (DB_CURRENT_BINDING):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_current_binding(c, remaining_flags, c_get_wrapper_callback, &context);
break;
// DB_SET: key is input only (NULL is passed as the key output), val is output.
case (DB_SET):
query_context_wrapped_init(&context, c, NULL, val);
r = toku_c_getf_set(c, remaining_flags, key, c_get_wrapper_callback, &context);
break;
// DB_SET_RANGE / DB_SET_RANGE_REVERSE: key is both the search input and an output.
case (DB_SET_RANGE):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_set_range(c, remaining_flags, key, c_get_wrapper_callback, &context);
break;
case (DB_SET_RANGE_REVERSE):
query_context_wrapped_init(&context, c, key, val);
r = toku_c_getf_set_range_reverse(c, remaining_flags, key, c_get_wrapper_callback, &context);
break;
// Unknown main flag: context is left uninitialized and no query is run.
default:
r = EINVAL;
break;
}
return r;
}
// Allocate and initialize a cursor on db for txn (txn may be NULL).
//  flags               only DB_SERIALIZABLE, DB_INHERIT_ISOLATION, DB_RMW and
//                      DBC_DISABLE_PREFETCHING are accepted; anything else is
//                      reported as EINVAL through toku_ydb_do_error
//  is_temporary_cursor non-zero: the cursor uses the db's shared skey/sval
//                      buffers; zero: it uses its own skey_s/sval_s buffers
// On success stores the new cursor in *c and returns 0. If toku_brt_cursor
// fails (only TOKUDB_MVCC_DICTIONARY_TOO_NEW is tolerated by the assert) the
// allocation is freed, *c is left untouched and that error is returned.
int
toku_db_cursor_internal(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int is_temporary_cursor) {
HANDLE_PANICKED_DB(db);
HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);
DB_ENV* env = db->dbenv;
if (flags & ~(DB_SERIALIZABLE | DB_INHERIT_ISOLATION | DB_RMW | DBC_DISABLE_PREFETCHING)) {
return toku_ydb_do_error(
env,
EINVAL,
"Invalid flags set for toku_db_cursor\n"
);
}
int r = 0;
struct __toku_dbc_external *XMALLOC(eresult); // so the internal stuff is stuck on the end
memset(eresult, 0, sizeof(*eresult));
DBC *result = &eresult->external_part;
// Fill in the cursor's method table: SCRS(name) sets result->name = toku_name.
// these methods DO NOT grab the ydb lock
#define SCRS(name) result->name = toku_ ## name
SCRS(c_get);
SCRS(c_count);
SCRS(c_getf_first);
SCRS(c_getf_last);
SCRS(c_getf_next);
SCRS(c_getf_prev);
SCRS(c_getf_current);
SCRS(c_getf_current_binding);
SCRS(c_getf_set);
SCRS(c_getf_set_range);
SCRS(c_getf_set_range_reverse);
SCRS(c_pre_acquire_range_lock);
SCRS(c_close);
#undef SCRS
#if !TOKUDB_NATIVE_H
MALLOC(result->i); // otherwise it is allocated as part of result->ii
assert(result->i);
#endif
result->dbp = db;
dbc_struct_i(result)->txn = txn;
dbc_struct_i(result)->skey_s = (struct simple_dbt){0,0};
dbc_struct_i(result)->sval_s = (struct simple_dbt){0,0};
// Choose which key/value buffers the cursor returns data through.
if (is_temporary_cursor) {
dbc_struct_i(result)->skey = &db->i->skey;
dbc_struct_i(result)->sval = &db->i->sval;
} else {
dbc_struct_i(result)->skey = &dbc_struct_i(result)->skey_s;
dbc_struct_i(result)->sval = &dbc_struct_i(result)->sval_s;
}
// Isolation: DB_SERIALIZABLE forces serializable; otherwise use the txn's
// isolation level, or serializable when there is no txn.
if (flags & DB_SERIALIZABLE) {
dbc_struct_i(result)->iso = TOKU_ISO_SERIALIZABLE;
} else {
dbc_struct_i(result)->iso = txn ? db_txn_struct_i(txn)->iso : TOKU_ISO_SERIALIZABLE;
}
dbc_struct_i(result)->rmw = (flags & DB_RMW) != 0;
// Snapshot reads apply only with a txn at READ_COMMITTED or SNAPSHOT isolation.
BOOL is_snapshot_read = FALSE;
if (txn) {
is_snapshot_read = (dbc_struct_i(result)->iso == TOKU_ISO_READ_COMMITTED ||
dbc_struct_i(result)->iso == TOKU_ISO_SNAPSHOT);
}
r = toku_brt_cursor(
db->i->brt,
&dbc_struct_i(result)->c,
txn ? db_txn_struct_i(txn)->tokutxn : NULL,
is_snapshot_read,
((flags & DBC_DISABLE_PREFETCHING) != 0)
);
assert(r == 0 || r == TOKUDB_MVCC_DICTIONARY_TOO_NEW);
if (r == 0) {
*c = result;
}
else {
// Creation failed: release everything allocated above.
#if !TOKUDB_NATIVE_H
toku_free(result->i); // otherwise it is allocated as part of result->ii
#endif
toku_free(result);
}
return r;
}
// Create a non-temporary cursor via toku_db_cursor_internal, after rejecting
// the call with EINVAL when no txn is supplied but the environment was opened
// with DB_INIT_TXN.
static inline int
autotxn_db_cursor(DB *db, DB_TXN *txn, DBC **c, u_int32_t flags) {
    if (txn || !(db->dbenv->i->open_flags & DB_INIT_TXN)) {
        return toku_db_cursor_internal(db, txn, c, flags, 0);
    }
    return toku_ydb_do_error(db->dbenv, EINVAL,
                             "Cursors in a transaction environment must have transactions.\n");
}
// DB->cursor entry point: create a cursor on db for txn.
// Called without holding the ydb lock.
// Returns whatever autotxn_db_cursor returns.
int
toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, u_int32_t flags) {
    return autotxn_db_cursor(db, txn, c, flags);
}
#undef STATUS_VALUE
#include <valgrind/drd.h>
// Library constructor (GCC constructor attribute) that asks Valgrind's DRD
// tool to ignore accesses to ydb_c_layer_status.
// NOTE(review): presumably because the status counters are incremented
// without synchronization on purpose — confirm.
void __attribute__((constructor)) toku_ydb_cursor_drd_ignore(void);
void
toku_ydb_cursor_drd_ignore(void) {
DRD_IGNORE_VAR(ydb_c_layer_status);
}

34
src/ydb_cursor.h Executable file
View file

@ -0,0 +1,34 @@
// This file defines the public interface to the ydb library
#if !defined(TOKU_YDB_CURSOR_H)
#define TOKU_YDB_CURSOR_H
#if defined(__cplusplus)
extern "C" {
#endif
// Row indexes into YDB_C_LAYER_STATUS_S.status[] for the cursor layer.
typedef enum {
YDB_C_LAYER_NUM_POINT_QUERIES = 0, /* incremented by the getf_set, getf_set_range and getf_set_range_reverse cursor calls */
YDB_C_LAYER_NUM_SEQUENTIAL_QUERIES, /* incremented by the getf_current and getf_current_binding cursor calls (among others) */
YDB_C_LAYER_STATUS_NUM_ROWS /* number of rows in this status array */
} ydb_c_lock_layer_status_entry;
// Cursor-layer engine status: one row per ydb_c_lock_layer_status_entry value.
typedef struct {
BOOL initialized; // NOTE(review): presumably set once the row metadata has been filled in — confirm in ydb_cursor.c
TOKU_ENGINE_STATUS_ROW_S status[YDB_C_LAYER_STATUS_NUM_ROWS];
} YDB_C_LAYER_STATUS_S, *YDB_C_LAYER_STATUS;
// Cursor-layer status accessor (out parameter statp).
void ydb_c_layer_get_status(YDB_C_LAYER_STATUS statp);
// Flag-driven cursor read that dispatches to the getf_* functions (bdb parity).
int toku_c_get(DBC * c, DBT * key, DBT * data, u_int32_t flag);
// Point query on key; a found pair is passed to f(key, val, extra).
int toku_c_getf_set(DBC *c, u_int32_t flag, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra);
// Close the cursor and free it.
int toku_c_close(DBC * c);
// Create a cursor; is_temporary_cursor selects the db's shared key/val buffers.
int toku_db_cursor_internal(DB *db, DB_TXN * txn, DBC **c, u_int32_t flags, int is_temporary_cursor);
// Create a non-temporary cursor; fails with EINVAL if txn is NULL in a DB_INIT_TXN environment.
int toku_db_cursor(DB *db, DB_TXN *txn, DBC **c, u_int32_t flags);
#if defined(__cplusplus)
}
#endif
#endif

1179
src/ydb_db.c Normal file

File diff suppressed because it is too large Load diff

84
src/ydb_db.h Normal file
View file

@ -0,0 +1,84 @@
// This file defines the public interface to the ydb library
#if !defined(TOKU_YDB_DB_H)
#define TOKU_YDB_DB_H
#if defined(__cplusplus)
extern "C" {
#endif
// Row indexes into YDB_DB_LAYER_STATUS_S.status[] for the db layer.
typedef enum {
YDB_LAYER_DIRECTORY_WRITE_LOCKS = 0, /* total directory write locks taken */
YDB_LAYER_DIRECTORY_WRITE_LOCKS_FAIL, /* total directory write locks unable to be taken */
YDB_LAYER_LOGSUPPRESS, /* number of times logs are suppressed for empty table (2440) */
YDB_LAYER_LOGSUPPRESS_FAIL, /* number of times unable to suppress logs for empty table (2440) */
YDB_DB_LAYER_STATUS_NUM_ROWS /* number of rows in this status array */
} ydb_db_lock_layer_status_entry;
// Db-layer engine status: one row per ydb_db_lock_layer_status_entry value.
typedef struct {
BOOL initialized; // NOTE(review): presumably set once the row metadata has been filled in — confirm in ydb_db.c
TOKU_ENGINE_STATUS_ROW_S status[YDB_DB_LAYER_STATUS_NUM_ROWS];
} YDB_DB_LAYER_STATUS_S, *YDB_DB_LAYER_STATUS;
// Db-layer status accessor (out parameter statp).
void ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp);
/* db methods */

// Returns 1 when db->i->opened is non-zero, 0 otherwise.
static inline int db_opened(DB *db)
{
    int is_open = (db->i->opened != 0);
    return is_open;
}
// Returns the key comparison function stored on the db's brt.
static inline toku_dbt_cmp
toku_db_get_compare_fun(DB* db) {
    toku_dbt_cmp cmp = db->i->brt->compare_fun;
    return cmp;
}
// Db-layer functions implemented in ydb_db.c (bodies are not shown in this header).
int toku_db_pre_acquire_fileops_lock(DB *db, DB_TXN *txn);
int db_open_iname(DB * db, DB_TXN * txn, const char *iname, u_int32_t flags, int mode);
int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn, BOOL just_lock);
int toku_db_get (DB * db, DB_TXN * txn, DBT * key, DBT * data, u_int32_t flags);
int toku_db_create(DB ** db, DB_ENV * env, u_int32_t flags);
int toku_db_close(DB * db, u_int32_t flags);
// Point lookup of key; ydb_write.c uses it for existence checks before put/delete.
int db_getf_set(DB *db, DB_TXN *txn, u_int32_t flags, DBT *key, YDB_CALLBACK_FUNCTION f, void *extra);
int autotxn_db_get(DB* db, DB_TXN* txn, DBT* key, DBT* data, u_int32_t flags);
//TODO: DB_AUTO_COMMIT.
//TODO: Nowait only conditionally?
//TODO: NOSYNC change to SYNC if DB_ENV has something in set_flags

// Begin an implicit transaction for a single operation when one is needed.
// Nothing is started if the caller already supplied *txn or the environment
// was not opened with DB_INIT_TXN; *changed is then set to FALSE.
// Otherwise a DB_TXN_NOWAIT transaction is begun (plus DB_TXN_NOSYNC unless
// force_auto_commit is set or the environment has DB_AUTO_COMMIT), stored in
// *txn, and *changed is set to TRUE.
// Returns 0 on success or the error from toku_txn_begin_internal, in which
// case *changed is not written.
static inline int
toku_db_construct_autotxn(DB* db, DB_TXN **txn, BOOL* changed, BOOL force_auto_commit, BOOL holds_ydb_lock) {
    assert(db && txn && changed);
    DB_ENV* env = db->dbenv;

    int need_auto_txn = !*txn && (env->i->open_flags & DB_INIT_TXN);
    if (!need_auto_txn) {
        *changed = FALSE;
        return 0;
    }

    u_int32_t txn_flags = DB_TXN_NOWAIT;
    if (!force_auto_commit && !(env->i->open_flags & DB_AUTO_COMMIT)) {
        txn_flags |= DB_TXN_NOSYNC;
    }

    int rc = toku_txn_begin_internal(env, NULL, txn, txn_flags, 1, holds_ydb_lock);
    if (rc == 0) {
        *changed = TRUE;
    }
    return rc;
}
// Finish an implicit transaction started by toku_db_construct_autotxn.
// When `changed` is false nothing was started and r is returned untouched.
// Otherwise the txn is committed if the operation succeeded (r == 0) and
// aborted if it failed; the ydb lock is taken around that step unless the
// caller already holds it.
// Returns the commit result on success of the operation, else the original r.
static inline int
toku_db_destruct_autotxn(DB_TXN *txn, int r, BOOL changed, BOOL holds_ydb_lock) {
    if (changed) {
        if (!holds_ydb_lock) {
            toku_ydb_lock();
        }
        if (r != 0) {
            // Keep the operation's error; the abort result is not reported.
            toku_txn_abort(txn, NULL, NULL, false);
        } else {
            r = toku_txn_commit(txn, 0, NULL, NULL, false);
        }
        if (!holds_ydb_lock) {
            toku_ydb_unlock();
        }
    }
    return r;
}
#if defined(__cplusplus)
}
#endif
#endif

158
src/ydb_env_func.c Normal file
View file

@ -0,0 +1,158 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "Copyright (c) 2007-2009 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <toku_portability.h>
#include <db.h>
#include "brt-internal.h"
#include "brt-flusher.h"
#include "dlmalloc.h"
#include "checkpoint.h"
#include "brtloader.h"
#include "ydb_env_func.h"
// For test purposes only.
// These callbacks are never used in production code, only as a way to test the system
// (for example, by causing crashes at predictable times).
// Set by db_env_set_checkpoint_callback():
void (*checkpoint_callback_f)(void*) = NULL;
void * checkpoint_callback_extra = NULL;
// Set by db_env_set_checkpoint_callback2():
void (*checkpoint_callback2_f)(void*) = NULL;
void * checkpoint_callback2_extra = NULL;
// Set by db_env_enable_engine_status(); enabled by default.
uint32_t engine_status_enable = 1; // if zero, suppress engine status output on failed assert, for test programs only
// Passes fsync_function to toku_set_func_fsync and returns its result.
int
db_env_set_func_fsync (int (*fsync_function)(int)) {
    int rc = toku_set_func_fsync(fsync_function);
    return rc;
}
// Passes pwrite_function to toku_set_func_pwrite and returns its result.
int
db_env_set_func_pwrite (ssize_t (*pwrite_function)(int, const void *, size_t, toku_off_t)) {
    int rc = toku_set_func_pwrite(pwrite_function);
    return rc;
}
// Passes pwrite_function to toku_set_func_full_pwrite and returns its result.
int
db_env_set_func_full_pwrite (ssize_t (*pwrite_function)(int, const void *, size_t, toku_off_t)) {
    int rc = toku_set_func_full_pwrite(pwrite_function);
    return rc;
}
// Passes write_function to toku_set_func_write and returns its result.
int
db_env_set_func_write (ssize_t (*write_function)(int, const void *, size_t)) {
    int rc = toku_set_func_write(write_function);
    return rc;
}
// Passes write_function to toku_set_func_full_write and returns its result.
int
db_env_set_func_full_write (ssize_t (*write_function)(int, const void *, size_t)) {
    int rc = toku_set_func_full_write(write_function);
    return rc;
}
// Passes fdopen_function to toku_set_func_fdopen and returns its result.
int
db_env_set_func_fdopen (FILE * (*fdopen_function)(int, const char *)) {
    int rc = toku_set_func_fdopen(fdopen_function);
    return rc;
}
// Passes fopen_function to toku_set_func_fopen and returns its result.
int
db_env_set_func_fopen (FILE * (*fopen_function)(const char *, const char *)) {
    int rc = toku_set_func_fopen(fopen_function);
    return rc;
}
// Passes open_function to toku_set_func_open and returns its result.
int
db_env_set_func_open (int (*open_function)(const char *, int, int)) {
    int rc = toku_set_func_open(open_function);
    return rc;
}
// Passes fclose_function to toku_set_func_fclose and returns its result.
int
db_env_set_func_fclose (int (*fclose_function)(FILE*)) {
    int rc = toku_set_func_fclose(fclose_function);
    return rc;
}
// Passes fun to toku_set_func_pread and returns its result.
int
db_env_set_func_pread (ssize_t (*fun)(int, void *, size_t, off_t)) {
    int rc = toku_set_func_pread(fun);
    return rc;
}
// Passes fwrite_fun to the brt loader via brtloader_set_os_fwrite.
void db_env_set_func_loader_fwrite (size_t (*fwrite_fun)(const void*,size_t,size_t,FILE*))
{
    brtloader_set_os_fwrite(fwrite_fun);
}
// Passes f to toku_set_func_malloc. Always returns 0.
int db_env_set_func_malloc (void *(*f)(size_t))
{
    toku_set_func_malloc(f);
    return 0;
}
// Passes f to toku_set_func_realloc. Always returns 0.
int db_env_set_func_realloc (void *(*f)(void*, size_t))
{
    toku_set_func_realloc(f);
    return 0;
}
// Passes f to toku_set_func_free. Always returns 0.
int db_env_set_func_free (void (*f)(void*))
{
    toku_set_func_free(f);
    return 0;
}
// Route malloc/realloc/free through the dlmalloc implementations.
// Referencing the dl* functions here also makes sure dlmalloc gets included.
void setup_dlmalloc (void)
{
    // The setters always return 0, so their results are deliberately ignored.
    (void) db_env_set_func_malloc(dlmalloc);
    (void) db_env_set_func_realloc(dlrealloc);
    (void) db_env_set_func_free(dlfree);
}
// For test purposes only.
// Stores callback_f/extra in the global checkpoint_callback_f and
// checkpoint_callback_extra while holding the checkpoint safe client lock.
// With this interface, all checkpoint users get the same callback and the same extra.
void db_env_set_checkpoint_callback (void (*callback_f)(void*), void* extra)
{
    toku_checkpoint_safe_client_lock();
    {
        checkpoint_callback_f = callback_f;
        checkpoint_callback_extra = extra;
    }
    toku_checkpoint_safe_client_unlock();
}
// For test purposes only.
// Stores callback_f/extra in the global checkpoint_callback2_f and
// checkpoint_callback2_extra while holding the checkpoint safe client lock.
void db_env_set_checkpoint_callback2 (void (*callback_f)(void*), void* extra)
{
    toku_checkpoint_safe_client_lock();
    {
        checkpoint_callback2_f = callback_f;
        checkpoint_callback2_extra = extra;
    }
    toku_checkpoint_safe_client_unlock();
}
// Passes callback_f and extra to toku_recover_set_callback.
void db_env_set_recover_callback (void (*callback_f)(void*), void* extra)
{
    toku_recover_set_callback(callback_f, extra);
}
// Passes callback_f and extra to toku_recover_set_callback2.
void db_env_set_recover_callback2 (void (*callback_f)(void*), void* extra)
{
    toku_recover_set_callback2(callback_f, extra);
}
// Passes callback_f and extra to toku_flusher_thread_set_callback.
void db_env_set_flusher_thread_callback(void (*callback_f)(int, void*), void* extra)
{
    toku_flusher_thread_set_callback(callback_f, extra);
}
// Passes factor to toku_brtloader_set_size_factor.
void db_env_set_loader_size_factor (uint32_t factor)
{
    toku_brtloader_set_size_factor(factor);
}
// Sets the global garbage_collection_debug to 1 when verification_mode is
// non-zero and to 0 otherwise.
void db_env_set_mvcc_garbage_collection_verification(u_int32_t verification_mode)
{
    garbage_collection_debug = verification_mode ? 1 : 0;
}
// Lets test programs that expect to fail suppress engine status output on a
// failed assert: stores `enable` in the global engine_status_enable
// (zero suppresses the output).
void db_env_enable_engine_status(uint32_t enable)
{
    engine_status_enable = enable;
}

23
src/ydb_env_func.h Normal file
View file

@ -0,0 +1,23 @@
// This file defines the public interface to the ydb library
#if !defined(TOKU_YDB_ENV_FUNC_H)
#define TOKU_YDB_ENV_FUNC_H
#if defined(__cplusplus)
extern "C" {
#endif
// Test-only hooks defined in ydb_env_func.c.
extern void (*checkpoint_callback_f)(void*);
extern void * checkpoint_callback_extra;
extern void (*checkpoint_callback2_f)(void*);
extern void * checkpoint_callback2_extra;
// If zero, engine status output on a failed assert is suppressed (test programs only).
extern uint32_t engine_status_enable;
// Called to use dlmalloc functions.
void setup_dlmalloc(void) __attribute__((__visibility__("default")));
#if defined(__cplusplus)
}
#endif
#endif

106
src/ydb_row_lock.c Executable file
View file

@ -0,0 +1,106 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "Copyright (c) 2007-2009 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <db.h>
#include "ydb-internal.h"
#include "ydb_row_lock.h"
// Register lock tree lt with txn, under the lock tree manager's mutex.
// The txn's lock-tree table (lth) is created lazily on first use; it used to
// be created when the transaction began.
// If lt is already in the table nothing changes; otherwise it is inserted and
// a reference is added to lt.
// Returns 0 on success or the error from toku_lth_insert.
static int
toku_txn_add_lt(DB_TXN* txn, toku_lock_tree* lt) {
    int rc;
    assert(txn && lt);

    toku_mutex_lock(&lt->mgr->mutex);

    if (!db_txn_struct_i(txn)->lth) {
        rc = toku_lth_create(&db_txn_struct_i(txn)->lth);
        assert_zero(rc);
    }
    toku_lth* table = db_txn_struct_i(txn)->lth;

    toku_lock_tree* existing = toku_lth_find(table, lt);
    if (existing) {
        assert(existing == lt);
        rc = 0;
    } else {
        rc = toku_lth_insert(table, lt);
        if (rc == 0) {
            toku_lt_add_ref(lt);
        }
    }

    toku_mutex_unlock(&lt->mgr->mutex);
    return rc;
}
/*
 Partial implementation of nested transactions: children do their work as
 usual, but every lock is taken by the root of the nested txn tree.
 This may hold extra locks, and will not work as expected when
 a node has two non-completed txns at any time.

 Returns the root ancestor of txn (txn itself if it has no parent),
 or NULL when txn is NULL.
*/
static inline DB_TXN*
toku_txn_ancestor(DB_TXN* txn) {
    DB_TXN *root = txn;
    if (root) {
        while (root->parent) {
            root = root->parent;
        }
    }
    return root;
}
// Acquire a range lock on [left_key, right_key] for the root ancestor of txn.
// Returns when the lock is acquired or the default lock tree timeout expires.
// The ydb mutex must be held when called and may be released when waiting in the lock tree.
// Returns 0 on success, else the error from registering the lock tree with
// the txn or from the lock acquisition.
int
get_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT *right_key, toku_lock_type lock_type) {
    DB_TXN *root_txn = toku_txn_ancestor(txn);

    int rc = toku_txn_add_lt(root_txn, db->i->lt);
    if (rc != 0) {
        return rc;
    }

    TXNID root_id = toku_txn_get_txnid(db_txn_struct_i(root_txn)->tokutxn);
    toku_lock_request request;
    toku_lock_request_init(&request, db, root_id, left_key, right_key, lock_type);
    rc = toku_lt_acquire_lock_request_with_default_timeout(db->i->lt, &request);
    toku_lock_request_destroy(&request);
    return rc;
}
// Set up and start an asynchronous lock request on [left_key, right_key] for
// the root ancestor of txn. The caller owns lock_request and can wait on it
// later if the lock is not granted immediately.
// Returns 0 on success, else the error from registering the lock tree with
// the txn or from toku_lock_request_start.
int
start_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT *right_key, toku_lock_type lock_type, toku_lock_request *lock_request) {
    DB_TXN *root_txn = toku_txn_ancestor(txn);

    int rc = toku_txn_add_lt(root_txn, db->i->lt);
    if (rc != 0) {
        return rc;
    }

    TXNID root_id = toku_txn_get_txnid(db_txn_struct_i(root_txn)->tokutxn);
    toku_lock_request_set(lock_request, db, root_id, left_key, right_key, lock_type);
    return toku_lock_request_start(lock_request, db->i->lt, true);
}
// Acquire a write lock on the single key `key`: a range lock whose left and
// right ends are both `key`. Returns the result of get_range_lock.
int
get_point_write_lock(DB *db, DB_TXN *txn, const DBT *key) {
    return get_range_lock(db, txn, key, key, LOCK_REQUEST_WRITE);
}
// Acquire a point write lock on key for the root ancestor of the DB_TXN that
// contains tokutxn. This does not block the calling thread.
// Returns 0 on success, else the error from registering the lock tree with
// the txn or from toku_lt_acquire_write_lock.
int
toku_grab_write_lock (DB *db, DBT *key, TOKUTXN tokutxn) {
    DB_TXN *owner = toku_txn_get_container_db_txn(tokutxn);
    DB_TXN *root_txn = toku_txn_ancestor(owner);

    int rc = toku_txn_add_lt(root_txn, db->i->lt);
    if (rc != 0) {
        return rc;
    }

    TXNID root_id = toku_txn_get_txnid(db_txn_struct_i(root_txn)->tokutxn);
    return toku_lt_acquire_write_lock(db->i->lt, db, root_id, key);
}

27
src/ydb_row_lock.h Executable file
View file

@ -0,0 +1,27 @@
// This file defines the public interface to the ydb library
#if !defined(TOKU_YDB_ROWLOCK_H)
#define TOKU_YDB_ROWLOCK_H
#if defined(__cplusplus)
extern "C" {
#endif
// Acquire a range lock on [left_key, right_key]; returns once the lock is
// acquired or the default lock tree timeout has expired.
int
get_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT *right_key, toku_lock_type lock_type);
// Set up and start an asynchronous lock request using the caller's lock_request.
int
start_range_lock(DB *db, DB_TXN *txn, const DBT *left_key, const DBT *right_key, toku_lock_type lock_type, toku_lock_request *lock_request);
// Acquire a write lock on the single key `key` (get_range_lock with key as both ends).
int
get_point_write_lock(DB *db, DB_TXN *txn, const DBT *key);
// Acquire a point write lock on key for tokutxn without blocking the calling thread.
int
toku_grab_write_lock (DB *db, DBT *key, TOKUTXN tokutxn);
#if defined(__cplusplus)
}
#endif
#endif

865
src/ydb_write.c Normal file
View file

@ -0,0 +1,865 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "Copyright (c) 2007-2009 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <toku_portability.h>
#include <toku_pthread.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <db.h>
#include "toku_assert.h"
#include "ydb.h"
#include "ydb-internal.h"
#include "brt-internal.h"
#include "brt-flusher.h"
#include "cachetable.h"
#include "log.h"
#include "memory.h"
#include "dlmalloc.h"
#include "checkpoint.h"
#include "key.h"
#include "loader.h"
#include "indexer.h"
#include "ydb_load.h"
#include "brtloader.h"
#include "log_header.h"
#include "ydb_cursor.h"
#include "ydb_row_lock.h"
#include "ydb_env_func.h"
#include "ydb_write.h"
#include "ydb_db.h"
// Write-layer engine status table; a copy is handed out by ydb_write_layer_get_status().
static YDB_WRITE_LAYER_STATUS_S ydb_write_layer_status;
// Make sure STATUS_VALUE refers to this file's status table.
#ifdef STATUS_VALUE
#undef STATUS_VALUE
#endif
// Numeric value of status row x (used as an lvalue to bump counters).
#define STATUS_VALUE(x) ydb_write_layer_status.status[x].value.num
// Fill in the metadata of status row k: key name (stringified k), type t, legend l.
#define STATUS_INIT(k,t,l) { \
ydb_write_layer_status.status[k].keyname = #k; \
ydb_write_layer_status.status[k].type = t; \
ydb_write_layer_status.status[k].legend = l; \
}
// Fill in the metadata of every write-layer status row and mark the table
// initialized. Called lazily from ydb_write_layer_get_status().
static void
ydb_write_layer_status_init (void) {
// Note, this function initializes the keyname, type, and legend fields.
// Value fields are initialized to zero by compiler.
STATUS_INIT(YDB_LAYER_NUM_INSERTS, UINT64, "dictionary inserts");
STATUS_INIT(YDB_LAYER_NUM_INSERTS_FAIL, UINT64, "dictionary inserts fail");
STATUS_INIT(YDB_LAYER_NUM_DELETES, UINT64, "dictionary deletes");
STATUS_INIT(YDB_LAYER_NUM_DELETES_FAIL, UINT64, "dictionary deletes fail");
STATUS_INIT(YDB_LAYER_NUM_UPDATES, UINT64, "dictionary updates");
STATUS_INIT(YDB_LAYER_NUM_UPDATES_FAIL, UINT64, "dictionary updates fail");
STATUS_INIT(YDB_LAYER_NUM_UPDATES_BROADCAST, UINT64, "dictionary broadcast updates");
STATUS_INIT(YDB_LAYER_NUM_UPDATES_BROADCAST_FAIL, UINT64, "dictionary broadcast updates fail");
STATUS_INIT(YDB_LAYER_NUM_MULTI_INSERTS, UINT64, "dictionary multi inserts");
STATUS_INIT(YDB_LAYER_NUM_MULTI_INSERTS_FAIL, UINT64, "dictionary multi inserts fail");
STATUS_INIT(YDB_LAYER_NUM_MULTI_DELETES, UINT64, "dictionary multi deletes");
STATUS_INIT(YDB_LAYER_NUM_MULTI_DELETES_FAIL, UINT64, "dictionary multi deletes fail");
STATUS_INIT(YDB_LAYER_NUM_MULTI_UPDATES, UINT64, "dictionary updates multi");
STATUS_INIT(YDB_LAYER_NUM_MULTI_UPDATES_FAIL, UINT64, "dictionary updates multi fail");
ydb_write_layer_status.initialized = true;
}
#undef STATUS_INIT
// Copy the write-layer status table into *statp, filling in the row metadata
// first if that has not been done yet.
void
ydb_write_layer_get_status(YDB_WRITE_LAYER_STATUS statp) {
    if (!ydb_write_layer_status.initialized) {
        ydb_write_layer_status_init();
    }
    *statp = ydb_write_layer_status;
}
// Returns only the DB_PRELOCKED and DB_PRELOCKED_WRITE bits of flags.
static inline u_int32_t
get_prelocked_flags(u_int32_t flags) {
    return flags & (DB_PRELOCKED | DB_PRELOCKED_WRITE);
}
// This static helper is duplicated here and in ydb.c;
// it should eventually move to a shared location.

// No-op query callback: ignores the pair and always reports success (0).
static int ydb_getf_do_nothing(DBT const* UU(key), DBT const* UU(val), void* UU(extra))
{
    return 0;
}
// Check if the available file system space is less than the reserve.
// Returns ENOSPC (and bumps the environment's enospc_redzone_ctr) when the
// file system state is FS_RED, otherwise 0.
static inline int
env_check_avail_fs_space(DB_ENV *env) {
    if (env->i->fs_state != FS_RED) {
        return 0;
    }
    env->i->enospc_redzone_ctr++;
    return ENOSPC;
}
// Check a proposed key/value pair against the brt layer's maximum advised
// key and value lengths.
// Returns 0 when the insertion is legal. Otherwise reports EINVAL through
// toku_ydb_do_error (the key is checked first) and returns its result.
static int
db_put_check_size_constraints(DB *db, const DBT *key, const DBT *val) {
    unsigned int klimit, vlimit;
    toku_brt_get_maximum_advised_key_value_lengths(&klimit, &vlimit);

    if (key->size > klimit) {
        return toku_ydb_do_error(db->dbenv, EINVAL,
                                 "The largest key allowed is %u bytes", klimit);
    }
    if (val->size > vlimit) {
        return toku_ydb_do_error(db->dbenv, EINVAL,
                                 "The largest value allowed is %u bytes", vlimit);
    }
    return 0;
}
// Decide whether an insert of `key` is allowed under the given overwrite mode.
//   0                        : overwriting is fine, no constraint; returns 0.
//   DB_NOOVERWRITE           : looks the key up (DB_SERIALIZABLE|DB_RMW, so a
//                              write lock rather than a read lock is taken on
//                              the key if it exists). Returns DB_KEYEXIST when
//                              the key is present, 0 when it is absent, and
//                              passes any other lookup error through.
//   DB_NOOVERWRITE_NO_ERROR  : nothing is checked here; returns 0.
//   anything else            : not (yet) supported; returns EINVAL.
static int
db_put_check_overwrite_constraint(DB *db, DB_TXN *txn, DBT *key,
                                  u_int32_t lock_flags, u_int32_t overwrite_flag) {
    if (overwrite_flag == 0) {
        return 0;
    }
    if (overwrite_flag == DB_NOOVERWRITE) {
        int lookup = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE|DB_RMW, key, ydb_getf_do_nothing, NULL);
        if (lookup == DB_NOTFOUND) {
            return 0;            // key absent: the insert may proceed
        }
        if (lookup == 0) {
            return DB_KEYEXIST;  // key present: the insert is refused
        }
        return lookup;           // any other error is passed through
    }
    if (overwrite_flag == DB_NOOVERWRITE_NO_ERROR) {
        return 0;
    }
    return EINVAL;
}
// Delete `key` from `db` on behalf of `txn`.
//
// flags may contain DB_DELETE_ANY (delete regardless of whether the key exists
// in the db) and the prelocked bits; any other bit makes the call fail with EINVAL.
// Without DB_DELETE_ANY the key is first looked up and the lookup's error, if
// any, is returned. A point write lock is taken on the key when the db has a
// lock tree and DB_PRELOCKED_WRITE was not passed.
//
// holds_ydb_lock: when FALSE, this function takes and releases the ydb lock
// around the BRT delete itself; when TRUE the caller is trusted to hold it.
//
// Returns 0 on success, otherwise the first error encountered.
// The delete / delete-fail status counters are updated atomically, because this
// function can run without the ydb lock held (toku_db_put does the same).
int
toku_db_del(DB *db, DB_TXN *txn, DBT *key, u_int32_t flags, BOOL holds_ydb_lock) {
    HANDLE_PANICKED_DB(db);
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);

    u_int32_t unchecked_flags = flags;
    //DB_DELETE_ANY means delete regardless of whether it exists in the db.
    BOOL error_if_missing = (BOOL)(!(flags&DB_DELETE_ANY));
    unchecked_flags &= ~DB_DELETE_ANY;
    u_int32_t lock_flags = get_prelocked_flags(flags);
    unchecked_flags &= ~lock_flags;
    BOOL do_locking = (BOOL)(db->i->lt && !(lock_flags&DB_PRELOCKED_WRITE));

    // Any bit still set at this point is a flag we do not understand.
    int r = (unchecked_flags != 0) ? EINVAL : 0;

    if (r == 0 && error_if_missing) {
        //Check if the key exists in the db.
        r = db_getf_set(db, txn, lock_flags|DB_SERIALIZABLE|DB_RMW, key, ydb_getf_do_nothing, NULL);
    }
    if (r == 0 && do_locking) {
        //Do locking if necessary.
        r = get_point_write_lock(db, txn, key);
    }
    if (r == 0) {
        //Do the actual deleting.
        if (!holds_ydb_lock) toku_ydb_lock();
        r = toku_brt_delete(db->i->brt, key, txn ? db_txn_struct_i(txn)->tokutxn : 0);
        if (!holds_ydb_lock) toku_ydb_unlock();
    }

    // accountability: atomic, since concurrent callers may get here at the same time
    if (r == 0) {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_DELETES), 1);
    } else {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_DELETES_FAIL), 1);
    }
    return r;
}
// Insert (key, val) into `db` on behalf of `txn`.
//
// flags is the prelocked bits combined with one overwrite mode (0,
// DB_NOOVERWRITE or DB_NOOVERWRITE_NO_ERROR). The pair is first checked against
// the size limits, then against the overwrite mode; a point write lock is taken
// on the key when the db has a lock tree and DB_PRELOCKED_WRITE was not passed.
// With DB_NOOVERWRITE_NO_ERROR the message sent to the BRT is
// BRT_INSERT_NO_OVERWRITE instead of BRT_INSERT.
//
// holds_ydb_lock: when FALSE, the ydb lock is taken and released here around
// the BRT insert; when TRUE the caller is trusted to hold it.
//
// Returns 0 on success, otherwise the first error encountered.
int
toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, u_int32_t flags, BOOL holds_ydb_lock) {
    HANDLE_PANICKED_DB(db);
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);

    const u_int32_t prelocked = get_prelocked_flags(flags);
    const u_int32_t overwrite_mode = flags & ~prelocked;

    int rc = db_put_check_size_constraints(db, key, val);
    if (rc != 0) goto account;

    //Do any checking required by the flags.
    rc = db_put_check_overwrite_constraint(db, txn, key, prelocked, overwrite_mode);
    if (rc != 0) goto account;

    //Do locking if necessary.
    if (db->i->lt && !(prelocked & DB_PRELOCKED_WRITE)) {
        rc = get_point_write_lock(db, txn, key);
        if (rc != 0) goto account;
    }

    {
        //Insert into the brt.
        TOKUTXN ttxn = txn ? db_txn_struct_i(txn)->tokutxn : NULL;
        enum brt_msg_type msg_type =
            (overwrite_mode == DB_NOOVERWRITE_NO_ERROR) ? BRT_INSERT_NO_OVERWRITE : BRT_INSERT;
        if (!holds_ydb_lock) toku_ydb_lock();
        rc = toku_brt_maybe_insert(db->i->brt, key, val, ttxn, FALSE, ZERO_LSN, TRUE, msg_type);
        if (!holds_ydb_lock) toku_ydb_unlock();
    }

account:
    // helgrind flags a race on a plain increment of these counters,
    // so they are bumped atomically.
    if (rc == 0) {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_INSERTS), 1);
    } else {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_INSERTS_FAIL), 1);
    }
    return rc;
}
// Send an update message for `key` to `db` on behalf of `txn`.
//
// update_function_extra is handed to toku_brt_maybe_update(); together with
// the key it must satisfy the same size limits as a put. A point write lock
// is taken on the key when the db has a lock tree and DB_PRELOCKED_WRITE was
// not passed in flags. The BRT update itself runs under the ydb lock, which is
// taken and released here.
//
// Returns 0 on success, otherwise the first error encountered.
// The update / update-fail status counters are bumped after the ydb lock has
// been released, so they are updated atomically (as toku_db_put does).
static int
toku_db_update(DB *db, DB_TXN *txn,
               const DBT *key,
               const DBT *update_function_extra,
               u_int32_t flags) {
    HANDLE_PANICKED_DB(db);
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);
    int r = 0;

    u_int32_t lock_flags = get_prelocked_flags(flags);
    flags &= ~lock_flags;

    r = db_put_check_size_constraints(db, key, update_function_extra);
    if (r != 0) { goto cleanup; }

    BOOL do_locking = (db->i->lt && !(lock_flags & DB_PRELOCKED_WRITE));
    if (do_locking) {
        r = get_point_write_lock(db, txn, key);
        if (r != 0) { goto cleanup; }
    }

    TOKUTXN ttxn = txn ? db_txn_struct_i(txn)->tokutxn : NULL;
    toku_ydb_lock();
    r = toku_brt_maybe_update(db->i->brt, key, update_function_extra, ttxn,
                              FALSE, ZERO_LSN, TRUE);
    toku_ydb_unlock();

cleanup:
    // accountability: atomic, since the ydb lock is no longer held here
    if (r == 0) {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_UPDATES), 1);
    } else {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_UPDATES_FAIL), 1);
    }
    return r;
}
// Broadcast an update message to every row of `db` on behalf of `txn`.
//
// DB_IS_RESETTING_OP in flags means the dictionary should be considered as if
// created by this transaction. For example, it is set when
// toku_db_update_broadcast() is used to implement a schema change (such as
// adding a column), and is clear when the broadcast simply updates all the
// rows of a table (such as incrementing a field).
//
// A resetting op must be issued from a top-level transaction: it fails with
// EINVAL when txn is NULL or has a parent. For a resetting op the fileops lock
// is pre-acquired; in all cases a table write lock is pre-acquired when the db
// has a lock tree and DB_PRELOCKED_WRITE was not passed. The BRT broadcast
// itself runs under the ydb lock, which is taken and released here.
//
// Returns 0 on success, otherwise the first error encountered.
// The broadcast / broadcast-fail status counters are bumped after the ydb lock
// has been released, so they are updated atomically (as toku_db_put does).
static int
toku_db_update_broadcast(DB *db, DB_TXN *txn,
                         const DBT *update_function_extra,
                         u_int32_t flags) {
    HANDLE_PANICKED_DB(db);
    HANDLE_DB_ILLEGAL_WORKING_PARENT_TXN(db, txn);
    int r = 0;

    u_int32_t lock_flags = get_prelocked_flags(flags);
    flags &= ~lock_flags;
    u_int32_t is_resetting_op_flag = flags & DB_IS_RESETTING_OP;
    flags &= is_resetting_op_flag;
    BOOL is_resetting_op = (is_resetting_op_flag != 0);

    if (is_resetting_op) {
        // txn may legitimately be NULL in this function (see the ttxn
        // computation below), so it must be checked before it is dereferenced.
        if (txn == NULL || txn->parent != NULL) {
            r = EINVAL; // a resetting op needs a transaction, and it cannot have a parent
            goto cleanup;
        }
        r = toku_db_pre_acquire_fileops_lock(db, txn);
        if (r != 0) { goto cleanup; }
    }
    {
        // There is no key for a broadcast; check the extra against the value
        // limit using an empty key.
        DBT null_key;
        toku_init_dbt(&null_key);
        r = db_put_check_size_constraints(db, &null_key, update_function_extra);
        if (r != 0) { goto cleanup; }
    }

    BOOL do_locking = (db->i->lt && !(lock_flags & DB_PRELOCKED_WRITE));
    if (do_locking) {
        r = toku_db_pre_acquire_table_lock(db, txn, TRUE);
        if (r != 0) { goto cleanup; }
    }

    TOKUTXN ttxn = txn ? db_txn_struct_i(txn)->tokutxn : NULL;
    toku_ydb_lock();
    r = toku_brt_maybe_update_broadcast(db->i->brt, update_function_extra, ttxn,
                                        FALSE, ZERO_LSN, TRUE, is_resetting_op);
    toku_ydb_unlock();

cleanup:
    // accountability: atomic, since the ydb lock is no longer held here
    if (r == 0) {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_UPDATES_BROADCAST), 1);
    } else {
        (void) __sync_fetch_and_add(&STATUS_VALUE(YDB_LAYER_NUM_UPDATES_BROADCAST_FAIL), 1);
    }
    return r;
}
// Log the deletion of `key` from a single brt under txn's TOKUTXN.
// Returns whatever toku_brt_log_del() returns.
static int
log_del_single(DB_TXN *txn, BRT brt, const DBT *key) {
    return toku_brt_log_del(db_txn_struct_i(txn)->tokutxn, brt, key);
}
// Total of (keys[k].size + overhead) over the first num_keys entries of keys.
// The arithmetic is done in uint32_t and is not checked for overflow.
static uint32_t
sum_size(uint32_t num_keys, DBT keys[], uint32_t overhead) {
    uint32_t total = 0;
    uint32_t k = 0;
    while (k < num_keys) {
        total += keys[k].size + overhead;
        k++;
    }
    return total;
}
// Log the deletion of keys[i] from brts[i] for i in [0, num_dbs).
// Two encodings are possible: one delete-multiple entry carrying the source
// key and value, or one single-delete entry per brt. Their sizes are estimated
// and the per-brt form is used only when it is strictly smaller.
// Returns 0 when num_dbs is 0, otherwise the result of the logging call(s);
// the per-brt loop stops at the first failure.
static int
log_del_multiple(DB_TXN *txn, DB *src_db, const DBT *key, const DBT *val, uint32_t num_dbs, BRT brts[], DBT keys[]) {
    if (num_dbs == 0) {
        return 0;
    }

    TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
    BRT src_brt = src_db ? src_db->i->brt : NULL;
    uint32_t size_as_multiple = key->size + val->size + num_dbs*sizeof (uint32_t) + toku_log_enq_delete_multiple_overhead;
    uint32_t size_as_singles = sum_size(num_dbs, keys, toku_log_enq_delete_any_overhead);

    if (size_as_singles >= size_as_multiple) {
        return toku_brt_log_del_multiple(ttxn, src_brt, brts, num_dbs, key, val);
    }

    int rc = 0;
    for (uint32_t i = 0; i < num_dbs && rc == 0; i++) {
        rc = log_del_single(txn, brts[i], &keys[i]);
    }
    return rc;
}
// Find src_db in db_array by pointer equality.
// Returns the index of the first match, or num_dbs when there is none.
static uint32_t
lookup_src_db(uint32_t num_dbs, DB *db_array[], DB *src_db) {
    for (uint32_t idx = 0; idx < num_dbs; idx++) {
        if (db_array[idx] == src_db) {
            return idx;
        }
    }
    return num_dbs;
}
// Send a delete for keys[i] to db_array[i], for i in [0, num_dbs), stopping at
// the first failure.
// If a db is being built by an indexer, the delete is sent to it only when the
// indexer's source key is to the left of or equal to the indexer's cursor.
// The source key is src_key when the indexer's source db is src_db; otherwise
// it is the entry of keys[] at the position of the indexer's source db in
// db_array (which must be present).
static int
do_del_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT keys[], DB *src_db, const DBT *src_key) {
    int rc = 0;
    TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;

    for (uint32_t i = 0; i < num_dbs && rc == 0; i++) {
        DB *target = db_array[i];
        DB_INDEXER *indexer = toku_db_get_indexer(target);

        if (indexer) { // target is the index under construction
            DB *indexer_src_db = toku_indexer_get_src_db(indexer);
            invariant(indexer_src_db != NULL);

            const DBT *indexer_src_key;
            if (src_db == indexer_src_db) {
                indexer_src_key = src_key;
            } else {
                uint32_t src_pos = lookup_src_db(num_dbs, db_array, indexer_src_db);
                invariant(src_pos < num_dbs);
                indexer_src_key = &keys[src_pos];
            }

            // Skip this db: the key lies to the right of the indexer's cursor.
            if (toku_indexer_is_key_right_of_le_cursor(indexer, indexer_src_db, indexer_src_key)) {
                continue;
            }
        }

        rc = toku_brt_maybe_delete(target->i->brt, &keys[i], ttxn, FALSE, ZERO_LSN, FALSE);
    }
    return rc;
}
// Delete the rows derived from (src_key, src_val) from each of the num_dbs
// dictionaries in db_array, under txn.
//
// For db_array[i] == src_db the key deleted is src_key itself; for any other
// db the key is produced by the environment's generate_row_for_del callback
// into keys[i]. flags_array[i] may carry DB_DELETE_ANY and the prelocked bits;
// any other bit yields EINVAL. Without DB_DELETE_ANY the key must exist.
//
// Returns EINVAL when txn is NULL or no generate_row_for_del callback is set;
// otherwise 0 on success or the first nonzero result of a callback, lookup,
// lock, log or delete step.
// Every path through the cleanup label adds num_dbs to either the
// multi-delete or the multi-delete-fail status counter.
int
env_del_multiple(
DB_ENV *env,
DB *src_db,
DB_TXN *txn,
const DBT *src_key,
const DBT *src_val,
uint32_t num_dbs,
DB **db_array,
DBT *keys,
uint32_t *flags_array)
{
int r;
// Declared before any goto so that no jump enters the scope of a VLA.
// NOTE(review): with num_dbs == 0 this is a zero-length VLA, which ISO C does not permit.
DBT del_keys[num_dbs];
HANDLE_PANICKED_ENV(env);
if (!txn) {
r = EINVAL;
goto cleanup;
}
if (!env->i->generate_row_for_del) {
r = EINVAL;
goto cleanup;
}
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
// The remaining VLAs live in their own block; the gotos above jump past the whole block.
{
uint32_t lock_flags[num_dbs];
uint32_t remaining_flags[num_dbs];
BRT brts[num_dbs];
// Phase 1 (ydb lock not held): build the key, validate flags and take row locks for each db.
for (uint32_t which_db = 0; which_db < num_dbs; which_db++) {
DB *db = db_array[which_db];
lock_flags[which_db] = get_prelocked_flags(flags_array[which_db]);
remaining_flags[which_db] = flags_array[which_db] & ~lock_flags[which_db];
if (db == src_db) {
del_keys[which_db] = *src_key;
}
else {
//Generate the key
r = env->i->generate_row_for_del(db, src_db, &keys[which_db], src_key, src_val);
if (r != 0) goto cleanup;
del_keys[which_db] = keys[which_db];
}
// Only DB_DELETE_ANY is accepted besides the prelocked bits.
if (remaining_flags[which_db] & ~DB_DELETE_ANY) {
r = EINVAL;
goto cleanup;
}
BOOL error_if_missing = (BOOL)(!(remaining_flags[which_db]&DB_DELETE_ANY));
if (error_if_missing) {
//Check if the key exists in the db.
r = db_getf_set(db, txn, lock_flags[which_db]|DB_SERIALIZABLE|DB_RMW, &del_keys[which_db], ydb_getf_do_nothing, NULL);
if (r != 0) goto cleanup;
}
//Do locking if necessary.
if (db->i->lt && !(lock_flags[which_db] & DB_PRELOCKED_WRITE)) {
//Needs locking
r = get_point_write_lock(db, txn, &del_keys[which_db]);
if (r != 0) goto cleanup;
}
brts[which_db] = db->i->brt;
}
// Phase 2 (under the ydb lock): log the deletes, then apply them.
toku_ydb_lock();
if (num_dbs == 1) {
r = log_del_single(txn, brts[0], &del_keys[0]);
}
else {
r = log_del_multiple(txn, src_db, src_key, src_val, num_dbs, brts, del_keys);
}
if (r == 0)
r = do_del_multiple(txn, num_dbs, db_array, del_keys, src_db, src_key);
}
// Reached only by falling out of the block above, i.e. with the ydb lock held;
// every goto cleanup happens before the lock is taken and skips this unlock.
toku_ydb_unlock();
cleanup:
// NOTE(review): these counter updates are not atomic, unlike the ones in toku_db_put.
if (r == 0)
STATUS_VALUE(YDB_LAYER_NUM_MULTI_DELETES) += num_dbs; // accountability
else
STATUS_VALUE(YDB_LAYER_NUM_MULTI_DELETES_FAIL) += num_dbs; // accountability
return r;
}
// Log the insertion of (key, val) into a single brt under txn's TOKUTXN.
// Returns whatever toku_brt_log_put() returns.
static int
log_put_single(DB_TXN *txn, BRT brt, const DBT *key, const DBT *val) {
    return toku_brt_log_put(db_txn_struct_i(txn)->tokutxn, brt, key, val);
}
// Log one put-multiple entry covering all num_dbs brts, carrying the source
// key and value. src_db may be NULL, in which case a NULL source brt is logged.
// Returns 0 without logging anything when num_dbs is 0, otherwise the result
// of toku_brt_log_put_multiple().
static int
log_put_multiple(DB_TXN *txn, DB *src_db, const DBT *src_key, const DBT *src_val, uint32_t num_dbs, BRT brts[]) {
    if (num_dbs == 0) {
        return 0;
    }
    TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;
    BRT src_brt = src_db ? src_db->i->brt : NULL;
    return toku_brt_log_put_multiple(ttxn, src_brt, brts, num_dbs, src_key, src_val);
}
// Insert (keys[i], vals[i]) into db_array[i], for i in [0, num_dbs), stopping
// at the first failure.
// If a db is being built by an indexer, the row is put into it only when the
// indexer's source key is to the left of or equal to the indexer's cursor.
// The source key is src_key when the indexer's source db is src_db; otherwise
// it is the entry of keys[] at the position of the indexer's source db in
// db_array (which must be present).
static int
do_put_multiple(DB_TXN *txn, uint32_t num_dbs, DB *db_array[], DBT keys[], DBT vals[], DB *src_db, const DBT *src_key) {
    int rc = 0;
    TOKUTXN ttxn = db_txn_struct_i(txn)->tokutxn;

    for (uint32_t i = 0; i < num_dbs && rc == 0; i++) {
        DB *target = db_array[i];
        DB_INDEXER *indexer = toku_db_get_indexer(target);

        if (indexer) { // target is the index under construction
            DB *indexer_src_db = toku_indexer_get_src_db(indexer);
            invariant(indexer_src_db != NULL);

            const DBT *indexer_src_key;
            if (src_db == indexer_src_db) {
                indexer_src_key = src_key;
            } else {
                uint32_t src_pos = lookup_src_db(num_dbs, db_array, indexer_src_db);
                invariant(src_pos < num_dbs);
                indexer_src_key = &keys[src_pos];
            }

            // Skip this db: the key lies to the right of the indexer's cursor.
            if (toku_indexer_is_key_right_of_le_cursor(indexer, indexer_src_db, indexer_src_key)) {
                continue;
            }
        }

        rc = toku_brt_maybe_insert(target->i->brt, &keys[i], &vals[i], ttxn, FALSE, ZERO_LSN, FALSE, BRT_INSERT);
    }
    return rc;
}
// Insert the rows derived from (src_key, src_val) into each of the num_dbs
// dictionaries in db_array, under txn.
//
// For db_array[i] == src_db the row inserted is (src_key, src_val) itself; for
// any other db the row is produced by the environment's generate_row_for_put
// callback into keys[i] / vals[i]. flags_array[i] is the prelocked bits plus an
// overwrite mode; DB_NOOVERWRITE_NO_ERROR is rejected with EINVAL.
//
// Returns EINVAL when txn is NULL, num_dbs is 0 or no generate_row_for_put
// callback is set; otherwise 0 on success or the first nonzero result of a
// callback, size/overwrite check, lock, log or insert step.
// Every path through the cleanup label adds num_dbs to either the
// multi-insert or the multi-insert-fail status counter.
static int
env_put_multiple_internal(
DB_ENV *env,
DB *src_db,
DB_TXN *txn,
const DBT *src_key,
const DBT *src_val,
uint32_t num_dbs,
DB **db_array,
DBT *keys,
DBT *vals,
uint32_t *flags_array)
{
int r;
// All VLAs are declared before the first goto so that no jump enters their scope.
// NOTE(review): they are sized before num_dbs == 0 is rejected below; a
// zero-length VLA is not permitted by ISO C.
DBT put_keys[num_dbs];
DBT put_vals[num_dbs];
HANDLE_PANICKED_ENV(env);
uint32_t lock_flags[num_dbs];
uint32_t remaining_flags[num_dbs];
BRT brts[num_dbs];
if (!txn || !num_dbs) {
r = EINVAL;
goto cleanup;
}
if (!env->i->generate_row_for_put) {
r = EINVAL;
goto cleanup;
}
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
// Phase 1 (ydb lock not held): build each row, validate it and take row locks.
for (uint32_t which_db = 0; which_db < num_dbs; which_db++) {
DB *db = db_array[which_db];
lock_flags[which_db] = get_prelocked_flags(flags_array[which_db]);
remaining_flags[which_db] = flags_array[which_db] & ~lock_flags[which_db];
//Generate the row
if (db == src_db) {
put_keys[which_db] = *src_key;
put_vals[which_db] = *src_val;
}
else {
r = env->i->generate_row_for_put(db, src_db, &keys[which_db], &vals[which_db], src_key, src_val);
if (r != 0) goto cleanup;
put_keys[which_db] = keys[which_db];
put_vals[which_db] = vals[which_db];
}
// check size constraints
r = db_put_check_size_constraints(db, &put_keys[which_db], &put_vals[which_db]);
if (r != 0) goto cleanup;
//Check overwrite constraints
r = db_put_check_overwrite_constraint(db, txn,
&put_keys[which_db],
lock_flags[which_db], remaining_flags[which_db]);
if (r != 0) goto cleanup;
if (remaining_flags[which_db] == DB_NOOVERWRITE_NO_ERROR) {
//put_multiple does not support delaying the no error, since we would
//have to log the flag in the put_multiple.
r = EINVAL; goto cleanup;
}
//Do locking if necessary.
if (db->i->lt && !(lock_flags[which_db] & DB_PRELOCKED_WRITE)) {
//Needs locking
r = get_point_write_lock(db, txn, &put_keys[which_db]);
if (r != 0) goto cleanup;
}
brts[which_db] = db->i->brt;
}
// Phase 2 (under the ydb lock): log the puts, then apply them.
toku_ydb_lock();
if (num_dbs == 1) {
r = log_put_single(txn, brts[0], &put_keys[0], &put_vals[0]);
}
else {
r = log_put_multiple(txn, src_db, src_key, src_val, num_dbs, brts);
}
if (r == 0) {
r = do_put_multiple(txn, num_dbs, db_array, put_keys, put_vals, src_db, src_key);
}
// Every goto cleanup above happens before the lock is taken and skips this unlock.
toku_ydb_unlock();
cleanup:
// NOTE(review): these counter updates are not atomic, unlike the ones in toku_db_put.
if (r == 0)
STATUS_VALUE(YDB_LAYER_NUM_MULTI_INSERTS) += num_dbs; // accountability
else
STATUS_VALUE(YDB_LAYER_NUM_MULTI_INSERTS_FAIL) += num_dbs; // accountability
return r;
}
// Replace the rows derived from (old_src_key, old_src_data) with the rows
// derived from (new_src_key, new_src_data) in each of the num_dbs dictionaries
// in db_array, under txn.
//
// Scratch space supplied by the caller:
//   keys[0 .. num_dbs-1]          receive the generated new keys
//   keys[num_dbs .. 2*num_dbs-1]  receive the generated old keys
//   vals[0 .. num_dbs-1]          receive the generated new vals
// ENOMEM is returned when num_keys / num_vals are too small for that layout.
//
// For each db the old and new keys are compared with the db's compare
// function. When they differ, a delete of the old key is queued. A put of the
// new row is queued when the keys differ or the new value is non-empty.
// All queued deletes are then logged and applied, followed by all queued puts.
//
// Returns EINVAL when txn is NULL or no generate_row_for_put callback is set;
// otherwise 0 on success or the first nonzero result encountered.
// Every path through the cleanup label adds num_dbs to either the
// multi-update or the multi-update-fail status counter.
int
env_update_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn,
DBT *old_src_key, DBT *old_src_data,
DBT *new_src_key, DBT *new_src_data,
uint32_t num_dbs, DB **db_array, uint32_t* flags_array,
uint32_t num_keys, DBT keys[],
uint32_t num_vals, DBT vals[]) {
int r = 0;
HANDLE_PANICKED_ENV(env);
if (!txn) {
r = EINVAL;
goto cleanup;
}
if (!env->i->generate_row_for_put) {
r = EINVAL;
goto cleanup;
}
HANDLE_ILLEGAL_WORKING_PARENT_TXN(env, txn);
// The VLAs live in their own block; the gotos above jump past the whole block.
{
// Work lists: the dbs that need a delete of the old key ...
uint32_t n_del_dbs = 0;
DB *del_dbs[num_dbs];
BRT del_brts[num_dbs];
DBT del_keys[num_dbs];
// ... and the dbs that need a put of the new row.
uint32_t n_put_dbs = 0;
DB *put_dbs[num_dbs];
BRT put_brts[num_dbs];
DBT put_keys[num_dbs];
DBT put_vals[num_dbs];
uint32_t lock_flags[num_dbs];
uint32_t remaining_flags[num_dbs];
// Phase 1 (ydb lock not held): generate rows, validate, take row locks, fill the work lists.
for (uint32_t which_db = 0; which_db < num_dbs; which_db++) {
DB *db = db_array[which_db];
DBT curr_old_key, curr_new_key, curr_new_val;
lock_flags[which_db] = get_prelocked_flags(flags_array[which_db]);
remaining_flags[which_db] = flags_array[which_db] & ~lock_flags[which_db];
// keys[0..num_dbs-1] are the new keys
// keys[num_dbs..2*num_dbs-1] are the old keys
// vals[0..num_dbs-1] are the new vals
// Generate the old key and val
if (which_db + num_dbs >= num_keys) {
r = ENOMEM; goto cleanup;
}
if (db == src_db) {
curr_old_key = *old_src_key;
}
else {
// Only the old key is needed, so NULL is passed for the val output.
r = env->i->generate_row_for_put(db, src_db, &keys[which_db + num_dbs], NULL, old_src_key, old_src_data);
if (r != 0) goto cleanup;
curr_old_key = keys[which_db + num_dbs];
}
// Generate the new key and val
if (which_db >= num_keys || which_db >= num_vals) {
r = ENOMEM; goto cleanup;
}
if (db == src_db) {
curr_new_key = *new_src_key;
curr_new_val = *new_src_data;
}
else {
r = env->i->generate_row_for_put(db, src_db, &keys[which_db], &vals[which_db], new_src_key, new_src_data);
if (r != 0) goto cleanup;
curr_new_key = keys[which_db];
curr_new_val = vals[which_db];
}
toku_dbt_cmp cmpfun = toku_db_get_compare_fun(db);
BOOL key_eq = cmpfun(db, &curr_old_key, &curr_new_key) == 0;
if (!key_eq) {
//Check overwrite constraints only in the case where
// the keys are not equal.
// If the keys are equal, then we do not care whether the flag is DB_NOOVERWRITE or 0
r = db_put_check_overwrite_constraint(db, txn,
&curr_new_key,
lock_flags[which_db], remaining_flags[which_db]);
if (r != 0) goto cleanup;
if (remaining_flags[which_db] == DB_NOOVERWRITE_NO_ERROR) {
//update_multiple does not support delaying the no error, since we would
//have to log the flag in the put_multiple.
r = EINVAL; goto cleanup;
}
// lock old key
if (db->i->lt && !(lock_flags[which_db] & DB_PRELOCKED_WRITE)) {
r = get_point_write_lock(db, txn, &curr_old_key);
if (r != 0) goto cleanup;
}
del_dbs[n_del_dbs] = db;
del_brts[n_del_dbs] = db->i->brt;
del_keys[n_del_dbs] = curr_old_key;
n_del_dbs++;
}
// we take a shortcut and avoid generating the old val
// we assume that any new vals with size > 0 are different than the old val
// if (!key_eq || !(dbt_cmp(&vals[which_db], &vals[which_db + num_dbs]) == 0)) {
if (!key_eq || curr_new_val.size > 0) {
r = db_put_check_size_constraints(db, &curr_new_key, &curr_new_val);
if (r != 0) goto cleanup;
// lock new key
// NOTE(review): unlike the old-key lock above, this does not consult
// DB_PRELOCKED_WRITE — confirm that this is intentional.
if (db->i->lt) {
r = get_point_write_lock(db, txn, &curr_new_key);
if (r != 0) goto cleanup;
}
put_dbs[n_put_dbs] = db;
put_brts[n_put_dbs] = db->i->brt;
put_keys[n_put_dbs] = curr_new_key;
put_vals[n_put_dbs] = curr_new_val;
n_put_dbs++;
}
}
// grab the ydb lock for the actual work that
// depends on it
toku_ydb_lock();
// Phase 2a: log and apply the queued deletes.
if (r == 0 && n_del_dbs > 0) {
if (n_del_dbs == 1)
r = log_del_single(txn, del_brts[0], &del_keys[0]);
else
r = log_del_multiple(txn, src_db, old_src_key, old_src_data, n_del_dbs, del_brts, del_keys);
if (r == 0)
r = do_del_multiple(txn, n_del_dbs, del_dbs, del_keys, src_db, old_src_key);
}
// Phase 2b: log and apply the queued puts (skipped if a delete step failed).
if (r == 0 && n_put_dbs > 0) {
if (n_put_dbs == 1)
r = log_put_single(txn, put_brts[0], &put_keys[0], &put_vals[0]);
else
r = log_put_multiple(txn, src_db, new_src_key, new_src_data, n_put_dbs, put_brts);
if (r == 0)
r = do_put_multiple(txn, n_put_dbs, put_dbs, put_keys, put_vals, src_db, new_src_key);
}
// Every goto cleanup above happens before the lock is taken and skips this unlock.
toku_ydb_unlock();
}
cleanup:
// NOTE(review): these counter updates are not atomic, unlike the ones in toku_db_put.
if (r == 0)
STATUS_VALUE(YDB_LAYER_NUM_MULTI_UPDATES) += num_dbs; // accountability
else
STATUS_VALUE(YDB_LAYER_NUM_MULTI_UPDATES_FAIL) += num_dbs; // accountability
return r;
}
// Delete wrapper that brackets toku_db_del() with toku_db_construct_autotxn()
// and toku_db_destruct_autotxn().
// If constructing the autotxn fails, its error is returned and nothing else
// happens. Otherwise the delete runs (without the ydb lock held by us) and its
// result is handed to toku_db_destruct_autotxn(), whose return value is the
// result of this call.
int
autotxn_db_del(DB* db, DB_TXN* txn, DBT* key, u_int32_t flags) {
    BOOL made_txn;
    int rc = toku_db_construct_autotxn(db, &txn, &made_txn, FALSE, FALSE);
    if (rc != 0) {
        return rc;
    }
    rc = toku_db_del(db, txn, key, flags, FALSE);
    return toku_db_destruct_autotxn(txn, rc, made_txn, FALSE);
}
// Put wrapper: refuses the insert up front when the file system is in the red
// zone, then brackets toku_db_put() with toku_db_construct_autotxn() and
// toku_db_destruct_autotxn().
// Returns the file-system check's error or the autotxn construction error if
// either fails; otherwise the put's result is handed to
// toku_db_destruct_autotxn(), whose return value is the result of this call.
int
autotxn_db_put(DB* db, DB_TXN* txn, DBT* key, DBT* data, u_int32_t flags) {
    int rc = env_check_avail_fs_space(db->dbenv);
    if (rc != 0) {
        return rc;
    }

    BOOL made_txn;
    rc = toku_db_construct_autotxn(db, &txn, &made_txn, FALSE, FALSE);
    if (rc != 0) {
        return rc;
    }

    rc = toku_db_put(db, txn, key, data, flags, FALSE);
    return toku_db_destruct_autotxn(txn, rc, made_txn, FALSE);
}
// Update wrapper: refuses the update up front when the file system is in the
// red zone, then brackets toku_db_update() with toku_db_construct_autotxn()
// and toku_db_destruct_autotxn().
// Returns the file-system check's error or the autotxn construction error if
// either fails; otherwise the update's result is handed to
// toku_db_destruct_autotxn(), whose return value is the result of this call.
int
autotxn_db_update(DB *db, DB_TXN *txn,
                  const DBT *key,
                  const DBT *update_function_extra,
                  u_int32_t flags) {
    int rc = env_check_avail_fs_space(db->dbenv);
    if (rc != 0) {
        return rc;
    }

    BOOL made_txn;
    rc = toku_db_construct_autotxn(db, &txn, &made_txn, FALSE, FALSE);
    if (rc != 0) {
        return rc;
    }

    rc = toku_db_update(db, txn, key, update_function_extra, flags);
    return toku_db_destruct_autotxn(txn, rc, made_txn, FALSE);
}
// Broadcast-update wrapper: refuses the operation up front when the file
// system is in the red zone, then brackets toku_db_update_broadcast() with
// toku_db_construct_autotxn() and toku_db_destruct_autotxn().
// Returns the file-system check's error or the autotxn construction error if
// either fails; otherwise the broadcast's result is handed to
// toku_db_destruct_autotxn(), whose return value is the result of this call.
int
autotxn_db_update_broadcast(DB *db, DB_TXN *txn,
                            const DBT *update_function_extra,
                            u_int32_t flags) {
    int rc = env_check_avail_fs_space(db->dbenv);
    if (rc != 0) {
        return rc;
    }

    BOOL made_txn;
    rc = toku_db_construct_autotxn(db, &txn, &made_txn, FALSE, FALSE);
    if (rc != 0) {
        return rc;
    }

    rc = toku_db_update_broadcast(db, txn, update_function_extra, flags);
    return toku_db_destruct_autotxn(txn, rc, made_txn, FALSE);
}
// Public entry point for put-multiple: fails with the file-system check's
// error when the file system is in the red zone, otherwise forwards every
// argument unchanged to env_put_multiple_internal() and returns its result.
int
env_put_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, const DBT *src_key, const DBT *src_val, uint32_t num_dbs, DB **db_array, DBT *keys, DBT *vals, uint32_t *flags_array) {
    int rc = env_check_avail_fs_space(env);
    if (rc != 0) {
        return rc;
    }
    return env_put_multiple_internal(env, src_db, txn, src_key, src_val, num_dbs, db_array, keys, vals, flags_array);
}
// Externally visible wrapper around the file-local env_check_avail_fs_space():
// returns ENOSPC when the file system is in the red zone, otherwise 0.
int
toku_ydb_check_avail_fs_space(DB_ENV *env) {
    return env_check_avail_fs_space(env);
}
#undef STATUS_VALUE
#include <valgrind/drd.h>
// Load-time hook (GCC constructor attribute): asks valgrind's DRD tool to
// ignore accesses to ydb_write_layer_status, so that updates of the status
// counters are not reported as data races.
void toku_ydb_write_drd_ignore(void) __attribute__((constructor));

void
toku_ydb_write_drd_ignore(void) {
    DRD_IGNORE_VAR(ydb_write_layer_status);
}

82
src/ydb_write.h Normal file
View file

@ -0,0 +1,82 @@
// This file declares the write-path interface of the ydb layer: put, delete and update operations and their status counters
#if !defined(TOKU_YDB_WRITE_H)
#define TOKU_YDB_WRITE_H
#if defined(__cplusplus)
extern "C" {
#endif
// Indices into the status[] array of YDB_WRITE_LAYER_STATUS_S.
// Each operation has a success counter and a matching *_FAIL counter.
typedef enum {
YDB_LAYER_NUM_INSERTS = 0, // toku_db_put calls that returned 0
YDB_LAYER_NUM_INSERTS_FAIL, // toku_db_put calls that returned an error
YDB_LAYER_NUM_DELETES, // toku_db_del calls that returned 0
YDB_LAYER_NUM_DELETES_FAIL, // toku_db_del calls that returned an error
YDB_LAYER_NUM_UPDATES, // single-key updates that returned 0
YDB_LAYER_NUM_UPDATES_FAIL, // single-key updates that returned an error
YDB_LAYER_NUM_UPDATES_BROADCAST, // broadcast updates that returned 0
YDB_LAYER_NUM_UPDATES_BROADCAST_FAIL, // broadcast updates that returned an error
YDB_LAYER_NUM_MULTI_INSERTS, // grows by num_dbs per successful put-multiple
YDB_LAYER_NUM_MULTI_INSERTS_FAIL, // grows by num_dbs per failed put-multiple
YDB_LAYER_NUM_MULTI_DELETES, // grows by num_dbs per successful del-multiple
YDB_LAYER_NUM_MULTI_DELETES_FAIL, // grows by num_dbs per failed del-multiple
YDB_LAYER_NUM_MULTI_UPDATES, // grows by num_dbs per successful update-multiple
YDB_LAYER_NUM_MULTI_UPDATES_FAIL, // grows by num_dbs per failed update-multiple
YDB_WRITE_LAYER_STATUS_NUM_ROWS /* number of rows in this status array */
} ydb_write_lock_layer_status_entry;
// Status table of the ydb write layer, as copied out by ydb_write_layer_get_status().
typedef struct {
BOOL initialized; // set once the status rows have been initialized
TOKU_ENGINE_STATUS_ROW_S status[YDB_WRITE_LAYER_STATUS_NUM_ROWS]; // one row per ydb_write_lock_layer_status_entry
} YDB_WRITE_LAYER_STATUS_S, *YDB_WRITE_LAYER_STATUS;
// Copy the write-layer status table into *statp, initializing the table first if needed.
void ydb_write_layer_get_status(YDB_WRITE_LAYER_STATUS statp);
// Delete key from db under txn. flags may contain DB_DELETE_ANY and the prelocked bits;
// other bits yield EINVAL. When holds_ydb_lock is FALSE the function takes the ydb lock
// itself around the actual delete. Returns 0 on success.
int toku_db_del(DB *db, DB_TXN *txn, DBT *key, u_int32_t flags, BOOL holds_ydb_lock);
// Insert (key, val) into db under txn. flags is the prelocked bits plus an overwrite mode
// (0, DB_NOOVERWRITE or DB_NOOVERWRITE_NO_ERROR). When holds_ydb_lock is FALSE the function
// takes the ydb lock itself around the actual insert. Returns 0 on success.
int toku_db_put(DB *db, DB_TXN *txn, DBT *key, DBT *val, u_int32_t flags, BOOL holds_ydb_lock);
// Autotxn wrappers: each brackets the corresponding operation with
// toku_db_construct_autotxn() / toku_db_destruct_autotxn() and returns the
// latter's result. The put and update variants first fail with ENOSPC when the
// file system is in the red zone.
int autotxn_db_del(DB* db, DB_TXN* txn, DBT* key, u_int32_t flags);
int autotxn_db_put(DB* db, DB_TXN* txn, DBT* key, DBT* data, u_int32_t flags);
int autotxn_db_update(DB *db, DB_TXN *txn, const DBT *key, const DBT *update_function_extra, u_int32_t flags);
int autotxn_db_update_broadcast(DB *db, DB_TXN *txn, const DBT *update_function_extra, u_int32_t flags);
// Insert the rows derived from (src_key, src_val) into each of the num_dbs
// dictionaries of db_array. keys[i] / vals[i] receive the generated row for
// db_array[i] when it is not src_db; flags_array[i] holds that db's put flags.
// Fails with ENOSPC when the file system is in the red zone, and with EINVAL
// when txn is NULL or num_dbs is 0. Returns 0 on success.
int env_put_multiple(
DB_ENV *env,
DB *src_db,
DB_TXN *txn,
const DBT *src_key, const DBT *src_val,
uint32_t num_dbs,
DB **db_array,
DBT *keys, DBT *vals,
uint32_t *flags_array
);
// Delete the rows derived from (src_key, src_val) from each of the num_dbs
// dictionaries of db_array. keys[i] receives the generated key for
// db_array[i] when it is not src_db; flags_array[i] may carry DB_DELETE_ANY
// and the prelocked bits. Fails with EINVAL when txn is NULL.
// Returns 0 on success.
int env_del_multiple(
DB_ENV *env,
DB *src_db,
DB_TXN *txn,
const DBT *src_key,
const DBT *src_val,
uint32_t num_dbs,
DB **db_array,
DBT *keys,
uint32_t *flags_array
);
// Replace the rows derived from the old source row with the rows derived from
// the new source row in each of the num_dbs dictionaries of db_array.
// keys[] is scratch space for the generated keys: new keys in
// keys[0..num_dbs-1], old keys in keys[num_dbs..2*num_dbs-1]; vals[] is scratch
// space for the generated new vals in vals[0..num_dbs-1]. ENOMEM is returned
// when num_keys / num_vals are too small. Fails with EINVAL when txn is NULL.
// Returns 0 on success.
int env_update_multiple(
DB_ENV *env,
DB *src_db,
DB_TXN *txn,
DBT *old_src_key, DBT *old_src_data,
DBT *new_src_key, DBT *new_src_data,
uint32_t num_dbs,
DB **db_array,
uint32_t* flags_array,
uint32_t num_keys, DBT keys[],
uint32_t num_vals, DBT vals[]
);
#if defined(__cplusplus)
}
#endif
#endif