diff --git a/src/lock_tree/Makefile b/src/lock_tree/Makefile
index 122fca128b8..911f55b6d07 100644
--- a/src/lock_tree/Makefile
+++ b/src/lock_tree/Makefile
@@ -6,7 +6,7 @@ LIBNAME=liblocktree
 OPTFLAGS = #-O2
 # GCOV_FLAGS = -fprofile-arcs -ftest-coverage
 CFLAGS = -W -Wall -Werror -g3 -ggdb3 -fPIC $(OPTFLAGS) $(GCOV_FLAGS)
-CPPFLAGS = -I. -I.. -I../range_tree -I../../include -I../../newbrt
+CPPFLAGS = -I. -I.. -I../range_tree -I../../include -I../../newbrt -L../range_tree
 CPPFLAGS += -D_GNU_SOURCE -D_THREAD_SAFE -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE
 
 ifeq ($(OSX),OSX)
@@ -33,7 +33,7 @@ locktree.o: locktree.c locktree.h
 	gcc $(CFLAGS) $(CPPFLAGS) -c -DTOKU_LINEAR_RANGE_TREE $< -o $@
 
 $(LIBNAME).$(LIBEXT): locktree.o
-	cc $(CPPFLAGS) $< $(SHARED) -o $@ $(CFLAGS) -lz
+	cc $(CPPFLAGS) $< $(SHARED) -o $@ $(CFLAGS) -lrangetreelinear
 
-$(LIBNAME).a: log.o
+$(LIBNAME).a: locktree.o
 	$(AR) rv $@ $<
diff --git a/src/lock_tree/locktree.c b/src/lock_tree/locktree.c
index 8f6520eb2ff..447fb1f7e1f 100644
--- a/src/lock_tree/locktree.c
+++ b/src/lock_tree/locktree.c
@@ -6,13 +6,15 @@
 #include
 #include
 #include
-#include
+//TODO: Modify toku_lt_panic to use the panic thingy from DB
 
 static int __toku_lt_panic(toku_lock_tree *tree, int r) {
     tree->panicked = TRUE;
     return r;
 }
 
+const unsigned __toku_default_buflen = 2;
+
 static const DBT __toku_lt_infinity;
 static const DBT __toku_lt_neg_infinity;
@@ -29,7 +31,7 @@ static int __toku_infinite_compare(void* a, void* b) {
     assert(FALSE);
 }
 
-static BOOL __toku_lt_is_infinite(void* p) {
+static BOOL __toku_lt_is_infinite(const void* p) {
     return (p == toku_lt_infinity) || (p == toku_lt_neg_infinity);
 }
 
@@ -62,7 +64,7 @@ int __toku_lt_point_cmp(void* a, void* b) {
            be the sole determinant of the comparison */
         return __toku_infinite_compare(x->key_payload, y->key_payload);
     }
-    partial_result = x->lt->db->i->brt->compare_fun(x->lt->db,
+    partial_result = x->lt->compare_fun(x->lt->db,
                      __toku_recreate_DBT(&point_1, x->key_payload, x->key_len),
                      __toku_recreate_DBT(&point_2, y->key_payload, y->key_len));
     if (partial_result) return partial_result;
@@ -73,27 +75,28 @@ int __toku_lt_point_cmp(void* a, void* b) {
         __toku_lt_is_infinite(y->data_payload)) {
         return __toku_infinite_compare(x->data_payload, y->data_payload);
     }
-    return x->lt->db->i->brt->dup_compare(x->lt->db,
+    return x->lt->dup_compare(x->lt->db,
                      __toku_recreate_DBT(&point_1, x->data_payload, x->data_len),
                      __toku_recreate_DBT(&point_2, y->data_payload, y->data_len));
 }
 
-static int __toku_p_free(toku_point* point) {
+static int __toku_p_free(toku_lock_tree* tree, toku_point* point) {
     assert(point);
     if (!__toku_lt_is_infinite(point->key_payload)) {
-        toku_free(point->key_payload);
+        tree->free(point->key_payload);
     }
     if (!__toku_lt_is_infinite(point->data_payload)) {
-        toku_free(point->data_payload);
+        tree->free(point->data_payload);
    }
-    toku_free(point);
+    tree->free(point);
     return 0;
 }
 
 /* Allocate and copy the payload.
 */
-static int __toku_payload_copy(void** payload_out, u_int32_t* len_out,
+static int __toku_payload_copy(toku_lock_tree* tree,
+                               void** payload_out, u_int32_t* len_out,
                                void* payload_in, u_int32_t len_in) {
     assert(payload_out && len_out);
     if (__toku_lt_is_infinite(payload_in)) {
@@ -106,7 +109,7 @@ static int __toku_payload_copy(void** payload_out, u_int32_t* len_out,
         *len_out = 0;
     }
     else {
-        *payload_out = malloc(len_in);
+        *payload_out = tree->malloc(len_in);
         if (!*payload_out) return errno;
         *len_out = len_in;
         memcpy(payload_out, payload_in, len_in);
@@ -114,32 +117,34 @@
     return 0;
 }
 
-static int __toku_p_makecopy(void** ppoint) {
+static int __toku_p_makecopy(toku_lock_tree* tree, void** ppoint) {
     assert(ppoint);
     toku_point* point      = *(toku_point**)ppoint;
     toku_point* temp_point = NULL;
     int r;
 
-    temp_point = (toku_point*)toku_malloc(sizeof(toku_point));
+    temp_point = (toku_point*)tree->malloc(sizeof(toku_point));
     if (0) {
         died1:
-        toku_free(temp_point);
+        tree->free(temp_point);
         return r;
     }
     if (!temp_point) return errno;
     memcpy(temp_point, point, sizeof(toku_point));
-    r = __toku_payload_copy(&temp_point->key_payload, &temp_point->key_len,
+    r = __toku_payload_copy(tree,
+                            &temp_point->key_payload, &temp_point->key_len,
                             point->key_payload, point->key_len);
     if (0) {
         died2:
         if (!__toku_lt_is_infinite(temp_point->key_payload)) {
-            toku_free(temp_point->key_payload);
+            tree->free(temp_point->key_payload);
         }
         goto died1;
     }
     if (r!=0) goto died1;
-    __toku_payload_copy(&temp_point->data_payload, &temp_point->data_len,
+    __toku_payload_copy(tree,
+                        &temp_point->data_payload, &temp_point->data_len,
                         point->data_payload, point->data_len);
     if (r!=0) goto died2;
     *ppoint = temp_point;
@@ -332,10 +337,11 @@ static int __toku_lt_check_borderwrite_conflict(toku_lock_tree* tree,
     return 0;
 }
 
-static void __toku_payload_from_dbt(void** payload, u_int32_t* len, DBT* dbt) {
+static void __toku_payload_from_dbt(void** payload, u_int32_t* len,
+                                    const DBT* dbt) {
     assert(payload && len && dbt);
     if (__toku_lt_is_infinite(dbt)) {
-        *payload = dbt;
+        *payload = (void*)dbt;
         *len = 0;
     }
     else if (!dbt->data || !dbt->size) {
@@ -349,7 +355,7 @@ static void __toku_payload_from_dbt(void** payload, u_int32_t* len, DBT* dbt) {
 }
 
 static void __toku_init_point(toku_point* point, toku_lock_tree* tree,
-                              DBT* key, DBT* data) {
+                              const DBT* key, const DBT* data) {
     assert(point && tree && key && data);
     memset(point, 0, sizeof(toku_point));
     point->lt = tree;
@@ -381,12 +387,6 @@ static void __toku_init_insert(toku_range* to_insert,
     to_insert->data = txn;
 }
 
-static BOOL __toku_db_is_dupsort(DB* db) {
-    unsigned int brtflags;
-    toku_brt_get_flags(db->i->brt, &brtflags);
-    return (brtflags & TOKU_DB_DUPSORT) != 0;
-}
-
 static void __toku_lt_extend_extreme(toku_lock_tree* tree,toku_range* to_insert,
                                      BOOL* alloc_left, BOOL* alloc_right,
                                      unsigned numfound) {
@@ -417,7 +417,7 @@ static void __toku_lt_extend_extreme(toku_lock_tree* tree,toku_range* to_insert,
     }
 }
 
-static int __toku_lt_alloc_extreme(toku_range* to_insert,
+static int __toku_lt_alloc_extreme(toku_lock_tree* tree, toku_range* to_insert,
                                    BOOL alloc_left, BOOL* alloc_right) {
     assert(to_insert && alloc_right);
     BOOL copy_left = FALSE;
@@ -429,17 +429,17 @@ static int __toku_lt_alloc_extreme(toku_range* to_insert,
         copy_left = TRUE;
     }
     if (alloc_left) {
-        r = __toku_p_makecopy(&to_insert->left);
+        r = __toku_p_makecopy(tree, &to_insert->left);
         if (0) {
             died1:
-            if (alloc_left)  __toku_p_free(to_insert->left);
+            if (alloc_left)  __toku_p_free(tree, to_insert->left);
             return r;
         }
         if (r!=0) return r;
     }
     if (*alloc_right) {
         assert(!copy_left);
-        r = __toku_p_makecopy(&to_insert->right);
+        r = __toku_p_makecopy(tree, &to_insert->right);
         if (r!=0) goto died1;
     }
     else if (copy_left) to_insert->right = to_insert->left;
@@ -473,11 +473,11 @@ static void __toku_lt_free_points(toku_lock_tree* tree, toku_range* to_insert,
         if (tree->buf[i].right != tree->buf[i].left &&
             tree->buf[i].right != to_insert->left &&
             tree->buf[i].right != to_insert->right) {
-            __toku_p_free(tree->buf[i].right);
+            __toku_p_free(tree, tree->buf[i].right);
         }
         if (tree->buf[i].left != to_insert->left &&
             tree->buf[i].left != to_insert->right) {
-            __toku_p_free(tree->buf[i].left);
+            __toku_p_free(tree, tree->buf[i].left);
         }
     }
 }
@@ -536,11 +536,11 @@ static int __toku_consolidate(toku_lock_tree* tree,
                               numfound);
 
     /* Allocate the consolidated range */
-    r = __toku_lt_alloc_extreme(to_insert, alloc_left, &alloc_right);
+    r = __toku_lt_alloc_extreme(tree, to_insert, alloc_left, &alloc_right);
     if (0) {
         died1:
-        if (alloc_left)  __toku_p_free(to_insert->left);
-        if (alloc_right) __toku_p_free(to_insert->right);
+        if (alloc_left)  __toku_p_free(tree, to_insert->left);
+        if (alloc_right) __toku_p_free(tree, to_insert->right);
         return r;
     }
     if (r!=0) return r;
@@ -565,7 +565,7 @@ static int __toku_consolidate(toku_lock_tree* tree,
         goto died1;
     }
     if (r!=0) {
-        r = __toku_lt_panic(tree, r);
+        if (numfound) r = __toku_lt_panic(tree, r);
         goto died1;
     }
     assert(tree->mainread);
@@ -573,29 +573,79 @@ static int __toku_consolidate(toku_lock_tree* tree,
     /* Insert extreme range into mainread. */
     r = toku_rt_insert(tree->mainread, to_insert);
     if (r!=0) {
-        r = __toku_lt_panic(tree, r);
+        if (numfound) r = __toku_lt_panic(tree, r);
         goto died2;
     }
     return 0;
 }
 
-int toku_lt_create(toku_lock_tree** ptree, DB* db) {
-    if (!ptree || !db) return EINVAL;
+static void __toku_lt_free_contents(toku_lock_tree* tree, toku_range_tree* rt) {
+    assert(tree);
+    if (!rt) return;
     int r;
-    toku_lock_tree* temp_tree = (toku_lock_tree*)toku_malloc(sizeof(*temp_tree));
+    toku_range query;
+    toku_point left;
+    toku_point right;
+    __toku_init_point(&left,  tree, (DBT*)toku_lt_neg_infinity,
+                      tree->duplicates ? (DBT*)toku_lt_neg_infinity : NULL);
+    __toku_init_point(&right, tree, (DBT*)toku_lt_infinity,
+                      tree->duplicates ? (DBT*)toku_lt_infinity : NULL);
+    __toku_init_query(&query, &left, &right);
+
+    unsigned numfound;
+    r = toku_rt_find(rt, &query, 0, &tree->buf, &tree->buflen, &numfound);
+    if (r!=0) {
+        /*
+           To free space the fast way, we need to allocate more space.
+           Since we can't, we free the slow way.
+           We do not optimize this, we take one entry at a time,
+           delete it from the tree, and then free the memory.
+        */
+        do {
+            r = toku_rt_find(rt, &query, 1, &tree->buf, &tree->buflen,
+                             &numfound);
+            assert(r==0);
+            if (!numfound) break;
+            assert(numfound == 1);
+            r = toku_rt_delete(rt, &tree->buf[0]);
+            assert(r==0);
+            __toku_lt_free_points(tree, &query, numfound);
+        } while (TRUE);
+    }
+    else __toku_lt_free_points(tree, &query, numfound);
+    r = toku_rt_close(rt);
+    assert(r == 0);
+}
+
+int toku_lt_create(toku_lock_tree** ptree, DB* db, BOOL duplicates,
+                   int (*compare_fun)(DB*,const DBT*,const DBT*),
+                   int (*dup_compare)(DB*,const DBT*,const DBT*),
+                   void* (*user_malloc) (size_t),
+                   void  (*user_free)   (void*),
+                   void* (*user_realloc)(void*, size_t)) {
+    if (!ptree || !db || !compare_fun || !dup_compare) return EINVAL;
+    int r;
+
+    toku_lock_tree* temp_tree =(toku_lock_tree*)user_malloc(sizeof(*temp_tree));
     if (0) {
         died1:
-        free(temp_tree);
+        user_free(temp_tree);
         return r;
     }
     if (!temp_tree) return errno;
     memset(temp_tree, 0, sizeof(*temp_tree));
-    temp_tree->db = db;
-    temp_tree->duplicates = __toku_db_is_dupsort(db);
+    temp_tree->db          = db;
+    temp_tree->duplicates  = duplicates;
+    temp_tree->compare_fun = compare_fun;
+    temp_tree->dup_compare = dup_compare;
+    temp_tree->malloc      = user_malloc;
+    temp_tree->free        = user_free;
+    temp_tree->realloc     = user_realloc;
     r = toku_rt_create(&temp_tree->mainread, __toku_lt_point_cmp,
                        __toku_lt_txn_cmp, TRUE,
-                       toku_malloc, toku_free, toku_realloc);
+                       user_malloc, user_free, user_realloc);
     if (0) {
         died2:
         toku_rt_close(temp_tree->mainread);
@@ -604,7 +654,7 @@ int toku_lt_create(toku_lock_tree** ptree, DB* db) {
     if (r!=0) goto died1;
     r = toku_rt_create(&temp_tree->borderwrite, __toku_lt_point_cmp,
                        __toku_lt_txn_cmp, FALSE,
-                       toku_malloc, toku_free, toku_realloc);
+                       user_malloc, user_free, user_realloc);
     if (0) {
         died3:
         toku_rt_close(temp_tree->borderwrite);
@@ -614,44 +664,52 @@ int toku_lt_create(toku_lock_tree** ptree, DB* db) {
     //TODO: Remove this, and use multiples per transaction
     r = toku_rt_create(&temp_tree->selfwrite, __toku_lt_point_cmp,
                        __toku_lt_txn_cmp, FALSE,
-                       toku_malloc, toku_free, toku_realloc);
+                       user_malloc, user_free, user_realloc);
     assert(temp_tree->selfwrite);
     //TODO: Remove this, and use multiples per transaction
     r = toku_rt_create(&temp_tree->selfread, __toku_lt_point_cmp,
                        __toku_lt_txn_cmp, TRUE,
-                       toku_malloc, toku_free, toku_realloc);
+                       user_malloc, user_free, user_realloc);
     assert(temp_tree->selfread);
-
     temp_tree->buflen = __toku_default_buflen;
-    /* Using malloc here because range trees do not use toku_malloc/free. */
     temp_tree->buf = (toku_range*)
-        malloc(temp_tree->buflen * sizeof(toku_range));
+        user_malloc(temp_tree->buflen * sizeof(toku_range));
     if (!temp_tree->buf) {
         r = errno;
         goto died3;
     }
     //TODO: Create list of selfreads
     //TODO: Create list of selfwrites
-    assert(FALSE);  //Not implemented yet.
+    *ptree = temp_tree;
+    return 0;
 }
 
 int toku_lt_close(toku_lock_tree* tree) {
     if (!tree) return EINVAL;
-    //TODO: Free all memory held by things inside of trees!
-    //TODO: Close mainread, borderwrite, all selfreads, all selfwrites,
-    //TODO: remove selfreads and selfwrites from txns.
-    //TODO: Free buf;
-    assert(FALSE);  //Not implemented yet.
+    int r;
+    r = toku_rt_close(tree->mainread);
+    assert(r==0);
+    r = toku_rt_close(tree->borderwrite);
+    assert(r==0);
+//TODO: Do this to ALLLLLLLL selfread and ALLLLLL selfwrite tables.
+    /* Free all points referred to in a range tree (and close the tree). */
+    __toku_lt_free_contents(tree,__toku_lt_ifexist_selfread (tree, (DB_TXN*)1));
+    __toku_lt_free_contents(tree,__toku_lt_ifexist_selfwrite(tree, (DB_TXN*)1));
+//TODO: After freeing the tree, need to remove it from both lists!!!
+// One list probably IN the transaction, and one additional one here.
+    tree->free(tree->buf);
+    tree->free(tree);
+    return 0;
 }
 
 int toku_lt_acquire_read_lock(toku_lock_tree* tree, DB_TXN* txn,
-                              DBT* key, DBT* data) {
+                              const DBT* key, const DBT* data) {
     return toku_lt_acquire_range_read_lock(tree, txn, key, data, key, data);
 }
 
 int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
-                                    DBT* key_left, DBT* data_left,
-                                    DBT* key_right, DBT* data_right) {
+                                    const DBT* key_left, const DBT* data_left,
+                                    const DBT* key_right, const DBT* data_right) {
     if (!tree || !txn || !key_left || !key_right)         return EINVAL;
     if (!tree->duplicates && ( data_left ||  data_right)) return EINVAL;
     if (tree->duplicates  && (!data_left || !data_right)) return EINVAL;
@@ -708,7 +766,7 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
 }
 
 int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
-                               DBT* key, DBT* data) {
+                               const DBT* key, const DBT* data) {
     if (!tree || !txn || !key)     return EINVAL;
     if (!tree->duplicates && data) return EINVAL;
     if (tree->duplicates && !data) return EINVAL;
@@ -782,10 +840,10 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
            This is a point, and if merging was possible it would
            have been dominated by selfwrite.
         */
-        r = __toku_p_makecopy(&to_insert.left);
+        r = __toku_p_makecopy(tree, &to_insert.left);
         if (0) {
             died1:
-            __toku_p_free(to_insert.left);
+            __toku_p_free(tree, to_insert.left);
             return __toku_lt_panic(tree, r);
         }
         to_insert.right = to_insert.left;
@@ -858,8 +916,8 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
 }
 
 int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB_TXN* txn,
-                                     DBT* key_left, DBT* data_left,
-                                     DBT* key_right, DBT* data_right) {
+                                     const DBT* key_left, const DBT* data_left,
+                                     const DBT* key_right, const DBT* data_right) {
     if (!tree || !txn || !key_left || !key_right)         return EINVAL;
     if (!tree->duplicates && ( data_left ||  data_right)) return EINVAL;
     if (tree->duplicates  && (!data_left || !data_right)) return EINVAL;
diff --git a/src/lock_tree/locktree.h b/src/lock_tree/locktree.h
index a2261e69bf5..1d8574158e8 100644
--- a/src/lock_tree/locktree.h
+++ b/src/lock_tree/locktree.h
@@ -20,13 +20,20 @@ typedef struct {
     toku_range* buf;
     unsigned    buflen;
     BOOL        panicked;
+    int   (*compare_fun)(DB*,const DBT*,const DBT*);
+    int   (*dup_compare)(DB*,const DBT*,const DBT*);
+    /** The user malloc function */
+    void* (*malloc) (size_t);
+    /** The user free function */
+    void  (*free)   (void*);
+    /** The user realloc function */
+    void* (*realloc)(void*, size_t);
 } toku_lock_tree;
 
-#warning TODO: Handle 'panicked' variable in every api call.
+
 extern const DBT* const toku_lt_infinity;
 extern const DBT* const toku_lt_neg_infinity;
-const unsigned __toku_default_buflen = 2;
 
 /*
  * key_data = (void*)toku_lt_infinity is how we refer to the infinities.
 */
@@ -50,13 +57,14 @@ int __toku_lt_point_cmp(void* a, void* b);
 /*
  * Create a lock tree. Should be called only inside DB->open.
  * Params:
- *      ptree:      We set *ptree to the newly allocated tree.
- *      db:         This is the db that the lock tree will be performing locking for.
- *                  We will get flags to determine if it is a nodup or dupsort db.
- *                  db should NOT be opened yet, but should have the DB_DUPSORT flag
- *                  set appropriately.
- *                  We will use the key compare (and if appropriate the data compare)
- *                  functions in order to understand ranges.
+ *      ptree:        We set *ptree to the newly allocated tree.
+ *      db:           This is the db that the lock tree will be performing locking for.
+ *      duplicates    whether the db supports duplicates.
+ *      compare_fun   the key compare function.
+ *      dup_compare   the data compare function.
+ \param user_malloc   A user provided malloc(3) function.
+ \param user_free     A user provided free(3) function.
+ \param user_realloc  A user provided realloc(3) function.
 * Returns:
 *      0:      Success
 *      EINVAL: If (ptree == NULL || db == NULL).
@@ -67,7 +75,13 @@ int __toku_lt_point_cmp(void* a, void* b);
 * The EINVAL cases described will use assert to abort instead of returning errors.
 * If this library is ever exported to users, we will use error datas instead.
 */
-int toku_lt_create(toku_lock_tree** ptree, DB* db);
+int toku_lt_create(toku_lock_tree** ptree, DB* db, BOOL duplicates,
+                   int (*compare_fun)(DB*,const DBT*,const DBT*),
+                   int (*dup_compare)(DB*,const DBT*,const DBT*),
+                   void* (*user_malloc) (size_t),
+                   void  (*user_free)   (void*),
+                   void* (*user_realloc)(void*, size_t));
+
 
 /*
 * Closes/Frees a lock tree.
@@ -76,7 +90,6 @@ int toku_lt_create(toku_lock_tree** ptree, DB* db);
 * Returns:
 *      0: Success.
 *      EINVAL: If (tree == NULL)
-* May return other errors due to system calls.
 * Asserts:
 * The EINVAL cases described will use assert to abort instead of returning errors.
 * If this library is ever exported to users, we will use error datas instead.
@@ -118,7 +131,8 @@ int toku_lt_close(toku_lock_tree* tree);
 *      to its local memory.
 * *** Note that txn == NULL is not supported at this time.
 */
-int toku_lt_acquire_read_lock(toku_lock_tree* tree, DB_TXN* txn, DBT* key, DBT* data);
+int toku_lt_acquire_read_lock(toku_lock_tree* tree, DB_TXN* txn,
+                              const DBT* key, const DBT* data);
 
 //In BDB, txn can actually be NULL (mixed operations with transactions and no transactions).
 //This can cause conflicts, I was unable (so far) to verify that MySQL does or does not use
@@ -163,8 +177,8 @@ int toku_lt_acquire_read_lock(toku_lock_tree* tree, DB_TXN* txn, DBT* key, DBT*
 * *** Note that txn == NULL is not supported at this time.
 */
 int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
-                                    DBT* key_left, DBT* data_left,
-                                    DBT* key_right, DBT* data_right);
+                                    const DBT* key_left, const DBT* data_left,
+                                    const DBT* key_right, const DBT* data_right);
 
 //In BDB, txn can actually be NULL (mixed operations with transactions and no transactions).
 //This can cause conflicts, I was unable (so far) to verify that MySQL does or does not use
@@ -196,7 +210,8 @@ int toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB_TXN* txn,
 *      to its local memory.
 * *** Note that txn == NULL is not supported at this time.
 */
-int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn, DBT* key, DBT* data);
+int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn,
+                               const DBT* key, const DBT* data);
 
 //In BDB, txn can actually be NULL (mixed operations with transactions and no transactions).
 //This can cause conflicts, I was unable (so far) to verify that MySQL does or does not use
@@ -246,8 +261,8 @@ int toku_lt_acquire_write_lock(toku_lock_tree* tree, DB_TXN* txn, DBT* key, DBT*
 * *** Note that txn == NULL is not supported at this time.
 */
 int toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB_TXN* txn,
-                                     DBT* key_left, DBT* data_left,
-                                     DBT* key_right, DBT* data_right);
+                                     const DBT* key_left, const DBT* data_left,
+                                     const DBT* key_right, const DBT* data_right);
 
 //In BDB, txn can actually be NULL (mixed operations with transactions and no transactions).
 //This can cause conflicts, I was unable (so far) to verify that MySQL does or does not use
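
Reviewer note (not part of the patch): below is a minimal sketch of how a caller might drive the reworked toku_lt_create() interface after this change. Only the locktree function signatures come from the diff; the my_key_cmp comparator, the stack-allocated key, and the (DB_TXN*)1 placeholder transaction are illustrative assumptions.

/* Hypothetical caller of the new locktree API (sketch only). */
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "locktree.h"

/* Illustrative byte-wise comparator; a real caller passes the same
   comparison function the database itself uses. */
static int my_key_cmp(DB* db, const DBT* a, const DBT* b) {
    (void)db;
    u_int32_t min = a->size < b->size ? a->size : b->size;
    int r = memcmp(a->data, b->data, min);
    if (r) return r;
    return (int)a->size - (int)b->size;
}

static void example(DB* db) {
    toku_lock_tree* lt = NULL;
    /* duplicates == FALSE; my_key_cmp is also passed in the dup_compare slot
       because toku_lt_create now rejects a NULL dup_compare, and with
       duplicates off the data comparator is not expected to matter.
       The standard allocators match the malloc/free/realloc prototypes
       the tree now stores. */
    int r = toku_lt_create(&lt, db, FALSE, my_key_cmp, my_key_cmp,
                           malloc, free, realloc);
    assert(r == 0);

    char kbuf[] = "a";
    DBT key;
    memset(&key, 0, sizeof(key));
    key.data = kbuf;
    key.size = 1;

    /* Placeholder transaction handle, mirroring the (DB_TXN*)1 used by the
       TODO code in toku_lt_close(); data == NULL because duplicates are off. */
    DB_TXN* txn = (DB_TXN*)1;
    r = toku_lt_acquire_read_lock(lt, txn, &key, NULL);
    assert(r == 0);

    r = toku_lt_close(lt);
    assert(r == 0);
}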