fixes #5723, refs #5801: merge 5723 to main and merge some last-minute 5801 fixes:

- Widely adopt the ybt abstractions (toku_init_dbt, toku_clone_dbt, toku_destroy_dbt, etc.) instead of raw DBT management. TODO: the loader and the descriptor are still manually managed.
- Remove templates from the perf framework in favor of explicit key-size checks that more accurately describe "How Keys/Vals Work" and prevent type-related bugs.
- Also: remove some dead code, refs #5101.



git-svn-id: file:///svn/toku/tokudb@51665 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
John Esmet 2013-04-17 00:01:25 -04:00 committed by Yoni Fogel
parent b32f4f5247
commit ea0736aa1c
12 changed files with 156 additions and 316 deletions

View file

@ -67,43 +67,13 @@ hot_set_highest_key(struct hot_flusher_extra *flusher)
// The max current key will be NULL if we are traversing in the
// rightmost subtree of a given parent. As such, we don't want to
// allocate memory for this case.
if (flusher->max_current_key.data == NULL) {
if (flusher->highest_pivot_key.data) {
toku_free(flusher->highest_pivot_key.data);
}
flusher->highest_pivot_key.data = NULL;
} else {
toku_destroy_dbt(&flusher->highest_pivot_key);
if (flusher->max_current_key.data != NULL) {
// Otherwise, let's copy all the contents from one key to the other.
void *source = flusher->max_current_key.data;
void *destination = flusher->highest_pivot_key.data;
uint32_t size = flusher->max_current_key.size;
destination = toku_xrealloc(destination, size);
memcpy(destination, source, size);
// Finish copying all fields from the max current key.
// Add free here.
toku_fill_dbt(&(flusher->highest_pivot_key), destination, size);
toku_clone_dbt(&flusher->highest_pivot_key, flusher->max_current_key);
}
}
// Copies the pivot key in the parent to the given DBT key, using the
// pivot corresponding to the given child.
static void
hot_set_key(DBT *key, FTNODE parent, int childnum)
{
// assert that childnum is less than number of children - 1.
DBT *pivot = &parent->childkeys[childnum];
void *data = key->data;
uint32_t size = pivot->size;
data = toku_xrealloc(data, size);
memcpy(data, pivot->data, size);
toku_fill_dbt(key, data, size);
}
static int
hot_just_pick_child(FT h,
FTNODE parent,
@ -137,7 +107,8 @@ hot_update_flusher_keys(FTNODE parent,
// Update maximum current key if the child is NOT the rightmost
// child node.
if (childnum < (parent->n_children - 1)) {
hot_set_key(&flusher->max_current_key, parent, childnum);
toku_destroy_dbt(&flusher->max_current_key);
toku_clone_dbt(&flusher->max_current_key, parent->childkeys[childnum]);
}
}
@ -227,13 +198,8 @@ hot_flusher_init(struct flusher_advice *advice,
static void
hot_flusher_destroy(struct hot_flusher_extra *flusher)
{
if (flusher->highest_pivot_key.data) {
toku_free(flusher->highest_pivot_key.data);
}
if (flusher->max_current_key.data) {
toku_free(flusher->max_current_key.data);
}
toku_destroy_dbt(&flusher->highest_pivot_key);
toku_destroy_dbt(&flusher->max_current_key);
}
// Entry point for Hot Optimize Table (HOT). Note, this function is
@ -254,9 +220,7 @@ toku_ft_hot_optimize(FT_HANDLE brt,
// start of HOT operation
(void) toku_sync_fetch_and_add(&STATUS_VALUE(FT_HOT_NUM_STARTED), 1);
{
toku_ft_note_hot_begin(brt);
}
toku_ft_note_hot_begin(brt);
// Higher level logic prevents a dictionary from being deleted or
// truncated during a hot optimize operation. Doing so would violate
@ -297,10 +261,7 @@ toku_ft_hot_optimize(FT_HANDLE brt,
// Initialize the maximum current key. We need to do this for
// every traversal.
if (flusher.max_current_key.data) {
toku_free(flusher.max_current_key.data);
}
flusher.max_current_key.data = NULL;
toku_destroy_dbt(&flusher.max_current_key);
flusher.sub_tree_size = 1.0;
flusher.percentage_done = 0.0;

View file

@ -78,7 +78,7 @@ struct ftnode_fetch_extra {
// used in the case where type == ftnode_fetch_subset
// parameters needed to find out which child needs to be decompressed (so it can be read)
ft_search_t* search;
DBT *range_lock_left_key, *range_lock_right_key;
DBT range_lock_left_key, range_lock_right_key;
bool left_is_neg_infty, right_is_pos_infty;
// states if we should try to aggressively fetch basement nodes
// that are not specifically needed for current query,
@ -721,8 +721,8 @@ static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h)
bfe->type = ftnode_fetch_all;
bfe->h = h;
bfe->search = NULL;
bfe->range_lock_left_key = NULL;
bfe->range_lock_right_key = NULL;
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
bfe->left_is_neg_infty = false;
bfe->right_is_pos_infty = false;
bfe->child_to_read = -1;
@ -754,8 +754,14 @@ static inline void fill_bfe_for_subset_read(
bfe->type = ftnode_fetch_subset;
bfe->h = h;
bfe->search = search;
bfe->range_lock_left_key = (left->data ? left : NULL);
bfe->range_lock_right_key = (right->data ? right : NULL);
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
if (left) {
toku_copyref_dbt(&bfe->range_lock_left_key, *left);
}
if (right) {
toku_copyref_dbt(&bfe->range_lock_right_key, *right);
}
bfe->left_is_neg_infty = left_is_neg_infty;
bfe->right_is_pos_infty = right_is_pos_infty;
bfe->child_to_read = -1;
@ -776,8 +782,8 @@ static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
bfe->type = ftnode_fetch_none;
bfe->h = h;
bfe->search = NULL;
bfe->range_lock_left_key = NULL;
bfe->range_lock_right_key = NULL;
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
bfe->left_is_neg_infty = false;
bfe->right_is_pos_infty = false;
bfe->child_to_read = -1;
@ -789,18 +795,8 @@ static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) {
paranoid_invariant(bfe->type == ftnode_fetch_prefetch);
if (bfe->range_lock_left_key != NULL) {
toku_free(bfe->range_lock_left_key->data);
toku_destroy_dbt(bfe->range_lock_left_key);
toku_free(bfe->range_lock_left_key);
bfe->range_lock_left_key = NULL;
}
if (bfe->range_lock_right_key != NULL) {
toku_free(bfe->range_lock_right_key->data);
toku_destroy_dbt(bfe->range_lock_right_key);
toku_free(bfe->range_lock_right_key);
bfe->range_lock_right_key = NULL;
}
toku_destroy_dbt(&bfe->range_lock_left_key);
toku_destroy_dbt(&bfe->range_lock_right_key);
}
// this is in a strange place because it needs the cursor struct to be defined
@ -811,21 +807,15 @@ static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe,
bfe->type = ftnode_fetch_prefetch;
bfe->h = h;
bfe->search = NULL;
{
const DBT *left = &c->range_lock_left_key;
const DBT *right = &c->range_lock_right_key;
if (left->data) {
XMALLOC(bfe->range_lock_left_key);
toku_fill_dbt(bfe->range_lock_left_key, toku_xmemdup(left->data, left->size), left->size);
} else {
bfe->range_lock_left_key = NULL;
}
if (right->data) {
XMALLOC(bfe->range_lock_right_key);
toku_fill_dbt(bfe->range_lock_right_key, toku_xmemdup(right->data, right->size), right->size);
} else {
bfe->range_lock_right_key = NULL;
}
toku_init_dbt(&bfe->range_lock_left_key);
toku_init_dbt(&bfe->range_lock_right_key);
const DBT *left = &c->range_lock_left_key;
if (left->data) {
toku_clone_dbt(&bfe->range_lock_left_key, *left);
}
const DBT *right = &c->range_lock_right_key;
if (right->data) {
toku_clone_dbt(&bfe->range_lock_right_key, *right);
}
bfe->left_is_neg_infty = c->left_is_neg_infty;
bfe->right_is_pos_infty = c->right_is_pos_infty;

View file

@ -602,10 +602,10 @@ toku_bfe_leftmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node)
paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch);
if (bfe->left_is_neg_infty) {
return 0;
} else if (bfe->range_lock_left_key == NULL) {
} else if (bfe->range_lock_left_key.data == nullptr) {
return -1;
} else {
return toku_ftnode_which_child(node, bfe->range_lock_left_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
return toku_ftnode_which_child(node, &bfe->range_lock_left_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
}
}
@ -615,10 +615,10 @@ toku_bfe_rightmost_child_wanted(struct ftnode_fetch_extra *bfe, FTNODE node)
paranoid_invariant(bfe->type == ftnode_fetch_subset || bfe->type == ftnode_fetch_prefetch);
if (bfe->right_is_pos_infty) {
return node->n_children - 1;
} else if (bfe->range_lock_right_key == NULL) {
} else if (bfe->range_lock_right_key.data == nullptr) {
return -1;
} else {
return toku_ftnode_which_child(node, bfe->range_lock_right_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
return toku_ftnode_which_child(node, &bfe->range_lock_right_key, &bfe->h->cmp_descriptor, bfe->h->compare_fun);
}
}
@ -627,7 +627,7 @@ ft_cursor_rightmost_child_wanted(FT_CURSOR cursor, FT_HANDLE brt, FTNODE node)
{
if (cursor->right_is_pos_infty) {
return node->n_children - 1;
} else if (cursor->range_lock_right_key.data == NULL) {
} else if (cursor->range_lock_right_key.data == nullptr) {
return -1;
} else {
return toku_ftnode_which_child(node, &cursor->range_lock_right_key, &brt->ft->cmp_descriptor, brt->ft->compare_fun);
@ -1322,7 +1322,7 @@ ft_compare_pivot(DESCRIPTOR desc, ft_compare_func cmp, const DBT *key, const DBT
void toku_destroy_ftnode_internals(FTNODE node)
{
for (int i=0; i<node->n_children-1; i++) {
toku_free(node->childkeys[i].data);
toku_destroy_dbt(&node->childkeys[i]);
}
toku_free(node->childkeys);
node->childkeys = NULL;
@ -1345,16 +1345,11 @@ void toku_destroy_ftnode_internals(FTNODE node)
}
toku_free(node->bp);
node->bp = NULL;
}
/* Frees a node, including all the stuff in the hash table. */
void toku_ftnode_free (FTNODE *nodep) {
//TODO: #1378 Take omt lock (via ftnode) around call to toku_omt_destroy().
FTNODE node=*nodep;
void toku_ftnode_free(FTNODE *nodep) {
FTNODE node = *nodep;
if (node->height == 0) {
for (int i = 0; i < node->n_children; i++) {
if (BP_STATE(node,i) == PT_AVAIL) {
@ -1368,7 +1363,7 @@ void toku_ftnode_free (FTNODE *nodep) {
}
toku_destroy_ftnode_internals(node);
toku_free(node);
*nodep=0;
*nodep = nullptr;
}
void
@ -1505,7 +1500,7 @@ init_childinfo(FTNODE node, int childnum, FTNODE child) {
static void
init_childkey(FTNODE node, int childnum, const DBT *pivotkey) {
toku_copy_dbt(&node->childkeys[childnum], *pivotkey);
toku_clone_dbt(&node->childkeys[childnum], *pivotkey);
node->totalchildkeylens += pivotkey->size;
}
@ -2017,31 +2012,6 @@ static void ft_nonleaf_cmd_once_to_child(ft_compare_func compare_fun, DESCRIPTOR
*/
int toku_ftnode_which_child(FTNODE node, const DBT *k,
DESCRIPTOR desc, ft_compare_func cmp) {
#define DO_PIVOT_SEARCH_LR 0
#if DO_PIVOT_SEARCH_LR
int i;
for (i=0; i<node->n_children-1; i++) {
int c = ft_compare_pivot(desc, cmp, k, d, &node->childkeys[i]);
if (c > 0) continue;
if (c < 0) return i;
return i;
}
return node->n_children-1;
#else
#endif
#define DO_PIVOT_SEARCH_RL 0
#if DO_PIVOT_SEARCH_RL
// give preference for appending to the dictionary. no change for
// random keys
int i;
for (i = node->n_children-2; i >= 0; i--) {
int c = ft_compare_pivot(desc, cmp, k, d, &node->childkeys[i]);
if (c > 0) return i+1;
}
return 0;
#endif
#define DO_PIVOT_BIN_SEARCH 1
#if DO_PIVOT_BIN_SEARCH
// a funny case of no pivots
if (node->n_children <= 1) return 0;
@ -2068,7 +2038,6 @@ int toku_ftnode_which_child(FTNODE node, const DBT *k,
return mi;
}
return lo;
#endif
}
// Used for HOT.
@ -3905,10 +3874,8 @@ void toku_ft_handle_create(FT_HANDLE *ft_handle_ptr) {
static inline void
ft_cursor_cleanup_dbts(FT_CURSOR c) {
if (c->key.data) toku_free(c->key.data);
if (c->val.data) toku_free(c->val.data);
memset(&c->key, 0, sizeof(c->key));
memset(&c->val, 0, sizeof(c->val));
toku_destroy_dbt(&c->key);
toku_destroy_dbt(&c->val);
}
//
@ -3981,12 +3948,6 @@ int toku_ft_cursor (
}
}
FT_CURSOR XCALLOC(cursor);
#if 0
// if this cursor is to do read_committed fetches, then the txn objects must be valid.
if (cursor == 0)
return ENOMEM;
memset(cursor, 0, sizeof(*cursor));
#endif
cursor->ft_handle = brt;
cursor->prefetching = false;
toku_init_dbt(&cursor->range_lock_left_key);
@ -4108,36 +4069,6 @@ is_le_val_del(LEAFENTRY le, FT_CURSOR ftcursor) {
return rval;
}
static const DBT zero_dbt = {0,0,0,0};
static void search_save_bound (ft_search_t *search, DBT *pivot) {
if (search->have_pivot_bound) {
toku_free(search->pivot_bound.data);
}
search->pivot_bound = zero_dbt;
search->pivot_bound.data = toku_malloc(pivot->size);
search->pivot_bound.size = pivot->size;
memcpy(search->pivot_bound.data, pivot->data, pivot->size);
search->have_pivot_bound = true;
}
static bool search_pivot_is_bounded (ft_search_t *search, DESCRIPTOR desc, ft_compare_func cmp, DBT *pivot) __attribute__((unused));
static bool search_pivot_is_bounded (ft_search_t *search, DESCRIPTOR desc, ft_compare_func cmp, DBT *pivot)
// Effect: Return true iff the pivot has already been searched (for fixing #3522.)
// If searching from left to right, if we have already searched all the values less than pivot, we don't want to search again.
// If searching from right to left, if we have already searched all the values greater than pivot, we don't want to search again.
{
if (!search->have_pivot_bound) return true; // isn't bounded.
FAKE_DB(db, desc);
int comp = cmp(&db, pivot, &search->pivot_bound);
if (search->direction == FT_SEARCH_LEFT) {
// searching from left to right. If the comparison function says the pivot is <= something we already compared, don't do it again.
return comp>0;
} else {
return comp<0;
}
}
struct store_fifo_offset_extra {
int32_t *offsets;
int i;
@ -4726,10 +4657,8 @@ got_a_good_value:
ft_cursor_cleanup_dbts(ftcursor);
if (!ftcursor->is_temporary) {
ftcursor->key.data = toku_memdup(key, keylen);
ftcursor->val.data = toku_memdup(val, vallen);
ftcursor->key.size = keylen;
ftcursor->val.size = vallen;
toku_memdup_dbt(&ftcursor->key, key, keylen);
toku_memdup_dbt(&ftcursor->val, val, vallen);
}
//The search was successful. Prefetching can continue.
*doprefetch = true;
@ -4755,10 +4684,6 @@ ft_search_node (
bool can_bulk_fetch
);
// the number of nodes to prefetch
#define TOKU_DO_PREFETCH 1
#if TOKU_DO_PREFETCH
static int
ftnode_fetch_callback_and_free_bfe(CACHEFILE cf, PAIR p, int fd, BLOCKNUM nodename, uint32_t fullhash, void **ftnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs)
{
@ -4781,12 +4706,14 @@ ftnode_pf_callback_and_free_bfe(void *ftnode_pv, void* disk_data, void *read_ext
static void
ft_node_maybe_prefetch(FT_HANDLE brt, FTNODE node, int childnum, FT_CURSOR ftcursor, bool *doprefetch) {
// the number of nodes to prefetch
const int num_nodes_to_prefetch = 1;
// if we want to prefetch in the tree
// then prefetch the next children if there are any
if (*doprefetch && ft_cursor_prefetching(ftcursor) && !ftcursor->disable_prefetching) {
int rc = ft_cursor_rightmost_child_wanted(ftcursor, brt, node);
for (int i = childnum + 1; (i <= childnum + TOKU_DO_PREFETCH) && (i <= rc); i++) {
for (int i = childnum + 1; (i <= childnum + num_nodes_to_prefetch) && (i <= rc); i++) {
BLOCKNUM nextchildblocknum = BP_BLOCKNUM(node, i);
uint32_t nextfullhash = compute_child_fullhash(brt->ft->cf, node, i);
struct ftnode_fetch_extra *MALLOC(bfe);
@ -4812,8 +4739,6 @@ ft_node_maybe_prefetch(FT_HANDLE brt, FTNODE node, int childnum, FT_CURSOR ftcur
}
}
#endif
struct unlock_ftnode_extra {
FT_HANDLE ft_handle;
FTNODE node;
@ -4887,12 +4812,10 @@ ft_search_child(FT_HANDLE brt, FTNODE node, int childnum, ft_search_t *search, F
int r = ft_search_node(brt, childnode, search, bfe.child_to_read, getf, getf_v, doprefetch, ftcursor, &next_unlockers, &next_ancestors, bounds, can_bulk_fetch);
if (r!=TOKUDB_TRY_AGAIN) {
#if TOKU_DO_PREFETCH
// maybe prefetch the next child
if (r == 0 && node->height == 1) {
ft_node_maybe_prefetch(brt, node, childnum, ftcursor, doprefetch);
}
#endif
assert(next_unlockers.locked);
if (msgs_applied) {
@ -4937,8 +4860,6 @@ toku_ft_search_which_child(
ft_search_t *search
)
{
#define DO_SEARCH_WHICH_CHILD_BINARY 1
#if DO_SEARCH_WHICH_CHILD_BINARY
if (node->n_children <= 1) return 0;
DBT pivotkey;
@ -4972,7 +4893,7 @@ toku_ft_search_which_child(
}
// ready to return something, if the pivot is bounded, we have to move
// over a bit to get away from what we've already searched
if (search->have_pivot_bound) {
if (search->pivot_bound.data != nullptr) {
FAKE_DB(db, desc);
if (search->direction == FT_SEARCH_LEFT) {
while (lo < node->n_children - 1 &&
@ -4994,30 +4915,6 @@ toku_ft_search_which_child(
}
}
return lo;
#endif
#define DO_SEARCH_WHICH_CHILD_LINEAR 0
#if DO_SEARCH_WHICH_CHILD_LINEAR
int c;
DBT pivotkey;
toku_init_dbt(&pivotkey);
/* binary search is overkill for a small array */
int child[node->n_children];
/* scan left to right or right to left depending on the search direction */
for (c = 0; c < node->n_children; c++) {
child[c] = (search->direction == FT_SEARCH_LEFT) ? c : node->n_children - 1 - c;
}
for (c = 0; c < node->n_children-1; c++) {
int p = (search->direction == FT_SEARCH_LEFT) ? child[c] : child[c] - 1;
toku_copy_dbt(&pivotkey, node->childkeys[p]);
if (search_pivot_is_bounded(search, desc, cmp, &pivotkey) && search->compare(search, &pivotkey)) {
return child[c];
}
}
/* check the first (left) or last (right) node if nothing has been found */
return child[c];
#endif
}
static void
@ -5028,7 +4925,8 @@ maybe_search_save_bound(
{
int p = (search->direction == FT_SEARCH_LEFT) ? child_searched : child_searched - 1;
if (p >= 0 && p < node->n_children-1) {
search_save_bound(search, &node->childkeys[p]);
toku_destroy_dbt(&search->pivot_bound);
toku_clone_dbt(&search->pivot_bound, node->childkeys[p]);
}
}

View file

@ -50,8 +50,7 @@ typedef struct ft_search {
// There also remains a potential thrashing problem. When we get a TOKUDB_TRY_AGAIN, we unpin everything. There's
// no guarantee that we will get everything pinned again. We ought to keep nodes pinned when we retry, except that on the
// way out with a DB_NOTFOUND we ought to unpin those nodes. See #3528.
bool have_pivot_bound;
DBT pivot_bound;
DBT pivot_bound;
} ft_search_t;
/* initialize the search compare object */
@ -60,13 +59,12 @@ static inline ft_search_t *ft_search_init(ft_search_t *so, ft_search_compare_fun
so->direction = direction;
so->k = k;
so->context = context;
so->have_pivot_bound = false;
toku_init_dbt(&so->pivot_bound);
return so;
}
static inline void ft_search_finish(ft_search_t *so) {
if (so->have_pivot_bound) toku_free(so->pivot_bound.data);
toku_destroy_dbt(&so->pivot_bound);
}
#endif

View file

@ -7,10 +7,11 @@
#include <toku_portability.h>
#include <toku_assert.h>
#include <toku_pthread.h>
#include <string.h>
#include <errno.h>
#include "memory.h"
#include <string.h>
#include "ftloader-internal.h"
#include "ybt.h"
static void error_callback_lock(ft_loader_error_callback loader_error) {
toku_mutex_lock(&loader_error->mutex);
@ -22,13 +23,15 @@ static void error_callback_unlock(ft_loader_error_callback loader_error) {
void ft_loader_init_error_callback(ft_loader_error_callback loader_error) {
memset(loader_error, 0, sizeof *loader_error);
toku_init_dbt(&loader_error->key);
toku_init_dbt(&loader_error->val);
toku_mutex_init(&loader_error->mutex, NULL);
}
void ft_loader_destroy_error_callback(ft_loader_error_callback loader_error) {
toku_mutex_destroy(&loader_error->mutex);
toku_free(loader_error->key.data);
toku_free(loader_error->val.data);
toku_destroy_dbt(&loader_error->key);
toku_destroy_dbt(&loader_error->val);
memset(loader_error, 0, sizeof *loader_error);
}
@ -44,14 +47,6 @@ void ft_loader_set_error_function(ft_loader_error_callback loader_error, ft_load
loader_error->extra = error_extra;
}
static void copy_dbt(DBT *dest, DBT *src) {
if (src) {
dest->data = toku_malloc(src->size);
memcpy(dest->data, src->data, src->size);
dest->size = src->size;
}
}
int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db, int which_db, DBT *key, DBT *val) {
int r;
error_callback_lock(loader_error);
@ -62,8 +57,12 @@ int ft_loader_set_error(ft_loader_error_callback loader_error, int error, DB *db
loader_error->error = error; // set the error
loader_error->db = db;
loader_error->which_db = which_db;
copy_dbt(&loader_error->key, key); // copy the data
copy_dbt(&loader_error->val, val);
if (key != nullptr) {
toku_clone_dbt(&loader_error->key, *key);
}
if (val != nullptr) {
toku_clone_dbt(&loader_error->val, *val);
}
}
error_callback_unlock(loader_error);
return r;

View file

@ -216,11 +216,6 @@ toku_free(void *p) {
}
}
void
toku_free_n(void* p, size_t size __attribute__((unused))) {
toku_free(p);
}
void *
toku_xmalloc(size_t size) {
void *p = t_xmalloc ? t_xmalloc(size) : os_malloc(size);

View file

@ -106,8 +106,8 @@ struct __toku_loader_internal {
static void free_loader_resources(DB_LOADER *loader)
{
if ( loader->i ) {
if (loader->i->err_key.data) toku_free(loader->i->err_key.data);
if (loader->i->err_val.data) toku_free(loader->i->err_val.data);
toku_destroy_dbt(&loader->i->err_key);
toku_destroy_dbt(&loader->i->err_val);
if (loader->i->inames_in_env) {
for (int i=0; i<loader->i->N; i++) {
@ -194,8 +194,8 @@ toku_loader_create_loader(DB_ENV *env,
goto create_exit;
}
memset(&loader->i->err_key, 0, sizeof(loader->i->err_key));
memset(&loader->i->err_val, 0, sizeof(loader->i->err_val));
toku_init_dbt(&loader->i->err_key);
toku_init_dbt(&loader->i->err_val);
loader->i->err_i = 0;
loader->i->err_errno = 0;
@ -335,13 +335,8 @@ int toku_loader_put(DB_LOADER *loader, DBT *key, DBT *val)
if ( r != 0 ) {
// spec says errors all happen on close
// - have to save key, val, errno (r) and i for duplicate callback
loader->i->err_key.size = key->size;
loader->i->err_key.data = toku_malloc(key->size);
memcpy(loader->i->err_key.data, key->data, key->size);
loader->i->err_val.size = val->size;
loader->i->err_val.data = toku_malloc(val->size);
memcpy(loader->i->err_val.data, val->data, val->size);
toku_clone_dbt(&loader->i->err_key, *key);
toku_clone_dbt(&loader->i->err_val, *val);
loader->i->err_i = i;
loader->i->err_errno = r;

View file

@ -90,7 +90,7 @@ static int hi_inserts(DB_TXN* UU(txn), ARG arg, void* UU(operation_extra), void
// by the table size manually. fill_key_buf_random will
// do this iff arg->bounded_element_range is true.
invariant(arg->bounded_element_range);
fill_key_buf_random<int>(arg->random_data, keybuf, arg);
fill_key_buf_random(arg->random_data, keybuf, arg);
fill_val_buf_random(arg->random_data, valbuf, arg->cli);
r = env->put_multiple(
env,

View file

@ -640,13 +640,23 @@ static int generate_row_for_put(
return 0;
}
template <typename integer_t>
static integer_t breverse(integer_t v)
// How Keys Work:
//
// Keys are either
// - 4 byte little endian non-negative integers
// - 8 byte little endian non-negative integers
// - 8 byte little endian non-negative integers, padded with zeroes.
//
// The comparison function treats the key as a 4 byte
// int if the key size is exactly 4, and it treats
// the key as an 8 byte int if the key size is 8 or more.
static uint64_t breverse(uint64_t v)
// Effect: return the bits in v, reversed
// Notes: implementation taken from http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious
// Rationale: just a hack to spread out the keys during loading, doesn't need to be fast but does need to be correct.
{
integer_t r = v; // r will be reversed bits of v; first get LSB of v
uint64_t r = v; // r will be reversed bits of v; first get LSB of v
int s = sizeof(v) * CHAR_BIT - 1; // extra shift needed at end
for (v >>= 1; v; v >>= 1) {
@ -658,52 +668,58 @@ static integer_t breverse(integer_t v)
return r;
}
template <typename integer_t>
static void
fill_key_buf(integer_t key, uint8_t *data, struct cli_args *args) {
// Effect: Fill data with a little-endian integer with the given integer_t type
// If the data buf is bigger than the integer's size, pad with zeroes.
// Requires: *data is at least sizeof(integer_t)
// Note: If you want to store 4 bytes, pass a 4 byte type. 8 bytes? 8 byte type.
// to store an 8-byte integer valued 5:
// int k = 5; fill_key_buf(k, ...) // WRONG
// int64_t k = 5; fill_key_buf(k, ...) // RIGHT
invariant(sizeof(integer_t) >= min_key_size);
invariant(sizeof(integer_t) <= args->key_size);
integer_t *k = reinterpret_cast<integer_t *>(data);
fill_key_buf(int64_t key, uint8_t *data, struct cli_args *args) {
// Effect: Fill data with a specific little-endian integer, 4 or 8 bytes long
// depending on args->key_size, possibly padded with zeroes.
// Requires: *data is at least sizeof(uint64_t)
invariant(key >= 0);
if (args->disperse_keys) {
*k = static_cast<integer_t>(breverse(key));
} else {
*k = key;
key = breverse(key);
}
if (args->key_size > sizeof(integer_t)) {
memset(data + sizeof(integer_t), 0, args->key_size - sizeof(integer_t));
if (args->key_size == sizeof(int)) {
const int key32 = key;
memcpy(data, &key32, sizeof(key32));
} else {
invariant(args->key_size >= sizeof(key));
memcpy(data, &key, sizeof(key));
memset(data + sizeof(key), 0, args->key_size - sizeof(key));
}
}
template <typename integer_t>
static void
fill_key_buf_random(struct random_data *random_data, uint8_t *data, ARG arg) {
// Effect: Fill data with a random little-endian integer with the given integer_t type,
// possibly bounded by the size of the table, possibly padded with zeroes.
// Effect: Fill data with a random, little-endian, 4 or 8 byte integer, possibly
// bounded by the size of the table, and padded with zeroes until key_size.
// Requires, Notes: see fill_key_buf()
invariant(sizeof(integer_t) <= arg->cli->key_size);
integer_t key = static_cast<integer_t>(myrandom_r(random_data));
int64_t key = myrandom_r(random_data);
if (arg->bounded_element_range && arg->cli->num_elements > 0) {
key = key % arg->cli->num_elements;
}
fill_key_buf(key, data, arg->cli);
}
template <typename integer_t>
// How Vals Work:
//
// Values are either
// - 4 byte little endian integers
// - 4 byte little endian integers, padded with zeroes
// - X bytes random values, Y bytes zeroes, where X and Y
// are derived from the desired compressibility;
//
// Correctness tests use integer values, perf tests use random bytes.
// Both support padding out values > 4 bytes with zeroes.
static void
fill_val_buf(integer_t val, uint8_t *data, uint32_t val_size) {
fill_val_buf(int64_t val, uint8_t *data, uint32_t val_size) {
// Effect, Requires, Notes: see fill_key_buf().
invariant(sizeof(integer_t) <= val_size);
integer_t *v = reinterpret_cast<integer_t *>(data);
*v = val;
if (val_size > sizeof(integer_t)) {
memset(data + sizeof(integer_t), 0, val_size - sizeof(integer_t));
if (val_size == sizeof(int)) {
const int val32 = val;
memcpy(data, &val32, sizeof(val32));
} else {
invariant(val_size >= sizeof(val));
memcpy(data, &val, sizeof(val));
memset(data + sizeof(val), 0, val_size - sizeof(val));
}
}
@ -748,7 +764,7 @@ static int random_put_in_db(DB *db, DB_TXN *txn, ARG arg, bool ignore_errors, vo
uint64_t puts_to_increment = 0;
for (uint32_t i = 0; i < arg->cli->txn_size; ++i) {
fill_key_buf_random<uint64_t>(arg->random_data, keybuf, arg);
fill_key_buf_random(arg->random_data, keybuf, arg);
fill_val_buf_random(arg->random_data, valbuf, arg->cli);
r = db->put(db, txn, &key, &val, put_flags);
if (!ignore_errors && r != 0) {
@ -868,7 +884,7 @@ static int UU() keyrange_op(DB_TXN *txn, ARG arg, void* UU(operation_extra), voi
DBT key;
dbt_init(&key, keybuf, sizeof keybuf);
fill_key_buf_random<int>(arg->random_data, keybuf, arg);
fill_key_buf_random(arg->random_data, keybuf, arg);
uint64_t less,equal,greater;
int is_exact;
@ -959,7 +975,7 @@ static int UU() ptquery_and_maybe_check_op(DB* db, DB_TXN *txn, ARG arg, bool ch
DBT key, val;
dbt_init(&key, keybuf, sizeof keybuf);
dbt_init(&val, nullptr, 0);
fill_key_buf_random<int>(arg->random_data, keybuf, arg);
fill_key_buf_random(arg->random_data, keybuf, arg);
r = db->getf_set(
db,
@ -1100,7 +1116,7 @@ static int UU() update_op2(DB_TXN* txn, ARG arg, void* UU(operation_extra), void
dbt_init(&val, &extra, sizeof extra);
for (uint32_t i = 0; i < arg->cli->txn_size; i++) {
fill_key_buf_random<int>(arg->random_data, keybuf, arg);
fill_key_buf_random(arg->random_data, keybuf, arg);
extra.u.d.diff = 1;
curr_val_sum += extra.u.d.diff;
r = db->update(
@ -1222,7 +1238,7 @@ UU() update_op_db(DB *db, DB_TXN *txn, ARG arg, void* operation_extra, void *UU(
fill_key_buf(update_key, keybuf, arg->cli);
} else {
// just do a usual, random point update without locking first
fill_key_buf_random<int>(arg->random_data, keybuf, arg);
fill_key_buf_random(arg->random_data, keybuf, arg);
}
@ -1295,7 +1311,7 @@ static int UU() update_with_history_op(DB_TXN *txn, ARG arg, void* operation_ext
dbt_init(&val, &extra, sizeof extra);
for (uint32_t i = 0; i < arg->cli->txn_size; i++) {
fill_key_buf_random<int>(arg->random_data, keybuf, arg);
fill_key_buf_random(arg->random_data, keybuf, arg);
int *rkp = (int *) keybuf;
rand_key = *rkp;
invariant(rand_key < arg->cli->num_elements);
@ -1729,16 +1745,13 @@ static void fill_single_table(DB_ENV *env, DB *db, struct cli_args *args, bool f
}
for (int i = 0; i < args->num_elements; i++) {
fill_key_buf(i, keybuf, args);
// Correctness tests map every key to zeroes. Perf tests fill
// values with random bytes, based on compressibility.
if (fill_with_zeroes) {
// Legacy test, 4 byte signed keys and 4 byte zero values.
const int k = i;
const int zero = 0;
fill_key_buf(k, keybuf, args);
fill_val_buf(zero, valbuf, args->val_size);
fill_val_buf(0, valbuf, args->val_size);
} else {
// Modern test, >= 8 byte unsigned keys, >= 8 byte random values.
const uint64_t k = i;
fill_key_buf(k, keybuf, args);
fill_val_buf_random(&random_data, valbuf, args);
}
@ -2413,8 +2426,10 @@ static inline void parse_stress_test_args (int argc, char *const argv[], struct
static void
stress_table(DB_ENV *, DB **, struct cli_args *);
template<typename integer_t>
static int int_cmp(integer_t x, integer_t y) {
static int
stress_dbt_cmp_legacy(const DBT *a, const DBT *b) {
int x = *(int *) a->data;
int y = *(int *) b->data;
if (x < y) {
return -1;
} else if (x > y) {
@ -2424,13 +2439,6 @@ static int int_cmp(integer_t x, integer_t y) {
}
}
static int
stress_dbt_cmp_legacy(const DBT *a, const DBT *b) {
int x = *(int *) a->data;
int y = *(int *) b->data;
return int_cmp(x, y);
}
static int
stress_dbt_cmp(const DBT *a, const DBT *b) {
// Keys are only compared by their first 8 bytes,
@ -2438,7 +2446,13 @@ stress_dbt_cmp(const DBT *a, const DBT *b) {
// The rest of the key is just padding.
uint64_t x = *(uint64_t *) a->data;
uint64_t y = *(uint64_t *) b->data;
return int_cmp(x, y);
if (x < y) {
return -1;
} else if (x > y) {
return +1;
} else {
return 0;
}
}
static int
@ -2510,6 +2524,12 @@ UU() stress_recover(struct cli_args *args) {
static void
test_main(struct cli_args *args, bool fill_with_zeroes)
{
if ((args->key_size < 8 && args->key_size != 4) ||
(args->val_size < 8 && args->val_size != 4)) {
fprintf(stderr, "The only valid key/val sizes are 4, 8, and > 8.\n");
return;
}
{ char *loc = setlocale(LC_NUMERIC, "en_US.UTF-8"); assert(loc); }
DB_ENV* env = nullptr;
DB* dbs[args->num_DBs];

View file

@ -184,13 +184,6 @@ single_process_unlock(int *lockfd) {
return 0;
}
static inline DBT*
init_dbt_realloc(DBT *dbt) {
memset(dbt, 0, sizeof(*dbt));
dbt->flags = DB_DBT_REALLOC;
return dbt;
}
int
toku_ydb_init(void) {
int r = 0;
@ -2337,7 +2330,7 @@ env_dbremove(DB_ENV * env, DB_TXN *txn, const char *fname, const char *dbname, u
DBT dname_dbt;
DBT iname_dbt;
toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
init_dbt_realloc(&iname_dbt); // sets iname_dbt.data = NULL
toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
// get iname
r = toku_db_get(env->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname
@ -2448,7 +2441,7 @@ env_dbrename(DB_ENV *env, DB_TXN *txn, const char *fname, const char *dbname, co
DBT iname_dbt;
toku_fill_dbt(&old_dname_dbt, dname, strlen(dname)+1);
toku_fill_dbt(&new_dname_dbt, newname, strlen(newname)+1);
init_dbt_realloc(&iname_dbt); // sets iname_dbt.data = NULL
toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
// get iname
r = toku_db_get(env->i->directory, txn, &old_dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname
@ -2594,7 +2587,7 @@ toku_test_db_redirect_dictionary(DB * db, const char * dname_of_new_file, DB_TXN
TOKUTXN tokutxn = db_txn_struct_i(dbtxn)->tokutxn;
toku_fill_dbt(&dname_dbt, dname_of_new_file, strlen(dname_of_new_file)+1);
init_dbt_realloc(&iname_dbt); // sets iname_dbt.data = NULL
toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
r = toku_db_get(db->dbenv->i->directory, dbtxn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname
assert_zero(r);
new_iname_in_env = (char *) iname_dbt.data;

View file

@ -54,13 +54,6 @@ ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp) {
*statp = ydb_db_layer_status;
}
static inline DBT*
init_dbt_realloc(DBT *dbt) {
memset(dbt, 0, sizeof(*dbt));
dbt->flags = DB_DBT_REALLOC;
return dbt;
}
static void
create_iname_hint(const char *dname, char *hint) {
//Requires: size of hint array must be > strlen(dname)
@ -260,7 +253,7 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
DBT dname_dbt; // holds dname
DBT iname_dbt; // holds iname_in_env
toku_fill_dbt(&dname_dbt, dname, strlen(dname)+1);
init_dbt_realloc(&iname_dbt); // sets iname_dbt.data = NULL
toku_init_dbt_flags(&iname_dbt, DB_DBT_REALLOC);
r = toku_db_get(db->dbenv->i->directory, txn, &dname_dbt, &iname_dbt, DB_SERIALIZABLE); // allocates memory for iname
char *iname = (char *) iname_dbt.data;
if (r == DB_NOTFOUND && !is_db_create) {

View file

@ -28,8 +28,6 @@ void *toku_xmalloc(size_t size);
void *toku_xrealloc(void*, size_t size) __attribute__((__visibility__("default")));
void toku_free(void*) __attribute__((__visibility__("default")));
/* toku_free_n() should be used if the caller knows the size of the malloc'd object. */
void toku_free_n(void*, size_t size);
void *toku_realloc(void *, size_t size) __attribute__((__visibility__("default")));
size_t toku_malloc_usable_size(void *p) __attribute__((__visibility__("default")));