mirror of
https://github.com/MariaDB/server.git
synced 2025-01-22 06:44:16 +01:00
[t:4875], [t:4887], merge from tokudb.4875 to main
git-svn-id: file:///svn/toku/tokudb@43896 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
parent
939721e749
commit
f2c4fe13e8
26 changed files with 932 additions and 676 deletions
|
@ -84,15 +84,15 @@ static inline void unlock_for_blocktable (BLOCK_TABLE bt);
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ft_set_dirty(FT h, BOOL for_checkpoint){
|
ft_set_dirty(FT ft, BOOL for_checkpoint){
|
||||||
assert(toku_mutex_is_locked(&h->blocktable->mutex));
|
assert(toku_mutex_is_locked(&ft->blocktable->mutex));
|
||||||
assert(h->type == FT_CURRENT);
|
assert(ft->h->type == FT_CURRENT);
|
||||||
if (for_checkpoint) {
|
if (for_checkpoint) {
|
||||||
assert(h->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
|
assert(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
|
||||||
h->checkpoint_header->dirty = 1;
|
ft->checkpoint_header->dirty = 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
h->dirty = 1;
|
ft->h->dirty = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -449,9 +449,9 @@ PRNTF("blokAllokator", 1L, size, offset, bt);
|
||||||
//Fills wbuf with bt
|
//Fills wbuf with bt
|
||||||
//A clean shutdown runs checkpoint start so that current and inprogress are copies.
|
//A clean shutdown runs checkpoint start so that current and inprogress are copies.
|
||||||
void
|
void
|
||||||
toku_serialize_translation_to_wbuf_unlocked(BLOCK_TABLE bt, struct wbuf *w,
|
toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, struct wbuf *w,
|
||||||
int64_t *address, int64_t *size) {
|
int64_t *address, int64_t *size) {
|
||||||
assert(toku_mutex_is_locked(&bt->mutex));
|
lock_for_blocktable(bt);
|
||||||
struct translation *t = &bt->inprogress;
|
struct translation *t = &bt->inprogress;
|
||||||
|
|
||||||
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
|
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
|
||||||
|
@ -478,6 +478,7 @@ toku_serialize_translation_to_wbuf_unlocked(BLOCK_TABLE bt, struct wbuf *w,
|
||||||
wbuf_int(w, checksum);
|
wbuf_int(w, checksum);
|
||||||
*address = t->block_translation[b.b].u.diskoff;
|
*address = t->block_translation[b.b].u.diskoff;
|
||||||
*size = t->block_translation[b.b].size;
|
*size = t->block_translation[b.b].size;
|
||||||
|
unlock_for_blocktable(bt);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ void toku_blocknum_realloc_on_disk(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF size, DIS
|
||||||
void toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
|
void toku_translate_blocknum_to_offset_size(BLOCK_TABLE bt, BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
|
||||||
|
|
||||||
//Serialization
|
//Serialization
|
||||||
void toku_serialize_translation_to_wbuf_unlocked(BLOCK_TABLE bt, struct wbuf *w, int64_t *address, int64_t *size);
|
void toku_serialize_translation_to_wbuf(BLOCK_TABLE bt, struct wbuf *w, int64_t *address, int64_t *size);
|
||||||
|
|
||||||
void toku_block_table_swap_for_redirect(BLOCK_TABLE old_bt, BLOCK_TABLE new_bt);
|
void toku_block_table_swap_for_redirect(BLOCK_TABLE old_bt, BLOCK_TABLE new_bt);
|
||||||
|
|
||||||
|
|
|
@ -67,9 +67,9 @@
|
||||||
static CHECKPOINT_STATUS_S cp_status;
|
static CHECKPOINT_STATUS_S cp_status;
|
||||||
|
|
||||||
#define STATUS_INIT(k,t,l) { \
|
#define STATUS_INIT(k,t,l) { \
|
||||||
cp_status.status[k].keyname = #k; \
|
cp_status.status[k].keyname = #k; \
|
||||||
cp_status.status[k].type = t; \
|
cp_status.status[k].type = t; \
|
||||||
cp_status.status[k].legend = "checkpoint: " l; \
|
cp_status.status[k].legend = "checkpoint: " l; \
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -106,7 +106,7 @@ status_init(void) {
|
||||||
void
|
void
|
||||||
toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
|
toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
|
||||||
if (!cp_status.initialized)
|
if (!cp_status.initialized)
|
||||||
status_init();
|
status_init();
|
||||||
STATUS_VALUE(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
|
STATUS_VALUE(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
|
||||||
*statp = cp_status;
|
*statp = cp_status;
|
||||||
}
|
}
|
||||||
|
@ -193,7 +193,7 @@ checkpoint_safe_checkpoint_unlock(void) {
|
||||||
void
|
void
|
||||||
toku_multi_operation_client_lock(void) {
|
toku_multi_operation_client_lock(void) {
|
||||||
if (locked_mo)
|
if (locked_mo)
|
||||||
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_MO), 1);
|
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_MO), 1);
|
||||||
toku_pthread_rwlock_rdlock(&multi_operation_lock);
|
toku_pthread_rwlock_rdlock(&multi_operation_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -205,7 +205,7 @@ toku_multi_operation_client_unlock(void) {
|
||||||
void
|
void
|
||||||
toku_checkpoint_safe_client_lock(void) {
|
toku_checkpoint_safe_client_lock(void) {
|
||||||
if (locked_cs)
|
if (locked_cs)
|
||||||
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1);
|
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1);
|
||||||
toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);
|
toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);
|
||||||
toku_multi_operation_client_lock();
|
toku_multi_operation_client_lock();
|
||||||
}
|
}
|
||||||
|
@ -241,23 +241,23 @@ toku_checkpoint_destroy(void) {
|
||||||
// Take a checkpoint of all currently open dictionaries
|
// Take a checkpoint of all currently open dictionaries
|
||||||
int
|
int
|
||||||
toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
||||||
void (*callback_f)(void*), void * extra,
|
void (*callback_f)(void*), void * extra,
|
||||||
void (*callback2_f)(void*), void * extra2,
|
void (*callback2_f)(void*), void * extra2,
|
||||||
checkpoint_caller_t caller_id) {
|
checkpoint_caller_t caller_id) {
|
||||||
int r;
|
int r;
|
||||||
int footprint_offset = (int) caller_id * 1000;
|
int footprint_offset = (int) caller_id * 1000;
|
||||||
|
|
||||||
assert(initialized);
|
assert(initialized);
|
||||||
|
|
||||||
if (locked_cs) {
|
if (locked_cs) {
|
||||||
if (caller_id == SCHEDULED_CHECKPOINT)
|
if (caller_id == SCHEDULED_CHECKPOINT)
|
||||||
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_SCHED_CS), 1);
|
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_SCHED_CS), 1);
|
||||||
else if (caller_id == CLIENT_CHECKPOINT)
|
else if (caller_id == CLIENT_CHECKPOINT)
|
||||||
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_CLIENT_CS), 1);
|
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_CLIENT_CS), 1);
|
||||||
else if (caller_id == TXN_COMMIT_CHECKPOINT)
|
else if (caller_id == TXN_COMMIT_CHECKPOINT)
|
||||||
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_TXN_CS), 1);
|
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_TXN_CS), 1);
|
||||||
else
|
else
|
||||||
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_OTHER_CS), 1);
|
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_OTHER_CS), 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAITERS_NOW), 1);
|
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAITERS_NOW), 1);
|
||||||
|
@ -265,27 +265,29 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
||||||
(void) __sync_fetch_and_sub(&STATUS_VALUE(CP_WAITERS_NOW), 1);
|
(void) __sync_fetch_and_sub(&STATUS_VALUE(CP_WAITERS_NOW), 1);
|
||||||
|
|
||||||
if (STATUS_VALUE(CP_WAITERS_NOW) > STATUS_VALUE(CP_WAITERS_MAX))
|
if (STATUS_VALUE(CP_WAITERS_NOW) > STATUS_VALUE(CP_WAITERS_MAX))
|
||||||
STATUS_VALUE(CP_WAITERS_MAX) = STATUS_VALUE(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
|
STATUS_VALUE(CP_WAITERS_MAX) = STATUS_VALUE(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
|
||||||
|
|
||||||
SET_CHECKPOINT_FOOTPRINT(10);
|
SET_CHECKPOINT_FOOTPRINT(10);
|
||||||
if (locked_mo) {
|
if (locked_mo) {
|
||||||
if (caller_id == SCHEDULED_CHECKPOINT)
|
if (caller_id == SCHEDULED_CHECKPOINT)
|
||||||
STATUS_VALUE(CP_WAIT_SCHED_MO)++; // threadsafe, within checkpoint_safe lock
|
STATUS_VALUE(CP_WAIT_SCHED_MO)++; // threadsafe, within checkpoint_safe lock
|
||||||
else if (caller_id == CLIENT_CHECKPOINT)
|
else if (caller_id == CLIENT_CHECKPOINT)
|
||||||
STATUS_VALUE(CP_WAIT_CLIENT_MO)++;
|
STATUS_VALUE(CP_WAIT_CLIENT_MO)++;
|
||||||
else if (caller_id == TXN_COMMIT_CHECKPOINT)
|
else if (caller_id == TXN_COMMIT_CHECKPOINT)
|
||||||
STATUS_VALUE(CP_WAIT_TXN_MO)++;
|
STATUS_VALUE(CP_WAIT_TXN_MO)++;
|
||||||
else
|
else
|
||||||
STATUS_VALUE(CP_WAIT_OTHER_MO)++;
|
STATUS_VALUE(CP_WAIT_OTHER_MO)++;
|
||||||
}
|
}
|
||||||
multi_operation_checkpoint_lock();
|
multi_operation_checkpoint_lock();
|
||||||
SET_CHECKPOINT_FOOTPRINT(20);
|
SET_CHECKPOINT_FOOTPRINT(20);
|
||||||
ydb_lock();
|
ydb_lock();
|
||||||
|
toku_ft_open_close_lock();
|
||||||
|
|
||||||
SET_CHECKPOINT_FOOTPRINT(30);
|
SET_CHECKPOINT_FOOTPRINT(30);
|
||||||
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
|
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
|
||||||
r = toku_cachetable_begin_checkpoint(ct, logger);
|
r = toku_cachetable_begin_checkpoint(ct, logger);
|
||||||
|
|
||||||
|
toku_ft_open_close_unlock();
|
||||||
multi_operation_checkpoint_unlock();
|
multi_operation_checkpoint_unlock();
|
||||||
ydb_unlock();
|
ydb_unlock();
|
||||||
|
|
||||||
|
@ -299,7 +301,7 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
||||||
if (r==0 && logger) {
|
if (r==0 && logger) {
|
||||||
last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
|
last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
|
||||||
r = toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
|
r = toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
|
||||||
STATUS_VALUE(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
|
STATUS_VALUE(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
|
||||||
}
|
}
|
||||||
|
|
||||||
SET_CHECKPOINT_FOOTPRINT(60);
|
SET_CHECKPOINT_FOOTPRINT(60);
|
||||||
|
@ -307,9 +309,9 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
||||||
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN);
|
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN);
|
||||||
|
|
||||||
if (r == 0)
|
if (r == 0)
|
||||||
STATUS_VALUE(CP_CHECKPOINT_COUNT)++;
|
STATUS_VALUE(CP_CHECKPOINT_COUNT)++;
|
||||||
else
|
else
|
||||||
STATUS_VALUE(CP_CHECKPOINT_COUNT_FAIL)++;
|
STATUS_VALUE(CP_CHECKPOINT_COUNT_FAIL)++;
|
||||||
|
|
||||||
STATUS_VALUE(CP_FOOTPRINT) = 0;
|
STATUS_VALUE(CP_FOOTPRINT) = 0;
|
||||||
checkpoint_safe_checkpoint_unlock();
|
checkpoint_safe_checkpoint_unlock();
|
||||||
|
|
|
@ -65,7 +65,7 @@ cachetable_put_empty_node_with_dep_nodes(
|
||||||
|
|
||||||
void
|
void
|
||||||
create_new_ftnode_with_dep_nodes(
|
create_new_ftnode_with_dep_nodes(
|
||||||
FT h,
|
FT ft,
|
||||||
FTNODE *result,
|
FTNODE *result,
|
||||||
int height,
|
int height,
|
||||||
int n_children,
|
int n_children,
|
||||||
|
@ -76,15 +76,15 @@ create_new_ftnode_with_dep_nodes(
|
||||||
BLOCKNUM name;
|
BLOCKNUM name;
|
||||||
|
|
||||||
cachetable_put_empty_node_with_dep_nodes(
|
cachetable_put_empty_node_with_dep_nodes(
|
||||||
h,
|
ft,
|
||||||
num_dependent_nodes,
|
num_dependent_nodes,
|
||||||
dependent_nodes,
|
dependent_nodes,
|
||||||
&name,
|
&name,
|
||||||
&fullhash,
|
&fullhash,
|
||||||
result);
|
result);
|
||||||
|
|
||||||
assert(h->nodesize > 0);
|
assert(ft->h->nodesize > 0);
|
||||||
assert(h->basementnodesize > 0);
|
assert(ft->h->basementnodesize > 0);
|
||||||
if (height == 0) {
|
if (height == 0) {
|
||||||
assert(n_children > 0);
|
assert(n_children > 0);
|
||||||
}
|
}
|
||||||
|
@ -94,9 +94,9 @@ create_new_ftnode_with_dep_nodes(
|
||||||
name,
|
name,
|
||||||
height,
|
height,
|
||||||
n_children,
|
n_children,
|
||||||
h->layout_version,
|
ft->h->layout_version,
|
||||||
h->nodesize,
|
ft->h->nodesize,
|
||||||
h->flags);
|
ft->h->flags);
|
||||||
|
|
||||||
assert((*result)->nodesize > 0);
|
assert((*result)->nodesize > 0);
|
||||||
(*result)->fullhash = fullhash;
|
(*result)->fullhash = fullhash;
|
||||||
|
@ -208,10 +208,10 @@ toku_pin_ftnode_off_client_thread(
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_unpin_ftnode_off_client_thread(FT h, FTNODE node)
|
toku_unpin_ftnode_off_client_thread(FT ft, FTNODE node)
|
||||||
{
|
{
|
||||||
int r = toku_cachetable_unpin(
|
int r = toku_cachetable_unpin(
|
||||||
h->cf,
|
ft->cf,
|
||||||
node->thisnodename,
|
node->thisnodename,
|
||||||
node->fullhash,
|
node->fullhash,
|
||||||
(enum cachetable_dirty) node->dirty,
|
(enum cachetable_dirty) node->dirty,
|
||||||
|
@ -221,11 +221,11 @@ toku_unpin_ftnode_off_client_thread(FT h, FTNODE node)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_unpin_ftnode(FT h, FTNODE node)
|
toku_unpin_ftnode(FT ft, FTNODE node)
|
||||||
{
|
{
|
||||||
// printf("%*sUnpin %ld\n", 8-node->height, "", node->thisnodename.b);
|
// printf("%*sUnpin %ld\n", 8-node->height, "", node->thisnodename.b);
|
||||||
//VERIFY_NODE(brt,node);
|
//VERIFY_NODE(brt,node);
|
||||||
toku_unpin_ftnode_off_client_thread(h, node);
|
toku_unpin_ftnode_off_client_thread(ft, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -718,15 +718,15 @@ ftleaf_split(
|
||||||
invariant(node->height == 0);
|
invariant(node->height == 0);
|
||||||
STATUS_VALUE(FT_FLUSHER_SPLIT_LEAF)++;
|
STATUS_VALUE(FT_FLUSHER_SPLIT_LEAF)++;
|
||||||
if (node->n_children) {
|
if (node->n_children) {
|
||||||
// First move all the accumulated stat64info deltas into the first basement.
|
// First move all the accumulated stat64info deltas into the first basement.
|
||||||
// After the split, either both nodes or neither node will be included in the next checkpoint.
|
// After the split, either both nodes or neither node will be included in the next checkpoint.
|
||||||
// The accumulated stats in the dictionary will be correct in either case.
|
// The accumulated stats in the dictionary will be correct in either case.
|
||||||
// By moving all the deltas into one (arbitrary) basement, we avoid the need to maintain
|
// By moving all the deltas into one (arbitrary) basement, we avoid the need to maintain
|
||||||
// correct information for a basement that is divided between two leafnodes (i.e. when split is
|
// correct information for a basement that is divided between two leafnodes (i.e. when split is
|
||||||
// not on a basement boundary).
|
// not on a basement boundary).
|
||||||
STAT64INFO_S delta_for_leafnode = toku_get_and_clear_basement_stats(node);
|
STAT64INFO_S delta_for_leafnode = toku_get_and_clear_basement_stats(node);
|
||||||
BASEMENTNODE bn = BLB(node,0);
|
BASEMENTNODE bn = BLB(node,0);
|
||||||
bn->stat64_delta = delta_for_leafnode;
|
bn->stat64_delta = delta_for_leafnode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -807,9 +807,9 @@ ftleaf_split(
|
||||||
name,
|
name,
|
||||||
0,
|
0,
|
||||||
num_children_in_b,
|
num_children_in_b,
|
||||||
h->layout_version,
|
h->h->layout_version,
|
||||||
h->nodesize,
|
h->h->nodesize,
|
||||||
h->flags);
|
h->h->flags);
|
||||||
assert(B->nodesize > 0);
|
assert(B->nodesize > 0);
|
||||||
B->fullhash = fullhash;
|
B->fullhash = fullhash;
|
||||||
}
|
}
|
||||||
|
@ -1002,7 +1002,7 @@ ft_split_child(
|
||||||
FTNODE nodea, nodeb;
|
FTNODE nodea, nodeb;
|
||||||
DBT splitk;
|
DBT splitk;
|
||||||
// printf("%s:%d node %" PRIu64 "->u.n.n_children=%d height=%d\n", __FILE__, __LINE__, node->thisnodename.b, node->u.n.n_children, node->height);
|
// printf("%s:%d node %" PRIu64 "->u.n.n_children=%d height=%d\n", __FILE__, __LINE__, node->thisnodename.b, node->u.n.n_children, node->height);
|
||||||
assert(h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
|
assert(h->h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
|
||||||
|
|
||||||
// for test
|
// for test
|
||||||
call_flusher_thread_callback(flt_flush_before_split);
|
call_flusher_thread_callback(flt_flush_before_split);
|
||||||
|
|
170
ft/ft-internal.h
170
ft/ft-internal.h
|
@ -335,67 +335,129 @@ u_int32_t compute_child_fullhash (CACHEFILE cf, FTNODE node, int childnum);
|
||||||
|
|
||||||
enum ft_type {FT_CURRENT=1, FT_CHECKPOINT_INPROGRESS};
|
enum ft_type {FT_CURRENT=1, FT_CHECKPOINT_INPROGRESS};
|
||||||
|
|
||||||
|
struct ft_header {
|
||||||
|
enum ft_type type;
|
||||||
|
|
||||||
|
int dirty;
|
||||||
|
|
||||||
|
// Free-running counter incremented once per checkpoint (toggling LSB).
|
||||||
|
// LSB indicates which header location is used on disk so this
|
||||||
|
// counter is effectively a boolean which alternates with each checkpoint.
|
||||||
|
uint64_t checkpoint_count;
|
||||||
|
// LSN of creation of "checkpoint-begin" record in log.
|
||||||
|
LSN checkpoint_lsn;
|
||||||
|
|
||||||
|
// see brt_layout_version.h. maybe don't need this if we assume
|
||||||
|
// it's always the current version after deserializing
|
||||||
|
const int layout_version;
|
||||||
|
// different (<) from layout_version if upgraded from a previous
|
||||||
|
// version (useful for debugging)
|
||||||
|
const int layout_version_original;
|
||||||
|
// build_id (svn rev number) of software that wrote this node to
|
||||||
|
// disk. (read from disk, overwritten when written to disk, I
|
||||||
|
// think).
|
||||||
|
const uint32_t build_id;
|
||||||
|
// build_id of software that created this tree
|
||||||
|
const uint32_t build_id_original;
|
||||||
|
|
||||||
|
// time this tree was created
|
||||||
|
const uint64_t time_of_creation;
|
||||||
|
// and the root transaction id that created it
|
||||||
|
TXNID root_xid_that_created;
|
||||||
|
// last time this header was serialized to disk (read from disk,
|
||||||
|
// overwritten when written to disk)
|
||||||
|
uint64_t time_of_last_modification;
|
||||||
|
// last time that this tree was verified
|
||||||
|
uint64_t time_of_last_verification;
|
||||||
|
|
||||||
|
// this field is protected by tree_lock, see comment for tree_lock
|
||||||
|
BLOCKNUM root_blocknum;
|
||||||
|
|
||||||
|
const unsigned int flags;
|
||||||
|
const unsigned int nodesize;
|
||||||
|
const unsigned int basementnodesize;
|
||||||
|
const enum toku_compression_method compression_method;
|
||||||
|
|
||||||
|
// Current Minimum MSN to be used when upgrading pre-MSN BRT's.
|
||||||
|
// This is decremented from our current MIN_MSN so as not to clash
|
||||||
|
// with any existing 'normal' MSN's.
|
||||||
|
MSN highest_unused_msn_for_upgrade;
|
||||||
|
|
||||||
|
// last time that a hot optimize operation was begun
|
||||||
|
uint64_t time_of_last_optimize_begin;
|
||||||
|
// last time that a hot optimize operation was successfully completed
|
||||||
|
uint64_t time_of_last_optimize_end;
|
||||||
|
// the number of hot optimize operations currently in progress on this tree
|
||||||
|
uint32_t count_of_optimize_in_progress;
|
||||||
|
// the number of hot optimize operations in progress on this tree at the time of the last crash (this field is in-memory only)
|
||||||
|
uint32_t count_of_optimize_in_progress_read_from_disk;
|
||||||
|
// all messages before this msn have been applied to leaf nodes
|
||||||
|
MSN msn_at_start_of_last_completed_optimize;
|
||||||
|
|
||||||
|
STAT64INFO_S on_disk_stats;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
// brt_header is always the current version.
|
// brt_header is always the current version.
|
||||||
struct ft {
|
struct ft {
|
||||||
enum ft_type type;
|
FT_HEADER h;
|
||||||
FT checkpoint_header;
|
FT_HEADER checkpoint_header;
|
||||||
|
|
||||||
|
// These are (mostly) read-only.
|
||||||
|
|
||||||
CACHEFILE cf;
|
CACHEFILE cf;
|
||||||
|
// unique id for dictionary
|
||||||
|
DICTIONARY_ID dict_id;
|
||||||
|
ft_compare_func compare_fun;
|
||||||
|
ft_update_func update_fun;
|
||||||
|
|
||||||
|
// protected by locktree
|
||||||
|
DESCRIPTOR_S descriptor;
|
||||||
|
// protected by locktree and user. User
|
||||||
|
// makes sure this is only changed
|
||||||
|
// when no activity on tree
|
||||||
|
DESCRIPTOR_S cmp_descriptor;
|
||||||
|
|
||||||
|
// These are not read-only:
|
||||||
|
|
||||||
// lock used by a thread to pin the root node to start a descent into
|
// lock used by a thread to pin the root node to start a descent into
|
||||||
// the tree. This lock protects the blocknum of the root node (root_blocknum). Any
|
// the tree. This lock protects the blocknum of the root node (root_blocknum). Any
|
||||||
// thread that wants to descend down the tree starting at the root
|
// thread that wants to descend down the tree starting at the root
|
||||||
// must grab this lock before pinning the root.
|
// must grab this lock before pinning the root.
|
||||||
toku_mutex_t tree_lock;
|
toku_mutex_t tree_lock;
|
||||||
u_int64_t checkpoint_count; // Free-running counter incremented once per checkpoint (toggling LSB).
|
|
||||||
// LSB indicates which header location is used on disk so this
|
|
||||||
// counter is effectively a boolean which alternates with each checkpoint.
|
|
||||||
LSN checkpoint_lsn; // LSN of creation of "checkpoint-begin" record in log.
|
|
||||||
int dirty;
|
|
||||||
DICTIONARY_ID dict_id; // unique id for dictionary
|
|
||||||
int panic; // If nonzero there was a write error. Don't write any more, because it probably only gets worse. This is the error code.
|
|
||||||
char *panic_string; // A malloced string that can indicate what went wrong.
|
|
||||||
int layout_version;
|
|
||||||
int layout_version_original; // different (<) from layout_version if upgraded from a previous version (useful for debugging)
|
|
||||||
int layout_version_read_from_disk; // transient, not serialized to disk
|
|
||||||
uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk
|
|
||||||
uint32_t build_id_original; // build_id of software that created this tree (read from disk, overwritten when written to disk)
|
|
||||||
uint64_t time_of_creation; // time this tree was created
|
|
||||||
uint64_t time_of_last_modification; // last time this header was serialized to disk (read from disk, overwritten when written to disk)
|
|
||||||
uint64_t time_of_last_verification; // last time that this tree was verified
|
|
||||||
unsigned int nodesize;
|
|
||||||
unsigned int basementnodesize;
|
|
||||||
// this field is protected by tree_lock, see comment for tree_lock
|
|
||||||
BLOCKNUM root_blocknum; // roots of the dictionary
|
|
||||||
unsigned int flags;
|
|
||||||
DESCRIPTOR_S descriptor;
|
|
||||||
DESCRIPTOR_S cmp_descriptor;
|
|
||||||
|
|
||||||
|
// protected by blocktable lock
|
||||||
BLOCK_TABLE blocktable;
|
BLOCK_TABLE blocktable;
|
||||||
|
|
||||||
|
// protected by atomic builtins
|
||||||
|
STAT64INFO_S in_memory_stats;
|
||||||
|
|
||||||
|
// transient, not serialized to disk. updated when we do write to
|
||||||
|
// disk. tells us whether we can do partial eviction (we can't if
|
||||||
|
// the on-disk layout version is from before basement nodes)
|
||||||
|
int layout_version_read_from_disk;
|
||||||
|
|
||||||
// If a transaction created this BRT, which one?
|
// If a transaction created this BRT, which one?
|
||||||
// If a transaction locked the BRT when it was empty, which transaction? (Only the latest one matters)
|
// If a transaction locked the BRT when it was empty, which transaction? (Only the latest one matters)
|
||||||
// 0 if no such transaction
|
// 0 if no such transaction
|
||||||
|
// only one thread can write to these at once, this is enforced by
|
||||||
|
// the lock tree
|
||||||
TXNID txnid_that_created_or_locked_when_empty;
|
TXNID txnid_that_created_or_locked_when_empty;
|
||||||
TXNID root_that_created_or_locked_when_empty;
|
|
||||||
TXNID txnid_that_suppressed_recovery_logs;
|
TXNID txnid_that_suppressed_recovery_logs;
|
||||||
TXNID root_xid_that_created;
|
|
||||||
struct toku_list live_ft_handles;
|
|
||||||
OMT txns; // transactions that are using this header
|
|
||||||
bool pinned_by_checkpoint; //Keep this header around for checkpoint, like a transaction
|
|
||||||
|
|
||||||
ft_compare_func compare_fun;
|
// protects modifying live_ft_handles, txns, and pinned_by_checkpoint
|
||||||
ft_update_func update_fun;
|
toku_mutex_t ft_ref_lock;
|
||||||
STAT64INFO_S in_memory_stats;
|
struct toku_list live_ft_handles;
|
||||||
STAT64INFO_S on_disk_stats;
|
// transactions that are using this header. you should only be able
|
||||||
STAT64INFO_S checkpoint_staging_stats;
|
// to modify this if you have a valid handle in the list of live brts
|
||||||
uint64_t time_of_last_optimize_begin; // last time that a hot optimize operation was begun
|
OMT txns;
|
||||||
uint64_t time_of_last_optimize_end; // last time that a hot optimize operation was successfully completed
|
// Keep this header around for checkpoint, like a transaction
|
||||||
uint32_t count_of_optimize_in_progress; // the number of hot optimize operations currently in progress on this tree
|
bool pinned_by_checkpoint;
|
||||||
uint32_t count_of_optimize_in_progress_read_from_disk; // the number of hot optimize operations in progress on this tree at the time of the last crash (this field is in-memory only)
|
|
||||||
MSN msn_at_start_of_last_completed_optimize; // all messages before this msn have been applied to leaf nodes
|
// If nonzero there was a write error. Don't write any more, because it probably only gets worse. This is the error code.
|
||||||
enum toku_compression_method compression_method;
|
int panic;
|
||||||
// Current Minimum MSN to be used when upgrading pre-MSN BRT's.
|
// A malloced string that can indicate what went wrong.
|
||||||
// This is decremented from our current MIN_MSN so as not to clash
|
char *panic_string;
|
||||||
// with any existing 'normal' MSN's.
|
|
||||||
MSN highest_unused_msn_for_upgrade;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Copy the descriptor into a temporary variable, and tell DRD that subsequent code happens after reading that pointer.
|
// Copy the descriptor into a temporary variable, and tell DRD that subsequent code happens after reading that pointer.
|
||||||
|
@ -464,9 +526,14 @@ int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2le
|
||||||
|
|
||||||
void toku_verify_or_set_counts(FTNODE);
|
void toku_verify_or_set_counts(FTNODE);
|
||||||
|
|
||||||
int toku_serialize_ft_size (FT h);
|
int toku_serialize_ft_size (FT_HEADER h);
|
||||||
int toku_serialize_ft_to (int fd, FT h);
|
int toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf);
|
||||||
int toku_serialize_ft_to_wbuf (struct wbuf *, FT h, int64_t address_translation, int64_t size_translation);
|
int toku_serialize_ft_to_wbuf (
|
||||||
|
struct wbuf *wbuf,
|
||||||
|
FT_HEADER h,
|
||||||
|
DISKOFF translation_location_on_disk,
|
||||||
|
DISKOFF translation_size_on_disk
|
||||||
|
);
|
||||||
enum deserialize_error_code toku_deserialize_ft_from (int fd, LSN max_acceptable_lsn, FT *ft);
|
enum deserialize_error_code toku_deserialize_ft_from (int fd, LSN max_acceptable_lsn, FT *ft);
|
||||||
int toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset);
|
int toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISKOFF offset);
|
||||||
void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc);
|
void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc);
|
||||||
|
@ -579,7 +646,6 @@ struct ft_cursor {
|
||||||
// is required, such as for flushes.
|
// is required, such as for flushes.
|
||||||
//
|
//
|
||||||
static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) {
|
static inline void fill_bfe_for_full_read(struct ftnode_fetch_extra *bfe, FT h) {
|
||||||
invariant(h->type == FT_CURRENT);
|
|
||||||
bfe->type = ftnode_fetch_all;
|
bfe->type = ftnode_fetch_all;
|
||||||
bfe->h = h;
|
bfe->h = h;
|
||||||
bfe->search = NULL;
|
bfe->search = NULL;
|
||||||
|
@ -608,7 +674,7 @@ static inline void fill_bfe_for_subset_read(
|
||||||
BOOL disable_prefetching
|
BOOL disable_prefetching
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
invariant(h->type == FT_CURRENT);
|
invariant(h->h->type == FT_CURRENT);
|
||||||
bfe->type = ftnode_fetch_subset;
|
bfe->type = ftnode_fetch_subset;
|
||||||
bfe->h = h;
|
bfe->h = h;
|
||||||
bfe->search = search;
|
bfe->search = search;
|
||||||
|
@ -627,7 +693,7 @@ static inline void fill_bfe_for_subset_read(
|
||||||
// Currently used for stat64.
|
// Currently used for stat64.
|
||||||
//
|
//
|
||||||
static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
|
static inline void fill_bfe_for_min_read(struct ftnode_fetch_extra *bfe, FT h) {
|
||||||
invariant(h->type == FT_CURRENT);
|
invariant(h->h->type == FT_CURRENT);
|
||||||
bfe->type = ftnode_fetch_none;
|
bfe->type = ftnode_fetch_none;
|
||||||
bfe->h = h;
|
bfe->h = h;
|
||||||
bfe->search = NULL;
|
bfe->search = NULL;
|
||||||
|
@ -659,7 +725,7 @@ static inline void destroy_bfe_for_prefetch(struct ftnode_fetch_extra *bfe) {
|
||||||
static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe,
|
static inline void fill_bfe_for_prefetch(struct ftnode_fetch_extra *bfe,
|
||||||
FT h,
|
FT h,
|
||||||
FT_CURSOR c) {
|
FT_CURSOR c) {
|
||||||
invariant(h->type == FT_CURRENT);
|
invariant(h->h->type == FT_CURRENT);
|
||||||
bfe->type = ftnode_fetch_prefetch;
|
bfe->type = ftnode_fetch_prefetch;
|
||||||
bfe->h = h;
|
bfe->h = h;
|
||||||
bfe->search = NULL;
|
bfe->search = NULL;
|
||||||
|
|
146
ft/ft-ops.c
146
ft/ft-ops.c
|
@ -150,6 +150,8 @@ static volatile FT_STATUS_S ft_status;
|
||||||
ft_status.status[k].legend = "brt: " l; \
|
ft_status.status[k].legend = "brt: " l; \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static toku_mutex_t ft_open_close_lock;
|
||||||
|
|
||||||
static void
|
static void
|
||||||
status_init(void)
|
status_init(void)
|
||||||
{
|
{
|
||||||
|
@ -307,8 +309,8 @@ toku_ft_nonleaf_is_gorged (FTNODE node) {
|
||||||
(!buffers_are_empty));
|
(!buffers_are_empty));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ft_verify_flags(FT h, FTNODE node) {
|
static void ft_verify_flags(FT ft, FTNODE node) {
|
||||||
assert(h->flags == node->flags);
|
assert(ft->h->flags == node->flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_ft_debug_mode = 0;
|
int toku_ft_debug_mode = 0;
|
||||||
|
@ -599,16 +601,25 @@ static void ft_status_update_flush_reason(FTNODE node, BOOL for_checkpoint) {
|
||||||
|
|
||||||
static void ftnode_update_disk_stats(
|
static void ftnode_update_disk_stats(
|
||||||
FTNODE ftnode,
|
FTNODE ftnode,
|
||||||
FT h,
|
FT ft,
|
||||||
BOOL for_checkpoint
|
BOOL for_checkpoint
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
STAT64INFO_S deltas = ZEROSTATS;
|
STAT64INFO_S deltas = ZEROSTATS;
|
||||||
// capture deltas before rebalancing basements for serialization
|
// capture deltas before rebalancing basements for serialization
|
||||||
deltas = toku_get_and_clear_basement_stats(ftnode);
|
deltas = toku_get_and_clear_basement_stats(ftnode);
|
||||||
toku_ft_update_stats(&h->on_disk_stats, deltas);
|
// locking not necessary here with respect to checkpointing
|
||||||
|
// in Clayface (because of the pending lock and cachetable lock
|
||||||
|
// in toku_cachetable_begin_checkpoint)
|
||||||
|
// essentially, if we are dealing with a for_checkpoint
|
||||||
|
// parameter in a function that is called by the flush_callback,
|
||||||
|
// then the cachetable needs to ensure that this is called in a safe
|
||||||
|
// manner that does not interfere with the beginning
|
||||||
|
// of a checkpoint, which it does with the cachetable lock
|
||||||
|
// and pending lock
|
||||||
|
toku_ft_update_stats(&ft->h->on_disk_stats, deltas);
|
||||||
if (for_checkpoint) {
|
if (for_checkpoint) {
|
||||||
toku_ft_update_stats(&h->checkpoint_staging_stats, deltas);
|
toku_ft_update_stats(&ft->checkpoint_header->on_disk_stats, deltas);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -637,15 +648,15 @@ void toku_ftnode_clone_callback(
|
||||||
{
|
{
|
||||||
FTNODE node = value_data;
|
FTNODE node = value_data;
|
||||||
toku_assert_entire_node_in_memory(node);
|
toku_assert_entire_node_in_memory(node);
|
||||||
FT h = write_extraargs;
|
FT ft = write_extraargs;
|
||||||
FTNODE XMALLOC(cloned_node);
|
FTNODE XMALLOC(cloned_node);
|
||||||
//FTNODE cloned_node = (FTNODE)toku_xmalloc(sizeof(*FTNODE));
|
//FTNODE cloned_node = (FTNODE)toku_xmalloc(sizeof(*FTNODE));
|
||||||
memset(cloned_node, 0, sizeof(*cloned_node));
|
memset(cloned_node, 0, sizeof(*cloned_node));
|
||||||
if (node->height == 0) {
|
if (node->height == 0) {
|
||||||
// set header stats, must be done before rebalancing
|
// set header stats, must be done before rebalancing
|
||||||
ftnode_update_disk_stats(node, h, for_checkpoint);
|
ftnode_update_disk_stats(node, ft, for_checkpoint);
|
||||||
// rebalance the leaf node
|
// rebalance the leaf node
|
||||||
rebalance_ftnode_leaf(node, h->basementnodesize);
|
rebalance_ftnode_leaf(node, ft->h->basementnodesize);
|
||||||
}
|
}
|
||||||
|
|
||||||
cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk;
|
cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk;
|
||||||
|
@ -870,7 +881,7 @@ void toku_evict_bn_from_memory(FTNODE node, int childnum, FT h) {
|
||||||
// callback for partially evicting a node
|
// callback for partially evicting a node
|
||||||
int toku_ftnode_pe_callback (void *ftnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATTR* new_attr, void* extraargs) {
|
int toku_ftnode_pe_callback (void *ftnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATTR* new_attr, void* extraargs) {
|
||||||
FTNODE node = (FTNODE)ftnode_pv;
|
FTNODE node = (FTNODE)ftnode_pv;
|
||||||
FT h = extraargs;
|
FT ft = extraargs;
|
||||||
// Don't partially evict dirty nodes
|
// Don't partially evict dirty nodes
|
||||||
if (node->dirty) {
|
if (node->dirty) {
|
||||||
goto exit;
|
goto exit;
|
||||||
|
@ -888,7 +899,7 @@ int toku_ftnode_pe_callback (void *ftnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATTR*
|
||||||
if (BP_STATE(node,i) == PT_AVAIL) {
|
if (BP_STATE(node,i) == PT_AVAIL) {
|
||||||
if (BP_SHOULD_EVICT(node,i)) {
|
if (BP_SHOULD_EVICT(node,i)) {
|
||||||
STATUS_VALUE(FT_PARTIAL_EVICTIONS_NONLEAF)++;
|
STATUS_VALUE(FT_PARTIAL_EVICTIONS_NONLEAF)++;
|
||||||
cilk_spawn compress_internal_node_partition(node, i, h->compression_method);
|
cilk_spawn compress_internal_node_partition(node, i, ft->h->compression_method);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
BP_SWEEP_CLOCK(node,i);
|
BP_SWEEP_CLOCK(node,i);
|
||||||
|
@ -919,7 +930,7 @@ int toku_ftnode_pe_callback (void *ftnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATTR*
|
||||||
else if (BP_STATE(node,i) == PT_AVAIL) {
|
else if (BP_STATE(node,i) == PT_AVAIL) {
|
||||||
if (BP_SHOULD_EVICT(node,i)) {
|
if (BP_SHOULD_EVICT(node,i)) {
|
||||||
STATUS_VALUE(FT_PARTIAL_EVICTIONS_LEAF)++;
|
STATUS_VALUE(FT_PARTIAL_EVICTIONS_LEAF)++;
|
||||||
toku_evict_bn_from_memory(node, i, h);
|
toku_evict_bn_from_memory(node, i, ft);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
BP_SWEEP_CLOCK(node,i);
|
BP_SWEEP_CLOCK(node,i);
|
||||||
|
@ -1272,7 +1283,7 @@ toku_initialize_empty_ftnode (FTNODE n, BLOCKNUM nodename, int height, int num_c
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ft_init_new_root(FT h, FTNODE nodea, FTNODE nodeb, DBT splitk, CACHEKEY *rootp, FTNODE *newrootp)
|
ft_init_new_root(FT ft, FTNODE nodea, FTNODE nodeb, DBT splitk, CACHEKEY *rootp, FTNODE *newrootp)
|
||||||
// Effect: Create a new root node whose two children are NODEA and NODEB, and the pivotkey is SPLITK.
|
// Effect: Create a new root node whose two children are NODEA and NODEB, and the pivotkey is SPLITK.
|
||||||
// Store the new root's identity in *ROOTP, and the node in *NEWROOTP.
|
// Store the new root's identity in *ROOTP, and the node in *NEWROOTP.
|
||||||
// Unpin nodea and nodeb.
|
// Unpin nodea and nodeb.
|
||||||
|
@ -1281,11 +1292,11 @@ ft_init_new_root(FT h, FTNODE nodea, FTNODE nodeb, DBT splitk, CACHEKEY *rootp,
|
||||||
FTNODE XMALLOC(newroot);
|
FTNODE XMALLOC(newroot);
|
||||||
int new_height = nodea->height+1;
|
int new_height = nodea->height+1;
|
||||||
BLOCKNUM newroot_diskoff;
|
BLOCKNUM newroot_diskoff;
|
||||||
toku_allocate_blocknum(h->blocktable, &newroot_diskoff, h);
|
toku_allocate_blocknum(ft->blocktable, &newroot_diskoff, ft);
|
||||||
assert(newroot);
|
assert(newroot);
|
||||||
*rootp=newroot_diskoff;
|
*rootp=newroot_diskoff;
|
||||||
assert(new_height > 0);
|
assert(new_height > 0);
|
||||||
toku_initialize_empty_ftnode (newroot, newroot_diskoff, new_height, 2, h->layout_version, h->nodesize, h->flags);
|
toku_initialize_empty_ftnode (newroot, newroot_diskoff, new_height, 2, ft->h->layout_version, ft->h->nodesize, ft->h->flags);
|
||||||
//printf("new_root %lld %d %lld %lld\n", newroot_diskoff, newroot->height, nodea->thisnodename, nodeb->thisnodename);
|
//printf("new_root %lld %d %lld %lld\n", newroot_diskoff, newroot->height, nodea->thisnodename, nodeb->thisnodename);
|
||||||
//printf("%s:%d Splitkey=%p %s\n", __FILE__, __LINE__, splitkey, splitkey);
|
//printf("%s:%d Splitkey=%p %s\n", __FILE__, __LINE__, splitkey, splitkey);
|
||||||
toku_copyref_dbt(&newroot->childkeys[0], splitk);
|
toku_copyref_dbt(&newroot->childkeys[0], splitk);
|
||||||
|
@ -1301,12 +1312,19 @@ ft_init_new_root(FT h, FTNODE nodea, FTNODE nodeb, DBT splitk, CACHEKEY *rootp,
|
||||||
BP_STATE(newroot,0) = PT_AVAIL;
|
BP_STATE(newroot,0) = PT_AVAIL;
|
||||||
BP_STATE(newroot,1) = PT_AVAIL;
|
BP_STATE(newroot,1) = PT_AVAIL;
|
||||||
newroot->dirty = 1;
|
newroot->dirty = 1;
|
||||||
toku_unpin_ftnode(h, nodea);
|
|
||||||
toku_unpin_ftnode(h, nodeb);
|
|
||||||
//printf("%s:%d put %lld\n", __FILE__, __LINE__, newroot_diskoff);
|
//printf("%s:%d put %lld\n", __FILE__, __LINE__, newroot_diskoff);
|
||||||
u_int32_t fullhash = toku_cachetable_hash(h->cf, newroot_diskoff);
|
u_int32_t fullhash = toku_cachetable_hash(ft->cf, newroot_diskoff);
|
||||||
newroot->fullhash = fullhash;
|
newroot->fullhash = fullhash;
|
||||||
toku_cachetable_put(h->cf, newroot_diskoff, fullhash, newroot, make_ftnode_pair_attr(newroot), get_write_callbacks_for_node(h));
|
toku_cachetable_put(ft->cf, newroot_diskoff, fullhash, newroot, make_ftnode_pair_attr(newroot), get_write_callbacks_for_node(ft));
|
||||||
|
|
||||||
|
//at this point, newroot is associated with newroot_diskoff, nodea is associated with root_blocknum
|
||||||
|
// make newroot_diskoff point to nodea
|
||||||
|
// make root_blocknum point to newroot
|
||||||
|
// also modify the blocknum and fullhash of nodea and newroot
|
||||||
|
// before doing this, assert(nodea->blocknum == ft->root_blocknum)
|
||||||
|
|
||||||
|
toku_unpin_ftnode(ft, nodea);
|
||||||
|
toku_unpin_ftnode(ft, nodeb);
|
||||||
*newrootp = newroot;
|
*newrootp = newroot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2048,7 +2066,7 @@ ft_nonleaf_put_cmd (ft_compare_func compare_fun, DESCRIPTOR desc, FTNODE node, F
|
||||||
|
|
||||||
// return TRUE if root changed, FALSE otherwise
|
// return TRUE if root changed, FALSE otherwise
|
||||||
static BOOL
|
static BOOL
|
||||||
ft_process_maybe_reactive_root (FT h, CACHEKEY *rootp, FTNODE *nodep) {
|
ft_process_maybe_reactive_root (FT ft, CACHEKEY *rootp, FTNODE *nodep) {
|
||||||
FTNODE node = *nodep;
|
FTNODE node = *nodep;
|
||||||
toku_assert_entire_node_in_memory(node);
|
toku_assert_entire_node_in_memory(node);
|
||||||
enum reactivity re = get_node_reactivity(node);
|
enum reactivity re = get_node_reactivity(node);
|
||||||
|
@ -2060,18 +2078,18 @@ ft_process_maybe_reactive_root (FT h, CACHEKEY *rootp, FTNODE *nodep) {
|
||||||
// The root node should split, so make a new root.
|
// The root node should split, so make a new root.
|
||||||
FTNODE nodea,nodeb;
|
FTNODE nodea,nodeb;
|
||||||
DBT splitk;
|
DBT splitk;
|
||||||
assert(h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
|
assert(ft->h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
|
||||||
//
|
//
|
||||||
// This happens on the client thread with the ydb lock, so it is safe to
|
// This happens on the client thread with the ydb lock, so it is safe to
|
||||||
// not pass in dependent nodes. Although if we wanted to, we could pass
|
// not pass in dependent nodes. Although if we wanted to, we could pass
|
||||||
// in just node. That would be correct.
|
// in just node. That would be correct.
|
||||||
//
|
//
|
||||||
if (node->height==0) {
|
if (node->height==0) {
|
||||||
ftleaf_split(h, node, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
|
ftleaf_split(ft, node, &nodea, &nodeb, &splitk, TRUE, 0, NULL);
|
||||||
} else {
|
} else {
|
||||||
ft_nonleaf_split(h, node, &nodea, &nodeb, &splitk, 0, NULL);
|
ft_nonleaf_split(ft, node, &nodea, &nodeb, &splitk, 0, NULL);
|
||||||
}
|
}
|
||||||
ft_init_new_root(h, nodea, nodeb, splitk, rootp, nodep);
|
ft_init_new_root(ft, nodea, nodeb, splitk, rootp, nodep);
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
case RE_FUSIBLE:
|
case RE_FUSIBLE:
|
||||||
|
@ -2695,7 +2713,7 @@ toku_ft_maybe_insert (FT_HANDLE brt, DBT *key, DBT *val, TOKUTXN txn, BOOL oplsn
|
||||||
//We have transactions, and this is not 2440. We must send the full root-to-leaf-path
|
//We have transactions, and this is not 2440. We must send the full root-to-leaf-path
|
||||||
message_xids = toku_txn_get_xids(txn);
|
message_xids = toku_txn_get_xids(txn);
|
||||||
}
|
}
|
||||||
else if (txn->ancestor_txnid64 != brt->ft->root_xid_that_created) {
|
else if (txn->ancestor_txnid64 != brt->ft->h->root_xid_that_created) {
|
||||||
//We have transactions, and this is 2440, however the txn doing 2440 did not create the dictionary. We must send the full root-to-leaf-path
|
//We have transactions, and this is 2440, however the txn doing 2440 did not create the dictionary. We must send the full root-to-leaf-path
|
||||||
message_xids = toku_txn_get_xids(txn);
|
message_xids = toku_txn_get_xids(txn);
|
||||||
}
|
}
|
||||||
|
@ -2800,6 +2818,7 @@ toku_ft_maybe_update_broadcast(FT_HANDLE brt, const DBT *update_function_extra,
|
||||||
if (r != 0) { goto cleanup; }
|
if (r != 0) { goto cleanup; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//TODO(yoni): remove treelsn here and similar calls (no longer being used)
|
||||||
LSN treelsn;
|
LSN treelsn;
|
||||||
if (oplsn_valid &&
|
if (oplsn_valid &&
|
||||||
oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(brt->ft)).lsn) {
|
oplsn.lsn <= (treelsn = toku_ft_checkpoint_lsn(brt->ft)).lsn) {
|
||||||
|
@ -2890,7 +2909,7 @@ toku_ft_maybe_delete(FT_HANDLE brt, DBT *key, TOKUTXN txn, BOOL oplsn_valid, LSN
|
||||||
//We have transactions, and this is not 2440. We must send the full root-to-leaf-path
|
//We have transactions, and this is not 2440. We must send the full root-to-leaf-path
|
||||||
message_xids = toku_txn_get_xids(txn);
|
message_xids = toku_txn_get_xids(txn);
|
||||||
}
|
}
|
||||||
else if (txn->ancestor_txnid64 != brt->ft->root_xid_that_created) {
|
else if (txn->ancestor_txnid64 != brt->ft->h->root_xid_that_created) {
|
||||||
//We have transactions, and this is 2440, however the txn doing 2440 did not create the dictionary. We must send the full root-to-leaf-path
|
//We have transactions, and this is 2440, however the txn doing 2440 did not create the dictionary. We must send the full root-to-leaf-path
|
||||||
message_xids = toku_txn_get_xids(txn);
|
message_xids = toku_txn_get_xids(txn);
|
||||||
}
|
}
|
||||||
|
@ -3126,10 +3145,10 @@ cleanup:
|
||||||
static void
|
static void
|
||||||
toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) {
|
toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) {
|
||||||
struct ft_options options = {
|
struct ft_options options = {
|
||||||
.nodesize = ft->nodesize,
|
.nodesize = ft->h->nodesize,
|
||||||
.basementnodesize = ft->basementnodesize,
|
.basementnodesize = ft->h->basementnodesize,
|
||||||
.compression_method = ft->compression_method,
|
.compression_method = ft->h->compression_method,
|
||||||
.flags = ft->flags,
|
.flags = ft->h->flags,
|
||||||
.compare_fun = ft->compare_fun,
|
.compare_fun = ft->compare_fun,
|
||||||
.update_fun = ft->update_fun
|
.update_fun = ft->update_fun
|
||||||
};
|
};
|
||||||
|
@ -3148,6 +3167,7 @@ ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_cr
|
||||||
CACHEFILE cf = NULL;
|
CACHEFILE cf = NULL;
|
||||||
FT ft = NULL;
|
FT ft = NULL;
|
||||||
BOOL did_create = FALSE;
|
BOOL did_create = FALSE;
|
||||||
|
toku_ft_open_close_lock();
|
||||||
|
|
||||||
if (t->did_set_flags) {
|
if (t->did_set_flags) {
|
||||||
r = verify_builtin_comparisons_consistent(t, t->options.flags);
|
r = verify_builtin_comparisons_consistent(t, t->options.flags);
|
||||||
|
@ -3211,7 +3231,7 @@ ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_cr
|
||||||
if (!t->did_set_flags) {
|
if (!t->did_set_flags) {
|
||||||
r = verify_builtin_comparisons_consistent(t, t->options.flags);
|
r = verify_builtin_comparisons_consistent(t, t->options.flags);
|
||||||
if (r) { goto exit; }
|
if (r) { goto exit; }
|
||||||
} else if (t->options.flags != ft->flags) { /* if flags have been set then flags must match */
|
} else if (t->options.flags != ft->h->flags) { /* if flags have been set then flags must match */
|
||||||
r = EINVAL;
|
r = EINVAL;
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
@ -3277,7 +3297,10 @@ exit:
|
||||||
// but we have not linked it to this brt. So,
|
// but we have not linked it to this brt. So,
|
||||||
// we can simply try to remove the header.
|
// we can simply try to remove the header.
|
||||||
// We don't need to unlink this brt from the header
|
// We don't need to unlink this brt from the header
|
||||||
if (!toku_ft_needed(ft)) {
|
toku_ft_grab_reflock(ft);
|
||||||
|
BOOL needed = toku_ft_needed_unlocked(ft);
|
||||||
|
toku_ft_release_reflock(ft);
|
||||||
|
if (!needed) {
|
||||||
//Close immediately.
|
//Close immediately.
|
||||||
char *error_string = NULL;
|
char *error_string = NULL;
|
||||||
r = toku_remove_ft(ft, &error_string, false, ZERO_LSN);
|
r = toku_remove_ft(ft, &error_string, false, ZERO_LSN);
|
||||||
|
@ -3288,6 +3311,7 @@ exit:
|
||||||
toku_cachefile_close(&cf, 0, FALSE, ZERO_LSN);
|
toku_cachefile_close(&cf, 0, FALSE, ZERO_LSN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
toku_ft_open_close_unlock();
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3406,36 +3430,24 @@ ft_compare_func toku_ft_get_bt_compare (FT_HANDLE brt) {
|
||||||
return brt->options.compare_fun;
|
return brt->options.compare_fun;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
ft_remove_handle_ref_callback(FT UU(ft), void *extra) {
|
||||||
|
FT_HANDLE handle = extra;
|
||||||
|
toku_list_remove(&handle->live_ft_handle_link);
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_ft_handle_close (FT_HANDLE brt, bool oplsn_valid, LSN oplsn)
|
toku_ft_handle_close (FT_HANDLE brt, bool oplsn_valid, LSN oplsn)
|
||||||
// Effect: See ft-ops.h for the specification of this function.
|
// Effect: See ft-ops.h for the specification of this function.
|
||||||
{
|
{
|
||||||
int r = 0;
|
FT ft = brt->ft;
|
||||||
FT h = brt->ft;
|
if (ft) {
|
||||||
|
toku_ft_remove_reference(brt->ft, oplsn_valid, oplsn, ft_remove_handle_ref_callback, brt);
|
||||||
// it is possible that a header was never opened
|
|
||||||
// for the brt
|
|
||||||
if (brt->ft) {
|
|
||||||
// TODO: figure out the proper locking here
|
|
||||||
// what really protects live_ft_handle_link?
|
|
||||||
toku_ft_lock(h);
|
|
||||||
toku_list_remove(&brt->live_ft_handle_link);
|
|
||||||
toku_ft_unlock(h);
|
|
||||||
|
|
||||||
if (!toku_ft_needed(brt->ft)) {
|
|
||||||
// close header
|
|
||||||
char *error_string = NULL;
|
|
||||||
r = toku_remove_ft(h, &error_string, oplsn_valid, oplsn);
|
|
||||||
assert_zero(r);
|
|
||||||
assert(error_string == NULL);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
toku_free(brt);
|
toku_free(brt);
|
||||||
|
return 0;
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// test function
|
|
||||||
int toku_close_ft_handle_nolsn (FT_HANDLE brt, char** UU(error_string)) {
|
int toku_close_ft_handle_nolsn (FT_HANDLE brt, char** UU(error_string)) {
|
||||||
return toku_ft_handle_close(brt, FALSE, ZERO_LSN);
|
return toku_ft_handle_close(brt, FALSE, ZERO_LSN);
|
||||||
}
|
}
|
||||||
|
@ -3530,7 +3542,7 @@ int toku_ft_cursor (
|
||||||
{
|
{
|
||||||
if (is_snapshot_read) {
|
if (is_snapshot_read) {
|
||||||
invariant(ttxn != NULL);
|
invariant(ttxn != NULL);
|
||||||
int accepted = does_txn_read_entry(brt->ft->root_xid_that_created, ttxn);
|
int accepted = does_txn_read_entry(brt->ft->h->root_xid_that_created, ttxn);
|
||||||
if (accepted!=TOKUDB_ACCEPT) {
|
if (accepted!=TOKUDB_ACCEPT) {
|
||||||
invariant(accepted==0);
|
invariant(accepted==0);
|
||||||
return TOKUDB_MVCC_DICTIONARY_TOO_NEW;
|
return TOKUDB_MVCC_DICTIONARY_TOO_NEW;
|
||||||
|
@ -5434,17 +5446,23 @@ int toku_ft_layer_init(void (*ydb_lock_callback)(void),
|
||||||
void (*ydb_unlock_callback)(void)) {
|
void (*ydb_unlock_callback)(void)) {
|
||||||
int r = 0;
|
int r = 0;
|
||||||
//Portability must be initialized first
|
//Portability must be initialized first
|
||||||
if (r==0)
|
r = toku_portability_init();
|
||||||
r = toku_portability_init();
|
if (r) { goto exit; }
|
||||||
if (r==0)
|
|
||||||
toku_checkpoint_init(ydb_lock_callback, ydb_unlock_callback);
|
toku_checkpoint_init(ydb_lock_callback, ydb_unlock_callback);
|
||||||
if (r == 0)
|
|
||||||
r = toku_ft_serialize_layer_init();
|
r = toku_ft_serialize_layer_init();
|
||||||
|
if (r) { goto exit; }
|
||||||
|
|
||||||
|
toku_mutex_init(&ft_open_close_lock, NULL);
|
||||||
|
exit:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_ft_layer_destroy(void) {
|
int toku_ft_layer_destroy(void) {
|
||||||
int r = 0;
|
int r = 0;
|
||||||
|
toku_mutex_destroy(&ft_open_close_lock);
|
||||||
|
|
||||||
if (r == 0)
|
if (r == 0)
|
||||||
r = toku_ft_serialize_layer_destroy();
|
r = toku_ft_serialize_layer_destroy();
|
||||||
if (r==0)
|
if (r==0)
|
||||||
|
@ -5455,6 +5473,14 @@ int toku_ft_layer_destroy(void) {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void toku_ft_open_close_lock(void) {
|
||||||
|
toku_mutex_lock(&ft_open_close_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void toku_ft_open_close_unlock(void) {
|
||||||
|
toku_mutex_unlock(&ft_open_close_lock);
|
||||||
|
}
|
||||||
|
|
||||||
//Suppress both rollback and recovery logs.
|
//Suppress both rollback and recovery logs.
|
||||||
void
|
void
|
||||||
toku_ft_suppress_recovery_logs (FT_HANDLE brt, TOKUTXN txn) {
|
toku_ft_suppress_recovery_logs (FT_HANDLE brt, TOKUTXN txn) {
|
||||||
|
|
|
@ -241,6 +241,8 @@ toku_ft_handle_stat64 (FT_HANDLE, TOKUTXN, struct ftstat64_s *stat) __attribute_
|
||||||
int toku_ft_layer_init(void (*ydb_lock_callback)(void),
|
int toku_ft_layer_init(void (*ydb_lock_callback)(void),
|
||||||
void (*ydb_unlock_callback)(void))
|
void (*ydb_unlock_callback)(void))
|
||||||
__attribute__ ((warn_unused_result));
|
__attribute__ ((warn_unused_result));
|
||||||
|
void toku_ft_open_close_lock(void);
|
||||||
|
void toku_ft_open_close_unlock(void);
|
||||||
int toku_ft_layer_destroy(void) __attribute__ ((warn_unused_result));
|
int toku_ft_layer_destroy(void) __attribute__ ((warn_unused_result));
|
||||||
int toku_ft_serialize_layer_init(void) __attribute__ ((warn_unused_result));
|
int toku_ft_serialize_layer_init(void) __attribute__ ((warn_unused_result));
|
||||||
int toku_ft_serialize_layer_destroy(void) __attribute__ ((warn_unused_result));
|
int toku_ft_serialize_layer_destroy(void) __attribute__ ((warn_unused_result));
|
||||||
|
@ -259,10 +261,6 @@ void toku_ft_suppress_recovery_logs (FT_HANDLE brt, TOKUTXN txn);
|
||||||
|
|
||||||
int toku_ft_get_fragmentation(FT_HANDLE brt, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result));
|
int toku_ft_get_fragmentation(FT_HANDLE brt, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
BOOL toku_ft_is_empty_fast (FT_HANDLE brt);
|
|
||||||
// Effect: Return TRUE if there are no messages or leaf entries in the tree. If so, it's empty. If there are messages or leaf entries, we say it's not empty
|
|
||||||
// even though if we were to optimize the tree it might turn out that they are empty.
|
|
||||||
|
|
||||||
BOOL toku_ft_is_empty_fast (FT_HANDLE brt) __attribute__ ((warn_unused_result));
|
BOOL toku_ft_is_empty_fast (FT_HANDLE brt) __attribute__ ((warn_unused_result));
|
||||||
// Effect: Return TRUE if there are no messages or leaf entries in the tree. If so, it's empty. If there are messages or leaf entries, we say it's not empty
|
// Effect: Return TRUE if there are no messages or leaf entries in the tree. If so, it's empty. If there are messages or leaf entries, we say it's not empty
|
||||||
// even though if we were to optimize the tree it might turn out that they are empty.
|
// even though if we were to optimize the tree it might turn out that they are empty.
|
||||||
|
|
|
@ -139,10 +139,10 @@ exit:
|
||||||
|
|
||||||
// We only deserialize brt header once and then share everything with all the brts.
|
// We only deserialize brt header once and then share everything with all the brts.
|
||||||
static enum deserialize_error_code
|
static enum deserialize_error_code
|
||||||
deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version)
|
deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version)
|
||||||
{
|
{
|
||||||
enum deserialize_error_code e = DS_OK;
|
enum deserialize_error_code e = DS_OK;
|
||||||
FT h = NULL;
|
FT ft = NULL;
|
||||||
invariant(version >= FT_LAYOUT_MIN_SUPPORTED_VERSION);
|
invariant(version >= FT_LAYOUT_MIN_SUPPORTED_VERSION);
|
||||||
invariant(version <= FT_LAYOUT_VERSION);
|
invariant(version <= FT_LAYOUT_VERSION);
|
||||||
// We already know:
|
// We already know:
|
||||||
|
@ -155,28 +155,25 @@ deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version)
|
||||||
rbuf_literal_bytes(rb, &magic, 8);
|
rbuf_literal_bytes(rb, &magic, 8);
|
||||||
lazy_assert(memcmp(magic,"tokudata",8)==0);
|
lazy_assert(memcmp(magic,"tokudata",8)==0);
|
||||||
|
|
||||||
CALLOC(h);
|
XCALLOC(ft);
|
||||||
if (!h) {
|
if (!ft) {
|
||||||
e = DS_ERRNO;
|
e = DS_ERRNO;
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
h->type = FT_CURRENT;
|
ft->checkpoint_header = NULL;
|
||||||
h->checkpoint_header = NULL;
|
ft->panic = 0;
|
||||||
h->dirty = 0;
|
ft->panic_string = 0;
|
||||||
h->panic = 0;
|
toku_list_init(&ft->live_ft_handles);
|
||||||
h->panic_string = 0;
|
int r = toku_omt_create(&ft->txns);
|
||||||
toku_list_init(&h->live_ft_handles);
|
|
||||||
int r = toku_omt_create(&h->txns);
|
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
|
|
||||||
//version MUST be in network order on disk regardless of disk order
|
//version MUST be in network order on disk regardless of disk order
|
||||||
h->layout_version_read_from_disk = rbuf_network_int(rb);
|
ft->layout_version_read_from_disk = rbuf_network_int(rb);
|
||||||
invariant(h->layout_version_read_from_disk >= FT_LAYOUT_MIN_SUPPORTED_VERSION);
|
invariant(ft->layout_version_read_from_disk >= FT_LAYOUT_MIN_SUPPORTED_VERSION);
|
||||||
invariant(h->layout_version_read_from_disk <= FT_LAYOUT_VERSION);
|
invariant(ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION);
|
||||||
h->layout_version = FT_LAYOUT_VERSION;
|
|
||||||
|
|
||||||
//build_id MUST be in network order on disk regardless of disk order
|
//build_id MUST be in network order on disk regardless of disk order
|
||||||
h->build_id = rbuf_network_int(rb);
|
uint32_t build_id = rbuf_network_int(rb);
|
||||||
|
|
||||||
//Size MUST be in network order regardless of disk order.
|
//Size MUST be in network order regardless of disk order.
|
||||||
u_int32_t size = rbuf_network_int(rb);
|
u_int32_t size = rbuf_network_int(rb);
|
||||||
|
@ -188,16 +185,17 @@ deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version)
|
||||||
int64_t byte_order_stored = *(int64_t*)tmp_byte_order_check;
|
int64_t byte_order_stored = *(int64_t*)tmp_byte_order_check;
|
||||||
lazy_assert(byte_order_stored == toku_byte_order_host);
|
lazy_assert(byte_order_stored == toku_byte_order_host);
|
||||||
|
|
||||||
h->checkpoint_count = rbuf_ulonglong(rb);
|
uint64_t checkpoint_count = rbuf_ulonglong(rb);
|
||||||
h->checkpoint_lsn = rbuf_lsn(rb);
|
LSN checkpoint_lsn = rbuf_lsn(rb);
|
||||||
h->nodesize = rbuf_int(rb);
|
unsigned nodesize = rbuf_int(rb);
|
||||||
DISKOFF translation_address_on_disk = rbuf_diskoff(rb);
|
DISKOFF translation_address_on_disk = rbuf_diskoff(rb);
|
||||||
DISKOFF translation_size_on_disk = rbuf_diskoff(rb);
|
DISKOFF translation_size_on_disk = rbuf_diskoff(rb);
|
||||||
lazy_assert(translation_address_on_disk > 0);
|
lazy_assert(translation_address_on_disk > 0);
|
||||||
lazy_assert(translation_size_on_disk > 0);
|
lazy_assert(translation_size_on_disk > 0);
|
||||||
|
|
||||||
// initialize the tree lock
|
// initialize the tree lock
|
||||||
toku_ft_init_treelock(h);
|
toku_ft_init_treelock(ft);
|
||||||
|
toku_ft_init_reflock(ft);
|
||||||
|
|
||||||
//Load translation table
|
//Load translation table
|
||||||
{
|
{
|
||||||
|
@ -213,7 +211,7 @@ deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version)
|
||||||
}
|
}
|
||||||
toku_unlock_for_pwrite();
|
toku_unlock_for_pwrite();
|
||||||
// Create table and read in data.
|
// Create table and read in data.
|
||||||
e = toku_blocktable_create_from_buffer(&h->blocktable,
|
e = toku_blocktable_create_from_buffer(&ft->blocktable,
|
||||||
translation_address_on_disk,
|
translation_address_on_disk,
|
||||||
translation_size_on_disk,
|
translation_size_on_disk,
|
||||||
tbuf);
|
tbuf);
|
||||||
|
@ -223,73 +221,69 @@ deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
h->root_blocknum = rbuf_blocknum(rb);
|
BLOCKNUM root_blocknum = rbuf_blocknum(rb);
|
||||||
h->flags = rbuf_int(rb);
|
unsigned flags = rbuf_int(rb);
|
||||||
if (h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_13) {
|
if (ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_13) {
|
||||||
// deprecate 'TOKU_DB_VALCMP_BUILTIN'. just remove the flag
|
// deprecate 'TOKU_DB_VALCMP_BUILTIN'. just remove the flag
|
||||||
h->flags &= ~TOKU_DB_VALCMP_BUILTIN_13;
|
flags &= ~TOKU_DB_VALCMP_BUILTIN_13;
|
||||||
}
|
}
|
||||||
h->layout_version_original = rbuf_int(rb);
|
int layout_version_original = rbuf_int(rb);
|
||||||
h->build_id_original = rbuf_int(rb);
|
uint32_t build_id_original = rbuf_int(rb);
|
||||||
h->time_of_creation = rbuf_ulonglong(rb);
|
uint64_t time_of_creation = rbuf_ulonglong(rb);
|
||||||
h->time_of_last_modification = rbuf_ulonglong(rb);
|
uint64_t time_of_last_modification = rbuf_ulonglong(rb);
|
||||||
h->time_of_last_verification = 0;
|
|
||||||
if (h->layout_version_read_from_disk <= FT_LAYOUT_VERSION_18) {
|
if (ft->layout_version_read_from_disk <= FT_LAYOUT_VERSION_18) {
|
||||||
// 17 was the last version with these fields, we no longer store
|
// 17 was the last version with these fields, we no longer store
|
||||||
// them, so read and discard them
|
// them, so read and discard them
|
||||||
(void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_13
|
(void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_13
|
||||||
if (h->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) {
|
if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) {
|
||||||
(void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_14
|
(void) rbuf_ulonglong(rb); // num_blocks_to_upgrade_14
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (h->layout_version_read_from_disk >= FT_LAYOUT_VERSION_14) {
|
// fake creation during the last checkpoint
|
||||||
rbuf_TXNID(rb, &h->root_xid_that_created);
|
TXNID root_xid_that_created = checkpoint_lsn.lsn;
|
||||||
} else {
|
if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_14) {
|
||||||
// fake creation during the last checkpoint
|
rbuf_TXNID(rb, &root_xid_that_created);
|
||||||
h->root_xid_that_created = h->checkpoint_lsn.lsn;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (h->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) {
|
// TODO(leif): get this to default to what's specified, not the
|
||||||
h->basementnodesize = rbuf_int(rb);
|
// hard-coded default
|
||||||
h->time_of_last_verification = rbuf_ulonglong(rb);
|
unsigned basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE;
|
||||||
} else {
|
uint64_t time_of_last_verification = 0;
|
||||||
h->basementnodesize = FT_DEFAULT_BASEMENT_NODE_SIZE;
|
if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_15) {
|
||||||
h->time_of_last_verification = 0;
|
basementnodesize = rbuf_int(rb);
|
||||||
|
time_of_last_verification = rbuf_ulonglong(rb);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (h->layout_version_read_from_disk >= FT_LAYOUT_VERSION_18) {
|
STAT64INFO_S on_disk_stats = ZEROSTATS;
|
||||||
h->on_disk_stats.numrows = rbuf_ulonglong(rb);
|
uint64_t time_of_last_optimize_begin = 0;
|
||||||
h->on_disk_stats.numbytes = rbuf_ulonglong(rb);
|
uint64_t time_of_last_optimize_end = 0;
|
||||||
h->in_memory_stats = h->on_disk_stats;
|
uint32_t count_of_optimize_in_progress = 0;
|
||||||
h->time_of_last_optimize_begin = rbuf_ulonglong(rb);
|
MSN msn_at_start_of_last_completed_optimize = ZERO_MSN;
|
||||||
h->time_of_last_optimize_end = rbuf_ulonglong(rb);
|
if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_18) {
|
||||||
h->count_of_optimize_in_progress = rbuf_int(rb);
|
on_disk_stats.numrows = rbuf_ulonglong(rb);
|
||||||
h->count_of_optimize_in_progress_read_from_disk = h->count_of_optimize_in_progress;
|
on_disk_stats.numbytes = rbuf_ulonglong(rb);
|
||||||
h->msn_at_start_of_last_completed_optimize = rbuf_msn(rb);
|
ft->in_memory_stats = on_disk_stats;
|
||||||
} else {
|
time_of_last_optimize_begin = rbuf_ulonglong(rb);
|
||||||
e = toku_upgrade_subtree_estimates_to_stat64info(fd, h);
|
time_of_last_optimize_end = rbuf_ulonglong(rb);
|
||||||
if (e != DS_OK) {
|
count_of_optimize_in_progress = rbuf_int(rb);
|
||||||
goto exit;
|
msn_at_start_of_last_completed_optimize = rbuf_msn(rb);
|
||||||
}
|
|
||||||
h->time_of_last_optimize_begin = 0;
|
|
||||||
h->time_of_last_optimize_end = 0;
|
|
||||||
h->count_of_optimize_in_progress = 0;
|
|
||||||
h->count_of_optimize_in_progress_read_from_disk = 0;
|
|
||||||
h->msn_at_start_of_last_completed_optimize = ZERO_MSN;
|
|
||||||
}
|
}
|
||||||
if (h->layout_version_read_from_disk >= FT_LAYOUT_VERSION_19) {
|
|
||||||
|
enum toku_compression_method compression_method;
|
||||||
|
MSN highest_unused_msn_for_upgrade = (MSN) { .msn = (MIN_MSN.msn - 1) };
|
||||||
|
if (ft->layout_version_read_from_disk >= FT_LAYOUT_VERSION_19) {
|
||||||
unsigned char method = rbuf_char(rb);
|
unsigned char method = rbuf_char(rb);
|
||||||
h->compression_method = (enum toku_compression_method) method;
|
compression_method = (enum toku_compression_method) method;
|
||||||
h->highest_unused_msn_for_upgrade = rbuf_msn(rb);
|
highest_unused_msn_for_upgrade = rbuf_msn(rb);
|
||||||
} else {
|
} else {
|
||||||
// we hard coded zlib until 5.2, then quicklz in 5.2
|
// we hard coded zlib until 5.2, then quicklz in 5.2
|
||||||
if (h->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) {
|
if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) {
|
||||||
h->compression_method = TOKU_ZLIB_METHOD;
|
compression_method = TOKU_ZLIB_METHOD;
|
||||||
} else {
|
} else {
|
||||||
h->compression_method = TOKU_QUICKLZ_METHOD;
|
compression_method = TOKU_QUICKLZ_METHOD;
|
||||||
}
|
}
|
||||||
h->highest_unused_msn_for_upgrade.msn = MIN_MSN.msn - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
(void) rbuf_int(rb); //Read in checksum and ignore (already verified).
|
(void) rbuf_int(rb); //Read in checksum and ignore (already verified).
|
||||||
|
@ -300,21 +294,57 @@ deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version)
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
invariant(h);
|
struct ft_header h = {
|
||||||
invariant((uint32_t) h->layout_version_read_from_disk == version);
|
.type = FT_CURRENT,
|
||||||
e = deserialize_descriptor_from(fd, h->blocktable, &h->descriptor, version);
|
.dirty = 0,
|
||||||
|
.checkpoint_count = checkpoint_count,
|
||||||
|
.checkpoint_lsn = checkpoint_lsn,
|
||||||
|
.layout_version = FT_LAYOUT_VERSION,
|
||||||
|
.layout_version_original = layout_version_original,
|
||||||
|
.build_id = build_id,
|
||||||
|
.build_id_original = build_id_original,
|
||||||
|
.time_of_creation = time_of_creation,
|
||||||
|
.root_xid_that_created = root_xid_that_created,
|
||||||
|
.time_of_last_modification = time_of_last_modification,
|
||||||
|
.time_of_last_verification = time_of_last_verification,
|
||||||
|
.root_blocknum = root_blocknum,
|
||||||
|
.flags = flags,
|
||||||
|
.nodesize = nodesize,
|
||||||
|
.basementnodesize = basementnodesize,
|
||||||
|
.compression_method = compression_method,
|
||||||
|
.highest_unused_msn_for_upgrade = highest_unused_msn_for_upgrade,
|
||||||
|
.time_of_last_optimize_begin = time_of_last_optimize_begin,
|
||||||
|
.time_of_last_optimize_end = time_of_last_optimize_end,
|
||||||
|
.count_of_optimize_in_progress = count_of_optimize_in_progress,
|
||||||
|
.count_of_optimize_in_progress_read_from_disk = count_of_optimize_in_progress,
|
||||||
|
.msn_at_start_of_last_completed_optimize = msn_at_start_of_last_completed_optimize,
|
||||||
|
.on_disk_stats = on_disk_stats
|
||||||
|
};
|
||||||
|
ft->h = toku_xmemdup(&h, sizeof h);
|
||||||
|
|
||||||
|
if (ft->layout_version_read_from_disk < FT_LAYOUT_VERSION_18) {
|
||||||
|
// This needs ft->h to be non-null, so we have to do it after we
|
||||||
|
// read everything else.
|
||||||
|
e = toku_upgrade_subtree_estimates_to_stat64info(fd, ft);
|
||||||
|
if (e != DS_OK) {
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
invariant((uint32_t) ft->layout_version_read_from_disk == version);
|
||||||
|
e = deserialize_descriptor_from(fd, ft->blocktable, &ft->descriptor, version);
|
||||||
if (e != DS_OK) {
|
if (e != DS_OK) {
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
// copy descriptor to cmp_descriptor for #4541
|
// copy descriptor to cmp_descriptor for #4541
|
||||||
h->cmp_descriptor.dbt.size = h->descriptor.dbt.size;
|
ft->cmp_descriptor.dbt.size = ft->descriptor.dbt.size;
|
||||||
h->cmp_descriptor.dbt.data = toku_xmemdup(h->descriptor.dbt.data, h->descriptor.dbt.size);
|
ft->cmp_descriptor.dbt.data = toku_xmemdup(ft->descriptor.dbt.data, ft->descriptor.dbt.size);
|
||||||
// Version 13 descriptors had an extra 4 bytes that we don't read
|
// Version 13 descriptors had an extra 4 bytes that we don't read
|
||||||
// anymore. Since the header is going to think it's the current
|
// anymore. Since the header is going to think it's the current
|
||||||
// version if it gets written out, we need to write the descriptor in
|
// version if it gets written out, we need to write the descriptor in
|
||||||
// the new format (without those bytes) before that happens.
|
// the new format (without those bytes) before that happens.
|
||||||
if (version <= FT_LAYOUT_VERSION_13) {
|
if (version <= FT_LAYOUT_VERSION_13) {
|
||||||
r = toku_update_descriptor(h, &h->cmp_descriptor, fd);
|
r = toku_update_descriptor(ft, &ft->cmp_descriptor, fd);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
errno = r;
|
errno = r;
|
||||||
e = DS_ERRNO;
|
e = DS_ERRNO;
|
||||||
|
@ -322,11 +352,11 @@ deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ft, uint32_t version)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
exit:
|
exit:
|
||||||
if (e != DS_OK && h != NULL) {
|
if (e != DS_OK && ft != NULL) {
|
||||||
toku_free(h);
|
toku_free(ft);
|
||||||
h = NULL;
|
ft = NULL;
|
||||||
}
|
}
|
||||||
*ft = h;
|
*ftp = ft;
|
||||||
return e;
|
return e;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -625,7 +655,7 @@ exit:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int toku_serialize_ft_size (FT h) {
|
int toku_serialize_ft_size (FT_HEADER h) {
|
||||||
u_int32_t size = serialize_ft_min_size(h->layout_version);
|
u_int32_t size = serialize_ft_min_size(h->layout_version);
|
||||||
//There is no dynamic data.
|
//There is no dynamic data.
|
||||||
lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE);
|
lazy_assert(size <= BLOCK_ALLOCATOR_HEADER_RESERVE);
|
||||||
|
@ -633,7 +663,13 @@ int toku_serialize_ft_size (FT h) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int toku_serialize_ft_to_wbuf (struct wbuf *wbuf, FT h, DISKOFF translation_location_on_disk, DISKOFF translation_size_on_disk) {
|
int toku_serialize_ft_to_wbuf (
|
||||||
|
struct wbuf *wbuf,
|
||||||
|
FT_HEADER h,
|
||||||
|
DISKOFF translation_location_on_disk,
|
||||||
|
DISKOFF translation_size_on_disk
|
||||||
|
)
|
||||||
|
{
|
||||||
wbuf_literal_bytes(wbuf, "tokudata", 8);
|
wbuf_literal_bytes(wbuf, "tokudata", 8);
|
||||||
wbuf_network_int (wbuf, h->layout_version); //MUST be in network order regardless of disk order
|
wbuf_network_int (wbuf, h->layout_version); //MUST be in network order regardless of disk order
|
||||||
wbuf_network_int (wbuf, BUILD_ID); //MUST be in network order regardless of disk order
|
wbuf_network_int (wbuf, BUILD_ID); //MUST be in network order regardless of disk order
|
||||||
|
@ -643,7 +679,6 @@ int toku_serialize_ft_to_wbuf (struct wbuf *wbuf, FT h, DISKOFF translation_loca
|
||||||
wbuf_LSN (wbuf, h->checkpoint_lsn);
|
wbuf_LSN (wbuf, h->checkpoint_lsn);
|
||||||
wbuf_int (wbuf, h->nodesize);
|
wbuf_int (wbuf, h->nodesize);
|
||||||
|
|
||||||
//printf("%s:%d bta=%lu size=%lu\n", __FILE__, __LINE__, h->block_translation_address_on_disk, 4 + 16*h->translated_blocknum_limit);
|
|
||||||
wbuf_DISKOFF(wbuf, translation_location_on_disk);
|
wbuf_DISKOFF(wbuf, translation_location_on_disk);
|
||||||
wbuf_DISKOFF(wbuf, translation_size_on_disk);
|
wbuf_DISKOFF(wbuf, translation_size_on_disk);
|
||||||
wbuf_BLOCKNUM(wbuf, h->root_blocknum);
|
wbuf_BLOCKNUM(wbuf, h->root_blocknum);
|
||||||
|
@ -655,8 +690,8 @@ int toku_serialize_ft_to_wbuf (struct wbuf *wbuf, FT h, DISKOFF translation_loca
|
||||||
wbuf_TXNID(wbuf, h->root_xid_that_created);
|
wbuf_TXNID(wbuf, h->root_xid_that_created);
|
||||||
wbuf_int(wbuf, h->basementnodesize);
|
wbuf_int(wbuf, h->basementnodesize);
|
||||||
wbuf_ulonglong(wbuf, h->time_of_last_verification);
|
wbuf_ulonglong(wbuf, h->time_of_last_verification);
|
||||||
wbuf_ulonglong(wbuf, h->checkpoint_staging_stats.numrows);
|
wbuf_ulonglong(wbuf, h->on_disk_stats.numrows);
|
||||||
wbuf_ulonglong(wbuf, h->checkpoint_staging_stats.numbytes);
|
wbuf_ulonglong(wbuf, h->on_disk_stats.numbytes);
|
||||||
wbuf_ulonglong(wbuf, h->time_of_last_optimize_begin);
|
wbuf_ulonglong(wbuf, h->time_of_last_optimize_begin);
|
||||||
wbuf_ulonglong(wbuf, h->time_of_last_optimize_end);
|
wbuf_ulonglong(wbuf, h->time_of_last_optimize_end);
|
||||||
wbuf_int(wbuf, h->count_of_optimize_in_progress);
|
wbuf_int(wbuf, h->count_of_optimize_in_progress);
|
||||||
|
@ -669,23 +704,21 @@ int toku_serialize_ft_to_wbuf (struct wbuf *wbuf, FT h, DISKOFF translation_loca
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_serialize_ft_to (int fd, FT h) {
|
int toku_serialize_ft_to (int fd, FT_HEADER h, BLOCK_TABLE blocktable, CACHEFILE cf) {
|
||||||
int rr = 0;
|
int rr = 0;
|
||||||
if (h->panic) return h->panic;
|
|
||||||
lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS);
|
lazy_assert(h->type==FT_CHECKPOINT_INPROGRESS);
|
||||||
toku_ft_lock(h);
|
|
||||||
struct wbuf w_translation;
|
struct wbuf w_translation;
|
||||||
int64_t size_translation;
|
int64_t size_translation;
|
||||||
int64_t address_translation;
|
int64_t address_translation;
|
||||||
{
|
{
|
||||||
//Must serialize translation first, to get address,size for header.
|
//Must serialize translation first, to get address,size for header.
|
||||||
toku_serialize_translation_to_wbuf_unlocked(h->blocktable, &w_translation,
|
toku_serialize_translation_to_wbuf(blocktable, &w_translation,
|
||||||
&address_translation,
|
&address_translation,
|
||||||
&size_translation);
|
&size_translation);
|
||||||
lazy_assert(size_translation==w_translation.size);
|
lazy_assert(size_translation==w_translation.size);
|
||||||
}
|
}
|
||||||
struct wbuf w_main;
|
struct wbuf w_main;
|
||||||
unsigned int size_main = toku_serialize_ft_size (h);
|
unsigned int size_main = toku_serialize_ft_size(h);
|
||||||
{
|
{
|
||||||
wbuf_init(&w_main, toku_xmalloc(size_main), size_main);
|
wbuf_init(&w_main, toku_xmalloc(size_main), size_main);
|
||||||
{
|
{
|
||||||
|
@ -694,7 +727,6 @@ int toku_serialize_ft_to (int fd, FT h) {
|
||||||
}
|
}
|
||||||
lazy_assert(w_main.ndone==size_main);
|
lazy_assert(w_main.ndone==size_main);
|
||||||
}
|
}
|
||||||
toku_ft_unlock(h);
|
|
||||||
toku_lock_for_pwrite();
|
toku_lock_for_pwrite();
|
||||||
{
|
{
|
||||||
//Actual Write translation table
|
//Actual Write translation table
|
||||||
|
@ -708,8 +740,8 @@ int toku_serialize_ft_to (int fd, FT h) {
|
||||||
//If the header has a cachefile we need to do cachefile fsync (to
|
//If the header has a cachefile we need to do cachefile fsync (to
|
||||||
//prevent crash if we redirected to dev null)
|
//prevent crash if we redirected to dev null)
|
||||||
//If there is no cachefile we still need to do an fsync.
|
//If there is no cachefile we still need to do an fsync.
|
||||||
if (h->cf) {
|
if (cf) {
|
||||||
rr = toku_cachefile_fsync(h->cf);
|
rr = toku_cachefile_fsync(cf);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
rr = toku_file_fsync(fd);
|
rr = toku_file_fsync(fd);
|
||||||
|
|
|
@ -74,7 +74,7 @@ int toku_testsetup_nonleaf (FT_HANDLE brt, int height, BLOCKNUM *blocknum, int n
|
||||||
|
|
||||||
int toku_testsetup_root(FT_HANDLE brt, BLOCKNUM blocknum) {
|
int toku_testsetup_root(FT_HANDLE brt, BLOCKNUM blocknum) {
|
||||||
assert(testsetup_initialized);
|
assert(testsetup_initialized);
|
||||||
brt->ft->root_blocknum = blocknum;
|
brt->ft->h->root_blocknum = blocknum;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -410,8 +410,8 @@ toku_verify_ft_with_progress (FT_HANDLE brt, int (*progress_callback)(void *extr
|
||||||
int r = toku_verify_ftnode(brt, ZERO_MSN, ZERO_MSN, root_node, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going);
|
int r = toku_verify_ftnode(brt, ZERO_MSN, ZERO_MSN, root_node, -1, NULL, NULL, progress_callback, progress_extra, 1, verbose, keep_on_going);
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
toku_ft_lock(brt->ft);
|
toku_ft_lock(brt->ft);
|
||||||
brt->ft->time_of_last_verification = time(NULL);
|
brt->ft->h->time_of_last_verification = time(NULL);
|
||||||
brt->ft->dirty = 1;
|
brt->ft->h->dirty = 1;
|
||||||
toku_ft_unlock(brt->ft);
|
toku_ft_unlock(brt->ft);
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
|
|
589
ft/ft.c
589
ft/ft.c
|
@ -14,89 +14,100 @@ toku_ft_suppress_rollbacks(FT h, TOKUTXN txn) {
|
||||||
assert(h->txnid_that_created_or_locked_when_empty == TXNID_NONE ||
|
assert(h->txnid_that_created_or_locked_when_empty == TXNID_NONE ||
|
||||||
h->txnid_that_created_or_locked_when_empty == txnid);
|
h->txnid_that_created_or_locked_when_empty == txnid);
|
||||||
h->txnid_that_created_or_locked_when_empty = txnid;
|
h->txnid_that_created_or_locked_when_empty = txnid;
|
||||||
TXNID rootid = toku_txn_get_root_txnid(txn);
|
|
||||||
assert(h->root_that_created_or_locked_when_empty == TXNID_NONE ||
|
|
||||||
h->root_that_created_or_locked_when_empty == rootid);
|
|
||||||
h->root_that_created_or_locked_when_empty = rootid;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_reset_root_xid_that_created(FT h, TXNID new_root_xid_that_created) {
|
toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created) {
|
||||||
// Reset the root_xid_that_created field to the given value.
|
// Reset the root_xid_that_created field to the given value.
|
||||||
// This redefines which xid created the dictionary.
|
// This redefines which xid created the dictionary.
|
||||||
|
|
||||||
// hold lock around setting and clearing of dirty bit
|
// hold lock around setting and clearing of dirty bit
|
||||||
// (see cooperative use of dirty bit in ft_begin_checkpoint())
|
// (see cooperative use of dirty bit in ft_begin_checkpoint())
|
||||||
toku_ft_lock (h);
|
toku_ft_lock (ft);
|
||||||
h->root_xid_that_created = new_root_xid_that_created;
|
ft->h->root_xid_that_created = new_root_xid_that_created;
|
||||||
h->dirty = 1;
|
ft->h->dirty = 1;
|
||||||
toku_ft_unlock (h);
|
toku_ft_unlock (ft);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
ft_destroy(FT h) {
|
ft_destroy(FT ft) {
|
||||||
if (!h->panic) assert(!h->checkpoint_header);
|
if (!ft->panic) assert(!ft->checkpoint_header);
|
||||||
|
|
||||||
//header and checkpoint_header have same Blocktable pointer
|
//header and checkpoint_header have same Blocktable pointer
|
||||||
//cannot destroy since it is still in use by CURRENT
|
//cannot destroy since it is still in use by CURRENT
|
||||||
if (h->type == FT_CHECKPOINT_INPROGRESS) h->blocktable = NULL;
|
assert(ft->h->type == FT_CURRENT);
|
||||||
else {
|
toku_blocktable_destroy(&ft->blocktable);
|
||||||
assert(h->type == FT_CURRENT);
|
if (ft->descriptor.dbt.data) toku_free(ft->descriptor.dbt.data);
|
||||||
toku_blocktable_destroy(&h->blocktable);
|
if (ft->cmp_descriptor.dbt.data) toku_free(ft->cmp_descriptor.dbt.data);
|
||||||
if (h->descriptor.dbt.data) toku_free(h->descriptor.dbt.data);
|
toku_ft_destroy_treelock(ft);
|
||||||
if (h->cmp_descriptor.dbt.data) toku_free(h->cmp_descriptor.dbt.data);
|
toku_ft_destroy_reflock(ft);
|
||||||
toku_ft_destroy_treelock(h);
|
toku_omt_destroy(&ft->txns);
|
||||||
toku_omt_destroy(&h->txns);
|
toku_free(ft->h);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Make a copy of the header for the purpose of a checkpoint
|
// Make a copy of the header for the purpose of a checkpoint
|
||||||
|
// Not reentrant for a single FT.
|
||||||
|
// See ft_checkpoint for explanation of why
|
||||||
|
// FT lock must be held.
|
||||||
static void
|
static void
|
||||||
ft_copy_for_checkpoint(FT h, LSN checkpoint_lsn) {
|
ft_copy_for_checkpoint_unlocked(FT ft, LSN checkpoint_lsn) {
|
||||||
assert(h->type == FT_CURRENT);
|
assert(ft->h->type == FT_CURRENT);
|
||||||
assert(h->checkpoint_header == NULL);
|
assert(ft->checkpoint_header == NULL);
|
||||||
assert(h->panic==0);
|
assert(ft->panic==0);
|
||||||
|
|
||||||
FT XMALLOC(ch);
|
FT_HEADER ch = toku_xmemdup(ft->h, sizeof *ft->h);
|
||||||
*ch = *h; //Do a shallow copy
|
|
||||||
ch->type = FT_CHECKPOINT_INPROGRESS; //Different type
|
ch->type = FT_CHECKPOINT_INPROGRESS; //Different type
|
||||||
//printf("checkpoint_lsn=%" PRIu64 "\n", checkpoint_lsn.lsn);
|
//printf("checkpoint_lsn=%" PRIu64 "\n", checkpoint_lsn.lsn);
|
||||||
ch->checkpoint_lsn = checkpoint_lsn;
|
ch->checkpoint_lsn = checkpoint_lsn;
|
||||||
ch->panic_string = NULL;
|
|
||||||
|
|
||||||
//ch->blocktable is SHARED between the two headers
|
//ch->blocktable is SHARED between the two headers
|
||||||
h->checkpoint_header = ch;
|
ft->checkpoint_header = ch;
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
ft_free(FT h) {
|
|
||||||
ft_destroy(h);
|
|
||||||
toku_free(h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_free (FT h) {
|
toku_ft_free (FT ft) {
|
||||||
ft_free(h);
|
ft_destroy(ft);
|
||||||
|
toku_free(ft);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_init_treelock(FT h) {
|
toku_ft_init_treelock(FT ft) {
|
||||||
toku_mutex_init(&h->tree_lock, NULL);
|
toku_mutex_init(&ft->tree_lock, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_destroy_treelock(FT h) {
|
toku_ft_destroy_treelock(FT ft) {
|
||||||
toku_mutex_destroy(&h->tree_lock);
|
toku_mutex_destroy(&ft->tree_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_grab_treelock(FT h) {
|
toku_ft_grab_treelock(FT ft) {
|
||||||
toku_mutex_lock(&h->tree_lock);
|
toku_mutex_lock(&ft->tree_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_release_treelock(FT h) {
|
toku_ft_release_treelock(FT ft) {
|
||||||
toku_mutex_unlock(&h->tree_lock);
|
toku_mutex_unlock(&ft->tree_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_ft_init_reflock(FT ft) {
|
||||||
|
toku_mutex_init(&ft->ft_ref_lock, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_ft_destroy_reflock(FT ft) {
|
||||||
|
toku_mutex_destroy(&ft->ft_ref_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_ft_grab_reflock(FT ft) {
|
||||||
|
toku_mutex_lock(&ft->ft_ref_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_ft_release_reflock(FT ft) {
|
||||||
|
toku_mutex_unlock(&ft->ft_ref_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -106,13 +117,13 @@ toku_ft_release_treelock(FT h) {
|
||||||
// maps to cf->log_fassociate_during_checkpoint
|
// maps to cf->log_fassociate_during_checkpoint
|
||||||
static int
|
static int
|
||||||
ft_log_fassociate_during_checkpoint (CACHEFILE cf, void *header_v) {
|
ft_log_fassociate_during_checkpoint (CACHEFILE cf, void *header_v) {
|
||||||
FT h = header_v;
|
FT ft = header_v;
|
||||||
char* fname_in_env = toku_cachefile_fname_in_env(cf);
|
char* fname_in_env = toku_cachefile_fname_in_env(cf);
|
||||||
BYTESTRING bs = { strlen(fname_in_env), // don't include the NUL
|
BYTESTRING bs = { strlen(fname_in_env), // don't include the NUL
|
||||||
fname_in_env };
|
fname_in_env };
|
||||||
TOKULOGGER logger = toku_cachefile_logger(cf);
|
TOKULOGGER logger = toku_cachefile_logger(cf);
|
||||||
FILENUM filenum = toku_cachefile_filenum (cf);
|
FILENUM filenum = toku_cachefile_filenum (cf);
|
||||||
int r = toku_log_fassociate(logger, NULL, 0, filenum, h->flags, bs);
|
int r = toku_log_fassociate(logger, NULL, 0, filenum, ft->h->flags, bs);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,43 +144,64 @@ ft_log_suppress_rollback_during_checkpoint (CACHEFILE cf, void *header_v) {
|
||||||
|
|
||||||
// Maps to cf->begin_checkpoint_userdata
|
// Maps to cf->begin_checkpoint_userdata
|
||||||
// Create checkpoint-in-progress versions of header and translation (btt) (and fifo for now...).
|
// Create checkpoint-in-progress versions of header and translation (btt) (and fifo for now...).
|
||||||
//Has access to fd (it is protected).
|
// Has access to fd (it is protected).
|
||||||
|
//
|
||||||
|
// Not reentrant for a single FT (see ft_checkpoint)
|
||||||
static int
|
static int
|
||||||
ft_begin_checkpoint (LSN checkpoint_lsn, void *header_v) {
|
ft_begin_checkpoint (LSN checkpoint_lsn, void *header_v) {
|
||||||
FT h = header_v;
|
FT ft = header_v;
|
||||||
int r = h->panic;
|
int r = ft->panic;
|
||||||
if (r==0) {
|
if (r==0) {
|
||||||
// hold lock around copying and clearing of dirty bit
|
// hold lock around copying and clearing of dirty bit
|
||||||
toku_ft_lock (h);
|
toku_ft_lock (ft);
|
||||||
assert(h->type == FT_CURRENT);
|
assert(ft->h->type == FT_CURRENT);
|
||||||
assert(h->checkpoint_header == NULL);
|
assert(ft->checkpoint_header == NULL);
|
||||||
ft_copy_for_checkpoint(h, checkpoint_lsn);
|
ft_copy_for_checkpoint_unlocked(ft, checkpoint_lsn);
|
||||||
h->dirty = 0; // this is only place this bit is cleared (in currentheader)
|
ft->h->dirty = 0; // this is only place this bit is cleared (in currentheader)
|
||||||
// on_disk_stats includes on disk changes since last checkpoint,
|
toku_block_translation_note_start_checkpoint_unlocked(ft->blocktable);
|
||||||
// so checkpoint_staging_stats now includes changes for checkpoint in progress.
|
toku_ft_unlock (ft);
|
||||||
h->checkpoint_staging_stats = h->on_disk_stats;
|
|
||||||
toku_block_translation_note_start_checkpoint_unlocked(h->blocktable);
|
|
||||||
toku_ft_unlock (h);
|
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// #4922: Hack to remove data corruption race condition.
|
||||||
|
// Reading (and upgrading) a node up to version 19 causes this.
|
||||||
|
// We COULD skip this if we know that no nodes remained (as of last checkpoint)
|
||||||
|
// that are below version 19.
|
||||||
|
// If there are no nodes < version 19 this is harmless (field is unused).
|
||||||
|
// If there are, this will make certain the value is at least as low as necessary,
|
||||||
|
// and not much lower. (Too low is good, too high can cause data corruption).
|
||||||
|
// TODO(yoni): If we ever stop supporting upgrades of nodes < version 19 we can delete this.
|
||||||
|
// TODO(yoni): If we know no nodes are left to upgrade, we can skip this. (Probably not worth doing).
|
||||||
|
static void
|
||||||
|
ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(FT ft) {
|
||||||
|
if (ft->h->layout_version_original < FT_LAYOUT_VERSION_19) {
|
||||||
|
ft->checkpoint_header->highest_unused_msn_for_upgrade = ft->h->highest_unused_msn_for_upgrade;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// maps to cf->checkpoint_userdata
|
// maps to cf->checkpoint_userdata
|
||||||
// Write checkpoint-in-progress versions of header and translation to disk (really to OS internal buffer).
|
// Write checkpoint-in-progress versions of header and translation to disk (really to OS internal buffer).
|
||||||
// Copy current header's version of checkpoint_staging stat64info to checkpoint header.
|
// Copy current header's version of checkpoint_staging stat64info to checkpoint header.
|
||||||
// Must have access to fd (protected).
|
// Must have access to fd (protected).
|
||||||
// Requires: all pending bits are clear. This implies that no thread will modify the checkpoint_staging
|
// Requires: all pending bits are clear. This implies that no thread will modify the checkpoint_staging
|
||||||
// version of the stat64info.
|
// version of the stat64info.
|
||||||
|
//
|
||||||
|
// No locks are taken for checkpoint_count/lsn because this is single threaded. Can be called by:
|
||||||
|
// - ft_close
|
||||||
|
// - end_checkpoint
|
||||||
|
// checkpoints hold references to FTs and so they cannot be closed during a checkpoint.
|
||||||
|
// ft_close is not reentrant for a single FT
|
||||||
|
// end_checkpoint is not reentrant period
|
||||||
static int
|
static int
|
||||||
ft_checkpoint (CACHEFILE cf, int fd, void *header_v) {
|
ft_checkpoint (CACHEFILE cf, int fd, void *header_v) {
|
||||||
FT h = header_v;
|
FT ft = header_v;
|
||||||
FT ch = h->checkpoint_header;
|
FT_HEADER ch = ft->checkpoint_header;
|
||||||
int r = 0;
|
int r = 0;
|
||||||
if (h->panic!=0) goto handle_error;
|
if (ft->panic!=0) goto handle_error;
|
||||||
//printf("%s:%d allocated_limit=%lu writing queue to %lu\n", __FILE__, __LINE__,
|
//printf("%s:%d allocated_limit=%lu writing queue to %lu\n", __FILE__, __LINE__,
|
||||||
// block_allocator_allocated_limit(h->block_allocator), h->unused_blocks.b*h->nodesize);
|
// block_allocator_allocated_limit(h->block_allocator), h->unused_blocks.b*h->nodesize);
|
||||||
assert(ch);
|
assert(ch);
|
||||||
if (ch->panic!=0) goto handle_error;
|
|
||||||
assert(ch->type == FT_CHECKPOINT_INPROGRESS);
|
assert(ch->type == FT_CHECKPOINT_INPROGRESS);
|
||||||
if (ch->dirty) { // this is only place this bit is tested (in checkpoint_header)
|
if (ch->dirty) { // this is only place this bit is tested (in checkpoint_header)
|
||||||
TOKULOGGER logger = toku_cachefile_logger(cf);
|
TOKULOGGER logger = toku_cachefile_logger(cf);
|
||||||
|
@ -178,22 +210,13 @@ ft_checkpoint (CACHEFILE cf, int fd, void *header_v) {
|
||||||
if (r!=0) goto handle_error;
|
if (r!=0) goto handle_error;
|
||||||
}
|
}
|
||||||
uint64_t now = (uint64_t) time(NULL); // 4018;
|
uint64_t now = (uint64_t) time(NULL); // 4018;
|
||||||
h->time_of_last_modification = now;
|
ft->h->time_of_last_modification = now;
|
||||||
ch->time_of_last_modification = now;
|
ch->time_of_last_modification = now;
|
||||||
ch->checkpoint_count++;
|
ch->checkpoint_count++;
|
||||||
// Threadsafety of checkpoint_staging_stats here depends on there being no pending bits set,
|
ft_hack_highest_unused_msn_for_upgrade_for_checkpoint(ft);
|
||||||
// so that all callers to flush callback should have the for_checkpoint argument false,
|
|
||||||
// and therefore will not modify the checkpoint_staging_stats.
|
|
||||||
// TODO 4184: If the flush callback is called with the for_checkpoint argument true even when all the pending bits
|
|
||||||
// are clear, then this is a problem.
|
|
||||||
ch->checkpoint_staging_stats = h->checkpoint_staging_stats;
|
|
||||||
// The in_memory_stats and on_disk_stats in the checkpoint header should be ignored, but we set them
|
|
||||||
// here just in case the serializer looks in the wrong place.
|
|
||||||
ch->in_memory_stats = ch->checkpoint_staging_stats;
|
|
||||||
ch->on_disk_stats = ch->checkpoint_staging_stats;
|
|
||||||
|
|
||||||
// write translation and header to disk (or at least to OS internal buffer)
|
// write translation and header to disk (or at least to OS internal buffer)
|
||||||
r = toku_serialize_ft_to(fd, ch);
|
r = toku_serialize_ft_to(fd, ch, ft->blocktable, ft->cf);
|
||||||
if (r!=0) goto handle_error;
|
if (r!=0) goto handle_error;
|
||||||
ch->dirty = 0; // this is only place this bit is cleared (in checkpoint_header)
|
ch->dirty = 0; // this is only place this bit is cleared (in checkpoint_header)
|
||||||
|
|
||||||
|
@ -202,22 +225,16 @@ ft_checkpoint (CACHEFILE cf, int fd, void *header_v) {
|
||||||
if (r!=0) {
|
if (r!=0) {
|
||||||
goto handle_error;
|
goto handle_error;
|
||||||
}
|
}
|
||||||
h->checkpoint_count++; // checkpoint succeeded, next checkpoint will save to alternate header location
|
ft->h->checkpoint_count++; // checkpoint succeeded, next checkpoint will save to alternate header location
|
||||||
h->checkpoint_lsn = ch->checkpoint_lsn; //Header updated.
|
ft->h->checkpoint_lsn = ch->checkpoint_lsn; //Header updated.
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
toku_block_translation_note_skipped_checkpoint(ch->blocktable);
|
toku_block_translation_note_skipped_checkpoint(ft->blocktable);
|
||||||
}
|
}
|
||||||
if (0) {
|
if (0) {
|
||||||
handle_error:
|
handle_error:
|
||||||
if (h->panic) r = h->panic;
|
if (ft->panic) r = ft->panic;
|
||||||
else if (ch->panic) {
|
else toku_block_translation_note_failed_checkpoint(ft->blocktable);
|
||||||
r = ch->panic;
|
|
||||||
//Steal panic string. Cannot afford to malloc.
|
|
||||||
h->panic = ch->panic;
|
|
||||||
h->panic_string = ch->panic_string;
|
|
||||||
}
|
|
||||||
else toku_block_translation_note_failed_checkpoint(ch->blocktable);
|
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
|
|
||||||
|
@ -229,15 +246,15 @@ handle_error:
|
||||||
// Must have access to fd (protected)
|
// Must have access to fd (protected)
|
||||||
static int
|
static int
|
||||||
ft_end_checkpoint (CACHEFILE UU(cachefile), int fd, void *header_v) {
|
ft_end_checkpoint (CACHEFILE UU(cachefile), int fd, void *header_v) {
|
||||||
FT h = header_v;
|
FT ft = header_v;
|
||||||
int r = h->panic;
|
int r = ft->panic;
|
||||||
if (r==0) {
|
if (r==0) {
|
||||||
assert(h->type == FT_CURRENT);
|
assert(ft->h->type == FT_CURRENT);
|
||||||
toku_block_translation_note_end_checkpoint(h->blocktable, fd, h);
|
toku_block_translation_note_end_checkpoint(ft->blocktable, fd, ft);
|
||||||
}
|
}
|
||||||
if (h->checkpoint_header) { // could be NULL only if panic was true at begin_checkpoint
|
if (ft->checkpoint_header) { // could be NULL only if panic was true at begin_checkpoint
|
||||||
ft_free(h->checkpoint_header);
|
toku_free(ft->checkpoint_header);
|
||||||
h->checkpoint_header = NULL;
|
ft->checkpoint_header = NULL;
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -246,16 +263,16 @@ ft_end_checkpoint (CACHEFILE UU(cachefile), int fd, void *header_v) {
|
||||||
// Has access to fd (it is protected).
|
// Has access to fd (it is protected).
|
||||||
static int
|
static int
|
||||||
ft_close (CACHEFILE cachefile, int fd, void *header_v, char **malloced_error_string, BOOL oplsn_valid, LSN oplsn) {
|
ft_close (CACHEFILE cachefile, int fd, void *header_v, char **malloced_error_string, BOOL oplsn_valid, LSN oplsn) {
|
||||||
FT h = header_v;
|
FT ft = header_v;
|
||||||
assert(h->type == FT_CURRENT);
|
assert(ft->h->type == FT_CURRENT);
|
||||||
toku_ft_lock(h);
|
// We already have exclusive access to this field already, so skip the locking.
|
||||||
assert(!toku_ft_needed(h));
|
// This should already never fail.
|
||||||
toku_ft_unlock(h);
|
invariant(!toku_ft_needed_unlocked(ft));
|
||||||
int r = 0;
|
int r = 0;
|
||||||
if (h->panic) {
|
if (ft->panic) {
|
||||||
r = h->panic;
|
r = ft->panic;
|
||||||
} else {
|
} else {
|
||||||
assert(h->cf == cachefile);
|
assert(ft->cf == cachefile);
|
||||||
TOKULOGGER logger = toku_cachefile_logger(cachefile);
|
TOKULOGGER logger = toku_cachefile_logger(cachefile);
|
||||||
LSN lsn = ZERO_LSN;
|
LSN lsn = ZERO_LSN;
|
||||||
//Get LSN
|
//Get LSN
|
||||||
|
@ -263,8 +280,8 @@ ft_close (CACHEFILE cachefile, int fd, void *header_v, char **malloced_error_str
|
||||||
//Use recovery-specified lsn
|
//Use recovery-specified lsn
|
||||||
lsn = oplsn;
|
lsn = oplsn;
|
||||||
//Recovery cannot reduce lsn of a header.
|
//Recovery cannot reduce lsn of a header.
|
||||||
if (lsn.lsn < h->checkpoint_lsn.lsn)
|
if (lsn.lsn < ft->h->checkpoint_lsn.lsn)
|
||||||
lsn = h->checkpoint_lsn;
|
lsn = ft->h->checkpoint_lsn;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
//Get LSN from logger
|
//Get LSN from logger
|
||||||
|
@ -273,11 +290,11 @@ ft_close (CACHEFILE cachefile, int fd, void *header_v, char **malloced_error_str
|
||||||
char* fname_in_env = toku_cachefile_fname_in_env(cachefile);
|
char* fname_in_env = toku_cachefile_fname_in_env(cachefile);
|
||||||
assert(fname_in_env);
|
assert(fname_in_env);
|
||||||
BYTESTRING bs = {.len=strlen(fname_in_env), .data=fname_in_env};
|
BYTESTRING bs = {.len=strlen(fname_in_env), .data=fname_in_env};
|
||||||
r = toku_log_fclose(logger, &lsn, h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out.
|
r = toku_log_fclose(logger, &lsn, ft->h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out.
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (h->dirty) { // this is the only place this bit is tested (in currentheader)
|
if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader)
|
||||||
if (logger) { //Rollback cachefile MUST NOT BE CLOSED DIRTY
|
if (logger) { //Rollback cachefile MUST NOT BE CLOSED DIRTY
|
||||||
//It can be checkpointed only via 'checkpoint'
|
//It can be checkpointed only via 'checkpoint'
|
||||||
assert(logger->rollback_cachefile != cachefile);
|
assert(logger->rollback_cachefile != cachefile);
|
||||||
|
@ -286,18 +303,18 @@ ft_close (CACHEFILE cachefile, int fd, void *header_v, char **malloced_error_str
|
||||||
//assert(lsn.lsn!=0);
|
//assert(lsn.lsn!=0);
|
||||||
r2 = ft_begin_checkpoint(lsn, header_v);
|
r2 = ft_begin_checkpoint(lsn, header_v);
|
||||||
if (r==0) r = r2;
|
if (r==0) r = r2;
|
||||||
r2 = ft_checkpoint(cachefile, fd, h);
|
r2 = ft_checkpoint(cachefile, fd, ft);
|
||||||
if (r==0) r = r2;
|
if (r==0) r = r2;
|
||||||
r2 = ft_end_checkpoint(cachefile, fd, header_v);
|
r2 = ft_end_checkpoint(cachefile, fd, header_v);
|
||||||
if (r==0) r = r2;
|
if (r==0) r = r2;
|
||||||
if (!h->panic) assert(!h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary)
|
if (!ft->panic) assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (malloced_error_string) *malloced_error_string = h->panic_string;
|
if (malloced_error_string) *malloced_error_string = ft->panic_string;
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
r = h->panic;
|
r = ft->panic;
|
||||||
}
|
}
|
||||||
toku_ft_free(h);
|
toku_ft_free(ft);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -309,82 +326,75 @@ ft_note_pin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v)
|
||||||
{
|
{
|
||||||
//Set arbitrary brt (for given header) as pinned by checkpoint.
|
//Set arbitrary brt (for given header) as pinned by checkpoint.
|
||||||
//Only one can be pinned (only one checkpoint at a time), but not worth verifying.
|
//Only one can be pinned (only one checkpoint at a time), but not worth verifying.
|
||||||
FT h = header_v;
|
FT ft = header_v;
|
||||||
assert(!h->pinned_by_checkpoint);
|
|
||||||
h->pinned_by_checkpoint = true;
|
// Note: open_close lock is held by checkpoint begin
|
||||||
|
toku_ft_grab_reflock(ft);
|
||||||
|
assert(!ft->pinned_by_checkpoint);
|
||||||
|
assert(toku_ft_needed_unlocked(ft));
|
||||||
|
ft->pinned_by_checkpoint = true;
|
||||||
|
toku_ft_release_reflock(ft);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
unpin_by_checkpoint_callback(FT ft, void *extra) {
|
||||||
|
invariant(extra == NULL);
|
||||||
|
invariant(ft->pinned_by_checkpoint);
|
||||||
|
ft->pinned_by_checkpoint = false; //Unpin
|
||||||
|
}
|
||||||
|
|
||||||
// maps to cf->note_unpin_by_checkpoint
|
// maps to cf->note_unpin_by_checkpoint
|
||||||
//Must be protected by ydb lock.
|
//Must be protected by ydb lock.
|
||||||
//Called by end_checkpoint, which grabs ydb lock around note_unpin
|
//Called by end_checkpoint, which grabs ydb lock around note_unpin
|
||||||
static int
|
static int
|
||||||
ft_note_unpin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v)
|
ft_note_unpin_by_checkpoint (CACHEFILE UU(cachefile), void *header_v)
|
||||||
{
|
{
|
||||||
FT h = header_v;
|
FT ft = header_v;
|
||||||
assert(h->pinned_by_checkpoint);
|
toku_ft_remove_reference(ft, false, ZERO_LSN, unpin_by_checkpoint_callback, NULL);
|
||||||
h->pinned_by_checkpoint = false; //Unpin
|
return 0;
|
||||||
int r = 0;
|
|
||||||
//Close if necessary
|
|
||||||
if (!toku_ft_needed(h)) {
|
|
||||||
//Close immediately.
|
|
||||||
char *error_string = NULL;
|
|
||||||
r = toku_remove_ft(h, &error_string, false, ZERO_LSN);
|
|
||||||
lazy_assert_zero(r);
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
//
|
||||||
// End of Functions that are callbacks to the cachefile
|
// End of Functions that are callbacks to the cachefile
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
static int setup_initial_ft_root_node (FT h, BLOCKNUM blocknum) {
|
static int setup_initial_ft_root_node (FT ft, BLOCKNUM blocknum) {
|
||||||
FTNODE XMALLOC(node);
|
FTNODE XMALLOC(node);
|
||||||
toku_initialize_empty_ftnode(node, blocknum, 0, 1, h->layout_version, h->nodesize, h->flags);
|
toku_initialize_empty_ftnode(node, blocknum, 0, 1, ft->h->layout_version, ft->h->nodesize, ft->h->flags);
|
||||||
BP_STATE(node,0) = PT_AVAIL;
|
BP_STATE(node,0) = PT_AVAIL;
|
||||||
|
|
||||||
u_int32_t fullhash = toku_cachetable_hash(h->cf, blocknum);
|
u_int32_t fullhash = toku_cachetable_hash(ft->cf, blocknum);
|
||||||
node->fullhash = fullhash;
|
node->fullhash = fullhash;
|
||||||
int r = toku_cachetable_put(h->cf, blocknum, fullhash,
|
int r = toku_cachetable_put(ft->cf, blocknum, fullhash,
|
||||||
node, make_ftnode_pair_attr(node),
|
node, make_ftnode_pair_attr(node),
|
||||||
get_write_callbacks_for_node(h));
|
get_write_callbacks_for_node(ft));
|
||||||
if (r != 0)
|
if (r != 0)
|
||||||
toku_free(node);
|
toku_free(node);
|
||||||
else
|
else
|
||||||
toku_unpin_ftnode(h, node);
|
toku_unpin_ftnode(ft, node);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ft_init (FT ft, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) {
|
ft_init(FT ft, FT_OPTIONS options, CACHEFILE cf) {
|
||||||
ft->type = FT_CURRENT;
|
|
||||||
ft->checkpoint_header = NULL;
|
ft->checkpoint_header = NULL;
|
||||||
toku_ft_init_treelock(ft);
|
ft->layout_version_read_from_disk = FT_LAYOUT_VERSION; // fake, prevent unnecessary upgrade logic
|
||||||
toku_blocktable_create_new(&ft->blocktable);
|
|
||||||
//Assign blocknum for root block, also dirty the header
|
|
||||||
toku_allocate_blocknum(ft->blocktable, &ft->root_blocknum, ft);
|
|
||||||
|
|
||||||
toku_list_init(&ft->live_ft_handles);
|
toku_list_init(&ft->live_ft_handles);
|
||||||
int r = toku_omt_create(&ft->txns);
|
int r = toku_omt_create(&ft->txns);
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
ft->flags = options->flags;
|
|
||||||
ft->nodesize = options->nodesize;
|
|
||||||
ft->basementnodesize = options->basementnodesize;
|
|
||||||
ft->compression_method = options->compression_method;
|
|
||||||
ft->compare_fun = options->compare_fun;
|
ft->compare_fun = options->compare_fun;
|
||||||
ft->update_fun = options->update_fun;
|
ft->update_fun = options->update_fun;
|
||||||
|
|
||||||
if (ft->cf!=NULL) assert(ft->cf == cf);
|
if (ft->cf != NULL) {
|
||||||
|
assert(ft->cf == cf);
|
||||||
|
}
|
||||||
ft->cf = cf;
|
ft->cf = cf;
|
||||||
ft->root_xid_that_created = txn ? txn->ancestor_txnid64 : TXNID_NONE;
|
ft->in_memory_stats = ZEROSTATS;
|
||||||
ft->in_memory_stats = ZEROSTATS;
|
|
||||||
ft->on_disk_stats = ZEROSTATS;
|
|
||||||
ft->checkpoint_staging_stats = ZEROSTATS;
|
|
||||||
ft->highest_unused_msn_for_upgrade.msn = MIN_MSN.msn - 1;
|
|
||||||
|
|
||||||
r = setup_initial_ft_root_node(ft, ft->root_blocknum);
|
r = setup_initial_ft_root_node(ft, ft->h->root_blocknum);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
@ -407,8 +417,41 @@ exit:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// allocate and initialize a brt header.
|
static FT_HEADER
|
||||||
// t->ft->cf is not set to anything.
|
ft_header_new(FT_OPTIONS options, BLOCKNUM root_blocknum, TXNID root_xid_that_created)
|
||||||
|
{
|
||||||
|
uint64_t now = (uint64_t) time(NULL);
|
||||||
|
struct ft_header h = {
|
||||||
|
.type = FT_CURRENT,
|
||||||
|
.dirty = 0,
|
||||||
|
.checkpoint_count = 0,
|
||||||
|
.checkpoint_lsn = ZERO_LSN,
|
||||||
|
.layout_version = FT_LAYOUT_VERSION,
|
||||||
|
.layout_version_original = FT_LAYOUT_VERSION,
|
||||||
|
.build_id = BUILD_ID,
|
||||||
|
.build_id_original = BUILD_ID,
|
||||||
|
.time_of_creation = now,
|
||||||
|
.root_xid_that_created = root_xid_that_created,
|
||||||
|
.time_of_last_modification = now,
|
||||||
|
.time_of_last_verification = 0,
|
||||||
|
.root_blocknum = root_blocknum,
|
||||||
|
.flags = options->flags,
|
||||||
|
.nodesize = options->nodesize,
|
||||||
|
.basementnodesize = options->basementnodesize,
|
||||||
|
.compression_method = options->compression_method,
|
||||||
|
.highest_unused_msn_for_upgrade = { .msn = (MIN_MSN.msn - 1) },
|
||||||
|
.time_of_last_optimize_begin = 0,
|
||||||
|
.time_of_last_optimize_end = 0,
|
||||||
|
.count_of_optimize_in_progress = 0,
|
||||||
|
.count_of_optimize_in_progress_read_from_disk = 0,
|
||||||
|
.msn_at_start_of_last_completed_optimize = ZERO_MSN,
|
||||||
|
.on_disk_stats = ZEROSTATS
|
||||||
|
};
|
||||||
|
return toku_xmemdup(&h, sizeof h);
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocate and initialize a fractal tree.
|
||||||
|
// t->ft->cf is not set to anything. TODO(leif): I don't think that's true
|
||||||
int
|
int
|
||||||
toku_create_new_ft(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) {
|
toku_create_new_ft(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) {
|
||||||
int r;
|
int r;
|
||||||
|
@ -416,23 +459,18 @@ toku_create_new_ft(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) {
|
||||||
|
|
||||||
FT XCALLOC(ft);
|
FT XCALLOC(ft);
|
||||||
|
|
||||||
|
|
||||||
ft->layout_version = FT_LAYOUT_VERSION;
|
|
||||||
ft->layout_version_original = FT_LAYOUT_VERSION;
|
|
||||||
ft->layout_version_read_from_disk = FT_LAYOUT_VERSION; // fake, prevent unnecessary upgrade logic
|
|
||||||
|
|
||||||
ft->build_id = BUILD_ID;
|
|
||||||
ft->build_id_original = BUILD_ID;
|
|
||||||
|
|
||||||
uint64_t now = (uint64_t) time(NULL);
|
|
||||||
ft->time_of_creation = now;
|
|
||||||
ft->time_of_last_modification = now;
|
|
||||||
ft->time_of_last_verification = 0;
|
|
||||||
|
|
||||||
memset(&ft->descriptor, 0, sizeof(ft->descriptor));
|
memset(&ft->descriptor, 0, sizeof(ft->descriptor));
|
||||||
memset(&ft->cmp_descriptor, 0, sizeof(ft->cmp_descriptor));
|
memset(&ft->cmp_descriptor, 0, sizeof(ft->cmp_descriptor));
|
||||||
|
|
||||||
r = ft_init(ft, options, cf, txn);
|
ft->h = ft_header_new(options, make_blocknum(0), (txn ? txn->ancestor_txnid64 : TXNID_NONE));
|
||||||
|
|
||||||
|
toku_ft_init_treelock(ft);
|
||||||
|
toku_ft_init_reflock(ft);
|
||||||
|
toku_blocktable_create_new(&ft->blocktable);
|
||||||
|
//Assign blocknum for root block, also dirty the header
|
||||||
|
toku_allocate_blocknum(ft->blocktable, &ft->h->root_blocknum, ft);
|
||||||
|
|
||||||
|
r = ft_init(ft, options, cf);
|
||||||
if (r != 0) {
|
if (r != 0) {
|
||||||
goto exit;
|
goto exit;
|
||||||
}
|
}
|
||||||
|
@ -504,22 +542,30 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_ac
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live) {
|
toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live) {
|
||||||
toku_ft_lock(ft);
|
toku_ft_grab_reflock(ft);
|
||||||
live->ft = ft;
|
live->ft = ft;
|
||||||
toku_list_push(&ft->live_ft_handles, &live->live_ft_handle_link);
|
toku_list_push(&ft->live_ft_handles, &live->live_ft_handle_link);
|
||||||
toku_ft_unlock(ft);
|
toku_ft_release_reflock(ft);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_ft_needed(FT h) {
|
toku_ft_needed_unlocked(FT h) {
|
||||||
return !toku_list_empty(&h->live_ft_handles) || toku_omt_size(h->txns) != 0 || h->pinned_by_checkpoint;
|
return !toku_list_empty(&h->live_ft_handles) || toku_omt_size(h->txns) != 0 || h->pinned_by_checkpoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BOOL
|
||||||
|
toku_ft_has_one_reference_unlocked(FT ft) {
|
||||||
|
u_int32_t pinned_by_checkpoint = ft->pinned_by_checkpoint ? 1 : 0;
|
||||||
|
u_int32_t num_txns = toku_omt_size(ft->txns);
|
||||||
|
int num_handles = toku_list_num_elements_est(&ft->live_ft_handles);
|
||||||
|
return ((pinned_by_checkpoint + num_txns + num_handles) == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Close brt. If opsln_valid, use given oplsn as lsn in brt header instead of logging
|
// Close brt. If opsln_valid, use given oplsn as lsn in brt header instead of logging
|
||||||
// the close and using the lsn provided by logging the close. (Subject to constraint
|
// the close and using the lsn provided by logging the close. (Subject to constraint
|
||||||
// that if a newer lsn is already in the dictionary, don't overwrite the dictionary.)
|
// that if a newer lsn is already in the dictionary, don't overwrite the dictionary.)
|
||||||
int toku_remove_ft (FT h, char **error_string, BOOL oplsn_valid, LSN oplsn) {
|
int toku_remove_ft (FT h, char **error_string, BOOL oplsn_valid, LSN oplsn) {
|
||||||
assert(!h->pinned_by_checkpoint);
|
|
||||||
int r = 0;
|
int r = 0;
|
||||||
// Must do this work before closing the cf
|
// Must do this work before closing the cf
|
||||||
if (h->cf) {
|
if (h->cf) {
|
||||||
|
@ -534,11 +580,11 @@ int toku_remove_ft (FT h, char **error_string, BOOL oplsn_valid, LSN oplsn) {
|
||||||
// for this header, returns NULL
|
// for this header, returns NULL
|
||||||
FT_HANDLE toku_ft_get_some_existing_ft_handle(FT h) {
|
FT_HANDLE toku_ft_get_some_existing_ft_handle(FT h) {
|
||||||
FT_HANDLE ft_handle_ret = NULL;
|
FT_HANDLE ft_handle_ret = NULL;
|
||||||
toku_ft_lock(h);
|
toku_ft_grab_reflock(h);
|
||||||
if (!toku_list_empty(&h->live_ft_handles)) {
|
if (!toku_list_empty(&h->live_ft_handles)) {
|
||||||
ft_handle_ret = toku_list_struct(toku_list_head(&h->live_ft_handles), struct ft_handle, live_ft_handle_link);
|
ft_handle_ret = toku_list_struct(toku_list_head(&h->live_ft_handles), struct ft_handle, live_ft_handle_link);
|
||||||
}
|
}
|
||||||
toku_ft_unlock(h);
|
toku_ft_release_reflock(h);
|
||||||
return ft_handle_ret;
|
return ft_handle_ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -548,16 +594,16 @@ FT_HANDLE toku_ft_get_some_existing_ft_handle(FT h) {
|
||||||
// convenient here for keeping the HOT variables threadsafe.)
|
// convenient here for keeping the HOT variables threadsafe.)
|
||||||
void
|
void
|
||||||
toku_ft_note_hot_begin(FT_HANDLE brt) {
|
toku_ft_note_hot_begin(FT_HANDLE brt) {
|
||||||
FT h = brt->ft;
|
FT ft = brt->ft;
|
||||||
time_t now = time(NULL);
|
time_t now = time(NULL);
|
||||||
|
|
||||||
// hold lock around setting and clearing of dirty bit
|
// hold lock around setting and clearing of dirty bit
|
||||||
// (see cooperative use of dirty bit in ft_begin_checkpoint())
|
// (see cooperative use of dirty bit in ft_begin_checkpoint())
|
||||||
toku_ft_lock(h);
|
toku_ft_lock(ft);
|
||||||
h->time_of_last_optimize_begin = now;
|
ft->h->time_of_last_optimize_begin = now;
|
||||||
h->count_of_optimize_in_progress++;
|
ft->h->count_of_optimize_in_progress++;
|
||||||
h->dirty = 1;
|
ft->h->dirty = 1;
|
||||||
toku_ft_unlock(h);
|
toku_ft_unlock(ft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -565,47 +611,45 @@ toku_ft_note_hot_begin(FT_HANDLE brt) {
|
||||||
// Note: See note for toku_ft_note_hot_begin().
|
// Note: See note for toku_ft_note_hot_begin().
|
||||||
void
|
void
|
||||||
toku_ft_note_hot_complete(FT_HANDLE brt, BOOL success, MSN msn_at_start_of_hot) {
|
toku_ft_note_hot_complete(FT_HANDLE brt, BOOL success, MSN msn_at_start_of_hot) {
|
||||||
FT h = brt->ft;
|
FT ft = brt->ft;
|
||||||
time_t now = time(NULL);
|
time_t now = time(NULL);
|
||||||
|
|
||||||
toku_ft_lock(h);
|
toku_ft_lock(ft);
|
||||||
h->count_of_optimize_in_progress--;
|
ft->h->count_of_optimize_in_progress--;
|
||||||
if (success) {
|
if (success) {
|
||||||
h->time_of_last_optimize_end = now;
|
ft->h->time_of_last_optimize_end = now;
|
||||||
h->msn_at_start_of_last_completed_optimize = msn_at_start_of_hot;
|
ft->h->msn_at_start_of_last_completed_optimize = msn_at_start_of_hot;
|
||||||
// If we just successfully completed an optimization and no other thread is performing
|
// If we just successfully completed an optimization and no other thread is performing
|
||||||
// an optimization, then the number of optimizations in progress is zero.
|
// an optimization, then the number of optimizations in progress is zero.
|
||||||
// If there was a crash during a HOT optimization, this is how count_of_optimize_in_progress
|
// If there was a crash during a HOT optimization, this is how count_of_optimize_in_progress
|
||||||
// would be reset to zero on the disk after recovery from that crash.
|
// would be reset to zero on the disk after recovery from that crash.
|
||||||
if (h->count_of_optimize_in_progress == h->count_of_optimize_in_progress_read_from_disk)
|
if (ft->h->count_of_optimize_in_progress == ft->h->count_of_optimize_in_progress_read_from_disk)
|
||||||
h->count_of_optimize_in_progress = 0;
|
ft->h->count_of_optimize_in_progress = 0;
|
||||||
}
|
}
|
||||||
h->dirty = 1;
|
ft->h->dirty = 1;
|
||||||
toku_ft_unlock(h);
|
toku_ft_unlock(ft);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_init(FT h,
|
toku_ft_init(FT ft,
|
||||||
BLOCKNUM root_blocknum_on_disk, LSN checkpoint_lsn, TXNID root_xid_that_created, uint32_t target_nodesize, uint32_t target_basementnodesize, enum toku_compression_method compression_method) {
|
BLOCKNUM root_blocknum_on_disk,
|
||||||
memset(h, 0, sizeof *h);
|
LSN checkpoint_lsn,
|
||||||
h->layout_version = FT_LAYOUT_VERSION;
|
TXNID root_xid_that_created,
|
||||||
h->layout_version_original = FT_LAYOUT_VERSION;
|
uint32_t target_nodesize,
|
||||||
h->build_id = BUILD_ID;
|
uint32_t target_basementnodesize,
|
||||||
h->build_id_original = BUILD_ID;
|
enum toku_compression_method compression_method)
|
||||||
uint64_t now = (uint64_t) time(NULL);
|
{
|
||||||
h->time_of_creation = now;
|
memset(ft, 0, sizeof *ft);
|
||||||
h->time_of_last_modification = now;
|
struct ft_options options = {
|
||||||
h->time_of_last_verification = 0;
|
.nodesize = target_nodesize,
|
||||||
h->checkpoint_count = 1;
|
.basementnodesize = target_basementnodesize,
|
||||||
h->checkpoint_lsn = checkpoint_lsn;
|
.compression_method = compression_method,
|
||||||
h->nodesize = target_nodesize;
|
.flags = 0
|
||||||
h->basementnodesize = target_basementnodesize;
|
};
|
||||||
h->root_blocknum = root_blocknum_on_disk;
|
ft->h = ft_header_new(&options, root_blocknum_on_disk, root_xid_that_created);
|
||||||
h->flags = 0;
|
ft->h->checkpoint_count = 1;
|
||||||
h->root_xid_that_created = root_xid_that_created;
|
ft->h->checkpoint_lsn = checkpoint_lsn;
|
||||||
h->compression_method = compression_method;
|
|
||||||
h->highest_unused_msn_for_upgrade.msn = MIN_MSN.msn - 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Open a brt for use by redirect. The new brt must have the same dict_id as the old_ft passed in. (FILENUM is assigned by the ft_handle_open() function.)
|
// Open a brt for use by redirect. The new brt must have the same dict_id as the old_ft passed in. (FILENUM is assigned by the ft_handle_open() function.)
|
||||||
|
@ -620,11 +664,11 @@ ft_handle_open_for_redirect(FT_HANDLE *new_ftp, const char *fname_in_env, TOKUTX
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
r = toku_ft_set_update(t, old_h->update_fun);
|
r = toku_ft_set_update(t, old_h->update_fun);
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
r = toku_ft_set_nodesize(t, old_h->nodesize);
|
r = toku_ft_set_nodesize(t, old_h->h->nodesize);
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
r = toku_ft_set_basementnodesize(t, old_h->basementnodesize);
|
r = toku_ft_set_basementnodesize(t, old_h->h->basementnodesize);
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
r = toku_ft_set_compression_method(t, old_h->compression_method);
|
r = toku_ft_set_compression_method(t, old_h->h->compression_method);
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
CACHETABLE ct = toku_cachefile_get_cachetable(old_h->cf);
|
CACHETABLE ct = toku_cachefile_get_cachetable(old_h->cf);
|
||||||
r = toku_ft_handle_open_with_dict_id(t, fname_in_env, 0, 0, ct, txn, old_h->dict_id);
|
r = toku_ft_handle_open_with_dict_id(t, fname_in_env, 0, 0, ct, txn, old_h->dict_id);
|
||||||
|
@ -662,14 +706,13 @@ dictionary_redirect_internal(const char *dst_fname_in_env, FT src_h, TOKUTXN txn
|
||||||
|
|
||||||
// for each live brt, brt->ft is currently src_h
|
// for each live brt, brt->ft is currently src_h
|
||||||
// we want to change it to dummy_dst
|
// we want to change it to dummy_dst
|
||||||
|
toku_ft_grab_reflock(src_h);
|
||||||
while (!toku_list_empty(&src_h->live_ft_handles)) {
|
while (!toku_list_empty(&src_h->live_ft_handles)) {
|
||||||
list = src_h->live_ft_handles.next;
|
list = src_h->live_ft_handles.next;
|
||||||
FT_HANDLE src_handle = NULL;
|
FT_HANDLE src_handle = NULL;
|
||||||
src_handle = toku_list_struct(list, struct ft_handle, live_ft_handle_link);
|
src_handle = toku_list_struct(list, struct ft_handle, live_ft_handle_link);
|
||||||
|
|
||||||
toku_ft_lock(src_h);
|
|
||||||
toku_list_remove(&src_handle->live_ft_handle_link);
|
toku_list_remove(&src_handle->live_ft_handle_link);
|
||||||
toku_ft_unlock(src_h);
|
|
||||||
|
|
||||||
toku_ft_note_ft_handle_open(dst_h, src_handle);
|
toku_ft_note_ft_handle_open(dst_h, src_handle);
|
||||||
if (src_handle->redirect_callback) {
|
if (src_handle->redirect_callback) {
|
||||||
|
@ -677,6 +720,9 @@ dictionary_redirect_internal(const char *dst_fname_in_env, FT src_h, TOKUTXN txn
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert(dst_h);
|
assert(dst_h);
|
||||||
|
// making sure that we are not leaking src_h
|
||||||
|
assert(toku_ft_needed_unlocked(src_h));
|
||||||
|
toku_ft_release_reflock(src_h);
|
||||||
|
|
||||||
r = toku_ft_handle_close(tmp_dst_ft, FALSE, ZERO_LSN);
|
r = toku_ft_handle_close(tmp_dst_ft, FALSE, ZERO_LSN);
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
|
@ -699,23 +745,16 @@ toku_dictionary_redirect_abort(FT old_h, FT new_h, TOKUTXN txn) {
|
||||||
assert(old_filenum.fileid!=new_filenum.fileid); //Cannot be same file.
|
assert(old_filenum.fileid!=new_filenum.fileid); //Cannot be same file.
|
||||||
|
|
||||||
//No living brts in old header.
|
//No living brts in old header.
|
||||||
|
toku_ft_grab_reflock(old_h);
|
||||||
assert(toku_list_empty(&old_h->live_ft_handles));
|
assert(toku_list_empty(&old_h->live_ft_handles));
|
||||||
|
toku_ft_release_reflock(old_h);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If application did not close all DBs using the new file, then there should
|
FT dst_h;
|
||||||
// be no zombies and we need to redirect the DBs back to the original file.
|
// redirect back from new_h to old_h
|
||||||
if (!toku_list_empty(&new_h->live_ft_handles)) {
|
r = dictionary_redirect_internal(old_fname_in_env, new_h, txn, &dst_h);
|
||||||
FT dst_h;
|
assert_zero(r);
|
||||||
// redirect back from new_h to old_h
|
assert(dst_h == old_h);
|
||||||
r = dictionary_redirect_internal(old_fname_in_env, new_h, txn, &dst_h);
|
|
||||||
assert_zero(r);
|
|
||||||
assert(dst_h == old_h);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
//No live brts.
|
|
||||||
//No need to redirect back.
|
|
||||||
r = 0;
|
|
||||||
}
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -784,7 +823,6 @@ toku_dictionary_redirect (const char *dst_fname_in_env, FT_HANDLE old_ft, TOKUTX
|
||||||
|
|
||||||
// make rollback log entry
|
// make rollback log entry
|
||||||
if (txn) {
|
if (txn) {
|
||||||
assert(!toku_list_empty(&new_h->live_ft_handles));
|
|
||||||
r = toku_txn_note_ft(txn, new_h); // mark new brt as touched by this txn
|
r = toku_txn_note_ft(txn, new_h); // mark new brt as touched by this txn
|
||||||
|
|
||||||
FILENUM old_filenum = toku_cachefile_filenum(old_h->cf);
|
FILENUM old_filenum = toku_cachefile_filenum(old_h->cf);
|
||||||
|
@ -817,7 +855,7 @@ toku_ft_maybe_add_txn_ref(FT h, TOKUTXN txn) {
|
||||||
BOOL ref_added = FALSE;
|
BOOL ref_added = FALSE;
|
||||||
OMTVALUE txnv;
|
OMTVALUE txnv;
|
||||||
u_int32_t index;
|
u_int32_t index;
|
||||||
toku_ft_lock(h);
|
toku_ft_grab_reflock(h);
|
||||||
// Does brt already know about transaction txn?
|
// Does brt already know about transaction txn?
|
||||||
int r = toku_omt_find_zero(h->txns, find_xid, txn, &txnv, &index);
|
int r = toku_omt_find_zero(h->txns, find_xid, txn, &txnv, &index);
|
||||||
if (r==0) {
|
if (r==0) {
|
||||||
|
@ -832,85 +870,80 @@ toku_ft_maybe_add_txn_ref(FT h, TOKUTXN txn) {
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
ref_added = TRUE;
|
ref_added = TRUE;
|
||||||
exit:
|
exit:
|
||||||
toku_ft_unlock(h);
|
toku_ft_release_reflock(h);
|
||||||
return ref_added;
|
return ref_added;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
static void
|
||||||
toku_ft_remove_txn_ref(FT h, TOKUTXN txn) {
|
remove_txn_ref_callback(FT ft, void *context) {
|
||||||
|
TOKUTXN txn = context;
|
||||||
OMTVALUE txnv_again=NULL;
|
OMTVALUE txnv_again=NULL;
|
||||||
u_int32_t index;
|
u_int32_t index;
|
||||||
toku_ft_lock(h);
|
int r = toku_omt_find_zero(ft->txns, find_xid, txn, &txnv_again, &index);
|
||||||
int r = toku_omt_find_zero(h->txns, find_xid, txn, &txnv_again, &index);
|
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
assert(txnv_again == txn);
|
assert(txnv_again == txn);
|
||||||
r = toku_omt_delete_at(h->txns, index);
|
r = toku_omt_delete_at(ft->txns, index);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
// TODO: (Zardosht) figure out how to properly do this
|
}
|
||||||
// below this unlock, are depending on ydb lock
|
|
||||||
toku_ft_unlock(h);
|
void
|
||||||
if (!toku_ft_needed(h)) {
|
toku_ft_remove_txn_ref(FT ft, TOKUTXN txn) {
|
||||||
//Close immediately.
|
toku_ft_remove_reference(ft, false, ZERO_LSN, remove_txn_ref_callback, txn);
|
||||||
// I have no idea how this error string business works
|
|
||||||
char *error_string = NULL;
|
|
||||||
r = toku_remove_ft(h, &error_string, false, ZERO_LSN);
|
|
||||||
lazy_assert_zero(r);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_calculate_root_offset_pointer (
|
void toku_calculate_root_offset_pointer (
|
||||||
FT h,
|
FT ft,
|
||||||
CACHEKEY* root_key,
|
CACHEKEY* root_key,
|
||||||
u_int32_t *roothash
|
u_int32_t *roothash
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
*roothash = toku_cachetable_hash(h->cf, h->root_blocknum);
|
*roothash = toku_cachetable_hash(ft->cf, ft->h->root_blocknum);
|
||||||
*root_key = h->root_blocknum;
|
*root_key = ft->h->root_blocknum;
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_ft_set_new_root_blocknum(
|
void toku_ft_set_new_root_blocknum(
|
||||||
FT h,
|
FT ft,
|
||||||
CACHEKEY new_root_key
|
CACHEKEY new_root_key
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
h->root_blocknum = new_root_key;
|
ft->h->root_blocknum = new_root_key;
|
||||||
}
|
}
|
||||||
|
|
||||||
LSN toku_ft_checkpoint_lsn(FT h) {
|
LSN toku_ft_checkpoint_lsn(FT ft) {
|
||||||
return h->checkpoint_lsn;
|
return ft->h->checkpoint_lsn;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_ft_set_panic(FT h, int panic, char *panic_string) {
|
int toku_ft_set_panic(FT ft, int panic, char *panic_string) {
|
||||||
if (h->panic == 0) {
|
if (ft->panic == 0) {
|
||||||
h->panic = panic;
|
ft->panic = panic;
|
||||||
if (h->panic_string) {
|
if (ft->panic_string) {
|
||||||
toku_free(h->panic_string);
|
toku_free(ft->panic_string);
|
||||||
}
|
}
|
||||||
h->panic_string = toku_strdup(panic_string);
|
ft->panic_string = toku_strdup(panic_string);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
toku_ft_stat64 (FT h, struct ftstat64_s *s) {
|
toku_ft_stat64 (FT ft, struct ftstat64_s *s) {
|
||||||
s->fsize = toku_cachefile_size(h->cf);
|
s->fsize = toku_cachefile_size(ft->cf);
|
||||||
// just use the in memory stats from the header
|
// just use the in memory stats from the header
|
||||||
// prevent appearance of negative numbers for numrows, numbytes
|
// prevent appearance of negative numbers for numrows, numbytes
|
||||||
int64_t n = h->in_memory_stats.numrows;
|
int64_t n = ft->in_memory_stats.numrows;
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
n = 0;
|
n = 0;
|
||||||
}
|
}
|
||||||
s->nkeys = s->ndata = n;
|
s->nkeys = s->ndata = n;
|
||||||
n = h->in_memory_stats.numbytes;
|
n = ft->in_memory_stats.numbytes;
|
||||||
if (n < 0) {
|
if (n < 0) {
|
||||||
n = 0;
|
n = 0;
|
||||||
}
|
}
|
||||||
s->dsize = n;
|
s->dsize = n;
|
||||||
|
|
||||||
// 4018
|
// 4018
|
||||||
s->create_time_sec = h->time_of_creation;
|
s->create_time_sec = ft->h->time_of_creation;
|
||||||
s->modify_time_sec = h->time_of_last_modification;
|
s->modify_time_sec = ft->h->time_of_last_modification;
|
||||||
s->verify_time_sec = h->time_of_last_verification;
|
s->verify_time_sec = ft->h->time_of_last_verification;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: (Zardosht), once the fdlock has been removed from cachetable, remove
|
// TODO: (Zardosht), once the fdlock has been removed from cachetable, remove
|
||||||
|
@ -963,3 +996,33 @@ toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta) {
|
||||||
(void) __sync_fetch_and_sub(&(headerstats->numrows), delta.numrows);
|
(void) __sync_fetch_and_sub(&(headerstats->numrows), delta.numrows);
|
||||||
(void) __sync_fetch_and_sub(&(headerstats->numbytes), delta.numbytes);
|
(void) __sync_fetch_and_sub(&(headerstats->numbytes), delta.numbytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
toku_ft_remove_reference(FT ft, bool oplsn_valid, LSN oplsn, remove_ft_ref_callback remove_ref, void *extra) {
|
||||||
|
toku_ft_grab_reflock(ft);
|
||||||
|
if (toku_ft_has_one_reference_unlocked(ft)) {
|
||||||
|
toku_ft_release_reflock(ft);
|
||||||
|
|
||||||
|
toku_ft_open_close_lock();
|
||||||
|
toku_ft_grab_reflock(ft);
|
||||||
|
|
||||||
|
remove_ref(ft, extra);
|
||||||
|
BOOL needed = toku_ft_needed_unlocked(ft);
|
||||||
|
toku_ft_release_reflock(ft);
|
||||||
|
if (!needed) {
|
||||||
|
// close header
|
||||||
|
char *error_string = NULL;
|
||||||
|
int r;
|
||||||
|
r = toku_remove_ft(ft, &error_string, oplsn_valid, oplsn);
|
||||||
|
assert_zero(r);
|
||||||
|
assert(error_string == NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
toku_ft_open_close_unlock();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
remove_ref(ft, extra);
|
||||||
|
toku_ft_release_reflock(ft);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
11
ft/ft.h
11
ft/ft.h
|
@ -22,13 +22,19 @@ void toku_ft_destroy_treelock(FT h);
|
||||||
void toku_ft_grab_treelock(FT h);
|
void toku_ft_grab_treelock(FT h);
|
||||||
void toku_ft_release_treelock(FT h);
|
void toku_ft_release_treelock(FT h);
|
||||||
|
|
||||||
|
void toku_ft_init_reflock(FT ft);
|
||||||
|
void toku_ft_destroy_reflock(FT ft);
|
||||||
|
void toku_ft_grab_reflock(FT ft);
|
||||||
|
void toku_ft_release_reflock(FT ft);
|
||||||
|
|
||||||
int toku_create_new_ft(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn);
|
int toku_create_new_ft(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn);
|
||||||
void toku_ft_free (FT h);
|
void toku_ft_free (FT h);
|
||||||
|
|
||||||
int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header, BOOL* was_open);
|
int toku_read_ft_and_store_in_cachefile (FT_HANDLE brt, CACHEFILE cf, LSN max_acceptable_lsn, FT *header, BOOL* was_open);
|
||||||
void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live);
|
void toku_ft_note_ft_handle_open(FT ft, FT_HANDLE live);
|
||||||
|
|
||||||
int toku_ft_needed(FT h);
|
int toku_ft_needed_unlocked(FT h);
|
||||||
|
BOOL toku_ft_has_one_reference_unlocked(FT ft);
|
||||||
int toku_remove_ft (FT h, char **error_string, BOOL oplsn_valid, LSN oplsn) __attribute__ ((warn_unused_result));
|
int toku_remove_ft (FT h, char **error_string, BOOL oplsn_valid, LSN oplsn) __attribute__ ((warn_unused_result));
|
||||||
|
|
||||||
FT_HANDLE toku_ft_get_some_existing_ft_handle(FT h);
|
FT_HANDLE toku_ft_get_some_existing_ft_handle(FT h);
|
||||||
|
@ -71,5 +77,8 @@ void toku_ft_update_cmp_descriptor(FT h);
|
||||||
void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
void toku_ft_update_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
||||||
void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
void toku_ft_decrease_stats(STAT64INFO headerstats, STAT64INFO_S delta);
|
||||||
|
|
||||||
|
void toku_ft_remove_reference(FT ft,
|
||||||
|
bool oplsn_valid, LSN oplsn,
|
||||||
|
remove_ft_ref_callback remove_ref, void *extra);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -881,8 +881,8 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
|
||||||
int r = toku_serialize_ftnode_to_memory(
|
int r = toku_serialize_ftnode_to_memory(
|
||||||
node,
|
node,
|
||||||
ndd,
|
ndd,
|
||||||
h->basementnodesize,
|
h->h->basementnodesize,
|
||||||
h->compression_method,
|
h->h->compression_method,
|
||||||
do_rebalancing,
|
do_rebalancing,
|
||||||
FALSE, // in_parallel
|
FALSE, // in_parallel
|
||||||
&n_to_write,
|
&n_to_write,
|
||||||
|
@ -1786,7 +1786,7 @@ deserialize_and_upgrade_internal_node(FTNODE node,
|
||||||
// of messages in the buffer.
|
// of messages in the buffer.
|
||||||
MSN lowest;
|
MSN lowest;
|
||||||
u_int64_t amount = n_in_this_buffer;
|
u_int64_t amount = n_in_this_buffer;
|
||||||
lowest.msn = __sync_sub_and_fetch(&bfe->h->highest_unused_msn_for_upgrade.msn, amount);
|
lowest.msn = __sync_sub_and_fetch(&bfe->h->h->highest_unused_msn_for_upgrade.msn, amount);
|
||||||
if (highest_msn.msn == 0) {
|
if (highest_msn.msn == 0) {
|
||||||
highest_msn.msn = lowest.msn + n_in_this_buffer;
|
highest_msn.msn = lowest.msn + n_in_this_buffer;
|
||||||
}
|
}
|
||||||
|
@ -2035,7 +2035,7 @@ deserialize_and_upgrade_leaf_node(FTNODE node,
|
||||||
|
|
||||||
// Whatever this is must be less than the MSNs of every message above
|
// Whatever this is must be less than the MSNs of every message above
|
||||||
// it, so it's ok to take it here.
|
// it, so it's ok to take it here.
|
||||||
bn->max_msn_applied = bfe->h->highest_unused_msn_for_upgrade;
|
bn->max_msn_applied = bfe->h->h->highest_unused_msn_for_upgrade;
|
||||||
bn->stale_ancestor_messages_applied = false;
|
bn->stale_ancestor_messages_applied = false;
|
||||||
node->max_msn_applied_to_node_on_disk = bn->max_msn_applied;
|
node->max_msn_applied_to_node_on_disk = bn->max_msn_applied;
|
||||||
|
|
||||||
|
@ -2625,7 +2625,7 @@ toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE log
|
||||||
size_t n_to_write;
|
size_t n_to_write;
|
||||||
char *compressed_buf;
|
char *compressed_buf;
|
||||||
{
|
{
|
||||||
int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, h->compression_method, &n_to_write, &compressed_buf);
|
int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, h->h->compression_method, &n_to_write, &compressed_buf);
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2949,9 +2949,9 @@ toku_upgrade_subtree_estimates_to_stat64info(int fd, FT h)
|
||||||
FTNODE_DISK_DATA unused_ndd = NULL;
|
FTNODE_DISK_DATA unused_ndd = NULL;
|
||||||
struct ftnode_fetch_extra bfe;
|
struct ftnode_fetch_extra bfe;
|
||||||
fill_bfe_for_min_read(&bfe, h);
|
fill_bfe_for_min_read(&bfe, h);
|
||||||
e = deserialize_ftnode_from_fd(fd, h->root_blocknum, 0, &unused_node, &unused_ndd,
|
e = deserialize_ftnode_from_fd(fd, h->h->root_blocknum, 0, &unused_node, &unused_ndd,
|
||||||
&bfe, &h->on_disk_stats);
|
&bfe, &h->h->on_disk_stats);
|
||||||
h->in_memory_stats = h->on_disk_stats;
|
h->in_memory_stats = h->h->on_disk_stats;
|
||||||
|
|
||||||
if (unused_node) {
|
if (unused_node) {
|
||||||
toku_ftnode_free(&unused_node);
|
toku_ftnode_free(&unused_node);
|
||||||
|
|
76
ft/ftdump.c
76
ft/ftdump.c
|
@ -85,34 +85,34 @@ dump_descriptor(DESCRIPTOR d) {
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dump_header (int f, FT *header, CACHEFILE cf) {
|
dump_header (int f, FT *header, CACHEFILE cf) {
|
||||||
FT h;
|
FT ft;
|
||||||
int r;
|
int r;
|
||||||
char timestr[26];
|
char timestr[26];
|
||||||
r = toku_deserialize_ft_from (f, MAX_LSN, &h);
|
r = toku_deserialize_ft_from (f, MAX_LSN, &ft);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
h->cf = cf;
|
ft->cf = cf;
|
||||||
printf("ft:\n");
|
printf("ft:\n");
|
||||||
printf(" layout_version=%d\n", h->layout_version);
|
printf(" layout_version=%d\n", ft->h->layout_version);
|
||||||
printf(" layout_version_original=%d\n", h->layout_version_original);
|
printf(" layout_version_original=%d\n", ft->h->layout_version_original);
|
||||||
printf(" layout_version_read_from_disk=%d\n", h->layout_version_read_from_disk);
|
printf(" layout_version_read_from_disk=%d\n", ft->layout_version_read_from_disk);
|
||||||
printf(" build_id=%d\n", h->build_id);
|
printf(" build_id=%d\n", ft->h->build_id);
|
||||||
printf(" build_id_original=%d\n", h->build_id_original);
|
printf(" build_id_original=%d\n", ft->h->build_id_original);
|
||||||
format_time(h->time_of_creation, timestr);
|
format_time(ft->h->time_of_creation, timestr);
|
||||||
printf(" time_of_creation= %"PRIu64" %s\n", h->time_of_creation, timestr);
|
printf(" time_of_creation= %"PRIu64" %s\n", ft->h->time_of_creation, timestr);
|
||||||
format_time(h->time_of_last_modification, timestr);
|
format_time(ft->h->time_of_last_modification, timestr);
|
||||||
printf(" time_of_last_modification=%"PRIu64" %s\n", h->time_of_last_modification, timestr);
|
printf(" time_of_last_modification=%"PRIu64" %s\n", ft->h->time_of_last_modification, timestr);
|
||||||
printf(" dirty=%d\n", h->dirty);
|
printf(" dirty=%d\n", ft->h->dirty);
|
||||||
printf(" checkpoint_count=%" PRId64 "\n", h->checkpoint_count);
|
printf(" checkpoint_count=%" PRId64 "\n", ft->h->checkpoint_count);
|
||||||
printf(" checkpoint_lsn=%" PRId64 "\n", h->checkpoint_lsn.lsn);
|
printf(" checkpoint_lsn=%" PRId64 "\n", ft->h->checkpoint_lsn.lsn);
|
||||||
printf(" nodesize=%u\n", h->nodesize);
|
printf(" nodesize=%u\n", ft->h->nodesize);
|
||||||
printf(" basementnodesize=%u\n", h->basementnodesize);
|
printf(" basementnodesize=%u\n", ft->h->basementnodesize);
|
||||||
printf(" compression_method=%u\n", (unsigned) h->compression_method);
|
printf(" compression_method=%u\n", (unsigned) ft->h->compression_method);
|
||||||
printf(" unnamed_root=%" PRId64 "\n", h->root_blocknum.b);
|
printf(" unnamed_root=%" PRId64 "\n", ft->h->root_blocknum.b);
|
||||||
printf(" flags=%u\n", h->flags);
|
printf(" flags=%u\n", ft->h->flags);
|
||||||
dump_descriptor(&h->descriptor);
|
dump_descriptor(&ft->descriptor);
|
||||||
printf(" estimated numrows=%" PRId64 "\n", h->in_memory_stats.numrows);
|
printf(" estimated numrows=%" PRId64 "\n", ft->in_memory_stats.numrows);
|
||||||
printf(" estimated numbytes=%" PRId64 "\n", h->in_memory_stats.numbytes);
|
printf(" estimated numbytes=%" PRId64 "\n", ft->in_memory_stats.numbytes);
|
||||||
*header = h;
|
*header = ft;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -506,14 +506,14 @@ main (int argc, const char *const argv[]) {
|
||||||
|
|
||||||
const char *n = argv[0];
|
const char *n = argv[0];
|
||||||
int f = open(n, O_RDWR + O_BINARY); assert(f>=0);
|
int f = open(n, O_RDWR + O_BINARY); assert(f>=0);
|
||||||
FT h;
|
FT ft;
|
||||||
// create a cachefile for the header
|
// create a cachefile for the header
|
||||||
int r = toku_create_cachetable(&ct, 1<<25, (LSN){0}, 0);
|
int r = toku_create_cachetable(&ct, 1<<25, (LSN){0}, 0);
|
||||||
assert(r == 0);
|
assert(r == 0);
|
||||||
CACHEFILE cf;
|
CACHEFILE cf;
|
||||||
r = toku_cachetable_openfd (&cf, ct, f, n);
|
r = toku_cachetable_openfd (&cf, ct, f, n);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
dump_header(f, &h, cf);
|
dump_header(f, &ft, cf);
|
||||||
if (interactive) {
|
if (interactive) {
|
||||||
while (1) {
|
while (1) {
|
||||||
printf("ftdump>"); fflush(stdout);
|
printf("ftdump>"); fflush(stdout);
|
||||||
|
@ -530,25 +530,25 @@ main (int argc, const char *const argv[]) {
|
||||||
if (strcmp(fields[0], "help") == 0) {
|
if (strcmp(fields[0], "help") == 0) {
|
||||||
interactive_help();
|
interactive_help();
|
||||||
} else if (strcmp(fields[0], "header") == 0) {
|
} else if (strcmp(fields[0], "header") == 0) {
|
||||||
toku_ft_free(h);
|
toku_ft_free(ft);
|
||||||
dump_header(f, &h, cf);
|
dump_header(f, &ft, cf);
|
||||||
} else if (strcmp(fields[0], "block") == 0 && nfields == 2) {
|
} else if (strcmp(fields[0], "block") == 0 && nfields == 2) {
|
||||||
BLOCKNUM blocknum = make_blocknum(getuint64(fields[1]));
|
BLOCKNUM blocknum = make_blocknum(getuint64(fields[1]));
|
||||||
dump_block(f, blocknum, h);
|
dump_block(f, blocknum, ft);
|
||||||
} else if (strcmp(fields[0], "node") == 0 && nfields == 2) {
|
} else if (strcmp(fields[0], "node") == 0 && nfields == 2) {
|
||||||
BLOCKNUM off = make_blocknum(getuint64(fields[1]));
|
BLOCKNUM off = make_blocknum(getuint64(fields[1]));
|
||||||
dump_node(f, off, h);
|
dump_node(f, off, ft);
|
||||||
} else if (strcmp(fields[0], "dumpdata") == 0 && nfields == 2) {
|
} else if (strcmp(fields[0], "dumpdata") == 0 && nfields == 2) {
|
||||||
dump_data = strtol(fields[1], NULL, 10);
|
dump_data = strtol(fields[1], NULL, 10);
|
||||||
} else if (strcmp(fields[0], "block_translation") == 0 || strcmp(fields[0], "bx") == 0) {
|
} else if (strcmp(fields[0], "block_translation") == 0 || strcmp(fields[0], "bx") == 0) {
|
||||||
u_int64_t offset = 0;
|
u_int64_t offset = 0;
|
||||||
if (nfields == 2)
|
if (nfields == 2)
|
||||||
offset = getuint64(fields[1]);
|
offset = getuint64(fields[1]);
|
||||||
dump_block_translation(h, offset);
|
dump_block_translation(ft, offset);
|
||||||
} else if (strcmp(fields[0], "fragmentation") == 0) {
|
} else if (strcmp(fields[0], "fragmentation") == 0) {
|
||||||
dump_fragmentation(f, h);
|
dump_fragmentation(f, ft);
|
||||||
} else if (strcmp(fields[0], "garbage") == 0) {
|
} else if (strcmp(fields[0], "garbage") == 0) {
|
||||||
dump_garbage_stats(f, h);
|
dump_garbage_stats(f, ft);
|
||||||
} else if (strcmp(fields[0], "file") == 0 && nfields >= 3) {
|
} else if (strcmp(fields[0], "file") == 0 && nfields >= 3) {
|
||||||
u_int64_t offset = getuint64(fields[1]);
|
u_int64_t offset = getuint64(fields[1]);
|
||||||
u_int64_t size = getuint64(fields[2]);
|
u_int64_t size = getuint64(fields[2]);
|
||||||
|
@ -565,18 +565,18 @@ main (int argc, const char *const argv[]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (rootnode) {
|
} else if (rootnode) {
|
||||||
dump_node(f, h->root_blocknum, h);
|
dump_node(f, ft->h->root_blocknum, ft);
|
||||||
} else {
|
} else {
|
||||||
printf("Block translation:");
|
printf("Block translation:");
|
||||||
|
|
||||||
toku_dump_translation_table(stdout, h->blocktable);
|
toku_dump_translation_table(stdout, ft->blocktable);
|
||||||
|
|
||||||
struct __dump_node_extra info;
|
struct __dump_node_extra info;
|
||||||
info.f = f;
|
info.f = f;
|
||||||
info.h = h;
|
info.h = ft;
|
||||||
toku_blocktable_iterate(h->blocktable, TRANSLATION_CHECKPOINTED,
|
toku_blocktable_iterate(ft->blocktable, TRANSLATION_CHECKPOINTED,
|
||||||
dump_node_wrapper, &info, TRUE, TRUE);
|
dump_node_wrapper, &info, TRUE, TRUE);
|
||||||
}
|
}
|
||||||
toku_ft_free(h);
|
toku_ft_free(ft);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -507,7 +507,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
|
||||||
#define SET_TO_MY_STRDUP(lval, s) do { char *v = toku_strdup(s); if (!v) { int r = errno; toku_ft_loader_internal_destroy(bl, TRUE); return r; } lval = v; } while (0)
|
#define SET_TO_MY_STRDUP(lval, s) do { char *v = toku_strdup(s); if (!v) { int r = errno; toku_ft_loader_internal_destroy(bl, TRUE); return r; } lval = v; } while (0)
|
||||||
|
|
||||||
MY_CALLOC_N(N, bl->root_xids_that_created);
|
MY_CALLOC_N(N, bl->root_xids_that_created);
|
||||||
for (int i=0; i<N; i++) if (brts[i]) bl->root_xids_that_created[i]=brts[i]->ft->root_xid_that_created;
|
for (int i=0; i<N; i++) if (brts[i]) bl->root_xids_that_created[i]=brts[i]->ft->h->root_xid_that_created;
|
||||||
MY_CALLOC_N(N, bl->dbs);
|
MY_CALLOC_N(N, bl->dbs);
|
||||||
for (int i=0; i<N; i++) if (brts[i]) bl->dbs[i]=dbs[i];
|
for (int i=0; i<N; i++) if (brts[i]) bl->dbs[i]=dbs[i];
|
||||||
MY_CALLOC_N(N, bl->descriptors);
|
MY_CALLOC_N(N, bl->descriptors);
|
||||||
|
@ -2206,11 +2206,12 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
|
||||||
if (bl->root_xids_that_created)
|
if (bl->root_xids_that_created)
|
||||||
root_xid_that_created = bl->root_xids_that_created[which_db];
|
root_xid_that_created = bl->root_xids_that_created[which_db];
|
||||||
|
|
||||||
struct ft h;
|
// TODO: (Zardosht/Yoni/Leif), do this code properly
|
||||||
toku_ft_init(&h, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method);
|
struct ft ft;
|
||||||
|
toku_ft_init(&ft, (BLOCKNUM){0}, bl->load_lsn, root_xid_that_created, target_nodesize, target_basementnodesize, target_compression_method);
|
||||||
|
|
||||||
struct dbout out;
|
struct dbout out;
|
||||||
dbout_init(&out, &h);
|
dbout_init(&out, &ft);
|
||||||
out.fd = fd;
|
out.fd = fd;
|
||||||
out.current_off = 8192; // leave 8K reserved at beginning
|
out.current_off = 8192; // leave 8K reserved at beginning
|
||||||
out.n_translations = 3; // 3 translations reserved at the beginning
|
out.n_translations = 3; // 3 translations reserved at the beginning
|
||||||
|
@ -2333,7 +2334,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (deltas.numrows || deltas.numbytes) {
|
if (deltas.numrows || deltas.numbytes) {
|
||||||
toku_ft_update_stats(&h.in_memory_stats, deltas);
|
toku_ft_update_stats(&ft.in_memory_stats, deltas);
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup_maxkey(&maxkey);
|
cleanup_maxkey(&maxkey);
|
||||||
|
@ -2375,7 +2376,7 @@ static int toku_loader_write_ft_from_q (FTLOADER bl,
|
||||||
|
|
||||||
{
|
{
|
||||||
invariant(sts.n_subtrees==1);
|
invariant(sts.n_subtrees==1);
|
||||||
out.h->root_blocknum = make_blocknum(sts.subtrees[0].block);
|
out.h->h->root_blocknum = make_blocknum(sts.subtrees[0].block);
|
||||||
toku_free(sts.subtrees); sts.subtrees = NULL;
|
toku_free(sts.subtrees); sts.subtrees = NULL;
|
||||||
|
|
||||||
// write the descriptor
|
// write the descriptor
|
||||||
|
@ -2766,16 +2767,15 @@ static int write_translation_table (struct dbout *out, long long *off_of_transla
|
||||||
static int
|
static int
|
||||||
write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk) {
|
write_header (struct dbout *out, long long translation_location_on_disk, long long translation_size_on_disk) {
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
unsigned int size = toku_serialize_ft_size (out->h->h);
|
||||||
out->h->checkpoint_staging_stats = out->h->in_memory_stats; // #4184
|
|
||||||
unsigned int size = toku_serialize_ft_size (out->h);
|
|
||||||
struct wbuf wbuf;
|
struct wbuf wbuf;
|
||||||
char *MALLOC_N(size, buf);
|
char *MALLOC_N(size, buf);
|
||||||
if (buf == NULL) {
|
if (buf == NULL) {
|
||||||
result = errno;
|
result = errno;
|
||||||
} else {
|
} else {
|
||||||
wbuf_init(&wbuf, buf, size);
|
wbuf_init(&wbuf, buf, size);
|
||||||
toku_serialize_ft_to_wbuf(&wbuf, out->h, translation_location_on_disk, translation_size_on_disk);
|
out->h->h->on_disk_stats = out->h->in_memory_stats;
|
||||||
|
toku_serialize_ft_to_wbuf(&wbuf, out->h->h, translation_location_on_disk, translation_size_on_disk);
|
||||||
if (wbuf.ndone != size)
|
if (wbuf.ndone != size)
|
||||||
result = EINVAL;
|
result = EINVAL;
|
||||||
else
|
else
|
||||||
|
|
|
@ -38,6 +38,7 @@ typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
|
||||||
typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO;
|
typedef struct ftnode_nonleaf_childinfo *NONLEAF_CHILDINFO;
|
||||||
typedef struct sub_block *SUB_BLOCK;
|
typedef struct sub_block *SUB_BLOCK;
|
||||||
typedef struct ft *FT;
|
typedef struct ft *FT;
|
||||||
|
typedef struct ft_header *FT_HEADER;
|
||||||
typedef struct ft_options *FT_OPTIONS;
|
typedef struct ft_options *FT_OPTIONS;
|
||||||
struct wbuf;
|
struct wbuf;
|
||||||
struct dbuf;
|
struct dbuf;
|
||||||
|
@ -252,6 +253,7 @@ typedef int (*ft_compare_func)(DB *, const DBT *, const DBT *);
|
||||||
typedef void (*setval_func)(const DBT *, void *);
|
typedef void (*setval_func)(const DBT *, void *);
|
||||||
typedef int (*ft_update_func)(DB *, const DBT *, const DBT *, const DBT *, setval_func, void *);
|
typedef int (*ft_update_func)(DB *, const DBT *, const DBT *, const DBT *, setval_func, void *);
|
||||||
typedef void (*on_redirect_callback)(FT_HANDLE, void*);
|
typedef void (*on_redirect_callback)(FT_HANDLE, void*);
|
||||||
|
typedef void (*remove_ft_ref_callback)(FT, void*);
|
||||||
|
|
||||||
#define UU(x) x __attribute__((__unused__))
|
#define UU(x) x __attribute__((__unused__))
|
||||||
|
|
||||||
|
|
20
ft/logger.c
20
ft/logger.c
|
@ -197,7 +197,7 @@ toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, BOOL create)
|
||||||
//Verify it is empty
|
//Verify it is empty
|
||||||
assert(!t->ft->panic);
|
assert(!t->ft->panic);
|
||||||
//Must have no data blocks (rollback logs or otherwise).
|
//Must have no data blocks (rollback logs or otherwise).
|
||||||
toku_block_verify_no_data_blocks_except_root_unlocked(t->ft->blocktable, t->ft->root_blocknum);
|
toku_block_verify_no_data_blocks_except_root_unlocked(t->ft->blocktable, t->ft->h->root_blocknum);
|
||||||
BOOL is_empty;
|
BOOL is_empty;
|
||||||
is_empty = toku_ft_is_empty_fast(t);
|
is_empty = toku_ft_is_empty_fast(t);
|
||||||
assert(is_empty);
|
assert(is_empty);
|
||||||
|
@ -216,26 +216,26 @@ toku_logger_close_rollback(TOKULOGGER logger, BOOL recovery_failed) {
|
||||||
if (!logger->is_panicked && cf) {
|
if (!logger->is_panicked && cf) {
|
||||||
FT_HANDLE ft_to_close;
|
FT_HANDLE ft_to_close;
|
||||||
{ //Find "brt"
|
{ //Find "brt"
|
||||||
FT h = toku_cachefile_get_userdata(cf);
|
FT ft = toku_cachefile_get_userdata(cf);
|
||||||
if (!h->panic && recovery_failed) {
|
if (!ft->panic && recovery_failed) {
|
||||||
r = toku_ft_set_panic(h, EINVAL, "Recovery failed");
|
r = toku_ft_set_panic(ft, EINVAL, "Recovery failed");
|
||||||
assert_zero(r);
|
assert_zero(r);
|
||||||
}
|
}
|
||||||
//Verify it is safe to close it.
|
//Verify it is safe to close it.
|
||||||
if (!h->panic) { //If paniced, it is safe to close.
|
if (!ft->panic) { //If paniced, it is safe to close.
|
||||||
assert(!h->dirty); //Must not be dirty.
|
assert(!ft->h->dirty); //Must not be dirty.
|
||||||
//Must have no data blocks (rollback logs or otherwise).
|
//Must have no data blocks (rollback logs or otherwise).
|
||||||
toku_block_verify_no_data_blocks_except_root_unlocked(h->blocktable, h->root_blocknum);
|
toku_block_verify_no_data_blocks_except_root_unlocked(ft->blocktable, ft->h->root_blocknum);
|
||||||
}
|
}
|
||||||
assert(!h->dirty);
|
assert(!ft->h->dirty);
|
||||||
ft_to_close = toku_ft_get_some_existing_ft_handle(h);
|
ft_to_close = toku_ft_get_some_existing_ft_handle(ft);
|
||||||
assert(ft_to_close);
|
assert(ft_to_close);
|
||||||
{
|
{
|
||||||
BOOL is_empty;
|
BOOL is_empty;
|
||||||
is_empty = toku_ft_is_empty_fast(ft_to_close);
|
is_empty = toku_ft_is_empty_fast(ft_to_close);
|
||||||
assert(is_empty);
|
assert(is_empty);
|
||||||
}
|
}
|
||||||
assert(!h->dirty); // it should not have been dirtied by the toku_ft_is_empty test.
|
assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test.
|
||||||
}
|
}
|
||||||
|
|
||||||
r = toku_ft_handle_close(ft_to_close, FALSE, ZERO_LSN);
|
r = toku_ft_handle_close(ft_to_close, FALSE, ZERO_LSN);
|
||||||
|
|
|
@ -330,11 +330,15 @@ test_prefetching(void) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
|
|
@ -273,11 +273,15 @@ test_serialize_nonleaf(void) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -359,11 +363,15 @@ test_serialize_leaf(void) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
|
|
@ -104,11 +104,15 @@ test_serialize_leaf(int valsize, int nelts, double entropy) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
brt_h->compare_fun = long_key_cmp;
|
brt_h->compare_fun = long_key_cmp;
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
|
@ -237,11 +241,15 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
brt_h->compare_fun = long_key_cmp;
|
brt_h->compare_fun = long_key_cmp;
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
|
|
|
@ -250,11 +250,15 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, BOOL do_clone) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -392,11 +396,15 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, BOOL do_clone
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -531,11 +539,15 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, BOOL do_clone) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -675,11 +687,15 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, BOOL do_clone)
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -835,11 +851,15 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, BOOL
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -959,11 +979,15 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -1088,11 +1112,15 @@ test_serialize_leaf(enum ftnode_verify_type bft, BOOL do_clone) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
@ -1230,11 +1258,15 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, BOOL do_clone) {
|
||||||
|
|
||||||
FT_HANDLE XMALLOC(brt);
|
FT_HANDLE XMALLOC(brt);
|
||||||
FT XCALLOC(brt_h);
|
FT XCALLOC(brt_h);
|
||||||
|
toku_ft_init(brt_h,
|
||||||
|
make_blocknum(0),
|
||||||
|
ZERO_LSN,
|
||||||
|
TXNID_NONE,
|
||||||
|
4*1024*1024,
|
||||||
|
128*1024,
|
||||||
|
TOKU_DEFAULT_COMPRESSION_METHOD);
|
||||||
brt->ft = brt_h;
|
brt->ft = brt_h;
|
||||||
brt_h->type = FT_CURRENT;
|
|
||||||
brt_h->panic = 0; brt_h->panic_string = 0;
|
brt_h->panic = 0; brt_h->panic_string = 0;
|
||||||
brt_h->basementnodesize = 128*1024;
|
|
||||||
brt_h->compression_method = TOKU_DEFAULT_COMPRESSION_METHOD;
|
|
||||||
toku_ft_init_treelock(brt_h);
|
toku_ft_init_treelock(brt_h);
|
||||||
toku_blocktable_create_new(&brt_h->blocktable);
|
toku_blocktable_create_new(&brt_h->blocktable);
|
||||||
//Want to use block #20
|
//Want to use block #20
|
||||||
|
|
|
@ -25,14 +25,15 @@ static void test_header (void) {
|
||||||
r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
|
r = toku_open_ft_handle(fname, 1, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
// now insert some info into the header
|
// now insert some info into the header
|
||||||
FT h = t->ft;
|
FT ft = t->ft;
|
||||||
h->dirty = 1;
|
ft->h->dirty = 1;
|
||||||
h->layout_version_original = 13;
|
// cast away const because we actually want to fiddle with the header
|
||||||
h->layout_version_read_from_disk = 14;
|
// in this test
|
||||||
h->build_id_original = 1234;
|
*((int *) &ft->h->layout_version_original) = 13;
|
||||||
h->in_memory_stats = (STAT64INFO_S) {10, 11};
|
ft->layout_version_read_from_disk = 14;
|
||||||
h->on_disk_stats = (STAT64INFO_S) {20, 21};
|
*((uint32_t *) &ft->h->build_id_original) = 1234;
|
||||||
h->checkpoint_staging_stats = (STAT64INFO_S) {30, 31};
|
ft->in_memory_stats = (STAT64INFO_S) {10, 11};
|
||||||
|
ft->h->on_disk_stats = (STAT64INFO_S) {20, 21};
|
||||||
r = toku_close_ft_handle_nolsn(t, 0); assert(r==0);
|
r = toku_close_ft_handle_nolsn(t, 0); assert(r==0);
|
||||||
r = toku_cachetable_close(&ct);
|
r = toku_cachetable_close(&ct);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
@ -43,20 +44,17 @@ static void test_header (void) {
|
||||||
r = toku_open_ft_handle(fname, 0, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
|
r = toku_open_ft_handle(fname, 0, &t, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, null_txn, toku_builtin_compare_fun);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
|
||||||
h = t->ft;
|
ft = t->ft;
|
||||||
STAT64INFO_S expected_stats = {20, 21}; // on checkpoint, on_disk_stats copied to checkpoint_staging_stats
|
STAT64INFO_S expected_stats = {20, 21}; // on checkpoint, on_disk_stats copied to ft->checkpoint_header->on_disk_stats
|
||||||
assert(h->layout_version == FT_LAYOUT_VERSION);
|
assert(ft->h->layout_version == FT_LAYOUT_VERSION);
|
||||||
assert(h->layout_version_original == 13);
|
assert(ft->h->layout_version_original == 13);
|
||||||
assert(h->layout_version_read_from_disk == FT_LAYOUT_VERSION);
|
assert(ft->layout_version_read_from_disk == FT_LAYOUT_VERSION);
|
||||||
assert(h->build_id_original == 1234);
|
assert(ft->h->build_id_original == 1234);
|
||||||
assert(h->in_memory_stats.numrows == expected_stats.numrows);
|
assert(ft->in_memory_stats.numrows == expected_stats.numrows);
|
||||||
assert(h->on_disk_stats.numbytes == expected_stats.numbytes);
|
assert(ft->h->on_disk_stats.numbytes == expected_stats.numbytes);
|
||||||
r = toku_close_ft_handle_nolsn(t, 0); assert(r==0);
|
r = toku_close_ft_handle_nolsn(t, 0); assert(r==0);
|
||||||
r = toku_cachetable_close(&ct);
|
r = toku_cachetable_close(&ct);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
|
1
ft/txn.c
1
ft/txn.c
|
@ -658,7 +658,6 @@ static int remove_txn (OMTVALUE hv, u_int32_t UU(idx), void *txnv)
|
||||||
|
|
||||||
if (txn->txnid64==h->txnid_that_created_or_locked_when_empty) {
|
if (txn->txnid64==h->txnid_that_created_or_locked_when_empty) {
|
||||||
h->txnid_that_created_or_locked_when_empty = TXNID_NONE;
|
h->txnid_that_created_or_locked_when_empty = TXNID_NONE;
|
||||||
h->root_that_created_or_locked_when_empty = TXNID_NONE;
|
|
||||||
}
|
}
|
||||||
if (txn->txnid64==h->txnid_that_suppressed_recovery_logs) {
|
if (txn->txnid64==h->txnid_that_suppressed_recovery_logs) {
|
||||||
h->txnid_that_suppressed_recovery_logs = TXNID_NONE;
|
h->txnid_that_suppressed_recovery_logs = TXNID_NONE;
|
||||||
|
|
|
@ -500,7 +500,8 @@ toku_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, u_int32_t
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
if (!is_db_hot_index) {
|
if (!is_db_hot_index) {
|
||||||
r = toku_db_pre_acquire_fileops_lock(db, txn);
|
//TODO(zardosht): why doesn't hot_index need to do locking?
|
||||||
|
r = toku_db_pre_acquire_table_lock(db, txn);
|
||||||
if (r != 0) { goto cleanup; }
|
if (r != 0) { goto cleanup; }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -677,9 +678,9 @@ locked_db_open(DB *db, DB_TXN *txn, const char *fname, const char *dbname, DBTYP
|
||||||
|
|
||||||
static int
|
static int
|
||||||
locked_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, u_int32_t flags) {
|
locked_db_change_descriptor(DB *db, DB_TXN* txn, const DBT* descriptor, u_int32_t flags) {
|
||||||
toku_ydb_lock();
|
toku_multi_operation_client_lock(); //Cannot begin checkpoint
|
||||||
int r = toku_db_change_descriptor(db, txn, descriptor, flags);
|
int r = toku_db_change_descriptor(db, txn, descriptor, flags);
|
||||||
toku_ydb_unlock();
|
toku_multi_operation_client_unlock(); //Can now begin checkpoint
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,13 @@ struct toku_list {
|
||||||
struct toku_list *next, *prev;
|
struct toku_list *next, *prev;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static inline int toku_list_num_elements_est(struct toku_list *head) {
|
||||||
|
if (head->next == head) return 0;
|
||||||
|
if (head->next == head->prev) return 1;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline void toku_list_init(struct toku_list *head) {
|
static inline void toku_list_init(struct toku_list *head) {
|
||||||
head->next = head->prev = head;
|
head->next = head->prev = head;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue