diff --git a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc index d97d8762252..8e9856b4060 100644 --- a/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc +++ b/storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc @@ -224,6 +224,9 @@ uint32_t toku_get_checkpoint_period_unlocked (CACHETABLE ct) { } void toku_set_cleaner_period (CACHETABLE ct, uint32_t new_period) { + if(force_recovery) { + return; + } ct->cl.set_period(new_period); } @@ -3025,9 +3028,12 @@ int toku_cleaner_thread (void *cleaner_v) { // ENSURE_POD(cleaner); +extern uint force_recovery; + int cleaner::init(uint32_t _cleaner_iterations, pair_list* _pl, CACHETABLE _ct) { // default is no cleaner, for now m_cleaner_cron_init = false; + if (force_recovery) return 0; int r = toku_minicron_setup(&m_cleaner_cron, 0, toku_cleaner_thread, this); if (r == 0) { m_cleaner_cron_init = true; diff --git a/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc index 35ba864b9ea..ab9802e88b0 100644 --- a/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc +++ b/storage/tokudb/PerconaFT/ft/ft-cachetable-wrappers.cc @@ -72,7 +72,7 @@ cachetable_put_empty_node_with_dep_nodes( enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes]; for (uint32_t i = 0; i < num_dependent_nodes; i++) { dependent_pairs[i] = dependent_nodes[i]->ct_pair; - dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty; + dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty(); } toku_cachetable_put_with_dep_pairs( @@ -252,7 +252,7 @@ toku_pin_ftnode_for_query( // written out, it would have to be dirtied. That // requires a write lock, and a write lock requires you to // resolve checkpointing. - if (!node->dirty) { + if (!node->dirty()) { toku_ft_bn_update_max_msn(node, max_msn_in_path, bfe->child_to_read); } } @@ -279,7 +279,7 @@ toku_pin_ftnode_with_dep_nodes( enum cachetable_dirty dependent_dirty_bits[num_dependent_nodes]; for (uint32_t i = 0; i < num_dependent_nodes; i++) { dependent_pairs[i] = dependent_nodes[i]->ct_pair; - dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty; + dependent_dirty_bits[i] = (enum cachetable_dirty) dependent_nodes[i]->dirty(); } int r = toku_cachetable_get_and_pin_with_dep_pairs( @@ -332,7 +332,7 @@ cleanup: void toku_unpin_ftnode(FT ft, FTNODE node) { int r = toku_cachetable_unpin(ft->cf, node->ct_pair, - static_cast(node->dirty), + static_cast(node->dirty()), make_ftnode_pair_attr(node)); invariant_zero(r); } @@ -343,7 +343,7 @@ toku_unpin_ftnode_read_only(FT ft, FTNODE node) int r = toku_cachetable_unpin( ft->cf, node->ct_pair, - (enum cachetable_dirty) node->dirty, + (enum cachetable_dirty) node->dirty(), make_invalid_pair_attr() ); assert(r==0); diff --git a/storage/tokudb/PerconaFT/ft/ft-flusher.cc b/storage/tokudb/PerconaFT/ft/ft-flusher.cc index e6452f60cfc..8e687d4ae58 100644 --- a/storage/tokudb/PerconaFT/ft/ft-flusher.cc +++ b/storage/tokudb/PerconaFT/ft/ft-flusher.cc @@ -138,7 +138,7 @@ maybe_destroy_child_blbs(FTNODE node, FTNODE child, FT ft) // up to date. if (child->n_children > 1 && child->height == 0 && - !child->dirty) { + !child->dirty()) { for (int i = 0; i < child->n_children; ++i) { if (BP_STATE(child, i) == PT_AVAIL && node->max_msn_applied_to_node_on_disk.msn < BLB_MAX_MSN_APPLIED(child, i).msn) @@ -479,7 +479,7 @@ handle_split_of_child( } ) - node->dirty = 1; + node->set_dirty(); XREALLOC_N(node->n_children+1, node->bp); // Slide the children over. @@ -661,8 +661,8 @@ static void ftnode_finalize_split(FTNODE node, FTNODE B, MSN max_msn_applied_to_ // The new node in the split inherits the oldest known reference xid B->oldest_referenced_xid_known = node->oldest_referenced_xid_known; - node->dirty = 1; - B->dirty = 1; + node->set_dirty(); + B->set_dirty(); } void @@ -1002,8 +1002,8 @@ flush_this_child( paranoid_invariant(child->blocknum.b!=0); // VERIFY_NODE does not work off client thread as of now //VERIFY_NODE(t, child); - node->dirty = 1; - child->dirty = 1; + node->set_dirty(); + child->set_dirty(); BP_WORKDONE(node, childnum) = 0; // this buffer is drained, no work has been done by its contents NONLEAF_CHILDINFO bnc = BNC(node, childnum); @@ -1033,8 +1033,8 @@ merge_leaf_nodes(FTNODE a, FTNODE b) // TODO(leif): this is no longer the way in_memory_stats is // maintained. verify that it's ok to move this just before the unpin // and then do that. - a->dirty = 1; - b->dirty = 1; + a->set_dirty(); + b->set_dirty(); bn_data* a_last_bd = BLB_DATA(a, a->n_children-1); // this bool states if the last basement node in a has any items or not @@ -1166,8 +1166,8 @@ maybe_merge_pinned_nonleaf_nodes( a->n_children = new_n_children; b->n_children = 0; - a->dirty = 1; - b->dirty = 1; + a->set_dirty(); + b->set_dirty(); *did_merge = true; *did_rebalance = false; @@ -1210,7 +1210,7 @@ maybe_merge_pinned_nodes( toku_ftnode_assert_fully_in_memory(parent); toku_ftnode_assert_fully_in_memory(a); toku_ftnode_assert_fully_in_memory(b); - parent->dirty = 1; // just to make sure + parent->set_dirty(); // just to make sure { MSN msna = a->max_msn_applied_to_node_on_disk; MSN msnb = b->max_msn_applied_to_node_on_disk; @@ -1334,8 +1334,8 @@ ft_merge_child( } paranoid_invariant(BP_BLOCKNUM(node, childnuma).b == childa->blocknum.b); - childa->dirty = 1; // just to make sure - childb->dirty = 1; // just to make sure + childa->set_dirty(); // just to make sure + childb->set_dirty(); // just to make sure } else { // flow will be inaccurate for a while, oh well. the children // are leaves in this case so it's not a huge deal (we're @@ -1344,7 +1344,7 @@ ft_merge_child( // If we didn't merge the nodes, then we need the correct pivot. invariant_notnull(splitk.data); node->pivotkeys.replace_at(&splitk, childnuma); - node->dirty = 1; + node->set_dirty(); } toku_destroy_dbt(&splitk); } @@ -1368,7 +1368,7 @@ ft_merge_child( call_flusher_thread_callback(ft_flush_aflter_merge); // unlock the parent - paranoid_invariant(node->dirty); + paranoid_invariant(node->dirty()); toku_unpin_ftnode(ft, node); } else { @@ -1376,7 +1376,7 @@ ft_merge_child( call_flusher_thread_callback(ft_flush_aflter_rebalance); // unlock the parent - paranoid_invariant(node->dirty); + paranoid_invariant(node->dirty()); toku_unpin_ftnode(ft, node); toku_unpin_ftnode(ft, childb); } @@ -1438,9 +1438,9 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // only do the following work if there is a flush to perform if (toku_bnc_n_entries(BNC(parent, childnum)) > 0 || parent->height == 1) { - if (!parent->dirty) { + if (!parent->dirty()) { dirtied++; - parent->dirty = 1; + parent->set_dirty(); } // detach buffer BP_WORKDONE(parent, childnum) = 0; // this buffer is drained, no work has been done by its contents @@ -1485,9 +1485,9 @@ void toku_ft_flush_some_child(FT ft, FTNODE parent, struct flusher_advice *fa) // in the buffer to flush, and as a result, flushing is not necessary // and bnc is NULL if (bnc != NULL) { - if (!child->dirty) { + if (!child->dirty()) { dirtied++; - child->dirty = 1; + child->set_dirty(); } // do the actual flush toku_bnc_flush_to_child( @@ -1786,7 +1786,7 @@ static void flush_node_fun(void *fe_v) // read them back in, or just do the regular partial fetch. If we // don't, that means fe->node is a parent, so we need to do this anyway. bring_node_fully_into_memory(fe->node,fe->ft); - fe->node->dirty = 1; + fe->node->set_dirty(); struct flusher_advice fa; struct flush_status_update_extra fste; @@ -1892,7 +1892,7 @@ void toku_ft_flush_node_on_background_thread(FT ft, FTNODE parent) // // can detach buffer and unpin root here // - parent->dirty = 1; + parent->set_dirty(); BP_WORKDONE(parent, childnum) = 0; // this buffer is drained, no work has been done by its contents NONLEAF_CHILDINFO bnc = BNC(parent, childnum); NONLEAF_CHILDINFO new_bnc = toku_create_empty_nl(); diff --git a/storage/tokudb/PerconaFT/ft/ft-internal.h b/storage/tokudb/PerconaFT/ft/ft-internal.h index eec591d1744..130d3c302aa 100644 --- a/storage/tokudb/PerconaFT/ft/ft-internal.h +++ b/storage/tokudb/PerconaFT/ft/ft-internal.h @@ -76,11 +76,30 @@ enum ft_type { FT_CHECKPOINT_INPROGRESS }; +extern "C" { +extern uint force_recovery; +} + +extern int writing_rollback; + // The ft_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. struct ft_header { enum ft_type type; - int dirty; + int dirty_; + + void set_dirty() { + if(force_recovery) assert(writing_rollback); + dirty_ = 1; + } + + void clear_dirty() { + dirty_ = 0; + } + + bool dirty() { + return dirty_; + } // Free-running counter incremented once per checkpoint (toggling LSB). // LSB indicates which header location is used on disk so this diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.cc b/storage/tokudb/PerconaFT/ft/ft-ops.cc index 6d39b08fe02..d2e92768dde 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.cc +++ b/storage/tokudb/PerconaFT/ft/ft-ops.cc @@ -655,7 +655,7 @@ void toku_ftnode_clone_callback(void *value_data, node->layout_version_read_from_disk; cloned_node->build_id = node->build_id; cloned_node->height = node->height; - cloned_node->dirty = node->dirty; + cloned_node->dirty_ = node->dirty_; cloned_node->fullhash = node->fullhash; cloned_node->n_children = node->n_children; @@ -671,8 +671,8 @@ void toku_ftnode_clone_callback(void *value_data, toku_ftnode_clone_partitions(node, cloned_node); // clear dirty bit - node->dirty = 0; - cloned_node->dirty = 0; + node->clear_dirty(); + cloned_node->clear_dirty(); node->layout_version_read_from_disk = FT_LAYOUT_VERSION; // set new pair attr if necessary if (node->height == 0) { @@ -741,7 +741,7 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile), // persisted, we need undo the logical row count adjustments as // they may occur again in the future if/when the node is // re-read from disk for another query or change. - if (!ftnode->dirty && !write_me) { + if (!ftnode->dirty() && !write_me) { int64_t lrc_delta = 0; for (int i = 0; i < ftnode->n_children; i++) { if (BP_STATE(ftnode, i) == PT_AVAIL) { @@ -846,8 +846,8 @@ int toku_ftnode_fetch_callback(CACHEFILE UU(cachefile), if (r == 0) { *sizep = make_ftnode_pair_attr(*node); (*node)->ct_pair = p; - *dirtyp = (*node)->dirty; // deserialize could mark the node as dirty - // (presumably for upgrade) + *dirtyp = (*node)->dirty(); // deserialize could mark the node as dirty + // (presumably for upgrade) } return r; } @@ -869,7 +869,7 @@ void toku_ftnode_pe_est_callback( paranoid_invariant(ftnode_pv != NULL); long bytes_to_free = 0; FTNODE node = static_cast(ftnode_pv); - if (node->dirty || node->height == 0 || + if (node->dirty() || node->height == 0 || node->layout_version_read_from_disk < FT_FIRST_LAYOUT_VERSION_WITH_BASEMENT_NODES) { *bytes_freed_estimate = 0; *cost = PE_CHEAP; @@ -946,7 +946,7 @@ int toku_ftnode_pe_callback(void *ftnode_pv, void *pointers_to_free[node->n_children * 2]; // Don't partially evict dirty nodes - if (node->dirty) { + if (node->dirty()) { goto exit; } // Don't partially evict nodes whose partitions can't be read back @@ -1399,7 +1399,7 @@ ft_init_new_root(FT ft, FTNODE oldroot, FTNODE *newrootp) MSN msna = oldroot->max_msn_applied_to_node_on_disk; newroot->max_msn_applied_to_node_on_disk = msna; BP_STATE(newroot,0) = PT_AVAIL; - newroot->dirty = 1; + newroot->set_dirty(); // Set the first child to have the new blocknum, // and then swap newroot with oldroot. The new root @@ -1487,7 +1487,7 @@ static void inject_message_in_locked_node( // mark the node as dirty. // enforcing invariant here. // - paranoid_invariant(node->dirty != 0); + paranoid_invariant(node->dirty() != 0); // update some status variables if (node->height != 0) { @@ -1847,7 +1847,7 @@ static void push_something_in_subtree( } } - if (next_loc != NEITHER_EXTREME || child->dirty || toku_bnc_should_promote(ft, bnc)) { + if (next_loc != NEITHER_EXTREME || child->dirty() || toku_bnc_should_promote(ft, bnc)) { push_something_in_subtree(ft, child, -1, msg, flow_deltas, gc_info, depth + 1, next_loc, false); toku_sync_fetch_and_add(&bnc->flow[0], flow_deltas[0]); // The recursive call unpinned the child, but @@ -2802,9 +2802,9 @@ static int ft_create_file(FT_HANDLE UU(ft_handle), const char *fname, int *fdp) } // open a file for use by the ft. if the file does not exist, error -static int ft_open_file(const char *fname, int *fdp) { +static int ft_open_file(const char *fname, int *fdp, bool rw) { int fd; - fd = ft_open_maybe_direct(fname, O_RDWR | O_BINARY, file_mode); + fd = ft_open_maybe_direct(fname, (rw ? O_RDWR : O_RDONLY) | O_BINARY, file_mode); if (fd==-1) { return get_error_errno(); } @@ -2955,7 +2955,7 @@ toku_ft_handle_inherit_options(FT_HANDLE t, FT ft) { // The checkpointed version (checkpoint_lsn) of the dictionary must be no later than max_acceptable_lsn . // Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. static int -ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn) { +ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id, LSN max_acceptable_lsn, bool open_rw = true) { int r; bool txn_created = false; char *fname_in_cwd = NULL; @@ -2977,7 +2977,7 @@ ft_handle_open(FT_HANDLE ft_h, const char *fname_in_env, int is_create, int only fname_in_cwd = toku_cachetable_get_fname_in_cwd(cachetable, fname_in_env); { int fd = -1; - r = ft_open_file(fname_in_cwd, &fd); + r = ft_open_file(fname_in_cwd, &fd, open_rw); if (reserved_filenum.fileid == FILENUM_NONE.fileid) { reserved_filenum = toku_cachetable_reserve_filenum(cachetable); } @@ -3123,15 +3123,15 @@ toku_ft_handle_open_recovery(FT_HANDLE t, const char *fname_in_env, int is_creat // Open an ft in normal use. The FILENUM and dict_id are assigned by the ft_handle_open() function. // Requires: The multi-operation client lock must be held to prevent a checkpoint from occuring. int -toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn) { +toku_ft_handle_open(FT_HANDLE t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, bool open_rw) { int r; - r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN); + r = ft_handle_open(t, fname_in_env, is_create, only_create, cachetable, txn, FILENUM_NONE, DICTIONARY_ID_NONE, MAX_LSN, open_rw); return r; } // clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree int -toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn) { +toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw) { FT_HANDLE result_ft_handle; toku_ft_handle_create(&result_ft_handle); @@ -3146,7 +3146,7 @@ toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN t CACHEFILE cf = ft_handle->ft->cf; CACHETABLE ct = toku_cachefile_get_cachetable(cf); const char *fname_in_env = toku_cachefile_fname_in_env(cf); - int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn); + int r = toku_ft_handle_open(result_ft_handle, fname_in_env, false, false, ct, txn, open_rw); if (r != 0) { toku_ft_handle_close(result_ft_handle); result_ft_handle = NULL; @@ -3547,7 +3547,7 @@ unlock_ftnode_fun (void *v) { int r = toku_cachetable_unpin_ct_prelocked_no_flush( ft_handle->ft->cf, node->ct_pair, - (enum cachetable_dirty) node->dirty, + (enum cachetable_dirty) node->dirty(), x->msgs_applied ? make_ftnode_pair_attr(node) : make_invalid_pair_attr() ); assert_zero(r); @@ -4969,6 +4969,14 @@ static void toku_pfs_keys_destroy(void) { } int toku_ft_layer_init(void) { + static bool ft_layer_init_started = false; + + if(ft_layer_init_started) { + return 0; + } + + ft_layer_init_started = true; + int r = 0; // Portability must be initialized first @@ -4999,6 +5007,14 @@ exit: } void toku_ft_layer_destroy(void) { + static bool ft_layer_destroy_started = false; + + if(ft_layer_destroy_started) { + return; + } + + ft_layer_destroy_started = true; + toku_mutex_destroy(&ft_open_close_lock); toku_ft_serialize_layer_destroy(); toku_checkpoint_destroy(); diff --git a/storage/tokudb/PerconaFT/ft/ft-ops.h b/storage/tokudb/PerconaFT/ft/ft-ops.h index df8ffe287df..7b6d0634c37 100644 --- a/storage/tokudb/PerconaFT/ft/ft-ops.h +++ b/storage/tokudb/PerconaFT/ft/ft-ops.h @@ -125,12 +125,12 @@ typedef int (*ft_update_func)(DB *db, const DBT *key, const DBT *old_val, const void toku_ft_set_update(FT_HANDLE ft_h, ft_update_func update_fun); int toku_ft_handle_open(FT_HANDLE, const char *fname_in_env, - int is_create, int only_create, CACHETABLE ct, TOKUTXN txn) __attribute__ ((warn_unused_result)); + int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, bool open_rw=true) __attribute__ ((warn_unused_result)); int toku_ft_handle_open_recovery(FT_HANDLE, const char *fname_in_env, int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, FILENUM use_filenum, LSN max_acceptable_lsn) __attribute__ ((warn_unused_result)); // clone an ft handle. the cloned handle has a new dict_id but refers to the same fractal tree -int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn); +int toku_ft_handle_clone(FT_HANDLE *cloned_ft_handle, FT_HANDLE ft_handle, TOKUTXN txn, bool open_rw=true); // close an ft handle during normal operation. the underlying ft may or may not close, // depending if there are still references. an lsn for this close will come from the logger. diff --git a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc index e31d80772d5..3b5501b66d3 100644 --- a/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc +++ b/storage/tokudb/PerconaFT/ft/ft-recount-rows.cc @@ -98,7 +98,7 @@ int toku_ft_recount_rows(FT_HANDLE ft, if (rre._cancelled == false) { // update ft count toku_unsafe_set(&ft->ft->in_memory_logical_rows, rre._keys); - ft->ft->h->dirty = 1; + ft->ft->h->set_dirty(); ret = 0; } diff --git a/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc b/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc index 930fb3013d2..8338a0777eb 100644 --- a/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc +++ b/storage/tokudb/PerconaFT/ft/ft-test-helpers.cc @@ -258,7 +258,7 @@ int toku_testsetup_insert_to_nonleaf (FT_HANDLE ft_handle, BLOCKNUM blocknum, en // is directly queueing something in a FIFO instead of // using ft APIs. node->max_msn_applied_to_node_on_disk = msn; - node->dirty = 1; + node->set_dirty(); // Also hack max_msn_in_ft ft_handle->ft->h->max_msn_in_ft = msn; diff --git a/storage/tokudb/PerconaFT/ft/ft-verify.cc b/storage/tokudb/PerconaFT/ft/ft-verify.cc index a2835f730eb..3819799c32f 100644 --- a/storage/tokudb/PerconaFT/ft/ft-verify.cc +++ b/storage/tokudb/PerconaFT/ft/ft-verify.cc @@ -511,7 +511,7 @@ toku_verify_ft_with_progress (FT_HANDLE ft_handle, int (*progress_callback)(void if (r == 0) { toku_ft_lock(ft_handle->ft); ft_handle->ft->h->time_of_last_verification = time(NULL); - ft_handle->ft->h->dirty = 1; + ft_handle->ft->h->set_dirty(); toku_ft_unlock(ft_handle->ft); } return r; diff --git a/storage/tokudb/PerconaFT/ft/ft.cc b/storage/tokudb/PerconaFT/ft/ft.cc index 454bf11794f..5c9f27bf5ad 100644 --- a/storage/tokudb/PerconaFT/ft/ft.cc +++ b/storage/tokudb/PerconaFT/ft/ft.cc @@ -60,7 +60,7 @@ void toku_reset_root_xid_that_created(FT ft, TXNID new_root_xid_that_created) { // (see cooperative use of dirty bit in ft_begin_checkpoint()) toku_ft_lock(ft); ft->h->root_xid_that_created = new_root_xid_that_created; - ft->h->dirty = 1; + ft->h->set_dirty(); toku_ft_unlock(ft); } @@ -146,7 +146,7 @@ static void ft_begin_checkpoint (LSN checkpoint_lsn, void *header_v) { assert(ft->h->type == FT_CURRENT); assert(ft->checkpoint_header == NULL); ft_copy_for_checkpoint_unlocked(ft, checkpoint_lsn); - ft->h->dirty = 0; // this is only place this bit is cleared (in currentheader) + ft->h->clear_dirty(); // this is only place this bit is cleared (in currentheader) ft->blocktable.note_start_checkpoint_unlocked(); toku_ft_unlock (ft); } @@ -185,7 +185,7 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { FT_HEADER ch = ft->checkpoint_header; assert(ch); assert(ch->type == FT_CHECKPOINT_INPROGRESS); - if (ch->dirty) { // this is only place this bit is tested (in checkpoint_header) + if (ch->dirty()) { // this is only place this bit is tested (in checkpoint_header) TOKULOGGER logger = toku_cachefile_logger(cf); if (logger) { toku_logger_fsync_if_lsn_not_fsynced(logger, ch->checkpoint_lsn); @@ -200,7 +200,7 @@ static void ft_checkpoint (CACHEFILE cf, int fd, void *header_v) { // write translation and header to disk (or at least to OS internal buffer) toku_serialize_ft_to(fd, ch, &ft->blocktable, ft->cf); - ch->dirty = 0; // this is only place this bit is cleared (in checkpoint_header) + ch->clear_dirty(); // this is only place this bit is cleared (in checkpoint_header) // fsync the cachefile toku_cachefile_fsync(cf); @@ -254,7 +254,7 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val toku_log_fclose( logger, &lsn, - ft->h->dirty, + ft->h->dirty(), bs, toku_cachefile_filenum(cachefile)); // flush the log on // close (if new header @@ -265,7 +265,7 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val } } } - if (ft->h->dirty) { // this is the only place this bit is tested (in currentheader) + if (ft->h->dirty()) { // this is the only place this bit is tested (in currentheader) bool do_checkpoint = true; if (logger && logger->rollback_cachefile == cachefile) { do_checkpoint = false; @@ -274,7 +274,7 @@ static void ft_close(CACHEFILE cachefile, int fd, void *header_v, bool oplsn_val ft_begin_checkpoint(lsn, header_v); ft_checkpoint(cachefile, fd, ft); ft_end_checkpoint(cachefile, fd, header_v); - assert(!ft->h->dirty); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary) + assert(!ft->h->dirty()); // dirty bit should be cleared by begin_checkpoint and never set again (because we're closing the dictionary) } } } @@ -370,7 +370,7 @@ ft_header_create(FT_OPTIONS options, BLOCKNUM root_blocknum, TXNID root_xid_that uint64_t now = (uint64_t) time(NULL); struct ft_header h = { .type = FT_CURRENT, - .dirty = 0, + .dirty_ = 0, .checkpoint_count = 0, .checkpoint_lsn = ZERO_LSN, .layout_version = FT_LAYOUT_VERSION, @@ -521,7 +521,7 @@ toku_ft_note_hot_begin(FT_HANDLE ft_handle) { toku_ft_lock(ft); ft->h->time_of_last_optimize_begin = now; ft->h->count_of_optimize_in_progress++; - ft->h->dirty = 1; + ft->h->set_dirty(); toku_ft_unlock(ft); } @@ -545,7 +545,7 @@ toku_ft_note_hot_complete(FT_HANDLE ft_handle, bool success, MSN msn_at_start_of if (ft->h->count_of_optimize_in_progress == ft->h->count_of_optimize_in_progress_read_from_disk) ft->h->count_of_optimize_in_progress = 0; } - ft->h->dirty = 1; + ft->h->set_dirty(); toku_ft_unlock(ft); } @@ -958,7 +958,7 @@ void toku_ft_remove_reference( void toku_ft_set_nodesize(FT ft, unsigned int nodesize) { toku_ft_lock(ft); ft->h->nodesize = nodesize; - ft->h->dirty = 1; + ft->h->set_dirty(); toku_ft_unlock(ft); } @@ -971,7 +971,7 @@ void toku_ft_get_nodesize(FT ft, unsigned int *nodesize) { void toku_ft_set_basementnodesize(FT ft, unsigned int basementnodesize) { toku_ft_lock(ft); ft->h->basementnodesize = basementnodesize; - ft->h->dirty = 1; + ft->h->set_dirty(); toku_ft_unlock(ft); } @@ -984,7 +984,7 @@ void toku_ft_get_basementnodesize(FT ft, unsigned int *basementnodesize) { void toku_ft_set_compression_method(FT ft, enum toku_compression_method method) { toku_ft_lock(ft); ft->h->compression_method = method; - ft->h->dirty = 1; + ft->h->set_dirty(); toku_ft_unlock(ft); } @@ -997,7 +997,7 @@ void toku_ft_get_compression_method(FT ft, enum toku_compression_method *methodp void toku_ft_set_fanout(FT ft, unsigned int fanout) { toku_ft_lock(ft); ft->h->fanout = fanout; - ft->h->dirty = 1; + ft->h->set_dirty(); toku_ft_unlock(ft); } diff --git a/storage/tokudb/PerconaFT/ft/ft.h b/storage/tokudb/PerconaFT/ft/ft.h index ff0b63b2b12..5c6caead978 100644 --- a/storage/tokudb/PerconaFT/ft/ft.h +++ b/storage/tokudb/PerconaFT/ft/ft.h @@ -184,11 +184,11 @@ void tokuft_update_product_name_strings(void); extern char toku_product_name[TOKU_MAX_PRODUCT_NAME_LENGTH]; struct toku_product_name_strings_struct { - char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256]; - char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment")]; - char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory")]; - char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me")]; - char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback")]; + char db_version[sizeof(toku_product_name) + sizeof("1.2.3 build ") + 256 + 1]; + char environmentdictionary[sizeof(toku_product_name) + sizeof(".environment") + 1]; + char fileopsdirectory[sizeof(toku_product_name) + sizeof(".directory") + 1]; + char single_process_lock[sizeof(toku_product_name) + sizeof("___lock_dont_delete_me") + 1]; + char rollback_cachefile[sizeof(toku_product_name) + sizeof(".rollback") + 1]; }; extern struct toku_product_name_strings_struct toku_product_name_strings; diff --git a/storage/tokudb/PerconaFT/ft/logger/logger.cc b/storage/tokudb/PerconaFT/ft/logger/logger.cc index ddbbdcb25ab..5b2d1492cc9 100644 --- a/storage/tokudb/PerconaFT/ft/logger/logger.cc +++ b/storage/tokudb/PerconaFT/ft/logger/logger.cc @@ -49,6 +49,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "util/status.h" +int writing_rollback = 0; + static const int log_format_version = TOKU_LOG_VERSION; toku_instr_key *result_output_condition_lock_mutex_key; @@ -231,6 +233,7 @@ void toku_logger_initialize_rollback_cache(TOKULOGGER logger, FT ft) { } int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool create) { + writing_rollback++; assert(logger->is_open); assert(!logger->rollback_cachefile); @@ -250,6 +253,7 @@ int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, bool cre } else { toku_ft_handle_close(ft_handle); } + writing_rollback--; return r; } @@ -267,20 +271,20 @@ void toku_logger_close_rollback_check_empty(TOKULOGGER logger, bool clean_shutdo FT CAST_FROM_VOIDP(ft, toku_cachefile_get_userdata(cf)); if (clean_shutdown) { //Verify it is safe to close it. - assert(!ft->h->dirty); //Must not be dirty. + assert(!ft->h->dirty()); //Must not be dirty. ft->blocktable.free_unused_blocknums(ft->h->root_blocknum); // Must have no data blocks (rollback logs or otherwise). ft->blocktable.verify_no_data_blocks_except_root(ft->h->root_blocknum); - assert(!ft->h->dirty); + assert(!ft->h->dirty()); } else { - ft->h->dirty = 0; + ft->h->clear_dirty(); } ft_to_close = toku_ft_get_only_existing_ft_handle(ft); if (clean_shutdown) { bool is_empty; is_empty = toku_ft_is_empty_fast(ft_to_close); assert(is_empty); - assert(!ft->h->dirty); // it should not have been dirtied by the toku_ft_is_empty test. + assert(!ft->h->dirty()); // it should not have been dirtied by the toku_ft_is_empty test. } } diff --git a/storage/tokudb/PerconaFT/ft/node.cc b/storage/tokudb/PerconaFT/ft/node.cc index 27943496fbf..39a76c8615e 100644 --- a/storage/tokudb/PerconaFT/ft/node.cc +++ b/storage/tokudb/PerconaFT/ft/node.cc @@ -77,7 +77,7 @@ void toku_initialize_empty_ftnode(FTNODE n, BLOCKNUM blocknum, int height, int n } } } - n->dirty = 1; // special case exception, it's okay to mark as dirty because the basements are empty + n->set_dirty(); // special case exception, it's okay to mark as dirty because the basements are empty toku_ft_status_note_ftnode(height, true); } @@ -153,7 +153,7 @@ void toku_ftnode_clone_partitions(FTNODE node, FTNODE cloned_node) { void toku_evict_bn_from_memory(FTNODE node, int childnum, FT ft) { // free the basement node - assert(!node->dirty); + assert(!node->dirty()); BASEMENTNODE bn = BLB(node, childnum); toku_ft_decrease_stats(&ft->in_memory_stats, bn->stat64_delta); toku_ft_adjust_logical_row_count(ft, -BLB_LRD(node, childnum)); @@ -595,7 +595,7 @@ toku_apply_ancestors_messages_to_node ( oldest_referenced_xid_for_simple_gc, node->oldest_referenced_xid_known, true); - if (!node->dirty && child_to_read >= 0) { + if (!node->dirty() && child_to_read >= 0) { paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); apply_ancestors_messages_to_bn( t, @@ -712,7 +712,7 @@ bool toku_ft_leaf_needs_ancestors_messages( paranoid_invariant(node->height == 0); bool needs_ancestors_messages = false; // child_to_read may be -1 in test cases - if (!node->dirty && child_to_read >= 0) { + if (!node->dirty() && child_to_read >= 0) { paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); needs_ancestors_messages = bn_needs_ancestors_messages( ft, @@ -745,7 +745,7 @@ cleanup: void toku_ft_bn_update_max_msn(FTNODE node, MSN max_msn_applied, int child_to_read) { invariant(node->height == 0); - if (!node->dirty && child_to_read >= 0) { + if (!node->dirty() && child_to_read >= 0) { paranoid_invariant(BP_STATE(node, child_to_read) == PT_AVAIL); BASEMENTNODE bn = BLB(node, child_to_read); if (max_msn_applied.msn > bn->max_msn_applied.msn) { @@ -832,7 +832,7 @@ struct rebalance_array_info { void toku_ftnode_leaf_rebalance(FTNODE node, unsigned int basementnodesize) { assert(node->height == 0); - assert(node->dirty); + assert(node->dirty()); uint32_t num_orig_basements = node->n_children; // Count number of leaf entries in this leaf (num_le). @@ -1141,7 +1141,7 @@ void toku_ft_nonleaf_append_child(FTNODE node, FTNODE child, const DBT *pivotkey invariant(childnum > 0); node->pivotkeys.insert_at(pivotkey, childnum - 1); } - node->dirty = 1; + node->set_dirty(); } void @@ -1744,7 +1744,7 @@ static void ft_append_msg_to_child_buffer(const toku::comparator &cmp, FTNODE no int childnum, const ft_msg &msg, bool is_fresh) { paranoid_invariant(BP_STATE(node,childnum) == PT_AVAIL); bnc_insert_msg(BNC(node, childnum), msg, is_fresh, cmp); - node->dirty = 1; + node->set_dirty(); } // This is only exported for tests. @@ -2089,7 +2089,7 @@ void toku_ft_leaf_apply_msg( // be reapplied later), we mark the node as dirty and // take the opportunity to update node->max_msn_applied_to_node_on_disk. // - node->dirty = 1; + node->set_dirty(); // // we cannot blindly update node->max_msn_applied_to_node_on_disk, diff --git a/storage/tokudb/PerconaFT/ft/node.h b/storage/tokudb/PerconaFT/ft/node.h index 05c8a44ebed..61093f3ed8d 100644 --- a/storage/tokudb/PerconaFT/ft/node.h +++ b/storage/tokudb/PerconaFT/ft/node.h @@ -155,6 +155,12 @@ private: size_t _total_size; }; +extern int writing_rollback; + +extern "C" { +extern uint force_recovery; +} + // TODO: class me up struct ftnode { // max_msn_applied that will be written to disk @@ -173,9 +179,22 @@ struct ftnode { uint32_t build_id; // height is always >= 0. 0 for leaf, >0 for nonleaf. int height; - int dirty; + int dirty_; uint32_t fullhash; + void set_dirty() { + if(force_recovery) assert(writing_rollback); + dirty_ = 1; + } + + void clear_dirty() { + dirty_ = 0; + } + + bool dirty() { + return dirty_; + } + // for internal nodes, if n_children==fanout+1 then the tree needs to be // rebalanced. for leaf nodes, represents number of basement nodes int n_children; diff --git a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc index 56d51f56915..c4c99844edf 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/block_table.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/block_table.cc @@ -195,9 +195,9 @@ static void ft_set_dirty(FT ft, bool for_checkpoint) { invariant(ft->h->type == FT_CURRENT); if (for_checkpoint) { invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS); - ft->checkpoint_header->dirty = 1; + ft->checkpoint_header->set_dirty(); } else { - ft->h->dirty = 1; + ft->h->set_dirty(); } } diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc index 02a9dfd085c..de58fb42a8b 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft-node-deserialize.cc @@ -60,7 +60,7 @@ initialize_ftnode(FTNODE node, BLOCKNUM blocknum) { node->fullhash = 0xDEADBEEF; // Is this 'spoof' ok? node->blocknum = blocknum; - node->dirty = 0; + node->clear_dirty(); node->bp = NULL; // Can we use this initialization as a correctness assert in // a later function? diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc index 0d6573972d7..0813855bf55 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft-serialize.cc @@ -340,7 +340,7 @@ int deserialize_ft_versioned(int fd, struct rbuf *rb, FT *ftp, uint32_t version) { struct ft_header h = { .type = FT_CURRENT, - .dirty = 0, + .dirty_ = 0, .checkpoint_count = checkpoint_count, .checkpoint_lsn = checkpoint_lsn, .layout_version = FT_LAYOUT_VERSION, diff --git a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc index 46bb8f81412..46f2e9600c5 100644 --- a/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc +++ b/storage/tokudb/PerconaFT/ft/serialize/ft_node-serialize.cc @@ -827,7 +827,7 @@ int toku_serialize_ftnode_to(int fd, node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); toku_free(compressed_buf); - node->dirty = 0; // See #1957. Must set the node to be clean after + node->clear_dirty(); // See #1957. Must set the node to be clean after // serializing it so that it doesn't get written again on // the next checkpoint or eviction. if (node->height == 0) { @@ -1544,7 +1544,7 @@ static FTNODE alloc_ftnode_for_deserialize(uint32_t fullhash, BLOCKNUM blocknum) FTNODE XMALLOC(node); node->fullhash = fullhash; node->blocknum = blocknum; - node->dirty = 0; + node->clear_dirty(); node->oldest_referenced_xid_known = TXNID_NONE; node->bp = nullptr; node->ct_pair = nullptr; @@ -1951,7 +1951,7 @@ static int deserialize_and_upgrade_internal_node(FTNODE node, // Assign the highest msn from our upgrade message buffers node->max_msn_applied_to_node_on_disk = highest_msn; // Since we assigned MSNs to this node's messages, we need to dirty it. - node->dirty = 1; + node->set_dirty(); // Must compute the checksum now (rather than at the end, while we // still have the pointer to the buffer). @@ -2908,9 +2908,9 @@ int toku_serialize_rollback_log_to(int fd, toku_free(compressed_buf); if (!is_serialized) { toku_static_serialized_rollback_log_destroy(&serialized_local); - log->dirty = 0; // See #1957. Must set the node to be clean after - // serializing it so that it doesn't get written again - // on the next checkpoint or eviction. + log->dirty = false; // See #1957. Must set the node to be clean after + // serializing it so that it doesn't get written again + // on the next checkpoint or eviction. } return 0; } diff --git a/storage/tokudb/PerconaFT/ft/txn/rollback.cc b/storage/tokudb/PerconaFT/ft/txn/rollback.cc index 0c793842f3c..105f980dc0d 100644 --- a/storage/tokudb/PerconaFT/ft/txn/rollback.cc +++ b/storage/tokudb/PerconaFT/ft/txn/rollback.cc @@ -43,6 +43,8 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. #include "ft/logger/log-internal.h" #include "ft/txn/rollback-ct-callbacks.h" +extern int writing_rollback; + static void rollback_unpin_remove_callback(CACHEKEY* cachekey, bool for_checkpoint, void* extra) { FT CAST_FROM_VOIDP(ft, extra); ft->blocktable.free_blocknum(cachekey, ft, for_checkpoint); @@ -155,6 +157,7 @@ static void rollback_log_create ( ROLLBACK_LOG_NODE *result ) { + writing_rollback++; ROLLBACK_LOG_NODE XMALLOC(log); rollback_empty_log_init(log); @@ -169,6 +172,7 @@ static void rollback_log_create ( get_write_callbacks_for_rollback_log(ft), toku_rollback_node_save_ct_pair); txn->roll_info.current_rollback = log->blocknum; + writing_rollback --; } void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log) { diff --git a/storage/tokudb/PerconaFT/ft/txn/txn.cc b/storage/tokudb/PerconaFT/ft/txn/txn.cc index 7327cbd9d24..7152833d88d 100644 --- a/storage/tokudb/PerconaFT/ft/txn/txn.cc +++ b/storage/tokudb/PerconaFT/ft/txn/txn.cc @@ -723,7 +723,11 @@ time_t toku_txn_get_start_time(struct tokutxn *txn) { return txn->start_time; } +extern uint force_recovery; int toku_txn_reads_txnid(TXNID txnid, TOKUTXN txn, bool is_provisional UU()) { + if(force_recovery) { + return TOKUDB_ACCEPT; + } int r = 0; TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(txn); if (oldest_live_in_snapshot == TXNID_NONE && txnid < txn->snapshot_txnid64) { diff --git a/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc index 786a6ef0546..0f287429542 100644 --- a/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc +++ b/storage/tokudb/PerconaFT/portability/toku_instr_mysql.cc @@ -359,7 +359,16 @@ void toku_instr_rwlock_wrlock_wait_end( void toku_instr_rwlock_unlock(toku_pthread_rwlock_t &rwlock) { if (rwlock.psi_rwlock) + +// Due to change introduced in e4148f2a22922687f7652c4e3d21a22da07c9e78 +// PSI rwlock version and interface changed +// PSI_CURRENT_RWLOCK_VERSION is not defined in MySQL 5.6 and is defined +// as 1 in 5.7 and < 8.0.17 +#if defined(PSI_CURRENT_RWLOCK_VERSION) && (PSI_CURRENT_RWLOCK_VERSION == 2) + PSI_RWLOCK_CALL(unlock_rwlock)(rwlock.psi_rwlock, PSI_RWLOCK_UNLOCK); +#else PSI_RWLOCK_CALL(unlock_rwlock)(rwlock.psi_rwlock); +#endif } #endif // TOKU_MYSQL_WITH_PFS diff --git a/storage/tokudb/PerconaFT/src/ydb.cc b/storage/tokudb/PerconaFT/src/ydb.cc index 8dcbba361b9..4d549c0ac73 100644 --- a/storage/tokudb/PerconaFT/src/ydb.cc +++ b/storage/tokudb/PerconaFT/src/ydb.cc @@ -39,6 +39,9 @@ Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. extern const char *toku_patent_string; const char *toku_copyright_string = "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."; + +extern int writing_rollback; + #include #include #include @@ -87,6 +90,10 @@ const char *toku_copyright_string = "Copyright (c) 2006, 2015, Percona and/or it int toku_close_trace_file (void) { return 0; } #endif +extern "C" { + uint force_recovery = 0; +} + // Set when env is panicked, never cleared. static int env_is_panicked = 0; @@ -223,6 +230,9 @@ env_fs_redzone(DB_ENV *env, uint64_t total) { // Check the available space in the file systems used by tokuft and erect barriers when available space gets low. static int env_fs_poller(void *arg) { + if(force_recovery == 6) { + return 0; + } DB_ENV *env = (DB_ENV *) arg; int r; @@ -307,6 +317,9 @@ env_fs_init(DB_ENV *env) { // Initialize the minicron that polls file system space static int env_fs_init_minicron(DB_ENV *env) { + if(force_recovery == 6) { + return 0; + } int r = toku_minicron_setup(&env->i->fs_poller, env->i->fs_poll_time*1000, env_fs_poller, env); if (r == 0) env->i->fs_poller_is_init = true; @@ -709,7 +722,7 @@ static int validate_env(DB_ENV *env, } // Test for fileops directory - if (r == 0) { + if (r == 0 && force_recovery != 6) { path = toku_construct_full_name( 2, env->i->dir, toku_product_name_strings.fileopsdirectory); assert(path); @@ -752,7 +765,7 @@ static int validate_env(DB_ENV *env, } // Test for recovery log - if ((r == 0) && (env->i->open_flags & DB_INIT_LOG)) { + if ((r == 0) && (env->i->open_flags & DB_INIT_LOG) && force_recovery != 6) { // if using transactions, test for existence of log r = ydb_recover_log_exists(env); // return 0 or ENOENT if (expect_newenv && (r != ENOENT)) @@ -813,6 +826,27 @@ unlock_single_process(DB_ENV *env) { // (The set of necessary files is defined in the function validate_env() above.) static int env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { + + if(force_recovery == 6) { + { + const int len = strlen(toku_product_name_strings.rollback_cachefile); + toku_product_name_strings.rollback_cachefile[len] = '2'; + toku_product_name_strings.rollback_cachefile[len+1] = 0; + } + + { + const int len = strlen(toku_product_name_strings.single_process_lock); + toku_product_name_strings.single_process_lock[len] = '2'; + toku_product_name_strings.single_process_lock[len+1] = 0; + } + + { + const int len = strlen(toku_product_name_strings.environmentdictionary); + toku_product_name_strings.environmentdictionary[len] = '2'; + toku_product_name_strings.environmentdictionary[len+1] = 0; + } + } + HANDLE_PANICKED_ENV(env); int r; bool newenv; // true iff creating a new environment @@ -903,7 +937,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { bool need_rollback_cachefile; need_rollback_cachefile = false; - if (flags & (DB_INIT_TXN | DB_INIT_LOG)) { + if (flags & (DB_INIT_TXN | DB_INIT_LOG) && force_recovery != 6) { need_rollback_cachefile = true; } @@ -916,7 +950,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { r = ydb_maybe_upgrade_env(env, &last_lsn_of_clean_shutdown_read_from_log, &upgrade_in_progress); if (r!=0) goto cleanup; - if (upgrade_in_progress) { + if (upgrade_in_progress || force_recovery == 6) { // Delete old rollback file. There was a clean shutdown, so it has nothing useful, // and there is no value in upgrading it. It is simpler to just create a new one. char* rollback_filename = toku_construct_full_name(2, env->i->dir, toku_product_name_strings.rollback_cachefile); @@ -934,9 +968,13 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { unused_flags &= ~DB_INIT_TXN & ~DB_INIT_LOG; + if(force_recovery == 6) { + flags |= DB_INIT_LOG | DB_INIT_TXN; + } + // do recovery only if there exists a log and recovery is requested // otherwise, a log is created when the logger is opened later - if (!newenv) { + if (!newenv && force_recovery == 0) { if (flags & DB_INIT_LOG) { // the log does exist if (flags & DB_RECOVER) { @@ -1005,7 +1043,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { assert (using_txns); toku_logger_set_cachetable(env->i->logger, env->i->cachetable); if (!toku_logger_rollback_is_open(env->i->logger)) { - bool create_new_rollback_file = newenv | upgrade_in_progress; + bool create_new_rollback_file = newenv | upgrade_in_progress | (force_recovery == 6); r = toku_logger_open_rollback(env->i->logger, env->i->cachetable, create_new_rollback_file); if (r != 0) { r = toku_ydb_do_error(env, r, "Cant open rollback\n"); @@ -1024,6 +1062,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { assert_zero(r); r = toku_db_use_builtin_key_cmp(env->i->persistent_environment); assert_zero(r); + writing_rollback++; r = toku_db_open_iname(env->i->persistent_environment, txn, toku_product_name_strings.environmentdictionary, DB_CREATE, mode); if (r != 0) { r = toku_ydb_do_error(env, r, "Cant open persistent env\n"); @@ -1056,6 +1095,7 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { assert_zero(r); } capture_persistent_env_contents(env, txn); + writing_rollback--; } { r = toku_db_create(&env->i->directory, env, 0); @@ -1074,8 +1114,10 @@ env_open(DB_ENV * env, const char *home, uint32_t flags, int mode) { txn = NULL; } cp = toku_cachetable_get_checkpointer(env->i->cachetable); - r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, STARTUP_CHECKPOINT); - assert_zero(r); + if (!force_recovery) { + r = toku_checkpoint(cp, env->i->logger, NULL, NULL, NULL, NULL, STARTUP_CHECKPOINT); + } + writing_rollback--; env_fs_poller(env); // get the file system state at startup r = env_fs_init_minicron(env); if (r != 0) { diff --git a/storage/tokudb/PerconaFT/src/ydb_db.cc b/storage/tokudb/PerconaFT/src/ydb_db.cc index 40c4a7f6577..ac44b8e7fd3 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.cc +++ b/storage/tokudb/PerconaFT/src/ydb_db.cc @@ -323,6 +323,7 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP // DB_THREAD is implicitly supported and DB_BLACKHOLE is supported at the ft-layer unused_flags &= ~DB_THREAD; unused_flags &= ~DB_BLACKHOLE; + unused_flags &= ~DB_RDONLY; // check for unknown or conflicting flags if (unused_flags) return EINVAL; // unknown flags @@ -404,7 +405,7 @@ int toku_db_lt_on_create_callback(toku::locktree *lt, void *extra) { FT_HANDLE ft_handle = info->ft_handle; FT_HANDLE cloned_ft_handle; - r = toku_ft_handle_clone(&cloned_ft_handle, ft_handle, ttxn); + r = toku_ft_handle_clone(&cloned_ft_handle, ft_handle, ttxn, info->open_rw); if (r == 0) { assert(lt->get_userdata() == NULL); lt->set_userdata(cloned_ft_handle); @@ -465,6 +466,7 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t flags&=~DB_READ_COMMITTED; flags&=~DB_SERIALIZABLE; flags&=~DB_IS_HOT_INDEX; + flags&=~DB_RDONLY; // unknown or conflicting flags are bad int unknown_flags = flags & ~DB_THREAD; unknown_flags &= ~DB_BLACKHOLE; @@ -479,11 +481,12 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t db->i->open_flags = flags; db->i->open_mode = mode; + bool open_rw = mode & (S_IWUSR | S_IWOTH | S_IWGRP); FT_HANDLE ft_handle = db->i->ft_handle; int r = toku_ft_handle_open(ft_handle, iname_in_env, is_db_create, is_db_excl, db->dbenv->i->cachetable, - txn ? db_txn_struct_i(txn)->tokutxn : nullptr); + txn ? db_txn_struct_i(txn)->tokutxn : nullptr, open_rw); if (r != 0) { goto out; } @@ -505,6 +508,7 @@ int toku_db_open_iname(DB * db, DB_TXN * txn, const char *iname_in_env, uint32_t struct lt_on_create_callback_extra on_create_extra = { .txn = txn, .ft_handle = db->i->ft_handle, + open_rw }; db->i->lt = db->dbenv->i->ltm.get_lt(db->i->dict_id, toku_ft_get_comparator(db->i->ft_handle), diff --git a/storage/tokudb/PerconaFT/src/ydb_db.h b/storage/tokudb/PerconaFT/src/ydb_db.h index ab8fcd2a401..c260e9d0fbe 100644 --- a/storage/tokudb/PerconaFT/src/ydb_db.h +++ b/storage/tokudb/PerconaFT/src/ydb_db.h @@ -67,6 +67,7 @@ void ydb_db_layer_get_status(YDB_DB_LAYER_STATUS statp); struct lt_on_create_callback_extra { DB_TXN *txn; FT_HANDLE ft_handle; + bool open_rw; }; int toku_db_lt_on_create_callback(toku::locktree *lt, void *extra); void toku_db_lt_on_destroy_callback(toku::locktree *lt); diff --git a/storage/tokudb/PerconaFT/tools/tokuftdump.cc b/storage/tokudb/PerconaFT/tools/tokuftdump.cc index 2838ae5182e..44edb15162a 100644 --- a/storage/tokudb/PerconaFT/tools/tokuftdump.cc +++ b/storage/tokudb/PerconaFT/tools/tokuftdump.cc @@ -181,7 +181,7 @@ static void dump_header(FT ft) { printf(" time_of_creation= %" PRIu64 " %s\n", ft->h->time_of_creation, timestr); format_time(ft->h->time_of_last_modification, timestr); printf(" time_of_last_modification=%" PRIu64 " %s\n", ft->h->time_of_last_modification, timestr); - printf(" dirty=%d\n", ft->h->dirty); + printf(" dirty=%d\n", ft->h->dirty()); printf(" checkpoint_count=%" PRId64 "\n", ft->h->checkpoint_count); printf(" checkpoint_lsn=%" PRId64 "\n", ft->h->checkpoint_lsn.lsn); printf(" nodesize=%u\n", ft->h->nodesize); diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index a4dc9f6e326..39931e747ce 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -1333,7 +1333,7 @@ int ha_tokudb::open_main_dictionary( NULL, DB_BTREE, open_flags, - 0); + S_IWUSR); if (error) { goto exit; } @@ -1396,7 +1396,7 @@ int ha_tokudb::open_secondary_dictionary( } - error = (*ptr)->open(*ptr, txn, newname, NULL, DB_BTREE, open_flags, 0); + error = (*ptr)->open(*ptr, txn, newname, NULL, DB_BTREE, open_flags, S_IWUSR); if (error) { my_errno = error; goto cleanup; diff --git a/storage/tokudb/tokudb_status.h b/storage/tokudb/tokudb_status.h index 5cca54e52c9..07772bdc92a 100644 --- a/storage/tokudb/tokudb_status.h +++ b/storage/tokudb/tokudb_status.h @@ -201,7 +201,7 @@ int create( name, NULL, DB_BTREE, DB_CREATE | DB_EXCL, - 0); + S_IWUSR); } if (error == 0) { *status_db_ptr = status_db; @@ -230,7 +230,7 @@ int open( NULL, DB_BTREE, DB_THREAD, - 0); + S_IWUSR); } if (error == 0) { uint32_t pagesize = 0;