refs #5638, merge to main, finally.

git-svn-id: file:///svn/toku/tokudb@51767 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Zardosht Kasheff 2013-04-17 00:01:26 -04:00 committed by Yoni Fogel
parent e0a0785900
commit ebb5315335
45 changed files with 1781 additions and 852 deletions

View file

@ -65,6 +65,7 @@ set(FT_SOURCES
roll
sub_block
txn
txn_child_manager
txn_manager
ule
x1764

View file

@ -2623,9 +2623,9 @@ int set_filenum_in_array(const FT &ft, const uint32_t index, FILENUM *const arra
return 0;
}
int log_open_txn (const TOKUTXN &txn, const uint32_t UU(index), checkpointer * const cp);
int log_open_txn (const TOKUTXN &txn, const uint32_t UU(index), checkpointer * const cp) {
static int log_open_txn (TOKUTXN txn, void* extra) {
int r;
checkpointer* cp = (checkpointer *)extra;
TOKULOGGER logger = txn->logger;
FILENUMS open_filenums;
uint32_t num_filenums = txn->open_fts.size();
@ -4241,9 +4241,11 @@ void checkpointer::log_begin_checkpoint() {
}
// Write open transactions to the log.
r = toku_txn_manager_iter_over_live_txns<checkpointer, log_open_txn> (
r = toku_txn_manager_iter_over_live_txns(
m_logger->txn_manager,
this);
log_open_txn,
this
);
assert(r == 0);
}

View file

@ -2287,9 +2287,9 @@ ft_leaf_run_gc(FTNODE node, FT ft) {
xid_omt_t live_root_txns;
toku_txn_manager_clone_state_for_gc(
logger->txn_manager,
snapshot_txnids,
referenced_xids,
live_root_txns
&snapshot_txnids,
&referenced_xids,
&live_root_txns
);
// Perform garbage collection. Provide a full snapshot of the transaction
@ -2303,8 +2303,8 @@ ft_leaf_run_gc(FTNODE node, FT ft) {
// Free the OMT's we used for garbage collecting.
snapshot_txnids.destroy();
live_root_txns.destroy();
referenced_xids.destroy();
live_root_txns.destroy();
}
}
@ -3044,7 +3044,7 @@ void toku_ft_load_recovery(TOKUTXN txn, FILENUM old_filenum, char const * new_in
BYTESTRING new_iname_bs = {.len=(uint32_t) strlen(new_iname), .data=(char*)new_iname};
toku_logger_save_rollback_load(txn, old_filenum, &new_iname_bs);
if (do_log && logger) {
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
toku_log_load(logger, load_lsn, do_fsync, txn, xid, old_filenum, new_iname_bs);
}
}
@ -3061,7 +3061,7 @@ void toku_ft_hot_index_recovery(TOKUTXN txn, FILENUMS filenums, int do_fsync, in
// write to the rollback log
toku_logger_save_rollback_hot_index(txn, &filenums);
if (do_log && logger) {
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
// write to the recovery log
toku_log_hot_index(logger, hot_index_lsn, do_fsync, txn, xid, filenums);
}
@ -3111,7 +3111,7 @@ toku_ft_log_put (TOKUTXN txn, FT_HANDLE brt, const DBT *key, const DBT *val) {
if (logger) {
BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
BYTESTRING valbs = {.len=val->size, .data=(char *) val->data};
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
toku_log_enq_insert(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(brt->ft->cf), xid, keybs, valbs);
}
}
@ -3130,7 +3130,7 @@ toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32
FILENUMS filenums = {.num = num_fts, .filenums = fnums};
BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
BYTESTRING valbs = {.len=val->size, .data=(char *) val->data};
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE;
toku_log_enq_insert_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs);
}
@ -3139,7 +3139,7 @@ toku_ft_log_put_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32
void toku_ft_maybe_insert (FT_HANDLE ft_h, DBT *key, DBT *val, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging, enum ft_msg_type type) {
paranoid_invariant(type==FT_INSERT || type==FT_INSERT_NO_OVERWRITE);
XIDS message_xids = xids_get_root_xids(); //By default use committed messages
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
if (txn) {
BYTESTRING keybs = {key->size, (char *) key->data};
toku_logger_save_rollback_cmdinsert(txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs);
@ -3180,7 +3180,7 @@ ft_send_update_msg(FT_HANDLE brt, FT_MSG_S *msg, TOKUTXN txn) {
void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_function_extra,
TOKUTXN txn, bool oplsn_valid, LSN oplsn,
bool do_logging) {
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
if (txn) {
BYTESTRING keybs = { key->size, (char *) key->data };
toku_logger_save_rollback_cmdupdate(
@ -3212,7 +3212,7 @@ void toku_ft_maybe_update(FT_HANDLE ft_h, const DBT *key, const DBT *update_func
void toku_ft_maybe_update_broadcast(FT_HANDLE ft_h, const DBT *update_function_extra,
TOKUTXN txn, bool oplsn_valid, LSN oplsn,
bool do_logging, bool is_resetting_op) {
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
uint8_t resetting = is_resetting_op ? 1 : 0;
if (txn) {
toku_logger_save_rollback_cmdupdatebroadcast(txn, toku_cachefile_filenum(ft_h->ft->cf), resetting);
@ -3263,7 +3263,7 @@ toku_ft_log_del(TOKUTXN txn, FT_HANDLE brt, const DBT *key) {
TOKULOGGER logger = toku_txn_logger(txn);
if (logger) {
BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
toku_log_enq_delete_any(logger, (LSN*)0, 0, txn, toku_cachefile_filenum(brt->ft->cf), xid, keybs);
}
}
@ -3282,7 +3282,7 @@ toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32
FILENUMS filenums = {.num = num_fts, .filenums = fnums};
BYTESTRING keybs = {.len=key->size, .data=(char *) key->data};
BYTESTRING valbs = {.len=val->size, .data=(char *) val->data};
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
FILENUM src_filenum = src_ft ? toku_cachefile_filenum(src_ft->ft->cf) : FILENUM_NONE;
toku_log_enq_delete_multiple(logger, (LSN*)0, 0, txn, src_filenum, filenums, xid, keybs, valbs);
}
@ -3290,7 +3290,7 @@ toku_ft_log_del_multiple (TOKUTXN txn, FT_HANDLE src_ft, FT_HANDLE *brts, uint32
void toku_ft_maybe_delete(FT_HANDLE ft_h, DBT *key, TOKUTXN txn, bool oplsn_valid, LSN oplsn, bool do_logging) {
XIDS message_xids = xids_get_root_xids(); //By default use committed messages
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
if (txn) {
BYTESTRING keybs = {key->size, (char *) key->data};
toku_logger_save_rollback_cmddelete(txn, toku_cachefile_filenum(ft_h->ft->cf), &keybs);
@ -3492,7 +3492,7 @@ void toku_ft_change_descriptor(
if (do_log) {
TOKULOGGER logger = toku_txn_logger(txn);
TXNID xid = toku_txn_get_txnid(txn);
TXNID_PAIR xid = toku_txn_get_txnid(txn);
toku_log_change_fdescriptor(
logger, NULL, 0,
txn,
@ -3905,7 +3905,7 @@ static int
does_txn_read_entry(TXNID id, TOKUTXN context) {
int rval;
TXNID oldest_live_in_snapshot = toku_get_oldest_in_live_root_txn_list(context);
if (id < oldest_live_in_snapshot || id == context->ancestor_txnid64) {
if (id < oldest_live_in_snapshot || id == context->txnid.parent_id64) {
rval = TOKUDB_ACCEPT;
}
else if (id > context->snapshot_txnid64 || toku_is_txn_in_live_root_txn_list(*context->live_root_txn_list, id)) {
@ -5811,6 +5811,7 @@ int toku_ft_layer_init(void) {
partitioned_counters_init();
status_init();
txn_status_init();
toku_checkpoint_init();
toku_ft_serialize_layer_init();
toku_mutex_init(&ft_open_close_lock, NULL);
@ -5823,6 +5824,7 @@ void toku_ft_layer_destroy(void) {
toku_ft_serialize_layer_destroy();
toku_checkpoint_destroy();
status_destroy();
txn_status_destroy();
partitioned_counters_destroy();
//Portability must be cleaned up last
toku_portability_destroy();

View file

@ -369,6 +369,7 @@ serialize_ft_min_size (uint32_t version) {
size_t size = 0;
switch(version) {
case FT_LAYOUT_VERSION_23:
case FT_LAYOUT_VERSION_22:
case FT_LAYOUT_VERSION_21:
size += sizeof(MSN); // max_msn_in_ft

View file

@ -360,7 +360,7 @@ void toku_ft_create(FT *ftp, FT_OPTIONS options, CACHEFILE cf, TOKUTXN txn) {
memset(&ft->descriptor, 0, sizeof(ft->descriptor));
memset(&ft->cmp_descriptor, 0, sizeof(ft->cmp_descriptor));
ft->h = ft_header_create(options, make_blocknum(0), (txn ? txn->ancestor_txnid64 : TXNID_NONE));
ft->h = ft_header_create(options, make_blocknum(0), (txn ? txn->txnid.parent_id64: TXNID_NONE));
toku_ft_init_reflock(ft);

View file

@ -31,6 +31,7 @@ enum ft_layout_version_e {
FT_LAYOUT_VERSION_21 = 21, // Ming: Add max_msn_in_ft to header,
// Removed log suppression logentry
FT_LAYOUT_VERSION_22 = 22, // Ming: Add oldest known referenced xid to each ftnode, for better garbage collection
FT_LAYOUT_VERSION_23 = 23, // Riddler: change logentries that log transactions to store TXNID_PAIRs instead of TXNIDs
FT_NEXT_VERSION, // the version after the current version
FT_LAYOUT_VERSION = FT_NEXT_VERSION-1, // A hack so I don't have to change this line.
FT_LAYOUT_MIN_SUPPORTED_VERSION = FT_LAYOUT_VERSION_13, // Minimum version supported

View file

@ -2579,7 +2579,7 @@ toku_db_badformat(void) {
static size_t
serialize_rollback_log_size(ROLLBACK_LOG_NODE log) {
size_t size = node_header_overhead //8 "tokuroll", 4 version, 4 version_original, 4 build_id
+8 //TXNID
+16 //TXNID_PAIR
+8 //sequence
+8 //blocknum
+8 //previous (blocknum)
@ -2599,7 +2599,7 @@ serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calc
wbuf_nocrc_int(&wb, log->layout_version);
wbuf_nocrc_int(&wb, log->layout_version_original);
wbuf_nocrc_uint(&wb, BUILD_ID);
wbuf_nocrc_TXNID(&wb, log->txnid);
wbuf_nocrc_TXNID_PAIR(&wb, log->txnid);
wbuf_nocrc_ulonglong(&wb, log->sequence);
wbuf_nocrc_BLOCKNUM(&wb, log->blocknum);
wbuf_nocrc_BLOCKNUM(&wb, log->previous);
@ -2752,7 +2752,7 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, uint32_t fullhash, ROLLBA
result->dirty = false;
//TODO: Maybe add descriptor (or just descriptor version) here eventually?
//TODO: This is hard.. everything is shared in a single dictionary.
rbuf_TXNID(rb, &result->txnid);
rbuf_TXNID_PAIR(rb, &result->txnid);
result->sequence = rbuf_ulonglong(rb);
result->blocknum = rbuf_blocknum(rb);
if (result->blocknum.b != blocknum.b) {

View file

@ -480,7 +480,7 @@ int toku_ft_loader_internal_init (/* out */ FTLOADER *blp,
bl->N = N;
bl->load_lsn = load_lsn;
if (txn) {
bl->load_root_xid = txn->ancestor_txnid64;
bl->load_root_xid = txn->txnid.parent_id64;
}
else {
bl->load_root_xid = TXNID_NONE;

View file

@ -38,9 +38,18 @@ typedef const void *bytevec;
typedef int64_t DISKOFF; /* Offset in a disk. -1 is the NULL pointer. */
typedef uint64_t TXNID;
// Two-part transaction identifier: a parent id and a child id, each a TXNID.
// NOTE(review): from the usages visible in this change, parent_id64 appears to
// take over the role of the old ancestor_txnid64 (id of the root transaction),
// and child_id64 appears to be TXNID_NONE for a root transaction -- confirm
// against the txn / txn_child_manager code.
typedef struct txnid_pair_s {
TXNID parent_id64;
TXNID child_id64;
} TXNID_PAIR;
#define TXNID_NONE_LIVING ((TXNID)0)
#define TXNID_NONE ((TXNID)0)
// Pair with both ids set to TXNID_NONE; used where "no transaction id" is needed.
static const TXNID_PAIR TXNID_PAIR_NONE = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE };
typedef struct blocknum_s { int64_t b; } BLOCKNUM; // make a struct so that we will notice type problems.
typedef struct gid_s { uint8_t *gid; } GID; // the gid is of size [DB_GID_SIZE]
typedef TOKU_XA_XID *XIDP; // this is the type that's passed to the logger code (so that we don't have to copy all 152 bytes when only a subset are even valid.)

View file

@ -21,6 +21,7 @@
#include <portability/toku_pthread.h>
#include <util/omt.h>
#include "rollback_log_node_cache.h"
#include "txn_child_manager.h"
using namespace toku;
// Locking for the logger
@ -139,14 +140,29 @@ struct txn_roll_info {
struct tokutxn {
// These don't change after create:
const uint64_t txnid64; // this happens to be the first lsn
const uint64_t ancestor_txnid64; // this is the lsn of root transaction
const uint64_t snapshot_txnid64; // this is the lsn of the snapshot
TXNID_PAIR txnid;
uint64_t snapshot_txnid64; // this is the lsn of the snapshot
const TXN_SNAPSHOT_TYPE snapshot_type;
const bool for_recovery;
const TOKULOGGER logger;
const TOKUTXN parent;
// These don't either but they're created in a way that's hard to make
// The child txn is protected by the child_txn_manager lock
// and by the user contract. The user contract states (and is
// enforced at the ydb layer) that a child txn should not be created
// while another child exists. The txn_child_manager will protect
// other threads from trying to read this value while another
// thread commits/aborts the child
TOKUTXN child;
// statically allocated child manager, if this
// txn is a root txn, this manager will be used and set to
// child_manager for this transaction and all of its children
txn_child_manager child_manager_s;
// child manager for this transaction, all of its children,
// and all of its ancestors
txn_child_manager* child_manager;
// These don't change but they're created in a way that's hard to make
// strictly const.
DB_TXN *container_db_txn; // reference to DB_TXN that contains this tokutxn
xid_omt_t *live_root_txn_list; // the root txns live when the root ancestor (self if a root) started.
@ -170,9 +186,14 @@ struct tokutxn {
omt<FT> open_fts; // a collection of the fts that we touched. Indexed by filenum.
struct txn_roll_info roll_info; // Info used to manage rollback entries
// Protected by the txn manager lock:
// Mutex that protects transitions of the state variable.
// The rest of these variables are used by the txn code and
// hot indexing to ensure that, while hot indexing is processing a
// leafentry, a TOKUTXN cannot disappear or change state out from
// underneath it.
toku_mutex_t state_lock;
toku_cond_t state_cond;
TOKUTXN_STATE state;
struct toku_list prepared_txns_link; // list of prepared transactions
uint32_t num_pin; // number of threads (all hot indexes) that want this
// txn to not transition to commit or abort
};
@ -234,6 +255,10 @@ static inline int toku_logsizeof_TXNID (TXNID txnid __attribute__((__unused__)))
return 8;
}
// Size in bytes that a TXNID_PAIR takes in the log: two ids of 8 bytes each.
// The argument is not inspected; the size is fixed.
static inline int toku_logsizeof_TXNID_PAIR (TXNID_PAIR txnid __attribute__((__unused__))) {
    const int bytes_per_id = 8;
    return 2 * bytes_per_id;
}
static inline int toku_logsizeof_XIDP (XIDP xid) {
assert(0<=xid->gtrid_length && xid->gtrid_length<=64);
assert(0<=xid->bqual_length && xid->bqual_length<=64);

View file

@ -31,6 +31,7 @@ static inline int toku_copy_BYTESTRING(BYTESTRING *target, BYTESTRING val) {
return 0;
}
static inline void toku_free_TXNID(TXNID txnid __attribute__((__unused__))) {}
// No-op: the pair is received by value and the body is empty, so nothing is released.
static inline void toku_free_TXNID_PAIR(TXNID_PAIR txnid __attribute__((__unused__))) {}
static inline void toku_free_LSN(LSN lsn __attribute__((__unused__))) {}
static inline void toku_free_uint64_t(uint64_t u __attribute__((__unused__))) {}
static inline void toku_free_uint32_t(uint32_t u __attribute__((__unused__))) {}

View file

@ -67,7 +67,7 @@ const struct logtype rollbacks[] = {
{"FILENUM", "filenum", 0},
{"BYTESTRING", "key", 0},
NULLFIELD}, LOG_BEGIN_ACTION_NA},
{"rollinclude", 'r', FA{{"TXNID", "xid", 0},
{"rollinclude", 'r', FA{{"TXNID_PAIR", "xid", 0},
{"uint64_t", "num_nodes", 0},
{"BLOCKNUM", "spilled_head", 0},
{"uint32_t", "spilled_head_hash", 0},
@ -114,8 +114,8 @@ const struct logtype logtypes[] = {
NULLFIELD}, IGNORE_LOG_BEGIN},
//We do not use a TXNINFO struct since recovery log has
//FILENUMS and TOKUTXN has FTs (for open_fts)
{"xstillopen", 's', FA{{"TXNID", "xid", 0},
{"TXNID", "parentxid", 0},
{"xstillopen", 's', FA{{"TXNID_PAIR", "xid", 0},
{"TXNID_PAIR", "parentxid", 0},
{"uint64_t", "rollentry_raw_count", 0},
{"FILENUMS", "open_filenums", 0},
{"uint8_t", "force_fsync_on_commit", 0},
@ -126,7 +126,7 @@ const struct logtype logtypes[] = {
{"BLOCKNUM", "current_rollback", 0},
NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions
// prepared txns need a gid
{"xstillopenprepared", 'p', FA{{"TXNID", "xid", 0},
{"xstillopenprepared", 'p', FA{{"TXNID_PAIR", "xid", 0},
{"XIDP", "xa_xid", 0}, // prepared transactions need a gid, and have no parentxid.
{"uint64_t", "rollentry_raw_count", 0},
{"FILENUMS", "open_filenums", 0},
@ -138,12 +138,12 @@ const struct logtype logtypes[] = {
{"BLOCKNUM", "current_rollback", 0},
NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED}, // record all transactions
// Records produced by transactions
{"xbegin", 'b', FA{{"TXNID", "xid", 0},{"TXNID", "parentxid", 0},NULLFIELD}, IGNORE_LOG_BEGIN},
{"xcommit",'C', FA{{"TXNID", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
{"xprepare",'P', FA{{"TXNID", "xid", 0}, {"XIDP", "xa_xid", 0}, NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
{"xabort", 'q', FA{{"TXNID", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
{"xbegin", 'b', FA{{"TXNID_PAIR", "xid", 0},{"TXNID_PAIR", "parentxid", 0},NULLFIELD}, IGNORE_LOG_BEGIN},
{"xcommit",'C', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
{"xprepare",'P', FA{{"TXNID_PAIR", "xid", 0}, {"XIDP", "xa_xid", 0}, NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
{"xabort", 'q', FA{{"TXNID_PAIR", "xid", 0},NULLFIELD}, ASSERT_BEGIN_WAS_LOGGED},
//TODO: #2037 Add dname
{"fcreate", 'F', FA{{"TXNID", "xid", 0},
{"fcreate", 'F', FA{{"TXNID_PAIR", "xid", 0},
{"FILENUM", "filenum", 0},
{"BYTESTRING", "iname", 0},
{"uint32_t", "mode", "0%o"},
@ -162,32 +162,32 @@ const struct logtype logtypes[] = {
{"FILENUM", "filenum", 0},
NULLFIELD}, IGNORE_LOG_BEGIN},
//TODO: #2037 Add dname
{"fdelete", 'U', FA{{"TXNID", "xid", 0},
{"fdelete", 'U', FA{{"TXNID_PAIR", "xid", 0},
{"FILENUM", "filenum", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"enq_insert", 'I', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "value", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"enq_insert_no_overwrite", 'i', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "value", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"enq_delete_any", 'E', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "key", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"enq_insert_multiple", 'm', FA{{"FILENUM", "src_filenum", 0},
{"FILENUMS", "dest_filenums", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "src_key", 0},
{"BYTESTRING", "src_val", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"enq_delete_multiple", 'M', FA{{"FILENUM", "src_filenum", 0},
{"FILENUMS", "dest_filenums", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "src_key", 0},
{"BYTESTRING", "src_val", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
@ -211,26 +211,26 @@ const struct logtype logtypes[] = {
{"shutdown", '0', FA{{"uint64_t", "timestamp", 0},
{"TXNID", "last_xid", 0},
NULLFIELD}, IGNORE_LOG_BEGIN},
{"load", 'l', FA{{"TXNID", "xid", 0},
{"load", 'l', FA{{"TXNID_PAIR", "xid", 0},
{"FILENUM", "old_filenum", 0},
{"BYTESTRING", "new_iname", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
// #2954
{"hot_index", 'h', FA{{"TXNID", "xid", 0},
{"hot_index", 'h', FA{{"TXNID_PAIR", "xid", 0},
{"FILENUMS", "hot_index_filenums", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"enq_update", 'u', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "key", 0},
{"BYTESTRING", "extra", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"enq_updatebroadcast", 'B', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "extra", 0},
{"bool", "is_resetting_op", 0},
NULLFIELD}, SHOULD_LOG_BEGIN},
{"change_fdescriptor", 'D', FA{{"FILENUM", "filenum", 0},
{"TXNID", "xid", 0},
{"TXNID_PAIR", "xid", 0},
{"BYTESTRING", "old_descriptor", 0},
{"BYTESTRING", "new_descriptor", 0},
{"bool", "update_cmp_descriptor", 0},

View file

@ -289,7 +289,7 @@ is_closed:
void toku_logger_shutdown(TOKULOGGER logger) {
if (logger->is_open) {
TXN_MANAGER mgr = logger->txn_manager;
if (toku_txn_manager_num_live_txns(mgr) == 0) {
if (toku_txn_manager_num_live_root_txns(mgr) == 0) {
TXNID last_xid = toku_txn_manager_get_last_xid(mgr);
toku_log_shutdown(logger, NULL, true, 0, last_xid);
}
@ -718,6 +718,13 @@ void toku_logger_trim_log_files (TOKULOGGER logger, bool trim_log_files)
logger->trim_log_files = trim_log_files;
}
// Called during close of environment to ensure that transactions don't exist.
// Forwards the question to the logger's transaction manager and returns its answer.
bool toku_logger_txns_exist(TOKULOGGER logger) {
    const bool any_txns = toku_txn_manager_txns_exist(logger->txn_manager);
    return any_txns;
}
void toku_logger_maybe_fsync(TOKULOGGER logger, LSN lsn, int do_fsync)
// Effect: If fsync is nonzero, then make sure that the log is flushed and synced at least up to lsn.
// Entry: Holds input lock. The log entry has already been written to the input buffer.
@ -918,6 +925,17 @@ int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t
return toku_fread_uint64_t (f, txnid, checksum, len);
}
// Read a TXNID_PAIR from f as two consecutive TXNIDs: the parent id first,
// then the child id. checksum and len are handed to toku_fread_TXNID for
// each of the two reads.
// Returns 0 on success. If either read fails, its nonzero result is returned
// immediately and *txnid is left unmodified (previously failures were ignored
// and uninitialized values could be stored into *txnid).
int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len) {
    TXNID parent;
    TXNID child;
    int r = toku_fread_TXNID(f, &parent, checksum, len);
    if (r != 0) {
        return r;
    }
    r = toku_fread_TXNID(f, &child, checksum, len);
    if (r != 0) {
        return r;
    }
    // Only publish the pair once both halves were read successfully.
    txnid->parent_id64 = parent;
    txnid->child_id64 = child;
    return 0;
}
int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len) {
// These reads are verbose because XA defined the fields as "long", but we use 4 bytes, 1 byte and 1 byte respectively.
TOKU_XA_XID *XMALLOC(xid);
@ -999,6 +1017,14 @@ int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1
return 0;
}
// Read a TXNID_PAIR from inf and print it to outf as " <fieldname>=<parent>,<child>".
// The comma keeps the two decimal ids distinguishable; without a separator the
// digits of the parent and child ids would run together.
// format is unused. Returns 0 on success, or the nonzero result of
// toku_fread_TXNID_PAIR if the read fails (nothing is printed in that case).
int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) {
    TXNID_PAIR v;
    int r = toku_fread_TXNID_PAIR(inf, &v, checksum, len);
    if (r != 0) {
        return r;
    }
    fprintf(outf, " %s=%" PRIu64 ",%" PRIu64, fieldname, v.parent_id64, v.child_id64);
    return 0;
}
int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__))) {
XIDP vp;
int r = toku_fread_XIDP(inf, &vp, checksum, len);
@ -1145,9 +1171,10 @@ int toku_read_logmagic (FILE *f, uint32_t *versionp) {
return 0;
}
TXNID toku_txn_get_txnid (TOKUTXN txn) {
if (txn==0) return 0;
else return txn->txnid64;
// Return the TXNID_PAIR stored in txn. A null txn yields a pair whose parent
// and child ids are both TXNID_NONE.
TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn) {
    if (txn != 0) {
        return txn->txnid;
    }
    TXNID_PAIR none = { .parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE};
    return none;
}
LSN toku_logger_last_lsn(TOKULOGGER logger) {
@ -1158,8 +1185,19 @@ TOKULOGGER toku_txn_logger (TOKUTXN txn) {
return txn ? txn->logger : 0;
}
void toku_txnid2txn(TOKULOGGER logger, TXNID txnid, TOKUTXN *result) {
toku_txn_manager_id2txn(logger->txn_manager, txnid, result);
// Look up the TOKUTXN identified by txnid and store it in *result.
// The txn manager is first asked for a transaction matching txnid. If it
// finds none, or the one it finds carries the same child id as txnid, that
// answer is stored directly. Otherwise the found transaction's child manager
// is asked to locate the transaction by xid.
// The txn manager is suspended for the whole lookup and the child manager for
// the duration of its own search.
// NOTE(review): suspend()/resume() presumably take and release the respective
// manager locks -- confirm in txn_manager / txn_child_manager.
void toku_txnid2txn(TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result) {
    TOKUTXN root = NULL;
    toku_txn_manager_suspend(logger->txn_manager);
    toku_txn_manager_id2txn_unlocked(logger->txn_manager, txnid, &root);

    const bool needs_child_lookup =
        (root != NULL) && (root->txnid.child_id64 != txnid.child_id64);
    if (needs_child_lookup) {
        root->child_manager->suspend();
        root->child_manager->find_tokutxn_by_xid_unlocked(txnid, result);
        root->child_manager->resume();
    } else {
        *result = root;
    }

    toku_txn_manager_resume(logger->txn_manager);
}
// Find the earliest LSN in a log. No locks are needed.

View file

@ -40,6 +40,7 @@ int toku_logger_set_lg_bsize(TOKULOGGER logger, uint32_t bsize);
void toku_logger_write_log_files (TOKULOGGER logger, bool write_log_files);
void toku_logger_trim_log_files(TOKULOGGER logger, bool trim_log_files);
bool toku_logger_txns_exist(TOKULOGGER logger);
// Restart the logger. This function is used by recovery to really start
// logging.
@ -66,12 +67,14 @@ int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, uint32_t *len
int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *lsn, struct x1764 *checksum, uint32_t *len);
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, uint32_t *len);
int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, uint32_t *len);
int toku_fread_TXNID_PAIR (FILE *f, TXNID_PAIR *txnid, struct x1764 *checksum, uint32_t *len);
int toku_fread_XIDP (FILE *f, XIDP *xidp, struct x1764 *checksum, uint32_t *len);
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, uint32_t *len);
int toku_fread_FILENUMS (FILE *f, FILENUMS *fs, struct x1764 *checksum, uint32_t *len);
int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
int toku_logprint_TXNID_PAIR (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
int toku_logprint_XIDP (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format __attribute__((__unused__)));
int toku_logprint_uint8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
int toku_logprint_uint32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, uint32_t *len, const char *format);
@ -85,11 +88,11 @@ int toku_logprint_FILENUMS (FILE *outf, FILE *inf, const char *fieldname, struct
int toku_read_and_print_logmagic (FILE *f, uint32_t *versionp);
int toku_read_logmagic (FILE *f, uint32_t *versionp);
TXNID toku_txn_get_txnid (TOKUTXN txn);
TXNID_PAIR toku_txn_get_txnid (TOKUTXN txn);
LSN toku_logger_last_lsn(TOKULOGGER logger);
TOKULOGGER toku_txn_logger (TOKUTXN txn);
void toku_txnid2txn (TOKULOGGER logger, TXNID txnid, TOKUTXN *result);
void toku_txnid2txn (TOKULOGGER logger, TXNID_PAIR txnid, TOKUTXN *result);
int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags);

View file

@ -130,10 +130,20 @@ static inline void rbuf_ma_uint64_t (struct rbuf *r, MEMARENA ma __attribute__((
static inline void rbuf_TXNID (struct rbuf *r, TXNID *txnid) {
*txnid = rbuf_ulonglong(r);
}
static inline void rbuf_TXNID_PAIR (struct rbuf *r, TXNID_PAIR *txnid) {
txnid->parent_id64 = rbuf_ulonglong(r);
txnid->child_id64 = rbuf_ulonglong(r);
}
static inline void rbuf_ma_TXNID (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID *txnid) {
rbuf_TXNID(r, txnid);
}
// Memarena-flavored variant of rbuf_TXNID_PAIR: the MEMARENA argument is
// unused and the call simply forwards to rbuf_TXNID_PAIR.
static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID_PAIR *txnid) {
rbuf_TXNID_PAIR(r, txnid);
}
static inline void rbuf_FILENUM (struct rbuf *r, FILENUM *filenum) {
filenum->fileid = rbuf_int(r);
}

View file

@ -460,15 +460,18 @@ static int toku_recover_backward_fassociate (struct logtype_fassociate *UU(l), R
}
static int
recover_transaction(TOKUTXN *txnp, TXNID xid, TXNID parentxid, TOKULOGGER logger) {
recover_transaction(TOKUTXN *txnp, TXNID_PAIR xid, TXNID_PAIR parentxid, TOKULOGGER logger) {
int r;
// lookup the parent
TOKUTXN parent = NULL;
if (parentxid != TXNID_NONE) {
if (!txn_pair_is_none(parentxid)) {
toku_txnid2txn(logger, parentxid, &parent);
assert(parent!=NULL);
}
else {
invariant(xid.child_id64 == TXNID_NONE);
}
// create a transaction and bind it to the transaction id
TOKUTXN txn = NULL;
@ -488,8 +491,8 @@ recover_transaction(TOKUTXN *txnp, TXNID xid, TXNID parentxid, TOKULOGGER logger
static int recover_xstillopen_internal (TOKUTXN *txnp,
LSN UU(lsn),
TXNID xid,
TXNID parentxid,
TXNID_PAIR xid,
TXNID_PAIR parentxid,
uint64_t rollentry_raw_count,
FILENUMS open_filenums,
bool force_fsync_on_commit,
@ -507,7 +510,7 @@ static int recover_xstillopen_internal (TOKUTXN *txnp,
case FORWARD_BETWEEN_CHECKPOINT_BEGIN_END: {
renv->ss.checkpoint_num_xstillopen++;
invariant(renv->ss.last_xid != TXNID_NONE);
invariant(xid <= renv->ss.last_xid);
invariant(xid.parent_id64 <= renv->ss.last_xid);
TOKUTXN txn = NULL;
{ //Create the transaction.
r = recover_transaction(&txn, xid, parentxid, renv->logger);
@ -588,7 +591,7 @@ static int toku_recover_xstillopenprepared (struct logtype_xstillopenprepared *l
int r = recover_xstillopen_internal (&txn,
l->lsn,
l->xid,
(TXNID)0,
TXNID_PAIR_NONE,
l->rollentry_raw_count,
l->open_filenums,
l->force_fsync_on_commit,
@ -1171,15 +1174,13 @@ int tokudb_needs_recovery(const char *log_dir, bool ignore_log_empty) {
}
static uint32_t recover_get_num_live_txns(RECOVER_ENV renv) {
return toku_txn_manager_num_live_txns(renv->logger->txn_manager);
return toku_txn_manager_num_live_root_txns(renv->logger->txn_manager);
}
// template-only function, but must be extern
int is_txn_unprepared(const TOKUTXN &txn, const uint32_t UU(index), TOKUTXN *const extra)
__attribute__((nonnull(3)));
int is_txn_unprepared(const TOKUTXN &txn, const uint32_t UU(index), TOKUTXN *const extra) {
static int is_txn_unprepared(TOKUTXN txn, void* extra) {
TOKUTXN* ptxn = (TOKUTXN *)extra;
if (txn->state != TOKUTXN_PREPARING) {
*extra = txn;
*ptxn = txn;
return -1; // return -1 to get iterator to return
}
return 0;
@ -1188,8 +1189,9 @@ int is_txn_unprepared(const TOKUTXN &txn, const uint32_t UU(index), TOKUTXN *con
static int find_an_unprepared_txn (RECOVER_ENV renv, TOKUTXN *txnp) {
TOKUTXN txn = nullptr;
int r = toku_txn_manager_iter_over_live_txns<TOKUTXN, is_txn_unprepared>(
int r = toku_txn_manager_iter_over_live_root_txns(
renv->logger->txn_manager,
is_txn_unprepared,
&txn
);
assert(r == 0 || r == -1);
@ -1200,26 +1202,36 @@ static int find_an_unprepared_txn (RECOVER_ENV renv, TOKUTXN *txnp) {
return DB_NOTFOUND;
}
// template-only function, but must be extern
int call_prepare_txn_callback_iter(const TOKUTXN &txn, const uint32_t UU(index), RECOVER_ENV *const renv)
__attribute__((nonnull(3)));
int call_prepare_txn_callback_iter(const TOKUTXN &txn, const uint32_t UU(index), RECOVER_ENV *const renv) {
// Iteration callback: extra points at a RECOVER_ENV. Checks that txn is in the
// TOKUTXN_PREPARING state and has no child, then invokes the environment's
// prepared_txn_callback for it. Always returns 0.
static int call_prepare_txn_callback_iter(TOKUTXN txn, void* extra) {
    invariant(txn->state == TOKUTXN_PREPARING);
    invariant(txn->child == NULL);
    RECOVER_ENV renv = *(RECOVER_ENV *)extra;
    renv->prepared_txn_callback(renv->env, txn);
    return 0;
}
// Abort and close txn, first doing the same (recursively) for its child so
// that the deepest transaction is handled first.
static void recover_abort_live_txn(TOKUTXN txn) {
    // Descend before touching txn itself.
    TOKUTXN child = txn->child;
    if (child != NULL) {
        recover_abort_live_txn(child);
    }
    // Re-read txn->child: the recursive call is expected to have NULLed it out.
    invariant(txn->child == NULL);

    // Abort, then close, this transaction.
    int abort_r = toku_txn_abort_txn(txn, NULL, NULL);
    assert(abort_r == 0);
    toku_txn_close_txn(txn);
}
// abort all of the remaining live transactions in descending transaction id order
static void recover_abort_live_txns(RECOVER_ENV renv) {
static void recover_abort_all_live_txns(RECOVER_ENV renv) {
while (1) {
TOKUTXN txn;
int r = find_an_unprepared_txn(renv, &txn);
if (r==0) {
// abort the transaction
r = toku_txn_abort_txn(txn, NULL, NULL);
assert(r == 0);
// close the transaction
toku_txn_close_txn(txn);
recover_abort_live_txn(txn);
} else if (r==DB_NOTFOUND) {
break;
} else {
@ -1228,8 +1240,9 @@ static void recover_abort_live_txns(RECOVER_ENV renv) {
}
// Now we have only prepared txns. These prepared txns don't have full DB_TXNs in them, so we need to make some.
int r = toku_txn_manager_iter_over_live_txns<RECOVER_ENV, call_prepare_txn_callback_iter>(
int r = toku_txn_manager_iter_over_live_root_txns(
renv->logger->txn_manager,
call_prepare_txn_callback_iter,
&renv
);
assert_zero(r);
@ -1415,7 +1428,7 @@ static int do_recovery(RECOVER_ENV renv, const char *env_dir, const char *log_di
fprintf(stderr, "%.24s Tokudb recovery has %" PRIu32 " live transaction%s\n", ctime(&tnow), n, n > 1 ? "s" : "");
}
}
recover_abort_live_txns(renv);
recover_abort_all_live_txns(renv);
{
uint32_t n = recover_get_num_live_txns(renv);
if (n > 0) {

View file

@ -278,7 +278,7 @@ toku_rollback_cmddelete (FILENUM filenum,
}
static int
toku_apply_rollinclude (TXNID xid,
toku_apply_rollinclude (TXNID_PAIR xid,
uint64_t num_nodes,
BLOCKNUM spilled_head,
uint32_t spilled_head_hash __attribute__((__unused__)),
@ -334,7 +334,7 @@ toku_apply_rollinclude (TXNID xid,
}
int
toku_commit_rollinclude (TXNID xid,
toku_commit_rollinclude (TXNID_PAIR xid,
uint64_t num_nodes,
BLOCKNUM spilled_head,
uint32_t spilled_head_hash,
@ -352,7 +352,7 @@ toku_commit_rollinclude (TXNID xid,
}
int
toku_rollback_rollinclude (TXNID xid,
toku_rollback_rollinclude (TXNID_PAIR xid,
uint64_t num_nodes,
BLOCKNUM spilled_head,
uint32_t spilled_head_hash,

View file

@ -77,7 +77,7 @@ apply_txn(TOKUTXN txn, LSN lsn, apply_rollback_item func) {
ROLLBACK_LOG_NODE log;
//pin log
toku_get_and_pin_rollback_log(txn, next_log, next_log_hash, &log);
toku_rollback_verify_contents(log, txn->txnid64, last_sequence - 1);
toku_rollback_verify_contents(log, txn->txnid, last_sequence - 1);
toku_maybe_prefetch_previous_rollback_log(txn, log);
@ -142,7 +142,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
if (txn_has_current_rollback_log(txn)) {
num_nodes--; //Don't count the in-progress rollback log.
}
toku_logger_save_rollback_rollinclude(txn->parent, txn->txnid64, num_nodes,
toku_logger_save_rollback_rollinclude(txn->parent, txn->txnid, num_nodes,
txn->roll_info.spilled_rollback_head, txn->roll_info.spilled_rollback_head_hash,
txn->roll_info.spilled_rollback_tail, txn->roll_info.spilled_rollback_tail_hash);
//Remove ownership from child.
@ -164,7 +164,7 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
ROLLBACK_LOG_NODE child_log;
toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback,
txn->roll_info.current_rollback_hash, &child_log);
toku_rollback_verify_contents(child_log, txn->txnid64, txn->roll_info.num_rollback_nodes - 1);
toku_rollback_verify_contents(child_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1);
// Append the list to the front of the parent.
if (child_log->oldest_logentry) {

View file

@ -79,7 +79,8 @@ static void toku_rollback_node_save_ct_pair(CACHEKEY UU(key), void *value_data,
void rollback_empty_log_init(ROLLBACK_LOG_NODE log) {
// Having a txnid set to TXNID_NONE is how we determine if the
// rollback log node is empty or in use.
log->txnid = TXNID_NONE;
log->txnid.parent_id64 = TXNID_NONE;
log->txnid.child_id64 = TXNID_NONE;
log->layout_version = FT_LAYOUT_VERSION;
log->layout_version_original = FT_LAYOUT_VERSION;
@ -103,7 +104,7 @@ static void rollback_initialize_for_txn(
uint32_t previous_hash
)
{
log->txnid = txn->txnid64;
log->txnid = txn->txnid;
log->sequence = txn->roll_info.num_rollback_nodes++;
log->previous = previous;
log->previous_hash = previous_hash;
@ -241,9 +242,10 @@ void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE lo
}
void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log,
TXNID txnid, uint64_t sequence)
TXNID_PAIR txnid, uint64_t sequence)
{
assert(log->txnid == txnid);
assert(log->txnid.parent_id64 == txnid.parent_id64);
assert(log->txnid.child_id64 == txnid.child_id64);
assert(log->sequence == sequence);
}
@ -273,7 +275,7 @@ void toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE
invariant(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING); // hot indexing may call this function for prepared transactions
if (txn_has_current_rollback_log(txn)) {
toku_get_and_pin_rollback_log(txn, txn->roll_info.current_rollback, txn->roll_info.current_rollback_hash, &pinned_log);
toku_rollback_verify_contents(pinned_log, txn->txnid64, txn->roll_info.num_rollback_nodes - 1);
toku_rollback_verify_contents(pinned_log, txn->txnid, txn->roll_info.num_rollback_nodes - 1);
} else {
// For each transaction, we try to acquire the first rollback log
// from the rollback log node cache, so that we avoid
@ -302,7 +304,8 @@ void toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE
rollback_log_create(txn, txn->roll_info.spilled_rollback_tail, txn->roll_info.spilled_rollback_tail_hash, &pinned_log);
}
}
assert(pinned_log->txnid == txn->txnid64);
assert(pinned_log->txnid.parent_id64 == txn->txnid.parent_id64);
assert(pinned_log->txnid.child_id64 == txn->txnid.child_id64);
assert(pinned_log->blocknum.b != ROLLBACK_NONE.b);
*log = pinned_log;
}

View file

@ -25,7 +25,7 @@ void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, uint32_t hash
void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log);
// assert that the given log's txnid and sequence match the ones given
void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log, TXNID txnid, uint64_t sequence);
void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log, TXNID_PAIR txnid, uint64_t sequence);
// if there is a previous rollback log for the given log node, prefetch it
void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log);
@ -65,7 +65,7 @@ struct rollback_log_node {
uint32_t build_id; // build_id (svn rev number) of software that wrote this node to disk
int dirty;
// to which transaction does this node belong?
TXNID txnid;
TXNID_PAIR txnid;
// sequentially, where in the rollback log chain is this node?
// the sequence is between 0 and totalnodes-1
uint64_t sequence;
@ -105,7 +105,7 @@ void rollback_empty_log_init(ROLLBACK_LOG_NODE log);
void make_rollback_log_empty(ROLLBACK_LOG_NODE log);
static inline bool rollback_log_is_unused(ROLLBACK_LOG_NODE log) {
return (log->txnid == TXNID_NONE);
return (log->txnid.parent_id64 == TXNID_NONE);
}

View file

@ -20,12 +20,15 @@ static void test_5123(void) {
test_setup(&logger, &ct);
int r;
TXNID_PAIR one = {.parent_id64 = (TXNID)1, TXNID_NONE};
TXNID_PAIR two = {.parent_id64 = (TXNID)2, TXNID_NONE};
TXNID_PAIR three = {.parent_id64 = (TXNID)3, TXNID_NONE};
toku_log_xbegin(logger, NULL, false, (TXNID) 1, TXNID_NONE);
toku_log_xbegin(logger, NULL, false, (TXNID) 3, TXNID_NONE);
toku_log_xbegin(logger, NULL, false, (TXNID) 2, TXNID_NONE);
toku_log_xbegin(logger, NULL, false, one, TXNID_PAIR_NONE);
toku_log_xbegin(logger, NULL, false, three, TXNID_PAIR_NONE);
toku_log_xbegin(logger, NULL, false, two, TXNID_PAIR_NONE);
toku_log_xcommit(logger, NULL, false, NULL, (TXNID) 2);
toku_log_xcommit(logger, NULL, false, NULL, two);
toku_logger_close_rollback(logger);

View file

@ -0,0 +1,129 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: test-leafentry-nested.cc 49851 2012-11-12 00:43:22Z esmet $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#include <toku_portability.h>
#include <string.h>
#include "test.h"
#include "fttypes.h"
#include "ule.h"
#include "ule-internal.h"
// Reset `ule` to an empty leafentry: no committed and no provisional
// transaction records, with the key pointing at the bytes held by `key`.
// The key bytes are referenced, not copied.
static void init_empty_ule(ULE ule, DBT *key) {
    // Key descriptor comes straight from the DBT.
    ule->keyp = key->data;
    ule->keylen = key->size;
    // Record stack lives in the ULE's own uxrs_static member and starts empty.
    ule->uxrs = ule->uxrs_static;
    ule->num_puxrs = 0;
    ule->num_cuxrs = 0;
}
// Append one committed insert record to `ule`. The record refers to the
// value bytes held by `val` (not copied) and is tagged with `xid`.
static void add_committed_entry(ULE ule, DBT *val, TXNID xid) {
    // Claim the next committed slot and bump the committed count.
    const uint32_t slot = ule->num_cuxrs++;
    ule->uxrs[slot].xid = xid;
    ule->uxrs[slot].valp = val->data;
    ule->uxrs[slot].vallen = val->size;
    ule->uxrs[slot].type = XR_INSERT;
}
// Build a message of the given `type` carrying the transaction stack `xids`
// and referring to the caller's `key` and `val` DBTs. Only the fields set
// below are initialized; every other member of the message is left untouched.
static FT_MSG_S
msg_init(enum ft_msg_type type, XIDS xids,
         DBT *key, DBT *val) {
    FT_MSG_S result;
    result.u.id.val = val;
    result.u.id.key = key;
    result.xids = xids;
    result.type = type;
    return result;
}
//Test all the different things that can happen to a
//committed leafentry (logical equivalent of a committed insert).
static void
run_test(void) {
ULE_S ule_initial;
ULE ule = &ule_initial;
ule_initial.uxrs = ule_initial.uxrs_static;
int r;
DBT key;
DBT val;
uint64_t key_data = 1;
uint64_t val_data_one = 1;
uint64_t val_data_two = 2;
uint64_t val_data_three = 3;
uint32_t keysize = 8;
uint32_t valsize = 8;
toku_fill_dbt(&key, &key_data, keysize);
toku_fill_dbt(&val, &val_data_one, valsize);
// test case where we apply a message and the innermost child_id
// is the same as the innermost committed TXNID
XIDS root_xids = xids_get_root_xids();
TXNID root_txnid = 1000;
TXNID child_id = 10;
XIDS msg_xids_1;
XIDS msg_xids_2;
r = xids_create_child(root_xids, &msg_xids_1, root_txnid);
assert(r==0);
r = xids_create_child(msg_xids_1, &msg_xids_2, child_id);
assert(r==0);
init_empty_ule(&ule_initial, &key);
add_committed_entry(&ule_initial, &val, 0);
val.data = &val_data_two;
// make the TXNID match the child id of xids
add_committed_entry(&ule_initial, &val, 10);
// now do the application of xids to the ule
FT_MSG_S msg;
// do a commit
msg = msg_init(FT_COMMIT_ANY, msg_xids_2, &key, &val);
test_msg_modify_ule(&ule_initial, &msg);
assert(ule->num_cuxrs == 2);
assert(ule->uxrs[0].xid == TXNID_NONE);
assert(ule->uxrs[1].xid == 10);
assert(ule->uxrs[0].valp == &val_data_one);
assert(ule->uxrs[1].valp == &val_data_two);
// do an abort
msg = msg_init(FT_ABORT_ANY, msg_xids_2, &key, &val);
test_msg_modify_ule(&ule_initial, &msg);
assert(ule->num_cuxrs == 2);
assert(ule->uxrs[0].xid == TXNID_NONE);
assert(ule->uxrs[1].xid == 10);
assert(ule->uxrs[0].valp == &val_data_one);
assert(ule->uxrs[1].valp == &val_data_two);
// do an insert
val.data = &val_data_three;
msg = msg_init(FT_INSERT, msg_xids_2, &key, &val);
test_msg_modify_ule(&ule_initial, &msg);
// now that message applied, verify that things are good
assert(ule->num_cuxrs == 2);
assert(ule->num_puxrs == 2);
assert(ule->uxrs[0].xid == TXNID_NONE);
assert(ule->uxrs[1].xid == 10);
assert(ule->uxrs[2].xid == 1000);
assert(ule->uxrs[3].xid == 10);
assert(ule->uxrs[0].valp == &val_data_one);
assert(ule->uxrs[1].valp == &val_data_two);
assert(ule->uxrs[2].type == XR_PLACEHOLDER);
assert(ule->uxrs[3].valp == &val_data_three);
xids_destroy(&msg_xids_2);
xids_destroy(&msg_xids_1);
xids_destroy(&root_xids);
}
// Test entry point; the command line is ignored.
int
test_main (int argc, const char *argv[]) {
    (void) argc;
    (void) argv;
    run_test();
    return 0;
}

View file

@ -0,0 +1,255 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: xid_lsn_independent.cc 49853 2012-11-12 04:26:30Z zardosht $"
#ident "Copyright (c) 2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "test.h"
#include "toku_os.h"
#include "checkpoint.h"
#define TESTDIR __SRCFILE__ ".dir"
#define FILENAME "test0.ft"
#include "test-ft-txns.h"
// Iteration callback: `extra` points at the TOKUTXN the caller expects to be
// visited next. Asserts that `txn` is that transaction, then advances the
// expectation to txn's child. Always returns 0.
static int txn_child_manager_test_cb(TOKUTXN txn, void* extra) {
    TOKUTXN *expected = static_cast<TOKUTXN *>(extra);
    assert(*expected == txn);
    *expected = txn->child;
    return 0;
}
// Iteration callback: `extra` is itself a TOKUTXN. Returns -1 when `txn` is
// that transaction and 0 for every other transaction.
static int txn_child_manager_test_cb2(TOKUTXN txn, void* extra) {
    const TOKUTXN stop_at = static_cast<TOKUTXN>(extra);
    return (txn == stop_at) ? -1 : 0;
}
// Holder for the txn_child_manager tests in this file.
// NOTE(review): run_test() reads cm->m_root and cm->m_last_xid directly, so
// txn_child_manager presumably grants this class access by name — confirm
// before renaming it or changing the class keyword.
class txn_child_manager_unit_test {
public:
// Checks child/grandchild/recovery txn bookkeeping, lookup by xid and iteration.
void run_test();
// Checks that a TXN_SNAPSHOT_CHILD child gets a later snapshot than its root.
void run_child_txn_test();
};
// Simple test that verifies that a child tokutxn begun with
// TXN_SNAPSHOT_CHILD gets a snapshot of its own: its snapshot id must be
// later than the snapshot id of its root.
void txn_child_manager_unit_test::run_child_txn_test() {
    TOKULOGGER logger;
    CACHETABLE ct;
    test_setup(&logger, &ct);

    // Root transaction.
    TOKUTXN root_txn = NULL;
    int r = toku_txn_begin_txn((DB_TXN *)NULL, NULL, &root_txn, logger, TXN_SNAPSHOT_CHILD);
    CKERR(r);

    // Child of the root, also asking for a child snapshot.
    TOKUTXN child_txn = NULL;
    r = toku_txn_begin_txn(NULL, root_txn, &child_txn, logger, TXN_SNAPSHOT_CHILD);
    CKERR(r);

    // The child must have a later snapshot than the root.
    assert(child_txn->snapshot_txnid64 > root_txn->snapshot_txnid64);

    // Tear down innermost first; closing the child must unlink it from the root.
    r = toku_txn_commit_txn(child_txn, true, NULL, NULL);
    CKERR(r);
    toku_txn_close_txn(child_txn);
    assert(root_txn->child == NULL);

    r = toku_txn_commit_txn(root_txn, true, NULL, NULL);
    CKERR(r);
    toku_txn_close_txn(root_txn);

    clean_shutdown(&logger, &ct);
}
// End-to-end check of txn_child_manager bookkeeping for a chain
// root -> child -> grandchild -> recovery txn:
//   * every descendant shares the root's child manager and parent_id64,
//   * child ids are handed out in increasing order (2, 3, 4) and the manager's
//     m_last_xid tracks the most recent one, including an explicitly supplied
//     recovery id,
//   * TXN_SNAPSHOT_ROOT descendants share the root's snapshot, while a
//     recovery txn has no snapshot,
//   * find_tokutxn_by_xid_unlocked and iterate behave as expected.
void txn_child_manager_unit_test::run_test() {
    TOKULOGGER logger;
    CACHETABLE ct;
    int r = 0;
    test_setup(&logger, &ct);

    // create the root transaction
    TOKUTXN root_txn = NULL;
    r = toku_txn_begin_txn(
        (DB_TXN *)NULL,
        NULL,
        &root_txn,
        logger,
        TXN_SNAPSHOT_ROOT
        );
    CKERR(r);

    // a root txn owns its child manager, which has handed out no ids yet
    txn_child_manager* cm = root_txn->child_manager;
    assert(cm == &root_txn->child_manager_s);
    assert(cm->m_root == root_txn);
    assert(cm->m_last_xid == TXNID_NONE);
    assert(root_txn->child == NULL);
    // this assumption implies our assumptions of child_id values below,
    // because the parent id cannot be the child id
    assert(root_txn->txnid.parent_id64 == 1);

    // test starting a child txn
    TOKUTXN child_txn = NULL;
    r = toku_txn_begin_txn(
        NULL,
        root_txn,
        &child_txn,
        logger,
        TXN_SNAPSHOT_ROOT
        );
    CKERR(r);
    assert(child_txn->child_manager == cm);
    assert(child_txn->parent == root_txn);
    assert(root_txn->child == child_txn);
    assert(child_txn->txnid.parent_id64 == root_txn->txnid.parent_id64);
    assert(child_txn->txnid.child_id64 == 2);
    assert(child_txn->live_root_txn_list == root_txn->live_root_txn_list);
    assert(child_txn->snapshot_txnid64 == root_txn->snapshot_txnid64);
    assert(cm->m_root == root_txn);
    assert(cm->m_last_xid == child_txn->txnid.child_id64);

    // a grandchild hangs off the child but is still managed by the root's manager
    TOKUTXN grandchild_txn = NULL;
    r = toku_txn_begin_txn(
        NULL,
        child_txn,
        &grandchild_txn,
        logger,
        TXN_SNAPSHOT_ROOT
        );
    CKERR(r);
    assert(grandchild_txn->child_manager == cm);
    assert(grandchild_txn->parent == child_txn);
    assert(child_txn->child == grandchild_txn);
    assert(grandchild_txn->txnid.parent_id64 == root_txn->txnid.parent_id64);
    assert(grandchild_txn->txnid.child_id64 == 3);
    assert(grandchild_txn->live_root_txn_list == root_txn->live_root_txn_list);
    assert(grandchild_txn->snapshot_txnid64 == root_txn->snapshot_txnid64);
    assert(cm->m_root == root_txn);
    assert(cm->m_last_xid == grandchild_txn->txnid.child_id64);

    r = toku_txn_commit_txn(grandchild_txn, true, NULL, NULL);
    CKERR(r);
    toku_txn_close_txn(grandchild_txn);

    // now after closing one grandchild txn, open another one;
    // it must get a fresh child id (4), not reuse 3
    r = toku_txn_begin_txn(
        NULL,
        child_txn,
        &grandchild_txn,
        logger,
        TXN_SNAPSHOT_ROOT
        );
    CKERR(r);
    assert(grandchild_txn->child_manager == cm);
    assert(grandchild_txn->parent == child_txn);
    assert(child_txn->child == grandchild_txn);
    assert(grandchild_txn->txnid.parent_id64 == root_txn->txnid.parent_id64);
    assert(grandchild_txn->txnid.child_id64 == 4);
    assert(grandchild_txn->live_root_txn_list == root_txn->live_root_txn_list);
    assert(grandchild_txn->snapshot_txnid64 == root_txn->snapshot_txnid64);
    assert(cm->m_root == root_txn);
    assert(cm->m_last_xid == grandchild_txn->txnid.child_id64);

    // begin a recovery txn with an explicitly chosen child id
    TXNID_PAIR xid = {.parent_id64 = root_txn->txnid.parent_id64, .child_id64 = 100};
    TOKUTXN recovery_txn = NULL;
    r = toku_txn_begin_with_xid(
        grandchild_txn,
        &recovery_txn,
        logger,
        xid,
        TXN_SNAPSHOT_NONE,
        NULL,
        true // for recovery
        );
    // check the result before dereferencing recovery_txn below
    CKERR(r);

    assert(recovery_txn->child_manager == cm);
    assert(recovery_txn->parent == grandchild_txn);
    assert(grandchild_txn->child == recovery_txn);
    assert(recovery_txn->txnid.parent_id64 == root_txn->txnid.parent_id64);
    assert(recovery_txn->txnid.child_id64 == 100);
    // ensure that no snapshot is made
    assert(recovery_txn->live_root_txn_list == NULL);
    assert(recovery_txn->snapshot_txnid64 == TXNID_NONE);
    assert(cm->m_root == root_txn);
    assert(cm->m_last_xid == recovery_txn->txnid.child_id64);

    // now ensure that txn_child_manager::find_tokutxn_by_xid_unlocked works
    TOKUTXN found_txn = NULL;
    // first ensure that a dummy TXNID_PAIR cannot be found
    TXNID_PAIR dummy_pair = { .parent_id64 = root_txn->txnid.parent_id64, .child_id64 = 1000};
    cm->find_tokutxn_by_xid_unlocked(dummy_pair, &found_txn);
    assert(found_txn == NULL);
    cm->find_tokutxn_by_xid_unlocked(root_txn->txnid, &found_txn);
    assert(found_txn == root_txn);
    cm->find_tokutxn_by_xid_unlocked(child_txn->txnid, &found_txn);
    assert(found_txn == child_txn);
    cm->find_tokutxn_by_xid_unlocked(grandchild_txn->txnid, &found_txn);
    assert(found_txn == grandchild_txn);
    cm->find_tokutxn_by_xid_unlocked(recovery_txn->txnid, &found_txn);
    assert(found_txn == recovery_txn);

    // now ensure that the iterator works: the callback walks found_txn down
    // the chain from the root, so it ends at NULL after the innermost txn
    found_txn = root_txn;
    r = cm->iterate(txn_child_manager_test_cb, &found_txn);
    CKERR(r);
    assert(found_txn == NULL);

    // now test that iterator properly stops: the callback returns -1 at
    // child_txn and iterate must hand that value back
    found_txn = child_txn;
    r = cm->iterate(txn_child_manager_test_cb2, found_txn);
    assert(r == -1);

    // unwind the chain innermost-first; each close must unlink from its parent
    r = toku_txn_commit_txn(recovery_txn, true, NULL, NULL);
    CKERR(r);
    toku_txn_close_txn(recovery_txn);
    assert(grandchild_txn->child == NULL);

    r = toku_txn_commit_txn(grandchild_txn, true, NULL, NULL);
    CKERR(r);
    toku_txn_close_txn(grandchild_txn);
    assert(child_txn->child == NULL);

    r = toku_txn_commit_txn(child_txn, true, NULL, NULL);
    CKERR(r);
    toku_txn_close_txn(child_txn);
    assert(root_txn->child == NULL);

    r = toku_txn_commit_txn(root_txn, true, NULL, NULL);
    CKERR(r);
    toku_txn_close_txn(root_txn);

    clean_shutdown(&logger, &ct);
}
// Test entry point: parse the standard test arguments, then run every test
// defined on txn_child_manager_unit_test. Each test does its own
// test_setup/clean_shutdown, so they run back to back.
int test_main (int argc, const char *argv[]) {
    default_parse_args(argc, argv);
    txn_child_manager_unit_test foo;
    foo.run_test();
    // Previously defined but never invoked, so the child-snapshot check was dead code.
    foo.run_child_txn_test();
    return 0;
}

View file

@ -166,8 +166,8 @@ int create_logfiles() {
TOKULOGGER logger;
LSN lsn = {0};
TXNID txnid = 0;
TXNID cp_txnid = 0;
TXNID_PAIR txnid = {.parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE};
TXNID_PAIR cp_txnid = {.parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE};
uint32_t num_fassociate = 0;
uint32_t num_xstillopen = 0;
@ -185,23 +185,23 @@ int create_logfiles() {
// use old x1.tdb test log as basis
//xbegin 'b': lsn=1 parenttxnid=0 crc=00005f1f len=29
toku_log_xbegin(logger, &lsn, 1, NO_FSYNC, 0); txnid = lsn.lsn;
toku_log_xbegin(logger, &lsn, 1, TXNID_PAIR_NONE, TXNID_PAIR_NONE); txnid.parent_id64= lsn.lsn;
//fcreate 'F': lsn=2 txnid=1 filenum=0 fname={len=4 data="a.db"} mode=0777 treeflags=0 crc=18a3d525 len=49
toku_log_fcreate(logger, &lsn, NO_FSYNC, NULL, txnid, fn_aname, bs_aname, 0x0777, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 0);
//commit 'C': lsn=3 txnid=1 crc=00001f1e len=29
toku_log_xcommit(logger, &lsn, FSYNC, NULL, txnid);
//xbegin 'b': lsn=4 parenttxnid=0 crc=00000a1f len=29
toku_log_xbegin(logger, &lsn, 2, NO_FSYNC, 0); txnid = lsn.lsn;
toku_log_xbegin(logger, &lsn, 2, TXNID_PAIR_NONE, TXNID_PAIR_NONE); txnid.parent_id64= lsn.lsn;
//fcreate 'F': lsn=5 txnid=4 filenum=1 fname={len=4 data="b.db"} mode=0777 treeflags=0 crc=14a47925 len=49
toku_log_fcreate(logger, &lsn, NO_FSYNC, NULL, txnid, fn_bname, bs_bname, 0x0777, 0, 0, TOKU_DEFAULT_COMPRESSION_METHOD, 0);
//commit 'C': lsn=6 txnid=4 crc=0000c11e len=29
toku_log_xcommit(logger, &lsn, FSYNC, NULL, txnid);
//xbegin 'b': lsn=7 parenttxnid=0 crc=0000f91f len=29
toku_log_xbegin(logger, &lsn, 3, NO_FSYNC, 0); txnid = lsn.lsn;
toku_log_xbegin(logger, &lsn, 3, TXNID_PAIR_NONE, TXNID_PAIR_NONE); txnid.parent_id64= lsn.lsn;
//enq_insert 'I': lsn=8 filenum=0 xid=7 key={len=2 data="a\000"} value={len=2 data="b\000"} crc=40b863e4 len=45
toku_log_enq_insert(logger, &lsn, NO_FSYNC, NULL, fn_aname, txnid, bs_a, bs_b);
//begin_checkpoint 'x': lsn=9 timestamp=1251309957584197 crc=cd067878 len=29
toku_log_begin_checkpoint(logger, &lsn, NO_FSYNC, 1251309957584197, 0); cp_txnid = lsn.lsn;
toku_log_begin_checkpoint(logger, &lsn, NO_FSYNC, 1251309957584197, 0); cp_txnid.parent_id64= lsn.lsn;
//fassociate 'f': lsn=11 filenum=1 fname={len=4 data="b.db"} crc=a7126035 len=33
toku_log_fassociate(logger, &lsn, NO_FSYNC, fn_bname, 0, bs_bname, 0);
num_fassociate++;
@ -211,13 +211,13 @@ int create_logfiles() {
//xstillopen 's': lsn=10 txnid=7 parent=0 crc=00061816 len=37 <- obsolete
{
FILENUMS filenums = {0, NULL};
toku_log_xstillopen(logger, &lsn, NO_FSYNC, NULL, txnid, 0,
toku_log_xstillopen(logger, &lsn, NO_FSYNC, NULL, txnid, TXNID_PAIR_NONE,
0, filenums, 0, 0, 0,
ROLLBACK_NONE, ROLLBACK_NONE, ROLLBACK_NONE);
}
num_xstillopen++;
//end_checkpoint 'X': lsn=13 txnid=9 timestamp=1251309957586872 crc=cd285c30 len=37
toku_log_end_checkpoint(logger, &lsn, FSYNC, (LSN){cp_txnid}, 1251309957586872, num_fassociate, num_xstillopen);
toku_log_end_checkpoint(logger, &lsn, FSYNC, (LSN){cp_txnid.parent_id64}, 1251309957586872, num_fassociate, num_xstillopen);
//enq_insert 'I': lsn=14 filenum=1 xid=7 key={len=2 data="b\000"} value={len=2 data="a\000"} crc=40388be4 len=45
toku_log_enq_insert(logger, &lsn, NO_FSYNC, NULL, fn_bname, txnid, bs_b, bs_a);
//commit 'C': lsn=15 txnid=7 crc=00016d1e len=29

View file

@ -18,7 +18,7 @@
static void do_txn(TOKULOGGER logger, bool readonly) {
int r;
TOKUTXN txn;
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_ROOT);
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_NONE);
CKERR(r);
if (!readonly) {
@ -40,7 +40,7 @@ static void test_xid_lsn_independent(int N) {
int r;
TOKUTXN txn;
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_ROOT);
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_NONE);
CKERR(r);
r = toku_open_ft_handle(FILENAME, 1, &brt, 1024, 256, TOKU_DEFAULT_COMPRESSION_METHOD, ct, txn, toku_builtin_compare_fun);
@ -50,25 +50,25 @@ static void test_xid_lsn_independent(int N) {
CKERR(r);
toku_txn_close_txn(txn);
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_ROOT);
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn, logger, TXN_SNAPSHOT_NONE);
CKERR(r);
TXNID xid_first = txn->txnid64;
TXNID xid_first = txn->txnid.parent_id64;
unsigned int rands[N];
for (int i=0; i<N; i++) {
char key[100],val[300];
DBT k, v;
rands[i] = random();
snprintf(key, sizeof(key), "key%x.%x", rands[i], i);
memset(val, 'v', sizeof(val));
val[sizeof(val)-1]=0;
toku_ft_insert(brt, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), txn);
char key[100],val[300];
DBT k, v;
rands[i] = random();
snprintf(key, sizeof(key), "key%x.%x", rands[i], i);
memset(val, 'v', sizeof(val));
val[sizeof(val)-1]=0;
toku_ft_insert(brt, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), txn);
}
{
TOKUTXN txn2;
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn2, logger, TXN_SNAPSHOT_ROOT);
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn2, logger, TXN_SNAPSHOT_NONE);
CKERR(r);
// Verify the txnid has gone up only by one (even though many log entries were done)
invariant(txn2->txnid64 == xid_first + 1);
invariant(txn2->txnid.parent_id64 == xid_first + 1);
r = toku_txn_commit_txn(txn2, false, NULL, NULL);
CKERR(r);
toku_txn_close_txn(txn2);
@ -80,9 +80,9 @@ static void test_xid_lsn_independent(int N) {
//TODO(yoni) #5067 will break this portion of the test. (End ids are also assigned, so it would increase by 4 instead of 2.)
// Verify the txnid has gone up only by two (even though many log entries were done)
TOKUTXN txn3;
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn3, logger, TXN_SNAPSHOT_ROOT);
r = toku_txn_begin_txn((DB_TXN*)NULL, (TOKUTXN)0, &txn3, logger, TXN_SNAPSHOT_NONE);
CKERR(r);
invariant(txn3->txnid64 == xid_first + 2);
invariant(txn3->txnid.parent_id64 == xid_first + 2);
r = toku_txn_commit_txn(txn3, false, NULL, NULL);
CKERR(r);
toku_txn_close_txn(txn3);
@ -176,7 +176,7 @@ static void test_xid_lsn_independent_parents(int N) {
ZERO_ARRAY(txns_hack);
for (int i = 0; i < N; i++) {
r = toku_txn_begin_txn((DB_TXN*)NULL, txns[i-1], &txns[i], logger, TXN_SNAPSHOT_ROOT);
r = toku_txn_begin_txn((DB_TXN*)NULL, txns[i-1], &txns[i], logger, TXN_SNAPSHOT_NONE);
CKERR(r);
if (i < num_non_cascade) {

342
ft/txn.cc
View file

@ -12,6 +12,8 @@
#include "ule.h"
#include "rollback-apply.h"
#include "txn_manager.h"
#include "txn_child_manager.h"
#include <util/partitioned_counter.h>
///////////////////////////////////////////////////////////////////////////////////
// Engine status
@ -21,33 +23,39 @@
static TXN_STATUS_S txn_status;
#define STATUS_INIT(k,t,l) { \
txn_status.status[k].keyname = #k; \
txn_status.status[k].type = t; \
txn_status.status[k].legend = "txn: " l; \
}
#define STATUS_INIT(k,t,l) do { \
txn_status.status[k].keyname = #k; \
txn_status.status[k].type = t; \
txn_status.status[k].legend = "txn: " l; \
if (t == PARCOUNT) { \
txn_status.status[k].value.parcount = create_partitioned_counter(); \
} \
} while (0)
static void
status_init(void) {
void
txn_status_init(void) {
// Note, this function initializes the keyname, type, and legend fields.
// Value fields are initialized to zero by compiler.
STATUS_INIT(TXN_BEGIN, UINT64, "begin");
STATUS_INIT(TXN_COMMIT, UINT64, "successful commits");
STATUS_INIT(TXN_ABORT, UINT64, "aborts");
STATUS_INIT(TXN_CLOSE, UINT64, "close (should be sum of aborts and commits)");
STATUS_INIT(TXN_NUM_OPEN, UINT64, "number currently open (should be begin - close)");
STATUS_INIT(TXN_MAX_OPEN, UINT64, "max number open simultaneously");
STATUS_INIT(TXN_BEGIN, PARCOUNT, "begin");
STATUS_INIT(TXN_COMMIT, PARCOUNT, "successful commits");
STATUS_INIT(TXN_ABORT, PARCOUNT, "aborts");
txn_status.initialized = true;
}
// Destroy the partitioned counter of every txn status row whose type is
// PARCOUNT; rows of any other type are skipped.
void txn_status_destroy(void) {
    for (int row = 0; row != TXN_STATUS_NUM_ROWS; row++) {
        if (txn_status.status[row].type != PARCOUNT) {
            continue;
        }
        destroy_partitioned_counter(txn_status.status[row].value.parcount);
    }
}
#undef STATUS_INIT
#define STATUS_VALUE(x) txn_status.status[x].value.num
#define STATUS_INC(x, d) increment_partitioned_counter(txn_status.status[x].value.parcount, d)
void
toku_txn_get_status(TXN_STATUS s) {
if (!txn_status.initialized) {
status_init();
}
*s = txn_status;
}
@ -64,9 +72,9 @@ toku_txn_unlock(TOKUTXN txn)
}
uint64_t
toku_txn_get_id(TOKUTXN txn)
toku_txn_get_root_id(TOKUTXN txn)
{
return txn->txnid64;
return txn->txnid.parent_id64;
}
int
@ -78,22 +86,77 @@ toku_txn_begin_txn (
TXN_SNAPSHOT_TYPE snapshot_type
)
{
int r = toku_txn_begin_with_xid(parent_tokutxn, tokutxn, logger, TXNID_NONE, snapshot_type, container_db_txn, false);
int r = toku_txn_begin_with_xid(parent_tokutxn, tokutxn, logger, TXNID_PAIR_NONE, snapshot_type, container_db_txn, false);
return r;
}
int
toku_txn_begin_with_xid (
TOKUTXN parent_tokutxn,
TOKUTXN *tokutxn,
TOKUTXN parent,
TOKUTXN *txnp,
TOKULOGGER logger,
TXNID xid,
TXNID_PAIR xid,
TXN_SNAPSHOT_TYPE snapshot_type,
DB_TXN *container_db_txn,
bool for_recovery
)
{
return toku_txn_manager_start_txn(tokutxn, logger->txn_manager, parent_tokutxn, logger, xid, snapshot_type, container_db_txn, for_recovery);
{
int r = 0;
TOKUTXN txn;
XIDS xids;
// Do as much (safe) work as possible before serializing on the txn_manager lock.
XIDS parent_xids;
if (parent == NULL) {
parent_xids = xids_get_root_xids();
} else {
parent_xids = parent->xids;
}
r = xids_create_unknown_child(parent_xids, &xids);
if (r != 0) {
return r;
}
toku_txn_create_txn(&txn, parent, logger, snapshot_type, container_db_txn, xids, for_recovery);
// txnid64, snapshot_txnid64
// will be set in here.
if (for_recovery) {
if (parent == NULL) {
invariant(xid.child_id64 == TXNID_NONE);
toku_txn_manager_start_txn_for_recovery(
txn,
logger->txn_manager,
xid.parent_id64
);
}
else {
parent->child_manager->start_child_txn_for_recovery(txn, parent, xid);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
}
}
else {
assert(xid.parent_id64 == TXNID_NONE);
assert(xid.child_id64 == TXNID_NONE);
if (parent == NULL) {
toku_txn_manager_start_txn(
txn,
logger->txn_manager,
snapshot_type
);
}
else {
parent->child_manager->start_child_txn(txn, parent);
txn->oldest_referenced_xid = parent->oldest_referenced_xid;
toku_txn_manager_handle_snapshot_create_for_child_txn(
txn,
logger->txn_manager,
snapshot_type
);
}
}
TXNID finalized_xid = (parent == NULL) ? txn->txnid.parent_id64 : txn->txnid.child_id64;
xids_finalize_with_child(txn->xids, finalized_xid);
*txnp = txn;
return r;
}
DB_TXN *
@ -124,8 +187,8 @@ void toku_txn_create_txn (
assert(logger->rollback_cachefile);
omt<FT> open_fts;
open_fts.create();
open_fts.create_no_array();
struct txn_roll_info roll_info = {
.num_rollback_nodes = 0,
.num_rollentries = 0,
@ -139,14 +202,18 @@ void toku_txn_create_txn (
.current_rollback_hash = 0,
};
static txn_child_manager tcm;
struct tokutxn new_txn = {
.txnid64 = TXNID_NONE,
.ancestor_txnid64 = TXNID_NONE,
.txnid = {.parent_id64 = TXNID_NONE, .child_id64 = TXNID_NONE },
.snapshot_txnid64 = TXNID_NONE,
.snapshot_type = snapshot_type,
.snapshot_type = for_recovery ? TXN_SNAPSHOT_NONE : snapshot_type,
.for_recovery = for_recovery,
.logger = logger,
.parent = parent_tokutxn,
.child = NULL,
.child_manager_s = tcm,
.child_manager = NULL,
.container_db_txn = container_db_txn,
.live_root_txn_list = nullptr,
.xids = xids,
@ -161,52 +228,46 @@ void toku_txn_create_txn (
.txn_lock = ZERO_MUTEX_INITIALIZER,
.open_fts = open_fts,
.roll_info = roll_info,
.state_lock = ZERO_MUTEX_INITIALIZER,
.state_cond = ZERO_COND_INITIALIZER,
.state = TOKUTXN_LIVE,
.prepared_txns_link = {0},
.num_pin = 0
};
TOKUTXN XMEMDUP(result, &new_txn);
toku_mutex_init(&result->txn_lock, NULL);
TOKUTXN result = NULL;
XMEMDUP(result, &new_txn);
invalidate_xa_xid(&result->xa_xid);
if (parent_tokutxn == NULL) {
result->child_manager = &result->child_manager_s;
result->child_manager->init(result);
}
else {
result->child_manager = parent_tokutxn->child_manager;
}
toku_mutex_init(&result->txn_lock, nullptr);
toku_pthread_mutexattr_t attr;
toku_mutexattr_init(&attr);
toku_mutexattr_settype(&attr, TOKU_MUTEX_ADAPTIVE);
toku_mutex_init(&result->state_lock, &attr);
toku_mutexattr_destroy(&attr);
toku_cond_init(&result->state_cond, nullptr);
*tokutxn = result;
STATUS_VALUE(TXN_BEGIN)++;
STATUS_VALUE(TXN_NUM_OPEN)++;
if (STATUS_VALUE(TXN_NUM_OPEN) > STATUS_VALUE(TXN_MAX_OPEN))
STATUS_VALUE(TXN_MAX_OPEN) = STATUS_VALUE(TXN_NUM_OPEN);
STATUS_INC(TXN_BEGIN, 1);
}
void
toku_txn_update_xids_in_txn(TOKUTXN txn, TXNID xid)
{
// these should not have been set yet
invariant(txn->txnid64 == TXNID_NONE);
invariant(txn->ancestor_txnid64 == TXNID_NONE);
invariant(txn->snapshot_txnid64 == TXNID_NONE);
TXNID snapshot_txnid64;
if (txn->snapshot_type == TXN_SNAPSHOT_NONE) {
snapshot_txnid64 = TXNID_NONE;
} else if (txn->parent == NULL || txn->snapshot_type == TXN_SNAPSHOT_CHILD) {
snapshot_txnid64 = xid;
} else if (txn->snapshot_type == TXN_SNAPSHOT_ROOT) {
snapshot_txnid64 = txn->parent->snapshot_txnid64;
} else {
assert(false);
}
#define UNCONST(t, x) *((t *) &(x))
// we need to cast around const here in order to move
// toku_txn_create_txn outside of the txn_manager_lock in
// toku_txn_manager_start_txn
UNCONST(TXNID, txn->txnid64) = xid;
UNCONST(TXNID, txn->snapshot_txnid64) = snapshot_txnid64;
UNCONST(TXNID, txn->ancestor_txnid64) = (txn->parent ? txn->parent->ancestor_txnid64 : xid);
#undef UNCONST
invariant(txn->txnid.parent_id64 == TXNID_NONE);
invariant(txn->txnid.child_id64 == TXNID_NONE);
txn->txnid.parent_id64 = xid;
txn->txnid.child_id64 = TXNID_NONE;
}
//Used on recovery to recover a transaction.
@ -250,10 +311,45 @@ struct xcommit_info {
TOKUTXN txn;
};
// Purpose:
//  Move a transaction from live (or preparing) to committing.
//  For a transaction that is not read-only, first wait until nothing has it
//  pinned (num_pin == 0), so the state change does not race with hot
//  indexing, which pins a transaction while it works on a leafentry.
static void txn_note_commit(TOKUTXN txn) {
    if (toku_txn_is_read_only(txn)) {
        // Neither hot indexing nor checkpoint do any work with readonly txns,
        // so the state can be changed without taking the state lock.
        invariant(txn->state==TOKUTXN_LIVE);
        txn->state = TOKUTXN_COMMITTING;
        return;
    }

    // A prepared transaction that is being resolved no longer needs its XA xid.
    if (txn->state==TOKUTXN_PREPARING) {
        invalidate_xa_xid(&txn->xa_xid);
    }

    // If hot index is processing this transaction in some leafentry, the
    // state must not change to commit or abort until hot index is done with
    // that leafentry: wait on the state condition until all pins are gone.
    toku_txn_lock_state(txn);
    while (txn->num_pin > 0) {
        toku_cond_wait(&txn->state_cond, &txn->state_lock);
    }
    txn->state = TOKUTXN_COMMITTING;
    toku_txn_unlock_state(txn);
}
int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, LSN oplsn,
TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra)
{
toku_txn_manager_note_commit_txn(txn->logger->txn_manager, txn);
// there should be no child when we commit or abort a TOKUTXN
invariant(txn->child == NULL);
txn_note_commit(txn);
// Child transactions do not actually 'commit'. They promote their
// changes to parent, so no need to fsync if this txn has a parent. The
@ -270,14 +366,14 @@ int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, LSN oplsn,
txn->progress_poll_fun_extra = poll_extra;
if (!toku_txn_is_read_only(txn)) {
toku_log_xcommit(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid64);
toku_log_xcommit(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid);
}
// If !txn->begin_was_logged, we could skip toku_rollback_commit
// but it's cheap (only a number of function calls that return immediately)
// since there were no writes. Skipping it would mean we would need to be careful
// in case we added any additional required cleanup into those functions in the future.
int r = toku_rollback_commit(txn, oplsn);
STATUS_VALUE(TXN_COMMIT)++;
STATUS_INC(TXN_COMMIT, 1);
return r;
}
@ -289,25 +385,59 @@ int toku_txn_abort_txn(TOKUTXN txn,
return toku_txn_abort_with_lsn(txn, ZERO_LSN, poll, poll_extra);
}
// Switches txn into the TOKUTXN_ABORTING state.
//
// A read-only transaction is switched immediately and without taking the
// state lock; per the original author's note, neither hot indexing nor
// checkpoint do any work with read-only txns.
//
// Any other transaction is switched while holding the txn's state lock, and
// only once num_pin has dropped to zero: while hot indexing has the txn
// pinned for some leafentry, the state must not move to commit or abort.
// If the txn was in TOKUTXN_PREPARING, its XA xid is invalidated first.
static void txn_note_abort(TOKUTXN txn) {
    if (toku_txn_is_read_only(txn)) {
        invariant(txn->state==TOKUTXN_LIVE);
        txn->state = TOKUTXN_ABORTING;
        return;
    }

    if (txn->state==TOKUTXN_PREPARING) {
        invalidate_xa_xid(&txn->xa_xid);
    }

    toku_txn_lock_state(txn);
    // Sleep on state_cond until every pin has been released; the condition
    // is re-checked after each wakeup.
    while (txn->num_pin > 0) {
        toku_cond_wait(&txn->state_cond, &txn->state_lock);
    }
    txn->state = TOKUTXN_ABORTING;
    toku_txn_unlock_state(txn);
}
int toku_txn_abort_with_lsn(TOKUTXN txn, LSN oplsn,
TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra)
// Effect: Among other things, if release_multi_operation_client_lock is true, then unlock that lock (even if an error path is taken)
{
toku_txn_manager_note_abort_txn(txn->logger->txn_manager, txn);
// there should be no child when we commit or abort a TOKUTXN
invariant(txn->child == NULL);
txn_note_abort(txn);
txn->progress_poll_fun = poll;
txn->progress_poll_fun_extra = poll_extra;
txn->do_fsync = false;
if (!toku_txn_is_read_only(txn)) {
toku_log_xabort(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid64);
toku_log_xabort(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid);
}
// If !txn->begin_was_logged, we could skip toku_rollback_abort
// but it's cheap (only a number of function calls that return immediately)
// since there were no writes. Skipping it would mean we would need to be careful
// in case we added any additional required cleanup into those functions in the future.
int r = toku_rollback_abort(txn, oplsn);
STATUS_VALUE(TXN_ABORT)++;
STATUS_INC(TXN_ABORT, 1);
return r;
}
@ -327,12 +457,17 @@ void toku_txn_prepare_txn (TOKUTXN txn, TOKU_XA_XID *xa_xid) {
// XA guarantees are free. No need to pay for overhead of prepare.
return;
}
toku_txn_manager_add_prepared_txn(txn->logger->txn_manager, txn);
assert(txn->state==TOKUTXN_LIVE);
// This state transition must be protected against begin_checkpoint
// Therefore, the caller must have the mo lock held
toku_txn_lock_state(txn);
txn->state = TOKUTXN_PREPARING;
toku_txn_unlock_state(txn);
// Do we need to do an fsync?
txn->do_fsync = (txn->force_fsync_on_commit || txn->roll_info.num_rollentries>0);
copy_xid(&txn->xa_xid, xa_xid);
// This list will go away with #4683, so we won't need the ydb lock for this anymore.
toku_log_xprepare(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid64, xa_xid);
toku_log_xprepare(txn->logger, &txn->do_fsync_lsn, 0, txn, txn->txnid, xa_xid);
}
void toku_txn_get_prepared_xa_xid (TOKUTXN txn, TOKU_XA_XID *xid) {
@ -340,7 +475,7 @@ void toku_txn_get_prepared_xa_xid (TOKUTXN txn, TOKU_XA_XID *xid) {
}
int toku_logger_recover_txn (TOKULOGGER logger, struct tokulogger_preplist preplist[/*count*/], long count, /*out*/ long *retp, uint32_t flags) {
return toku_txn_manager_recover_txn(
return toku_txn_manager_recover_root_txn(
logger->txn_manager,
preplist,
count,
@ -386,7 +521,18 @@ void toku_txn_complete_txn(TOKUTXN txn) {
assert(txn->roll_info.current_rollback.b == ROLLBACK_NONE.b);
assert(txn->num_pin == 0);
assert(txn->state == TOKUTXN_COMMITTING || txn->state == TOKUTXN_ABORTING);
toku_txn_manager_finish_txn(txn->logger->txn_manager, txn);
if (txn->parent) {
toku_txn_manager_handle_snapshot_destroy_for_child_txn(
txn,
txn->logger->txn_manager,
txn->snapshot_type
);
txn->parent->child_manager->finish_child_txn(txn);
}
else {
toku_txn_manager_finish_txn(txn->logger->txn_manager, txn);
txn->child_manager->destroy();
}
// note that here is another place we depend on
// this function being called with the multi operation lock
note_txn_closing(txn);
@ -396,10 +542,9 @@ void toku_txn_destroy_txn(TOKUTXN txn) {
txn->open_fts.destroy();
xids_destroy(&txn->xids);
toku_mutex_destroy(&txn->txn_lock);
toku_mutex_destroy(&txn->state_lock);
toku_cond_destroy(&txn->state_cond);
toku_free(txn);
STATUS_VALUE(TXN_CLOSE)++;
STATUS_VALUE(TXN_NUM_OPEN)--;
}
XIDS toku_txn_get_xids (TOKUTXN txn) {
@ -446,16 +591,16 @@ maybe_log_begin_txn_for_write_operation_unlocked(TOKUTXN txn) {
}
TOKUTXN parent;
parent = txn->parent;
TXNID xid;
xid = txn->txnid64;
TXNID pxid;
pxid = 0;
TXNID_PAIR xid;
xid = txn->txnid;
TXNID_PAIR pxid;
pxid = TXNID_PAIR_NONE;
if (parent) {
// Recursively log parent first if necessary.
// Transactions cannot do work if they have children,
// so the lowest level child's lock is sufficient for ancestors.
maybe_log_begin_txn_for_write_operation_unlocked(parent);
pxid = parent->txnid64;
pxid = parent->txnid;
}
toku_log_xbegin(txn->logger, NULL, 0, xid, pxid);
@ -484,6 +629,39 @@ toku_txn_is_read_only(TOKUTXN txn) {
return false;
}
// Acquires txn->state_lock (needed for hot indexing).
// In the code visible in this file, the lock is held while a non-read-only
// txn's state is changed and while txn->num_pin is waited on or decremented.
// Pair every call with toku_txn_unlock_state().
void toku_txn_lock_state(TOKUTXN txn) {
toku_mutex_lock(&txn->state_lock);
}
// Releases txn->state_lock, previously acquired with toku_txn_lock_state().
void toku_txn_unlock_state(TOKUTXN txn){
toku_mutex_unlock(&txn->state_lock);
}
// Prevents a client thread from transitioning txn from
// LIVE|PREPARING -> COMMITTING|ABORTING.
// Hot indexing may need a transaction to stay in the LIVE|PREPARING state
// while it processes a leafentry.
//
// Increments txn->num_pin; the commit/abort path waits until num_pin is back
// to zero before changing the state. The txn must be LIVE or PREPARING and
// must not be read-only.
// NOTE(review): the "_unlocked" suffix suggests the caller is expected to
// already hold the txn's state lock — this function takes no lock itself;
// confirm against callers.
void toku_txn_pin_live_txn_unlocked(TOKUTXN txn) {
assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
assert(!toku_txn_is_read_only(txn));
txn->num_pin++;
}
// Releases one pin on txn, allowing a client thread to go back to being able
// to transition txn from LIVE|PREPARING -> COMMITTING|ABORTING.
//
// Takes the txn's state lock, decrements num_pin, and when the count reaches
// zero broadcasts state_cond so that a thread waiting to commit or abort the
// txn can proceed.
//
// Preconditions (asserted): txn is LIVE or PREPARING and has at least one
// outstanding pin.
void toku_txn_unpin_live_txn(TOKUTXN txn) {
    toku_txn_lock_state(txn);
    // The sanity checks run with the state lock held: the commit/abort path
    // reads num_pin and writes state under this lock, so checking them here
    // avoids an unsynchronized read of fields other threads may be updating.
    assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
    assert(txn->num_pin > 0);
    txn->num_pin--;
    if (txn->num_pin == 0) {
        // Last pin released: wake any thread blocked in commit/abort.
        toku_cond_broadcast(&txn->state_cond);
    }
    toku_txn_unlock_state(txn);
}
#include <toku_race_tools.h>
void __attribute__((__constructor__)) toku_txn_status_helgrind_ignore(void);
void

View file

@ -9,10 +9,26 @@
#include "txn_manager.h"
void txn_status_init(void);
void txn_status_destroy(void);
// Returns true only when both halves of the id pair (parent_id64 and
// child_id64) are TXNID_NONE.
inline bool txn_pair_is_none(TXNID_PAIR txnid) {
    if (txnid.parent_id64 != TXNID_NONE) {
        return false;
    }
    return txnid.child_id64 == TXNID_NONE;
}
// Decides whether a transaction must create its own snapshot.
//   TXN_SNAPSHOT_NONE              -> never
//   no parent (root transaction)   -> yes, for any other snapshot type
//   has a parent                   -> only for TXN_SNAPSHOT_CHILD
// In particular, a child with a root-type snapshot does not need its own.
inline bool txn_needs_snapshot(TXN_SNAPSHOT_TYPE snapshot_type, TOKUTXN parent) {
    if (snapshot_type == TXN_SNAPSHOT_NONE) {
        return false;
    }
    if (parent == NULL) {
        return true;
    }
    return snapshot_type == TXN_SNAPSHOT_CHILD;
}
void toku_txn_lock(TOKUTXN txn);
void toku_txn_unlock(TOKUTXN txn);
uint64_t toku_txn_get_id(TOKUTXN txn);
uint64_t toku_txn_get_root_id(TOKUTXN txn);
int toku_txn_begin_txn (
DB_TXN *container_db_txn,
@ -30,7 +46,7 @@ int toku_txn_begin_with_xid (
TOKUTXN parent_tokutxn,
TOKUTXN *tokutxn,
TOKULOGGER logger,
TXNID xid,
TXNID_PAIR xid,
TXN_SNAPSHOT_TYPE snapshot_type,
DB_TXN *container_db_txn,
bool for_recovery
@ -80,9 +96,6 @@ typedef enum {
TXN_BEGIN, // total number of transactions begun (does not include recovered txns)
TXN_COMMIT, // successful commits
TXN_ABORT,
TXN_CLOSE, // should be sum of aborts and commits
TXN_NUM_OPEN, // should be begin - close
TXN_MAX_OPEN, // max value of num_open
TXN_STATUS_NUM_ROWS
} txn_status_entry;
@ -112,4 +125,9 @@ void toku_maybe_log_begin_txn_for_write_operation(TOKUTXN txn);
// Return whether txn (or it's descendents) have done no work.
bool toku_txn_is_read_only(TOKUTXN txn);
void toku_txn_lock_state(TOKUTXN txn);
void toku_txn_unlock_state(TOKUTXN txn);
void toku_txn_pin_live_txn_unlocked(TOKUTXN txn);
void toku_txn_unpin_live_txn(TOKUTXN txn);
#endif //TOKUTXN_H

109
ft/txn_child_manager.cc Normal file
View file

@ -0,0 +1,109 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: rollback.cc 49033 2012-10-17 18:48:30Z zardosht $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "log-internal.h"
#include "txn_child_manager.h"
//
// Sets up a txn_child_manager for the given root transaction.
// root must have no parent and no child id. Note that
// root->txnid.parent_id64 may not yet be set when this is called.
//
void txn_child_manager::init(TOKUTXN root) {
    invariant(root->txnid.child_id64 == TXNID_NONE);
    invariant(root->parent == NULL);

    // The manager's mutex is created as an adaptive mutex.
    m_mutex = ZERO_MUTEX_INITIALIZER;
    toku_pthread_mutexattr_t mutex_attr;
    toku_mutexattr_init(&mutex_attr);
    toku_mutexattr_settype(&mutex_attr, TOKU_MUTEX_ADAPTIVE);
    toku_mutex_init(&m_mutex, &mutex_attr);
    toku_mutexattr_destroy(&mutex_attr);

    // No child id has been handed out yet.
    m_last_xid = TXNID_NONE;
    m_root = root;
}
// Tears down the manager by destroying m_mutex, the mutex created in init().
// The other members (m_root, m_last_xid) are plain values and need no cleanup.
void txn_child_manager::destroy() {
toku_mutex_destroy(&m_mutex);
}
// Registers a child transaction whose id pair is supplied by the caller
// (the recovery path) instead of being generated here.
//   child  - transaction being started; receives txnid
//   parent - transaction that becomes child's parent; must belong to this
//            manager's root
//   txnid  - id pair to assign; its parent_id64 must match the root's
// m_last_xid is raised to txnid.child_id64 when that is larger, so ids
// generated later by start_child_txn() stay above every id seen so far.
void txn_child_manager::start_child_txn_for_recovery(TOKUTXN child, TOKUTXN parent, TXNID_PAIR txnid) {
    invariant(parent->txnid.parent_id64 == m_root->txnid.parent_id64);
    invariant(txnid.parent_id64 == m_root->txnid.parent_id64);

    child->txnid = txnid;

    toku_mutex_lock(&m_mutex);
    if (m_last_xid < txnid.child_id64) {
        m_last_xid = txnid.child_id64;
    }
    parent->child = child;
    toku_mutex_unlock(&m_mutex);
}
// Registers a new child transaction under parent and assigns it a fresh id.
//   child  - transaction being started; receives the root's parent_id64 and
//            a newly generated child_id64
//   parent - transaction that becomes child's parent; must belong to this
//            manager's root
void txn_child_manager::start_child_txn(TOKUTXN child, TOKUTXN parent) {
    invariant(parent->txnid.parent_id64 == m_root->txnid.parent_id64);
    child->txnid.parent_id64 = m_root->txnid.parent_id64;

    toku_mutex_lock(&m_mutex);
    // Advance to the next id, stepping over the root's parent_id64 so that a
    // child_id64 never equals the parent_id64. The XIDs struct stores the
    // parent id as its first TXNID and child ids after it; if the two could
    // coincide, the message application code in ule.cc would need special
    // handling. Avoiding the collision here lessens the probability of bugs.
    do {
        ++m_last_xid;
    } while (m_last_xid == m_root->txnid.parent_id64);
    child->txnid.child_id64 = m_last_xid;
    parent->child = child;
    toku_mutex_unlock(&m_mutex);
}
// Unlinks a finished child transaction from its parent.
// child must belong to this manager's root (same parent_id64). The parent's
// child pointer is cleared while holding m_mutex, so iterate() — which walks
// the child links under the same mutex — never observes a half-updated chain.
// child->parent is dereferenced unconditionally, so child must have a parent.
void txn_child_manager::finish_child_txn(TOKUTXN child) {
invariant(child->txnid.parent_id64 == m_root->txnid.parent_id64);
toku_mutex_lock(&m_mutex);
child->parent->child = NULL;
toku_mutex_unlock(&m_mutex);
}
// Acquires m_mutex and returns with it still held. Until resume() is called,
// the methods that take m_mutex (start_child_txn*, finish_child_txn, iterate)
// will block. Every suspend() must be paired with a resume().
void txn_child_manager::suspend() {
toku_mutex_lock(&m_mutex);
}
// Releases m_mutex; the counterpart of suspend().
void txn_child_manager::resume() {
toku_mutex_unlock(&m_mutex);
}
// Looks up the transaction in this manager's chain (root, root's child,
// that child's child, ...) whose child_id64 equals xid.child_id64.
//   xid    - id pair to look for; xid.parent_id64 must match the root's
//   result - out: the matching transaction, or NULL when none matches
// Takes no lock itself.
// NOTE(review): presumably meant to be called between suspend() and
// resume(), or with the chain otherwise known to be stable — confirm.
void txn_child_manager::find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result) {
    invariant(xid.parent_id64 == m_root->txnid.parent_id64);
    // Always give the out-parameter a defined value, so that a miss is
    // reported as NULL rather than leaving whatever the caller had there.
    *result = NULL;
    for (TOKUTXN curr_txn = m_root; curr_txn != NULL; curr_txn = curr_txn->child) {
        if (xid.child_id64 == curr_txn->txnid.child_id64) {
            *result = curr_txn;
            break;
        }
    }
}
// Invokes cb(txn, extra) on the root transaction and then on each
// transaction reached by following the child links, in that order, while
// holding m_mutex.
// Stops at the first callback that returns non-zero and returns that value;
// returns 0 if every callback returned 0.
int txn_child_manager::iterate(txn_mgr_iter_callback cb, void* extra) {
    int status = 0;
    toku_mutex_lock(&m_mutex);
    for (TOKUTXN txn = m_root; txn != NULL; txn = txn->child) {
        status = cb(txn, extra);
        if (status != 0) {
            break;
        }
    }
    toku_mutex_unlock(&m_mutex);
    return status;
}

35
ft/txn_child_manager.h Normal file
View file

@ -0,0 +1,35 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ifndef TOKU_TXN_CHILD_MANAGER_H
#define TOKU_TXN_CHILD_MANAGER_H
#ident "$Id: rollback.h 49033 2012-10-17 18:48:30Z zardosht $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include "txn_manager.h"
// Tracks the chain of nested (child) transactions hanging off one root
// transaction and hands out child ids for them.
// Uses explicit init()/destroy() rather than a constructor/destructor.
class txn_child_manager {
public:
// Binds the manager to root and creates its mutex.
void init (TOKUTXN root);
// Destroys the mutex created by init().
void destroy();
// Registers child under parent using a caller-supplied id pair.
void start_child_txn_for_recovery(TOKUTXN child, TOKUTXN parent, TXNID_PAIR txnid);
// Registers child under parent and assigns it a newly generated child id.
void start_child_txn(TOKUTXN child, TOKUTXN parent);
// Unlinks child from its parent.
void finish_child_txn(TOKUTXN child);
// Acquire / release the manager's mutex across a caller-controlled region.
void suspend();
void resume();
// Finds the txn in the chain with the given child id; takes no lock itself.
void find_tokutxn_by_xid_unlocked(TXNID_PAIR xid, TOKUTXN* result);
// Calls cb on the root and each descendant in chain order, under the mutex;
// stops at and returns the first non-zero callback result.
int iterate(txn_mgr_iter_callback cb, void* extra);
private:
// Highest child id handed out (or seen during recovery) so far.
TXNID m_last_xid;
// Root transaction this manager belongs to.
TOKUTXN m_root;
// Held while m_last_xid or the parent->child links are modified or walked.
toku_mutex_t m_mutex;
friend class txn_child_manager_unit_test;
};
ENSURE_POD(txn_child_manager);
#endif // TOKU_TXN_CHILD_MANAGER_H

View file

@ -21,17 +21,20 @@ bool garbage_collection_debug = false;
static void txn_manager_lock(TXN_MANAGER txn_manager);
static void txn_manager_unlock(TXN_MANAGER txn_manager);
#if 0
static bool is_txnid_live(TXN_MANAGER txn_manager, TXNID txnid) {
TOKUTXN result = NULL;
toku_txn_manager_id2txn_unlocked(txn_manager, txnid, &result);
return (result != NULL);
}
#endif
//Heaviside function to search through an OMT by a TXNID
int find_by_xid (const TOKUTXN &txn, const TXNID &txnidfind);
static void
verify_snapshot_system(TXN_MANAGER txn_manager UU()) {
#if 0
uint32_t num_snapshot_txnids = txn_manager->snapshot_txnids.size();
TXNID snapshot_txnids[num_snapshot_txnids];
uint32_t num_live_txns = txn_manager->live_txns.size();
@ -170,62 +173,67 @@ verify_snapshot_system(TXN_MANAGER txn_manager UU()) {
}
}
#endif
}
void toku_txn_manager_init(TXN_MANAGER* txn_managerp) {
TXN_MANAGER XCALLOC(txn_manager);
toku_mutex_init(&txn_manager->txn_manager_lock, NULL);
txn_manager->live_txns.create();
txn_manager->live_root_txns.create();
txn_manager->live_root_ids.create();
txn_manager->snapshot_txnids.create();
txn_manager->referenced_xids.create();
txn_manager->last_xid = 0;
toku_list_init(&txn_manager->prepared_txns);
toku_list_init(&txn_manager->prepared_and_returned_txns);
toku_cond_init(&txn_manager->wait_for_unpin_of_txn, 0);
txn_manager->last_xid_seen_for_recover = TXNID_NONE;
*txn_managerp = txn_manager;
}
void toku_txn_manager_destroy(TXN_MANAGER txn_manager) {
toku_mutex_destroy(&txn_manager->txn_manager_lock);
txn_manager->live_txns.destroy();
txn_manager->live_root_txns.destroy();
txn_manager->live_root_ids.destroy();
txn_manager->snapshot_txnids.destroy();
txn_manager->referenced_xids.destroy();
toku_cond_destroy(&txn_manager->wait_for_unpin_of_txn);
toku_free(txn_manager);
}
TXNID
toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager) {
TOKUTXN rtxn = NULL;
TXNID rval = TXNID_NONE_LIVING;
txn_manager_lock(txn_manager);
if (txn_manager->live_root_txns.size() > 0) {
// We use live_root_txns because roots are always older than children,
// and live_root_txns stores TXNIDs directly instead of TOKUTXNs in live_txns
int r = txn_manager->live_root_txns.fetch(0, &rval);
int r = txn_manager->live_root_txns.fetch(0, &rtxn);
invariant_zero(r);
}
if (rtxn) {
rval = rtxn->txnid.parent_id64;
}
txn_manager_unlock(txn_manager);
return rval;
}
// Iteration callback: stores the root id (txnid.parent_id64) of the visited
// transaction into slot `index` of the array that *referenced_xids points to.
//   live_xid        - the visited TOKUTXN (despite the name, a txn, not an id)
//   index           - position of the visited element; used as the array slot
//   referenced_xids - pointer to the destination TXNID array; the array must
//                     have room for at least index+1 entries
// Always returns 0.
// NOTE(review): `index` is wrapped in UU(), which presumably marks it as
// possibly unused, yet it IS used below — confirm the macro is harmless here.
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids);
int live_root_txn_list_iter(const TOKUTXN &live_xid, const uint32_t UU(index), TXNID **const referenced_xids){
(*referenced_xids)[index] = live_xid->txnid.parent_id64;
return 0;
}
// Create list of root transactions that were live when this txn began.
static void
setup_live_root_txn_list(TXN_MANAGER txn_manager, TOKUTXN txn) {
invariant(txn_manager->live_root_txns.size() > 0);
invariant(txn->live_root_txn_list == nullptr);
XMALLOC(txn->live_root_txn_list);
txn->live_root_txn_list->clone(txn_manager->live_root_txns);
static inline void
setup_live_root_txn_list(xid_omt_t* live_root_txnid, xid_omt_t* live_root_txn_list) {
live_root_txn_list->clone(*live_root_txnid);
}
//Heaviside function to search through an OMT by a TXNID
int
find_by_xid (const TOKUTXN &txn, const TXNID &txnidfind) {
if (txn->txnid64<txnidfind) return -1;
if (txn->txnid64>txnidfind) return +1;
if (txn->txnid.parent_id64 < txnidfind) return -1;
if (txn->txnid.parent_id64 > txnidfind) return +1;
return 0;
}
@ -257,7 +265,7 @@ max_xid(TXNID a, TXNID b) {
static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) {
TXNID oldest_referenced_xid = TXNID_NONE_LIVING;
int r = txn_manager->live_root_txns.fetch(0, &oldest_referenced_xid);
int r = txn_manager->live_root_ids.fetch(0, &oldest_referenced_xid);
// this function should only be called when we know there is at least
// one live transaction
invariant_zero(r);
@ -272,135 +280,32 @@ static TXNID get_oldest_referenced_xid_unlocked(TXN_MANAGER txn_manager) {
return oldest_referenced_xid;
}
int toku_txn_manager_start_txn(
TOKUTXN *txnp,
TXN_MANAGER txn_manager,
TOKUTXN parent,
TOKULOGGER logger,
TXNID xid,
TXN_SNAPSHOT_TYPE snapshot_type,
DB_TXN *container_db_txn,
bool for_recovery)
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
// template-only function, but must be extern
int find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind);
int
find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind)
{
int r;
XIDS xids;
TOKUTXN txn;
// Do as much (safe) work as possible before serializing on the txn_manager lock.
XIDS parent_xids;
if (parent == NULL) {
parent_xids = xids_get_root_xids();
} else {
parent_xids = parent->xids;
}
r = xids_create_unknown_child(parent_xids, &xids);
if (r != 0) {
return r;
}
toku_txn_create_txn(&txn, parent, logger, snapshot_type, container_db_txn, xids, for_recovery);
// the act of getting a transaction ID and adding the
// txn to the proper OMTs must be atomic. MVCC depends
// on this.
txn_manager_lock(txn_manager);
if (garbage_collection_debug) {
verify_snapshot_system(txn_manager);
}
if (xid == TXNID_NONE) {
invariant(!for_recovery);
// The transaction manager maintains the latest transaction id.
xid = ++txn_manager->last_xid;
invariant(logger);
}
else {
// Recovered transactions may not come in ascending order,
// because we assign xids when transactions are created but
// log transactions only when they first perform a write.
invariant(for_recovery);
txn_manager->last_xid = max_xid(txn_manager->last_xid, xid);
}
xids_finalize_with_child(txn->xids, xid);
toku_txn_update_xids_in_txn(txn, xid);
//Add txn to list (omt) of live transactions
{
uint32_t idx = txn_manager->live_txns.size();
if (for_recovery) {
r = txn_manager->live_txns.find_zero<TXNID, find_by_xid>(txn->txnid64, nullptr, &idx);
invariant(r==DB_NOTFOUND);
}
r = txn_manager->live_txns.insert_at(txn, idx);
lazy_assert_zero(r);
}
{
//
// maintain the data structures necessary for MVCC:
// 1. add txn to list of live_root_txns if this is a root transaction
// 2. if the transaction is creating a snapshot:
// - create a live list for the transaction
// - add the id to the list of snapshot ids
//
// The order of operations is important here, and must be taken
// into account when the transaction is closed. The txn is added
// to the live_root_txns first (if it is a root txn). This has the implication
// that a root level snapshot transaction is in its own live list. This fact
// is taken into account when the transaction is closed.
// add ancestor information, and maintain global live root txn list
if (parent == NULL) {
//Add txn to list (omt) of live root txns
uint32_t idx = txn_manager->live_root_txns.size();
if (for_recovery) {
r = txn_manager->live_root_txns.find_zero<TXNID, toku_find_xid_by_xid>(txn->txnid64, nullptr, &idx);
invariant(r == DB_NOTFOUND);
}
r = txn_manager->live_root_txns.insert_at(txn->txnid64, idx);
}
txn->oldest_referenced_xid = for_recovery ? TXNID_NONE : get_oldest_referenced_xid_unlocked(txn_manager);
// setup information for snapshot reads
if (txn->snapshot_type != TXN_SNAPSHOT_NONE) {
// in this case, either this is a root level transaction that needs its live list setup, or it
// is a child transaction that specifically asked for its own snapshot
if (parent == NULL || txn->snapshot_type == TXN_SNAPSHOT_CHILD) {
setup_live_root_txn_list(txn_manager, txn);
// Add this txn to the global list of txns that have their own snapshots.
// (Note, if a txn is a child that creates its own snapshot, then that child xid
// is the xid stored in the global list.)
{
uint32_t idx = txn_manager->snapshot_txnids.size();
if (for_recovery) {
r = txn_manager->snapshot_txnids.find_zero<TXNID, toku_find_xid_by_xid>(txn->txnid64, nullptr, &idx);
invariant(r == DB_NOTFOUND);
}
r = txn_manager->snapshot_txnids.insert_at(txn->txnid64, idx);
lazy_assert_zero(r);
}
}
// in this case, it is a child transaction that specified its snapshot to be that
// of the root transaction
else if (txn->snapshot_type == TXN_SNAPSHOT_ROOT) {
txn->live_root_txn_list = parent->live_root_txn_list;
}
else {
assert(false);
}
}
}
if (garbage_collection_debug) {
verify_snapshot_system(txn_manager);
}
txn_manager_unlock(txn_manager);
*txnp = txn;
if (txn->txnid.parent_id64 < txnfind->txnid.parent_id64) return -1;
if (txn->txnid.parent_id64 > txnfind->txnid.parent_id64) return +1;
return 0;
}
// Gives txn its own snapshot. Takes no lock itself; the callers visible in
// this file invoke it with the txn_manager lock held.
//   - assigns txn->snapshot_txnid64 the next id from txn_manager->last_xid
//   - fills txn->live_root_txn_list (which the caller must already have
//     allocated) via setup_live_root_txn_list from the manager's live_root_ids
//   - appends the new snapshot id at the end of the manager's snapshot_txnids,
//     the global list of txns that have their own snapshots
static inline void txn_manager_create_snapshot_unlocked(
    TXN_MANAGER txn_manager,
    TOKUTXN txn
    )
{
    txn->snapshot_txnid64 = ++txn_manager->last_xid;
    setup_live_root_txn_list(&txn_manager->live_root_ids, txn->live_root_txn_list);

    // The id was just generated from last_xid, so it is inserted at the tail.
    const uint32_t tail = txn_manager->snapshot_txnids.size();
    const int r = txn_manager->snapshot_txnids.insert_at(txn->snapshot_txnid64, tail);
    lazy_assert_zero(r);
}
// template-only function, but must be extern
int find_tuple_by_xid (const struct referenced_xid_tuple &tuple, const TXNID &xidfind);
int
@ -411,30 +316,6 @@ find_tuple_by_xid (const struct referenced_xid_tuple &tuple, const TXNID &xidfin
return 0;
}
TXNID
toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids) {
struct referenced_xid_tuple *tuple;
int r;
TXNID rval = TXNID_NONE;
r = referenced_xids.find_zero<TXNID, find_tuple_by_xid>(xc, &tuple, nullptr);
if (r == DB_NOTFOUND) {
goto done;
}
TXNID live;
r = snapshot_txnids.find<TXNID, toku_find_xid_by_xid>(tuple->end_id, -1, &live, nullptr);
if (r == DB_NOTFOUND) {
goto done;
}
invariant(live < tuple->end_id);
if (live > tuple->begin_id) {
rval = live;
}
done:
return rval;
}
// template-only function, but must be extern
int referenced_xids_note_snapshot_txn_end_iter(const TXNID &live_xid, const uint32_t UU(index), rx_omt_t *const referenced_xids)
__attribute__((nonnull(3)));
@ -513,90 +394,236 @@ note_snapshot_txn_end_by_txn_live_list(TXN_MANAGER mgr, xid_omt_t* live_root_txn
return 0;
}
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
// template-only function, but must be extern
int find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind);
int
find_xid (const TOKUTXN &txn, const TOKUTXN &txnfind)
static inline void txn_manager_remove_snapshot_unlocked(
TOKUTXN txn,
TXN_MANAGER txn_manager,
uint32_t* index_in_snapshot_txnids
)
{
if (txn->txnid64<txnfind->txnid64) return -1;
if (txn->txnid64>txnfind->txnid64) return +1;
return 0;
TXNID xid;
//Free memory used for snapshot_txnids
int r = txn_manager->snapshot_txnids.find_zero<TXNID, toku_find_xid_by_xid>(txn->snapshot_txnid64, &xid, index_in_snapshot_txnids);
invariant_zero(r);
invariant(xid == txn->snapshot_txnid64);
r = txn_manager->snapshot_txnids.delete_at(*index_in_snapshot_txnids);
invariant_zero(r);
uint32_t ref_xids_size = txn_manager->referenced_xids.size();
uint32_t live_list_size = txn->live_root_txn_list->size();
if (ref_xids_size > 0 && live_list_size > 0) {
if (live_list_size > ref_xids_size && ref_xids_size < 2000) {
note_snapshot_txn_end_by_txn_live_list(txn_manager, txn->live_root_txn_list);
}
else {
note_snapshot_txn_end_by_ref_xids(txn_manager, *txn->live_root_txn_list);
}
}
}
// Makes child share its parent's snapshot: copies the parent's
// snapshot_txnid64 and points child at the parent's live_root_txn_list
// (the list is shared, not cloned). Does nothing when child has no parent.
static inline void inherit_snapshot_from_parent(TOKUTXN child) {
    TOKUTXN parent = child->parent;
    if (!parent) {
        return;
    }
    child->snapshot_txnid64 = parent->snapshot_txnid64;
    child->live_root_txn_list = parent->live_root_txn_list;
}
// Sets up the snapshot of a newly started child transaction.
//   txn           - the child transaction; must have a parent
//   txn_manager   - manager whose lock guards snapshot creation
//   snapshot_type - snapshot type requested for txn
// When txn_needs_snapshot() says the child needs its own snapshot, a live
// root list is allocated (outside the manager lock) and the snapshot is
// created under the manager lock. Otherwise the child shares its parent's
// snapshot.
void toku_txn_manager_handle_snapshot_create_for_child_txn(
    TOKUTXN txn,
    TXN_MANAGER txn_manager,
    TXN_SNAPSHOT_TYPE snapshot_type
    )
{
    // this function is only for child txns, so sanity check that
    invariant(txn->parent != NULL);

    if (!txn_needs_snapshot(snapshot_type, txn->parent)) {
        inherit_snapshot_from_parent(txn);
        return;
    }

    invariant(txn->live_root_txn_list == nullptr);
    XMALLOC(txn->live_root_txn_list);
    txn_manager_lock(txn_manager);
    txn_manager_create_snapshot_unlocked(txn_manager, txn);
    txn_manager_unlock(txn_manager);
}
// Tears down the snapshot of a finishing child transaction.
//   txn           - the child transaction; must have a parent
//   txn_manager   - manager whose lock guards snapshot removal
//   snapshot_type - snapshot type txn was started with
// Only a child that created its own snapshot (per txn_needs_snapshot()) has
// anything to release: its id is removed from the manager's snapshot list
// under the manager lock, then its private live root list is destroyed and
// freed outside the lock. A child that shared its parent's snapshot owns
// nothing, so nothing is done for it.
void toku_txn_manager_handle_snapshot_destroy_for_child_txn(
    TOKUTXN txn,
    TXN_MANAGER txn_manager,
    TXN_SNAPSHOT_TYPE snapshot_type
    )
{
    // this function is only for child txns, so sanity check that
    invariant(txn->parent != NULL);
    bool is_snapshot = txn_needs_snapshot(snapshot_type, txn->parent);
    if (is_snapshot) {
        // Check the list before txn_manager_remove_snapshot_unlocked(), which
        // dereferences txn->live_root_txn_list; checking afterwards would be
        // too late to catch a missing list.
        invariant(txn->live_root_txn_list != nullptr);
        // Receives the snapshot's former position; not needed by this caller.
        uint32_t index = 0;
        txn_manager_lock(txn_manager);
        txn_manager_remove_snapshot_unlocked(txn, txn_manager, &index);
        txn_manager_unlock(txn_manager);
        txn->live_root_txn_list->destroy();
        toku_free(txn->live_root_txn_list);
    }
}
// Registers a root transaction with the manager using a caller-supplied xid
// (recovery path), instead of generating a new one.
//   txn         - transaction to register
//   txn_manager - manager to register it with
//   xid         - id to give the transaction
// Everything happens under the txn_manager lock. No snapshot is created and
// oldest_referenced_xid is set to TXNID_NONE.
void toku_txn_manager_start_txn_for_recovery(
TOKUTXN txn,
TXN_MANAGER txn_manager,
TXNID xid
)
{
txn_manager_lock(txn_manager);
// using xid that is passed in
// keep last_xid at least as large as any xid seen, so later generated
// ids do not collide with it
txn_manager->last_xid = max_xid(txn_manager->last_xid, xid);
// presumably stores xid into txn->txnid.parent_id64 (which find_xid
// compares below) — confirm in toku_txn_update_xids_in_txn
toku_txn_update_xids_in_txn(txn, xid);
txn->oldest_referenced_xid = TXNID_NONE;
uint32_t idx;
// The txn must not already be registered; on DB_NOTFOUND, idx is used as
// the position at which to insert.
int r = txn_manager->live_root_txns.find_zero<TOKUTXN, find_xid>(txn, nullptr, &idx);
invariant(r == DB_NOTFOUND);
r = txn_manager->live_root_txns.insert_at(txn, idx);
invariant_zero(r);
// live_root_ids mirrors live_root_txns: same position, id instead of txn.
r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx);
invariant_zero(r);
txn_manager_unlock(txn_manager);
}
// Registers a new root transaction with the manager: assigns it the next
// xid, appends it to the live root lists, records its oldest referenced xid,
// and creates its snapshot if one is needed.
//   txn           - transaction to register
//   txn_manager   - manager to register it with
//   snapshot_type - snapshot type requested for txn
void toku_txn_manager_start_txn(
TOKUTXN txn,
TXN_MANAGER txn_manager,
TXN_SNAPSHOT_TYPE snapshot_type
)
{
int r;
TXNID xid = TXNID_NONE;
// txn_needs_snapshot is asked with a NULL parent (this is the root path):
// a snapshot is needed unless snapshot_type is TXN_SNAPSHOT_NONE
bool needs_snapshot = txn_needs_snapshot(snapshot_type, NULL);
// perform a malloc outside of the txn_manager lock
// will be used in txn_manager_create_snapshot_unlocked below
if (needs_snapshot) {
invariant(txn->live_root_txn_list == nullptr);
XMALLOC(txn->live_root_txn_list);
}
// the act of getting a transaction ID and adding the
// txn to the proper OMTs must be atomic. MVCC depends
// on this.
txn_manager_lock(txn_manager);
if (garbage_collection_debug) {
verify_snapshot_system(txn_manager);
}
//
// maintain the data structures necessary for MVCC:
// 1. add txn to list of live_root_txns if this is a root transaction
// 2. if the transaction is creating a snapshot:
// - create a live list for the transaction
// - add the id to the list of snapshot ids
//
// The order of operations is important here, and must be taken
// into account when the transaction is closed. The txn is added
// to the live_root_txns first (if it is a root txn). This has the implication
// that a root level snapshot transaction is in its own live list. This fact
// is taken into account when the transaction is closed.
// add ancestor information, and maintain global live root txn list
xid = ++txn_manager->last_xid;
toku_txn_update_xids_in_txn(txn, xid);
// the xid was just generated from last_xid, so the txn is appended at the
// end of both lists; live_root_ids mirrors live_root_txns position by position
uint32_t idx = txn_manager->live_root_txns.size();
r = txn_manager->live_root_txns.insert_at(txn, idx);
invariant_zero(r);
r = txn_manager->live_root_ids.insert_at(txn->txnid.parent_id64, idx);
invariant_zero(r);
// computed after the txn has been inserted, so live_root_ids is non-empty
txn->oldest_referenced_xid = get_oldest_referenced_xid_unlocked(txn_manager);
if (needs_snapshot) {
txn_manager_create_snapshot_unlocked(
txn_manager,
txn
);
}
if (garbage_collection_debug) {
verify_snapshot_system(txn_manager);
}
txn_manager_unlock(txn_manager);
}
// Looks up xc in referenced_xids and, if a tuple is found, searches
// snapshot_txnids in direction -1 relative to the tuple's end_id (the
// result is asserted to be smaller than end_id).
// Returns that snapshot id when it is greater than the tuple's begin_id.
// Returns TXNID_NONE when xc has no tuple, when the snapshot search finds
// nothing, or when the id found is not greater than begin_id.
TXNID
toku_get_youngest_live_list_txnid_for(TXNID xc, const xid_omt_t &snapshot_txnids, const rx_omt_t &referenced_xids) {
    struct referenced_xid_tuple *tuple;
    int r = referenced_xids.find_zero<TXNID, find_tuple_by_xid>(xc, &tuple, nullptr);
    if (r == DB_NOTFOUND) {
        return TXNID_NONE;
    }

    TXNID live;
    r = snapshot_txnids.find<TXNID, toku_find_xid_by_xid>(tuple->end_id, -1, &live, nullptr);
    if (r == DB_NOTFOUND) {
        return TXNID_NONE;
    }

    invariant(live < tuple->end_id);
    return (live > tuple->begin_id) ? live : TXNID_NONE;
}
void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
int r;
invariant(txn->parent == NULL);
bool is_snapshot = txn_needs_snapshot(txn->snapshot_type, NULL);
txn_manager_lock(txn_manager);
if (garbage_collection_debug) {
verify_snapshot_system(txn_manager);
}
{
//Remove txn from list (omt) of live transactions
TOKUTXN txnagain;
uint32_t idx;
r = txn_manager->live_txns.find_zero<TOKUTXN, find_xid>(txn, &txnagain, &idx);
invariant_zero(r);
invariant(txn==txnagain);
r = txn_manager->live_txns.delete_at(idx);
invariant_zero(r);
}
bool is_snapshot = (txn->snapshot_type != TXN_SNAPSHOT_NONE && (txn->parent==NULL || txn->snapshot_type == TXN_SNAPSHOT_CHILD));
uint32_t index_in_snapshot_txnids;
if (is_snapshot) {
TXNID xid;
//Free memory used for snapshot_txnids
r = txn_manager->snapshot_txnids.find_zero<TXNID, toku_find_xid_by_xid>(txn->txnid64, &xid, &index_in_snapshot_txnids);
invariant_zero(r);
invariant(xid == txn->txnid64);
r = txn_manager->snapshot_txnids.delete_at(index_in_snapshot_txnids);
invariant_zero(r);
uint32_t ref_xids_size = txn_manager->referenced_xids.size();
uint32_t live_list_size = txn->live_root_txn_list->size();
if (ref_xids_size > 0 && live_list_size > 0) {
if (live_list_size > ref_xids_size && ref_xids_size < 2000) {
note_snapshot_txn_end_by_txn_live_list(txn_manager, txn->live_root_txn_list);
}
else {
note_snapshot_txn_end_by_ref_xids(txn_manager, *txn->live_root_txn_list);
}
}
//Free memory used for live root txns local list (will happen
//after we unlock the txn_manager_lock)
invariant(txn->live_root_txn_list->size() > 0);
txn_manager_remove_snapshot_unlocked(
txn,
txn_manager,
&index_in_snapshot_txnids
);
}
if (txn->parent==NULL) {
TXNID xid;
uint32_t idx;
//Remove txn from list of live root txns
r = txn_manager->live_root_txns.find_zero<TXNID, toku_find_xid_by_xid>(txn->txnid64, &xid, &idx);
invariant_zero(r);
invariant(xid == txn->txnid64);
r = txn_manager->live_root_txns.delete_at(idx);
invariant_zero(r);
uint32_t idx;
//Remove txn from list of live root txns
TOKUTXN txnagain;
r = txn_manager->live_root_txns.find_zero<TOKUTXN, find_xid>(txn, &txnagain, &idx);
invariant_zero(r);
invariant(txn==txnagain);
if (!toku_txn_is_read_only(txn) || garbage_collection_debug) {
if (!is_snapshot) {
// If it's a snapshot, we already calculated index_in_snapshot_txnids.
// Otherwise, calculate it now.
r = txn_manager->snapshot_txnids.find_zero<TXNID, toku_find_xid_by_xid>(txn->txnid64, nullptr, &index_in_snapshot_txnids);
invariant(r == DB_NOTFOUND);
}
uint32_t num_references = txn_manager->snapshot_txnids.size() - index_in_snapshot_txnids;
if (num_references > 0) {
// This transaction exists in a live list of another transaction.
struct referenced_xid_tuple tuple = {
.begin_id = txn->txnid64,
.end_id = ++txn_manager->last_xid,
.references = num_references
};
r = txn_manager->referenced_xids.insert<TXNID, find_tuple_by_xid>(tuple, txn->txnid64, nullptr);
lazy_assert_zero(r);
}
r = txn_manager->live_root_txns.delete_at(idx);
invariant_zero(r);
r = txn_manager->live_root_ids.delete_at(idx);
invariant_zero(r);
if (!toku_txn_is_read_only(txn) || garbage_collection_debug) {
if (!is_snapshot) {
//
// If it's a snapshot, we already calculated index_in_snapshot_txnids.
// Otherwise, calculate it now.
//
r = txn_manager->snapshot_txnids.find_zero<TXNID, toku_find_xid_by_xid>(txn->txnid.parent_id64, nullptr, &index_in_snapshot_txnids);
invariant(r == DB_NOTFOUND);
}
uint32_t num_references = txn_manager->snapshot_txnids.size() - index_in_snapshot_txnids;
if (num_references > 0) {
// This transaction exists in a live list of another transaction.
struct referenced_xid_tuple tuple = {
.begin_id = txn->txnid.parent_id64,
.end_id = ++txn_manager->last_xid,
.references = num_references
};
r = txn_manager->referenced_xids.insert<TXNID, find_tuple_by_xid>(tuple, txn->txnid.parent_id64, nullptr);
lazy_assert_zero(r);
}
}
@ -615,23 +642,23 @@ void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
void toku_txn_manager_clone_state_for_gc(
TXN_MANAGER txn_manager,
xid_omt_t &snapshot_xids,
rx_omt_t &referenced_xids,
xid_omt_t &live_root_txns
xid_omt_t* snapshot_xids,
rx_omt_t* referenced_xids,
xid_omt_t* live_root_txns
)
{
txn_manager_lock(txn_manager);
snapshot_xids.clone(txn_manager->snapshot_txnids);
referenced_xids.clone(txn_manager->referenced_xids);
live_root_txns.clone(txn_manager->live_root_txns);
snapshot_xids->clone(txn_manager->snapshot_txnids);
referenced_xids->clone(txn_manager->referenced_xids);
setup_live_root_txn_list(&txn_manager->live_root_ids, live_root_txns);
txn_manager_unlock(txn_manager);
}
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID txnid, TOKUTXN *result) {
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result) {
TOKUTXN txn;
int r = txn_manager->live_txns.find_zero<TXNID, find_by_xid>(txnid, &txn, nullptr);
int r = txn_manager->live_root_txns.find_zero<TXNID, find_by_xid>(txnid.parent_id64, &txn, nullptr);
if (r==0) {
assert(txn->txnid64==txnid);
assert(txn->txnid.parent_id64 == txnid.parent_id64);
*result = txn;
}
else {
@ -641,20 +668,14 @@ void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID txnid, TOKU
}
}
void toku_txn_manager_id2txn(TXN_MANAGER txn_manager, TXNID txnid, TOKUTXN *result) {
txn_manager_lock(txn_manager);
toku_txn_manager_id2txn_unlocked(txn_manager, txnid, result);
txn_manager_unlock(txn_manager);
}
int toku_txn_manager_get_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp) {
int toku_txn_manager_get_root_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp) {
txn_manager_lock(txn_manager);
int ret_val = 0;
int num_live_txns = txn_manager->live_txns.size();
int num_live_txns = txn_manager->live_root_txns.size();
for (int i = 0; i < num_live_txns; i++) {
TOKUTXN txn;
{
int r = txn_manager->live_txns.fetch(i, &txn);
int r = txn_manager->live_root_txns.fetch(i, &txn);
assert_zero(r);
}
if (txn->xa_xid.formatID == xid->formatID
@ -672,94 +693,70 @@ exit:
return ret_val;
}
uint32_t toku_txn_manager_num_live_txns(TXN_MANAGER txn_manager) {
uint32_t toku_txn_manager_num_live_root_txns(TXN_MANAGER txn_manager) {
int ret_val = 0;
txn_manager_lock(txn_manager);
ret_val = txn_manager->live_txns.size();
ret_val = txn_manager->live_root_txns.size();
txn_manager_unlock(txn_manager);
return ret_val;
}
void toku_txn_manager_add_prepared_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
txn_manager_lock(txn_manager);
assert(txn->state==TOKUTXN_LIVE);
txn->state = TOKUTXN_PREPARING; // This state transition must be protected against begin_checkpoint
toku_list_push(&txn_manager->prepared_txns, &txn->prepared_txns_link);
txn_manager_unlock(txn_manager);
// Walks the live root transactions of txn_manager while holding the
// txn_manager_lock and invokes a callback for each one.
//
//  txn_manager    - manager whose live_root_txns are visited
//  cb             - callback to run
//  extra          - opaque pointer handed through to cb unchanged
//  just_root_txns - when true, cb is called directly on each root txn;
//                   when false, each root's child_manager->iterate(cb, extra)
//                   is called instead (presumably visiting the root and its
//                   descendants -- confirm against txn_child_manager)
//
// Returns 0 if every invocation returned 0; otherwise iteration stops at the
// first non-zero result and that value is returned.
static int txn_manager_iter(
    TXN_MANAGER txn_manager,
    txn_mgr_iter_callback cb,
    void* extra,
    bool just_root_txns
    )
{
    int result = 0;
    toku_mutex_lock(&txn_manager->txn_manager_lock);
    const uint32_t num_roots = txn_manager->live_root_txns.size();
    // The loop ends as soon as a callback (or child iteration) reports non-zero.
    for (uint32_t idx = 0; idx < num_roots && result == 0; idx++) {
        TOKUTXN root_txn = NULL;
        int fetch_r = txn_manager->live_root_txns.fetch(idx, &root_txn);
        assert_zero(fetch_r);
        result = just_root_txns
            ? cb(root_txn, extra)
            : root_txn->child_manager->iterate(cb, extra);
    }
    toku_mutex_unlock(&txn_manager->txn_manager_lock);
    return result;
}
static void invalidate_xa_xid (TOKU_XA_XID *xid) {
TOKU_ANNOTATE_NEW_MEMORY(xid, sizeof(*xid)); // consider it to be all invalid for valgrind
xid->formatID = -1; // According to the XA spec, -1 means "invalid data"
// Runs txn_manager_iter with just_root_txns == false, i.e. cb is applied via
// each live root transaction's child manager rather than to the root directly.
// Returns whatever txn_manager_iter returns (0, or the first non-zero result).
int toku_txn_manager_iter_over_live_txns(
    TXN_MANAGER txn_manager,
    txn_mgr_iter_callback cb,
    void* extra
    )
{
    const bool just_root_txns = false;
    return txn_manager_iter(txn_manager, cb, extra, just_root_txns);
}
void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
// Purpose:
// Delay until any indexer is done pinning this transaction.
// Update status of a transaction from live->aborting (or prepared->aborting)
// Do so in a thread-safe manner that does not conflict with hot indexing or
// begin checkpoint.
if (toku_txn_is_read_only(txn)) {
// Neither hot indexing nor checkpoint do any work with readonly txns,
// so we can skip taking the txn_manager lock here.
invariant(txn->state==TOKUTXN_LIVE);
txn->state = TOKUTXN_ABORTING;
goto done;
}
txn_manager_lock(txn_manager);
if (txn->state==TOKUTXN_PREPARING) {
invalidate_xa_xid(&txn->xa_xid);
toku_list_remove(&txn->prepared_txns_link);
}
// for hot indexing, if hot index is processing
// this transaction in some leafentry, then we cannot change
// the state to commit or abort until
// hot index is done with that leafentry
while (txn->num_pin > 0) {
toku_cond_wait(
&txn_manager->wait_for_unpin_of_txn,
&txn_manager->txn_manager_lock
);
}
txn->state = TOKUTXN_ABORTING;
txn_manager_unlock(txn_manager);
done:
return;
// Runs txn_manager_iter with just_root_txns == true, i.e. cb is invoked
// directly on each live root transaction and child transactions are not
// consulted. Returns whatever txn_manager_iter returns (0, or the first
// non-zero callback result).
int toku_txn_manager_iter_over_live_root_txns(
    TXN_MANAGER txn_manager,
    txn_mgr_iter_callback cb,
    void* extra
    )
{
    const bool just_root_txns = true;
    return txn_manager_iter(txn_manager, cb, extra, just_root_txns);
}
void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn) {
// Purpose:
// Delay until any indexer is done pinning this transaction.
// Update status of a transaction from live->committing (or prepared->committing)
// Do so in a thread-safe manner that does not conflict with hot indexing or
// begin checkpoint.
if (toku_txn_is_read_only(txn)) {
// Neither hot indexing nor checkpoint do any work with readonly txns,
// so we can skip taking the txn_manager lock here.
invariant(txn->state==TOKUTXN_LIVE);
txn->state = TOKUTXN_COMMITTING;
goto done;
}
txn_manager_lock(txn_manager);
if (txn->state==TOKUTXN_PREPARING) {
invalidate_xa_xid(&txn->xa_xid);
toku_list_remove(&txn->prepared_txns_link);
}
// for hot indexing, if hot index is processing
// this transaction in some leafentry, then we cannot change
// the state to commit or abort until
// hot index is done with that leafentry
while (txn->num_pin > 0) {
toku_cond_wait(
&txn_manager->wait_for_unpin_of_txn,
&txn_manager->txn_manager_lock
);
}
txn->state = TOKUTXN_COMMITTING;
txn_manager_unlock(txn_manager);
done:
return;
}
//
// This function is called only via env_txn_xa_recover and env_txn_recover.
@ -768,11 +765,16 @@ done:
// quiescent, in that we are right after recovery and before user operations
// commence.
//
// Another key assumption made here is that only root transactions
// may be prepared and that child transactions cannot be prepared.
// This assumption is made by the fact that we iterate over the live root txns
// to find prepared transactions.
//
// I (Zardosht), don't think we take advantage of this fact, as we are holding
// the txn_manager_lock in this function, but in the future we might want
// to take these assumptions into account.
//
int toku_txn_manager_recover_txn (
int toku_txn_manager_recover_root_txn (
TXN_MANAGER txn_manager,
struct tokulogger_preplist preplist[/*count*/],
long count,
@ -782,60 +784,47 @@ int toku_txn_manager_recover_txn (
{
int ret_val = 0;
txn_manager_lock(txn_manager);
uint32_t num_txns_returned = 0;
// scan through live root txns to find
// prepared transactions and return them
uint32_t size = txn_manager->live_root_txns.size();
if (flags==DB_FIRST) {
// Anything in the returned list goes back on the prepared list.
while (!toku_list_empty(&txn_manager->prepared_and_returned_txns)) {
struct toku_list *h = toku_list_head(&txn_manager->prepared_and_returned_txns);
toku_list_remove(h);
toku_list_push(&txn_manager->prepared_txns, h);
}
} else if (flags!=DB_NEXT) {
txn_manager->last_xid_seen_for_recover = TXNID_NONE;
}
else if (flags!=DB_NEXT) {
ret_val = EINVAL;
goto exit;
}
long i;
for (i=0; i<count; i++) {
if (!toku_list_empty(&txn_manager->prepared_txns)) {
struct toku_list *h = toku_list_head(&txn_manager->prepared_txns);
toku_list_remove(h);
toku_list_push(&txn_manager->prepared_and_returned_txns, h);
TOKUTXN txn = toku_list_struct(h, struct tokutxn, prepared_txns_link);
assert(txn->container_db_txn);
preplist[i].txn = txn->container_db_txn;
preplist[i].xid = txn->xa_xid;
} else {
for (uint32_t i = 0; i < size; i++) {
TOKUTXN curr_txn = NULL;
txn_manager->live_root_txns.fetch(i, &curr_txn);
// skip over TOKUTXNs whose txnid64 is too small, meaning
// we have already processed them.
if (curr_txn->txnid.parent_id64 <= txn_manager->last_xid_seen_for_recover) {
continue;
}
if (curr_txn->state == TOKUTXN_PREPARING) {
assert(curr_txn->container_db_txn);
preplist[num_txns_returned].txn = curr_txn->container_db_txn;
preplist[num_txns_returned].xid = curr_txn->xa_xid;
txn_manager->last_xid_seen_for_recover = curr_txn->txnid.parent_id64;
num_txns_returned++;
}
txn_manager->last_xid_seen_for_recover = curr_txn->txnid.parent_id64;
// if we found the maximum number of prepared transactions we are
// allowed to find, then break
if (num_txns_returned >= count) {
break;
}
}
*retp = i;
invariant(num_txns_returned <= count);
*retp = num_txns_returned;
ret_val = 0;
exit:
txn_manager_unlock(txn_manager);
return ret_val;
}
// needed for hot indexing
// prevents a client thread from transitioning txn from LIVE|PREPAREING -> COMMITTING|ABORTING
// hot indexing may need a transactions to stay in the LIVE|PREPARING state while it processes
// a leafentry.
void toku_txn_manager_pin_live_txn_unlocked(TXN_MANAGER UU(txn_manager), TOKUTXN txn) {
assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
assert(!toku_txn_is_read_only(txn));
txn->num_pin++;
}
// allows a client thread to go back to being able to transition txn
// from LIVE|PREPAREING -> COMMITTING|ABORTING
void toku_txn_manager_unpin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn) {
assert(txn->state == TOKUTXN_LIVE || txn->state == TOKUTXN_PREPARING);
assert(txn->num_pin > 0);
txn->num_pin--;
if (txn->num_pin == 0) {
toku_cond_broadcast(&txn_manager->wait_for_unpin_of_txn);
}
}
// Acquires txn_manager->txn_manager_lock. The caller is responsible for
// releasing it again once it is done with the manager's state.
static void txn_manager_lock(TXN_MANAGER txn_manager) {
toku_mutex_lock(&txn_manager->txn_manager_lock);
}
@ -871,6 +860,15 @@ toku_txn_manager_get_last_xid(TXN_MANAGER mgr) {
return last_xid;
}
// Reports whether the manager currently tracks at least one live root
// transaction. The size of live_root_txns is sampled under the manager lock,
// so the answer may be stale by the time the caller looks at it.
bool
toku_txn_manager_txns_exist(TXN_MANAGER mgr) {
    txn_manager_lock(mgr);
    const uint32_t num_live_roots = mgr->live_root_txns.size();
    txn_manager_unlock(mgr);
    return num_live_roots != 0;
}
// Test-only function
void
toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment) {

View file

@ -25,17 +25,15 @@ typedef toku::omt<struct referenced_xid_tuple, struct referenced_xid_tuple *> rx
struct txn_manager {
toku_mutex_t txn_manager_lock; // a lock protecting this object
txn_omt_t live_txns; // a sorted tree. Old comment said should be a hashtable. Do we still want that?
xid_omt_t live_root_txns; // a sorted tree.
txn_omt_t live_root_txns; // a sorted tree.
xid_omt_t live_root_ids; //contains TXNID x | x is snapshot txn
xid_omt_t snapshot_txnids; //contains TXNID x | x is snapshot txn
// Contains 3-tuples: (TXNID begin_id, TXNID end_id, uint64_t num_live_list_references)
// for committed root transaction ids that are still referenced by a live list.
rx_omt_t referenced_xids;
struct toku_list prepared_txns; // transactions that have been prepared and are unresolved, but have not been returned through txn_recover.
struct toku_list prepared_and_returned_txns; // transactions that have been prepared and unresolved, and have been returned through txn_recover. We need this list so that we can restart the recovery.
toku_cond_t wait_for_unpin_of_txn;
TXNID last_xid;
TXNID last_xid_seen_for_recover;
};
@ -44,53 +42,64 @@ void toku_txn_manager_destroy(TXN_MANAGER txn_manager);
TXNID toku_txn_manager_get_oldest_living_xid(TXN_MANAGER txn_manager);
// Assign a txnid. Log the txn begin in the recovery log. Initialize the txn live lists.
// Also, create the txn.
int toku_txn_manager_start_txn(
TOKUTXN *txnp,
void toku_txn_manager_handle_snapshot_create_for_child_txn(
TOKUTXN txn,
TXN_MANAGER txn_manager,
TOKUTXN parent,
TOKULOGGER logger,
TXNID xid,
TXN_SNAPSHOT_TYPE snapshot_type,
DB_TXN *container_db_txn,
bool for_recovery);
TXN_SNAPSHOT_TYPE snapshot_type
);
void toku_txn_manager_handle_snapshot_destroy_for_child_txn(
TOKUTXN txn,
TXN_MANAGER txn_manager,
TXN_SNAPSHOT_TYPE snapshot_type
);
// Assign a txnid. Log the txn begin in the recovery log. Initialize the txn live lists.
void toku_txn_manager_start_txn(
TOKUTXN txn,
TXN_MANAGER txn_manager,
TXN_SNAPSHOT_TYPE snapshot_type
);
void toku_txn_manager_start_txn_for_recovery(
TOKUTXN txn,
TXN_MANAGER txn_manager,
TXNID xid
);
void toku_txn_manager_finish_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_clone_state_for_gc(
TXN_MANAGER txn_manager,
xid_omt_t &snapshot_xids,
rx_omt_t &referenced_xids,
xid_omt_t &live_root_txns
xid_omt_t* snapshot_xids,
rx_omt_t* referenced_xids,
xid_omt_t* live_root_txns
);
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID txnid, TOKUTXN *result);
void toku_txn_manager_id2txn (TXN_MANAGER txn_manager, TXNID txnid, TOKUTXN *result);
void toku_txn_manager_id2txn_unlocked(TXN_MANAGER txn_manager, TXNID_PAIR txnid, TOKUTXN *result);
int toku_txn_manager_get_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp);
// Returns a root txn associated with xid. The system as a whole
// assumes that only root txns get prepared, and therefore only
// root txns will have XIDs associated with them.
int toku_txn_manager_get_root_txn_from_xid (TXN_MANAGER txn_manager, TOKU_XA_XID *xid, DB_TXN **txnp);
uint32_t toku_txn_manager_num_live_txns(TXN_MANAGER txn_manager);
uint32_t toku_txn_manager_num_live_root_txns(TXN_MANAGER txn_manager);
typedef int (*txn_mgr_iter_callback)(TOKUTXN txn, void* extra);
template<typename iterate_extra_t,
int (*f)(const TOKUTXN &, const uint32_t, iterate_extra_t *const)>
int toku_txn_manager_iter_over_live_txns(
TXN_MANAGER txn_manager,
iterate_extra_t *const v
)
{
int r = 0;
toku_mutex_lock(&txn_manager->txn_manager_lock);
r = txn_manager->live_txns.iterate<iterate_extra_t, f>(v);
toku_mutex_unlock(&txn_manager->txn_manager_lock);
return r;
}
txn_mgr_iter_callback cb,
void* extra
);
void toku_txn_manager_add_prepared_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_note_abort_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_note_commit_txn(TXN_MANAGER txn_manager, TOKUTXN txn);
int toku_txn_manager_iter_over_live_root_txns(
TXN_MANAGER txn_manager,
txn_mgr_iter_callback cb,
void* extra
);
int toku_txn_manager_recover_txn(
int toku_txn_manager_recover_root_txn(
TXN_MANAGER txn_manager,
struct tokulogger_preplist preplist[/*count*/],
long count,
@ -98,9 +107,6 @@ int toku_txn_manager_recover_txn(
uint32_t flags
);
void toku_txn_manager_pin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_unpin_live_txn_unlocked(TXN_MANAGER txn_manager, TOKUTXN txn);
void toku_txn_manager_suspend(TXN_MANAGER txn_manager);
void toku_txn_manager_resume(TXN_MANAGER txn_manager);
@ -108,6 +114,8 @@ void toku_txn_manager_set_last_xid_from_logger(TXN_MANAGER txn_manager, TXNID la
void toku_txn_manager_set_last_xid_from_recovered_checkpoint(TXN_MANAGER txn_manager, TXNID last_xid);
TXNID toku_txn_manager_get_last_xid(TXN_MANAGER mgr);
bool toku_txn_manager_txns_exist(TXN_MANAGER mgr);
// Test-only function
void toku_txn_manager_increase_last_xid(TXN_MANAGER mgr, uint64_t increment);

117
ft/ule.cc
View file

@ -45,9 +45,9 @@ static uint32_t ule_get_innermost_numbytes(ULE ule);
static LE_STATUS_S le_status;
#define STATUS_INIT(k,t,l) { \
le_status.status[k].keyname = #k; \
le_status.status[k].type = t; \
le_status.status[k].legend = "le: " l; \
le_status.status[k].keyname = #k; \
le_status.status[k].type = t; \
le_status.status[k].legend = "le: " l; \
}
static void
@ -65,7 +65,7 @@ status_init(void) {
void
toku_le_get_status(LE_STATUS statp) {
if (!le_status.initialized)
status_init();
status_init();
*statp = le_status;
}
@ -157,9 +157,9 @@ le_malloc(OMT *omtp, struct mempool *mp, size_t size, void **maybe_free)
{
void * rval;
if (omtp)
rval = mempool_malloc_from_omt(omtp, mp, size, maybe_free);
rval = mempool_malloc_from_omt(omtp, mp, size, maybe_free);
else
rval = toku_xmalloc(size);
rval = toku_xmalloc(size);
resource_assert(rval);
return rval;
}
@ -375,7 +375,7 @@ toku_le_apply_msg(FT_MSG msg, // message to apply to leafentry
msg_init_empty_ule(&ule, msg);
} else {
le_unpack(&ule, old_leafentry); // otherwise unpack leafentry
oldnumbytes = ule_get_innermost_numbytes(&ule);
oldnumbytes = ule_get_innermost_numbytes(&ule);
}
msg_modify_ule(&ule, msg); // modify unpacked leafentry
ule_simple_garbage_collection(&ule, oldest_referenced_xid, gc_info);
@ -387,7 +387,7 @@ toku_le_apply_msg(FT_MSG msg, // message to apply to leafentry
maybe_free);
invariant_zero(rval);
if (new_leafentry_p) {
newnumbytes = ule_get_innermost_numbytes(&ule);
newnumbytes = ule_get_innermost_numbytes(&ule);
}
*numbytes_delta_p = newnumbytes - oldnumbytes;
ule_cleanup(&ule);
@ -772,13 +772,13 @@ uxr_unpack_data(UXR uxr, uint8_t *p) {
static inline void
update_le_status(ULE ule, size_t memsize) {
if (ule->num_cuxrs > STATUS_VALUE(LE_MAX_COMMITTED_XR))
STATUS_VALUE(LE_MAX_COMMITTED_XR) = ule->num_cuxrs;
STATUS_VALUE(LE_MAX_COMMITTED_XR) = ule->num_cuxrs;
if (ule->num_puxrs > STATUS_VALUE(LE_MAX_PROVISIONAL_XR))
STATUS_VALUE(LE_MAX_PROVISIONAL_XR) = ule->num_puxrs;
STATUS_VALUE(LE_MAX_PROVISIONAL_XR) = ule->num_puxrs;
if (ule->num_cuxrs > MAX_TRANSACTION_RECORDS)
STATUS_VALUE(LE_EXPANDED)++;
STATUS_VALUE(LE_EXPANDED)++;
if (memsize > STATUS_VALUE(LE_MAX_MEMSIZE))
STATUS_VALUE(LE_MAX_MEMSIZE) = memsize;
STATUS_VALUE(LE_MAX_MEMSIZE) = memsize;
}
// Purpose is to return a newly allocated leaf entry in packed format, or
@ -789,11 +789,11 @@ update_le_status(ULE ule, size_t memsize) {
// Transaction records in ule are stored outer to inner (uxr[0] is outermost).
int
le_pack(ULE ule, // data to be packed into new leafentry
size_t *new_leafentry_memorysize,
LEAFENTRY * const new_leafentry_p, // this is what this function creates
OMT *omtp,
struct mempool *mp,
void **maybe_free)
size_t *new_leafentry_memorysize,
LEAFENTRY * const new_leafentry_p, // this is what this function creates
OMT* omtp,
struct mempool *mp,
void **maybe_free)
{
invariant(ule->num_cuxrs > 0);
invariant(ule->uxrs[0].xid == TXNID_NONE);
@ -1517,7 +1517,7 @@ ule_do_implicit_promotions(ULE ule, XIDS xids) {
//No commits necessary if everything is already committed.
if (ule->num_puxrs > 0) {
int num_xids = xids_get_num_xids(xids);
invariant(num_xids>0); // TODO: If loader/2440 become MVCC happy (instead of 'errors'/etc) we may need to support committed messages.
invariant(num_xids>0);
uint32_t max_index = ule->num_cuxrs + min_i32(ule->num_puxrs, num_xids) - 1;
uint32_t ica_index = max_index;
uint32_t index;
@ -1632,10 +1632,19 @@ ule_apply_delete(ULE ule, XIDS xids) {
static void
ule_prepare_for_new_uxr(ULE ule, XIDS xids) {
TXNID this_xid = xids_get_innermost_xid(xids);
if (ule_get_innermost_xid(ule) == this_xid)
//This is for LOADER_USE_PUTS or transactionless environment
//where messages use XIDS of 0
if (this_xid == TXNID_NONE && ule_get_innermost_xid(ule) == TXNID_NONE) {
ule_remove_innermost_uxr(ule);
else
}
// case where we are transactional and xids stack matches ule stack
else if (ule->num_puxrs > 0 && ule_get_innermost_xid(ule) == this_xid) {
ule_remove_innermost_uxr(ule);
}
// case where we are transactional and xids stack does not match ule stack
else {
ule_add_placeholders(ule, xids);
}
}
// Purpose is to apply an abort message to this leafentry.
@ -1650,7 +1659,11 @@ ule_apply_abort(ULE ule, XIDS xids) {
TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction doing this abort
invariant(this_xid!=TXNID_NONE);
UXR innermost = ule_get_innermost_uxr(ule);
if (innermost->xid == this_xid) {
// need to check for provisional entries in ule, otherwise
// there is nothing to abort, not checking this may result
// in a bug where the most recently committed has same xid
// as the XID's innermost
if (ule->num_puxrs > 0 && innermost->xid == this_xid) {
invariant(ule->num_puxrs>0);
ule_remove_innermost_uxr(ule);
ule_remove_innermost_placeholders(ule);
@ -1676,8 +1689,12 @@ ule_apply_broadcast_commit_all (ULE ule) {
void ule_apply_commit(ULE ule, XIDS xids) {
TXNID this_xid = xids_get_innermost_xid(xids); // xid of transaction committing
invariant(this_xid!=TXNID_NONE);
if (ule_get_innermost_xid(ule) == this_xid) {
//3 cases:
// need to check for provisional entries in ule, otherwise
// there is nothing to commit; not checking this may result
// in a bug where the most recently committed has same xid
// as the XID's innermost
if (ule->num_puxrs > 0 && ule_get_innermost_xid(ule) == this_xid) {
// 3 cases:
//1- it's already a committed value (do nothing) (num_puxrs==0)
//2- it's provisional but root level (make a new committed value (num_puxrs==1)
//3- it's provisional and not root (promote); (num_puxrs>1)
@ -1802,17 +1819,6 @@ ule_remove_innermost_placeholders(ULE ule) {
}
}
static uint8_t
outermost_xid_not_in_ule(ULE ule, XIDS xids) {
uint8_t index = 0;
invariant(ule->num_puxrs < xids_get_num_xids(xids));
if (ule->num_puxrs) {
TXNID ule_xid = ule_get_innermost_xid(ule); // xid of ica
index = xids_find_index_of_xid(xids, ule_xid) + 1;
}
return index;
}
// Purpose is to add placeholders to the top of the leaf stack (the innermost
// recorded transactions), if necessary. This function is idempotent.
// Note, after placeholders are added, an insert or delete will be added. This
@ -1822,17 +1828,22 @@ static void
ule_add_placeholders(ULE ule, XIDS xids) {
//Placeholders can be placed on top of the committed uxr.
invariant(ule->num_cuxrs > 0);
TXNID ica_xid = ule_get_innermost_xid(ule); // xid of ica
TXNID this_xid = xids_get_innermost_xid(xids); // xid of this transaction
invariant(this_xid!=TXNID_NONE);
if (ica_xid != this_xid) { // if this transaction is the ICA, don't push any placeholders
uint8_t index = outermost_xid_not_in_ule(ule, xids);
TXNID current_msg_xid = xids_get_xid(xids, index);
while (current_msg_xid != this_xid) { // Placeholder for each transaction before this transaction
ule_push_placeholder_uxr(ule, current_msg_xid);
index++;
current_msg_xid = xids_get_xid(xids, index);
}
uint32_t num_xids = xids_get_num_xids(xids);
// we assume that implicit promotion has happened
// when we get this call, so the number of xids MUST
// be greater than or equal to the number of provisional entries
invariant(num_xids >= ule->num_puxrs);
// make sure that the xids stack matches up to a certain amount
// this first for loop is just debug code
for (uint32_t i = 0; i < ule->num_puxrs; i++) {
TXNID current_msg_xid = xids_get_xid(xids, i);
TXNID current_ule_xid = ule_get_xid(ule, i + ule->num_cuxrs);
invariant(current_msg_xid == current_ule_xid);
}
for (uint32_t i = ule->num_puxrs; i < num_xids-1; i++) {
TXNID current_msg_xid = xids_get_xid(xids, i);
ule_push_placeholder_uxr(ule, current_msg_xid);
}
}
@ -1886,10 +1897,10 @@ ule_get_innermost_numbytes(ULE ule) {
uint32_t rval;
UXR uxr = ule_get_innermost_uxr(ule);
if (uxr_is_delete(uxr))
rval = 0;
rval = 0;
else {
rval = uxr_get_vallen(uxr);
rval += ule_get_keylen(ule);
rval = uxr_get_vallen(uxr);
rval += ule_get_keylen(ule);
}
return rval;
}
@ -2378,10 +2389,10 @@ leafentry_disksize_13(LEAFENTRY_13 le) {
int
toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry,
size_t *new_leafentry_memorysize,
LEAFENTRY *new_leafentry_p,
OMT *omtp,
struct mempool *mp) {
size_t *new_leafentry_memorysize,
LEAFENTRY *new_leafentry_p,
OMT* omtp,
struct mempool *mp) {
ULE_S ule;
int rval;
invariant(old_leafentry);
@ -2392,7 +2403,7 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry,
rval = le_pack(&ule, // create packed leafentry
new_leafentry_memorysize,
new_leafentry_p,
omtp, mp, NULL);
omtp, mp, NULL);
ule_cleanup(&ule);
return rval;
}

View file

@ -186,6 +186,12 @@ static inline void wbuf_nocrc_TXNID (struct wbuf *w, TXNID tid) {
wbuf_nocrc_ulonglong(w, tid);
}
static inline void wbuf_nocrc_TXNID_PAIR (struct wbuf *w, TXNID_PAIR tid) {
wbuf_nocrc_ulonglong(w, tid.parent_id64);
wbuf_nocrc_ulonglong(w, tid.child_id64);
}
// Appends the transaction id tid to w by forwarding to wbuf_ulonglong.
static inline void wbuf_TXNID (struct wbuf *w, TXNID tid) {
wbuf_ulonglong(w, tid);
}

View file

@ -88,8 +88,6 @@ void
xids_finalize_with_child(XIDS xids, TXNID this_xid) {
// Precondition:
// - xids was created by xids_create_unknown_child
// - All error checking (except that this_xid is higher than its parent) is already complete
invariant(this_xid > xids_get_innermost_xid(xids));
TXNID this_xid_disk = toku_htod64(this_xid);
uint32_t num_child_xids = ++xids->num_xids;
xids->ids[num_child_xids - 1] = this_xid_disk;
@ -118,8 +116,6 @@ xids_create_from_buffer(struct rbuf *rb, // xids list for parent transaction
uint8_t index;
for (index = 0; index < xids->num_xids; index++) {
rbuf_TXNID(rb, &xids->ids[index]);
if (index > 0)
assert(xids->ids[index] > xids->ids[index-1]);
}
*xids_p = xids;
}
@ -143,20 +139,6 @@ xids_get_xid(XIDS xids, uint8_t index) {
return rval;
}
// This function assumes that target_xid IS in the list
// of xids.
uint8_t
xids_find_index_of_xid(XIDS xids, TXNID target_xid) {
uint8_t index = 0; // search outer to inner
TXNID current_xid = xids_get_xid(xids, index);
while (current_xid != target_xid) {
invariant(current_xid < target_xid);
index++;
current_xid = xids_get_xid(xids, index); // Next inner txnid in xids.
}
return index;
}
uint8_t
xids_get_num_xids(XIDS xids) {
uint8_t rval = xids->num_xids;

View file

@ -45,8 +45,6 @@ void xids_destroy(XIDS *xids_p);
TXNID xids_get_xid(XIDS xids, uint8_t index);
uint8_t xids_find_index_of_xid(XIDS xids, TXNID target_xid);
uint8_t xids_get_num_xids(XIDS xids);
TXNID xids_get_innermost_xid(XIDS xids);

View file

@ -177,6 +177,15 @@ typedef struct toku_cond {
pthread_cond_t pcond;
} toku_cond_t;
// Different OSes implement condition variables as different amounts of nested structs.
// C++ will fill out all missing values with zeroes if you provide at least one zero, but it needs the right amount of nesting.
#if defined(__FreeBSD__)
# define ZERO_COND_INITIALIZER {0}
#elif defined(__APPLE__)
# define ZERO_COND_INITIALIZER {{0}}
#else // __linux__, at least
# define ZERO_COND_INITIALIZER {{{0}}}
#endif
#define TOKU_COND_INITIALIZER {PTHREAD_COND_INITIALIZER}
static inline void

View file

@ -184,28 +184,14 @@ static void release_txns(
)
{
uint32_t num_provisional = ule_get_num_provisional(ule);
DB_ENV *env = indexer->i->env;
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(env->i->logger);
bool some_txn_pinned = false;
if (indexer->i->test_xid_state) {
goto exit;
}
// see if any txn pinned before bothering to grab txn_manager lock
for (uint32_t i = 0; i < num_provisional; i++) {
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_PREPARING) {
assert(prov_txns[i]);
some_txn_pinned = true;
toku_txn_unpin_live_txn(prov_txns[i]);
}
}
if (some_txn_pinned) {
toku_txn_manager_suspend(txn_manager);
for (uint32_t i = 0; i < num_provisional; i++) {
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_PREPARING) {
toku_txn_manager_unpin_live_txn_unlocked(txn_manager, prov_txns[i]);
}
}
toku_txn_manager_resume(txn_manager);
}
exit:
return;
}

View file

@ -330,38 +330,76 @@ indexer_fill_prov_info(DB_INDEXER *indexer, struct ule_prov_info *prov_info) {
return;
}
// handle test case first
if (indexer->i->test_xid_state) {
for (uint32_t i = 0; i < num_provisional; i++) {
UXRHANDLE uxr = ule_get_uxr(ule, num_committed + i);
prov_ids[i] = uxr_get_txnid(uxr);
prov_states[i] = indexer->i->test_xid_state(indexer, prov_ids[i]);
prov_txns[i] = NULL;
}
return;
}
// hold the txn manager lock while we inspect txn state
// and pin some live txns
DB_ENV *env = indexer->i->env;
TXN_MANAGER txn_manager = toku_logger_get_txn_manager(env->i->logger);
toku_txn_manager_suspend(txn_manager);
TXNID parent_xid = uxr_get_txnid(ule_get_uxr(ule, num_committed));
// let's first initialize things to defaults
for (uint32_t i = 0; i < num_provisional; i++) {
UXRHANDLE uxr = ule_get_uxr(ule, num_committed + i);
prov_ids[i] = uxr_get_txnid(uxr);
if (indexer->i->test_xid_state) {
prov_states[i] = indexer->i->test_xid_state(indexer, prov_ids[i]);
prov_txns[i] = NULL;
prov_txns[i] = NULL;
prov_states[i] = TOKUTXN_RETIRED;
}
toku_txn_manager_suspend(txn_manager);
TXNID_PAIR root_xid_pair = {.parent_id64=parent_xid, .child_id64 = TXNID_NONE};
TOKUTXN root_txn = NULL;
toku_txn_manager_id2txn_unlocked(
txn_manager,
root_xid_pair,
&root_txn
);
if (root_txn == NULL) {
toku_txn_manager_resume(txn_manager);
return; //everything is retired in this case, the default
}
prov_txns[0] = root_txn;
prov_states[0] = toku_txn_get_state(root_txn);
toku_txn_lock_state(root_txn);
prov_states[0] = toku_txn_get_state(root_txn);
if (prov_states[0] == TOKUTXN_LIVE || prov_states[0] == TOKUTXN_PREPARING) {
// pin this live txn so it can't commit or abort until we're done with it
toku_txn_pin_live_txn_unlocked(root_txn);
}
toku_txn_unlock_state(root_txn);
root_txn->child_manager->suspend();
for (uint32_t i = 1; i < num_provisional; i++) {
UXRHANDLE uxr = ule_get_uxr(ule, num_committed + i);
TXNID child_id = uxr_get_txnid(uxr);
TOKUTXN txn = NULL;
TXNID_PAIR txnid_pair = {.parent_id64 = parent_xid, .child_id64 = child_id};
root_txn->child_manager->find_tokutxn_by_xid_unlocked(txnid_pair, &txn);
prov_txns[i] = txn;
if (txn) {
toku_txn_lock_state(txn);
prov_states[i] = toku_txn_get_state(txn);
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_PREPARING) {
// pin this live txn so it can't commit or abort until we're done with it
toku_txn_pin_live_txn_unlocked(txn);
}
toku_txn_unlock_state(txn);
}
else {
TOKUTXN txn = NULL;
toku_txn_manager_id2txn_unlocked(
txn_manager,
prov_ids[i],
&txn
);
prov_txns[i] = txn;
if (txn) {
prov_states[i] = toku_txn_get_state(txn);
if (prov_states[i] == TOKUTXN_LIVE || prov_states[i] == TOKUTXN_PREPARING) {
// pin this live txn so it can't commit or abort until we're done with it
toku_txn_manager_pin_live_txn_unlocked(txn_manager, txn);
}
}
else {
prov_states[i] = TOKUTXN_RETIRED;
}
prov_states[i] = TOKUTXN_RETIRED;
}
}
root_txn->child_manager->resume();
toku_txn_manager_resume(txn_manager);
}

View file

@ -91,6 +91,7 @@ if(BUILD_TESTING OR BUILD_SRC_TESTS)
multiprocess
mvcc-create-table
mvcc-many-committed
mvcc-read-committed
perf_checkpoint_var
perf_child_txn
perf_cursor_nop

View file

@ -0,0 +1,64 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id: mvcc-create-table.cc 48377 2012-09-27 18:14:35Z leifwalsh $"
#ident "Copyright (c) 2007-2012 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
// Test that isolation works right for subtransactions.
// In particular, check to see what happens if a subtransaction has different isolation level from its parent.
#include "test.h"
const int envflags = DB_INIT_MPOOL|DB_CREATE|DB_THREAD |DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_TXN|DB_PRIVATE;
// Test driver: checks that child transactions of a DB_TXN_SNAPSHOT parent and
// of a DB_READ_COMMITTED parent see different data.
//
// Sequence:
//   1. Create a fresh environment and the dictionary "foo.db".
//   2. Begin txnb (DB_TXN_SNAPSHOT) and txnc (DB_READ_COMMITTED).
//   3. Insert and commit the row ("a" -> "a") in a separate transaction.
//   4. Begin a child of each parent and read the key back:
//        - the snapshot child must get DB_NOTFOUND,
//        - the read-committed child must find the row.
//
// Every step is checked with CKERR/CKERR2. Returns 0 on success.
int test_main (int argc, char * const argv[]) {
    parse_args(argc, argv);
    int r;

    // Start from an empty environment directory. The mkdir result is checked
    // so that a failure is reported here rather than surfacing later as a
    // harder-to-diagnose env->open error.
    r = system("rm -rf " ENVDIR);
    CKERR(r);
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);

    DB_ENV *env;
    r = db_env_create(&env, 0); CKERR(r);
    env->set_errfile(env, stderr);
    r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);

    DB *db;
    DB_TXN* txna = NULL;
    DB_TXN* txnb = NULL;
    DB_TXN* txnc = NULL;
    DB_TXN* txnb_child = NULL;
    DB_TXN* txnc_child = NULL;

    // Create the dictionary inside its own committed transaction.
    r = env->txn_begin(env, NULL, &txna, 0); CKERR(r);
    r = db_create(&db, env, 0); CKERR(r);
    r = db->open(db, txna, "foo.db", NULL, DB_BTREE, DB_CREATE, 0666); CKERR(r);
    r = txna->commit(txna, 0); CKERR(r);

    // Both parents begin BEFORE the row below is inserted.
    r = env->txn_begin(env, NULL, &txnb, DB_TXN_SNAPSHOT); CKERR(r);
    r = env->txn_begin(env, NULL, &txnc, DB_READ_COMMITTED); CKERR(r);

    // Insert and commit one row while txnb and txnc are still open.
    DBT key,val;
    r = env->txn_begin(env, NULL, &txna, 0); CKERR(r);
    r = db->put(db, txna, dbt_init(&key, "a", 2), dbt_init(&val, "a", 2), 0); CKERR(r);
    r = txna->commit(txna, 0); CKERR(r);

    // do a simple test to show that DB_TXN_SNAPSHOT and DB_READ_COMMITTED
    // work differently
    r = env->txn_begin(env, txnb, &txnb_child, DB_TXN_SNAPSHOT); CKERR(r);
    r = env->txn_begin(env, txnc, &txnc_child, DB_READ_COMMITTED); CKERR(r);

    // The snapshot child must not see the row committed after txnb began.
    r = db->get(db, txnb_child, &key, &val, 0);
    CKERR2(r, DB_NOTFOUND);
    // The read-committed child must see the committed row.
    r = db->get(db, txnc_child, &key, &val, 0);
    CKERR(r);

    // Children are committed before their parents.
    r = txnb_child->commit(txnb_child, 0); CKERR(r);
    r = txnc_child->commit(txnc_child, 0); CKERR(r);
    r = txnb->commit(txnb, 0); CKERR(r);
    r = txnc->commit(txnc, 0); CKERR(r);

    r = db->close(db, 0); CKERR(r);
    r = env->close(env, 0); CKERR(r);
    return 0;
}

View file

@ -47,9 +47,9 @@ do_x1_shutdown (bool do_commit, bool do_abort) {
DBT a,b;
dbt_init(&a, "a", 2);
dbt_init(&b, "b", 2);
r = dba->put(dba, txn, &a, &b, 0); CKERR(r);
r = env->txn_checkpoint(env, 0, 0, 0); CKERR(r);
r = dbb->put(dbb, txn, &b, &a, 0); CKERR(r);
r = dba->put(dba, txn, &a, &b, 0); CKERR(r);
r = env->txn_checkpoint(env, 0, 0, 0); CKERR(r);
r = dbb->put(dbb, txn, &b, &a, 0); CKERR(r);
}
//printf("opened\n");
r = txn->commit(txn, 0); CKERR(r);
@ -91,28 +91,28 @@ do_x1_recover (bool did_commit) {
int ra = ca->c_get(ca, &aa, &ab, DB_FIRST); CKERR(r);
int rb = cb->c_get(cb, &ba, &bb, DB_FIRST); CKERR(r);
if (did_commit) {
assert(ra==0);
assert(rb==0);
// verify key-value pairs
assert(aa.size==2);
assert(ab.size==2);
assert(ba.size==2);
assert(bb.size==2);
const char a[2] = "a";
const char b[2] = "b";
assert(ra==0);
assert(rb==0);
// verify key-value pairs
assert(aa.size==2);
assert(ab.size==2);
assert(ba.size==2);
assert(bb.size==2);
const char a[2] = "a";
const char b[2] = "b";
assert(memcmp(aa.data, &a, 2)==0);
assert(memcmp(ab.data, &b, 2)==0);
assert(memcmp(ab.data, &b, 2)==0);
assert(memcmp(bb.data, &a, 2)==0);
// make sure no other entries in DB
assert(ca->c_get(ca, &aa, &ab, DB_NEXT) == DB_NOTFOUND);
assert(cb->c_get(cb, &ba, &bb, DB_NEXT) == DB_NOTFOUND);
fprintf(stderr, "Both verified. Yay!\n");
// make sure no other entries in DB
assert(ca->c_get(ca, &aa, &ab, DB_NEXT) == DB_NOTFOUND);
assert(cb->c_get(cb, &ba, &bb, DB_NEXT) == DB_NOTFOUND);
fprintf(stderr, "Both verified. Yay!\n");
} else {
// It wasn't committed (it also wasn't aborted), but a checkpoint happened.
assert(ra==DB_NOTFOUND);
assert(rb==DB_NOTFOUND);
fprintf(stderr, "Neither present. Yay!\n");
// It wasn't committed (it also wasn't aborted), but a checkpoint happened.
assert(ra==DB_NOTFOUND);
assert(rb==DB_NOTFOUND);
fprintf(stderr, "Neither present. Yay!\n");
}
r = ca->c_close(ca); CKERR(r);
r = cb->c_close(cb); CKERR(r);
@ -142,32 +142,32 @@ static void
do_test_internal (bool commit) {
pid_t pid;
if (0 == (pid=fork())) {
int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--commit" : "--abort", NULL);
assert(r==-1);
printf("execl failed: %d (%s)\n", errno, strerror(errno));
assert(0);
int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--commit" : "--abort", NULL);
assert(r==-1);
printf("execl failed: %d (%s)\n", errno, strerror(errno));
assert(0);
}
{
int r;
int status;
r = waitpid(pid, &status, 0);
//printf("signaled=%d sig=%d\n", WIFSIGNALED(status), WTERMSIG(status));
assert(WIFSIGNALED(status) && WTERMSIG(status)==SIGABRT);
int r;
int status;
r = waitpid(pid, &status, 0);
//printf("signaled=%d sig=%d\n", WIFSIGNALED(status), WTERMSIG(status));
assert(WIFSIGNALED(status) && WTERMSIG(status)==SIGABRT);
}
// Now find out what happened
if (0 == (pid = fork())) {
int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--recover-committed" : "--recover-aborted", NULL);
assert(r==-1);
printf("execl failed: %d (%s)\n", errno, strerror(errno));
assert(0);
int r=execl(cmd, verbose ? "-v" : "-q", commit ? "--recover-committed" : "--recover-aborted", NULL);
assert(r==-1);
printf("execl failed: %d (%s)\n", errno, strerror(errno));
assert(0);
}
{
int r;
int status;
r = waitpid(pid, &status, 0);
//printf("recovery exited=%d\n", WIFEXITED(status));
assert(WIFEXITED(status) && WEXITSTATUS(status)==0);
int r;
int status;
r = waitpid(pid, &status, 0);
//printf("recovery exited=%d\n", WIFEXITED(status));
assert(WIFEXITED(status) && WEXITSTATUS(status)==0);
}
}
@ -187,49 +187,49 @@ x1_parse_args (int argc, char * const argv[]) {
cmd = argv[0];
argc--; argv++;
while (argc>0) {
if (strcmp(argv[0], "-v") == 0) {
verbose++;
} else if (strcmp(argv[0],"-q")==0) {
verbose--;
if (verbose<0) verbose=0;
} else if (strcmp(argv[0], "--commit")==0) {
do_commit=true;
} else if (strcmp(argv[0], "--abort")==0 || strcmp(argv[0], "--test") == 0) {
do_abort=true;
} else if (strcmp(argv[0], "--explicit-abort")==0) {
do_explicit_abort=true;
} else if (strcmp(argv[0], "--recover-committed")==0) {
do_recover_committed=true;
} else if (strcmp(argv[0], "--recover-aborted")==0 || strcmp(argv[0], "--recover") == 0) {
do_recover_aborted=true;
if (strcmp(argv[0], "-v") == 0) {
verbose++;
} else if (strcmp(argv[0],"-q")==0) {
verbose--;
if (verbose<0) verbose=0;
} else if (strcmp(argv[0], "--commit")==0) {
do_commit=true;
} else if (strcmp(argv[0], "--abort")==0 || strcmp(argv[0], "--test") == 0) {
do_abort=true;
} else if (strcmp(argv[0], "--explicit-abort")==0) {
do_explicit_abort=true;
} else if (strcmp(argv[0], "--recover-committed")==0) {
do_recover_committed=true;
} else if (strcmp(argv[0], "--recover-aborted")==0 || strcmp(argv[0], "--recover") == 0) {
do_recover_aborted=true;
} else if (strcmp(argv[0], "--recover-only") == 0) {
do_recover_only=true;
} else if (strcmp(argv[0], "-h")==0) {
resultcode=0;
do_usage:
fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] {--commit | --abort | --explicit-abort | --recover-committed | --recover-aborted } \n", cmd);
exit(resultcode);
} else {
fprintf(stderr, "Unknown arg: %s\n", argv[0]);
resultcode=1;
goto do_usage;
}
argc--;
argv++;
} else if (strcmp(argv[0], "-h")==0) {
resultcode=0;
do_usage:
fprintf(stderr, "Usage:\n%s [-v|-q]* [-h] {--commit | --abort | --explicit-abort | --recover-committed | --recover-aborted } \n", cmd);
exit(resultcode);
} else {
fprintf(stderr, "Unknown arg: %s\n", argv[0]);
resultcode=1;
goto do_usage;
}
argc--;
argv++;
}
{
int n_specified=0;
if (do_commit) n_specified++;
if (do_abort) n_specified++;
if (do_explicit_abort) n_specified++;
if (do_recover_committed) n_specified++;
if (do_recover_aborted) n_specified++;
if (do_recover_only) n_specified++;
if (n_specified>1) {
printf("Specify only one of --commit or --abort or --recover-committed or --recover-aborted\n");
resultcode=1;
goto do_usage;
}
int n_specified=0;
if (do_commit) n_specified++;
if (do_abort) n_specified++;
if (do_explicit_abort) n_specified++;
if (do_recover_committed) n_specified++;
if (do_recover_aborted) n_specified++;
if (do_recover_only) n_specified++;
if (n_specified>1) {
printf("Specify only one of --commit or --abort or --recover-committed or --recover-aborted\n");
resultcode=1;
goto do_usage;
}
}
}
@ -237,21 +237,21 @@ int
test_main (int argc, char * const argv[]) {
x1_parse_args(argc, argv);
if (do_commit) {
do_x1_shutdown (true, false);
do_x1_shutdown (true, false);
} else if (do_abort) {
do_x1_shutdown (false, false);
do_x1_shutdown (false, false);
} else if (do_explicit_abort) {
do_x1_shutdown(false, true);
} else if (do_recover_committed) {
do_x1_recover(true);
do_x1_recover(true);
} else if (do_recover_aborted) {
do_x1_recover(false);
do_x1_recover(false);
} else if (do_recover_only) {
do_x1_recover_only();
}
#if 0
else {
do_test();
do_test();
}
#endif
return 0;

View file

@ -69,7 +69,6 @@ struct __toku_db_env_internal {
TOKULOGGER logger;
toku::locktree::manager ltm;
int32_t open_txns; // Number of open transactions
DB *directory; // Maps dnames to inames
DB *persistent_environment; // Stores environment settings, can be used for upgrade
// TODO: toku::omt<DB *>

View file

@ -207,11 +207,6 @@ ydb_getf_do_nothing(DBT const* UU(key), DBT const* UU(val), void* UU(extra)) {
/* env methods */
static void
env_init_open_txn(DB_ENV *env) {
env->i->open_txns = 0;
}
static void
env_fs_report_in_yellow(DB_ENV *UU(env)) {
char tbuf[26];
@ -977,7 +972,7 @@ env_close(DB_ENV * env, uint32_t flags) {
if (toku_env_is_panicked(env)) {
goto panic_and_quit_early;
}
if (env->i->open_txns != 0) {
if (env->i->logger && toku_logger_txns_exist(env->i->logger)) {
err_msg = "Cannot close environment due to open transactions\n";
r = toku_ydb_do_error(env, EINVAL, "%s", err_msg);
goto panic_and_quit_early;
@ -1397,7 +1392,7 @@ env_txn_recover (DB_ENV *env, DB_PREPLIST preplist[/*count*/], long count, /*out
static int
env_get_txn_from_xid (DB_ENV *env, /*in*/ TOKU_XA_XID *xid, /*out*/ DB_TXN **txnp) {
return toku_txn_manager_get_txn_from_xid(toku_logger_get_txn_manager(env->i->logger), xid, txnp);
return toku_txn_manager_get_root_txn_from_xid(toku_logger_get_txn_manager(env->i->logger), xid, txnp);
}
static int
@ -2143,7 +2138,6 @@ toku_env_create(DB_ENV ** envp, uint32_t flags) {
result->i->datadir_lockfd = -1;
result->i->logdir_lockfd = -1;
result->i->tmpdir_lockfd = -1;
env_init_open_txn(result);
env_fs_init(result);
result->i->bt_compare = toku_builtin_compare_fun;

View file

@ -80,22 +80,22 @@ create_iname_hint(const char *dname, char *hint) {
// n >= 0 means to include mark ("_B_" or "_P_") with hex value of n in iname
// (intended for use by loader, which will create many inames using one txnid).
static char *
create_iname(DB_ENV *env, uint64_t id, char *hint, const char *mark, int n) {
create_iname(DB_ENV *env, uint64_t id1, uint64_t id2, char *hint, const char *mark, int n) {
int bytes;
char inamebase[strlen(hint) +
8 + // hex file format version
16 + // hex id (normally the txnid)
24 + // hex id (normally the txnid's parent and child)
8 + // hex value of n if non-neg
sizeof("_B___.tokudb")]; // extra pieces
if (n < 0)
bytes = snprintf(inamebase, sizeof(inamebase),
"%s_%" PRIx64 "_%" PRIx32 ".tokudb",
hint, id, FT_LAYOUT_VERSION);
"%s_%" PRIx64 "_%" PRIx64 "_%" PRIx32 ".tokudb",
hint, id1, id2, FT_LAYOUT_VERSION);
else {
invariant(strlen(mark) == 1);
bytes = snprintf(inamebase, sizeof(inamebase),
"%s_%" PRIx64 "_%" PRIx32 "_%s_%" PRIx32 ".tokudb",
hint, id, FT_LAYOUT_VERSION, mark, n);
"%s_%" PRIx64 "_%" PRIx64 "_%" PRIx32 "_%s_%" PRIx32 ".tokudb",
hint, id1, id2, FT_LAYOUT_VERSION, mark, n);
}
assert(bytes>0);
assert(bytes<=(int)sizeof(inamebase)-1);
@ -264,15 +264,17 @@ toku_db_open(DB * db, DB_TXN * txn, const char *fname, const char *dbname, DBTYP
char hint[strlen(dname) + 1];
// create iname and make entry in directory
uint64_t id = 0;
uint64_t id1 = 0;
uint64_t id2 = 0;
if (txn) {
id = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn);
id1 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).parent_id64;
id2 = toku_txn_get_txnid(db_txn_struct_i(txn)->tokutxn).child_id64;
} else {
id = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
id1 = toku_sync_fetch_and_add(&nontransactional_open_id, 1);
}
create_iname_hint(dname, hint);
iname = create_iname(db->dbenv, id, hint, NULL, -1); // allocated memory for iname
iname = create_iname(db->dbenv, id1, id2, hint, NULL, -1); // allocated memory for iname
toku_fill_dbt(&iname_dbt, iname, strlen(iname) + 1);
//
// put_flags will be 0 for performance only, avoid unnecessary query
@ -969,7 +971,7 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new
int rval = 0;
int i;
TXNID xid = 0;
TXNID_PAIR xid = TXNID_PAIR_NONE;
DBT dname_dbt; // holds dname
DBT iname_dbt; // holds new iname
@ -994,7 +996,7 @@ load_inames(DB_ENV * env, DB_TXN * txn, int N, DB * dbs[/*N*/], const char * new
// now create new iname
char hint[strlen(dname) + 1];
create_iname_hint(dname, hint);
const char *new_iname = create_iname(env, xid, hint, mark, i); // allocates memory for iname_in_env
const char *new_iname = create_iname(env, xid.parent_id64, xid.child_id64, hint, mark, i); // allocates memory for iname_in_env
new_inames_in_env[i] = new_iname;
toku_fill_dbt(&iname_dbt, new_iname, strlen(new_iname) + 1); // iname_in_env goes in directory
rval = toku_db_put(env->i->directory, txn, &dname_dbt, &iname_dbt, 0, true);

View file

@ -20,7 +20,7 @@
static uint64_t
toku_txn_id64(DB_TXN * txn) {
HANDLE_PANICKED_ENV(txn->mgrp);
return toku_txn_get_id(db_txn_struct_i(txn)->tokutxn);
return toku_txn_get_root_id(db_txn_struct_i(txn)->tokutxn);
}
static void
@ -36,8 +36,6 @@ toku_txn_release_locks(DB_TXN *txn) {
static void
toku_txn_destroy(DB_TXN *txn) {
int32_t open_txns = toku_sync_sub_and_fetch(&txn->mgrp->i->open_txns, 1);
invariant(open_txns >= 0);
db_txn_struct_i(txn)->lt_map.destroy();
toku_txn_destroy_txn(db_txn_struct_i(txn)->tokutxn);
toku_mutex_destroy(&db_txn_struct_i(txn)->txn_mutex);
@ -402,7 +400,7 @@ toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, uint32_t flags) {
result->parent = stxn;
db_txn_struct_i(result)->flags = txn_flags;
db_txn_struct_i(result)->iso = child_isolation;
db_txn_struct_i(result)->lt_map.create();
db_txn_struct_i(result)->lt_map.create_no_array();
TXN_SNAPSHOT_TYPE snapshot_type;
switch(db_txn_struct_i(result)->iso){
@ -422,14 +420,15 @@ toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, uint32_t flags) {
break;
}
}
int r = toku_txn_manager_start_txn(&db_txn_struct_i(result)->tokutxn,
toku_logger_get_txn_manager(env->i->logger),
stxn ? db_txn_struct_i(stxn)->tokutxn : 0,
env->i->logger,
TXNID_NONE,
snapshot_type,
result,
false);
int r = toku_txn_begin_with_xid(
stxn ? db_txn_struct_i(stxn)->tokutxn : 0,
&db_txn_struct_i(result)->tokutxn,
env->i->logger,
TXNID_PAIR_NONE,
snapshot_type,
result,
false
);
if (r != 0) {
toku_free(result);
return r;
@ -442,7 +441,6 @@ toku_txn_begin(DB_ENV *env, DB_TXN * stxn, DB_TXN ** txn, uint32_t flags) {
}
toku_mutex_init(&db_txn_struct_i(result)->txn_mutex, NULL);
(void) toku_sync_fetch_and_add(&env->i->open_txns, 1);
*txn = result;
return 0;
@ -462,7 +460,6 @@ void toku_keep_prepared_txn_callback (DB_ENV *env, TOKUTXN tokutxn) {
toku_txn_set_container_db_txn(tokutxn, result);
toku_mutex_init(&db_txn_struct_i(result)->txn_mutex, NULL);
(void) toku_sync_fetch_and_add(&env->i->open_txns, 1);
}
// Test-only function