From a12ded1a4559ff205e182e4702c04ff41a194a1f Mon Sep 17 00:00:00 2001 From: Yoni Fogel Date: Tue, 16 Apr 2013 23:57:56 -0400 Subject: [PATCH] Addresses #1866 Oldest living xid stored in logger instead of a global. Cursors take a copy of oldest living xid upon creation, which they use for implicit promotion git-svn-id: file:///svn/toku/tokudb@13606 c7de825b-a66e-492c-adef-691d508d4ae1 --- newbrt/brt-internal.h | 1 + newbrt/brt.c | 21 ++++++++------- newbrt/brt.h | 2 +- newbrt/log-internal.h | 1 + newbrt/logger.c | 12 ++++++++- newbrt/logger.h | 2 ++ newbrt/rollback.c | 12 ++++----- newbrt/tests/brt-serialize-sub-block-test.c | 2 +- newbrt/tests/brt-test-cursor-2.c | 2 +- newbrt/tests/brt-test-cursor.c | 26 +++++++++--------- newbrt/tests/brt-test.c | 30 ++++++++++----------- newbrt/tests/shortcut.c | 2 +- newbrt/txn.c | 12 ++++----- newbrt/txn.h | 1 - src/ydb.c | 2 +- 15 files changed, 71 insertions(+), 57 deletions(-) diff --git a/newbrt/brt-internal.h b/newbrt/brt-internal.h index 198d3ce6aa6..fe8f4df63ac 100644 --- a/newbrt/brt-internal.h +++ b/newbrt/brt-internal.h @@ -277,6 +277,7 @@ struct brt_cursor { DBT key, val; // The key-value pair that the cursor currently points to OMTCURSOR omtcursor; u_int64_t root_put_counter; // what was the count on the BRT when we validated the cursor? + TXNID oldest_living_xid;// what was the oldest live txnid when we created the cursor? struct brt_cursor_leaf_info leaf_info; }; diff --git a/newbrt/brt.c b/newbrt/brt.c index 895d9faeee3..17d55d90b54 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -1346,7 +1346,7 @@ brt_leaf_apply_full_promotion_once (BRTNODE node, LEAFENTRY le) } static void -maybe_do_implicit_promotion_on_query (BRTNODE node, LEAFENTRY le) { +maybe_do_implicit_promotion_on_query (BRT_CURSOR brtcursor, LEAFENTRY le) { //Requires: le is not a provdel (Callers never call it unless not provdel). //assert(!le_is_provdel(le)); //Must be as fast as possible. Assert is superfluous. @@ -1362,8 +1362,8 @@ maybe_do_implicit_promotion_on_query (BRTNODE node, LEAFENTRY le) { // * We will sometimes say a txn is uncommitted when it is committed. // * We will NEVER say a txn is committed when it is uncommitted. TXNID outermost_uncommitted_xid = le_outermost_uncommitted_xid(le); - if (outermost_uncommitted_xid != 0 && outermost_uncommitted_xid < oldest_living_xid) { - brt_leaf_apply_full_promotion_once(node, le); + if (outermost_uncommitted_xid != 0 && outermost_uncommitted_xid < brtcursor->oldest_living_xid) { + brt_leaf_apply_full_promotion_once(brtcursor->leaf_info.node, le); } } @@ -3305,7 +3305,7 @@ brt_cursor_invalidate(BRT_CURSOR brtcursor) { } } -int toku_brt_cursor (BRT brt, BRT_CURSOR *cursorptr) { +int toku_brt_cursor (BRT brt, BRT_CURSOR *cursorptr, TOKULOGGER logger) { BRT_CURSOR cursor = toku_malloc(sizeof *cursor); if (cursor == 0) return ENOMEM; @@ -3313,6 +3313,8 @@ int toku_brt_cursor (BRT brt, BRT_CURSOR *cursorptr) { cursor->brt = brt; cursor->current_in_omt = FALSE; cursor->prefetching = FALSE; + cursor->oldest_living_xid = toku_logger_get_oldest_living_xid(logger); + assert(cursor->oldest_living_xid != MAX_TXNID); list_push(&brt->cursors, &cursor->cursors_link); int r = toku_omt_cursor_create(&cursor->omtcursor); assert(r==0); @@ -3474,7 +3476,9 @@ brt_search_leaf_node(BRTNODE node, brt_search_t *search, BRT_GET_STRADDLE_CALLBA } } got_a_good_value: - maybe_do_implicit_promotion_on_query(node, le); + //Save node ptr in brtcursor (implicit promotion requires it). + brtcursor->leaf_info.node = node; + maybe_do_implicit_promotion_on_query(brtcursor, le); { u_int32_t keylen; bytevec key = le_latest_key_and_len(le, &keylen); @@ -3501,7 +3505,6 @@ got_a_good_value: brtcursor->leaf_info.fullhash = node->fullhash; brtcursor->leaf_info.blocknumber = node->thisnodename; #endif - brtcursor->leaf_info.node = node; brtcursor->leaf_info.leaflock = node->u.l.leaflock; brtcursor->leaf_info.to_be.omt = node->u.l.buffer; brtcursor->leaf_info.to_be.index = idx; @@ -3890,7 +3893,7 @@ brt_cursor_shortcut (BRT_CURSOR cursor, int direction, u_int32_t limit, BRT_GET_ assert(r==0); if (!le_is_provdel(le)) { - maybe_do_implicit_promotion_on_query(cursor->leaf_info.node, le); + maybe_do_implicit_promotion_on_query(cursor, le); u_int32_t keylen; bytevec key = le_latest_key_and_len(le, &keylen); u_int32_t vallen; @@ -4357,7 +4360,7 @@ toku_brt_lookup (BRT brt, DBT *k, DBT *v, BRT_GET_CALLBACK_FUNCTION getf, void * int r, rr; BRT_CURSOR cursor; - rr = toku_brt_cursor(brt, &cursor); + rr = toku_brt_cursor(brt, &cursor, NULL); if (rr != 0) return rr; int op = brt->flags & TOKU_DB_DUPSORT ? DB_GET_BOTH : DB_SET; @@ -4723,7 +4726,7 @@ brt_is_empty (BRT brt, TOKULOGGER logger) { BRT_CURSOR cursor; int r, r2; BOOL is_empty; - r = toku_brt_cursor(brt, &cursor); + r = toku_brt_cursor(brt, &cursor, NULL); if (r == 0) { r = toku_brt_cursor_first(cursor, getf_nothing, NULL, logger); r2 = toku_brt_cursor_close(cursor); diff --git a/newbrt/brt.h b/newbrt/brt.h index c6c9d1ad807..0d9eec15a34 100644 --- a/newbrt/brt.h +++ b/newbrt/brt.h @@ -84,7 +84,7 @@ int toku_verify_brt (BRT brt); //int show_brt_blocknumbers(BRT); typedef struct brt_cursor *BRT_CURSOR; -int toku_brt_cursor (BRT, BRT_CURSOR*); +int toku_brt_cursor (BRT, BRT_CURSOR*, TOKULOGGER); // get is deprecated in favor of the individual functions below int toku_brt_cursor_get (BRT_CURSOR cursor, DBT *key, DBT *val, BRT_GET_CALLBACK_FUNCTION getf, void *getf_v, int get_flags, TOKUTXN txn); diff --git a/newbrt/log-internal.h b/newbrt/log-internal.h index 2161a693acf..d974cc0190e 100644 --- a/newbrt/log-internal.h +++ b/newbrt/log-internal.h @@ -77,6 +77,7 @@ struct tokulogger { int n_in_file; u_int32_t write_block_size; // How big should the blocks be written to various logs? + TXNID oldest_living_xid; }; int toku_logger_find_next_unused_log_file(const char *directory, long long *result); diff --git a/newbrt/logger.c b/newbrt/logger.c index 83a468af709..2cc48e02f79 100644 --- a/newbrt/logger.c +++ b/newbrt/logger.c @@ -29,6 +29,7 @@ int toku_logger_create (TOKULOGGER *resultp) { result->n_in_file=0; result->directory=0; result->checkpoint_lsn=(LSN){0}; + result->oldest_living_xid = MAX_TXNID; result->write_block_size = BRT_DEFAULT_NODE_SIZE; // default logging size is the same as the default brt block size *resultp=result; r = ml_init(&result->input_lock); if (r!=0) goto died1; @@ -812,7 +813,8 @@ int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags) { // get them into increasing order qsort(all_logs, all_n_logs, sizeof(all_logs[0]), logfilenamecompare); - LSN oldest_live_txn_lsn={.lsn = oldest_living_xid}; + LSN oldest_live_txn_lsn={.lsn = toku_logger_get_oldest_living_xid(logger)}; + assert(oldest_live_txn_lsn.lsn != 0); //printf("%s:%d Oldest txn is %lld\n", __FILE__, __LINE__, (long long)oldest_live_txn_lsn.lsn); // Now starting at the last one, look for archivable ones. @@ -871,3 +873,11 @@ TOKUTXN toku_logger_txn_parent (TOKUTXN txn) { void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) { logger->checkpoint_lsn = lsn; } + +TXNID toku_logger_get_oldest_living_xid(TOKULOGGER logger) { + TXNID rval = 0; + if (logger) + rval = logger->oldest_living_xid; + return rval; +} + diff --git a/newbrt/logger.h b/newbrt/logger.h index 683002fa108..54be7872b11 100644 --- a/newbrt/logger.h +++ b/newbrt/logger.h @@ -68,4 +68,6 @@ int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags); TOKUTXN toku_logger_txn_parent (TOKUTXN txn); void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn); +TXNID toku_logger_get_oldest_living_xid(TOKULOGGER logger); + #endif diff --git a/newbrt/rollback.c b/newbrt/rollback.c index 467c8416883..e8ae469c83a 100644 --- a/newbrt/rollback.c +++ b/newbrt/rollback.c @@ -42,9 +42,9 @@ void toku_rollback_txn_close (TOKUTXN txn) { assert(r==0); } - assert(oldest_living_xid <= txn->txnid64); - assert(oldest_living_xid < MAX_TXNID); - if (txn->txnid64 == oldest_living_xid) { + assert(txn->logger->oldest_living_xid <= txn->txnid64); + assert(txn->logger->oldest_living_xid < MAX_TXNID); + if (txn->txnid64 == txn->logger->oldest_living_xid) { TOKULOGGER logger = txn->logger; OMTVALUE oldest_txnv; @@ -52,13 +52,13 @@ void toku_rollback_txn_close (TOKUTXN txn) { if (r==0) { TOKUTXN oldest_txn = oldest_txnv; assert(oldest_txn != txn); // We just removed it - assert(oldest_txn->txnid64 > oldest_living_xid); //Must be newer than the previous oldest - oldest_living_xid = oldest_txn->txnid64; + assert(oldest_txn->txnid64 > txn->logger->oldest_living_xid); //Must be newer than the previous oldest + txn->logger->oldest_living_xid = oldest_txn->txnid64; } else { //No living transactions assert(r==EINVAL); - oldest_living_xid = MAX_TXNID; + txn->logger->oldest_living_xid = MAX_TXNID; } } diff --git a/newbrt/tests/brt-serialize-sub-block-test.c b/newbrt/tests/brt-serialize-sub-block-test.c index a40a5bd8d48..b125b88ec8f 100644 --- a/newbrt/tests/brt-serialize-sub-block-test.c +++ b/newbrt/tests/brt-serialize-sub-block-test.c @@ -50,7 +50,7 @@ static void test_sub_block(int n) { assert(error == 0); BRT_CURSOR cursor; - error = toku_brt_cursor(brt, &cursor); + error = toku_brt_cursor(brt, &cursor, NULL); assert(error == 0); for (i=0; ; i++) { diff --git a/newbrt/tests/brt-test-cursor-2.c b/newbrt/tests/brt-test-cursor-2.c index cca4001d104..299080dc0c8 100644 --- a/newbrt/tests/brt-test-cursor-2.c +++ b/newbrt/tests/brt-test-cursor-2.c @@ -51,7 +51,7 @@ static void test_multiple_brt_cursor_dbts(int n, DB *db) { } for (i=0; i= v */ @@ -1238,7 +1238,7 @@ static void test_new_brt_cursor_set(int n, int cursor_op, DB *db) { r = toku_brt_insert(brt, toku_fill_dbt(&key, &k, sizeof k), toku_fill_dbt(&val, &v, sizeof v), 0); assert(r == 0); } - r = toku_brt_cursor(brt, &cursor); assert(r==0); + r = toku_brt_cursor(brt, &cursor, NULL); assert(r==0); /* set cursor to random keys in set { 0, 10, 20, .. 10*(n-1) } */ for (i=0; iis_panicked) return EINVAL; TAGMALLOC(TOKUTXN, result); @@ -49,11 +47,11 @@ died2: result->rollentry_arena = memarena_create(); if (toku_omt_size(logger->live_txns) == 0) { - assert(oldest_living_xid == MAX_TXNID); - oldest_living_xid = result->txnid64; + assert(logger->oldest_living_xid == MAX_TXNID); + logger->oldest_living_xid = result->txnid64; } - assert(oldest_living_xid < MAX_TXNID); - assert(oldest_living_xid <= result->txnid64); + assert(logger->oldest_living_xid < MAX_TXNID); + assert(logger->oldest_living_xid <= result->txnid64); { //Add txn to list (omt) of live transactions @@ -61,7 +59,7 @@ died2: r = toku_omt_insert(logger->live_txns, result, find_xid, result, &idx); if (r!=0) goto died2; - if (oldest_living_xid == result->txnid64) + if (logger->oldest_living_xid == result->txnid64) assert(idx == 0); else assert(idx > 0); diff --git a/newbrt/txn.h b/newbrt/txn.h index 4ddf56c57ba..fb59a69a231 100644 --- a/newbrt/txn.h +++ b/newbrt/txn.h @@ -10,6 +10,5 @@ int toku_txn_commit_txn (TOKUTXN txn, int nosync, YIELDF yield, void*yieldv); int toku_txn_abort_txn(TOKUTXN txn, YIELDF yield, void*yieldv); void toku_txn_close_txn(TOKUTXN txn); XIDS toku_txn_get_xids (TOKUTXN); -extern TXNID oldest_living_xid; #endif //TOKUTXN_H diff --git a/src/ydb.c b/src/ydb.c index b51cc20ab61..7bbef26ab26 100644 --- a/src/ydb.c +++ b/src/ydb.c @@ -2752,7 +2752,7 @@ static int toku_db_cursor(DB * db, DB_TXN * txn, DBC ** c, u_int32_t flags, int dbc_struct_i(result)->skey = &dbc_struct_i(result)->skey_s; dbc_struct_i(result)->sval = &dbc_struct_i(result)->sval_s; } - int r = toku_brt_cursor(db->i->brt, &dbc_struct_i(result)->c); + int r = toku_brt_cursor(db->i->brt, &dbc_struct_i(result)->c, db->dbenv->i->logger); assert(r == 0); *c = result; return 0;