2013-04-17 00:00:35 -04:00
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: expandtab:ts=8:sw=4:softtabstop=4:
2013-04-16 23:59:03 -04:00
# ident "$Id$"
2013-04-16 23:59:09 -04:00
# ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
2013-04-16 23:57:55 -04:00
# ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11 / 760379 and to the patents and / or patent applications resulting from it."
# include "includes.h"
2013-04-17 00:00:35 -04:00
// Report rollback progress to the transaction's poll callback, if one is set.
// The callback receives a snapshot of the transaction's total and processed
// rollback-entry counters, plus the two flags passed in by the caller.
static void
poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t stall_for_checkpoint) {
    if (!txn->progress_poll_fun) {
        // Nobody is listening; nothing to report.
        return;
    }
    TOKU_TXN_PROGRESS_S snapshot = {
        .entries_total         = txn->num_rollentries,
        .entries_processed     = txn->num_rollentries_processed,
        .is_commit             = is_commit,
        .stalled_on_checkpoint = stall_for_checkpoint
    };
    txn->progress_poll_fun(&snapshot, txn->progress_poll_fun_extra);
}
2013-04-17 00:00:38 -04:00
// Run the commit handler for a single rollback entry (dispatched on the
// entry's type to the matching toku_commit_* function), count the entry as
// processed, and poll the progress callback every 1024 processed entries.
// Returns whatever the dispatched handler stored in r (0 if none did).
int toku_commit_rollback_item(TOKUTXN txn, struct roll_entry *item, LSN lsn) {
    enum { progress_poll_interval = 1024 };
    int r = 0;
    rolltype_dispatch_assign(item, toku_commit_, r, txn, lsn);
    // The entry is counted even when the handler reported an error.
    txn->num_rollentries_processed++;
    if ((txn->num_rollentries_processed % progress_poll_interval) == 0) {
        poll_txn_progress_function(txn, TRUE, FALSE);
    }
    return r;
}
2013-04-17 00:00:38 -04:00
// Run the abort handler for a single rollback entry (dispatched on the
// entry's type to the matching toku_rollback_* function), count the entry as
// processed, and poll the progress callback every 1024 processed entries.
// Returns whatever the dispatched handler stored in r (0 if none did).
int toku_abort_rollback_item(TOKUTXN txn, struct roll_entry *item, LSN lsn) {
    enum { progress_poll_interval = 1024 };
    int r = 0;
    rolltype_dispatch_assign(item, toku_rollback_, r, txn, lsn);
    // The entry is counted even when the handler reported an error.
    txn->num_rollentries_processed++;
    if ((txn->num_rollentries_processed % progress_poll_interval) == 0) {
        poll_txn_progress_function(txn, FALSE, FALSE);
    }
    return r;
}
2013-04-16 23:59:05 -04:00
static inline int
2013-04-17 00:00:15 -04:00
txn_has_current_rollback_log ( TOKUTXN txn ) {
2013-04-16 23:59:05 -04:00
return txn - > current_rollback . b ! = ROLLBACK_NONE . b ;
}
// Nonzero iff the transaction's spilled_rollback_tail blocknum is not ROLLBACK_NONE.
static inline int
txn_has_spilled_rollback_logs(TOKUTXN txn) {
    const int has_spilled = (txn->spilled_rollback_tail.b != ROLLBACK_NONE.b);
    return has_spilled;
}
2013-04-17 00:00:06 -04:00
// Callback handed to toku_cachetable_unpin_and_remove: releases the block
// number of the removed rollback node. 'extra' is the FT that owns the
// block table.
static void rollback_unpin_remove_callback(CACHEKEY *cachekey, BOOL for_checkpoint, void *extra) {
    FT ft = extra;
    toku_free_blocknum(ft->blocktable, cachekey, ft, for_checkpoint);
}
2013-04-17 00:00:15 -04:00
// Unpin the given rollback log node and remove it from the logger's rollback
// cachefile; its block number is freed via rollback_unpin_remove_callback.
// Asserts that the cachetable call succeeds.
void toku_rollback_log_unpin_and_remove(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
    CACHEFILE rollback_cf = txn->logger->rollback_cachefile;
    FT ft = toku_cachefile_get_userdata(rollback_cf);
    int r = toku_cachetable_unpin_and_remove(rollback_cf, log->blocknum,
                                             rollback_unpin_remove_callback, ft);
    assert(r == 0);
}
static int
2013-04-17 00:00:38 -04:00
apply_txn ( TOKUTXN txn , LSN lsn ,
2013-04-16 23:59:05 -04:00
apply_rollback_item func ) {
int r = 0 ;
// do the commit/abort calls and free everything
// we do the commit/abort calls in reverse order too.
struct roll_entry * item ;
//printf("%s:%d abort\n", __FILE__, __LINE__);
BLOCKNUM next_log = ROLLBACK_NONE ;
uint32_t next_log_hash = 0 ;
BOOL is_current = FALSE ;
2013-04-17 00:00:15 -04:00
if ( txn_has_current_rollback_log ( txn ) ) {
2013-04-16 23:59:05 -04:00
next_log = txn - > current_rollback ;
next_log_hash = txn - > current_rollback_hash ;
is_current = TRUE ;
}
else if ( txn_has_spilled_rollback_logs ( txn ) ) {
next_log = txn - > spilled_rollback_tail ;
next_log_hash = txn - > spilled_rollback_tail_hash ;
2013-04-16 23:57:55 -04:00
}
2013-04-16 23:59:05 -04:00
uint64_t last_sequence = txn - > num_rollback_nodes ;
BOOL found_head = FALSE ;
while ( next_log . b ! = ROLLBACK_NONE . b ) {
ROLLBACK_LOG_NODE log ;
//pin log
2013-04-17 00:00:15 -04:00
toku_get_and_pin_rollback_log ( txn , next_log , next_log_hash , & log ) ;
toku_rollback_verify_contents ( log , txn - > txnid64 , last_sequence - 1 ) ;
2013-04-16 23:59:06 -04:00
2013-04-17 00:00:15 -04:00
toku_maybe_prefetch_previous_rollback_log ( txn , log ) ;
2013-04-16 23:59:41 -04:00
2013-04-16 23:59:05 -04:00
last_sequence = log - > sequence ;
if ( func ) {
while ( ( item = log - > newest_logentry ) ) {
log - > newest_logentry = item - > prev ;
2013-04-17 00:00:38 -04:00
r = func ( txn , item , lsn ) ;
2013-04-16 23:59:05 -04:00
if ( r ! = 0 ) return r ;
}
}
if ( next_log . b = = txn - > spilled_rollback_head . b ) {
assert ( ! found_head ) ;
found_head = TRUE ;
assert ( log - > sequence = = 0 ) ;
}
2013-04-17 00:00:15 -04:00
next_log = log - > previous ;
next_log_hash = log - > previous_hash ;
2013-04-16 23:59:05 -04:00
{
//Clean up transaction structure to prevent
//toku_txn_close from double-freeing
if ( is_current ) {
txn - > current_rollback = ROLLBACK_NONE ;
txn - > current_rollback_hash = 0 ;
is_current = FALSE ;
}
else {
txn - > spilled_rollback_tail = next_log ;
txn - > spilled_rollback_tail_hash = next_log_hash ;
}
if ( found_head ) {
assert ( next_log . b = = ROLLBACK_NONE . b ) ;
txn - > spilled_rollback_head = next_log ;
txn - > spilled_rollback_head_hash = next_log_hash ;
}
}
2013-04-17 00:00:15 -04:00
toku_rollback_log_unpin_and_remove ( txn , log ) ;
2013-04-16 23:59:05 -04:00
}
return r ;
}
2013-04-16 23:59:22 -04:00
// OMT comparison function: both the stored value and the search key are
// TXNIDs carried in pointer-sized slots. Returns -1, 0 or +1 as the stored
// xid is less than, equal to, or greater than the one being looked for.
int
toku_find_xid_by_xid(OMTVALUE v, void *xidv) {
    const TXNID stored = (TXNID) v;
    const TXNID wanted = (TXNID) xidv;
    return (stored > wanted) - (stored < wanted);
}
// OMT comparison function: the stored value is an XID_PAIR and the search
// key is a TXNID carried in a pointer-sized slot. Compares the pair's xid1
// against the key and returns -1, 0 or +1.
int
toku_find_pair_by_xid(OMTVALUE v, void *xidv) {
    XID_PAIR stored = v;
    const TXNID wanted = (TXNID) xidv;
    return (stored->xid1 > wanted) - (stored->xid1 < wanted);
}
2013-04-17 00:00:15 -04:00
// Allocate 'size' bytes from the memarena owned by the given rollback log node.
void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) {
    void *chunk = malloc_in_memarena(log->rollentry_arena, size);
    return chunk;
}
2013-04-16 23:59:05 -04:00
// Copy 'len' bytes starting at 'v' into memory allocated from the rollback
// log node's arena and return the copy.
// A zero-length request still allocates (as before) but skips the copy:
// calling memcpy with a null source pointer is undefined behaviour even when
// the length is 0, and a caller with no data may well pass v == NULL.
void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) {
    void *r = toku_malloc_in_rollback(log, len);
    if (len > 0) {
        memcpy(r, v, len);
    }
    return r;
}
2013-04-17 00:00:35 -04:00
// toku_omt_iterate callback used when a child transaction commits:
// 'hv' is an FT the child has open and 'txnv' is the child transaction.
// Notes the FT in the child's parent and, if the FT's "created or locked
// when empty" / "suppressed recovery logs" markers name the child, re-points
// them at the parent.
// Returns the result of toku_txn_note_ft.
static int note_ft_used_in_txns_parent(OMTVALUE hv, u_int32_t UU(index), void *txnv) {
    TOKUTXN child = txnv;
    TOKUTXN parent = child->parent;
    FT ft = hv;

    int r = toku_txn_note_ft(parent, ft);
    if (r != 0) {
        return r;
    }
    if (ft->txnid_that_created_or_locked_when_empty == toku_txn_get_txnid(child)) {
        // Hand the "no rollback needed" marker over to the parent.
        ft->txnid_that_created_or_locked_when_empty = toku_txn_get_txnid(parent);
    }
    if (ft->txnid_that_suppressed_recovery_logs == toku_txn_get_txnid(child)) {
        // Hand the "no recovery needed" marker over to the parent.
        ft->txnid_that_suppressed_recovery_logs = toku_txn_get_txnid(parent);
    }
    return r;
}
2013-04-16 23:59:05 -04:00
//Commit each entry in the rollback log.
2013-04-16 23:58:04 -04:00
//If the transaction has a parent, it just promotes its information to its parent.
2013-04-17 00:00:38 -04:00
int toku_rollback_commit ( TOKUTXN txn , LSN lsn ) {
2013-04-16 23:57:55 -04:00
int r = 0 ;
if ( txn - > parent ! = 0 ) {
2013-04-16 23:59:05 -04:00
// First we must put a rollinclude entry into the parent if we spilled
if ( txn_has_spilled_rollback_logs ( txn ) ) {
uint64_t num_nodes = txn - > num_rollback_nodes ;
2013-04-17 00:00:15 -04:00
if ( txn_has_current_rollback_log ( txn ) ) {
2013-04-16 23:59:05 -04:00
num_nodes - - ; //Don't count the in-progress rollback log.
2013-04-16 23:57:55 -04:00
}
2013-04-16 23:59:05 -04:00
r = toku_logger_save_rollback_rollinclude ( txn - > parent , txn - > txnid64 , num_nodes ,
txn - > spilled_rollback_head , txn - > spilled_rollback_head_hash ,
txn - > spilled_rollback_tail , txn - > spilled_rollback_tail_hash ) ;
if ( r ! = 0 ) return r ;
//Remove ownership from child.
txn - > spilled_rollback_head = ROLLBACK_NONE ;
txn - > spilled_rollback_head_hash = 0 ;
txn - > spilled_rollback_tail = ROLLBACK_NONE ;
txn - > spilled_rollback_tail_hash = 0 ;
2013-04-16 23:57:55 -04:00
}
2013-04-17 00:00:15 -04:00
// if we're commiting a child rollback, put its entries into the parent
// by pinning both child and parent and then linking the child log entry
// list to the end of the parent log entry list.
if ( txn_has_current_rollback_log ( txn ) ) {
2013-04-16 23:59:05 -04:00
//Pin parent log
2013-04-17 00:00:15 -04:00
ROLLBACK_LOG_NODE parent_log ;
toku_get_and_pin_rollback_log_for_new_entry ( txn - > parent , & parent_log ) ;
2013-04-16 23:59:05 -04:00
//Pin child log
2013-04-17 00:00:15 -04:00
ROLLBACK_LOG_NODE child_log ;
toku_get_and_pin_rollback_log ( txn , txn - > current_rollback ,
txn - > current_rollback_hash , & child_log ) ;
toku_rollback_verify_contents ( child_log , txn - > txnid64 , txn - > num_rollback_nodes - 1 ) ;
2013-04-16 23:59:05 -04:00
// Append the list to the front of the parent.
if ( child_log - > oldest_logentry ) {
// There are some entries, so link them in.
child_log - > oldest_logentry - > prev = parent_log - > newest_logentry ;
if ( ! parent_log - > oldest_logentry ) {
parent_log - > oldest_logentry = child_log - > oldest_logentry ;
}
parent_log - > newest_logentry = child_log - > newest_logentry ;
parent_log - > rollentry_resident_bytecount + = child_log - > rollentry_resident_bytecount ;
txn - > parent - > rollentry_raw_count + = txn - > rollentry_raw_count ;
child_log - > rollentry_resident_bytecount = 0 ;
2013-04-16 23:57:55 -04:00
}
2013-04-16 23:59:05 -04:00
if ( parent_log - > oldest_logentry = = NULL ) {
parent_log - > oldest_logentry = child_log - > oldest_logentry ;
}
child_log - > newest_logentry = child_log - > oldest_logentry = 0 ;
// Put all the memarena data into the parent.
if ( memarena_total_size_in_use ( child_log - > rollentry_arena ) > 0 ) {
// If there are no bytes to move, then just leave things alone, and let the memory be reclaimed on txn is closed.
memarena_move_buffers ( parent_log - > rollentry_arena , child_log - > rollentry_arena ) ;
}
2013-04-17 00:00:15 -04:00
toku_rollback_log_unpin_and_remove ( txn , child_log ) ;
2013-04-16 23:59:05 -04:00
txn - > current_rollback = ROLLBACK_NONE ;
txn - > current_rollback_hash = 0 ;
2013-04-17 00:00:15 -04:00
toku_maybe_spill_rollbacks ( txn - > parent , parent_log ) ;
toku_rollback_log_unpin ( txn - > parent , parent_log ) ;
assert ( r = = 0 ) ;
2013-04-16 23:57:55 -04:00
}
// Note the open brts, the omts must be merged
2013-04-17 00:00:35 -04:00
r = toku_omt_iterate ( txn - > open_fts , note_ft_used_in_txns_parent , txn ) ;
2013-04-16 23:57:55 -04:00
assert ( r = = 0 ) ;
2013-04-16 23:59:06 -04:00
// Merge the list of headers that must be checkpointed before commit
2013-04-17 00:00:24 -04:00
if ( txn - > checkpoint_needed_before_commit ) {
txn - > parent - > checkpoint_needed_before_commit = TRUE ;
2013-04-16 23:59:06 -04:00
}
2013-04-16 23:58:04 -04:00
//If this transaction needs an fsync (if it commits)
//save that in the parent. Since the commit really happens in the root txn.
txn - > parent - > force_fsync_on_commit | = txn - > force_fsync_on_commit ;
2013-04-16 23:59:01 -04:00
txn - > parent - > num_rollentries + = txn - > num_rollentries ;
2013-04-16 23:57:55 -04:00
} else {
2013-04-17 00:00:38 -04:00
r = apply_txn ( txn , lsn , toku_commit_rollback_item ) ;
2013-04-16 23:59:05 -04:00
assert ( r = = 0 ) ;
2013-04-16 23:57:55 -04:00
}
2013-04-16 23:59:05 -04:00
2013-04-16 23:57:55 -04:00
return r ;
}
2013-04-17 00:00:38 -04:00
// Abort a transaction: apply toku_abort_rollback_item to every rollback
// entry via apply_txn. Asserts success, so the returned value is 0.
int toku_rollback_abort(TOKUTXN txn, LSN lsn) {
    const int r = apply_txn(txn, lsn, toku_abort_rollback_item);
    assert(r == 0);
    return r;
}
2013-04-16 23:59:54 -04:00
static inline PAIR_ATTR make_rollback_pair_attr ( long size ) {
PAIR_ATTR result = {
. size = size ,
. nonleaf_size = 0 ,
. leaf_size = 0 ,
. rollback_size = size ,
2013-04-17 00:00:11 -04:00
. cache_pressure_size = 0 ,
. is_valid = TRUE
2013-04-16 23:59:54 -04:00
} ;
return result ;
}
2013-04-16 23:57:55 -04:00
// Write something out. Keep trying even if partial writes occur.
// On error: Return negative with errno set.
// On success return nbytes.
2013-04-16 23:59:54 -04:00
static PAIR_ATTR
2013-04-16 23:59:05 -04:00
rollback_memory_size ( ROLLBACK_LOG_NODE log ) {
size_t size = sizeof ( * log ) ;
size + = memarena_total_memory_size ( log - > rollentry_arena ) ;
2013-04-16 23:59:54 -04:00
return make_rollback_pair_attr ( size ) ;
2013-04-16 23:59:05 -04:00
}
2013-04-17 00:00:15 -04:00
// Cleanup the rollback memory
2013-04-16 23:59:05 -04:00
static void
2013-04-17 00:00:15 -04:00
rollback_log_destroy ( ROLLBACK_LOG_NODE log ) {
2013-04-16 23:59:05 -04:00
memarena_close ( & log - > rollentry_arena ) ;
toku_free ( log ) ;
}
2013-04-17 00:00:15 -04:00
// Cachetable flush callback for rollback log nodes.
//   rollback_v - the ROLLBACK_LOG_NODE being flushed
//   extraargs  - the FT that owns the rollback cachefile
//   write_me   - serialize the node to 'fd' (skipped once the FT has panicked)
//   keep_me    - when FALSE the in-memory node is destroyed before returning
// The reported size is passed through unchanged via *new_size.
// If serialization fails and the FT has not panicked yet, the error code and
// a descriptive message are recorded in h->panic / h->panic_string.
static void rollback_flush_callback(CACHEFILE cachefile, int fd, BLOCKNUM logname,
                                    void *rollback_v, void **UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR *new_size,
                                    BOOL write_me, BOOL keep_me, BOOL for_checkpoint, BOOL UU(is_clone)) {
    ROLLBACK_LOG_NODE log = rollback_v;
    FT h = extraargs;

    assert(h->cf == cachefile);
    assert(log->blocknum.b == logname.b);

    if (write_me && !h->panic) {
        int n_workitems, n_threads;
        toku_cachefile_get_workqueue_load(cachefile, &n_workitems, &n_threads);
        int r = toku_serialize_rollback_log_to(fd, log->blocknum, log, h, n_workitems, n_threads, for_checkpoint);
        if (r != 0 && h->panic == 0) {
            // Only the first failure is recorded.
            const char *errstr = strerror(r);
            // 200 bytes of headroom cover the fixed text and the error number.
            const size_t msg_size = 200 + strlen(errstr);
            char msg[msg_size];
            h->panic = r;
            snprintf(msg, sizeof msg, " While writing data to disk, error %d (%s) ", r, errstr);
            h->panic_string = toku_strdup(msg);
        }
    }
    *new_size = size;
    if (!keep_me) {
        rollback_log_destroy(log);
    }
}
2013-04-17 00:00:15 -04:00
// Cachetable fetch callback for rollback log nodes: deserialize the node
// named 'logname' from 'fd' into *rollback_pv and, on success, report its
// in-memory size through *sizep. 'extraargs' is the FT owning the cachefile.
// Returns the deserializer's result (0 on success).
static int rollback_fetch_callback(CACHEFILE cachefile, int fd, BLOCKNUM logname, u_int32_t fullhash,
                                   void **rollback_pv, void **UU(disk_data), PAIR_ATTR *sizep, int *UU(dirtyp), void *extraargs) {
    FT ft = extraargs;
    assert(ft->cf == cachefile);

    ROLLBACK_LOG_NODE *node_out = (ROLLBACK_LOG_NODE *) rollback_pv;
    int r = toku_deserialize_rollback_log_from(fd, logname, fullhash, node_out, ft);
    if (r != 0) {
        return r;
    }
    *sizep = rollback_memory_size(*node_out);
    return r;
}
2013-04-17 00:00:15 -04:00
static void rollback_pe_est_callback (
2013-04-16 23:59:48 -04:00
void * rollback_v ,
2013-04-17 00:00:13 -04:00
void * UU ( disk_data ) ,
2013-04-16 23:59:48 -04:00
long * bytes_freed_estimate ,
enum partial_eviction_cost * cost ,
void * UU ( write_extraargs )
)
{
assert ( rollback_v ! = NULL ) ;
* bytes_freed_estimate = 0 ;
* cost = PE_CHEAP ;
}
2013-04-16 23:59:40 -04:00
// callback for partially evicting a cachetable entry
2013-04-17 00:00:15 -04:00
static int rollback_pe_callback (
2013-04-16 23:59:40 -04:00
void * rollback_v ,
2013-04-16 23:59:54 -04:00
PAIR_ATTR UU ( old_attr ) ,
PAIR_ATTR * new_attr ,
2013-04-16 23:59:40 -04:00
void * UU ( extraargs )
)
{
assert ( rollback_v ! = NULL ) ;
2013-04-16 23:59:54 -04:00
* new_attr = old_attr ;
2013-04-16 23:59:40 -04:00
return 0 ;
}
2013-04-17 00:00:15 -04:00
// partial fetch is never required for a rollback log node
2013-04-17 00:00:35 -04:00
static BOOL rollback_pf_req_callback ( void * UU ( ftnode_pv ) , void * UU ( read_extraargs ) ) {
2013-04-16 23:59:41 -04:00
return FALSE ;
}
2013-04-17 00:00:15 -04:00
// a rollback node should never be partial fetched,
// because we always say it is not required.
// (pf req callback always returns false)
2013-04-17 00:00:35 -04:00
static int rollback_pf_callback ( void * UU ( ftnode_pv ) , void * UU ( disk_data ) , void * UU ( read_extraargs ) , int UU ( fd ) , PAIR_ATTR * UU ( sizep ) ) {
2013-04-16 23:59:41 -04:00
assert ( FALSE ) ;
2013-04-16 23:59:41 -04:00
return 0 ;
2013-04-16 23:59:41 -04:00
}
2013-04-16 23:59:40 -04:00
2013-04-17 00:00:15 -04:00
// the cleaner thread should never choose a rollback node for cleaning
static int rollback_cleaner_callback (
2013-04-17 00:00:35 -04:00
void * UU ( ftnode_pv ) ,
2013-04-16 23:59:54 -04:00
BLOCKNUM UU ( blocknum ) ,
u_int32_t UU ( fullhash ) ,
void * UU ( extraargs )
)
{
assert ( FALSE ) ;
return 0 ;
}
2013-04-16 23:59:40 -04:00
2013-04-17 00:00:35 -04:00
// Build the set of cachetable write callbacks used for rollback log nodes.
// 'h' is stored as the extra argument passed back to those callbacks.
// A designated initializer is used so that any member of
// CACHETABLE_WRITE_CALLBACK not listed here is zeroed rather than left
// indeterminate in the struct returned by value.
static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(FT h) {
    CACHETABLE_WRITE_CALLBACK wc = {
        .flush_callback   = rollback_flush_callback,
        .pe_est_callback  = rollback_pe_est_callback,
        .pe_callback      = rollback_pe_callback,
        .cleaner_callback = rollback_cleaner_callback,
        .clone_callback   = NULL,   // rollback nodes provide no clone callback
        .write_extraargs  = h
    };
    return wc;
}
2013-04-16 23:59:40 -04:00
2013-04-17 00:00:15 -04:00
// Create a new rollback log node for 'txn', put it into the rollback
// cachefile, and make it the transaction's current rollback log.
// The node is chained to the earlier nodes through the given previous
// blocknum/hash and receives the transaction's next sequence number.
// Allocation and the cachetable put are asserted to succeed.
// The new node is returned through *result.
static void rollback_log_create(TOKUTXN txn, BLOCKNUM previous, uint32_t previous_hash, ROLLBACK_LOG_NODE *result) {
    ROLLBACK_LOG_NODE MALLOC(node);
    assert(node);

    CACHEFILE rollback_cf = txn->logger->rollback_cachefile;
    FT ft = toku_cachefile_get_userdata(rollback_cf);

    // Version stamps: a freshly created node is at the current layout.
    node->layout_version = FT_LAYOUT_VERSION;
    node->layout_version_original = FT_LAYOUT_VERSION;
    node->layout_version_read_from_disk = FT_LAYOUT_VERSION;

    // Identity and position in the transaction's chain.
    node->dirty = TRUE;
    node->txnid = txn->txnid64;
    node->sequence = txn->num_rollback_nodes;
    txn->num_rollback_nodes++;
    toku_allocate_blocknum(ft->blocktable, &node->blocknum, ft);
    node->hash = toku_cachetable_hash(rollback_cf, node->blocknum);
    node->previous = previous;
    node->previous_hash = previous_hash;

    // Starts out with no entries.
    node->oldest_logentry = NULL;
    node->newest_logentry = NULL;
    node->rollentry_arena = memarena_create();
    node->rollentry_resident_bytecount = 0;

    *result = node;
    int r = toku_cachetable_put(rollback_cf, node->blocknum, node->hash,
                                node, rollback_memory_size(node),
                                get_write_callbacks_for_rollback_log(ft));
    assert(r == 0);

    txn->current_rollback = node->blocknum;
    txn->current_rollback_hash = node->hash;
}
2013-04-17 00:00:15 -04:00
// Unpin a rollback log node in the logger's rollback cachefile, reporting
// the node's dirty flag and its current in-memory size to the cachetable.
// Asserts that the unpin succeeds.
void toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
    CACHEFILE rollback_cf = txn->logger->rollback_cachefile;
    const enum cachetable_dirty dirtiness = (enum cachetable_dirty) log->dirty;
    int r = toku_cachetable_unpin(rollback_cf, log->blocknum, log->hash,
                                  dirtiness, rollback_memory_size(log));
    assert(r == 0);
}
//Requires: log is pinned
// log is current
//After:
// Maybe there is no current after (if it spilled)
2013-04-17 00:00:15 -04:00
void toku_maybe_spill_rollbacks ( TOKUTXN txn , ROLLBACK_LOG_NODE log ) {
2013-04-16 23:59:05 -04:00
if ( log - > rollentry_resident_bytecount > txn - > logger - > write_block_size ) {
2013-04-17 00:00:15 -04:00
assert ( log - > blocknum . b = = txn - > current_rollback . b ) ;
2013-04-16 23:59:05 -04:00
//spill
if ( ! txn_has_spilled_rollback_logs ( txn ) ) {
//First spilled. Copy to head.
txn - > spilled_rollback_head = txn - > current_rollback ;
txn - > spilled_rollback_head_hash = txn - > current_rollback_hash ;
}
//Unconditionally copy to tail. Old tail does not need to be cached anymore.
txn - > spilled_rollback_tail = txn - > current_rollback ;
txn - > spilled_rollback_tail_hash = txn - > current_rollback_hash ;
txn - > current_rollback = ROLLBACK_NONE ;
txn - > current_rollback_hash = 0 ;
}
2013-04-16 23:57:55 -04:00
}
2013-04-17 00:00:27 -04:00
// OMT comparison function over FTs: orders them by the fileid of the
// FILENUM of each FT's cachefile. Returns -1, 0 or +1.
static int find_filenum(OMTVALUE v, void *hv) {
    FT stored = v;
    FT wanted = hv;
    const FILENUM stored_fnum = toku_cachefile_filenum(stored->cf);
    const FILENUM wanted_fnum = toku_cachefile_filenum(wanted->cf);
    return (stored_fnum.fileid > wanted_fnum.fileid) - (stored_fnum.fileid < wanted_fnum.fileid);
}
//Notify a transaction that it has touched a brt.
2013-04-17 00:00:35 -04:00
int toku_txn_note_ft ( TOKUTXN txn , FT h ) {
BOOL ref_added = toku_ft_maybe_add_txn_ref ( h , txn ) ;
2013-04-16 23:57:55 -04:00
// Insert reference to brt into transaction
2013-04-17 00:00:28 -04:00
if ( ref_added ) {
2013-04-17 00:00:35 -04:00
int r = toku_omt_insert ( txn - > open_fts , h , find_filenum , h , 0 ) ;
2013-04-17 00:00:28 -04:00
assert ( r = = 0 ) ;
}
2013-04-16 23:57:55 -04:00
return 0 ;
}
// Return the number of bytes that went into the rollback data structure (the uncompressed count if there is compression)
2013-04-16 23:59:05 -04:00
int toku_logger_txn_rollback_raw_count ( TOKUTXN txn , u_int64_t * raw_count )
2013-04-16 23:57:55 -04:00
{
* raw_count = txn - > rollentry_raw_count ;
return 0 ;
}
2013-04-17 00:00:15 -04:00
// While 'log' is being processed, ask the cachetable to prefetch the node
// it links to through 'previous', if there is one. The prefetch request is
// asserted to succeed.
void toku_maybe_prefetch_previous_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
    BLOCKNUM prev_blocknum = log->previous;
    if (prev_blocknum.b == ROLLBACK_NONE.b) {
        // 'log' is the oldest node; nothing to prefetch.
        return;
    }

    uint32_t prev_hash = log->previous_hash;
    CACHEFILE rollback_cf = txn->logger->rollback_cachefile;
    FT ft = toku_cachefile_get_userdata(rollback_cf);
    BOOL doing_prefetch = FALSE;  // filled in by the cachetable; not used here
    int r = toku_cachefile_prefetch(rollback_cf, prev_blocknum, prev_hash,
                                    get_write_callbacks_for_rollback_log(ft),
                                    rollback_fetch_callback,
                                    rollback_pf_req_callback,
                                    rollback_pf_callback,
                                    ft,
                                    &doing_prefetch);
    assert(r == 0);
}
2013-04-17 00:00:15 -04:00
// Sanity-check a pinned rollback log node: assert that it belongs to the
// expected transaction id and carries the expected sequence number.
void toku_rollback_verify_contents(ROLLBACK_LOG_NODE log,
                                   TXNID txnid, uint64_t sequence)
{
    assert(log->sequence == sequence);
    assert(log->txnid == txnid);
}
2013-04-16 23:59:05 -04:00
2013-04-17 00:00:15 -04:00
// Pin the rollback log node identified by blocknum/hash in the logger's
// rollback cachefile and return it through *log.
// The pin is asserted to succeed, and the pinned node's blocknum is asserted
// to match the one requested.
void toku_get_and_pin_rollback_log(TOKUTXN txn, BLOCKNUM blocknum, uint32_t hash, ROLLBACK_LOG_NODE *log) {
    CACHEFILE rollback_cf = txn->logger->rollback_cachefile;
    FT ft = toku_cachefile_get_userdata(rollback_cf);

    void *pinned_value;
    int r = toku_cachetable_get_and_pin(rollback_cf, blocknum, hash,
                                        &pinned_value, NULL,
                                        get_write_callbacks_for_rollback_log(ft),
                                        rollback_fetch_callback,
                                        rollback_pf_req_callback,
                                        rollback_pf_callback,
                                        TRUE, // may_modify_value
                                        ft
                                        );
    assert(r == 0);

    ROLLBACK_LOG_NODE node = pinned_value;
    assert(node->blocknum.b == blocknum.b);
    *log = node;
}
2013-04-17 00:00:15 -04:00
// Return, pinned, the rollback log node that a new entry for 'txn' should
// be added to. If the transaction has no current rollback log, a new one is
// created and chained after the spilled tail; otherwise the current one is
// pinned and its txnid/sequence verified.
// Requires: the transaction is in state TOKUTXN_LIVE (checked by invariant).
void toku_get_and_pin_rollback_log_for_new_entry(TOKUTXN txn, ROLLBACK_LOG_NODE *log) {
    invariant(txn->state == TOKUTXN_LIVE); // #3258

    ROLLBACK_LOG_NODE node;
    if (!txn_has_current_rollback_log(txn)) {
        // Create a new log for this transaction to use.
        // This call asserts success internally.
        rollback_log_create(txn, txn->spilled_rollback_tail, txn->spilled_rollback_tail_hash, &node);
    } else {
        toku_get_and_pin_rollback_log(txn, txn->current_rollback, txn->current_rollback_hash, &node);
        toku_rollback_verify_contents(node, txn->txnid64, txn->num_rollback_nodes - 1);
    }

    assert(node->txnid == txn->txnid64);
    assert(node->blocknum.b != ROLLBACK_NONE.b);
    *log = node;
}