From 7942bcf20945aafa86b9c8a5feac914f807747c6 Mon Sep 17 00:00:00 2001 From: Zardosht Kasheff Date: Wed, 17 Apr 2013 00:00:13 -0400 Subject: [PATCH] [t:4028], merge to main git-svn-id: file:///svn/toku/tokudb@41142 c7de825b-a66e-492c-adef-691d508d4ae1 --- newbrt/brt-cachetable-wrappers.c | 6 + newbrt/brt-cachetable-wrappers.h | 3 + newbrt/brt-flusher.c | 12 +- newbrt/brt-hot-flusher.c | 1 + newbrt/brt-internal.h | 52 +- newbrt/brt-serialize.c | 172 ++- newbrt/brt-test-helpers.c | 4 + newbrt/brt-verify.c | 1 + newbrt/brt.c | 336 ++++-- newbrt/brtdump.c | 12 +- newbrt/brtloader.c | 11 +- newbrt/brttypes.h | 1 + newbrt/cachetable.c | 980 +++++++++++------- newbrt/cachetable.h | 24 +- newbrt/checkpoint.c | 6 +- newbrt/fifo.c | 16 + newbrt/fifo.h | 2 + newbrt/mempool.c | 8 + newbrt/mempool.h | 2 + newbrt/rollback.c | 11 +- newbrt/tests/brt-bfe-query.c | 52 +- newbrt/tests/brt-clock-test.c | 33 +- newbrt/tests/brt-serialize-benchmark.c | 16 +- newbrt/tests/brt-serialize-test.c | 237 +++-- newbrt/tests/cachetable-3969.c | 9 +- newbrt/tests/cachetable-4357.c | 2 + newbrt/tests/cachetable-4365.c | 2 + newbrt/tests/cachetable-4545.c | 10 +- newbrt/tests/cachetable-all-write.c | 10 +- newbrt/tests/cachetable-checkpoint-pending.c | 9 +- .../cachetable-checkpoint-pinned-nodes.c | 9 +- .../cachetable-checkpoint-prefetched-nodes.c | 5 +- newbrt/tests/cachetable-checkpoint-test.c | 16 +- newbrt/tests/cachetable-cleaner-checkpoint.c | 6 +- newbrt/tests/cachetable-cleaner-checkpoint2.c | 6 +- newbrt/tests/cachetable-cleaner-dev-null.c | 6 +- ...chetable-cleaner-thread-attrs-accumulate.c | 6 +- ...hetable-cleaner-thread-everything-pinned.c | 1 + ...le-cleaner-thread-nothing-needs-flushing.c | 1 + .../tests/cachetable-cleaner-thread-simple.c | 2 + newbrt/tests/cachetable-clock-eviction.c | 13 +- newbrt/tests/cachetable-clock-eviction2.c | 17 +- newbrt/tests/cachetable-clock-eviction3.c | 18 +- newbrt/tests/cachetable-clock-eviction4.c | 16 +- newbrt/tests/cachetable-clone-checkpoint.c | 109 ++ ...chetable-clone-partial-fetch-pinned-node.c | 113 ++ newbrt/tests/cachetable-clone-partial-fetch.c | 113 ++ .../tests/cachetable-clone-pin-nonblocking.c | 96 ++ newbrt/tests/cachetable-clone-unpin-remove.c | 102 ++ newbrt/tests/cachetable-eviction-close-test.c | 10 +- .../tests/cachetable-eviction-close-test2.c | 10 +- .../cachetable-eviction-getandpin-test.c | 10 +- .../cachetable-eviction-getandpin-test2.c | 5 + .../tests/cachetable-flush-during-cleaner.c | 2 +- newbrt/tests/cachetable-getandpin-test.c | 7 +- .../cachetable-kibbutz_and_flush_cachefile.c | 4 +- newbrt/tests/cachetable-partial-fetch.c | 18 +- newbrt/tests/cachetable-pin-checkpoint.c | 27 +- ...hetable-pin-nonblocking-checkpoint-clean.c | 7 +- .../cachetable-prefetch-checkpoint-test.c | 5 +- .../cachetable-prefetch-close-leak-test.c | 5 +- newbrt/tests/cachetable-prefetch-close-test.c | 6 +- .../cachetable-prefetch-flowcontrol-test.c | 5 +- .../cachetable-prefetch-getandpin-test.c | 12 +- .../cachetable-prefetch-maybegetandpin-test.c | 1 + newbrt/tests/cachetable-prefetch2-test.c | 1 + newbrt/tests/cachetable-put-checkpoint.c | 32 +- newbrt/tests/cachetable-rename-test.c | 8 +- newbrt/tests/cachetable-scan.c | 8 +- newbrt/tests/cachetable-simple-clone.c | 153 +++ newbrt/tests/cachetable-simple-clone2.c | 103 ++ .../tests/cachetable-simple-maybe-get-pin.c | 2 +- .../tests/cachetable-simple-pin-dep-nodes.c | 10 +- .../tests/cachetable-simple-pin-nonblocking.c | 22 +- newbrt/tests/cachetable-simple-pin.c | 12 +- .../tests/cachetable-simple-put-dep-nodes.c 
| 9 +- ...achetable-simple-unpin-remove-checkpoint.c | 4 +- newbrt/tests/cachetable-simple-verify.c | 2 +- newbrt/tests/cachetable-test.c | 63 +- newbrt/tests/cachetable-test2.c | 10 +- .../tests/cachetable-unpin-and-remove-test.c | 3 +- .../cachetable-unpin-remove-and-checkpoint.c | 4 +- newbrt/tests/cachetable-writer-thread-limit.c | 4 +- newbrt/tests/test-checkpoint-during-flush.c | 4 + newbrt/tests/test-checkpoint-during-merge.c | 6 + .../tests/test-checkpoint-during-rebalance.c | 5 + newbrt/tests/test-checkpoint-during-split.c | 6 + newbrt/tests/test-dirty-flushes-on-cleaner.c | 4 + newbrt/tests/test-flushes-on-cleaner.c | 4 + newbrt/tests/test-merges-on-cleaner.c | 2 + newbrt/tests/test.h | 11 +- newbrt/tests/test4244.c | 1 + newbrt/tests/test4302.c | 7 +- newbrt/workqueue.c | 6 +- newbrt/workqueue.h | 4 +- src/tests/perf_checkpoint_var.c | 87 ++ 96 files changed, 2598 insertions(+), 828 deletions(-) create mode 100644 newbrt/tests/cachetable-clone-checkpoint.c create mode 100644 newbrt/tests/cachetable-clone-partial-fetch-pinned-node.c create mode 100644 newbrt/tests/cachetable-clone-partial-fetch.c create mode 100644 newbrt/tests/cachetable-clone-pin-nonblocking.c create mode 100644 newbrt/tests/cachetable-clone-unpin-remove.c create mode 100644 newbrt/tests/cachetable-simple-clone.c create mode 100644 newbrt/tests/cachetable-simple-clone2.c diff --git a/newbrt/brt-cachetable-wrappers.c b/newbrt/brt-cachetable-wrappers.c index f75f11f2020..cd76071f557 100644 --- a/newbrt/brt-cachetable-wrappers.c +++ b/newbrt/brt-cachetable-wrappers.c @@ -127,6 +127,7 @@ toku_pin_brtnode( ANCESTORS ancestors, const PIVOT_BOUNDS bounds, BRTNODE_FETCH_EXTRA bfe, + BOOL may_modify_node, BOOL apply_ancestor_messages, // this BOOL is probably temporary, for #3972, once we know how range query estimates work, will revisit this BRTNODE *node_p, BOOL* msgs_applied) @@ -143,6 +144,7 @@ toku_pin_brtnode( toku_brtnode_fetch_callback, toku_brtnode_pf_req_callback, toku_brtnode_pf_callback, + may_modify_node, bfe, //read_extraargs unlockers); if (r==0) { @@ -168,6 +170,7 @@ toku_pin_brtnode_holding_lock( const PIVOT_BOUNDS bounds, BRTNODE_FETCH_EXTRA bfe, BOOL apply_ancestor_messages, // this BOOL is probably temporary, for #3972, once we know how range query estimates work, will revisit this + BOOL may_modify_node, BRTNODE *node_p) { void *node_v; @@ -181,6 +184,7 @@ toku_pin_brtnode_holding_lock( toku_brtnode_fetch_callback, toku_brtnode_pf_req_callback, toku_brtnode_pf_callback, + may_modify_node, bfe ); assert(r==0); @@ -196,6 +200,7 @@ toku_pin_brtnode_off_client_thread( BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE_FETCH_EXTRA bfe, + BOOL may_modify_node, u_int32_t num_dependent_nodes, BRTNODE* dependent_nodes, BRTNODE *node_p) @@ -222,6 +227,7 @@ toku_pin_brtnode_off_client_thread( toku_brtnode_fetch_callback, toku_brtnode_pf_req_callback, toku_brtnode_pf_callback, + may_modify_node, bfe, num_dependent_nodes, dependent_cf, diff --git a/newbrt/brt-cachetable-wrappers.h b/newbrt/brt-cachetable-wrappers.h index 8f96c093d7f..506edf60ead 100644 --- a/newbrt/brt-cachetable-wrappers.h +++ b/newbrt/brt-cachetable-wrappers.h @@ -71,6 +71,7 @@ toku_pin_brtnode( ANCESTORS ancestors, const PIVOT_BOUNDS pbounds, BRTNODE_FETCH_EXTRA bfe, + BOOL may_modify_node, BOOL apply_ancestor_messages, // this BOOL is probably temporary, for #3972, once we know how range query estimates work, will revisit this BRTNODE *node_p, BOOL* msgs_applied @@ -88,6 +89,7 @@ toku_pin_brtnode_holding_lock( const PIVOT_BOUNDS pbounds, 
BRTNODE_FETCH_EXTRA bfe, BOOL apply_ancestor_messages, + BOOL may_modify_node, BRTNODE *node_p ); @@ -104,6 +106,7 @@ toku_pin_brtnode_off_client_thread( BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE_FETCH_EXTRA bfe, + BOOL may_modify_node, u_int32_t num_dependent_nodes, BRTNODE* dependent_nodes, BRTNODE *node_p diff --git a/newbrt/brt-flusher.c b/newbrt/brt-flusher.c index 6b8df0e528f..95a8a1cdd57 100644 --- a/newbrt/brt-flusher.c +++ b/newbrt/brt-flusher.c @@ -400,7 +400,7 @@ ct_maybe_merge_child(struct flusher_advice *fa, CACHEKEY *rootp = toku_calculate_root_offset_pointer(h, &fullhash); struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, h); - toku_pin_brtnode_off_client_thread(h, *rootp, fullhash, &bfe, 0,NULL, &root_node); + toku_pin_brtnode_off_client_thread(h, *rootp, fullhash, &bfe, TRUE, 0, NULL, &root_node); toku_assert_entire_node_in_memory(root_node); toku_brtheader_release_treelock(h); @@ -512,8 +512,6 @@ handle_split_of_child( BP_BLOCKNUM(node, childnum+1) = childb->thisnodename; BP_WORKDONE(node, childnum+1) = 0; BP_STATE(node,childnum+1) = PT_AVAIL; - BP_START(node,childnum+1) = 0; - BP_SIZE(node,childnum+1) = 0; set_BNC(node, childnum+1, toku_create_empty_nl()); @@ -824,8 +822,6 @@ brtleaf_split( for (int i = 0; i < num_children_in_b; i++) { BP_BLOCKNUM(B,i).b = 0; BP_STATE(B,i) = PT_AVAIL; - BP_START(B,i) = 0; - BP_SIZE(B,i) = 0; BP_WORKDONE(B,i) = 0; set_BLB(B, i, toku_create_empty_bn()); } @@ -1361,7 +1357,7 @@ brt_merge_child( u_int32_t childfullhash = compute_child_fullhash(h->cf, node, childnuma); struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, h); - toku_pin_brtnode_off_client_thread(h, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, 1, &node, &childa); + toku_pin_brtnode_off_client_thread(h, BP_BLOCKNUM(node, childnuma), childfullhash, &bfe, TRUE, 1, &node, &childa); } // for test call_flusher_thread_callback(ft_flush_before_pin_second_node_for_merge); @@ -1372,7 +1368,7 @@ brt_merge_child( u_int32_t childfullhash = compute_child_fullhash(h->cf, node, childnumb); struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, h); - toku_pin_brtnode_off_client_thread(h, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, 2, dep_nodes, &childb); + toku_pin_brtnode_off_client_thread(h, BP_BLOCKNUM(node, childnumb), childfullhash, &bfe, TRUE, 2, dep_nodes, &childb); } if (toku_bnc_n_entries(BNC(node,childnuma))>0) { @@ -1498,7 +1494,7 @@ flush_some_child( // Note that we don't read the entire node into memory yet. 
// The idea is let's try to do the minimum work before releasing the parent lock fill_bfe_for_min_read(&bfe, h); - toku_pin_brtnode_off_client_thread(h, targetchild, childfullhash, &bfe, 1, &parent, &child); + toku_pin_brtnode_off_client_thread(h, targetchild, childfullhash, &bfe, TRUE, 1, &parent, &child); // for test call_flusher_thread_callback(ft_flush_after_child_pin); diff --git a/newbrt/brt-hot-flusher.c b/newbrt/brt-hot-flusher.c index 0cecb3b17cb..ec4f19cbd1d 100644 --- a/newbrt/brt-hot-flusher.c +++ b/newbrt/brt-hot-flusher.c @@ -280,6 +280,7 @@ toku_brt_hot_optimize(BRT brt, (BLOCKNUM) *rootp, fullhash, &bfe, + TRUE, 0, NULL, &root); diff --git a/newbrt/brt-internal.h b/newbrt/brt-internal.h index 42efdd7155d..5aa2608e512 100644 --- a/newbrt/brt-internal.h +++ b/newbrt/brt-internal.h @@ -188,6 +188,22 @@ typedef struct __attribute__((__packed__)) brtnode_child_pointer { } u; } BRTNODE_CHILD_POINTER; + +struct brtnode_disk_data { + // + // stores the offset to the beginning of the partition on disk from the brtnode, and the length, needed to read a partition off of disk + // the value is only meaningful if the node is clean. If the node is dirty, then the value is meaningless + // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition + // The SIZE is the size of the compressed partition. + // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. + // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. + u_int32_t start; + u_int32_t size; +}; +#define BP_START(node_dd,i) ((node_dd)[i].start) +#define BP_SIZE(node_dd,i) ((node_dd)[i].size) + + // a brtnode partition, associated with a child of a node struct __attribute__((__packed__)) brtnode_partition { // the following three variables are used for nonleaf nodes @@ -203,14 +219,6 @@ struct __attribute__((__packed__)) brtnode_partition { // enum pt_state state; // make this an enum to make debugging easier. // - // stores the offset to the beginning of the partition on disk from the brtnode, and the length, needed to read a partition off of disk - // the value is only meaningful if the node is clean. If the node is dirty, then the value is meaningless - // The START is the distance from the end of the compressed node_info data, to the beginning of the compressed partition - // The SIZE is the size of the compressed partition. - // Rationale: We cannot store the size from the beginning of the node since we don't know how big the header will be. - // However, later when we are doing aligned writes, we won't be able to store the size from the end since we want things to align. - u_int32_t start,size; - // // pointer to the partition. 
Depending on the state, they may be different things // if state == PT_INVALID, then the node was just initialized and ptr == NULL // if state == PT_ON_DISK, then ptr == NULL @@ -258,11 +266,7 @@ struct brtnode { // brtnode partition macros // BP stands for brtnode_partition #define BP_BLOCKNUM(node,i) ((node)->bp[i].blocknum) -#define BP_HAVE_FULLHASH(node,i) ((node)->bp[i].have_fullhash) -#define BP_FULLHASH(node,i) ((node)->bp[i].fullhash) #define BP_STATE(node,i) ((node)->bp[i].state) -#define BP_START(node,i) ((node)->bp[i].start) -#define BP_SIZE(node,i) ((node)->bp[i].size) #define BP_WORKDONE(node, i)((node)->bp[i].workdone) // @@ -448,18 +452,21 @@ toku_create_compressed_partition_from_available( int childnum, SUB_BLOCK sb ); +void rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize); int toku_serialize_brtnode_to_memory (BRTNODE node, + BRTNODE_DISK_DATA* ndd, unsigned int basementnodesize, + BOOL do_rebalancing, /*out*/ size_t *n_bytes_to_write, /*out*/ char **bytes_to_write); -int toku_serialize_brtnode_to(int fd, BLOCKNUM, BRTNODE node, struct brt_header *h, int n_workitems, int n_threads, BOOL for_checkpoint); +int toku_serialize_brtnode_to(int fd, BLOCKNUM, BRTNODE node, BRTNODE_DISK_DATA* ndd, BOOL do_rebalancing, struct brt_header *h, int n_workitems, int n_threads, BOOL for_checkpoint); int toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE log, struct brt_header *h, int n_workitems, int n_threads, BOOL for_checkpoint); int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, ROLLBACK_LOG_NODE *logp, struct brt_header *h); -void toku_deserialize_bp_from_disk(BRTNODE node, int childnum, int fd, struct brtnode_fetch_extra* bfe); +void toku_deserialize_bp_from_disk(BRTNODE node, BRTNODE_DISK_DATA ndd, int childnum, int fd, struct brtnode_fetch_extra* bfe); void toku_deserialize_bp_from_compressed(BRTNODE node, int childnum, DESCRIPTOR desc, brt_compare_func cmp); -int toku_deserialize_brtnode_from (int fd, BLOCKNUM off, u_int32_t /*fullhash*/, BRTNODE *brtnode, struct brtnode_fetch_extra* bfe); +int toku_deserialize_brtnode_from (int fd, BLOCKNUM off, u_int32_t /*fullhash*/, BRTNODE *brtnode, BRTNODE_DISK_DATA* ndd, struct brtnode_fetch_extra* bfe); unsigned int toku_serialize_brtnode_size(BRTNODE node); /* How much space will it take? */ int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len); @@ -477,6 +484,8 @@ int toku_serialize_descriptor_contents_to_fd(int fd, const DESCRIPTOR desc, DISK void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, const DESCRIPTOR desc); BASEMENTNODE toku_create_empty_bn(void); BASEMENTNODE toku_create_empty_bn_no_buffer(void); // create a basement node with a null buffer. 
+NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo); +BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn); NONLEAF_CHILDINFO toku_create_empty_nl(void); // FIXME needs toku prefix void destroy_basement_node (BASEMENTNODE bn); @@ -529,12 +538,13 @@ struct brtenv { }; void toku_brt_status_update_pivot_fetch_reason(struct brtnode_fetch_extra *bfe); -extern void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *brtnode_v, void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint); -extern int toku_brtnode_fetch_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, u_int32_t fullhash, void **brtnode_pv, PAIR_ATTR *sizep, int*dirty, void*extraargs); -extern void toku_brtnode_pe_est_callback(void* brtnode_pv, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); +extern void toku_brtnode_clone_callback(void* value_data, void** cloned_value_data, PAIR_ATTR* new_attr, BOOL for_checkpoint, void* write_extraargs); +extern void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *brtnode_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint, BOOL is_clone); +extern int toku_brtnode_fetch_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, u_int32_t fullhash, void **brtnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int*dirty, void*extraargs); +extern void toku_brtnode_pe_est_callback(void* brtnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* write_extraargs); extern int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR old_attr, PAIR_ATTR* new_attr, void *extraargs); extern BOOL toku_brtnode_pf_req_callback(void* brtnode_pv, void* read_extraargs); -int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAIR_ATTR* sizep); +int toku_brtnode_pf_callback(void* brtnode_pv, void* UU(disk_data), void* read_extraargs, int fd, PAIR_ATTR* sizep); extern int toku_brtnode_cleaner_callback( void *brtnode_pv, BLOCKNUM blocknum, u_int32_t fullhash, void *extraargs); extern int toku_brt_alloc_init_header(BRT t, TOKUTXN txn); extern int toku_read_brt_header_and_store_in_cachefile (BRT brt, CACHEFILE cf, LSN max_acceptable_lsn, struct brt_header **header, BOOL* was_open); @@ -546,6 +556,7 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_node(struct brt_ wc.pe_est_callback = toku_brtnode_pe_est_callback; wc.pe_callback = toku_brtnode_pe_callback; wc.cleaner_callback = toku_brtnode_cleaner_callback; + wc.clone_callback = toku_brtnode_clone_callback; wc.write_extraargs = h; return wc; } @@ -900,6 +911,9 @@ typedef enum { BRT_STATUS_NUM_ROWS } brt_status_entry; +void brt_begin_checkpoint(void); +void brt_end_checkpoint(void); + typedef struct { bool initialized; TOKU_ENGINE_STATUS_ROW_S status[BRT_STATUS_NUM_ROWS]; diff --git a/newbrt/brt-serialize.c b/newbrt/brt-serialize.c index 933b857dc80..98d8dbb6159 100644 --- a/newbrt/brt-serialize.c +++ b/newbrt/brt-serialize.c @@ -237,7 +237,7 @@ serialize_node_header_size(BRTNODE node) { } static void -serialize_node_header(BRTNODE node, struct wbuf *wbuf) { +serialize_node_header(BRTNODE node, BRTNODE_DISK_DATA ndd, struct wbuf *wbuf) { if (node->height == 0) wbuf_nocrc_literal_bytes(wbuf, "tokuleaf", 8); else @@ -248,9 +248,9 @@ serialize_node_header(BRTNODE node, struct wbuf *wbuf) { wbuf_nocrc_uint(wbuf, BUILD_ID); wbuf_nocrc_int (wbuf, node->n_children); 
for (int i=0; i<node->n_children; i++) { - assert(BP_SIZE(node,i)>0); - wbuf_nocrc_int(wbuf, BP_START(node, i)); // save the beginning of the partition - wbuf_nocrc_int(wbuf, BP_SIZE (node, i)); // and the size + assert(BP_SIZE(ndd,i)>0); + wbuf_nocrc_int(wbuf, BP_START(ndd, i)); // save the beginning of the partition + wbuf_nocrc_int(wbuf, BP_SIZE (ndd, i)); // and the size } // checksum the header u_int32_t end_to_end_checksum = x1764_memory(wbuf->buf, wbuf_get_woffset(wbuf)); @@ -500,7 +500,7 @@ sum_item (OMTVALUE lev, u_int32_t UU(idx), void *vsi) { // Because all messages above have been applied, setting msn of all new basements // to max msn of existing basements is correct. (There cannot be any messages in // buffers above that still need to be applied.) -static void +void rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize) { assert(node->height == 0); @@ -687,9 +687,6 @@ rebalance_brtnode_leaf(BRTNODE node, unsigned int basementnodesize) toku_free(num_les_this_bn); } // end of rebalance_brtnode_leaf() -static void -serialize_and_compress(BRTNODE node, int npartitions, struct sub_block sb[]); - static void serialize_and_compress_partition(BRTNODE node, int childnum, SUB_BLOCK sb) { @@ -729,85 +726,29 @@ toku_create_compressed_partition_from_available( } -// tests are showing that serial insertions are slightly faster -// using the pthreads than using CILK. Disabling CILK until we have -// some evidence that it is faster -#ifdef HAVE_CILK - static void serialize_and_compress(BRTNODE node, int npartitions, struct sub_block sb[]) { -#pragma cilk grainsize = 2 - cilk_for (int i = 0; i < npartitions; i++) { + for (int i = 0; i < npartitions; i++) { serialize_and_compress_partition(node, i, &sb[i]); } } -#else - -struct serialize_compress_work { - struct work base; - BRTNODE node; - int i; - struct sub_block *sb; -}; - -static void * -serialize_and_compress_worker(void *arg) { - struct workset *ws = (struct workset *) arg; - while (1) { - struct serialize_compress_work *w = (struct serialize_compress_work *) workset_get(ws); - if (w == NULL) - break; - int i = w->i; - serialize_and_compress_partition(w->node, i, &w->sb[i]); - } - workset_release_ref(ws); - return arg; -} - -static void -serialize_and_compress(BRTNODE node, int npartitions, struct sub_block sb[]) { - if (npartitions == 1) { - serialize_and_compress_partition(node, 0, &sb[0]); - } else { - int T = num_cores; - if (T > npartitions) - T = npartitions; - if (T > 0) - T = T - 1; - struct workset ws; - workset_init(&ws); - struct serialize_compress_work work[npartitions]; - workset_lock(&ws); - for (int i = 0; i < npartitions; i++) { - work[i] = (struct serialize_compress_work) { .node = node, .i = i, .sb = sb }; - workset_put_locked(&ws, &work[i].base); - } - workset_unlock(&ws); - toku_thread_pool_run(brt_pool, 0, &T, serialize_and_compress_worker, &ws); - workset_add_ref(&ws, T); - serialize_and_compress_worker(&ws); - workset_join(&ws); - workset_destroy(&ws); - } -} - -#endif - // Writes out each child to a separate malloc'd buffer, then compresses // all of them, and writes the uncompressed header, to bytes_to_write, // which is malloc'd.
// int toku_serialize_brtnode_to_memory (BRTNODE node, + BRTNODE_DISK_DATA* ndd, unsigned int basementnodesize, + BOOL do_rebalancing, /*out*/ size_t *n_bytes_to_write, /*out*/ char **bytes_to_write) { toku_assert_entire_node_in_memory(node); - if (node->height == 0) { - rebalance_brtnode_leaf(node, basementnodesize); + if (do_rebalancing && node->height == 0) { + rebalance_brtnode_leaf(node, basementnodesize); } const int npartitions = node->n_children; @@ -815,6 +756,7 @@ toku_serialize_brtnode_to_memory (BRTNODE node, // For internal nodes, a sub block is a message buffer // For leaf nodes, a sub block is a basement node struct sub_block *XMALLOC_N(npartitions, sb); + *ndd = toku_xrealloc(*ndd, npartitions*sizeof(**ndd)); struct sub_block sb_node_info; for (int i = 0; i < npartitions; i++) { sub_block_init(&sb[i]);; @@ -845,8 +787,8 @@ toku_serialize_brtnode_to_memory (BRTNODE node, // store the BP_SIZESs for (int i = 0; i < node->n_children; i++) { u_int32_t len = sb[i].compressed_size + 4; // data and checksum - BP_SIZE (node,i) = len; - BP_START(node,i) = total_node_size; + BP_SIZE (*ndd,i) = len; + BP_START(*ndd,i) = total_node_size; total_node_size += sb[i].compressed_size + 4; } @@ -857,7 +799,7 @@ toku_serialize_brtnode_to_memory (BRTNODE node, // write the header struct wbuf wb; wbuf_init(&wb, curr_ptr, serialize_node_header_size(node)); - serialize_node_header(node, &wb); + serialize_node_header(node, *ndd, &wb); assert(wb.ndone == wb.size); curr_ptr += serialize_node_header_size(node); @@ -895,12 +837,12 @@ toku_serialize_brtnode_to_memory (BRTNODE node, } int -toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct brt_header *h, int UU(n_workitems), int UU(n_threads), BOOL for_checkpoint) { +toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, BRTNODE_DISK_DATA* ndd, BOOL do_rebalancing, struct brt_header *h, int UU(n_workitems), int UU(n_threads), BOOL for_checkpoint) { size_t n_to_write; char *compressed_buf = NULL; { - int r = toku_serialize_brtnode_to_memory(node, h->basementnodesize, + int r = toku_serialize_brtnode_to_memory(node, ndd, h->basementnodesize, do_rebalancing, &n_to_write, &compressed_buf); if (r!=0) return r; } @@ -1046,6 +988,41 @@ BASEMENTNODE toku_create_empty_bn(void) { return bn; } +struct mp_pair { + void* orig_base; + void* new_base; + OMT omt; +}; + +static int fix_mp_offset(OMTVALUE v, u_int32_t i, void* extra) { + struct mp_pair* p = extra; + char* old_value = v; + char *new_value = old_value - (char *)p->orig_base + (char *)p->new_base; + toku_omt_set_at(p->omt, (OMTVALUE) new_value, i); + return 0; +} + +BASEMENTNODE toku_clone_bn(BASEMENTNODE orig_bn) { + BASEMENTNODE bn = toku_create_empty_bn_no_buffer(); + bn->max_msn_applied = orig_bn->max_msn_applied; + bn->n_bytes_in_buffer = orig_bn->n_bytes_in_buffer; + bn->seqinsert = orig_bn->seqinsert; + bn->stale_ancestor_messages_applied = orig_bn->stale_ancestor_messages_applied; + bn->stat64_delta = orig_bn->stat64_delta; + toku_mempool_clone(&orig_bn->buffer_mempool, &bn->buffer_mempool); + toku_omt_clone_noptr(&bn->buffer, orig_bn->buffer); + struct mp_pair p; + p.orig_base = toku_mempool_get_base(&orig_bn->buffer_mempool); + p.new_base = toku_mempool_get_base(&bn->buffer_mempool); + p.omt = bn->buffer; + toku_omt_iterate( + bn->buffer, + fix_mp_offset, + &p + ); + return bn; +} + BASEMENTNODE toku_create_empty_bn_no_buffer(void) { BASEMENTNODE XMALLOC(bn); bn->max_msn_applied.msn = 0; @@ -1068,6 +1045,17 @@ NONLEAF_CHILDINFO toku_create_empty_nl(void) { return 
cn; } +// does NOT create OMTs, just the FIFO +NONLEAF_CHILDINFO toku_clone_nl(NONLEAF_CHILDINFO orig_childinfo) { + NONLEAF_CHILDINFO XMALLOC(cn); + cn->n_bytes_in_buffer = orig_childinfo->n_bytes_in_buffer; + cn->fresh_message_tree = NULL; + cn->stale_message_tree = NULL; + cn->broadcast_list = NULL; + toku_fifo_clone(orig_childinfo->buffer, &cn->buffer); + return cn; +} + void destroy_basement_node (BASEMENTNODE bn) { // The buffer may have been freed already, in some cases. @@ -1080,9 +1068,9 @@ void destroy_basement_node (BASEMENTNODE bn) void destroy_nonleaf_childinfo (NONLEAF_CHILDINFO nl) { toku_fifo_free(&nl->buffer); - toku_omt_destroy(&nl->fresh_message_tree); - toku_omt_destroy(&nl->stale_message_tree); - toku_omt_destroy(&nl->broadcast_list); + if (nl->fresh_message_tree) toku_omt_destroy(&nl->fresh_message_tree); + if (nl->stale_message_tree) toku_omt_destroy(&nl->stale_message_tree); + if (nl->broadcast_list) toku_omt_destroy(&nl->broadcast_list); toku_free(nl); } @@ -1402,6 +1390,7 @@ check_and_copy_compressed_sub_block_worker(struct rbuf curr_rbuf, struct sub_blo } static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnode, + BRTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, u_int32_t fullhash, struct brtnode_fetch_extra *bfe, @@ -1455,10 +1444,11 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod } XMALLOC_N(node->n_children, node->bp); + *ndd = toku_xmalloc(node->n_children*sizeof(**ndd)); // read the partition locations for (int i=0; i<node->n_children; i++) { - BP_START(node,i) = rbuf_int(rb); - BP_SIZE (node,i) = rbuf_int(rb); + BP_START(*ndd,i) = rbuf_int(rb); + BP_SIZE (*ndd,i) = rbuf_int(rb); } u_int32_t checksum = x1764_memory(rb->buf, rb->ndone); @@ -1517,7 +1507,7 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod if (bfe->type != brtnode_fetch_none) { PAIR_ATTR attr; - toku_brtnode_pf_callback(node, bfe, fd, &attr); + toku_brtnode_pf_callback(node, *ndd, bfe, fd, &attr); } // handle clock for (int i = 0; i < node->n_children; i++) { @@ -1532,6 +1522,7 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod cleanup: if (r!=0) { if (node) { + toku_free(*ndd); toku_free(node->bp); toku_free(node); } @@ -1542,6 +1533,7 @@ static int deserialize_brtnode_header_from_rbuf_if_small_enough (BRTNODE *brtnod static int deserialize_brtnode_from_rbuf( BRTNODE *brtnode, + BRTNODE_DISK_DATA* ndd, BLOCKNUM blocknum, u_int32_t fullhash, struct brtnode_fetch_extra* bfe, @@ -1577,10 +1569,11 @@ deserialize_brtnode_from_rbuf( node->build_id = rbuf_int(rb); node->n_children = rbuf_int(rb); XMALLOC_N(node->n_children, node->bp); + *ndd = toku_xmalloc(node->n_children*sizeof(**ndd)); // read the partition locations for (int i=0; i<node->n_children; i++) { - BP_START(node,i) = rbuf_int(rb); - BP_SIZE (node,i) = rbuf_int(rb); + BP_START(*ndd,i) = rbuf_int(rb); + BP_SIZE (*ndd,i) = rbuf_int(rb); } // verify checksum of header stored u_int32_t checksum = x1764_memory(rb->buf, rb->ndone); @@ -1609,8 +1602,8 @@ deserialize_brtnode_from_rbuf( // Previously, this code was a for loop with spawns inside and a sync at the end. // But now the loop is parallelizeable since we don't have a dependency on the work done so far.
cilk_for (int i = 0; i < node->n_children; i++) { - u_int32_t curr_offset = BP_START(node,i); - u_int32_t curr_size = BP_SIZE(node,i); + u_int32_t curr_offset = BP_START(*ndd,i); + u_int32_t curr_size = BP_SIZE(*ndd,i); // the compressed, serialized partitions start at where rb is currently pointing, // which would be rb->buf + rb->ndone // we need to intialize curr_rbuf to point to this place @@ -1665,7 +1658,7 @@ cleanup: } void -toku_deserialize_bp_from_disk(BRTNODE node, int childnum, int fd, struct brtnode_fetch_extra* bfe) { +toku_deserialize_bp_from_disk(BRTNODE node, BRTNODE_DISK_DATA ndd, int childnum, int fd, struct brtnode_fetch_extra* bfe) { assert(BP_STATE(node,childnum) == PT_ON_DISK); assert(node->bp[childnum].ptr.tag == BCT_NULL); @@ -1687,8 +1680,8 @@ toku_deserialize_bp_from_disk(BRTNODE node, int childnum, int fd, struct brtnode &total_node_disk_size ); - u_int32_t curr_offset = BP_START(node, childnum); - u_int32_t curr_size = BP_SIZE (node, childnum); + u_int32_t curr_offset = BP_START(ndd, childnum); + u_int32_t curr_size = BP_SIZE (ndd, childnum); struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0}; u_int8_t *XMALLOC_N(curr_size, raw_block); @@ -1738,6 +1731,7 @@ int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, + BRTNODE_DISK_DATA* ndd, struct brtnode_fetch_extra* bfe) // Effect: Read a node in. If possible, read just the header. { @@ -1746,7 +1740,7 @@ int toku_deserialize_brtnode_from (int fd, struct rbuf rb = RBUF_INITIALIZER; read_brtnode_header_from_fd_into_rbuf_if_small_enough(fd, blocknum, bfe->h, &rb); - int r = deserialize_brtnode_header_from_rbuf_if_small_enough(brtnode, blocknum, fullhash, bfe, &rb, fd); + int r = deserialize_brtnode_header_from_rbuf_if_small_enough(brtnode, ndd, blocknum, fullhash, bfe, &rb, fd); if (r != 0) { toku_free(rb.buf); rb = RBUF_INITIALIZER; @@ -1756,7 +1750,7 @@ int toku_deserialize_brtnode_from (int fd, r = read_block_from_fd_into_rbuf(fd, blocknum, bfe->h, &rb); if (r != 0) { goto cleanup; } // if we were successful, then we are done. 
- r = deserialize_brtnode_from_rbuf(brtnode, blocknum, fullhash, bfe, &rb); + r = deserialize_brtnode_from_rbuf(brtnode, ndd, blocknum, fullhash, bfe, &rb); if (r!=0) { dump_bad_block(rb.buf,rb.size); } diff --git a/newbrt/brt-test-helpers.c b/newbrt/brt-test-helpers.c index c5ebd1be5fa..5f2485ee0dc 100644 --- a/newbrt/brt-test-helpers.c +++ b/newbrt/brt-test-helpers.c @@ -98,6 +98,7 @@ int toku_testsetup_get_sersize(BRT brt, BLOCKNUM diskoff) // Return the size on toku_brtnode_fetch_callback, toku_brtnode_pf_req_callback, toku_brtnode_pf_callback, + TRUE, &bfe ); assert(r==0); @@ -124,6 +125,7 @@ int toku_testsetup_insert_to_leaf (BRT brt, BLOCKNUM blocknum, char *key, int ke toku_brtnode_fetch_callback, toku_brtnode_pf_req_callback, toku_brtnode_pf_callback, + TRUE, &bfe ); if (r!=0) return r; @@ -172,6 +174,7 @@ toku_pin_node_with_min_bfe(BRTNODE* node, BLOCKNUM b, BRT t) b, toku_cachetable_hash(t->h->cf, b), &bfe, + TRUE, 0, NULL, node @@ -196,6 +199,7 @@ int toku_testsetup_insert_to_nonleaf (BRT brt, BLOCKNUM blocknum, enum brt_msg_t toku_brtnode_fetch_callback, toku_brtnode_pf_req_callback, toku_brtnode_pf_callback, + TRUE, &bfe ); if (r!=0) return r; diff --git a/newbrt/brt-verify.c b/newbrt/brt-verify.c index f3617f45109..19a514fe9ae 100644 --- a/newbrt/brt-verify.c +++ b/newbrt/brt-verify.c @@ -215,6 +215,7 @@ toku_get_node_for_verify( blocknum, fullhash, &bfe, + TRUE, // may_modify_node, safe to set to TRUE 0, NULL, nodep diff --git a/newbrt/brt.c b/newbrt/brt.c index e0bdb763035..fc1c328d1a0 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -589,11 +589,11 @@ toku_get_and_clear_basement_stats(BRTNODE leafnode) { invariant(leafnode->height == 0); STAT64INFO_S deltas = ZEROSTATS; for (int i = 0; i < leafnode->n_children; i++) { - BASEMENTNODE bn = BLB(leafnode, i); - invariant(BP_STATE(leafnode,i) == PT_AVAIL); - deltas.numrows += bn->stat64_delta.numrows; - deltas.numbytes += bn->stat64_delta.numbytes; - bn->stat64_delta = ZEROSTATS; + BASEMENTNODE bn = BLB(leafnode, i); + invariant(BP_STATE(leafnode,i) == PT_AVAIL); + deltas.numrows += bn->stat64_delta.numrows; + deltas.numbytes += bn->stat64_delta.numbytes; + bn->stat64_delta = ZEROSTATS; } return deltas; } @@ -624,59 +624,162 @@ toku_mark_node_dirty(BRTNODE node) { node->dirty = 1; } +static void brt_status_update_flush_reason(BRTNODE node, BOOL for_checkpoint) { + if (node->height == 0) { + if (for_checkpoint) { + __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_LEAF_FOR_CHECKPOINT), 1); + } + else { + __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_LEAF), 1); + } + } + else { + if (for_checkpoint) { + __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT), 1); + } + else { + __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_NONLEAF), 1); + } + } +} + +static void brtnode_update_disk_stats( + BRTNODE brtnode, + struct brt_header* h, + BOOL for_checkpoint + ) +{ + STAT64INFO_S deltas = ZEROSTATS; + // capture deltas before rebalancing basements for serialization + deltas = toku_get_and_clear_basement_stats(brtnode); + update_header_stats(&(h->on_disk_stats), &deltas); + if (for_checkpoint) { + update_header_stats(&(h->checkpoint_staging_stats), &deltas); + } +} + +static void brtnode_clone_partitions(BRTNODE node, BRTNODE cloned_node) { + for (int i = 0; i < node->n_children; i++) { + BP_BLOCKNUM(cloned_node,i) = BP_BLOCKNUM(node,i); + assert(BP_STATE(node,i) == PT_AVAIL); + BP_STATE(cloned_node,i) = PT_AVAIL; + BP_WORKDONE(cloned_node, i) = BP_WORKDONE(node, i); + if (node->height == 0) { + 
set_BLB(cloned_node, i,toku_clone_bn(BLB(node,i))); + } + else { + set_BNC(cloned_node, i, toku_clone_nl(BNC(node,i))); + } + } +} + +void toku_brtnode_clone_callback( + void* value_data, + void** cloned_value_data, + PAIR_ATTR* new_attr, + BOOL for_checkpoint, + void* write_extraargs + ) +{ + BRTNODE node = value_data; + toku_assert_entire_node_in_memory(node); + struct brt_header *h = write_extraargs; + BRTNODE XMALLOC(cloned_node); + //BRTNODE cloned_node = (BRTNODE)toku_xmalloc(sizeof(*BRTNODE)); + memset(cloned_node, 0, sizeof(*cloned_node)); + if (node->height == 0) { + rebalance_brtnode_leaf(node, h->basementnodesize); + } + + cloned_node->max_msn_applied_to_node_on_disk = node->max_msn_applied_to_node_on_disk; + cloned_node->h = node->h; + cloned_node->nodesize = node->nodesize; + cloned_node->flags = node->flags; + cloned_node->thisnodename = node->thisnodename; + cloned_node->layout_version = node->layout_version; + cloned_node->layout_version_original = node->layout_version_original; + cloned_node->layout_version_read_from_disk = node->layout_version_read_from_disk; + cloned_node->build_id = node->build_id; + cloned_node->height = node->height; + cloned_node->dirty = node->dirty; + cloned_node->fullhash = node->fullhash; + cloned_node->optimized_for_upgrade = node->optimized_for_upgrade; + cloned_node->n_children = node->n_children; + cloned_node->totalchildkeylens = node->totalchildkeylens; + + XMALLOC_N(node->n_children-1, cloned_node->childkeys); + XMALLOC_N(node->n_children, cloned_node->bp); + // clone pivots + for (int i = 0; i < node->n_children-1; i++) { + cloned_node->childkeys[i] = kv_pair_malloc( + kv_pair_key(node->childkeys[i]), + kv_pair_keylen(node->childkeys[i]), + 0, + 0 + ); + } + // clone partition + brtnode_clone_partitions(node, cloned_node); + + // set header stats + if (node->height == 0) { + brtnode_update_disk_stats(node, h, for_checkpoint); + } + // clear dirty bit + node->dirty = 0; + cloned_node->dirty = 0; + // set new pair attr if necessary + if (node->height == 0) { + *new_attr = make_brtnode_pair_attr(node); + } + else { + new_attr->is_valid = FALSE; + } + *cloned_value_data = cloned_node; +} + + //fd is protected (must be holding fdlock) -void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename, void *brtnode_v, void *extraargs, PAIR_ATTR size __attribute__((unused)), PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint) { +void toku_brtnode_flush_callback ( + CACHEFILE cachefile, + int fd, + BLOCKNUM nodename, + void *brtnode_v, + void** disk_data, + void *extraargs, + PAIR_ATTR size __attribute__((unused)), + PAIR_ATTR* new_size, + BOOL write_me, + BOOL keep_me, + BOOL for_checkpoint, + BOOL is_clone + ) +{ struct brt_header *h = extraargs; BRTNODE brtnode = brtnode_v; + BRTNODE_DISK_DATA* ndd = (BRTNODE_DISK_DATA*)disk_data; assert(brtnode->thisnodename.b==nodename.b); int height = brtnode->height; - STAT64INFO_S deltas = ZEROSTATS; - //printf("%s:%d %p->mdict[0]=%p\n", __FILE__, __LINE__, brtnode, brtnode->mdicts[0]); if (write_me) { - if (height == 0) - // capture deltas before rebalancing basements for serialization - deltas = toku_get_and_clear_basement_stats(brtnode); + if (height == 0 && !is_clone) { + brtnode_update_disk_stats(brtnode, h, for_checkpoint); + } if (!h->panic) { // if the brt panicked, stop writing, otherwise try to write it. 
toku_assert_entire_node_in_memory(brtnode); int n_workitems, n_threads; toku_cachefile_get_workqueue_load(cachefile, &n_workitems, &n_threads); - int r = toku_serialize_brtnode_to(fd, brtnode->thisnodename, brtnode, h, n_workitems, n_threads, for_checkpoint); - if (r) { - if (h->panic==0) { - char *e = strerror(r); - int l = 200 + strlen(e); - char s[l]; - h->panic=r; - snprintf(s, l-1, "While writing data to disk, error %d (%s)", r, e); - h->panic_string = toku_strdup(s); - } - } - } - if (height == 0) { - struct brt_header * header_in_node = brtnode->h; - invariant(header_in_node == h); - update_header_stats(&(h->on_disk_stats), &deltas); - if (for_checkpoint) { - update_header_stats(&(h->checkpoint_staging_stats), &deltas); - } - if (for_checkpoint) - __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_LEAF_FOR_CHECKPOINT), 1); - else - __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_LEAF), 1); - } - else { - if (for_checkpoint) - __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT), 1); - else - __sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_NONLEAF), 1); + int r = toku_serialize_brtnode_to(fd, brtnode->thisnodename, brtnode, ndd, !is_clone, h, n_workitems, n_threads, for_checkpoint); + assert_zero(r); } + brt_status_update_flush_reason(brtnode, for_checkpoint); } - //printf("%s:%d %p->mdict[0]=%p\n", __FILE__, __LINE__, brtnode, brtnode->mdicts[0]); - *new_size = make_brtnode_pair_attr(brtnode); if (!keep_me) { + if (!is_clone) toku_free(*disk_data); toku_brtnode_free(&brtnode); } - //printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced); + else { + *new_size = make_brtnode_pair_attr(brtnode); + } } void @@ -693,15 +796,16 @@ toku_brt_status_update_pivot_fetch_reason(struct brtnode_fetch_extra *bfe) //fd is protected (must be holding fdlock) int toku_brtnode_fetch_callback (CACHEFILE UU(cachefile), int fd, BLOCKNUM nodename, u_int32_t fullhash, - void **brtnode_pv, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { + void **brtnode_pv, void** disk_data, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { assert(extraargs); assert(*brtnode_pv == NULL); + BRTNODE_DISK_DATA* ndd = (BRTNODE_DISK_DATA*)disk_data; struct brtnode_fetch_extra *bfe = (struct brtnode_fetch_extra *)extraargs; BRTNODE *node=(BRTNODE*)brtnode_pv; // deserialize the node, must pass the bfe in because we cannot // evaluate what piece of the the node is necessary until we get it at // least partially into memory - int r = toku_deserialize_brtnode_from(fd, nodename, fullhash, node, bfe); + int r = toku_deserialize_brtnode_from(fd, nodename, fullhash, node, ndd, bfe); if (r == 0) { (*node)->h = bfe->h; // copy reference to header from bfe *sizep = make_brtnode_pair_attr(*node); @@ -712,6 +816,7 @@ int toku_brtnode_fetch_callback (CACHEFILE UU(cachefile), int fd, BLOCKNUM noden void toku_brtnode_pe_est_callback( void* brtnode_pv, + void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* UU(write_extraargs) @@ -742,7 +847,8 @@ void toku_brtnode_pe_est_callback( // first get an estimate for how much space will be taken // after compression, it is simply the size of compressed // data on disk plus the size of the struct that holds it - u_int32_t compressed_data_size = BP_SIZE(node, i); + BRTNODE_DISK_DATA ndd = disk_data; + u_int32_t compressed_data_size = BP_SIZE(ndd, i); compressed_data_size += sizeof(struct sub_block); // now get the space taken now @@ -942,6 +1048,81 @@ BOOL toku_brtnode_pf_req_callback(void* brtnode_pv, void* read_extraargs) { 
return retval; } +u_int64_t num_basements_decompressed; +u_int64_t num_buffers_decompressed; +u_int64_t num_basements_fetched; +u_int64_t num_buffers_fetched; +u_int64_t num_pivots_fetched; + +void brt_begin_checkpoint(void) { + /* + u_int64_t old_num_basements_decompressed = num_basements_decompressed; + u_int64_t old_num_buffers_decompressed = num_buffers_decompressed; + u_int64_t old_num_basements_fetched = num_basements_fetched; + u_int64_t old_num_buffers_fetched = num_buffers_fetched; + u_int64_t old_num_pivots_fetched = num_pivots_fetched; + */ + num_basements_decompressed = + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_NORMAL) + + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH) + + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_WRITE); + + num_buffers_decompressed = + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE); + + num_basements_fetched = + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_NORMAL) + + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_PREFETCH) + + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_WRITE); + + num_buffers_fetched = + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_NORMAL) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_PREFETCH) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_WRITE); + + num_pivots_fetched = + STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_QUERY) + + STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_PREFETCH) + + STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_WRITE); +} + +void brt_end_checkpoint(void) { + num_basements_decompressed = + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_NORMAL) + + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH) + + STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_WRITE); + + num_buffers_decompressed = + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE); + + num_basements_fetched = + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_NORMAL) + + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_PREFETCH) + + STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_WRITE); + + num_buffers_fetched = + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_NORMAL) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_PREFETCH) + + STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_WRITE); + + num_pivots_fetched = + STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_QUERY) + + STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_PREFETCH) + + STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_WRITE); +} + static void brt_status_update_partial_fetch_reason( struct brtnode_fetch_extra* UU(bfe), @@ -950,7 +1131,6 @@ brt_status_update_partial_fetch_reason( BOOL UU(is_leaf) ) { -#if 0 invariant(state == PT_COMPRESSED || state == PT_ON_DISK); if (is_leaf) { if (bfe->type == brtnode_fetch_prefetch) { @@ -1006,13 +1186,13 @@ brt_status_update_partial_fetch_reason( } } } -#endif } // callback for partially reading a node // could have just used toku_brtnode_fetch_callback, but wanted to separate the two cases to separate functions -int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAIR_ATTR* sizep) { 
+int toku_brtnode_pf_callback(void* brtnode_pv, void* disk_data, void* read_extraargs, int fd, PAIR_ATTR* sizep) { BRTNODE node = brtnode_pv; + BRTNODE_DISK_DATA ndd = disk_data; struct brtnode_fetch_extra *bfe = read_extraargs; // there must be a reason this is being called. If we get a garbage type or the type is brtnode_fetch_none, // then something went wrong @@ -1041,7 +1221,7 @@ int toku_brtnode_pf_callback(void* brtnode_pv, void* read_extraargs, int fd, PAI cilk_spawn toku_deserialize_bp_from_compressed(node, i, &bfe->h->descriptor, bfe->h->compare_fun); } else if (BP_STATE(node,i) == PT_ON_DISK) { - cilk_spawn toku_deserialize_bp_from_disk(node, i, fd, bfe); + cilk_spawn toku_deserialize_bp_from_disk(node, ndd, i, fd, bfe); } else { assert(FALSE); @@ -1271,8 +1451,6 @@ toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num for (int i = 0; i < num_children; i++) { BP_BLOCKNUM(n,i).b=0; BP_STATE(n,i) = PT_INVALID; - BP_START(n,i) = 0; - BP_SIZE (n,i) = 0; BP_WORKDONE(n,i) = 0; BP_INIT_TOUCHED_CLOCK(n, i); set_BNULL(n,i); @@ -1329,8 +1507,6 @@ static void init_childinfo(BRTNODE node, int childnum, BRTNODE child) { BP_BLOCKNUM(node,childnum) = child->thisnodename; BP_STATE(node,childnum) = PT_AVAIL; - BP_START(node,childnum) = 0; - BP_SIZE (node,childnum) = 0; BP_WORKDONE(node, childnum) = 0; set_BNC(node, childnum, toku_create_empty_nl()); } @@ -2303,11 +2479,15 @@ void bring_node_fully_into_memory(BRTNODE node, struct brt_header* h) { if (!is_entire_node_in_memory(node)) { struct brtnode_fetch_extra bfe; - PAIR_ATTR attr; - int fd = toku_cachefile_get_and_pin_fd(h->cf); fill_bfe_for_full_read(&bfe, h); - toku_brtnode_pf_callback(node, &bfe, fd, &attr); - toku_cachefile_unpin_fd(h->cf); + toku_cachetable_pf_pinned_pair( + node, + toku_brtnode_pf_callback, + &bfe, + h->cf, + node->thisnodename, + toku_cachetable_hash(h->cf, node->thisnodename) + ); } } @@ -2542,7 +2722,17 @@ toku_brt_root_put_cmd (BRT brt, BRT_MSG_S * cmd) // get the root node struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, brt->h); - toku_pin_brtnode_holding_lock(brt, *rootp, fullhash,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node); + toku_pin_brtnode_holding_lock( + brt, + *rootp, + fullhash, + (ANCESTORS)NULL, + &infinite_bounds, + &bfe, + TRUE, + TRUE, // may_modify_node + &node + ); toku_assert_entire_node_in_memory(node); cmd->msn.msn = node->max_msn_applied_to_node_on_disk.msn + 1; @@ -5136,18 +5326,18 @@ brt_search_node ( #if TOKU_DO_PREFETCH static int -brtnode_fetch_callback_and_free_bfe(CACHEFILE cf, int fd, BLOCKNUM nodename, u_int32_t fullhash, void **brtnode_pv, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) +brtnode_fetch_callback_and_free_bfe(CACHEFILE cf, int fd, BLOCKNUM nodename, u_int32_t fullhash, void **brtnode_pv, void** UU(disk_data), PAIR_ATTR *sizep, int *dirtyp, void *extraargs) { - int r = toku_brtnode_fetch_callback(cf, fd, nodename, fullhash, brtnode_pv, sizep, dirtyp, extraargs); + int r = toku_brtnode_fetch_callback(cf, fd, nodename, fullhash, brtnode_pv, disk_data, sizep, dirtyp, extraargs); destroy_bfe_for_prefetch(extraargs); toku_free(extraargs); return r; } static int -brtnode_pf_callback_and_free_bfe(void *brtnode_pv, void *read_extraargs, int fd, PAIR_ATTR *sizep) +brtnode_pf_callback_and_free_bfe(void *brtnode_pv, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep) { - int r = toku_brtnode_pf_callback(brtnode_pv, read_extraargs, fd, sizep); + int r = toku_brtnode_pf_callback(brtnode_pv, disk_data, read_extraargs, fd, 
sizep); destroy_bfe_for_prefetch(read_extraargs); toku_free(read_extraargs); return r; @@ -5239,6 +5429,7 @@ brt_search_child(BRT brt, BRTNODE node, int childnum, brt_search_t *search, BRT_ unlockers, &next_ancestors, bounds, &bfe, + (node->height == 1), // may_modify_node TRUE iff child is leaf TRUE, &childnode, &msgs_applied); @@ -5569,6 +5760,7 @@ try_again: *rootp, fullhash, &bfe, + FALSE, // may_modify_node set to FALSE, because root cannot change during search 0, NULL, &node @@ -6084,7 +6276,19 @@ toku_brt_keyrange_internal (BRT brt, BRTNODE node, u_int32_t fullhash = compute_child_fullhash(brt->cf, node, child_number); BRTNODE childnode; BOOL msgs_applied = FALSE; - r = toku_pin_brtnode(brt, childblocknum, fullhash, unlockers, &next_ancestors, bounds, bfe, FALSE, &childnode, &msgs_applied); + r = toku_pin_brtnode( + brt, + childblocknum, + fullhash, + unlockers, + &next_ancestors, + bounds, + bfe, + FALSE, // may_modify_node is FALSE, because node guaranteed to not change + FALSE, + &childnode, + &msgs_applied + ); assert(!msgs_applied); if (r != TOKUDB_TRY_AGAIN) { assert(r == 0); @@ -6136,6 +6340,7 @@ toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less_p, u_int64_t *equal_p, u_i *rootp, fullhash, &bfe, + FALSE, // may_modify_node, cannot change root during keyrange 0, NULL, &node @@ -6221,6 +6426,7 @@ toku_dump_brtnode (FILE *file, BRT brt, BLOCKNUM blocknum, int depth, struct kv_ toku_brtnode_fetch_callback, toku_brtnode_pf_req_callback, toku_brtnode_pf_callback, + TRUE, // may_modify_value, just safe to set to TRUE, I think it could theoretically be FALSE &bfe ); assert_zero(r); @@ -6533,6 +6739,7 @@ static BOOL is_empty_fast_iter (BRT brt, BRTNODE node) { childblocknum, fullhash, &bfe, + FALSE, // may_modify_node set to FALSE, as nodes not modified 0, NULL, &childnode @@ -6572,6 +6779,7 @@ BOOL toku_brt_is_empty_fast (BRT brt) *rootp, fullhash, &bfe, + FALSE, // may_modify_node set to FALSE, node does not change 0, NULL, &node diff --git a/newbrt/brtdump.c b/newbrt/brtdump.c index c363fdb5844..5a5e525afb4 100644 --- a/newbrt/brtdump.c +++ b/newbrt/brtdump.c @@ -123,8 +123,9 @@ static void dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) { BRTNODE n; struct brtnode_fetch_extra bfe; + BRTNODE_DISK_DATA ndd = NULL; fill_bfe_for_full_read(&bfe, h); - int r = toku_deserialize_brtnode_from (f, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &bfe); + int r = toku_deserialize_brtnode_from (f, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); assert(r==0); assert(n!=0); printf("brtnode\n"); @@ -207,6 +208,7 @@ dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) { } } toku_brtnode_free(&n); + toku_free(ndd); } static void @@ -226,9 +228,10 @@ static int fragmentation_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void *extra) { frag_help_extra *info = extra; BRTNODE n; + BRTNODE_DISK_DATA ndd = NULL; struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, info->h); - int r = toku_deserialize_brtnode_from(info->f, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &bfe); + int r = toku_deserialize_brtnode_from(info->f, b, 0 /*pass zero for hash, it doesn't matter*/, &n, &ndd, &bfe); if (r==0) { info->blocksizes += size; if (n->height == 0) { @@ -236,6 +239,7 @@ fragmentation_helper(BLOCKNUM b, int64_t size, int64_t UU(address), void *extra) info->leafblocks++; } toku_brtnode_free(&n); + toku_free(ndd); } return 0; } @@ -282,9 +286,10 @@ static int garbage_helper(BLOCKNUM b, int64_t UU(size), int64_t UU(address), void *extra) 
{ garbage_help_extra *info = extra; BRTNODE n; + BRTNODE_DISK_DATA ndd = NULL; struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, info->h); - int r = toku_deserialize_brtnode_from(info->f, b, 0, &n, &bfe); + int r = toku_deserialize_brtnode_from(info->f, b, 0, &n, &ndd, &bfe); if (r != 0) { goto no_node; } @@ -300,6 +305,7 @@ garbage_helper(BLOCKNUM b, int64_t UU(size), int64_t UU(address), void *extra) { } exit: toku_brtnode_free(&n); + toku_free(ndd); no_node: return r; } diff --git a/newbrt/brtloader.c b/newbrt/brtloader.c index 0081708e72b..18ed861f523 100644 --- a/newbrt/brtloader.c +++ b/newbrt/brtloader.c @@ -2806,7 +2806,8 @@ static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progr // serialize leaf to buffer size_t serialized_leaf_size = 0; char *serialized_leaf = NULL; - result = toku_serialize_brtnode_to_memory(lbuf->node, target_basementnodesize, &serialized_leaf_size, &serialized_leaf); + BRTNODE_DISK_DATA ndd = NULL; + result = toku_serialize_brtnode_to_memory(lbuf->node, &ndd, target_basementnodesize, TRUE, &serialized_leaf_size, &serialized_leaf); // write it out if (result == 0) { @@ -2822,8 +2823,10 @@ static void finish_leafnode (struct dbout *out, struct leaf_buf *lbuf, int progr } // free the node - if (serialized_leaf) + if (serialized_leaf) { + toku_free(ndd); toku_free(serialized_leaf); + } toku_brtnode_free(&lbuf->node); xids_destroy(&lbuf->xids); toku_free(lbuf); @@ -3015,11 +3018,12 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu BP_STATE(node,i) = PT_AVAIL; } + BRTNODE_DISK_DATA ndd = NULL; if (result == 0) { size_t n_bytes; char *bytes; int r; - r = toku_serialize_brtnode_to_memory(node, target_basementnodesize, &n_bytes, &bytes); + r = toku_serialize_brtnode_to_memory(node, &ndd, target_basementnodesize, TRUE, &n_bytes, &bytes); if (r) { result = r; } else { @@ -3049,6 +3053,7 @@ static void write_nonleaf_node (BRTLOADER bl, struct dbout *out, int64_t blocknu toku_free(node->bp); toku_free(node->childkeys); toku_free(node); + toku_free(ndd); toku_free(subtree_info); blocknum_of_new_node = blocknum_of_new_node; diff --git a/newbrt/brttypes.h b/newbrt/brttypes.h index a35da0f6ea6..5d1abbb7af6 100644 --- a/newbrt/brttypes.h +++ b/newbrt/brttypes.h @@ -31,6 +31,7 @@ typedef bool BOOL; typedef struct brt *BRT; typedef struct brtnode *BRTNODE; +typedef struct brtnode_disk_data *BRTNODE_DISK_DATA; typedef struct brtnode_leaf_basement_node *BASEMENTNODE; typedef struct brtnode_nonleaf_childinfo *NONLEAF_CHILDINFO; typedef struct sub_block *SUB_BLOCK; diff --git a/newbrt/cachetable.c b/newbrt/cachetable.c index be3eb76d53f..c976496bfe6 100644 --- a/newbrt/cachetable.c +++ b/newbrt/cachetable.c @@ -25,11 +25,6 @@ #include "brt-internal.h" - -static void cachetable_writer(WORKITEM); -static void cachetable_reader(WORKITEM); -static void cachetable_partial_reader(WORKITEM); - #define TRACE_CACHETABLE 0 #if TRACE_CACHETABLE #define WHEN_TRACE_CT(x) x @@ -124,7 +119,10 @@ typedef struct ctpair *PAIR; struct ctpair { CACHEFILE cachefile; CACHEKEY key; - void *value; + void* value_data; + void* cloned_value_data; + long cloned_value_size; + void* disk_data; PAIR_ATTR attr; // @@ -146,6 +144,7 @@ struct ctpair { CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback; CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback; CACHETABLE_CLEANER_CALLBACK cleaner_callback; + CACHETABLE_CLONE_CALLBACK clone_callback; long size_evicting_estimate; void *write_extraargs; @@ -158,9 +157,11 @@ struct ctpair { PAIR 
pending_next; PAIR pending_prev; - struct nb_mutex nb_mutex; // single writer + struct nb_mutex value_nb_mutex; // single writer + struct nb_mutex disk_nb_mutex; // single writer struct workqueue *cq; // writers sometimes return ctpair's using this queue struct workitem asyncwork; // work item for the worker threads + struct workitem checkpoint_asyncwork; // work item for the worker threads u_int32_t refs; // References that prevent destruction int already_removed; // If a pair is removed from the cachetable, but cannot be freed because refs>0, this is set. struct toku_list next_for_cachefile; // link in the cachefile list @@ -188,7 +189,8 @@ static inline void ctpair_destroy(PAIR p) { assert(p->refs>0); p->refs--; if (p->refs==0) { - nb_mutex_destroy(&p->nb_mutex); + nb_mutex_destroy(&p->value_nb_mutex); + nb_mutex_destroy(&p->disk_nb_mutex); toku_free(p); } } @@ -216,6 +218,8 @@ struct cachetable { toku_pthread_mutex_t cachefiles_mutex; // lock that protects the cachefiles list struct workqueue wq; // async work queue THREADPOOL threadpool; // pool of worker threads + struct workqueue checkpoint_wq; + THREADPOOL checkpoint_threadpool; KIBBUTZ kibbutz; // another pool of worker threads and jobs to do asynchronously. @@ -250,6 +254,18 @@ struct cachetable { int64_t size_leaf; int64_t size_rollback; int64_t size_cachepressure; + + // variables used by the checkpoint thread to know + // when all work induced by cloning on client threads is done + // when a client thread clones a PAIR and places it on + // a background thread to be written out, n_checkpoint_clones_running + // is incremented. On the background thread, when the checkpointing + // is completed, n_checkpoint_clones_running is decremented. + // When the checkpoint thread uses clones_background_wait for + // n_checkpoint_clones_running to go to zero, it knows that + // the checkpoint is complete + u_int32_t n_checkpoint_clones_running; + toku_pthread_cond_t clones_background_wait; }; @@ -512,7 +528,8 @@ int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN UU(initial_l ct->size_limit = size_limit; ct->size_reserved = unreservable_memory(size_limit); ct->logger = logger; - toku_init_workers(&ct->wq, &ct->threadpool); + toku_init_workers(&ct->wq, &ct->threadpool, 1); + toku_init_workers(&ct->checkpoint_wq, &ct->checkpoint_threadpool, 8); ct->mutex = workqueue_lock_ref(&ct->wq); int r = toku_pthread_mutex_init(&ct->openfd_mutex, NULL); resource_assert_zero(r); r = toku_pthread_mutex_init(&ct->cachefiles_mutex, 0); resource_assert_zero(r); @@ -524,6 +541,7 @@ int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN UU(initial_l ct->cleaner_iterations = 1; // default is one iteration r = toku_omt_create(&ct->reserved_filenums); assert(r==0); ct->env_dir = toku_xstrdup("."); + r = toku_pthread_cond_init(&ct->clones_background_wait, NULL); resource_assert_zero(r); *result = ct; return 0; } @@ -1309,7 +1327,6 @@ cachetable_change_pair_attr(CACHETABLE ct, PAIR_ATTR old_attr, PAIR_ATTR new_att // Effects: the pair is removed from the LRU list and from the cachetable's hash table. // The size of the objects in the cachetable is adjusted by the size of the pair being // removed. - static void cachetable_remove_pair (CACHETABLE ct, PAIR p) { pair_remove(ct, p); pending_pairs_remove(ct, p); @@ -1319,20 +1336,20 @@ static void cachetable_remove_pair (CACHETABLE ct, PAIR p) { ct->n_in_table--; // Remove it from the hash chain. 
{
-	unsigned int h = p->fullhash&(ct->table_size-1);
-	ct->table[h] = remove_from_hash_chain (p, ct->table[h]);
+        unsigned int h = p->fullhash&(ct->table_size-1);
+        ct->table[h] = remove_from_hash_chain (p, ct->table[h]);
    }
    cachetable_remove_pair_attr(ct, p->attr);
    p->already_removed = TRUE;
}
-
static void cachetable_free_pair(CACHETABLE ct, PAIR p) {
    // helgrind
    CACHETABLE_FLUSH_CALLBACK flush_callback = p->flush_callback;
    CACHEFILE cachefile = p->cachefile;
    CACHEKEY key = p->key;
-   void *value = p->value;
+   void *value = p->value_data;
+   void* disk_data = p->disk_data;
    void *write_extraargs = p->write_extraargs;
    PAIR_ATTR old_attr = p->attr;
@@ -1342,7 +1359,7 @@ static void cachetable_free_pair(CACHETABLE ct, PAIR p) {
    PAIR_ATTR new_attr = p->attr;
    // Note that flush_callback is called with write_me FALSE, so the only purpose of this
    // call is to tell the brt layer to evict the node (keep_me is FALSE).
-   flush_callback(cachefile, cachefile->fd, key, value, write_extraargs, old_attr, &new_attr, FALSE, FALSE, TRUE);
+   flush_callback(cachefile, cachefile->fd, key, value, &disk_data, write_extraargs, old_attr, &new_attr, FALSE, FALSE, TRUE, FALSE);

    cachetable_lock(ct);
    rwlock_read_unlock(&cachefile->fdlock);
@@ -1358,76 +1375,71 @@ static void cachetable_free_pair(CACHETABLE ct, PAIR p) {
// anything except destroy the node.
static void cachetable_maybe_remove_and_free_pair (CACHETABLE ct, PAIR p, BOOL* destroyed) {
    *destroyed = FALSE;
-   if (nb_mutex_users(&p->nb_mutex) == 0) {
+   if (nb_mutex_users(&p->value_nb_mutex) == 0) {
+       // The assumption is that if we are about to remove the pair,
+       // no one has grabbed the disk_nb_mutex and there is no
+       // cloned_value_data, because no one is writing a cloned value out.
+       assert(nb_mutex_users(&p->disk_nb_mutex) == 0);
+       assert(p->cloned_value_data == NULL);
        cachetable_remove_pair(ct, p);
        cachetable_free_pair(ct, p);
        *destroyed = TRUE;
    }
}

-// Read a pair from a cachefile into memory using the pair's fetch callback
-static void cachetable_fetch_pair(
+// Assumes value_nb_mutex and disk_nb_mutex are held on entry.
+// The responsibility of this function is only to write a locked PAIR to disk
+// and NOTHING else. We do not manipulate the state of the PAIR or of the
+// cachetable here (with the exception of ct->size_current for clones).
+static void cachetable_only_write_locked_data(
    CACHETABLE ct,
-   CACHEFILE cf,
    PAIR p,
-   CACHETABLE_FETCH_CALLBACK fetch_callback,
-   void* read_extraargs,
-   BOOL keep_pair_locked
+   BOOL for_checkpoint,
+   PAIR_ATTR* new_attr,
+   BOOL is_clone
    )
-{
-   // helgrind
+{
+   CACHETABLE_FLUSH_CALLBACK flush_callback = p->flush_callback;
+   CACHEFILE cachefile = p->cachefile;
    CACHEKEY key = p->key;
-   u_int32_t fullhash = p->fullhash;
-
-   void *toku_value = 0;
-   PAIR_ATTR attr;
+   void *value = is_clone ? p->cloned_value_data : p->value_data;
+   void *disk_data = p->disk_data;
+   void *write_extraargs = p->write_extraargs;
+   PAIR_ATTR old_attr = p->attr;
+   BOOL dowrite = TRUE;

-   // FIXME this should be enum cachetable_dirty, right?
- int dirty = 0; - - WHEN_TRACE_CT(printf("%s:%d CT: fetch_callback(%lld...)\n", __FILE__, __LINE__, key)); - - rwlock_prefer_read_lock(&cf->fdlock, ct->mutex); + rwlock_prefer_read_lock(&cachefile->fdlock, ct->mutex); cachetable_unlock(ct); - - int r; - assert(!toku_cachefile_is_dev_null_unlocked(cf)); - r = fetch_callback(cf, cf->fd, key, fullhash, &toku_value, &attr, &dirty, read_extraargs); - if (dirty) - p->dirty = CACHETABLE_DIRTY; - + + // write callback + if (toku_cachefile_is_dev_null_unlocked(cachefile)) { + dowrite = FALSE; + } + flush_callback( + cachefile, + cachefile->fd, + key, + value, + &disk_data, + write_extraargs, + old_attr, + new_attr, + dowrite, + is_clone ? FALSE : TRUE, // keep_me (only keep if this is not cloned pointer) + for_checkpoint, + is_clone //is_clone + ); + p->disk_data = disk_data; cachetable_lock(ct); - rwlock_read_unlock(&cf->fdlock); - // brt.c asserts that get_and_pin succeeds, - // so we might as well just assert it here as opposed - // to trying to support an INVALID state - assert(r == 0); - - p->value = toku_value; - p->attr = attr; - cachetable_add_pair_attr(ct, attr); - p->state = CTPAIR_IDLE; - if (keep_pair_locked) { - // if the caller wants the pair to remain locked - // that means the caller requests continued - // ownership of the PAIR, so there better not - // be a cq asking to transfer ownership - assert(!p->cq); - } - else { - if (p->cq) { - workitem_init(&p->asyncwork, NULL, p); - workqueue_enq(p->cq, &p->asyncwork, 1); - } - else { - nb_mutex_write_unlock(&p->nb_mutex); - } - } - if (0) printf("%s:%d %"PRId64" complete\n", __FUNCTION__, __LINE__, key.b); + if (is_clone) { + p->cloned_value_data = NULL; + ct->size_current -= p->cloned_value_size; + p->cloned_value_size = 0; + } + rwlock_read_unlock(&cachefile->fdlock); } -static void cachetable_complete_write_pair (CACHETABLE ct, PAIR p, BOOL do_remove, BOOL* destroyed); - // // This function writes a PAIR's value out to disk. Currently, it is called @@ -1436,38 +1448,32 @@ static void cachetable_complete_write_pair (CACHETABLE ct, PAIR p, BOOL do_remov // that needs to write out a dirty node for checkpoint. 
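/*
 * A minimal skeleton of a CACHETABLE_FLUSH_CALLBACK under the widened
 * signature, matching how cachetable_only_write_locked_data above invokes
 * it (keep_me is passed as !is_clone, so a cloned value is always freed
 * once it is written). The body is an assumed illustration, not the brt
 * layer's actual callback:
 *
 *   static void example_flush_cb(CACHEFILE cf, int fd, CACHEKEY key,
 *                                void *value, void **disk_data,
 *                                void *write_extraargs, PAIR_ATTR size,
 *                                PAIR_ATTR *new_size, BOOL write_me,
 *                                BOOL keep_me, BOOL for_checkpoint,
 *                                BOOL is_clone) {
 *       if (write_me) {
 *           // serialize value to fd and record the node's new on-disk
 *           // layout in *disk_data so later partial fetches can find it
 *       }
 *       if (!keep_me) {
 *           // free value; when is_clone is TRUE this frees the clone
 *           // while the original value_data stays cached
 *       }
 *   }
 */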
//
static void cachetable_write_locked_pair(CACHETABLE ct, PAIR p) {
-   // see comments in toku_cachetable_begin_checkpoint to understand
-   // purpose of the pending_lock
-   rwlock_read_lock(&ct->pending_lock, ct->mutex);
-
-   // helgrind
-   CACHETABLE_FLUSH_CALLBACK flush_callback = p->flush_callback;
-   CACHEFILE cachefile = p->cachefile;
-   CACHEKEY key = p->key;
-   void *value = p->value;
-   void *write_extraargs = p->write_extraargs;
    PAIR_ATTR old_attr = p->attr;
    PAIR_ATTR new_attr = p->attr;
-   BOOL dowrite = (BOOL)(p->dirty);
+   rwlock_read_lock(&ct->pending_lock, ct->mutex);
    BOOL for_checkpoint = p->checkpoint_pending;
-
-   //Must set to FALSE before releasing cachetable lock
    p->checkpoint_pending = FALSE;
-   rwlock_prefer_read_lock(&cachefile->fdlock, ct->mutex);
-   cachetable_unlock(ct);
-
-   // write callback
-   if (toku_cachefile_is_dev_null_unlocked(cachefile)) dowrite = FALSE;
-   flush_callback(cachefile, cachefile->fd, key, value, write_extraargs, old_attr, &new_attr, dowrite, TRUE, for_checkpoint);
-
-   cachetable_lock(ct);
-   rwlock_read_unlock(&cachefile->fdlock);
-   //
-   // now let's update variables
-   //
-   p->attr = new_attr;
-   cachetable_change_pair_attr(ct, old_attr, new_attr);
-
+   // Grabbing the disk_nb_mutex here ensures that after this point,
+   // no one is writing out a cloned value. If we grabbed the
+   // disk_nb_mutex inside the if clause instead, we might try to evict
+   // a PAIR whose clone is still in the process of being written out.
+   nb_mutex_write_lock(&p->disk_nb_mutex, ct->mutex);
+   // Make sure the assumption about cloned_value_data is true:
+   // if we have grabbed the disk_nb_mutex, there should be no
+   // cloned value data.
+   assert(p->cloned_value_data == NULL);
+   if (p->dirty) {
+       cachetable_only_write_locked_data(ct, p, for_checkpoint, &new_attr, FALSE);
+       //
+       // now let's update variables
+       //
+       if (new_attr.is_valid) {
+           p->attr = new_attr;
+           cachetable_change_pair_attr(ct, old_attr, new_attr);
+       }
+   }
+   nb_mutex_write_unlock(&p->disk_nb_mutex);
    // the pair is no longer dirty once written
    p->dirty = CACHETABLE_CLEAN;
@@ -1475,6 +1481,18 @@ static void cachetable_write_locked_pair(CACHETABLE ct, PAIR p) {
    rwlock_read_unlock(&ct->pending_lock);
}

+// complete the write of a pair by resetting the writing flag, and
+// maybe removing the pair from the cachetable if there are no
+// references to it
+
+static void cachetable_complete_write_pair (CACHETABLE ct, PAIR p, BOOL do_remove, BOOL* destroyed) {
+   p->cq = 0;
+   nb_mutex_write_unlock(&p->value_nb_mutex);
+   if (do_remove) {
+       cachetable_maybe_remove_and_free_pair(ct, p, destroyed);
+   }
+}
+
// Write a pair to storage
// Effects: an exclusive lock on the pair is obtained, the write callback is called,
// the pair dirty state is adjusted, and the write is completed.
The keep_me @@ -1507,16 +1525,15 @@ static void cachetable_write_pair(CACHETABLE ct, PAIR p, BOOL remove_me) { } } -// complete the write of a pair by reseting the writing flag, and -// maybe removing the pair from the cachetable if there are no -// references to it - -static void cachetable_complete_write_pair (CACHETABLE ct, PAIR p, BOOL do_remove, BOOL* destroyed) { - p->cq = 0; - nb_mutex_write_unlock(&p->nb_mutex); - if (do_remove) { - cachetable_maybe_remove_and_free_pair(ct, p, destroyed); - } +// Worker thread function to write a pair from memory to its cachefile +// As of now, the writer thread NEVER evicts, hence passing FALSE +// for the third parameter to cachetable_write_pair +static void cachetable_writer(WORKITEM wi) { + PAIR p = workitem_arg(wi); + CACHETABLE ct = p->cachefile->cachetable; + cachetable_lock(ct); + cachetable_write_pair(ct, p, p->remove_me); + cachetable_unlock(ct); } static void try_evict_pair(CACHETABLE ct, PAIR p) { @@ -1525,15 +1542,20 @@ static void try_evict_pair(CACHETABLE ct, PAIR p) { // must check for before we grab the write lock because we may // be trying to evict something this thread is trying to read - if (!nb_mutex_users(&p->nb_mutex)) { - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); + if (!nb_mutex_users(&p->value_nb_mutex)) { + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); p->state = CTPAIR_WRITING; assert(ct->size_evicting >= 0); ct->size_evicting += p->attr.size; assert(ct->size_evicting >= 0); - - if (!p->dirty) { + + // if the PAIR is dirty, the running eviction requires writing the + // PAIR out. if the disk_nb_mutex is grabbed, then running + // eviction requires waiting for the disk_nb_mutex to become available, + // which may be expensive. Hence, if either is true, we + // do the eviction on a writer thread + if (!p->dirty && (nb_mutex_writers(&p->disk_nb_mutex) == 0)) { cachetable_write_pair(ct, p, TRUE); } else { @@ -1545,12 +1567,10 @@ static void try_evict_pair(CACHETABLE ct, PAIR p) { } } -// flush and remove a pair from the cachetable. the callbacks are run by a thread in -// a thread pool. // flush and remove a pair from the cachetable. the callbacks are run by a thread in // a thread pool. 
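/*
 * The worker hand-off idiom this path relies on, sketched with the names
 * used in this file (assumed: the enqueue target is ct->wq for eviction
 * writes, since checkpoint writes use ct->checkpoint_wq):
 *
 *   WORKITEM wi = &p->asyncwork;
 *   workitem_init(wi, cachetable_writer, p);   // bind function + argument
 *   workqueue_enq(&ct->wq, wi, 1);             // wake a worker thread
 *
 *   // worker side, in cachetable_writer above:
 *   //   PAIR p = workitem_arg(wi);
 *   //   cachetable_write_pair(ct, p, p->remove_me);
 */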
static void flush_and_maybe_remove (CACHETABLE ct, PAIR p) { - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); p->state = CTPAIR_WRITING; // this needs to be done here regardless of whether the eviction occurs on the main thread or on // a writer thread, because there may be a completion queue that needs access to this information @@ -1565,7 +1585,7 @@ static void flush_and_maybe_remove (CACHETABLE ct, PAIR p) { workitem_init(wi, cachetable_writer, p); // evictions without a write or unpinned pair's that are clean // can be run in the current thread - if (!nb_mutex_writers(&p->nb_mutex) && !p->dirty) { + if (!nb_mutex_writers(&p->value_nb_mutex) && !p->dirty) { assert(ct->size_evicting >= 0); ct->size_evicting += p->attr.size; assert(ct->size_evicting >= 0); @@ -1585,7 +1605,7 @@ static void do_partial_eviction(CACHETABLE ct, PAIR p) { PAIR_ATTR old_attr = p->attr; cachetable_unlock(ct); - p->pe_callback(p->value, old_attr, &new_attr, p->write_extraargs); + p->pe_callback(p->value_data, old_attr, &new_attr, p->write_extraargs); cachetable_lock(ct); cachetable_change_pair_attr(ct, old_attr, new_attr); @@ -1603,7 +1623,7 @@ static void do_partial_eviction(CACHETABLE ct, PAIR p) { workqueue_enq(p->cq, &p->asyncwork, 1); } else { - nb_mutex_write_unlock(&p->nb_mutex); + nb_mutex_write_unlock(&p->value_nb_mutex); } } @@ -1632,7 +1652,7 @@ static void maybe_flush_some (CACHETABLE ct, long size) { while ((ct->clock_head) && (size + ct->size_current > ct->size_limit + ct->size_evicting)) { PAIR curr_in_clock = ct->clock_head; - if (nb_mutex_users(&curr_in_clock->nb_mutex)) { + if (nb_mutex_users(&curr_in_clock->value_nb_mutex) || nb_mutex_users(&curr_in_clock->disk_nb_mutex)) { if (set_val && curr_in_clock->key.b == curr_cachekey.b && curr_in_clock->cachefile->filenum.fileid == curr_filenum.fileid) @@ -1655,14 +1675,16 @@ static void maybe_flush_some (CACHETABLE ct, long size) { if (curr_in_clock->count > 0) { curr_in_clock->count--; // call the partial eviction callback - nb_mutex_write_lock(&curr_in_clock->nb_mutex, ct->mutex); + nb_mutex_write_lock(&curr_in_clock->value_nb_mutex, ct->mutex); - void *value = curr_in_clock->value; + void *value = curr_in_clock->value_data; + void* disk_data = curr_in_clock->disk_data; void *write_extraargs = curr_in_clock->write_extraargs; enum partial_eviction_cost cost; long bytes_freed_estimate = 0; curr_in_clock->pe_est_callback( value, + disk_data, &bytes_freed_estimate, &cost, write_extraargs @@ -1700,7 +1722,7 @@ static void maybe_flush_some (CACHETABLE ct, long size) { // set up a completion queue. 
// So, a completion queue cannot exist assert(!curr_in_clock->cq); - nb_mutex_write_unlock(&curr_in_clock->nb_mutex); + nb_mutex_write_unlock(&curr_in_clock->value_nb_mutex); } } else { @@ -1750,7 +1772,10 @@ static PAIR cachetable_insert_at(CACHETABLE ct, ctpair_add_ref(p); p->cachefile = cachefile; p->key = key; - p->value = value; + p->value_data = value; + p->cloned_value_data = NULL; + p->cloned_value_size = 0; + p->disk_data = NULL; p->fullhash = fullhash; p->dirty = dirty; p->attr = attr; @@ -1759,11 +1784,13 @@ static PAIR cachetable_insert_at(CACHETABLE ct, p->pe_callback = write_callback.pe_callback; p->pe_est_callback = write_callback.pe_est_callback; p->cleaner_callback = write_callback.cleaner_callback; + p->clone_callback = write_callback.clone_callback; p->write_extraargs = write_callback.write_extraargs; p->fullhash = fullhash; p->clock_next = p->clock_prev = 0; p->remove_me = FALSE; - nb_mutex_init(&p->nb_mutex); + nb_mutex_init(&p->value_nb_mutex); + nb_mutex_init(&p->disk_nb_mutex); p->cq = 0; pair_add_to_clock(ct, p); toku_list_push(&cachefile->pairs_for_cachefile, &p->next_for_cachefile); @@ -1845,7 +1872,7 @@ static int cachetable_put_internal( CACHETABLE_DIRTY ); assert(p); - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); //note_hash_count(count); return 0; } @@ -1869,20 +1896,105 @@ static int cachetable_get_pair (CACHEFILE cachefile, CACHEKEY key, u_int32_t ful return r; } +// ct locked on entry +static void +clone_pair(CACHETABLE ct, PAIR p) { + PAIR_ATTR old_attr = p->attr; + PAIR_ATTR new_attr; + + // act of cloning should be fast, + // not sure if we have to release + // and regrab the cachetable lock, + // but doing it for now + cachetable_unlock(ct); + p->clone_callback( + p->value_data, + &p->cloned_value_data, + &new_attr, + TRUE, + p->write_extraargs + ); + cachetable_lock(ct); + + // now we need to do the same actions we would do + // if the PAIR had been written to disk + // + // because we hold the value_nb_mutex, + // it doesn't matter whether we clear + // the pending bit before the clone + // or after the clone + p->checkpoint_pending = FALSE; + p->dirty = CACHETABLE_CLEAN; + if (new_attr.is_valid) { + p->attr = new_attr; + cachetable_change_pair_attr(ct, old_attr, new_attr); + } + p->cloned_value_size = p->attr.size; + ct->size_current += p->cloned_value_size; +} + +static void checkpoint_cloned_pair(WORKITEM wi) { + PAIR p = workitem_arg(wi); + CACHETABLE ct = p->cachefile->cachetable; + cachetable_lock(ct); + PAIR_ATTR new_attr; + // note that pending lock is not needed here because + // we KNOW we are in the middle of a checkpoint + // and that a begin_checkpoint cannot happen + cachetable_only_write_locked_data( + ct, + p, + TRUE, //for_checkpoint + &new_attr, + TRUE //is_clone + ); + nb_mutex_write_unlock(&p->disk_nb_mutex); + ct->n_checkpoint_clones_running--; + if (ct->n_checkpoint_clones_running == 0) { + int r = toku_pthread_cond_broadcast(&ct->clones_background_wait); + assert(r==0); + } + cachetable_unlock(ct); +} + +static void +checkpoint_cloned_pair_on_writer_thread(CACHETABLE ct, PAIR p) { + WORKITEM wi = &p->checkpoint_asyncwork; + workitem_init(wi, checkpoint_cloned_pair, p); + workqueue_enq(&ct->checkpoint_wq, wi, 1); +} + + static void write_locked_pair_for_checkpoint(CACHETABLE ct, PAIR p) { - // - // this function is called by toku_cachetable_get_and_pin to write locked nodes - // out for checkpoint. 
get_and_pin assumes that there is no
-   // completion queue, so we assert it here.
-   //
-   assert(!p->cq);
    if (p->dirty && p->checkpoint_pending) {
-       // this is essentially a flush_and_maybe_remove except that
-       // we already have p->nb_mutex and we just do the write in our own thread.
-       p->state = CTPAIR_WRITING;
-       cachetable_write_locked_pair(ct, p); // keeps the PAIR's write lock
+       if (p->clone_callback) {
+           // I think it is safe to grab the disk_nb_mutex after
+           // cloning the pair, but doing it before just to be safe,
+           // even though the act of cloning does not touch disk_data
+           nb_mutex_write_lock(&p->disk_nb_mutex, ct->mutex);
+           assert(!p->cloned_value_data);
+           clone_pair(ct, p);
+           assert(p->cloned_value_data);
+           // place it on the background thread and continue; it is the
+           // responsibility of the writer thread to release disk_nb_mutex
+           ct->n_checkpoint_clones_running++;
+           checkpoint_cloned_pair_on_writer_thread(ct, p);
+           // possibly run eviction, because the act of cloning adds to
+           // ct->size_current. We don't do this in
+           // write_pair_for_checkpoint_thread, because that clones at most
+           // one node at a time, whereas this may be called from many
+           // threads simultaneously
+           maybe_flush_some(ct, 0);
+       }
+       else {
+           // The pair is not cloneable, just write the pair to disk
+
+           // we already have p->value_nb_mutex and we just do the write in our own thread.
+           p->state = CTPAIR_WRITING;
+           cachetable_write_locked_pair(ct, p); // keeps the PAIR's write lock
+       }
    }
    else {
        //
@@ -1896,6 +2008,74 @@ write_locked_pair_for_checkpoint(CACHETABLE ct, PAIR p)
    }
}

+// On entry: hold the ct lock
+// On exit: the node is written out
+// Method: take the write lock,
+//         maybe write out the node,
+//         if p->cq, put the PAIR on the completion queue; else release the write lock
+static void
+write_pair_for_checkpoint_thread (CACHETABLE ct, PAIR p)
+{
+    nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); // grab an exclusive lock on the pair
+    if (p->dirty && p->checkpoint_pending) {
+        if (p->clone_callback) {
+            // I think it is safe to grab the disk_nb_mutex after
+            // cloning the pair, but doing it before just to be safe,
+            // even though the act of cloning does not touch disk_data
+            nb_mutex_write_lock(&p->disk_nb_mutex, ct->mutex);
+            assert(!p->cloned_value_data);
+            clone_pair(ct, p);
+            assert(p->cloned_value_data);
+        }
+        else {
+            // The pair is not cloneable, just write the pair to disk
+            // we already have p->value_nb_mutex and we just do the write in our own thread.
+            // this will grab and release disk_nb_mutex
+            p->state = CTPAIR_WRITING;
+            cachetable_write_locked_pair(ct, p); // keeps the PAIR's write lock
+        }
+        // if we are checkpointing a PAIR, a cq should not exist:
+        // close cannot be running, and unpin_and_remove would have
+        // already set the PAIR to clean
+        assert(!p->cq);
+
+        // now release value_nb_mutex, before we write the PAIR out,
+        // so that the PAIR is available to client threads
+        nb_mutex_write_unlock(&p->value_nb_mutex); // didn't call cachetable_write_pair so we have to unlock it ourselves.
+        if (p->clone_callback) {
+            // note that the pending lock is not needed here because
+            // we KNOW we are in the middle of a checkpoint
+            // and that a begin_checkpoint cannot happen
+            PAIR_ATTR attr;
+            cachetable_only_write_locked_data(
+                ct,
+                p,
+                TRUE, //for_checkpoint
+                &attr,
+                TRUE //is_clone
+                );
+            nb_mutex_write_unlock(&p->disk_nb_mutex);
+        }
+    }
+    else {
+        //
+        // we may clear the pending bit here because we have
+        // both the cachetable lock and the PAIR lock.
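/*
 * A minimal sketch of a CACHETABLE_CLONE_CALLBACK as clone_pair invokes it:
 * produce a copy the checkpoint can write out while client threads keep
 * modifying the original. deep_copy_for_checkpoint is a hypothetical helper
 * (the brt layer presumably builds on the toku_mempool_clone and
 * toku_fifo_clone primitives added later in this patch):
 *
 *   static void example_clone_cb(void *value_data, void **cloned_value_data,
 *                                PAIR_ATTR *new_attr, BOOL for_checkpoint,
 *                                void *write_extraargs) {
 *       *cloned_value_data = deep_copy_for_checkpoint(value_data);
 *       // either report a valid new attr, or mark it invalid so the
 *       // cachetable keeps the old one (clone_pair checks is_valid)
 *       new_attr->is_valid = FALSE;
 *   }
 */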
+ // The rule, as mentioned in toku_cachetable_begin_checkpoint, + // is that to clear the bit, we must have both the PAIR lock + // and the pending lock + // + p->checkpoint_pending = FALSE; + if (p->cq) { + workitem_init(&p->asyncwork, NULL, p); + workqueue_enq(p->cq, &p->asyncwork, 1); + } + else { + nb_mutex_write_unlock(&p->value_nb_mutex); + } + } +} + // // For each PAIR associated with these CACHEFILEs and CACHEKEYs // if the checkpoint_pending bit is set and the PAIR is dirty, write the PAIR @@ -1926,7 +2106,7 @@ static void checkpoint_dependent_pairs( assert(curr_dep_pair != NULL); // pair had better be locked, as we are assuming // to own the write lock - assert(nb_mutex_writers(&curr_dep_pair->nb_mutex)); + assert(nb_mutex_writers(&curr_dep_pair->value_nb_mutex)); // we need to update the dirtyness of the dependent pair, // because the client may have dirtied it while holding its lock, // and if the pair is pending a checkpoint, it needs to be written out @@ -2044,47 +2224,6 @@ static uint64_t get_tnow(void) { return tv.tv_sec * 1000000ULL + tv.tv_usec; } -// for debug -static PAIR write_for_checkpoint_pair = NULL; - - -// On entry: hold the ct lock -// On exit: the node is written out -// Method: take write lock -// maybe write out the node -// if p->cq, put on completion queue. Else release write lock -static void -write_pair_for_checkpoint (CACHETABLE ct, PAIR p) -{ - write_for_checkpoint_pair = p; - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); // grab an exclusive lock on the pair - if (p->dirty && p->checkpoint_pending) { - // this is essentially a flush_and_maybe_remove except that - // we already have p->nb_mutex and we just do the write in our own thread. - p->state = CTPAIR_WRITING; - workitem_init(&p->asyncwork, NULL, p); - cachetable_write_pair(ct, p, FALSE); // releases the write lock on the pair - } - else { - // - // we may clear the pending bit here because we have - // both the cachetable lock and the PAIR lock. - // The rule, as mentioned in toku_cachetable_begin_checkpoint, - // is that to clear the bit, we must have both the PAIR lock - // and the pending lock - // - p->checkpoint_pending = FALSE; - if (p->cq) { - workitem_init(&p->asyncwork, NULL, p); - workqueue_enq(p->cq, &p->asyncwork, 1); - } - else { - nb_mutex_write_unlock(&p->nb_mutex); // didn't call cachetable_write_pair so we have to unlock it ourselves. 
- } - } - write_for_checkpoint_pair = NULL; -} - // // cachetable lock and PAIR lock are held on entry // On exit, cachetable lock is still held, but PAIR lock @@ -2109,14 +2248,16 @@ do_partial_fetch( p->state = CTPAIR_READING; rwlock_prefer_read_lock(&cachefile->fdlock, ct->mutex); + nb_mutex_write_lock(&p->disk_nb_mutex, ct->mutex); cachetable_unlock(ct); - int r = pf_callback(p->value, read_extraargs, cachefile->fd, &new_attr); + int r = pf_callback(p->value_data, p->disk_data, read_extraargs, cachefile->fd, &new_attr); lazy_assert_zero(r); cachetable_lock(ct); rwlock_read_unlock(&cachefile->fdlock); p->attr = new_attr; cachetable_change_pair_attr(ct, old_attr, new_attr); p->state = CTPAIR_IDLE; + nb_mutex_write_unlock(&p->disk_nb_mutex); if (keep_pair_locked) { // if the caller wants the pair to remain locked // that means the caller requests continued @@ -2130,11 +2271,38 @@ do_partial_fetch( workqueue_enq(p->cq, &p->asyncwork, 1); } else { - nb_mutex_write_unlock(&p->nb_mutex); + nb_mutex_write_unlock(&p->value_nb_mutex); } } } +void toku_cachetable_pf_pinned_pair( + void* value, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void* read_extraargs, + CACHEFILE cf, + CACHEKEY key, + u_int32_t fullhash + ) +{ + PAIR_ATTR attr; + PAIR p = NULL; + cachetable_lock(cf->cachetable); + int r = cachetable_get_pair(cf, key, fullhash, &p); + assert_zero(r); + assert(p->value_data == value); + assert(nb_mutex_writers(&p->value_nb_mutex)); + nb_mutex_write_lock(&p->disk_nb_mutex, cf->cachetable->mutex); + rwlock_prefer_read_lock(&cf->fdlock, cf->cachetable->mutex); + int fd = cf->fd; + cachetable_unlock(cf->cachetable); + pf_callback(value, p->disk_data, read_extraargs, fd, &attr); + cachetable_lock(cf->cachetable); + nb_mutex_write_unlock(&p->disk_nb_mutex); + rwlock_read_unlock(&cf->fdlock); + cachetable_unlock(cf->cachetable); +} + int toku_cachetable_get_and_pin ( CACHEFILE cachefile, @@ -2146,6 +2314,7 @@ int toku_cachetable_get_and_pin ( CACHETABLE_FETCH_CALLBACK fetch_callback, CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + BOOL may_modify_value, void* read_extraargs // parameter for fetch_callback, pf_req_callback, and pf_callback ) { @@ -2165,6 +2334,7 @@ int toku_cachetable_get_and_pin ( fetch_callback, pf_req_callback, pf_callback, + may_modify_value, read_extraargs, 0, // number of dependent pairs that we may need to checkpoint NULL, // array of cachefiles of dependent pairs @@ -2174,8 +2344,73 @@ int toku_cachetable_get_and_pin ( ); } +// Read a pair from a cachefile into memory using the pair's fetch callback +static void cachetable_fetch_pair( + CACHETABLE ct, + CACHEFILE cf, + PAIR p, + CACHETABLE_FETCH_CALLBACK fetch_callback, + void* read_extraargs, + BOOL keep_pair_locked + ) +{ + // helgrind + CACHEKEY key = p->key; + u_int32_t fullhash = p->fullhash; + + void *toku_value = NULL; + void *disk_data = NULL; + PAIR_ATTR attr; + + // FIXME this should be enum cachetable_dirty, right? 
+ int dirty = 0; + + WHEN_TRACE_CT(printf("%s:%d CT: fetch_callback(%lld...)\n", __FILE__, __LINE__, key)); + + rwlock_prefer_read_lock(&cf->fdlock, ct->mutex); + nb_mutex_write_lock(&p->disk_nb_mutex, ct->mutex); + cachetable_unlock(ct); + + int r; + assert(!toku_cachefile_is_dev_null_unlocked(cf)); + r = fetch_callback(cf, cf->fd, key, fullhash, &toku_value, &disk_data, &attr, &dirty, read_extraargs); + if (dirty) + p->dirty = CACHETABLE_DIRTY; + + cachetable_lock(ct); + rwlock_read_unlock(&cf->fdlock); + // brt.c asserts that get_and_pin succeeds, + // so we might as well just assert it here as opposed + // to trying to support an INVALID state + assert(r == 0); + + p->value_data = toku_value; + p->disk_data = disk_data; + p->attr = attr; + cachetable_add_pair_attr(ct, attr); + p->state = CTPAIR_IDLE; + nb_mutex_write_unlock(&p->disk_nb_mutex); + if (keep_pair_locked) { + // if the caller wants the pair to remain locked + // that means the caller requests continued + // ownership of the PAIR, so there better not + // be a cq asking to transfer ownership + assert(!p->cq); + } + else { + if (p->cq) { + workitem_init(&p->asyncwork, NULL, p); + workqueue_enq(p->cq, &p->asyncwork, 1); + } + else { + nb_mutex_write_unlock(&p->value_nb_mutex); + } + } + if (0) printf("%s:%d %"PRId64" complete\n", __FUNCTION__, __LINE__, key.b); +} + static BOOL resolve_checkpointing_fast(PAIR p) { - return !(p->checkpoint_pending && (p->dirty == CACHETABLE_DIRTY)); + return !(p->checkpoint_pending && (p->dirty == CACHETABLE_DIRTY) && !p->clone_callback); } static void checkpoint_pair_and_dependent_pairs( CACHETABLE ct, @@ -2238,6 +2473,7 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( CACHETABLE_FETCH_CALLBACK fetch_callback, CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + BOOL may_modify_value, void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback u_int32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint CACHEFILE* dependent_cfs, // array of cachefiles of dependent pairs @@ -2270,24 +2506,26 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( else if (p->state == CTPAIR_WRITING) { cachetable_wait_writing++; } - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); pair_touch(p); - checkpoint_pair_and_dependent_pairs( - ct, - p, - num_dependent_pairs, - dependent_cfs, - dependent_keys, - dependent_fullhash, - dependent_dirty - ); + if (may_modify_value) { + checkpoint_pair_and_dependent_pairs( + ct, + p, + num_dependent_pairs, + dependent_cfs, + dependent_keys, + dependent_fullhash, + dependent_dirty + ); + } cachetable_unlock(ct); - BOOL partial_fetch_required = pf_req_callback(p->value,read_extraargs); + BOOL partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); // shortcutting a path to getting the user the data // helps scalability for in-memory workloads if (!partial_fetch_required) { - *value = p->value; + *value = p->value_data; if (sizep) *sizep = p->attr.size; return 0; } @@ -2327,16 +2565,18 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( CACHETABLE_CLEAN ); assert(p); - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); - checkpoint_pair_and_dependent_pairs( - ct, - p, - num_dependent_pairs, - dependent_cfs, - dependent_keys, - dependent_fullhash, - dependent_dirty - ); + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); + if (may_modify_value) { + checkpoint_pair_and_dependent_pairs( + ct, + p, + 
num_dependent_pairs, + dependent_cfs, + dependent_keys, + dependent_fullhash, + dependent_dirty + ); + } uint64_t t0 = get_tnow(); // Retrieve the value of the PAIR from disk. @@ -2348,7 +2588,7 @@ int toku_cachetable_get_and_pin_with_dep_pairs ( goto got_value; } got_value: - *value = p->value; + *value = p->value_data; if (sizep) *sizep = p->attr.size; maybe_flush_some(ct, 0); cachetable_unlock(ct); @@ -2378,12 +2618,12 @@ int toku_cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, u_int3 if (p->key.b==key.b && p->cachefile==cachefile) { if (!p->checkpoint_pending && //If checkpoint pending, we would need to first write it, which would make it clean p->dirty && - nb_mutex_users(&p->nb_mutex) == 0 + nb_mutex_users(&p->value_nb_mutex) == 0 ) { cachetable_maybe_get_and_pin_hits++; // because nb_mutex_users is 0, this is fast - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); - *value = p->value; + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); + *value = p->value_data; pair_touch(p); r = 0; //printf("%s:%d cachetable_maybe_get_and_pin(%lld)--> %p\n", __FILE__, __LINE__, key, *value); @@ -2410,12 +2650,12 @@ int toku_cachetable_maybe_get_and_pin_clean (CACHEFILE cachefile, CACHEKEY key, count++; if (p->key.b==key.b && p->cachefile==cachefile) { if (!p->checkpoint_pending && //If checkpoint pending, we would need to first write it, which would make it clean (if the pin would be used for writes. If would be used for read-only we could return it, but that would increase complexity) - nb_mutex_users(&p->nb_mutex) == 0 + nb_mutex_users(&p->value_nb_mutex) == 0 ) { cachetable_maybe_get_and_pin_hits++; // because nb_mutex_users is 0, this is fast - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); - *value = p->value; + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); + *value = p->value_data; r = 0; //printf("%s:%d cachetable_maybe_get_and_pin_clean(%lld)--> %p\n", __FILE__, __LINE__, key, *value); } @@ -2441,7 +2681,7 @@ cachetable_unpin_internal(CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, for (PAIR p=ct->table[fullhash&(ct->table_size-1)]; p; p=p->hash_chain) { count++; if (p->key.b==key.b && p->cachefile==cachefile) { - assert(nb_mutex_writers(&p->nb_mutex)>0); + assert(nb_mutex_writers(&p->value_nb_mutex)>0); // this is a client thread that is unlocking the PAIR // That is, a cleaner, flusher, or get_and_pin thread // So, there must not be a completion queue lying around @@ -2451,7 +2691,7 @@ cachetable_unpin_internal(CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, // So, we should assert that a completion queue does not // exist assert(!p->cq); - nb_mutex_write_unlock(&p->nb_mutex); + nb_mutex_write_unlock(&p->value_nb_mutex); if (dirty) p->dirty = CACHETABLE_DIRTY; if (attr.is_valid) { PAIR_ATTR old_attr = p->attr; @@ -2503,6 +2743,7 @@ int toku_cachetable_get_and_pin_nonblocking ( CACHETABLE_FETCH_CALLBACK fetch_callback, CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback, CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + BOOL may_modify_value, void *read_extraargs, UNLOCKERS unlockers ) @@ -2539,10 +2780,12 @@ int toku_cachetable_get_and_pin_nonblocking ( // Otherwise, if there is no write lock grabbed, we know there will // be no stall, so we grab the lock and return to the user // - if (!nb_mutex_writers(&p->nb_mutex) && resolve_checkpointing_fast(p)) { + if (!nb_mutex_writers(&p->value_nb_mutex) && + (!may_modify_value || resolve_checkpointing_fast(p))) + { //cachetable_hit++; - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); - if 
(p->checkpoint_pending) { + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); + if (may_modify_value && p->checkpoint_pending) { write_locked_pair_for_checkpoint(ct, p); } pair_touch(p); @@ -2551,7 +2794,7 @@ int toku_cachetable_get_and_pin_nonblocking ( // when calling pf_req_callback, and if possible, returns the PAIR to the user without // reacquiring the cachetable lock cachetable_unlock(ct); - BOOL partial_fetch_required = pf_req_callback(p->value,read_extraargs); + BOOL partial_fetch_required = pf_req_callback(p->value_data,read_extraargs); // // Just because the PAIR exists does necessarily mean the all the data the caller requires // is in memory. A partial fetch may be required, which is evaluated above @@ -2568,7 +2811,7 @@ int toku_cachetable_get_and_pin_nonblocking ( return TOKUDB_TRY_AGAIN; } else { - *value = p->value; + *value = p->value_data; return 0; } } @@ -2576,44 +2819,36 @@ int toku_cachetable_get_and_pin_nonblocking ( run_unlockers(unlockers); // The contract says the unlockers are run with the ct lock being held. // Now wait for the I/O to occur. // We need to obtain the lock (waiting for the write to finish), but then we only waited so we could wake up again - if (p->checkpoint_pending) { - // an optimization we can later do is if - // we can grab the write lock on the pair and - // it is clean, then dont run the unlockers, simply - // clear the pending bit and return the PAIR to the user - // but this is simpler. - //cachetable_wait_checkpoint++; - write_pair_for_checkpoint(ct, p); + if (p->state == CTPAIR_READING) { + cachetable_wait_reading++; + } + else if (p->state == CTPAIR_WRITING) { + cachetable_wait_writing++; + } + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); + if (may_modify_value && p->checkpoint_pending) { + write_locked_pair_for_checkpoint(ct, p); + } + // deadlock discovered in #4357 shows we need + // to do this. After running unlockers and waiting + // on the PAIR lock, a flusher thread may come + // along and try to unpin_and_remove this PAIR. + // In that case, the thread running unpin_and_remove + // sets up a completion queue and we must transfer ownership + // of this PAIR lock to that thread via the completion + // queue + if (p->cq) { + // while we wait on the PAIR lock, a thread may come in and + // call toku_cachetable_unpin_and_remove on this PAIR. + // In that case, we must do NOTHING with the PAIR, as + // it has been removed from the cachetable's data structures. + // So, we should just pass the PAIR over to the completion + // queue. + workitem_init(&p->asyncwork, NULL, p); + workqueue_enq(p->cq, &p->asyncwork, 1); } else { - if (p->state == CTPAIR_READING) { - cachetable_wait_reading++; - } - else if (p->state == CTPAIR_WRITING) { - cachetable_wait_writing++; - } - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); - // deadlock discovered in #4357 shows we need - // to do this. After running unlockers and waiting - // on the PAIR lock, a flusher thread may come - // along and try to unpin_and_remove this PAIR. - // In that case, the thread running unpin_and_remove - // sets up a completion queue and we must transfer ownership - // of this PAIR lock to that thread via the completion - // queue - if (p->cq) { - // while we wait on the PAIR lock, a thread may come in and - // call toku_cachetable_unpin_and_remove on this PAIR. - // In that case, we must do NOTHING with the PAIR, as - // it has been removed from the cachetable's data structures. - // So, we should just pass the PAIR over to the completion - // queue. 
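/*
 * Caller-side sketch of the nonblocking contract (assumed caller names;
 * the argument list is abbreviated to the parameters this patch touches):
 *
 *   struct unlockers unlockers;
 *   unlockers.locked = TRUE;
 *   unlockers.f = release_my_locks;   // hypothetical callback
 *   // ... remaining unlockers fields per cachetable.h ...
 *   while (1) {
 *       int r = toku_cachetable_get_and_pin_nonblocking(
 *           cf, key, fullhash, &value, NULL,
 *           wc, fetch_cb, pf_req_cb, pf_cb,
 *           TRUE,              // may_modify_value: resolve pending writes
 *           read_extraargs, &unlockers);
 *       if (r == 0) break;               // pinned without blocking
 *       assert(r == TOKUDB_TRY_AGAIN);   // our locks were released while
 *       reacquire_my_locks();            // we slept; retake them and retry
 *   }                                    // (hypothetical helper)
 */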
- workitem_init(&p->asyncwork, NULL, p); - workqueue_enq(p->cq, &p->asyncwork, 1); - } - else { - nb_mutex_write_unlock(&p->nb_mutex); - } + nb_mutex_write_unlock(&p->value_nb_mutex); } cachetable_unlock(ct); return TOKUDB_TRY_AGAIN; @@ -2635,7 +2870,7 @@ int toku_cachetable_get_and_pin_nonblocking ( CACHETABLE_CLEAN ); assert(p); - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); run_unlockers(unlockers); // we hold the ct mutex. u_int64_t t0 = get_tnow(); cachetable_fetch_pair(ct, cf, p, fetch_callback, read_extraargs, FALSE); @@ -2657,6 +2892,35 @@ struct cachefile_partial_prefetch_args { void *read_extraargs; }; +// Worker thread function to read a pair from a cachefile to memory +static void cachetable_reader(WORKITEM wi) { + struct cachefile_prefetch_args* cpargs = workitem_arg(wi); + CACHETABLE ct = cpargs->p->cachefile->cachetable; + cachetable_lock(ct); + // TODO: find a way to properly pass some information for read_extraargs + // This is only called in toku_cachefile_prefetch, by putting it on a workqueue + // The problem is described in comments in toku_cachefile_prefetch + cachetable_fetch_pair( + ct, + cpargs->p->cachefile, + cpargs->p, + cpargs->fetch_callback, + cpargs->read_extraargs, + FALSE + ); + cachetable_unlock(ct); + toku_free(cpargs); +} + +static void cachetable_partial_reader(WORKITEM wi) { + struct cachefile_partial_prefetch_args *cpargs = workitem_arg(wi); + CACHETABLE ct = cpargs->p->cachefile->cachetable; + cachetable_lock(ct); + do_partial_fetch(ct, cpargs->p->cachefile, cpargs->p, cpargs->pf_callback, cpargs->read_extraargs, FALSE); + cachetable_unlock(ct); + toku_free(cpargs); +} + int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash, CACHETABLE_WRITE_CALLBACK write_callback, CACHETABLE_FETCH_CALLBACK fetch_callback, @@ -2685,7 +2949,6 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash, PAIR p; for (p = ct->table[fullhash&(ct->table_size-1)]; p; p = p->hash_chain) { if (p->key.b==key.b && p->cachefile==cf) { - //Maybe check for pending and do write_pair_for_checkpoint()? 
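/*
 * Caller-side sketch for this prefetch path (callback arguments as in
 * get_and_pin, order abbreviated; doing_prefetch reports whether an
 * asynchronous read was actually scheduled):
 *
 *   BOOL doing_prefetch = FALSE;
 *   int r = toku_cachefile_prefetch(cf, key, fullhash, wc,
 *                                   fetch_cb, pf_req_cb, pf_cb,
 *                                   read_extraargs, &doing_prefetch);
 *   assert(r == 0);
 *   if (!doing_prefetch) {
 *       // the PAIR was already cached or pinned; nothing was enqueued
 *   }
 */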
pair_touch(p); break; } @@ -2706,7 +2969,7 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash, CACHETABLE_CLEAN ); assert(p); - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); struct cachefile_prefetch_args *MALLOC(cpargs); cpargs->p = p; cpargs->fetch_callback = fetch_callback; @@ -2717,15 +2980,15 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash, *doing_prefetch = TRUE; } } - else if (nb_mutex_users(&p->nb_mutex)==0) { + else if (nb_mutex_users(&p->value_nb_mutex)==0) { // client should not be trying to prefetch a node that is either // belongs to a cachefile being flushed or to a PAIR being // unpinned and removed assert(!p->cq); // nobody else is using the node, so we should go ahead and prefetch - nb_mutex_write_lock(&p->nb_mutex, ct->mutex); - BOOL partial_fetch_required = pf_req_callback(p->value, read_extraargs); + nb_mutex_write_lock(&p->value_nb_mutex, ct->mutex); + BOOL partial_fetch_required = pf_req_callback(p->value_data, read_extraargs); if (partial_fetch_required) { p->state = CTPAIR_READING; @@ -2743,7 +3006,7 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash, // sanity check, we already have an assert // before locking the PAIR assert(!p->cq); - nb_mutex_write_unlock(&p->nb_mutex); + nb_mutex_write_unlock(&p->value_nb_mutex); } } cachetable_unlock(ct); @@ -2794,7 +3057,7 @@ int64_t UU() toku_cachetable_size_slowslow (CACHETABLE ct) { int64_t ret = 0; for (p=ct->clock_head; ct->clock_head!=NULL && (p!=ct->clock_head || is_first); p=p->clock_next) { is_first=FALSE; - ret += brtnode_memory_size((BRTNODE) p->value); + ret += brtnode_memory_size((BRTNODE) p->value_data); } return ret; } @@ -2808,7 +3071,7 @@ int64_t UU() toku_cachetable_size_discrepancy (CACHETABLE ct) { int64_t ret = 0; for (p=ct->clock_head; ct->clock_head!=NULL && (p!=ct->clock_head || is_first); p=p->clock_next) { is_first=FALSE; - ret += brtnode_memory_size((BRTNODE) p->value) - p->attr.size; + ret += brtnode_memory_size((BRTNODE) p->value_data) - p->attr.size; } return ret; } @@ -2822,8 +3085,8 @@ int64_t UU() toku_cachetable_size_discrepancy_pinned (CACHETABLE ct) { int64_t ret = 0; for (p=ct->clock_head; ct->clock_head!=NULL && (p!=ct->clock_head || is_first); p=p->clock_next) { is_first=FALSE; - if (nb_mutex_writers(&p->nb_mutex)) { - ret += brtnode_memory_size((BRTNODE) p->value) - p->attr.size; + if (nb_mutex_writers(&p->value_nb_mutex)) { + ret += brtnode_memory_size((BRTNODE) p->value_data) - p->attr.size; } } return ret; @@ -3011,7 +3274,7 @@ static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf) { // Once again, the assumption is that any PAIR // is either unlocked or on a writer thread work queue // - if (!nb_mutex_writers(&p->nb_mutex)) { + if (!nb_mutex_writers(&p->value_nb_mutex)) { flush_and_maybe_remove(ct, p); } } @@ -3046,7 +3309,7 @@ static void cachetable_flush_cachefile(CACHETABLE ct, CACHEFILE cf) { PAIR p = workitem_arg(wi); p->cq = 0; //Some other thread owned the lock, but transferred ownership to the thread executing this function - nb_mutex_write_unlock(&p->nb_mutex); //Release the lock, no one has a pin, per our assumptions above. + nb_mutex_write_unlock(&p->value_nb_mutex); //Release the lock, no one has a pin, per our assumptions above. 
BOOL destroyed; cachetable_maybe_remove_and_free_pair(ct, p, &destroyed); } @@ -3097,9 +3360,11 @@ toku_cachetable_close (CACHETABLE *ctp) { int r = toku_pthread_mutex_destroy(&ct->openfd_mutex); resource_assert_zero(r); cachetable_unlock(ct); toku_destroy_workers(&ct->wq, &ct->threadpool); + toku_destroy_workers(&ct->checkpoint_wq, &ct->checkpoint_threadpool); toku_kibbutz_destroy(ct->kibbutz); toku_omt_destroy(&ct->reserved_filenums); r = toku_pthread_mutex_destroy(&ct->cachefiles_mutex); resource_assert_zero(r); + r = toku_pthread_cond_destroy(&ct->clones_background_wait); resource_assert_zero(r); toku_free(ct->table); toku_free(ct->env_dir); toku_free(ct); @@ -3125,8 +3390,12 @@ int toku_cachetable_unpin_and_remove ( count++; if (p->key.b==key.b && p->cachefile==cachefile) { p->dirty = CACHETABLE_CLEAN; // clear the dirty bit. We're just supposed to remove it. - assert(nb_mutex_writers(&p->nb_mutex)); - + assert(nb_mutex_writers(&p->value_nb_mutex)); + // grab disk_nb_mutex to ensure any background thread writing + // out a cloned value completes + nb_mutex_write_lock(&p->disk_nb_mutex, ct->mutex); + assert(p->cloned_value_data == NULL); + // // take care of key removal // @@ -3168,7 +3437,8 @@ int toku_cachetable_unpin_and_remove ( // we must not have a completion queue // lying around, as we may create one now assert(!p->cq); - nb_mutex_write_unlock(&p->nb_mutex); + nb_mutex_write_unlock(&p->value_nb_mutex); + nb_mutex_write_unlock(&p->disk_nb_mutex); // // As of Dr. Noga, only these threads may be // blocked waiting to lock this PAIR: @@ -3213,10 +3483,10 @@ int toku_cachetable_unpin_and_remove ( // nothing, and looking at those functions, it is clear they do nothing. // cachetable_remove_pair(ct, p); - if (nb_mutex_blocked_writers(&p->nb_mutex)>0) { + if (nb_mutex_blocked_writers(&p->value_nb_mutex)>0) { struct workqueue cq; workqueue_init(&cq); - while (nb_mutex_blocked_writers(&p->nb_mutex)>0) { + while (nb_mutex_blocked_writers(&p->value_nb_mutex)>0) { //Someone (one or more checkpoint threads) is waiting for a write lock //on this pair. //They are still blocked because we have not released the @@ -3239,7 +3509,7 @@ int toku_cachetable_unpin_and_remove ( //We are holding the write lock on the pair cachetable_lock(ct); - assert(nb_mutex_writers(&p->nb_mutex) == 1); + assert(nb_mutex_writers(&p->value_nb_mutex) == 1); // let's also assert that this PAIR was not somehow marked // as pending a checkpoint. Above, when calling // remove_key(), we cleared the dirty bit so that @@ -3247,7 +3517,7 @@ int toku_cachetable_unpin_and_remove ( // make sure that our assumption is valid. assert(!p->checkpoint_pending); assert(p->attr.cache_pressure_size == 0); - nb_mutex_write_unlock(&p->nb_mutex); + nb_mutex_write_unlock(&p->value_nb_mutex); // Because we assume it is just the checkpoint thread // that may have been blocked (as argued above), // it is safe to simply remove the PAIR from the @@ -3256,6 +3526,9 @@ int toku_cachetable_unpin_and_remove ( p->cq = NULL; workqueue_destroy(&cq); } + // just a sanity check + assert(nb_mutex_users(&p->disk_nb_mutex) == 0); + assert(p->cloned_value_data == NULL); //Remove pair. cachetable_free_pair(ct, p); r = 0; @@ -3328,6 +3601,7 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) { // written to disk before it can be modified.) 
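/*
 * The pending-bit protocol toku_cachetable_begin_checkpoint establishes,
 * restated as a sketch:
 *
 *   // begin_checkpoint, holding ct->pending_lock for WRITE:
 *   //   every PAIR that is dirty or value-locked gets
 *   //   p->checkpoint_pending = TRUE and is linked into the pending list.
 *   //
 *   // clearing the bit later requires the PAIR's value_nb_mutex plus a
 *   // READ lock on ct->pending_lock (or knowing a checkpoint is already
 *   // in progress), so a set and a clear can never race.
 */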
{ + brt_begin_checkpoint(); unsigned i; if (logger) { // Unpin all 'inprogress rollback log nodes' pinned by transactions int r = toku_omt_iterate(logger->live_txns, @@ -3452,6 +3726,13 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) { rwlock_write_lock(&ct->pending_lock, ct->mutex); ct->checkpoint_is_beginning = TRUE; // detect threadsafety bugs, must set checkpoint_is_beginning ... invariant(ct->checkpoint_prohibited == 0); // ... before testing checkpoint_prohibited + invariant(ct->n_checkpoint_clones_running == 0); + u_int64_t leaf_sum = 0; + u_int64_t nonleaf_sum = 0; + u_int64_t rollback_sum = 0; + u_int64_t maybe_leaf_sum = 0; + u_int64_t maybe_nonleaf_sum = 0; + u_int64_t maybe_rollback_sum = 0; for (i=0; i < ct->table_size; i++) { PAIR p; for (p = ct->table[i]; p; p=p->hash_chain) { @@ -3469,7 +3750,17 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) { // BOTH the cachetable lock and the PAIR lock. Otherwise, // we may end up clearing the pending bit before the // current lock is ever released. - if (p->dirty || nb_mutex_writers(&p->nb_mutex)) { + if (p->dirty || nb_mutex_writers(&p->value_nb_mutex)) { + if (p->dirty) { + leaf_sum += p->attr.leaf_size; + nonleaf_sum += p->attr.nonleaf_size; + rollback_sum += p->attr.rollback_size; + } + else { + maybe_leaf_sum += p->attr.leaf_size; + maybe_nonleaf_sum += p->attr.nonleaf_size; + maybe_rollback_sum += p->attr.rollback_size; + } p->checkpoint_pending = TRUE; if (ct->pending_head) { ct->pending_head->pending_prev = p; @@ -3481,6 +3772,15 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) { } } } + if (0) { + printf("leaf_sum: %"PRIu64"\n", leaf_sum); + printf("nonleaf_sum: %"PRIu64"\n", nonleaf_sum); + printf("rollback_sum: %"PRIu64"\n", rollback_sum); + printf("maybe_leaf_sum: %"PRIu64"\n", maybe_leaf_sum); + printf("maybe_nonleaf: %"PRIu64"\n", maybe_nonleaf_sum); + printf("maybe_rollback: %"PRIu64"\n", maybe_rollback_sum); + printf("*****************************\n"); + } rwlock_write_unlock(&ct->pending_lock); if (0 && (npending > 0 || ct->checkpoint_num_files > 0 || ct->checkpoint_num_txns > 0)) { fprintf(stderr, "%s:%d pending=%u %u files=%u txns=%u\n", __FUNCTION__, __LINE__, npending, ct->n_in_table, ct->checkpoint_num_files, ct->checkpoint_num_txns); @@ -3533,30 +3833,35 @@ toku_cachetable_end_checkpoint(CACHETABLE ct, TOKULOGGER logger, // // #TODO: #1424 Long-lived get and pin (held by cursor) will cause a deadlock here. // Need some solution (possibly modify requirement for write lock or something else). - PAIR p; - while ((p = ct->pending_head)!=0) { + PAIR p; + while ((p = ct->pending_head)!=0) { ct->pending_head = ct->pending_head->pending_next; pending_pairs_remove(ct, p); - write_pair_for_checkpoint(ct, p); // if still pending, clear the pending bit and write out the node - // Don't need to unlock and lock cachetable, because the cachetable was unlocked and locked while the flush callback ran. - } + write_pair_for_checkpoint_thread(ct, p); // if still pending, clear the pending bit and write out the node + // Don't need to unlock and lock cachetable, because the cachetable was unlocked and locked while the flush callback ran. 
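/*
 * The clone accounting that makes the wait just below correct, assembled
 * from the pieces this patch adds:
 *
 *   // client thread, in write_locked_pair_for_checkpoint, under the ct
 *   // lock, after clone_pair():
 *   ct->n_checkpoint_clones_running++;
 *   checkpoint_cloned_pair_on_writer_thread(ct, p);  // enqueue the write
 *
 *   // checkpoint worker thread, in checkpoint_cloned_pair():
 *   cachetable_only_write_locked_data(ct, p, TRUE, &attr, TRUE);
 *   ct->n_checkpoint_clones_running--;
 *   if (ct->n_checkpoint_clones_running == 0)
 *       toku_pthread_cond_broadcast(&ct->clones_background_wait);
 *
 *   // end_checkpoint, below: sleep until every clone write has finished
 *   while (ct->n_checkpoint_clones_running > 0)
 *       toku_pthread_cond_wait(&ct->clones_background_wait, ct->mutex);
 */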
+ } } - assert(!ct->pending_head); + assert(!ct->pending_head); + while (ct->n_checkpoint_clones_running > 0) { + int r = toku_pthread_cond_wait(&ct->clones_background_wait, ct->mutex); + assert(r==0); + } + assert(ct->n_checkpoint_clones_running == 0); { // have just written data blocks, so next write the translation and header for each open dictionary - CACHEFILE cf; + CACHEFILE cf; //cachefiles_in_checkpoint is protected by the checkpoint_safe_lock - for (cf = ct->cachefiles_in_checkpoint; cf; cf=cf->next_in_checkpoint) { - if (cf->checkpoint_userdata) { + for (cf = ct->cachefiles_in_checkpoint; cf; cf=cf->next_in_checkpoint) { + if (cf->checkpoint_userdata) { rwlock_prefer_read_lock(&cf->fdlock, ct->mutex); if (!logger || ct->lsn_of_checkpoint_in_progress.lsn != cf->most_recent_global_checkpoint_that_finished_early.lsn) { assert(ct->lsn_of_checkpoint_in_progress.lsn >= cf->most_recent_global_checkpoint_that_finished_early.lsn); cachetable_unlock(ct); assert(cf->checkpoint_state == CS_CALLED_BEGIN_CHECKPOINT); - toku_cachetable_set_checkpointing_user_data_status(1); + toku_cachetable_set_checkpointing_user_data_status(1); int r = cf->checkpoint_userdata(cf, cf->fd, cf->userdata); - toku_cachetable_set_checkpointing_user_data_status(0); + toku_cachetable_set_checkpointing_user_data_status(0); assert(r==0); cf->checkpoint_state = CS_CALLED_CHECKPOINT; cachetable_lock(ct); @@ -3565,17 +3870,17 @@ toku_cachetable_end_checkpoint(CACHETABLE ct, TOKULOGGER logger, assert(cf->checkpoint_state == CS_NOT_IN_PROGRESS); } rwlock_read_unlock(&cf->fdlock); - } - } + } + } } { // everything has been written to file (or at least OS internal buffer)... - // ... so fsync and call checkpoint-end function in block translator - // to free obsolete blocks on disk used by previous checkpoint - CACHEFILE cf; + // ... so fsync and call checkpoint-end function in block translator + // to free obsolete blocks on disk used by previous checkpoint + CACHEFILE cf; //cachefiles_in_checkpoint is protected by the checkpoint_safe_lock - for (cf = ct->cachefiles_in_checkpoint; cf; cf=cf->next_in_checkpoint) { - if (cf->end_checkpoint_userdata) { + for (cf = ct->cachefiles_in_checkpoint; cf; cf=cf->next_in_checkpoint) { + if (cf->end_checkpoint_userdata) { rwlock_prefer_read_lock(&cf->fdlock, ct->mutex); if (!logger || ct->lsn_of_checkpoint_in_progress.lsn != cf->most_recent_global_checkpoint_that_finished_early.lsn) { assert(ct->lsn_of_checkpoint_in_progress.lsn >= cf->most_recent_global_checkpoint_that_finished_early.lsn); @@ -3589,8 +3894,8 @@ toku_cachetable_end_checkpoint(CACHETABLE ct, TOKULOGGER logger, } assert(cf->checkpoint_state == CS_NOT_IN_PROGRESS); rwlock_read_unlock(&cf->fdlock); - } - } + } + } } cachetable_unlock(ct); @@ -3598,7 +3903,7 @@ toku_cachetable_end_checkpoint(CACHETABLE ct, TOKULOGGER logger, //Delete list of cachefiles in the checkpoint, //remove reference //clear bit saying they're in checkpoint - CACHEFILE cf; + CACHEFILE cf; //cachefiles_in_checkpoint is protected by the checkpoint_safe_lock while ((cf = ct->cachefiles_in_checkpoint)) { ct->cachefiles_in_checkpoint = cf->next_in_checkpoint; @@ -3620,19 +3925,20 @@ toku_cachetable_end_checkpoint(CACHETABLE ct, TOKULOGGER logger, // For testing purposes only. Dictionary has been fsync-ed to disk but log has not yet been written. 
if (testcallback_f) - testcallback_f(testextra); + testcallback_f(testextra); if (logger) { - int r = toku_log_end_checkpoint(logger, NULL, - 1, // want the end_checkpoint to be fsync'd - ct->lsn_of_checkpoint_in_progress.lsn, - 0, - ct->checkpoint_num_files, - ct->checkpoint_num_txns); - assert(r==0); - toku_logger_note_checkpoint(logger, ct->lsn_of_checkpoint_in_progress); + int r = toku_log_end_checkpoint(logger, NULL, + 1, // want the end_checkpoint to be fsync'd + ct->lsn_of_checkpoint_in_progress.lsn, + 0, + ct->checkpoint_num_files, + ct->checkpoint_num_txns); + assert(r==0); + toku_logger_note_checkpoint(logger, ct->lsn_of_checkpoint_in_progress); } + brt_end_checkpoint(); panic: return retval; } @@ -3645,48 +3951,6 @@ FILENUM toku_cachefile_filenum (CACHEFILE cf) { return cf->filenum; } - -// Worker thread function to write a pair from memory to its cachefile -// As of now, the writer thread NEVER evicts, hence passing FALSE -// for the third parameter to cachetable_write_pair -static void cachetable_writer(WORKITEM wi) { - PAIR p = workitem_arg(wi); - CACHETABLE ct = p->cachefile->cachetable; - cachetable_lock(ct); - cachetable_write_pair(ct, p, p->remove_me); - cachetable_unlock(ct); -} - -// Worker thread function to read a pair from a cachefile to memory -static void cachetable_reader(WORKITEM wi) { - struct cachefile_prefetch_args* cpargs = workitem_arg(wi); - CACHETABLE ct = cpargs->p->cachefile->cachetable; - cachetable_lock(ct); - // TODO: find a way to properly pass some information for read_extraargs - // This is only called in toku_cachefile_prefetch, by putting it on a workqueue - // The problem is described in comments in toku_cachefile_prefetch - cachetable_fetch_pair( - ct, - cpargs->p->cachefile, - cpargs->p, - cpargs->fetch_callback, - cpargs->read_extraargs, - FALSE - ); - cachetable_unlock(ct); - toku_free(cpargs); -} - -static void cachetable_partial_reader(WORKITEM wi) { - struct cachefile_partial_prefetch_args *cpargs = workitem_arg(wi); - CACHETABLE ct = cpargs->p->cachefile->cachetable; - cachetable_lock(ct); - do_partial_fetch(ct, cpargs->p->cachefile, cpargs->p, cpargs->pf_callback, cpargs->read_extraargs, FALSE); - cachetable_unlock(ct); - toku_free(cpargs); -} - - // debug functions int toku_cachetable_assert_all_unpinned (CACHETABLE ct) { @@ -3696,9 +3960,9 @@ int toku_cachetable_assert_all_unpinned (CACHETABLE ct) { for (i=0; itable_size; i++) { PAIR p; for (p=ct->table[i]; p; p=p->hash_chain) { - assert(nb_mutex_writers(&p->nb_mutex)>=0); - if (nb_mutex_writers(&p->nb_mutex)) { - //printf("%s:%d pinned: %"PRId64" (%p)\n", __FILE__, __LINE__, p->key.b, p->value); + assert(nb_mutex_writers(&p->value_nb_mutex)>=0); + if (nb_mutex_writers(&p->value_nb_mutex)) { + //printf("%s:%d pinned: %"PRId64" (%p)\n", __FILE__, __LINE__, p->key.b, p->value_data); some_pinned=1; } } @@ -3714,9 +3978,9 @@ int toku_cachefile_count_pinned (CACHEFILE cf, int print_them) { cachetable_lock(ct); for (struct toku_list *next_pair = cf->pairs_for_cachefile.next; next_pair != &cf->pairs_for_cachefile; next_pair = next_pair->next) { PAIR p = toku_list_struct(next_pair, struct ctpair, next_for_cachefile); - assert(nb_mutex_writers(&p->nb_mutex) >= 0); - if (nb_mutex_writers(&p->nb_mutex)) { - if (print_them) printf("%s:%d pinned: %"PRId64" (%p)\n", __FILE__, __LINE__, p->key.b, p->value); + assert(nb_mutex_writers(&p->value_nb_mutex) >= 0); + if (nb_mutex_writers(&p->value_nb_mutex)) { + if (print_them) printf("%s:%d pinned: %"PRId64" (%p)\n", __FILE__, __LINE__, p->key.b, 
p->value_data); n_pinned++; } } @@ -3732,7 +3996,7 @@ void toku_cachetable_print_state (CACHETABLE ct) { if (p != 0) { printf("t[%u]=", i); for (p=ct->table[i]; p; p=p->hash_chain) { - printf(" {%"PRId64", %p, dirty=%d, pin=%d, size=%ld}", p->key.b, p->cachefile, (int) p->dirty, nb_mutex_writers(&p->nb_mutex), p->attr.size); + printf(" {%"PRId64", %p, dirty=%d, pin=%d, size=%ld}", p->key.b, p->cachefile, (int) p->dirty, nb_mutex_writers(&p->value_nb_mutex), p->attr.size); } printf("\n"); } @@ -3767,11 +4031,11 @@ int toku_cachetable_get_key_state (CACHETABLE ct, CACHEKEY key, CACHEFILE cf, vo if (p->key.b == key.b && p->cachefile == cf) { //note_hash_count(count); if (value_ptr) - *value_ptr = p->value; + *value_ptr = p->value_data; if (dirty_ptr) *dirty_ptr = p->dirty; if (pin_ptr) - *pin_ptr = nb_mutex_writers(&p->nb_mutex); + *pin_ptr = nb_mutex_writers(&p->value_nb_mutex); if (size_ptr) *size_ptr = p->attr.size; r = 0; @@ -3951,7 +4215,7 @@ toku_cleaner_thread (void *cachetable_v) // - this is how a thread that is calling unpin_and_remove will prevent // the cleaner thread from picking its PAIR (see comments in that function) do { - if (nb_mutex_users(&ct->cleaner_head->nb_mutex) > 0 || ct->cleaner_head->cachefile->is_flushing) { + if (nb_mutex_users(&ct->cleaner_head->value_nb_mutex) > 0 || ct->cleaner_head->cachefile->is_flushing) { goto next_pair; } n_seen++; @@ -3968,7 +4232,7 @@ toku_cleaner_thread (void *cachetable_v) // that is, best_pair != NULL, we do the clean // if (best_pair) { - nb_mutex_write_lock(&best_pair->nb_mutex, ct->mutex); + nb_mutex_write_lock(&best_pair->value_nb_mutex, ct->mutex); // verify a key assumption. assert(cleaner_thread_rate_pair(best_pair) > 0); // the order of operations for these two pieces is important @@ -4006,7 +4270,7 @@ toku_cleaner_thread (void *cachetable_v) cleaner_thread_rate_pair(best_pair) > 0) { cachetable_unlock(ct); - int r = best_pair->cleaner_callback(best_pair->value, + int r = best_pair->cleaner_callback(best_pair->value_data, best_pair->key, best_pair->fullhash, best_pair->write_extraargs); @@ -4019,7 +4283,7 @@ toku_cleaner_thread (void *cachetable_v) // don't need to unlock it if the cleaner callback is called. if (!cleaner_callback_called) { assert(!best_pair->cq); - nb_mutex_write_unlock(&best_pair->nb_mutex); + nb_mutex_write_unlock(&best_pair->value_nb_mutex); } rwlock_read_unlock(&cf->fdlock); // We need to make sure the cachefile sticks around so a close diff --git a/newbrt/cachetable.h b/newbrt/cachetable.h index 3b339f4d6e2..c61208f3081 100644 --- a/newbrt/cachetable.h +++ b/newbrt/cachetable.h @@ -130,14 +130,14 @@ enum cachetable_dirty { // When for_checkpoint is true, this was a 'pending' write // Returns: 0 if success, otherwise an error number. // Can access fd (fd is protected by a readlock during call) -typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, void *value, void *write_extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint); +typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, void *value, void **disk_data, void *write_extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, BOOL for_checkpoint, BOOL is_clone); // The fetch callback is called when a thread is attempting to get and pin a memory // object and it is not in the cachetable. // Returns: 0 if success, otherwise an error number. The address and size of the object // associated with the key are returned. 
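/*
 * A minimal skeleton of a fetch callback under the widened signature;
 * deserialize_node and the other helpers are hypothetical stand-ins for
 * the brt layer:
 *
 *   static int example_fetch_cb(CACHEFILE cf, int fd, CACHEKEY key,
 *                               u_int32_t fullhash, void **value_data,
 *                               void **disk_data, PAIR_ATTR *sizep,
 *                               int *dirtyp, void *read_extraargs) {
 *       *value_data = deserialize_node(fd, key);   // in-memory object
 *       *disk_data  = disk_layout_of(*value_data); // e.g. BRTNODE_DISK_DATA,
 *                                                  // kept for partial fetches
 *       *sizep  = attr_of(*value_data);            // memory accounting
 *       *dirtyp = 0;
 *       return 0;
 *   }
 */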
 // Can access fd (fd is protected by a readlock during call)
-typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, u_int32_t fullhash, void **value, PAIR_ATTR *sizep, int *dirtyp, void *read_extraargs);
+typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, u_int32_t fullhash, void **value_data, void **disk_data, PAIR_ATTR *sizep, int *dirtyp, void *read_extraargs);
 
 // The cachetable calls the partial eviction estimate callback to determine if
 // partial eviction is a cheap operation that may be called on the client thread
@@ -147,7 +147,7 @@ typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, u_int3
 // to return an estimate of the number of bytes it will free
 // so that the cachetable can estimate how much data is being evicted on background threads.
 // If cost is PE_CHEAP, then the callback does not set bytes_freed_estimate.
-typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *brtnode_pv, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void *write_extraargs);
+typedef void (*CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK)(void *brtnode_pv, void* disk_data, long* bytes_freed_estimate, enum partial_eviction_cost *cost, void *write_extraargs);
 
 // The cachetable calls the partial eviction callback to try to partially evict pieces
 // of the PAIR. The callback determines the strategy for what to evict. The callback may choose to free
@@ -173,16 +173,19 @@ typedef BOOL (*CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK)(void *brtnode_pv, voi
 // The new PAIR_ATTR of the PAIR is returned in sizep
 // Can access fd (fd is protected by a readlock during call)
 // Returns: 0 if success, otherwise an error number.
-typedef int (*CACHETABLE_PARTIAL_FETCH_CALLBACK)(void *brtnode_pv, void *read_extraargs, int fd, PAIR_ATTR *sizep);
+typedef int (*CACHETABLE_PARTIAL_FETCH_CALLBACK)(void *value_data, void* disk_data, void *read_extraargs, int fd, PAIR_ATTR *sizep);
 
 // TODO(leif) XXX TODO XXX
 typedef int (*CACHETABLE_CLEANER_CALLBACK)(void *brtnode_pv, BLOCKNUM blocknum, u_int32_t fullhash, void *write_extraargs);
 
+typedef void (*CACHETABLE_CLONE_CALLBACK)(void* value_data, void** cloned_value_data, PAIR_ATTR* new_attr, BOOL for_checkpoint, void* write_extraargs);
+
 typedef struct {
     CACHETABLE_FLUSH_CALLBACK flush_callback;
     CACHETABLE_PARTIAL_EVICTION_EST_CALLBACK pe_est_callback;
     CACHETABLE_PARTIAL_EVICTION_CALLBACK pe_callback;
     CACHETABLE_CLEANER_CALLBACK cleaner_callback;
+    CACHETABLE_CLONE_CALLBACK clone_callback;
     void* write_extraargs; // parameter for flush_callback, pe_est_callback, pe_callback, and cleaner_callback
 } CACHETABLE_WRITE_CALLBACK;
@@ -262,6 +265,7 @@ int toku_cachetable_get_and_pin_with_dep_pairs (
     CACHETABLE_FETCH_CALLBACK fetch_callback,
     CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
     CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
+    BOOL may_modify_value,
     void* read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback
     u_int32_t num_dependent_pairs, // number of dependent pairs that we may need to checkpoint
     CACHEFILE* dependent_cfs, // array of cachefiles of dependent pairs
@@ -286,9 +290,20 @@ int toku_cachetable_get_and_pin (
     CACHETABLE_FETCH_CALLBACK fetch_callback,
     CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback,
     CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback,
+    BOOL may_modify_value,
     void* read_extraargs // parameter for fetch_callback, pf_req_callback, and pf_callback
 );
 
+// does partial fetch on a pinned pair
+void
toku_cachetable_pf_pinned_pair( + void* value, + CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback, + void* read_extraargs, + CACHEFILE cf, + CACHEKEY key, + u_int32_t fullhash + ); + struct unlockers { BOOL locked; void (*f)(void*extra); @@ -309,6 +324,7 @@ int toku_cachetable_get_and_pin_nonblocking ( CACHETABLE_FETCH_CALLBACK fetch_callback, CACHETABLE_PARTIAL_FETCH_REQUIRED_CALLBACK pf_req_callback __attribute__((unused)), CACHETABLE_PARTIAL_FETCH_CALLBACK pf_callback __attribute__((unused)), + BOOL may_modify_value, void *read_extraargs, // parameter for fetch_callback, pf_req_callback, and pf_callback UNLOCKERS unlockers ); diff --git a/newbrt/checkpoint.c b/newbrt/checkpoint.c index ecd425c7784..d992baeec06 100644 --- a/newbrt/checkpoint.c +++ b/newbrt/checkpoint.c @@ -310,9 +310,9 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger, SET_CHECKPOINT_FOOTPRINT(40); if (r==0) { - if (callback_f) - callback_f(extra); // callback is called with checkpoint_safe_lock still held - r = toku_cachetable_end_checkpoint(ct, logger, ydb_lock, ydb_unlock, callback2_f, extra2); + if (callback_f) + callback_f(extra); // callback is called with checkpoint_safe_lock still held + r = toku_cachetable_end_checkpoint(ct, logger, ydb_lock, ydb_unlock, callback2_f, extra2); } SET_CHECKPOINT_FOOTPRINT(50); if (r==0 && logger) { diff --git a/newbrt/fifo.c b/newbrt/fifo.c index 3750f4e3d30..add248c3a2a 100644 --- a/newbrt/fifo.c +++ b/newbrt/fifo.c @@ -226,3 +226,19 @@ DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry) { const struct fifo_entry *toku_fifo_get_entry(FIFO fifo, long off) { return toku_fifo_iterate_internal_get_entry(fifo, off); } + +void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo) { + struct fifo *XMALLOC(new_fifo); + assert(new_fifo); + new_fifo->n_items_in_fifo = orig_fifo->n_items_in_fifo; + new_fifo->memory_start = 0; + new_fifo->memory_used = orig_fifo->memory_used - orig_fifo->memory_start; + new_fifo->memory_size = new_fifo->memory_used; + new_fifo->memory = toku_xmalloc(new_fifo->memory_size); + memcpy( + new_fifo->memory, + orig_fifo->memory + orig_fifo->memory_start, + new_fifo->memory_size + ); + *cloned_fifo = new_fifo; +} diff --git a/newbrt/fifo.h b/newbrt/fifo.h index c51b9fbc7f1..39d4af4423e 100644 --- a/newbrt/fifo.h +++ b/newbrt/fifo.h @@ -110,6 +110,8 @@ struct fifo_entry * toku_fifo_iterate_internal_get_entry(FIFO fifo, int off); DBT *fill_dbt_for_fifo_entry(DBT *dbt, const struct fifo_entry *entry); const struct fifo_entry *toku_fifo_get_entry(FIFO fifo, long off); +void toku_fifo_clone(FIFO orig_fifo, FIFO* cloned_fifo); + #if defined(__cplusplus) || defined(__cilkplusplus) }; #endif diff --git a/newbrt/mempool.c b/newbrt/mempool.c index 5326c9d9af4..ce0d82b013c 100644 --- a/newbrt/mempool.c +++ b/newbrt/mempool.c @@ -137,3 +137,11 @@ size_t toku_mempool_footprint(struct mempool *mp) { size_t rval = toku_memory_footprint(base, touched); return rval; } + +void toku_mempool_clone(struct mempool* orig_mp, struct mempool* new_mp) { + new_mp->frag_size = orig_mp->frag_size; + new_mp->free_offset = orig_mp->free_offset; + new_mp->size = orig_mp->free_offset; // only make the cloned mempool store what is needed + new_mp->base = toku_xmalloc(new_mp->size); + memcpy(new_mp->base, orig_mp->base, new_mp->size); +} diff --git a/newbrt/mempool.h b/newbrt/mempool.h index 06497f8a06e..fbce5b1c56b 100644 --- a/newbrt/mempool.h +++ b/newbrt/mempool.h @@ -83,6 +83,8 @@ static inline int toku_mempool_inrange(struct mempool *mp, void *vp, size_t size /* get memory 
footprint */ size_t toku_mempool_footprint(struct mempool *mp); +void toku_mempool_clone(struct mempool* orig_mp, struct mempool* new_mp); + #if defined(__cplusplus) || defined(__cilkplusplus) }; #endif diff --git a/newbrt/rollback.c b/newbrt/rollback.c index 5bdb6eb1e51..778a2c56640 100644 --- a/newbrt/rollback.c +++ b/newbrt/rollback.c @@ -492,8 +492,8 @@ toku_rollback_log_free(ROLLBACK_LOG_NODE *log_p) { } static void toku_rollback_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM logname, - void *rollback_v, void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, - BOOL write_me, BOOL keep_me, BOOL for_checkpoint) { + void *rollback_v, void** UU(disk_data), void *extraargs, PAIR_ATTR size, PAIR_ATTR* new_size, + BOOL write_me, BOOL keep_me, BOOL for_checkpoint, BOOL UU(is_clone)) { int r; ROLLBACK_LOG_NODE log = rollback_v; struct brt_header *h = extraargs; @@ -524,7 +524,7 @@ static void toku_rollback_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM } static int toku_rollback_fetch_callback (CACHEFILE cachefile, int fd, BLOCKNUM logname, u_int32_t fullhash, - void **rollback_pv, PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs) { + void **rollback_pv, void** UU(disk_data), PAIR_ATTR *sizep, int * UU(dirtyp), void *extraargs) { int r; struct brt_header *h = extraargs; assert(h->cf == cachefile); @@ -539,6 +539,7 @@ static int toku_rollback_fetch_callback (CACHEFILE cachefile, int fd, BLOCKNUM l static void toku_rollback_pe_est_callback( void* rollback_v, + void* UU(disk_data), long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* UU(write_extraargs) @@ -565,7 +566,7 @@ static BOOL toku_rollback_pf_req_callback(void* UU(brtnode_pv), void* UU(read_ex return FALSE; } -static int toku_rollback_pf_callback(void* UU(brtnode_pv), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { +static int toku_rollback_pf_callback(void* UU(brtnode_pv), void* UU(disk_data), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { // should never be called, given that toku_rollback_pf_req_callback always returns false assert(FALSE); return 0; @@ -588,6 +589,7 @@ static inline CACHETABLE_WRITE_CALLBACK get_write_callbacks_for_rollback_log(str wc.pe_est_callback = toku_rollback_pe_est_callback; wc.pe_callback = toku_rollback_pe_callback; wc.cleaner_callback = toku_rollback_cleaner_callback; + wc.clone_callback = NULL; wc.write_extraargs = h; return wc; } @@ -873,6 +875,7 @@ int toku_get_and_pin_rollback_log(TOKUTXN txn, TXNID xid, uint64_t sequence, BLO toku_rollback_fetch_callback, toku_rollback_pf_req_callback, toku_rollback_pf_callback, + TRUE, // may_modify_value h ); assert(r==0); diff --git a/newbrt/tests/brt-bfe-query.c b/newbrt/tests/brt-bfe-query.c index aa593079d77..8e5c92b4c84 100644 --- a/newbrt/tests/brt-bfe-query.c +++ b/newbrt/tests/brt-bfe-query.c @@ -36,24 +36,26 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { // disable_prefetching to TRUE cursor->disable_prefetching = TRUE; fill_bfe_for_prefetch(&bfe, brt_h, cursor); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + BRTNODE_DISK_DATA ndd = NULL; + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == 
PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); destroy_bfe_for_prefetch(&bfe); toku_brtnode_free(&dn); + toku_free(ndd); // now enable prefetching again cursor->disable_prefetching = FALSE; fill_bfe_for_prefetch(&bfe, brt_h, cursor); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_AVAIL); @@ -63,18 +65,19 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_COMPRESSED); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_COMPRESSED); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); destroy_bfe_for_prefetch(&bfe); toku_brtnode_free(&dn); + toku_free(ndd); u_int64_t left_key = 150; toku_fill_dbt(&cursor->range_lock_left_key, &left_key, sizeof(u_int64_t)); cursor->left_is_neg_infty = FALSE; fill_bfe_for_prefetch(&bfe, brt_h, cursor); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); @@ -84,18 +87,19 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_COMPRESSED); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); destroy_bfe_for_prefetch(&bfe); toku_brtnode_free(&dn); + toku_free(ndd); u_int64_t right_key = 151; toku_fill_dbt(&cursor->range_lock_right_key, &right_key, sizeof(u_int64_t)); cursor->right_is_pos_infty = FALSE; fill_bfe_for_prefetch(&bfe, brt_h, cursor); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); @@ -105,17 +109,18 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_ON_DISK); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_ON_DISK); destroy_bfe_for_prefetch(&bfe); toku_brtnode_free(&dn); + toku_free(ndd); left_key = 100000; right_key = 100000; fill_bfe_for_prefetch(&bfe, brt_h, cursor); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); @@ -125,17 +130,18 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_COMPRESSED); 
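// ---------------------------------------------------------------------------
// [Editorial note -- not part of the patch.] Each deserialize/partial-fetch
// round trip in these tests now manages the new BRTNODE_DISK_DATA out-param:
// toku_deserialize_brtnode_from creates it, toku_brtnode_pf_callback consults
// it to locate partitions on disk, and the caller frees it separately from the
// node itself. The distilled pattern (fd, bfe, and attr as in the test above):
BRTNODE dn = NULL;
BRTNODE_DISK_DATA ndd = NULL;
int r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0 /*hash*/, &dn, &ndd, &bfe);
assert(r == 0);
r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); // partial fetch reads via ndd
toku_brtnode_free(&dn); // frees the node...
toku_free(ndd);         // ...but ndd must be freed by the caller afterwards
// ---------------------------------------------------------------------------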
- r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); destroy_bfe_for_prefetch(&bfe); + toku_free(ndd); toku_brtnode_free(&dn); left_key = 100; right_key = 100; fill_bfe_for_prefetch(&bfe, brt_h, cursor); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_AVAIL); @@ -145,12 +151,13 @@ test_prefetch_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_COMPRESSED); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_ON_DISK); destroy_bfe_for_prefetch(&bfe); toku_brtnode_free(&dn); + toku_free(ndd); toku_free(cursor); } @@ -161,6 +168,7 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { brt_h->compare_fun = int64_key_cmp; BRT_CURSOR cursor = toku_malloc(sizeof *cursor); BRTNODE dn = NULL; + BRTNODE_DISK_DATA ndd = NULL; PAIR_ATTR attr; // first test that prefetching everything should work @@ -191,7 +199,7 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { // set disable_prefetching ON bfe.child_to_read = 2; bfe.disable_prefetching = TRUE; - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); @@ -206,16 +214,17 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_COMPRESSED); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_ON_DISK); assert(BP_STATE(dn,2) == PT_AVAIL); toku_brtnode_free(&dn); + toku_free(ndd); // fake the childnum to read bfe.child_to_read = 2; bfe.disable_prefetching = FALSE; - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_ON_DISK); @@ -230,15 +239,16 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_COMPRESSED); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_ON_DISK); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_AVAIL); toku_brtnode_free(&dn); + toku_free(ndd); // fake the childnum to read bfe.child_to_read = 0; - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd, &bfe); assert(r==0); assert(dn->n_children == 3); assert(BP_STATE(dn,0) == PT_AVAIL); @@ 
-253,11 +263,12 @@ test_subset_read(int fd, BRT UU(brt), struct brt_header *brt_h) { assert(BP_STATE(dn,0) == PT_COMPRESSED); assert(BP_STATE(dn,1) == PT_COMPRESSED); assert(BP_STATE(dn,2) == PT_ON_DISK); - r = toku_brtnode_pf_callback(dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(dn, ndd, &bfe, fd, &attr); assert(BP_STATE(dn,0) == PT_AVAIL); assert(BP_STATE(dn,1) == PT_AVAIL); assert(BP_STATE(dn,2) == PT_ON_DISK); toku_brtnode_free(&dn); + toku_free(ndd); toku_free(cursor); } @@ -345,8 +356,8 @@ test_prefetching(void) { assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } - - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); + BRTNODE_DISK_DATA ndd = NULL; + r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, &ndd, TRUE, brt->h, 1, 1, FALSE); assert(r==0); test_prefetch_read(fd, brt, brt_h); @@ -365,6 +376,7 @@ test_prefetching(void) { toku_blocktable_destroy(&brt_h->blocktable); toku_free(brt_h); toku_free(brt); + toku_free(ndd); r = close(fd); assert(r != -1); } diff --git a/newbrt/tests/brt-clock-test.c b/newbrt/tests/brt-clock-test.c index 2961d32a59a..b0f279948e2 100644 --- a/newbrt/tests/brt-clock-test.c +++ b/newbrt/tests/brt-clock-test.c @@ -67,7 +67,8 @@ test1(int fd, struct brt_header *brt_h, BRTNODE *dn) { struct brtnode_fetch_extra bfe_all; brt_h->compare_fun = string_key_cmp; fill_bfe_for_full_read(&bfe_all, brt_h); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &bfe_all); + BRTNODE_DISK_DATA ndd = NULL; + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_all); BOOL is_leaf = ((*dn)->height == 0); assert(r==0); for (int i = 0; i < (*dn)->n_children; i++) { @@ -93,7 +94,7 @@ test1(int fd, struct brt_header *brt_h, BRTNODE *dn) { PAIR_ATTR size; BOOL req = toku_brtnode_pf_req_callback(*dn, &bfe_all); assert(req); - toku_brtnode_pf_callback(*dn, &bfe_all, fd, &size); + toku_brtnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_brtnode_pe_callback(*dn, attr, &attr, NULL); for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_AVAIL); @@ -111,7 +112,7 @@ test1(int fd, struct brt_header *brt_h, BRTNODE *dn) { req = toku_brtnode_pf_req_callback(*dn, &bfe_all); assert(req); - toku_brtnode_pf_callback(*dn, &bfe_all, fd, &size); + toku_brtnode_pf_callback(*dn, ndd, &bfe_all, fd, &size); toku_brtnode_pe_callback(*dn, attr, &attr, NULL); for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_AVAIL); @@ -124,7 +125,7 @@ test1(int fd, struct brt_header *brt_h, BRTNODE *dn) { for (int i = 0; i < (*dn)->n_children; i++) { assert(BP_STATE(*dn,i) == PT_AVAIL); } - + toku_free(ndd); toku_brtnode_free(dn); } @@ -160,8 +161,8 @@ test2(int fd, struct brt_header *brt_h, BRTNODE *dn) { TRUE, FALSE ); - - int r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &bfe_subset); + BRTNODE_DISK_DATA ndd = NULL; + int r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_subset); assert(r==0); BOOL is_leaf = ((*dn)->height == 0); // at this point, although both partitions are available, only the @@ -182,13 +183,13 @@ test2(int fd, struct brt_header *brt_h, BRTNODE *dn) { BOOL req = toku_brtnode_pf_req_callback(*dn, &bfe_subset); assert(req); - toku_brtnode_pf_callback(*dn, &bfe_subset, fd, &attr); + toku_brtnode_pf_callback(*dn, ndd, &bfe_subset, fd, &attr); assert(BP_STATE(*dn, 0) == PT_AVAIL); assert(BP_STATE(*dn, 1) == PT_AVAIL); 
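// ---------------------------------------------------------------------------
// [Editorial note -- not part of the patch.] In the cachetable tests further
// below, every toku_cachetable_get_and_pin* call takes a new BOOL
// may_modify_value argument just before read_extraargs; these tests all pass
// TRUE. The patch does not document FALSE here, but the name suggests a
// read-only pin. Distilled call shape, using the tests' own helpers:
void *v = NULL;
long s = 0;
CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL);
int r = toku_cachetable_get_and_pin(
    f1, make_blocknum(1), 1, &v, &s, wc,
    def_fetch, def_pf_req_callback, def_pf_callback,
    TRUE, // may_modify_value -- new in this patch
    NULL  // read_extraargs
);
assert(r == 0);
r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8));
// ---------------------------------------------------------------------------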
assert(BP_SHOULD_EVICT(*dn, 0)); assert(!BP_SHOULD_EVICT(*dn, 1)); - + toku_free(ndd); toku_brtnode_free(dn); } @@ -206,8 +207,8 @@ test3_leaf(int fd, struct brt_header *brt_h, BRTNODE *dn) { &bfe_min, brt_h ); - - int r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &bfe_min); + BRTNODE_DISK_DATA ndd = NULL; + int r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &ndd, &bfe_min); assert(r==0); // // make sure we have a leaf @@ -217,6 +218,7 @@ test3_leaf(int fd, struct brt_header *brt_h, BRTNODE *dn) { assert(BP_STATE(*dn, i) == PT_ON_DISK); } toku_brtnode_free(dn); + toku_free(ndd); } static void @@ -296,8 +298,8 @@ test_serialize_nonleaf(void) { assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } - - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); + BRTNODE_DISK_DATA ndd = NULL; + r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, &ndd, TRUE, brt->h, 1, 1, FALSE); assert(r==0); test1(fd, brt_h, &dn); @@ -309,6 +311,7 @@ test_serialize_nonleaf(void) { destroy_nonleaf_childinfo(BNC(&sn, 1)); toku_free(sn.bp); toku_free(sn.childkeys); + toku_free(ndd); toku_block_free(brt_h->blocktable, BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); toku_brtheader_destroy_treelock(brt_h); @@ -382,8 +385,8 @@ test_serialize_leaf(void) { assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } - - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); + BRTNODE_DISK_DATA ndd = NULL; + r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, &ndd, TRUE, brt->h, 1, 1, FALSE); assert(r==0); test1(fd, brt_h, &dn); @@ -408,7 +411,7 @@ test_serialize_leaf(void) { toku_blocktable_destroy(&brt_h->blocktable); toku_free(brt_h); toku_free(brt); - + toku_free(ndd); r = close(fd); assert(r != -1); } diff --git a/newbrt/tests/brt-serialize-benchmark.c b/newbrt/tests/brt-serialize-benchmark.c index 81108c71037..8c368ce1a4d 100644 --- a/newbrt/tests/brt-serialize-benchmark.c +++ b/newbrt/tests/brt-serialize-benchmark.c @@ -130,7 +130,8 @@ test_serialize_leaf(int valsize, int nelts, double entropy) { struct timeval t[2]; gettimeofday(&t[0], NULL); - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); + BRTNODE_DISK_DATA ndd = NULL; + r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, &ndd, TRUE, brt->h, 1, 1, FALSE); assert(r==0); gettimeofday(&t[1], NULL); double dt; @@ -140,7 +141,8 @@ test_serialize_leaf(int valsize, int nelts, double entropy) { struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, brt_h); gettimeofday(&t[0], NULL); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + BRTNODE_DISK_DATA ndd2 = NULL; + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); assert(r==0); gettimeofday(&t[1], NULL); dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); @@ -165,6 +167,8 @@ test_serialize_leaf(int valsize, int nelts, double entropy) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(ndd); + toku_free(ndd2); r = close(fd); assert(r != -1); } @@ -259,7 +263,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { struct timeval t[2]; gettimeofday(&t[0], NULL); - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); + BRTNODE_DISK_DATA ndd = NULL; + r = toku_serialize_brtnode_to(fd, make_blocknum(20), 
&sn, &ndd, TRUE, brt->h, 1, 1, FALSE); assert(r==0); gettimeofday(&t[1], NULL); double dt; @@ -269,7 +274,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, brt_h); gettimeofday(&t[0], NULL); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &bfe); + BRTNODE_DISK_DATA ndd2 = NULL; + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, &dn, &ndd2, &bfe); assert(r==0); gettimeofday(&t[1], NULL); dt = (t[1].tv_sec - t[0].tv_sec) + ((t[1].tv_usec - t[0].tv_usec) / USECS_PER_SEC); @@ -291,6 +297,8 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(ndd); + toku_free(ndd2); r = close(fd); assert(r != -1); } diff --git a/newbrt/tests/brt-serialize-test.c b/newbrt/tests/brt-serialize-test.c index cc631699020..524ea9e1775 100644 --- a/newbrt/tests/brt-serialize-test.c +++ b/newbrt/tests/brt-serialize-test.c @@ -102,19 +102,19 @@ string_key_cmp(DB *UU(e), const DBT *a, const DBT *b) } static void -setup_dn(enum brtnode_verify_type bft, int fd, struct brt_header *brt_h, BRTNODE *dn) { +setup_dn(enum brtnode_verify_type bft, int fd, struct brt_header *brt_h, BRTNODE *dn, BRTNODE_DISK_DATA* ndd) { int r; brt_h->compare_fun = string_key_cmp; if (bft == read_all) { struct brtnode_fetch_extra bfe; fill_bfe_for_full_read(&bfe, brt_h); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); } else if (bft == read_compressed || bft == read_none) { struct brtnode_fetch_extra bfe; fill_bfe_for_min_read(&bfe, brt_h); - r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, &bfe); + r = toku_deserialize_brtnode_from(fd, make_blocknum(20), 0/*pass zero for hash*/, dn, ndd, &bfe); assert(r==0); // assert all bp's are compressed or on disk. 
for (int i = 0; i < (*dn)->n_children; i++) { @@ -143,7 +143,7 @@ setup_dn(enum brtnode_verify_type bft, int fd, struct brt_header *brt_h, BRTNODE PAIR_ATTR attr; fill_bfe_for_full_read(&bfe, brt_h); assert(toku_brtnode_pf_req_callback(*dn, &bfe)); - r = toku_brtnode_pf_callback(*dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); assert(r==0); // assert all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { @@ -166,7 +166,7 @@ setup_dn(enum brtnode_verify_type bft, int fd, struct brt_header *brt_h, BRTNODE fill_bfe_for_full_read(&bfe, brt_h); assert(toku_brtnode_pf_req_callback(*dn, &bfe)); PAIR_ATTR attr; - r = toku_brtnode_pf_callback(*dn, &bfe, fd, &attr); + r = toku_brtnode_pf_callback(*dn, *ndd, &bfe, fd, &attr); assert(r==0); // assert all bp's are available for (int i = 0; i < (*dn)->n_children; i++) { @@ -180,8 +180,25 @@ setup_dn(enum brtnode_verify_type bft, int fd, struct brt_header *brt_h, BRTNODE } } +static void write_sn_to_disk(int fd, BRT brt, BRTNODE sn, BRTNODE_DISK_DATA* src_ndd, BOOL do_clone) { + int r; + if (do_clone) { + void* cloned_node_v = NULL; + PAIR_ATTR attr; + toku_brtnode_clone_callback(sn, &cloned_node_v, &attr, FALSE, brt->h); + BRTNODE cloned_node = cloned_node_v; + r = toku_serialize_brtnode_to(fd, make_blocknum(20), cloned_node, src_ndd, FALSE, brt->h, 1, 1, FALSE); + assert(r==0); + toku_brtnode_free(&cloned_node); + } + else { + r = toku_serialize_brtnode_to(fd, make_blocknum(20), sn, src_ndd, TRUE, brt->h, 1, 1, FALSE); + assert(r==0); + } +} + static void -test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { +test_serialize_leaf_check_msn(enum brtnode_verify_type bft, BOOL do_clone) { // struct brt source_brt; const int nodesize = 1024; struct brtnode sn, *dn; @@ -256,11 +273,12 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn); + setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -285,10 +303,10 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { u_int32_t last_i = 0; for (u_int32_t i = 0; i < npartitions; ++i) { assert(BLB_MAX_MSN_APPLIED(dn, i).msn == POSTSERIALIZE_MSN_ON_DISK.msn); - assert(dn->bp[i].start > 0); - assert(dn->bp[i].size > 0); + assert(dest_ndd[i].start > 0); + assert(dest_ndd[i].size > 0); if (i > 0) { - assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size); + assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra); u_int32_t keylen; @@ -308,9 +326,9 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { kv_pair_free(sn.childkeys[i]); } for (int i = 0; i < sn.n_children; i++) { - BASEMENTNODE bn = BLB(&sn, i); - struct mempool * mp = &bn->buffer_mempool; - toku_mempool_destroy(mp); + BASEMENTNODE bn = BLB(&sn, i); + struct mempool * mp = &bn->buffer_mempool; + toku_mempool_destroy(mp); destroy_basement_node(BLB(&sn, i)); } toku_free(sn.bp); @@ -321,12 +339,14 @@ test_serialize_leaf_check_msn(enum brtnode_verify_type bft) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } static void 
-test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { +test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft, BOOL do_clone) { int r; struct brtnode sn, *dn; const int keylens = 256*1024, vallens = 0, nrows = 8; @@ -396,11 +416,12 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn); + setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -428,10 +449,10 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_cmp }; u_int32_t last_i = 0; for (u_int32_t i = 0; i < npartitions; ++i) { - assert(dn->bp[i].start > 0); - assert(dn->bp[i].size > 0); + assert(dest_ndd[i].start > 0); + assert(dest_ndd[i].size > 0); if (i > 0) { - assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size); + assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0); toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra); @@ -461,12 +482,14 @@ test_serialize_leaf_with_large_pivots(enum brtnode_verify_type bft) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } static void -test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { +test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft, BOOL do_clone) { int r; struct brtnode sn, *dn; const int keylens = sizeof(int), vallens = sizeof(int), nrows = 196*1024; @@ -533,10 +556,11 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { assert(size == 100); } - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn); + setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -561,10 +585,10 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_int_cmp }; u_int32_t last_i = 0; for (u_int32_t i = 0; i < npartitions; ++i) { - assert(dn->bp[i].start > 0); - assert(dn->bp[i].size > 0); + assert(dest_ndd[i].start > 0); + assert(dest_ndd[i].size > 0); if (i > 0) { - assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size); + assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0); toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra); @@ -595,13 +619,15 @@ test_serialize_leaf_with_many_rows(enum brtnode_verify_type bft) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } static void -test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { +test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft, BOOL do_clone) { int r; struct brtnode sn, *dn; const uint32_t nrows = 7; @@ -674,10 +700,11 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) 
{ assert(size == 100); } - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn); + setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -708,10 +735,10 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { struct check_leafentries_struct extra = { .nelts = nrows, .elts = les, .i = 0, .cmp = omt_cmp }; u_int32_t last_i = 0; for (u_int32_t i = 0; i < npartitions; ++i) { - assert(dn->bp[i].start > 0); - assert(dn->bp[i].size > 0); + assert(dest_ndd[i].start > 0); + assert(dest_ndd[i].size > 0); if (i > 0) { - assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size); + assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0); toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra); @@ -741,13 +768,15 @@ test_serialize_leaf_with_large_rows(enum brtnode_verify_type bft) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } static void -test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { +test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft, BOOL do_clone) { const int nodesize = 1024; struct brtnode sn, *dn; @@ -830,11 +859,11 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); - - setup_dn(bft, fd, brt_h, &dn); + setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -857,10 +886,10 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp }; u_int32_t last_i = 0; for (u_int32_t i = 0; i < npartitions; ++i) { - assert(dn->bp[i].start > 0); - assert(dn->bp[i].size > 0); + assert(dest_ndd[i].start > 0); + assert(dest_ndd[i].size > 0); if (i > 0) { - assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size); + assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } assert(toku_omt_size(BLB_BUFFER(dn, i)) > 0); toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra); @@ -890,12 +919,14 @@ test_serialize_leaf_with_empty_basement_nodes(enum brtnode_verify_type bft) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } static void -test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type bft) { +test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type bft, BOOL do_clone) { const int nodesize = 1024; struct brtnode sn, *dn; @@ -954,10 +985,11 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type assert(size == 100); } - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn); + 
setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -973,10 +1005,10 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type struct check_leafentries_struct extra = { .nelts = 0, .elts = NULL, .i = 0, .cmp = omt_cmp }; u_int32_t last_i = 0; for (u_int32_t i = 0; i < npartitions; ++i) { - assert(dn->bp[i].start > 0); - assert(dn->bp[i].size > 0); + assert(dest_ndd[i].start > 0); + assert(dest_ndd[i].size > 0); if (i > 0) { - assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size); + assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } assert(toku_omt_size(BLB_BUFFER(dn, i)) == 0); toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra); @@ -1002,13 +1034,15 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum brtnode_verify_type toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } static void -test_serialize_leaf(enum brtnode_verify_type bft) { +test_serialize_leaf(enum brtnode_verify_type bft, BOOL do_clone) { // struct brt source_brt; const int nodesize = 1024; struct brtnode sn, *dn; @@ -1016,6 +1050,8 @@ test_serialize_leaf(enum brtnode_verify_type bft) { int fd = open(__FILE__ ".brt", O_RDWR|O_CREAT|O_BINARY, S_IRWXU|S_IRWXG|S_IRWXO); assert(fd >= 0); int r; + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; sn.max_msn_applied_to_node_on_disk.msn = 0; sn.nodesize = nodesize; @@ -1079,10 +1115,9 @@ test_serialize_leaf(enum brtnode_verify_type bft) { assert(size == 100); } - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - setup_dn(bft, fd, brt_h, &dn); + setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -1105,10 +1140,10 @@ test_serialize_leaf(enum brtnode_verify_type bft) { struct check_leafentries_struct extra = { .nelts = 3, .elts = elts, .i = 0, .cmp = omt_cmp }; u_int32_t last_i = 0; for (u_int32_t i = 0; i < npartitions; ++i) { - assert(dn->bp[i].start > 0); - assert(dn->bp[i].size > 0); + assert(dest_ndd[i].start > 0); + assert(dest_ndd[i].size > 0); if (i > 0) { - assert(dn->bp[i].start >= dn->bp[i-1].start + dn->bp[i-1].size); + assert(dest_ndd[i].start >= dest_ndd[i-1].start + dest_ndd[i-1].size); } toku_omt_iterate(BLB_BUFFER(dn, i), check_leafentries, &extra); u_int32_t keylen; @@ -1141,12 +1176,14 @@ test_serialize_leaf(enum brtnode_verify_type bft) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } static void -test_serialize_nonleaf(enum brtnode_verify_type bft) { +test_serialize_nonleaf(enum brtnode_verify_type bft, BOOL do_clone) { // struct brt source_brt; const int nodesize = 1024; struct brtnode sn, *dn; @@ -1222,11 +1259,11 @@ test_serialize_nonleaf(enum brtnode_verify_type bft) { assert(offset == BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(size == 100); } + BRTNODE_DISK_DATA src_ndd = NULL; + BRTNODE_DISK_DATA dest_ndd = NULL; + write_sn_to_disk(fd, brt, &sn, &src_ndd, do_clone); - r = toku_serialize_brtnode_to(fd, make_blocknum(20), &sn, brt->h, 1, 1, FALSE); - assert(r==0); - - setup_dn(bft, fd, brt_h, &dn); + setup_dn(bft, fd, brt_h, &dn, &dest_ndd); assert(dn->thisnodename.b==20); @@ -1339,43 +1376,69 @@ test_serialize_nonleaf(enum brtnode_verify_type bft) { toku_brtheader_destroy_treelock(brt_h); toku_free(brt_h); 
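// ---------------------------------------------------------------------------
// [Editorial note -- not part of the patch.] test_main below now runs every
// serialization test twice, with do_clone FALSE and TRUE, so the direct write
// path and the new checkpoint-clone path are both checked against the same
// expected on-disk state. The clone path, distilled from write_sn_to_disk
// above (the BOOL after the BRTNODE_DISK_DATA out-param is TRUE for the live
// node and FALSE for a clone; its exact name is not shown in these hunks):
void *clone_v = NULL;
PAIR_ATTR clone_attr;
toku_brtnode_clone_callback(&sn, &clone_v, &clone_attr, FALSE /*for_checkpoint*/, brt->h);
BRTNODE clone = clone_v;
int rr = toku_serialize_brtnode_to(fd, make_blocknum(20), clone, &src_ndd,
                                   FALSE, brt->h, 1, 1, FALSE);
assert(rr == 0);
toku_brtnode_free(&clone); // the clone is throwaway once serialized
// ---------------------------------------------------------------------------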
toku_free(brt); + toku_free(src_ndd); + toku_free(dest_ndd); r = close(fd); assert(r != -1); } int test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) { - test_serialize_leaf(read_none); - test_serialize_leaf(read_all); - test_serialize_leaf(read_compressed); + test_serialize_leaf(read_none, FALSE); + test_serialize_leaf(read_all, FALSE); + test_serialize_leaf(read_compressed, FALSE); + test_serialize_leaf(read_none, TRUE); + test_serialize_leaf(read_all, TRUE); + test_serialize_leaf(read_compressed, TRUE); - test_serialize_leaf_with_empty_basement_nodes(read_none); - test_serialize_leaf_with_empty_basement_nodes(read_all); - test_serialize_leaf_with_empty_basement_nodes(read_compressed); + test_serialize_leaf_with_empty_basement_nodes(read_none, FALSE); + test_serialize_leaf_with_empty_basement_nodes(read_all, FALSE); + test_serialize_leaf_with_empty_basement_nodes(read_compressed, FALSE); + test_serialize_leaf_with_empty_basement_nodes(read_none, TRUE); + test_serialize_leaf_with_empty_basement_nodes(read_all, TRUE); + test_serialize_leaf_with_empty_basement_nodes(read_compressed, TRUE); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_none); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_all); - test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, FALSE); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, FALSE); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, FALSE); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_none, TRUE); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_all, TRUE); + test_serialize_leaf_with_multiple_empty_basement_nodes(read_compressed, TRUE); - test_serialize_leaf_with_large_rows(read_none); - test_serialize_leaf_with_large_rows(read_all); - test_serialize_leaf_with_large_rows(read_compressed); + test_serialize_leaf_with_large_rows(read_none, FALSE); + test_serialize_leaf_with_large_rows(read_all, FALSE); + test_serialize_leaf_with_large_rows(read_compressed, FALSE); + test_serialize_leaf_with_large_rows(read_none, TRUE); + test_serialize_leaf_with_large_rows(read_all, TRUE); + test_serialize_leaf_with_large_rows(read_compressed, TRUE); - test_serialize_leaf_with_many_rows(read_none); - test_serialize_leaf_with_many_rows(read_all); - test_serialize_leaf_with_many_rows(read_compressed); + test_serialize_leaf_with_many_rows(read_none, FALSE); + test_serialize_leaf_with_many_rows(read_all, FALSE); + test_serialize_leaf_with_many_rows(read_compressed, FALSE); + test_serialize_leaf_with_many_rows(read_none, TRUE); + test_serialize_leaf_with_many_rows(read_all, TRUE); + test_serialize_leaf_with_many_rows(read_compressed, TRUE); - test_serialize_leaf_with_large_pivots(read_none); - test_serialize_leaf_with_large_pivots(read_all); - test_serialize_leaf_with_large_pivots(read_compressed); + test_serialize_leaf_with_large_pivots(read_none, FALSE); + test_serialize_leaf_with_large_pivots(read_all, FALSE); + test_serialize_leaf_with_large_pivots(read_compressed, FALSE); + test_serialize_leaf_with_large_pivots(read_none, TRUE); + test_serialize_leaf_with_large_pivots(read_all, TRUE); + test_serialize_leaf_with_large_pivots(read_compressed, TRUE); - test_serialize_leaf_check_msn(read_none); - test_serialize_leaf_check_msn(read_all); - test_serialize_leaf_check_msn(read_compressed); + test_serialize_leaf_check_msn(read_none, FALSE); + 
test_serialize_leaf_check_msn(read_all, FALSE); + test_serialize_leaf_check_msn(read_compressed, FALSE); + test_serialize_leaf_check_msn(read_none, TRUE); + test_serialize_leaf_check_msn(read_all, TRUE); + test_serialize_leaf_check_msn(read_compressed, TRUE); - test_serialize_nonleaf(read_none); - test_serialize_nonleaf(read_all); - test_serialize_nonleaf(read_compressed); + test_serialize_nonleaf(read_none, FALSE); + test_serialize_nonleaf(read_all, FALSE); + test_serialize_nonleaf(read_compressed, FALSE); + test_serialize_nonleaf(read_none, TRUE); + test_serialize_nonleaf(read_all, TRUE); + test_serialize_nonleaf(read_compressed, TRUE); return 0; } diff --git a/newbrt/tests/cachetable-3969.c b/newbrt/tests/cachetable-3969.c index cde1e417ddc..d6da2a1941c 100644 --- a/newbrt/tests/cachetable-3969.c +++ b/newbrt/tests/cachetable-3969.c @@ -31,11 +31,11 @@ run_test (void) { long s1; long s2; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); assert(r==0); for (int i = 0; i < 20; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(2), 2, CACHETABLE_CLEAN, make_pair_attr(8)); assert(r==0); } @@ -47,12 +47,12 @@ run_test (void) { // pin 1 and 2 - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_begin_checkpoint(ct, NULL); // mark nodes as pending a checkpoint, so that get_and_pin_nonblocking on block 1 will return TOKUDB_TRY_AGAIN r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); assert(r==0); - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); // now we try to pin 1, and it should get evicted out from under us struct unlockers foo; foo.extra = NULL; @@ -69,6 +69,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, &foo ); diff --git a/newbrt/tests/cachetable-4357.c b/newbrt/tests/cachetable-4357.c index 5dfd9ab98cc..16cc3b4ddf5 100644 --- a/newbrt/tests/cachetable-4357.c +++ b/newbrt/tests/cachetable-4357.c @@ -15,6 +15,7 @@ static void *pin_nonblocking(void *arg) { &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, NULL ); @@ -42,6 +43,7 @@ cachetable_test (void) { &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL ); toku_pthread_t pin_nonblocking_tid; diff --git a/newbrt/tests/cachetable-4365.c b/newbrt/tests/cachetable-4365.c index afa2796cc0c..d08a3be176c 
100644 --- a/newbrt/tests/cachetable-4365.c +++ b/newbrt/tests/cachetable-4365.c @@ -15,6 +15,7 @@ static void *pin_nonblocking(void *arg) { &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, NULL ); @@ -63,6 +64,7 @@ cachetable_test (void) { &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL ); toku_pthread_t pin_nonblocking_tid; diff --git a/newbrt/tests/cachetable-4545.c b/newbrt/tests/cachetable-4545.c index 57dc227b831..4dbb006a607 100644 --- a/newbrt/tests/cachetable-4545.c +++ b/newbrt/tests/cachetable-4545.c @@ -12,12 +12,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { flush_called = TRUE; *new_size = make_pair_attr(8); @@ -29,7 +31,7 @@ static BOOL pf_req_callback(void* UU(brtnode_pv), void* UU(read_extraargs)) { return TRUE; } -static int pf_callback(void* UU(brtnode_pv), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* sizep) { +static int pf_callback(void* UU(brtnode_pv), void* UU(disk_data), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* sizep) { assert(pf_req_called); assert(flush_called); pf_called = TRUE; @@ -52,7 +54,7 @@ cachetable_test (void) { long s1; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, pf_req_callback, pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, pf_req_callback, pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); flush_called = FALSE; @@ -60,7 +62,7 @@ cachetable_test (void) { pf_called = FALSE; r = toku_cachetable_begin_checkpoint(ct, NULL); assert_zero(r); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, pf_req_callback, pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, pf_req_callback, pf_callback, TRUE, NULL); assert_zero(r); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); assert_zero(r); diff --git a/newbrt/tests/cachetable-all-write.c b/newbrt/tests/cachetable-all-write.c index 0c533a3752c..ac4cf7617e0 100644 --- a/newbrt/tests/cachetable-all-write.c +++ b/newbrt/tests/cachetable-all-write.c @@ -8,12 +8,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d write_me %d\n", (int)k.b, w); } @@ -39,11 +41,9 @@ cachetable_test (void) { long s1, s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, 
def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); - // usleep (2*1024*1024); - //r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, flush, def_fetch, def_pe_est_callback, pe_callback, pf_req_callback, pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(2), 2, CACHETABLE_CLEAN, make_pair_attr(8)); diff --git a/newbrt/tests/cachetable-checkpoint-pending.c b/newbrt/tests/cachetable-checkpoint-pending.c index 68633d70aad..d1e894033b2 100644 --- a/newbrt/tests/cachetable-checkpoint-pending.c +++ b/newbrt/tests/cachetable-checkpoint-pending.c @@ -36,12 +36,14 @@ flush ( int UU(fd), CACHEKEY UU(key), void *value, + void** UU(dd), void *UU(extraargs), PAIR_ATTR size, PAIR_ATTR* UU(new_size), BOOL write_me, BOOL keep_me, - BOOL UU(for_checkpoint) + BOOL UU(for_checkpoint), + BOOL UU(is_clone) ) { // printf("f"); @@ -61,7 +63,8 @@ fetch ( int UU(fd), CACHEKEY UU(key), u_int32_t UU(fullhash), - void **UU(value), + void **UU(value), + void **UU(dd), PAIR_ATTR *UU(sizep), int *UU(dirtyp), void *UU(extraargs) @@ -84,7 +87,7 @@ do_update (void *UU(ignore)) long size; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - int r = toku_cachetable_get_and_pin(cf, key, hi, &vv, &size, wc, fetch, def_pf_req_callback, def_pf_callback, 0); + int r = toku_cachetable_get_and_pin(cf, key, hi, &vv, &size, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); //printf("g"); assert(r==0); assert(size==sizeof(int)); diff --git a/newbrt/tests/cachetable-checkpoint-pinned-nodes.c b/newbrt/tests/cachetable-checkpoint-pinned-nodes.c index e90edc6e91a..7f33e9f599e 100644 --- a/newbrt/tests/cachetable-checkpoint-pinned-nodes.c +++ b/newbrt/tests/cachetable-checkpoint-pinned-nodes.c @@ -14,12 +14,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -41,6 +43,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -73,9 +76,9 @@ cachetable_test (void) { long s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(&dirty_val); wc.flush_callback = flush; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, &dirty_val); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, &dirty_val); wc.write_extraargs = NULL; - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 
2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); // // Here is the test, we have two pairs, v1 is dirty, v2 is clean, but both are currently pinned diff --git a/newbrt/tests/cachetable-checkpoint-prefetched-nodes.c b/newbrt/tests/cachetable-checkpoint-prefetched-nodes.c index 579fd062f3c..5a2e1cc95c0 100644 --- a/newbrt/tests/cachetable-checkpoint-prefetched-nodes.c +++ b/newbrt/tests/cachetable-checkpoint-prefetched-nodes.c @@ -14,12 +14,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -41,6 +43,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) diff --git a/newbrt/tests/cachetable-checkpoint-test.c b/newbrt/tests/cachetable-checkpoint-test.c index 9088382cde4..b86d275f4ff 100644 --- a/newbrt/tests/cachetable-checkpoint-test.c +++ b/newbrt/tests/cachetable-checkpoint-test.c @@ -12,7 +12,21 @@ static const int item_size = 1; static int n_flush, n_write_me, n_keep_me, n_fetch; -static void flush(CACHEFILE cf, int UU(fd), CACHEKEY key, void *value, void *extraargs, PAIR_ATTR size, PAIR_ATTR* UU(new_size), BOOL write_me, BOOL keep_me, BOOL UU(for_checkpoint)) { +static void flush( + CACHEFILE cf, + int UU(fd), + CACHEKEY key, + void *value, + void** UU(dd), + void *extraargs, + PAIR_ATTR size, + PAIR_ATTR* UU(new_size), + BOOL write_me, + BOOL keep_me, + BOOL UU(for_checkpoint), + BOOL UU(is_clone) + ) +{ cf = cf; key = key; value = value; extraargs = extraargs; // assert(key == make_blocknum((long)value)); assert(size.size == item_size); diff --git a/newbrt/tests/cachetable-cleaner-checkpoint.c b/newbrt/tests/cachetable-cleaner-checkpoint.c index 0577eb4ed16..3283d2f18e7 100644 --- a/newbrt/tests/cachetable-cleaner-checkpoint.c +++ b/newbrt/tests/cachetable-cleaner-checkpoint.c @@ -12,12 +12,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -70,7 +72,7 @@ cachetable_test (void) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; wc.cleaner_callback = cleaner_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, 
TRUE, NULL); PAIR_ATTR attr = make_pair_attr(8); attr.cache_pressure_size = 8; r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, attr); diff --git a/newbrt/tests/cachetable-cleaner-checkpoint2.c b/newbrt/tests/cachetable-cleaner-checkpoint2.c index 302fc15eb65..df935c15938 100644 --- a/newbrt/tests/cachetable-cleaner-checkpoint2.c +++ b/newbrt/tests/cachetable-cleaner-checkpoint2.c @@ -12,12 +12,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -70,7 +72,7 @@ cachetable_test (void) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; wc.cleaner_callback = cleaner_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); PAIR_ATTR attr = make_pair_attr(8); attr.cache_pressure_size = 8; r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, attr); diff --git a/newbrt/tests/cachetable-cleaner-dev-null.c b/newbrt/tests/cachetable-cleaner-dev-null.c index 7fd4316fce0..345ce6a98a8 100644 --- a/newbrt/tests/cachetable-cleaner-dev-null.c +++ b/newbrt/tests/cachetable-cleaner-dev-null.c @@ -11,12 +11,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -59,7 +61,7 @@ cachetable_test (void) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; wc.cleaner_callback = cleaner_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); PAIR_ATTR attr = make_pair_attr(8); attr.cache_pressure_size = 8; r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, attr); diff --git a/newbrt/tests/cachetable-cleaner-thread-attrs-accumulate.c b/newbrt/tests/cachetable-cleaner-thread-attrs-accumulate.c index cd831215d17..c9b54a03157 100644 --- a/newbrt/tests/cachetable-cleaner-thread-attrs-accumulate.c +++ b/newbrt/tests/cachetable-cleaner-thread-attrs-accumulate.c @@ -30,12 +30,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c 
__attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { PAIR_ATTR *expect = e; if (!keep) { @@ -85,6 +87,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, &expect); assert_zero(r); r = toku_cachetable_unpin(f1, make_blocknum(i+1), i+1, CACHETABLE_DIRTY, attrs[i]); @@ -109,6 +112,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, &expect); toku_cachetable_unpin(f1, make_blocknum(n_pairs + 1), n_pairs + 1, CACHETABLE_CLEAN, make_pair_attr(test_limit - expect.size + 20)); diff --git a/newbrt/tests/cachetable-cleaner-thread-everything-pinned.c b/newbrt/tests/cachetable-cleaner-thread-everything-pinned.c index fa63df98be4..8ced54093ee 100644 --- a/newbrt/tests/cachetable-cleaner-thread-everything-pinned.c +++ b/newbrt/tests/cachetable-cleaner-thread-everything-pinned.c @@ -47,6 +47,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL); assert_zero(r); } diff --git a/newbrt/tests/cachetable-cleaner-thread-nothing-needs-flushing.c b/newbrt/tests/cachetable-cleaner-thread-nothing-needs-flushing.c index 6e86dadfc87..185f184a6a7 100644 --- a/newbrt/tests/cachetable-cleaner-thread-nothing-needs-flushing.c +++ b/newbrt/tests/cachetable-cleaner-thread-nothing-needs-flushing.c @@ -45,6 +45,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL); assert_zero(r); // set cachepressure_size to 0 diff --git a/newbrt/tests/cachetable-cleaner-thread-simple.c b/newbrt/tests/cachetable-cleaner-thread-simple.c index 6463e08b978..504cc78a06d 100644 --- a/newbrt/tests/cachetable-cleaner-thread-simple.c +++ b/newbrt/tests/cachetable-cleaner-thread-simple.c @@ -52,6 +52,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL); PAIR_ATTR attr = make_pair_attr(8); attr.cache_pressure_size = 100; @@ -63,6 +64,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL); assert_zero(r); // set cachepressure_size to 0 diff --git a/newbrt/tests/cachetable-clock-eviction.c b/newbrt/tests/cachetable-clock-eviction.c index 3b0494462a3..37dea40882e 100644 --- a/newbrt/tests/cachetable-clock-eviction.c +++ b/newbrt/tests/cachetable-clock-eviction.c @@ -13,12 +13,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (check_flush && !keep) { @@ -36,6 +38,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -66,19 +69,19 @@ cachetable_test (void) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; for (int i = 0; i < 100000; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, 
make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(1)); } for (int i = 0; i < 8; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(2), 2, CACHETABLE_CLEAN, make_pair_attr(1)); } for (int i = 0; i < 4; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(3), 3, CACHETABLE_CLEAN, make_pair_attr(1)); } for (int i = 0; i < 2; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(4), 4, CACHETABLE_CLEAN, make_pair_attr(1)); } flush_may_occur = TRUE; diff --git a/newbrt/tests/cachetable-clock-eviction2.c b/newbrt/tests/cachetable-clock-eviction2.c index 712855ab826..40d26d1214d 100755 --- a/newbrt/tests/cachetable-clock-eviction2.c +++ b/newbrt/tests/cachetable-clock-eviction2.c @@ -10,12 +10,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v, + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep, - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(flush_may_occur); if (!keep) { @@ -31,6 +33,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -48,12 +51,14 @@ other_flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { } @@ -103,28 +108,28 @@ cachetable_test (void) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(4)); } for (int i = 0; i < 8; i++) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, 
fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(2), 2, CACHETABLE_CLEAN, make_pair_attr(4)); } for (int i = 0; i < 4; i++) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(3), 3, CACHETABLE_CLEAN, make_pair_attr(4)); } for (int i = 0; i < 2; i++) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(4), 4, CACHETABLE_CLEAN, make_pair_attr(4)); } flush_may_occur = FALSE; diff --git a/newbrt/tests/cachetable-clock-eviction3.c b/newbrt/tests/cachetable-clock-eviction3.c index 2d0081a1ab7..21c6237b6ef 100755 --- a/newbrt/tests/cachetable-clock-eviction3.c +++ b/newbrt/tests/cachetable-clock-eviction3.c @@ -10,12 +10,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void* UU(v), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep, - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(flush_may_occur); if (!keep) { @@ -31,6 +33,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -48,18 +51,21 @@ other_flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { } static void pe_est_callback( void* UU(brtnode_pv), + void* UU(dd), long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* UU(write_extraargs) @@ -118,7 +124,7 @@ cachetable_test (void) { wc.flush_callback = flush; wc.pe_est_callback = pe_est_callback; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(4)); } for (int i = 0; i < 8; i++) { @@ -126,7 +132,7 @@ cachetable_test (void) { wc.flush_callback = flush; wc.pe_est_callback = pe_est_callback; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, 
def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(2), 2, CACHETABLE_CLEAN, make_pair_attr(4)); } for (int i = 0; i < 4; i++) { @@ -134,7 +140,7 @@ cachetable_test (void) { wc.flush_callback = flush; wc.pe_est_callback = pe_est_callback; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(3), 3, CACHETABLE_CLEAN, make_pair_attr(4)); } for (int i = 0; i < 2; i++) { @@ -142,7 +148,7 @@ cachetable_test (void) { wc.flush_callback = flush; wc.pe_est_callback = pe_est_callback; wc.pe_callback = pe_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(4), 4, CACHETABLE_CLEAN, make_pair_attr(4)); } flush_may_occur = FALSE; diff --git a/newbrt/tests/cachetable-clock-eviction4.c b/newbrt/tests/cachetable-clock-eviction4.c index b0ecea2220b..923cf40308c 100644 --- a/newbrt/tests/cachetable-clock-eviction4.c +++ b/newbrt/tests/cachetable-clock-eviction4.c @@ -23,12 +23,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (check_flush && !keep) { @@ -46,6 +48,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -58,7 +61,8 @@ fetch (CACHEFILE f __attribute__((__unused__)), static void pe_est_callback( - void* UU(brtnode_pv), + void* UU(brtnode_pv), + void* UU(dd), long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* UU(write_extraargs) @@ -104,19 +108,19 @@ cachetable_test (void) { wc.pe_est_callback = pe_est_callback; wc.pe_callback = pe_callback; for (int i = 0; i < 100000; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(1)); } for (int i = 0; i < 8; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(2), 2, CACHETABLE_CLEAN, make_pair_attr(1)); } for (int i = 0; i < 4; i++) { - r = 
toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(3), 3, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(3), 3, CACHETABLE_CLEAN, make_pair_attr(1)); } for (int i = 0; i < 2; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(4), 4, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(4), 4, CACHETABLE_CLEAN, make_pair_attr(1)); } flush_may_occur = TRUE; diff --git a/newbrt/tests/cachetable-clone-checkpoint.c b/newbrt/tests/cachetable-clone-checkpoint.c new file mode 100644 index 00000000000..368969550e4 --- /dev/null +++ b/newbrt/tests/cachetable-clone-checkpoint.c @@ -0,0 +1,109 @@ +#ident "$Id: cachetable-simple-verify.c 39504 2012-02-03 16:19:33Z zardosht $" +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." +#include "includes.h" +#include "test.h" + + +static void +clone_callback(void* UU(value_data), void** cloned_value_data, PAIR_ATTR* new_attr, BOOL UU(for_checkpoint), void* UU(write_extraargs)) +{ + *cloned_value_data = (void *)1; + new_attr->is_valid = FALSE; +} + +BOOL clone_flush_started; +BOOL clone_flush_completed; +CACHETABLE ct; + +static void +flush ( + CACHEFILE f __attribute__((__unused__)), + int UU(fd), + CACHEKEY k __attribute__((__unused__)), + void *v __attribute__((__unused__)), + void** UU(dd), + void *e __attribute__((__unused__)), + PAIR_ATTR s __attribute__((__unused__)), + PAIR_ATTR* new_size __attribute__((__unused__)), + BOOL w __attribute__((__unused__)), + BOOL keep __attribute__((__unused__)), + BOOL c __attribute__((__unused__)), + BOOL is_clone + ) +{ + if (is_clone) { + clone_flush_started = TRUE; + usleep(4*1024*1024); + clone_flush_completed = TRUE; + } +} + +static void *run_end_checkpoint(void *arg) { + int r = toku_cachetable_end_checkpoint( + ct, + NULL, + fake_ydb_lock, + fake_ydb_unlock, + NULL, + NULL + ); + assert_zero(r); + return arg; +} + +// +// this test verifies that a PAIR that undergoes a checkpoint on the checkpoint thread is still pinnable while being written out +// +static void +cachetable_test (void) { + const int test_limit = 200; + int r; + ct = NULL; + r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0); + char fname1[] = __FILE__ "test1.dat"; + unlink(fname1); + CACHEFILE f1; + r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); + + void* v1; + long s1; + CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); + wc.flush_callback = flush; + wc.clone_callback = clone_callback; + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); + assert_zero(r); + r = toku_cachetable_begin_checkpoint(ct, NULL); + + + clone_flush_started = FALSE; + clone_flush_completed = FALSE; + toku_pthread_t checkpoint_tid; + r = toku_pthread_create(&checkpoint_tid, NULL, run_end_checkpoint, NULL); + assert_zero(r); + + usleep(1*1024*1024); + + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + 
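// the pin above returned while the checkpoint thread is still inside the clone's flush + // (clone_flush_started is set but clone_flush_completed is not), showing that the original + // PAIR stays pinnable while its clone is being written out +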
assert(clone_flush_started && !clone_flush_completed); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + assert_zero(r); + + void *ret; + r = toku_pthread_join(checkpoint_tid, &ret); + assert_zero(r); + assert(clone_flush_started && clone_flush_completed); + + toku_cachetable_verify(ct); + r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); + r = toku_cachetable_close(&ct); lazy_assert_zero(r); +} + +int +test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + cachetable_test(); + return 0; +} diff --git a/newbrt/tests/cachetable-clone-partial-fetch-pinned-node.c b/newbrt/tests/cachetable-clone-partial-fetch-pinned-node.c new file mode 100644 index 00000000000..5f859894bc1 --- /dev/null +++ b/newbrt/tests/cachetable-clone-partial-fetch-pinned-node.c @@ -0,0 +1,113 @@ +#ident "$Id: cachetable-simple-verify.c 39504 2012-02-03 16:19:33Z zardosht $" +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." +#include "includes.h" +#include "test.h" + + +BOOL flush_completed; +BOOL pf_called; + +static void +clone_callback(void* UU(value_data), void** cloned_value_data, PAIR_ATTR* new_attr, BOOL UU(for_checkpoint), void* UU(write_extraargs)) +{ + *cloned_value_data = (void *)1; + new_attr->is_valid = FALSE; +} + +static void +flush ( + CACHEFILE f __attribute__((__unused__)), + int UU(fd), + CACHEKEY k __attribute__((__unused__)), + void *v __attribute__((__unused__)), + void** UU(dd), + void *e __attribute__((__unused__)), + PAIR_ATTR s __attribute__((__unused__)), + PAIR_ATTR* new_size __attribute__((__unused__)), + BOOL w __attribute__((__unused__)), + BOOL keep __attribute__((__unused__)), + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) + ) +{ + if (is_clone) { + usleep(2*1024*1024); + flush_completed = TRUE; + } +} + +static int true_pf_callback(void* UU(brtnode_pv), void* UU(dd), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* sizep) { + assert(flush_completed); + pf_called = TRUE; + *sizep = make_pair_attr(9); + return 0; +} + + +// this test verifies that a partial fetch will wait for a cloned pair to complete +// writing to disk +static void +cachetable_test (void) { + const int test_limit = 12; + int r; + CACHETABLE ct; + r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0); + char fname1[] = __FILE__ "test1.dat"; + unlink(fname1); + CACHEFILE f1; + r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); + + void* v1; + long s1; + CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); + wc.clone_callback = clone_callback; + wc.flush_callback = flush; + + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); + assert_zero(r); + + flush_completed = FALSE; + r = toku_cachetable_begin_checkpoint(ct, NULL); assert_zero(r); + assert_zero(r); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + assert_zero(r); + + pf_called = FALSE; + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + assert(!pf_called); + 
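// run the partial fetch directly on the still-pinned pair; true_pf_callback asserts + // flush_completed, i.e. the partial fetch had to wait for the cloned pair to finish + // writing to disk +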
toku_cachetable_pf_pinned_pair(v1, true_pf_callback, NULL, f1, make_blocknum(1), 1); + assert(pf_called); + + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + assert_zero(r); + assert(pf_called); + + r = toku_cachetable_end_checkpoint( + ct, + NULL, + fake_ydb_lock, + fake_ydb_unlock, + NULL, + NULL + ); + assert_zero(r); + + + toku_cachetable_verify(ct); + r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); + r = toku_cachetable_close(&ct); lazy_assert_zero(r); + + +} + +int +test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + cachetable_test(); + return 0; +} diff --git a/newbrt/tests/cachetable-clone-partial-fetch.c b/newbrt/tests/cachetable-clone-partial-fetch.c new file mode 100644 index 00000000000..9877ddf9f35 --- /dev/null +++ b/newbrt/tests/cachetable-clone-partial-fetch.c @@ -0,0 +1,113 @@ +#ident "$Id: cachetable-simple-verify.c 39504 2012-02-03 16:19:33Z zardosht $" +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." +#include "includes.h" +#include "test.h" + + +BOOL flush_completed; +BOOL pf_called; + +static void +clone_callback(void* UU(value_data), void** cloned_value_data, PAIR_ATTR* new_attr, BOOL UU(for_checkpoint), void* UU(write_extraargs)) +{ + *cloned_value_data = (void *)1; + new_attr->is_valid = FALSE; +} + +static void +flush ( + CACHEFILE f __attribute__((__unused__)), + int UU(fd), + CACHEKEY k __attribute__((__unused__)), + void *v __attribute__((__unused__)), + void** UU(dd), + void *e __attribute__((__unused__)), + PAIR_ATTR s __attribute__((__unused__)), + PAIR_ATTR* new_size __attribute__((__unused__)), + BOOL w __attribute__((__unused__)), + BOOL keep __attribute__((__unused__)), + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) + ) +{ + if (is_clone) { + usleep(2*1024*1024); + flush_completed = TRUE; + } +} + +static BOOL true_pf_req_callback(void* UU(brtnode_pv), void* UU(read_extraargs)) { + return TRUE; +} + +static int true_pf_callback(void* UU(brtnode_pv), void* UU(dd), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* sizep) { + assert(flush_completed); + pf_called = TRUE; + *sizep = make_pair_attr(9); + return 0; +} + + +// this test verifies that a partial fetch will wait for a cloned pair to complete +// writing to disk +static void +cachetable_test (void) { + const int test_limit = 12; + int r; + CACHETABLE ct; + r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0); + char fname1[] = __FILE__ "test1.dat"; + unlink(fname1); + CACHEFILE f1; + r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); + + void* v1; + long s1; + CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); + wc.clone_callback = clone_callback; + wc.flush_callback = flush; + + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); + assert_zero(r); + + flush_completed = FALSE; + r = toku_cachetable_begin_checkpoint(ct, NULL); assert_zero(r); + assert_zero(r); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + assert_zero(r); + + pf_called = FALSE; + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, 
&s1, wc, def_fetch, true_pf_req_callback, true_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + assert_zero(r); + assert(pf_called); + + r = toku_cachetable_end_checkpoint( + ct, + NULL, + fake_ydb_lock, + fake_ydb_unlock, + NULL, + NULL + ); + assert_zero(r); + + + toku_cachetable_verify(ct); + r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); + r = toku_cachetable_close(&ct); lazy_assert_zero(r); + + +} + +int +test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + cachetable_test(); + return 0; +} diff --git a/newbrt/tests/cachetable-clone-pin-nonblocking.c b/newbrt/tests/cachetable-clone-pin-nonblocking.c new file mode 100644 index 00000000000..d1f46b13546 --- /dev/null +++ b/newbrt/tests/cachetable-clone-pin-nonblocking.c @@ -0,0 +1,96 @@ +#ident "$Id: cachetable-simple-verify.c 39504 2012-02-03 16:19:33Z zardosht $" +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." +#include "includes.h" +#include "test.h" + +static void +clone_callback(void* UU(value_data), void** cloned_value_data, PAIR_ATTR* new_attr, BOOL UU(for_checkpoint), void* UU(write_extraargs)) +{ + *cloned_value_data = (void *)1; + new_attr->is_valid = FALSE; +} + +static void +flush ( + CACHEFILE f __attribute__((__unused__)), + int UU(fd), + CACHEKEY k __attribute__((__unused__)), + void *v __attribute__((__unused__)), + void** UU(dd), + void *e __attribute__((__unused__)), + PAIR_ATTR s __attribute__((__unused__)), + PAIR_ATTR* new_size __attribute__((__unused__)), + BOOL w __attribute__((__unused__)), + BOOL keep __attribute__((__unused__)), + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) + ) +{ +} + + +// this test verifies the behavior of toku_cachetable_get_and_pin_nonblocking on a pair +// with a pending checkpoint, with and without a clone_callback, for clean and dirty pairs +static void +cachetable_test (enum cachetable_dirty dirty, BOOL cloneable) { + const int test_limit = 12; + int r; + CACHETABLE ct; + r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0); + char fname1[] = __FILE__ "test1.dat"; + unlink(fname1); + CACHEFILE f1; + r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); + + void* v1; + long s1; + CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); + wc.clone_callback = cloneable ?
clone_callback : NULL; + wc.flush_callback = flush; + + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, dirty, make_pair_attr(8)); + + // test that having a pin that passes FALSE for may_modify_value does not stall behind checkpoint + r = toku_cachetable_begin_checkpoint(ct, NULL); assert_zero(r); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, FALSE, NULL, NULL); + assert(r == 0); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + assert(r == 0); + + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); + if (dirty == CACHETABLE_DIRTY && !cloneable) { + assert(r == TOKUDB_TRY_AGAIN); + } + else { + assert(r == 0); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + } + + r = toku_cachetable_end_checkpoint( + ct, + NULL, + fake_ydb_lock, + fake_ydb_unlock, + NULL, + NULL + ); + assert_zero(r); + + + toku_cachetable_verify(ct); + r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); + r = toku_cachetable_close(&ct); lazy_assert_zero(r); + + +} + +int +test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + cachetable_test(CACHETABLE_DIRTY, TRUE); + cachetable_test(CACHETABLE_DIRTY, FALSE); + cachetable_test(CACHETABLE_CLEAN, TRUE); + cachetable_test(CACHETABLE_CLEAN, FALSE); + return 0; +} diff --git a/newbrt/tests/cachetable-clone-unpin-remove.c b/newbrt/tests/cachetable-clone-unpin-remove.c new file mode 100644 index 00000000000..781489a9d36 --- /dev/null +++ b/newbrt/tests/cachetable-clone-unpin-remove.c @@ -0,0 +1,102 @@ +#ident "$Id: cachetable-simple-verify.c 39504 2012-02-03 16:19:33Z zardosht $" +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." 
+#include "includes.h" +#include "test.h" + + +BOOL flush_completed; +BOOL evict_called; + +static void +clone_callback(void* UU(value_data), void** cloned_value_data, PAIR_ATTR* new_attr, BOOL UU(for_checkpoint), void* UU(write_extraargs)) +{ + *cloned_value_data = (void *)1; + new_attr->is_valid = FALSE; +} + +static void +flush ( + CACHEFILE f __attribute__((__unused__)), + int UU(fd), + CACHEKEY k __attribute__((__unused__)), + void *v __attribute__((__unused__)), + void** UU(dd), + void *e __attribute__((__unused__)), + PAIR_ATTR s __attribute__((__unused__)), + PAIR_ATTR* new_size __attribute__((__unused__)), + BOOL w __attribute__((__unused__)), + BOOL keep __attribute__((__unused__)), + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) + ) +{ + if (is_clone) { + usleep(2*1024*1024); + flush_completed = TRUE; + } + else if (!keep && !is_clone) { + assert(flush_completed); + evict_called = TRUE; + } +} + + + +// this test verifies that unpin_and_remove on a pair whose clone is still being written +// out works correctly: the final flush (keep_me FALSE) asserts the clone's flush completed first +static void +cachetable_test (void) { + const int test_limit = 12; + int r; + CACHETABLE ct; + r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0); + char fname1[] = __FILE__ "test1.dat"; + unlink(fname1); + CACHEFILE f1; + r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); + + void* v1; + long s1; + CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); + wc.clone_callback = clone_callback; + wc.flush_callback = flush; + + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), CACHETABLE_DIRTY, make_pair_attr(8)); + assert_zero(r); + + flush_completed = FALSE; + evict_called = FALSE; + r = toku_cachetable_begin_checkpoint(ct, NULL); assert_zero(r); + assert_zero(r); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + assert_zero(r); + r = toku_cachetable_unpin_and_remove(f1, make_blocknum(1), NULL, NULL); + assert_zero(r); + + + r = toku_cachetable_end_checkpoint( + ct, + NULL, + fake_ydb_lock, + fake_ydb_unlock, + NULL, + NULL + ); + assert_zero(r); + + + toku_cachetable_verify(ct); + r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); + r = toku_cachetable_close(&ct); lazy_assert_zero(r); + + +} + +int +test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + cachetable_test(); + return 0; +} diff --git a/newbrt/tests/cachetable-eviction-close-test.c b/newbrt/tests/cachetable-eviction-close-test.c index a59c2c967b5..057d5db2488 100644 --- a/newbrt/tests/cachetable-eviction-close-test.c +++ b/newbrt/tests/cachetable-eviction-close-test.c @@ -15,12 +15,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(expect_full_flush); sleep(2); @@ -34,6 +36,7 @@ fetch (CACHEFILE f
__attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) @@ -50,7 +53,8 @@ fetch (CACHEFILE f __attribute__((__unused__)), static void pe_est_callback( - void* UU(brtnode_pv), + void* UU(brtnode_pv), + void* UU(dd), long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* UU(write_extraargs) @@ -95,6 +99,7 @@ static void cachetable_eviction_full_test (void) { fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); @@ -115,6 +120,7 @@ static void cachetable_eviction_full_test (void) { fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); diff --git a/newbrt/tests/cachetable-eviction-close-test2.c b/newbrt/tests/cachetable-eviction-close-test2.c index 4861dc5dd25..d69835a514c 100644 --- a/newbrt/tests/cachetable-eviction-close-test2.c +++ b/newbrt/tests/cachetable-eviction-close-test2.c @@ -15,12 +15,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(expect_full_flush); } @@ -33,6 +35,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) @@ -49,7 +52,8 @@ fetch (CACHEFILE f __attribute__((__unused__)), static void pe_est_callback( - void* UU(brtnode_pv), + void* UU(brtnode_pv), + void* UU(dd), long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* UU(write_extraargs) @@ -108,6 +112,7 @@ static void cachetable_eviction_full_test (void) { fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); @@ -129,6 +134,7 @@ static void cachetable_eviction_full_test (void) { fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); diff --git a/newbrt/tests/cachetable-eviction-getandpin-test.c b/newbrt/tests/cachetable-eviction-getandpin-test.c index d7108b54e02..443b1fa97a4 100644 --- a/newbrt/tests/cachetable-eviction-getandpin-test.c +++ b/newbrt/tests/cachetable-eviction-getandpin-test.c @@ -13,12 +13,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { if (do_sleep) { sleep(2); @@ -59,6 +61,7 @@ static void cachetable_predef_fetch_maybegetandpin_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); @@ -83,6 +86,7 @@ static void cachetable_predef_fetch_maybegetandpin_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); 
assert(r==0); @@ -95,9 +99,9 @@ static void cachetable_predef_fetch_maybegetandpin_test (void) { // now verify that the block we are trying to evict may be pinned wc = def_write_callback(NULL); wc.flush_callback = flush; - r = toku_cachetable_get_and_pin_nonblocking(f1, key, fullhash, &v, &size, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, key, fullhash, &v, &size, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); assert(r == TOKUDB_TRY_AGAIN); - r = toku_cachetable_get_and_pin(f1, key, fullhash, &v, &size, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, key, fullhash, &v, &size, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); assert(r == 0 && v == 0 && size == 8); do_sleep = FALSE; diff --git a/newbrt/tests/cachetable-eviction-getandpin-test2.c b/newbrt/tests/cachetable-eviction-getandpin-test2.c index eba1b77a862..7b92e11c724 100644 --- a/newbrt/tests/cachetable-eviction-getandpin-test2.c +++ b/newbrt/tests/cachetable-eviction-getandpin-test2.c @@ -10,6 +10,7 @@ static void pe_est_callback( void* UU(brtnode_pv), + void* UU(dd), long* bytes_freed_estimate, enum partial_eviction_cost *cost, void* UU(write_extraargs) @@ -67,6 +68,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); @@ -89,6 +91,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); @@ -109,6 +112,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, NULL ); @@ -123,6 +127,7 @@ static void cachetable_prefetch_maybegetandpin_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL ); assert(r == 0 && v == 0 && size == 1); diff --git a/newbrt/tests/cachetable-flush-during-cleaner.c b/newbrt/tests/cachetable-flush-during-cleaner.c index c533783d1f6..dc0cba3fe40 100644 --- a/newbrt/tests/cachetable-flush-during-cleaner.c +++ b/newbrt/tests/cachetable-flush-during-cleaner.c @@ -39,7 +39,7 @@ cachetable_test (void) { for (int i = 0; i < 10; i++) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.cleaner_callback = cleaner_callback; - r = toku_cachetable_get_and_pin(f1, make_blocknum(i), i, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(i), i, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(i), i, CACHETABLE_DIRTY, make_pair_attr(8)); } r = toku_cachefile_flush(f1); diff --git a/newbrt/tests/cachetable-getandpin-test.c b/newbrt/tests/cachetable-getandpin-test.c index a4730e30dad..c874b921b45 100644 --- a/newbrt/tests/cachetable-getandpin-test.c +++ b/newbrt/tests/cachetable-getandpin-test.c @@ -7,12 +7,14 @@ flush (CACHEFILE cf __attribute__((__unused__)), int UU(fd), CACHEKEY key __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *extraargs __attribute__((__unused__)), PAIR_ATTR size __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me __attribute__((__unused__)), BOOL keep_me __attribute__((__unused__)), - BOOL for_checkpoint __attribute__((__unused__)) + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert((long) key.b == size.size); if (!keep_me) 
toku_free(v); @@ -25,6 +27,7 @@ fetch ( CACHEKEY key, u_int32_t hash, void **vptr, + void** UU(dd), PAIR_ATTR *sizep, int *dirtyp, void *extra @@ -57,7 +60,7 @@ cachetable_getandpin_test (int n) { void *v; long size; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - r = toku_cachetable_get_and_pin(f1, make_blocknum(i), hi, &v, &size, wc, fetch, def_pf_req_callback, def_pf_callback, 0); + r = toku_cachetable_get_and_pin(f1, make_blocknum(i), hi, &v, &size, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); assert(r == 0); assert(size == i); diff --git a/newbrt/tests/cachetable-kibbutz_and_flush_cachefile.c b/newbrt/tests/cachetable-kibbutz_and_flush_cachefile.c index 7c89792e6ca..0206bf472ea 100644 --- a/newbrt/tests/cachetable-kibbutz_and_flush_cachefile.c +++ b/newbrt/tests/cachetable-kibbutz_and_flush_cachefile.c @@ -35,14 +35,14 @@ run_test (void) { long s1; //long s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); foo = FALSE; cachefile_kibbutz_enq(f1, kibbutz_work, f1); r = toku_cachefile_flush(f1); assert(r == 0); assert(foo); assert(f1); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); foo = FALSE; cachefile_kibbutz_enq(f1, kibbutz_work, f1); r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); diff --git a/newbrt/tests/cachetable-partial-fetch.c b/newbrt/tests/cachetable-partial-fetch.c index 5390e52dd92..615adc39d89 100644 --- a/newbrt/tests/cachetable-partial-fetch.c +++ b/newbrt/tests/cachetable-partial-fetch.c @@ -17,6 +17,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -33,6 +34,7 @@ err_fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -50,17 +52,17 @@ static BOOL true_pf_req_callback(void* UU(brtnode_pv), void* UU(read_extraargs)) return TRUE; } -static int err_pf_callback(void* UU(brtnode_pv), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { +static int err_pf_callback(void* UU(brtnode_pv), void* UU(dd), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { assert(FALSE); return 0; // gcov } -static int pf_callback(void* UU(brtnode_pv), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { +static int pf_callback(void* UU(brtnode_pv), void* UU(dd), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { assert(FALSE); return 0; // gcov } -static int true_pf_callback(void* UU(brtnode_pv), void* read_extraargs, int UU(fd), PAIR_ATTR* sizep) { +static int true_pf_callback(void* UU(brtnode_pv), void* UU(dd), void* read_extraargs, int UU(fd), PAIR_ATTR* sizep) { pf_req_called = TRUE; *sizep = 
make_pair_attr(sizeof(fetch_val)+1); assert(read_extraargs == &fetch_val); @@ -85,7 +87,7 @@ cachetable_test (void) { long s1; //long s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, pf_req_callback, pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, pf_req_callback, pf_callback, TRUE, NULL); assert(&fetch_val == v1); // // verify that a prefetch of this node will fail @@ -108,14 +110,14 @@ cachetable_test (void) { // // now get and pin node again, and make sure that partial fetch and fetch are not called // - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, pf_req_callback, err_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, pf_req_callback, err_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); // // now make sure that if we say a partial fetch is required, that we get a partial fetch // and that read_extraargs properly passed down // pf_req_called = FALSE; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, true_pf_req_callback, true_pf_callback, &fetch_val); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, true_pf_req_callback, true_pf_callback, TRUE, &fetch_val); assert(pf_req_called); assert(s1 == sizeof(fetch_val)+1); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); @@ -143,7 +145,7 @@ cachetable_test (void) { // // now verify we can pin it, and NO fetch callback should get called // - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, pf_req_callback, err_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, pf_req_callback, err_pf_callback, TRUE, NULL); assert(&fetch_val == v1); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); @@ -162,7 +164,7 @@ cachetable_test (void) { &doing_prefetch ); assert(doing_prefetch); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, pf_req_callback, err_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, err_fetch, pf_req_callback, err_pf_callback, TRUE, NULL); assert(&fetch_val == v1); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); diff --git a/newbrt/tests/cachetable-pin-checkpoint.c b/newbrt/tests/cachetable-pin-checkpoint.c index 586a9966060..02224452dfa 100644 --- a/newbrt/tests/cachetable-pin-checkpoint.c +++ b/newbrt/tests/cachetable-pin-checkpoint.c @@ -22,18 +22,35 @@ int64_t checkpointed_data[NUM_ELEMENTS]; u_int32_t time_of_test; BOOL run_test; +static void +clone_callback( + void* value_data, + void** cloned_value_data, + PAIR_ATTR* new_attr, + BOOL UU(for_checkpoint), + void* UU(write_extraargs) + ) +{ + new_attr->is_valid = FALSE; + int64_t* data_val = toku_xmalloc(sizeof(int64_t)); + *data_val = *(int64_t *)value_data; + *cloned_value_data = data_val; +} + static void flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me, BOOL keep_me, - BOOL checkpoint_me + BOOL 
checkpoint_me, + BOOL UU(is_clone) ) { /* Do nothing */ int64_t val_to_write = *(int64_t *)v; @@ -55,6 +72,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k, u_int32_t fullhash __attribute__((__unused__)), void **value, + void** UU(dd), PAIR_ATTR *sizep, int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -120,6 +138,7 @@ static void *move_numbers(void *arg) { enum cachetable_dirty less_dirty = CACHETABLE_DIRTY; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; + wc.clone_callback = clone_callback; r = toku_cachetable_get_and_pin_with_dep_pairs( f1, less_key, @@ -127,6 +146,7 @@ static void *move_numbers(void *arg) { &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 0, //num_dependent_pairs NULL, @@ -148,6 +168,7 @@ static void *move_numbers(void *arg) { &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 1, //num_dependent_pairs &f1, @@ -181,6 +202,7 @@ static void *move_numbers(void *arg) { &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 1, //num_dependent_pairs &f1, @@ -210,6 +232,7 @@ static void *read_random_numbers(void *arg) { int r1; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; + wc.clone_callback = clone_callback; r1 = toku_cachetable_get_and_pin_nonblocking( f1, make_blocknum(rand_key1), @@ -217,6 +240,7 @@ static void *read_random_numbers(void *arg) { &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + FALSE, NULL, NULL ); @@ -259,6 +283,7 @@ static void *checkpoints(void *arg) { sum += checkpointed_data[i]; } assert (sum==0); + usleep(10*1024); num_checkpoints++; } return arg; diff --git a/newbrt/tests/cachetable-pin-nonblocking-checkpoint-clean.c b/newbrt/tests/cachetable-pin-nonblocking-checkpoint-clean.c index c288775575c..dd071abe8da 100644 --- a/newbrt/tests/cachetable-pin-nonblocking-checkpoint-clean.c +++ b/newbrt/tests/cachetable-pin-nonblocking-checkpoint-clean.c @@ -24,15 +24,15 @@ run_test (void) { long s1; long s2; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); assert(r==0); for (int i = 0; i < 20; i++) { - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(2), 2, CACHETABLE_CLEAN, make_pair_attr(8)); assert(r==0); } - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v2, &s2, def_write_callback(NULL), def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_begin_checkpoint(ct, NULL); // mark nodes as pending a checkpoint, so that get_and_pin_nonblocking on block 1 will return TOKUDB_TRY_AGAIN r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); assert(r==0); @@ -47,6 +47,7 @@ run_test (void) { def_fetch, def_pf_req_callback, def_pf_callback, 
+ TRUE, NULL, NULL ); diff --git a/newbrt/tests/cachetable-prefetch-checkpoint-test.c b/newbrt/tests/cachetable-prefetch-checkpoint-test.c index f721e0a6f5f..e4f9084cfe4 100644 --- a/newbrt/tests/cachetable-prefetch-checkpoint-test.c +++ b/newbrt/tests/cachetable-prefetch-checkpoint-test.c @@ -20,12 +20,14 @@ static void flush( int UU(fd), CACHEKEY key, void *value, + void** UU(dd), void *extraargs, PAIR_ATTR size, PAIR_ATTR* UU(new_size), BOOL write_me, BOOL keep_me, - BOOL UU(for_checkpoint) + BOOL UU(for_checkpoint), + BOOL UU(is_clone) ) { cf = cf; key = key; value = value; extraargs = extraargs; @@ -42,6 +44,7 @@ static int fetch( CACHEKEY key, u_int32_t fullhash, void **value, + void** UU(dd), PAIR_ATTR *sizep, int *dirtyp, void *extraargs diff --git a/newbrt/tests/cachetable-prefetch-close-leak-test.c b/newbrt/tests/cachetable-prefetch-close-leak-test.c index 6384b5d487c..488f9f91dda 100644 --- a/newbrt/tests/cachetable-prefetch-close-leak-test.c +++ b/newbrt/tests/cachetable-prefetch-close-leak-test.c @@ -11,12 +11,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(w == FALSE && v != NULL); toku_free(v); @@ -30,6 +32,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) diff --git a/newbrt/tests/cachetable-prefetch-close-test.c b/newbrt/tests/cachetable-prefetch-close-test.c index 353a376d6c8..f3b1b5f6e2c 100644 --- a/newbrt/tests/cachetable-prefetch-close-test.c +++ b/newbrt/tests/cachetable-prefetch-close-test.c @@ -13,12 +13,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(w == FALSE); } @@ -31,6 +33,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) @@ -80,6 +83,7 @@ static void cachetable_prefetch_full_test (BOOL partial_fetch) { fetch, def_pf_req_callback, def_pf_callback, + TRUE, 0 ); assert(r==0); diff --git a/newbrt/tests/cachetable-prefetch-flowcontrol-test.c b/newbrt/tests/cachetable-prefetch-flowcontrol-test.c index 78c9a956c1c..c95318b0fa7 100644 --- a/newbrt/tests/cachetable-prefetch-flowcontrol-test.c +++ b/newbrt/tests/cachetable-prefetch-flowcontrol-test.c @@ -17,12 +17,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k, void *v __attribute__((__unused__)), + void** UU(dd), 
void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w, BOOL keep, - BOOL f_ckpt __attribute__((__unused__)) + BOOL f_ckpt __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(w == FALSE); flush_calls++; @@ -41,6 +43,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k, u_int32_t fullhash __attribute__((__unused__)), void **value, + void** UU(dd), PAIR_ATTR *sizep, int *dirtyp, void *extraargs __attribute__((__unused__)) diff --git a/newbrt/tests/cachetable-prefetch-getandpin-test.c b/newbrt/tests/cachetable-prefetch-getandpin-test.c index f7cef9c7781..0f6fee988ac 100644 --- a/newbrt/tests/cachetable-prefetch-getandpin-test.c +++ b/newbrt/tests/cachetable-prefetch-getandpin-test.c @@ -14,12 +14,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { assert(w == FALSE); } @@ -30,6 +32,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) @@ -55,7 +58,7 @@ static BOOL pf_req_callback(void* UU(brtnode_pv), void* UU(read_extraargs)) { } } -static int pf_callback(void* UU(brtnode_pv), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { +static int pf_callback(void* UU(brtnode_pv), void* UU(dd), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) { assert(expect_pf); sleep(2); *sizep = make_pair_attr(2); @@ -97,6 +100,7 @@ static void cachetable_prefetch_maybegetandpin_test (BOOL do_partial_fetch) { fetch, pf_req_callback, pf_callback, + TRUE, 0 ); assert(r==0); @@ -115,9 +119,9 @@ static void cachetable_prefetch_maybegetandpin_test (BOOL do_partial_fetch) { void *v = 0; long size = 0; do_pf = FALSE; - r = toku_cachetable_get_and_pin_nonblocking(f1, key, fullhash, &v, &size, wc, fetch, pf_req_callback, pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, key, fullhash, &v, &size, wc, fetch, pf_req_callback, pf_callback, TRUE, NULL, NULL); assert(r==TOKUDB_TRY_AGAIN); - r = toku_cachetable_get_and_pin(f1, key, fullhash, &v, &size, wc, fetch, pf_req_callback, pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, key, fullhash, &v, &size, wc, fetch, pf_req_callback, pf_callback, TRUE, NULL); assert(r == 0 && v == 0 && size == 2); struct timeval tend; diff --git a/newbrt/tests/cachetable-prefetch-maybegetandpin-test.c b/newbrt/tests/cachetable-prefetch-maybegetandpin-test.c index f838880382d..14c256ece64 100644 --- a/newbrt/tests/cachetable-prefetch-maybegetandpin-test.c +++ b/newbrt/tests/cachetable-prefetch-maybegetandpin-test.c @@ -12,6 +12,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) diff --git 
a/newbrt/tests/cachetable-prefetch2-test.c b/newbrt/tests/cachetable-prefetch2-test.c index 636e327a6cc..f46539f73dd 100644 --- a/newbrt/tests/cachetable-prefetch2-test.c +++ b/newbrt/tests/cachetable-prefetch2-test.c @@ -15,6 +15,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) diff --git a/newbrt/tests/cachetable-put-checkpoint.c b/newbrt/tests/cachetable-put-checkpoint.c index 223bbe42a4b..9430f24fa3e 100644 --- a/newbrt/tests/cachetable-put-checkpoint.c +++ b/newbrt/tests/cachetable-put-checkpoint.c @@ -25,23 +25,41 @@ int64_t checkpointed_data[NUM_ELEMENTS]; u_int32_t time_of_test; BOOL run_test; +static void +clone_callback( + void* value_data, + void** cloned_value_data, + PAIR_ATTR* new_attr, + BOOL UU(for_checkpoint), + void* UU(write_extraargs) + ) +{ + new_attr->is_valid = FALSE; + int64_t* data_val = toku_xmalloc(sizeof(int64_t)); + *data_val = *(int64_t *)value_data; + *cloned_value_data = data_val; + *new_attr = make_pair_attr(8); +} static void flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), - PAIR_ATTR* new_size __attribute__((__unused__)), + PAIR_ATTR* new_size, BOOL write_me, BOOL keep_me, - BOOL checkpoint_me + BOOL checkpoint_me, + BOOL UU(is_clone) ) { int64_t val_to_write = *(int64_t *)v; size_t data_index = (size_t)k.b; if (write_me) { usleep(10); + *new_size = make_pair_attr(8); data[data_index] = val_to_write; if (checkpoint_me) checkpointed_data[data_index] = val_to_write; } @@ -56,6 +74,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k, u_int32_t fullhash __attribute__((__unused__)), void **value, + void** UU(dd), PAIR_ATTR *sizep, int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -113,6 +132,7 @@ static void move_number_to_child( u_int32_t child_fullhash = toku_cachetable_hash(f1, child_key); CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; + wc.clone_callback = clone_callback; r = toku_cachetable_get_and_pin_with_dep_pairs( f1, child_key, @@ -120,6 +140,7 @@ static void move_number_to_child( &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 1, //num_dependent_pairs &f1, @@ -158,6 +179,7 @@ static void *move_numbers(void *arg) { u_int32_t parent_fullhash = toku_cachetable_hash(f1, parent_key); CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; + wc.clone_callback = clone_callback; r = toku_cachetable_get_and_pin_with_dep_pairs( f1, parent_key, @@ -165,6 +187,7 @@ static void *move_numbers(void *arg) { &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 0, //num_dependent_pairs NULL, @@ -222,6 +245,7 @@ static void merge_and_split_child( enum cachetable_dirty child_dirty = CACHETABLE_CLEAN; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; + wc.clone_callback = clone_callback; r = toku_cachetable_get_and_pin_with_dep_pairs( f1, child_key, @@ -229,6 +253,7 @@ static void merge_and_split_child( &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 1, //num_dependent_pairs &f1, @@ -262,6 +287,7 @@ static void 
merge_and_split_child( &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 2, //num_dependent_pairs cfs, @@ -330,6 +356,7 @@ static void *merge_and_split(void *arg) { u_int32_t parent_fullhash = toku_cachetable_hash(f1, parent_key); CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; + wc.clone_callback = clone_callback; r = toku_cachetable_get_and_pin_with_dep_pairs( f1, parent_key, @@ -337,6 +364,7 @@ static void *merge_and_split(void *arg) { &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, NULL, 0, //num_dependent_pairs NULL, diff --git a/newbrt/tests/cachetable-rename-test.c b/newbrt/tests/cachetable-rename-test.c index e33cc7352e7..c71f7093c28 100644 --- a/newbrt/tests/cachetable-rename-test.c +++ b/newbrt/tests/cachetable-rename-test.c @@ -40,12 +40,15 @@ static void r_flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k, void *value, + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me __attribute__((__unused__)), BOOL keep_me, - BOOL for_checkpoint __attribute__((__unused__))) { + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) + ) { int i; //printf("Flush\n"); if (keep_me) return; @@ -74,6 +77,7 @@ static int r_fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY key __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void**value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void*extraargs __attribute__((__unused__))) { @@ -131,7 +135,7 @@ static void test_rename (void) { if (verbose) printf("Rename %" PRIx64 " to %" PRIx64 "\n", okey.b, nkey.b); CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = r_flush; - r = toku_cachetable_get_and_pin(f, okey, toku_cachetable_hash(f, okey), &current_value, &current_size, wc, r_fetch, def_pf_req_callback, def_pf_callback, 0); + r = toku_cachetable_get_and_pin(f, okey, toku_cachetable_hash(f, okey), &current_value, &current_size, wc, r_fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); if (r == -42) continue; assert(r==0); r = toku_cachetable_rename(f, okey, nkey); diff --git a/newbrt/tests/cachetable-scan.c b/newbrt/tests/cachetable-scan.c index 45aa653bb68..801f74299f2 100644 --- a/newbrt/tests/cachetable-scan.c +++ b/newbrt/tests/cachetable-scan.c @@ -13,12 +13,15 @@ static void f_flush (CACHEFILE f, int UU(fd), CACHEKEY key, void *value, + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size, PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me, BOOL keep_me, - BOOL for_checkpoint __attribute__((__unused__))) { + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) + ) { assert(size.size==BLOCKSIZE); if (write_me) { toku_os_full_pwrite(toku_cachefile_get_and_pin_fd(f), value, BLOCKSIZE, key.b); @@ -34,6 +37,7 @@ static int f_fetch (CACHEFILE f, CACHEKEY key, u_int32_t fullhash __attribute__((__unused__)), void**value, + void** UU(dd), PAIR_ATTR *sizep, int *dirtyp, void*extraargs __attribute__((__unused__))) { @@ -93,7 +97,7 @@ static void readit (void) { u_int32_t fullhash = toku_cachetable_hash(f, key); CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = f_flush; - r=toku_cachetable_get_and_pin(f, key, fullhash, &block, &current_size, wc, f_fetch, def_pf_req_callback, def_pf_callback, 0); assert(r==0); + r=toku_cachetable_get_and_pin(f,
key, fullhash, &block, &current_size, wc, f_fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); assert(r==0); r=toku_cachetable_unpin(f, key, fullhash, CACHETABLE_CLEAN, make_pair_attr(BLOCKSIZE)); assert(r==0); } r = toku_cachefile_close(&f, 0, FALSE, ZERO_LSN); assert(r == 0); diff --git a/newbrt/tests/cachetable-simple-clone.c b/newbrt/tests/cachetable-simple-clone.c new file mode 100644 index 00000000000..ded7f11d6a8 --- /dev/null +++ b/newbrt/tests/cachetable-simple-clone.c @@ -0,0 +1,153 @@ +#ident "$Id: cachetable-simple-verify.c 39504 2012-02-03 16:19:33Z zardosht $" +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved." +#include "includes.h" +#include "test.h" + +BOOL clone_called; +BOOL check_flush; +BOOL flush_expected; +BOOL flush_called; + +static void +clone_callback(void* UU(value_data), void** cloned_value_data, PAIR_ATTR* new_attr, BOOL UU(for_checkpoint), void* UU(write_extraargs)) +{ + *cloned_value_data = (void *)1; + new_attr->is_valid = FALSE; + clone_called = TRUE; +} + +static void +flush ( + CACHEFILE f __attribute__((__unused__)), + int UU(fd), + CACHEKEY k __attribute__((__unused__)), + void *v __attribute__((__unused__)), + void** UU(dd), + void *e __attribute__((__unused__)), + PAIR_ATTR s __attribute__((__unused__)), + PAIR_ATTR* new_size __attribute__((__unused__)), + BOOL w __attribute__((__unused__)), + BOOL keep __attribute__((__unused__)), + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) + ) +{ + if (w) usleep(5*1024*1024); + if (w && check_flush) { + assert(flush_expected); + if (clone_called) assert(is_clone); + } + flush_called = TRUE; + if (is_clone) assert(!keep); +} + +static uint64_t tdelta_usec(struct timeval *tend, struct timeval *tstart) { + uint64_t t = tend->tv_sec * 1000000 + tend->tv_usec; + t -= tstart->tv_sec * 1000000 + tstart->tv_usec; + return t; +} + + +// +// test the following things for simple cloning: +// - if the pending pair is clean, nothing gets written +// - if the pending pair is dirty and cloneable, then the pair is written +// in the background and get_and_pin returns immediately +// - if the pending pair is dirty and not cloneable, then get_and_pin +// blocks until the pair is written out +// +static void +test_clean (enum cachetable_dirty dirty, BOOL cloneable) { + const int test_limit = 12; + int r; + CACHETABLE ct; + r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0); + char fname1[] = __FILE__ "test1.dat"; + unlink(fname1); + CACHEFILE f1; + r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); + + void* v1; + long s1; + CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); + wc.clone_callback = cloneable ? clone_callback : NULL; + wc.flush_callback = flush; + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, dirty, make_pair_attr(8)); + + check_flush = TRUE; + clone_called = FALSE; + flush_expected = (dirty == CACHETABLE_DIRTY) ?
TRUE : FALSE; + flush_called = FALSE; + // begin checkpoint; if the pair is clean, the clone should not + // be called + r = toku_cachetable_begin_checkpoint(ct, NULL); + assert_zero(r); + struct timeval tstart; + struct timeval tend; + gettimeofday(&tstart, NULL); + + // test that having a pin that passes FALSE for may_modify_value does not stall behind checkpoint + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, FALSE, NULL); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + gettimeofday(&tend, NULL); + assert(tdelta_usec(&tend, &tstart) <= 2000000); + assert(!clone_called); + + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + gettimeofday(&tend, NULL); + + // a write takes 5 seconds + // we check that the time to pin is less than 2 seconds; if it is, + // then we know the act of cloning worked properly + if (cloneable || !dirty ) { + assert(tdelta_usec(&tend, &tstart) <= 2000000); + } + else { + assert(tdelta_usec(&tend, &tstart) >= 2000000); + } + + + if (dirty == CACHETABLE_DIRTY && cloneable) { + assert(clone_called); + } + else { + assert(!clone_called); + } + + // at this point, there should be no more dirty writes + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); + gettimeofday(&tend, NULL); + if (cloneable || !dirty ) { + assert(tdelta_usec(&tend, &tstart) <= 2000000); + } + else { + assert(tdelta_usec(&tend, &tstart) >= 2000000); + } + + r = toku_cachetable_end_checkpoint( + ct, + NULL, + fake_ydb_lock, + fake_ydb_unlock, + NULL, + NULL + ); + assert_zero(r); + + check_flush = FALSE; + + toku_cachetable_verify(ct); + r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); + r = toku_cachetable_close(&ct); lazy_assert_zero(r); +} + +int +test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + test_clean(CACHETABLE_CLEAN, TRUE); + test_clean(CACHETABLE_DIRTY, TRUE); + test_clean(CACHETABLE_CLEAN, FALSE); + test_clean(CACHETABLE_DIRTY, FALSE); + return 0; +} diff --git a/newbrt/tests/cachetable-simple-clone2.c b/newbrt/tests/cachetable-simple-clone2.c new file mode 100644 index 00000000000..7fd42429f6c --- /dev/null +++ b/newbrt/tests/cachetable-simple-clone2.c @@ -0,0 +1,103 @@ +#ident "$Id: cachetable-simple-verify.c 39504 2012-02-03 16:19:33Z zardosht $" +#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
+#include "includes.h" +#include "test.h" + +BOOL clone_called; +BOOL check_flush; +BOOL flush_expected; +BOOL flush_called; + +static void +clone_callback(void* UU(value_data), void** cloned_value_data, PAIR_ATTR* new_attr, BOOL UU(for_checkpoint), void* UU(write_extraargs)) +{ + *cloned_value_data = (void *)1; + new_attr->is_valid = FALSE; + clone_called = TRUE; +} + +static void +flush ( + CACHEFILE f __attribute__((__unused__)), + int UU(fd), + CACHEKEY k __attribute__((__unused__)), + void *v __attribute__((__unused__)), + void** UU(dd), + void *e __attribute__((__unused__)), + PAIR_ATTR s __attribute__((__unused__)), + PAIR_ATTR* new_size __attribute__((__unused__)), + BOOL w __attribute__((__unused__)), + BOOL keep __attribute__((__unused__)), + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) + ) +{ + if (w && check_flush) { + assert(flush_expected); + flush_called = TRUE; + } +} + +// +// test the following things for simple cloning: +// - verifies that after the checkpoint ends, the PAIR is properly +// dirty or clean based on the second unpin +// +static void +test_clean (enum cachetable_dirty dirty, BOOL cloneable) { + const int test_limit = 200; + int r; + CACHETABLE ct; + r = toku_create_cachetable(&ct, test_limit, ZERO_LSN, NULL_LOGGER); assert(r == 0); + char fname1[] = __FILE__ "test1.dat"; + unlink(fname1); + CACHEFILE f1; + r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0); + check_flush = FALSE; + + void* v1; + long s1; + CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); + wc.clone_callback = cloneable ? clone_callback : NULL; + wc.flush_callback = flush; + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); + + // begin checkpoint; the pair was unpinned dirty above, so it is + // marked pending for the checkpoint + r = toku_cachetable_begin_checkpoint(ct, NULL); + assert_zero(r); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); + + // at this point, there should be no more dirty writes + r = toku_cachetable_unpin(f1, make_blocknum(1), 1, dirty, make_pair_attr(8)); + usleep(2*1024*1024); + r = toku_cachetable_end_checkpoint( + ct, + NULL, + fake_ydb_lock, + fake_ydb_unlock, + NULL, + NULL + ); + assert_zero(r); + + check_flush = TRUE; + flush_expected = (dirty == CACHETABLE_DIRTY) ?
TRUE : FALSE; + flush_called = FALSE; + + toku_cachetable_verify(ct); + r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); + r = toku_cachetable_close(&ct); lazy_assert_zero(r); + if (flush_expected) assert(flush_called); +} + +int +test_main(int argc, const char *argv[]) { + default_parse_args(argc, argv); + test_clean(CACHETABLE_CLEAN, TRUE); + test_clean(CACHETABLE_DIRTY, TRUE); + test_clean(CACHETABLE_CLEAN, FALSE); + test_clean(CACHETABLE_DIRTY, FALSE); + return 0; +} diff --git a/newbrt/tests/cachetable-simple-maybe-get-pin.c b/newbrt/tests/cachetable-simple-maybe-get-pin.c index e4c80b0d83d..66807bb5b8d 100644 --- a/newbrt/tests/cachetable-simple-maybe-get-pin.c +++ b/newbrt/tests/cachetable-simple-maybe-get-pin.c @@ -24,7 +24,7 @@ cachetable_test (void) { // nothing in cachetable, so this should fail r = toku_cachetable_maybe_get_and_pin(f1, make_blocknum(1), 1, &v1); assert(r==-1); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); // maybe_get_and_pin_clean should succeed, maybe_get_and_pin should fail diff --git a/newbrt/tests/cachetable-simple-pin-dep-nodes.c b/newbrt/tests/cachetable-simple-pin-dep-nodes.c index 1f8043cd720..f877bf82276 100644 --- a/newbrt/tests/cachetable-simple-pin-dep-nodes.c +++ b/newbrt/tests/cachetable-simple-pin-dep-nodes.c @@ -18,12 +18,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -50,6 +52,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -80,9 +83,9 @@ cachetable_test (BOOL write_first, BOOL write_second, BOOL start_checkpoint) { CACHETABLE_WRITE_CALLBACK wc = def_write_callback(&val1); wc.flush_callback = flush; wc.write_extraargs = &val1; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, &val1); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, &val1); wc.write_extraargs = &val2; - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, &val2); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, &val2); CACHEFILE dependent_cfs[2]; dependent_cfs[0] = f1; @@ -117,6 +120,7 @@ cachetable_test (BOOL write_first, BOOL write_second, BOOL start_checkpoint) { &v3, &s3, wc, fetch, def_pf_req_callback, def_pf_callback, + TRUE, &val3, 2, //num_dependent_pairs dependent_cfs, diff --git a/newbrt/tests/cachetable-simple-pin-nonblocking.c 
b/newbrt/tests/cachetable-simple-pin-nonblocking.c index 272ab9b77fb..3d71a1f50b9 100644 --- a/newbrt/tests/cachetable-simple-pin-nonblocking.c +++ b/newbrt/tests/cachetable-simple-pin-nonblocking.c @@ -15,12 +15,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -36,7 +38,7 @@ flush (CACHEFILE f __attribute__((__unused__)), static BOOL true_def_pf_req_callback(void* UU(brtnode_pv), void* UU(read_extraargs)) { return TRUE; } -static int true_def_pf_callback(void* UU(brtnode_pv), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* sizep) { +static int true_def_pf_callback(void* UU(brtnode_pv), void* UU(dd), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* sizep) { *sizep = make_pair_attr(8); return 0; } @@ -85,33 +87,33 @@ run_test (void) { // because the PAIR was not in the cachetable. // is_fake_locked = TRUE; - r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); assert(r==TOKUDB_TRY_AGAIN); assert(is_fake_locked); // now it should succeed - r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); assert(r==0); assert(is_fake_locked); foo = FALSE; cachefile_kibbutz_enq(f1, kibbutz_work, f1); // because node is in use, should return TOKUDB_TRY_AGAIN assert(is_fake_locked); - r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); assert(is_fake_locked); assert(r==TOKUDB_TRY_AGAIN); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); assert(foo); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); assert(r==0); // now make sure we get TOKUDB_TRY_AGAIN when a partial fetch is involved assert(is_fake_locked); // first make sure value is there - r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); assert(is_fake_locked); assert(r==0); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); assert(r==0); // now make sure that we get TOKUDB_TRY_AGAIN for the partial fetch - r = 
toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, true_def_pf_req_callback, true_def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, true_def_pf_req_callback, true_def_pf_callback, TRUE, NULL, NULL); assert(is_fake_locked); assert(r==TOKUDB_TRY_AGAIN); @@ -119,13 +121,13 @@ run_test (void) { // now test that if there is a checkpoint pending, // first pin and unpin with dirty // - r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); assert(is_fake_locked); assert(r==0); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); assert(r==0); // this should mark the PAIR as pending r = toku_cachetable_begin_checkpoint(ct, NULL); assert(r == 0); - r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL, NULL); + r = toku_cachetable_get_and_pin_nonblocking(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL, NULL); assert(is_fake_locked); assert(r==TOKUDB_TRY_AGAIN); my_ydb_unlock(); diff --git a/newbrt/tests/cachetable-simple-pin.c b/newbrt/tests/cachetable-simple-pin.c index ce810030675..301038f59d0 100644 --- a/newbrt/tests/cachetable-simple-pin.c +++ b/newbrt/tests/cachetable-simple-pin.c @@ -16,12 +16,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -61,16 +63,16 @@ run_test (void) { //long s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); foo = FALSE; cachefile_kibbutz_enq(f1, kibbutz_work, f1); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); assert(foo); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); //now let's do a simple checkpoint test // first dirty the PAIR - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); // now this should mark the pair for checkpoint @@ -81,7 +83,7 @@ run_test (void) { // check_me = TRUE; flush_called = FALSE; - r = 
toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); assert(flush_called); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8)); diff --git a/newbrt/tests/cachetable-simple-put-dep-nodes.c b/newbrt/tests/cachetable-simple-put-dep-nodes.c index 7a24d1bf851..ec06094cdef 100644 --- a/newbrt/tests/cachetable-simple-put-dep-nodes.c +++ b/newbrt/tests/cachetable-simple-put-dep-nodes.c @@ -18,12 +18,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { /* Do nothing */ if (verbose) { printf("FLUSH: %d\n", (int)k.b); } @@ -50,6 +52,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void *extraargs __attribute__((__unused__)) @@ -84,9 +87,9 @@ cachetable_test (BOOL write_first, BOOL write_second, BOOL start_checkpoint) { long s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); wc.flush_callback = flush; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, &val1); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, &val1); assert(r==0); - r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, &val2); + r = toku_cachetable_get_and_pin(f1, make_blocknum(2), 2, &v2, &s2, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, &val2); assert(r==0); CACHEFILE dependent_cfs[2]; diff --git a/newbrt/tests/cachetable-simple-unpin-remove-checkpoint.c b/newbrt/tests/cachetable-simple-unpin-remove-checkpoint.c index dd64d80b5a4..c0c12ecdec7 100644 --- a/newbrt/tests/cachetable-simple-unpin-remove-checkpoint.c +++ b/newbrt/tests/cachetable-simple-unpin-remove-checkpoint.c @@ -37,7 +37,7 @@ cachetable_test (void) { long s1; //long s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_begin_checkpoint(ct, NULL); assert(r == 0); r = toku_cachetable_unpin_and_remove(f1, make_blocknum(1), remove_key_expect_checkpoint, NULL); r = toku_cachetable_end_checkpoint( @@ -50,7 +50,7 @@ cachetable_test (void) { ); assert(r==0); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin_and_remove(f1, make_blocknum(1), remove_key_expect_no_checkpoint, NULL); diff --git 
a/newbrt/tests/cachetable-simple-verify.c b/newbrt/tests/cachetable-simple-verify.c index d0d806b57e9..a731743f2aa 100644 --- a/newbrt/tests/cachetable-simple-verify.c +++ b/newbrt/tests/cachetable-simple-verify.c @@ -19,7 +19,7 @@ cachetable_test (void) { long s1; //long s2; CACHETABLE_WRITE_CALLBACK wc = def_write_callback(NULL); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), 1, &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_DIRTY, make_pair_attr(8)); toku_cachetable_verify(ct); r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0); diff --git a/newbrt/tests/cachetable-test.c b/newbrt/tests/cachetable-test.c index 95bc5db7734..2d4a62f7a5b 100644 --- a/newbrt/tests/cachetable-test.c +++ b/newbrt/tests/cachetable-test.c @@ -91,12 +91,15 @@ static void flush (CACHEFILE f, int UU(fd), CACHEKEY key, void*value, + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me __attribute__((__unused__)), BOOL keep_me __attribute__((__unused__)), - BOOL for_checkpoint __attribute__((__unused__))) { + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) + ) { struct item *it = value; int i; @@ -132,7 +135,7 @@ static struct item *make_item (u_int64_t key) { } static CACHEKEY did_fetch={-1}; -static int fetch (CACHEFILE f, int UU(fd), CACHEKEY key, u_int32_t fullhash __attribute__((__unused__)), void**value, PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void*extraargs) { +static int fetch (CACHEFILE f, int UU(fd), CACHEKEY key, u_int32_t fullhash __attribute__((__unused__)), void**value, void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp, void*extraargs) { if (verbose) printf("Fetch %" PRId64 "\n", key.b); assert (expect_f==f); assert((long)extraargs==23); @@ -232,7 +235,7 @@ static void test0 (void) { { void *item_v=0; expect_init(); - r=toku_cachetable_get_and_pin(f, make_blocknum(5), toku_cachetable_hash(f, make_blocknum(5)), &item_v, NULL, wc, fetch, def_pf_req_callback, def_pf_callback, t3); /* 5P 7U 6P 4P 1P */ + r=toku_cachetable_get_and_pin(f, make_blocknum(5), toku_cachetable_hash(f, make_blocknum(5)), &item_v, NULL, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, t3); /* 5P 7U 6P 4P 1P */ assert(r==0); assert(((struct item *)item_v)->key.b==5); assert(strcmp(((struct item *)item_v)->something,"something")==0); @@ -249,7 +252,7 @@ static void test0 (void) { did_fetch=make_blocknum(-1); CACHETABLE_WRITE_CALLBACK wc2 = def_write_callback(t3); wc2.flush_callback = flush; - r=toku_cachetable_get_and_pin(f, make_blocknum(2), toku_cachetable_hash(f, make_blocknum(2)), &item_v, NULL, wc2, fetch, def_pf_req_callback, def_pf_callback, t3); /* 2p 5P 7U 6P 1P */ + r=toku_cachetable_get_and_pin(f, make_blocknum(2), toku_cachetable_hash(f, make_blocknum(2)), &item_v, NULL, wc2, fetch, def_pf_req_callback, def_pf_callback, TRUE, t3); /* 2p 5P 7U 6P 1P */ assert(r==0); assert(did_fetch.b==2); /* Expect that 2 is fetched in. 
*/ assert(((struct item *)item_v)->key.b==2); @@ -290,17 +293,22 @@ static void test0 (void) { static void flush_n (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY key __attribute__((__unused__)), void *value, + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me __attribute__((__unused__)), BOOL keep_me __attribute__((__unused__)), - BOOL for_checkpoint __attribute__ ((__unused__))) { + BOOL for_checkpoint __attribute__ ((__unused__)), + BOOL UU(is_clone) + ) { int *v = value; assert(*v==0); } static int fetch_n (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY key __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), - void**value, PAIR_ATTR *sizep __attribute__((__unused__)), + void**value, + void** UU(dd), +PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { assert((long)extraargs==42); *value=0; @@ -333,7 +341,7 @@ static void test_nested_pin (void) { r = toku_cachetable_put(f, make_blocknum(1), f1hash, &i0, make_pair_attr(1), wc); assert(r==0); r = toku_cachetable_unpin(f, make_blocknum(1), f1hash, CACHETABLE_CLEAN, make_pair_attr(test_object_size)); - r = toku_cachetable_get_and_pin(f, make_blocknum(1), f1hash, &vv, NULL, wc, fetch_n, def_pf_req_callback, def_pf_callback, f2); + r = toku_cachetable_get_and_pin(f, make_blocknum(1), f1hash, &vv, NULL, wc, fetch_n, def_pf_req_callback, def_pf_callback, TRUE, f2); assert(r==0); assert(vv==&i0); assert(i0==0); @@ -359,15 +367,20 @@ static void null_flush (CACHEFILE cf __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me __attribute__((__unused__)), BOOL keep_me __attribute__((__unused__)), - BOOL for_checkpoint __attribute__((__unused__))) { + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) + ) { } -static int add123_fetch (CACHEFILE cf, int UU(fd), CACHEKEY key, u_int32_t fullhash, void **value, PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { +static int add123_fetch (CACHEFILE cf, int UU(fd), CACHEKEY key, u_int32_t fullhash, void **value, + void** UU(dd), +PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { assert(fullhash==toku_cachetable_hash(cf,key)); assert((long)extraargs==123); *value = (void*)((unsigned long)key.b+123L); @@ -376,7 +389,9 @@ static int add123_fetch (CACHEFILE cf, int UU(fd), CACHEKEY key, u_int32_t fullh return 0; } -static int add222_fetch (CACHEFILE cf, int UU(fd), CACHEKEY key, u_int32_t fullhash, void **value, PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { +static int add222_fetch (CACHEFILE cf, int UU(fd), CACHEKEY key, u_int32_t fullhash, void **value, + void** UU(dd), +PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { assert(fullhash==toku_cachetable_hash(cf,key)); assert((long)extraargs==222); *value = (void*)((unsigned long)key.b+222L); @@ -411,12 +426,12 @@ static void test_multi_filehandles (void) { wc.flush_callback = null_flush; r = toku_cachetable_put(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), (void*)124, make_pair_attr(test_object_size), wc); assert(r==0); r = toku_cachetable_unpin(f1, make_blocknum(1), toku_cachetable_hash(f1, 
make_blocknum(1)), CACHETABLE_DIRTY, make_pair_attr(0)); assert(r==0); - r = toku_cachetable_get_and_pin(f2, make_blocknum(1), toku_cachetable_hash(f2, make_blocknum(1)), &v, NULL, wc, add123_fetch, def_pf_req_callback, def_pf_callback, (void*)123); assert(r==0); + r = toku_cachetable_get_and_pin(f2, make_blocknum(1), toku_cachetable_hash(f2, make_blocknum(1)), &v, NULL, wc, add123_fetch, def_pf_req_callback, def_pf_callback, TRUE, (void*)123); assert(r==0); assert((unsigned long)v==124); - r = toku_cachetable_get_and_pin(f2, make_blocknum(2), toku_cachetable_hash(f2, make_blocknum(2)), &v, NULL, wc, add123_fetch, def_pf_req_callback, def_pf_callback, (void*)123); assert(r==0); + r = toku_cachetable_get_and_pin(f2, make_blocknum(2), toku_cachetable_hash(f2, make_blocknum(2)), &v, NULL, wc, add123_fetch, def_pf_req_callback, def_pf_callback, TRUE, (void*)123); assert(r==0); assert((unsigned long)v==125); wc.write_extraargs = (void*)222; - r = toku_cachetable_get_and_pin(f3, make_blocknum(2), toku_cachetable_hash(f3, make_blocknum(2)), &v, NULL, wc, add222_fetch, def_pf_req_callback, def_pf_callback, (void*)222); assert(r==0); + r = toku_cachetable_get_and_pin(f3, make_blocknum(2), toku_cachetable_hash(f3, make_blocknum(2)), &v, NULL, wc, add222_fetch, def_pf_req_callback, def_pf_callback, TRUE, (void*)222); assert(r==0); assert((unsigned long)v==224); r = toku_cachetable_unpin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), CACHETABLE_CLEAN, make_pair_attr(0)); assert(r==0); @@ -439,16 +454,21 @@ static void test_dirty_flush(CACHEFILE f, int UU(fd), CACHEKEY key, void *value, + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size, PAIR_ATTR* new_size __attribute__((__unused__)), BOOL do_write, BOOL keep, - BOOL for_checkpoint __attribute__((__unused__))) { + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) + ) { if (verbose) printf("test_dirty_flush %p %" PRId64 " %p %ld %u %u\n", f, key.b, value, size.size, (unsigned)do_write, (unsigned)keep); } -static int test_dirty_fetch(CACHEFILE f, int UU(fd), CACHEKEY key, u_int32_t fullhash, void **value_ptr, PAIR_ATTR *size_ptr, int * dirtyp, void *arg) { +static int test_dirty_fetch(CACHEFILE f, int UU(fd), CACHEKEY key, u_int32_t fullhash, void **value_ptr, + void** UU(dd), +PAIR_ATTR *size_ptr, int * dirtyp, void *arg) { *value_ptr = arg; *dirtyp = 0; *size_ptr = make_pair_attr(0); @@ -495,7 +515,7 @@ static void test_dirty(void) { assert(pinned == 0); r = toku_cachetable_get_and_pin(f, key, hkey, &value, NULL, wc, - test_dirty_fetch, def_pf_req_callback, def_pf_callback, 0); + test_dirty_fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); assert(r == 0); // cachetable_print_state(t); @@ -517,7 +537,7 @@ static void test_dirty(void) { hkey = toku_cachetable_hash(f, key); r = toku_cachetable_get_and_pin(f, key, hkey, &value, NULL, wc, - test_dirty_fetch, def_pf_req_callback, def_pf_callback, 0); + test_dirty_fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); assert(r == 0); // cachetable_print_state(t); @@ -537,7 +557,7 @@ static void test_dirty(void) { r = toku_cachetable_get_and_pin(f, key, hkey, &value, NULL, wc, - test_dirty_fetch, def_pf_req_callback, def_pf_callback, 0); + test_dirty_fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); assert(r == 0); // cachetable_print_state(t); @@ -568,12 +588,15 @@ static void test_size_flush_callback(CACHEFILE f, int UU(fd), CACHEKEY key, void *value, + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size, PAIR_ATTR* 
new_size __attribute__((__unused__)), BOOL do_write, BOOL keep, - BOOL for_checkpoint __attribute__((__unused__))) { + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) + ) { if (test_size_debug && verbose) printf("test_size_flush %p %" PRId64 " %p %ld %u %u\n", f, key.b, value, size.size, (unsigned)do_write, (unsigned)keep); if (keep) { if (do_write) { @@ -628,7 +651,7 @@ static void test_size_resize(void) { void *current_value; long current_size; - r = toku_cachetable_get_and_pin(f, key, hkey, &current_value, &current_size, wc, 0, def_pf_req_callback, def_pf_callback, 0); + r = toku_cachetable_get_and_pin(f, key, hkey, &current_value, &current_size, wc, 0, def_pf_req_callback, def_pf_callback, TRUE, 0); assert(r == 0); assert(current_value == value); assert(current_size == new_size); diff --git a/newbrt/tests/cachetable-test2.c b/newbrt/tests/cachetable-test2.c index bc32b06af98..eb970f3779c 100644 --- a/newbrt/tests/cachetable-test2.c +++ b/newbrt/tests/cachetable-test2.c @@ -97,12 +97,15 @@ static void flush_forchain (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY key, void *value, + void** UU(dd), void *extra __attribute__((__unused__)), PAIR_ATTR size __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL write_me __attribute__((__unused__)), BOOL keep_me __attribute__((__unused__)), - BOOL for_checkpoint __attribute__((__unused__))) { + BOOL for_checkpoint __attribute__((__unused__)), + BOOL UU(is_clone) + ) { if (keep_me) return; int *v = value; //toku_cachetable_print_state(ct); @@ -112,7 +115,9 @@ static void flush_forchain (CACHEFILE f __attribute__((__unused__)), //print_ints(); } -static int fetch_forchain (CACHEFILE f, int UU(fd), CACHEKEY key, u_int32_t fullhash, void**value, PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { +static int fetch_forchain (CACHEFILE f, int UU(fd), CACHEKEY key, u_int32_t fullhash, void**value, + void** UU(dd), +PAIR_ATTR *sizep __attribute__((__unused__)), int * dirtyp, void*extraargs) { assert(toku_cachetable_hash(f, key)==fullhash); assert((long)extraargs==(long)key.b); *value = (void*)(long)key.b; @@ -197,6 +202,7 @@ static void test_chaining (void) { fetch_forchain, def_pf_req_callback, def_pf_callback, + TRUE, (void*)(long)whichkey.b ); assert(r==0); diff --git a/newbrt/tests/cachetable-unpin-and-remove-test.c b/newbrt/tests/cachetable-unpin-and-remove-test.c index be80672462a..fde20d2bb68 100644 --- a/newbrt/tests/cachetable-unpin-and-remove-test.c +++ b/newbrt/tests/cachetable-unpin-and-remove-test.c @@ -9,6 +9,7 @@ fetch (CACHEFILE f __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), u_int32_t fullhash __attribute__((__unused__)), void **value __attribute__((__unused__)), + void** UU(dd), PAIR_ATTR *sizep __attribute__((__unused__)), int *dirtyp __attribute__((__unused__)), void *extraargs __attribute__((__unused__)) @@ -113,7 +114,7 @@ cachetable_put_evict_remove_test (int n) { // get 0 void *v; long s; - r = toku_cachetable_get_and_pin(f1, make_blocknum(0), hi[0], &v, &s, wc, fetch, def_pf_req_callback, def_pf_callback, 0); + r = toku_cachetable_get_and_pin(f1, make_blocknum(0), hi[0], &v, &s, wc, fetch, def_pf_req_callback, def_pf_callback, TRUE, 0); assert(r == 0); // remove 0 diff --git a/newbrt/tests/cachetable-unpin-remove-and-checkpoint.c b/newbrt/tests/cachetable-unpin-remove-and-checkpoint.c index 7780deed7ec..3a2d3721722 100644 --- a/newbrt/tests/cachetable-unpin-remove-and-checkpoint.c +++
b/newbrt/tests/cachetable-unpin-remove-and-checkpoint.c @@ -39,7 +39,7 @@ run_test (void) { //void* v2; long s1; //long s2; - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); toku_cachetable_unpin( f1, make_blocknum(1), @@ -50,7 +50,7 @@ run_test (void) { // now this should mark the pair for checkpoint r = toku_cachetable_begin_checkpoint(ct, NULL); - r = toku_cachetable_get_and_pin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, NULL); + r = toku_cachetable_get_and_pin(f1, make_blocknum(1), toku_cachetable_hash(f1, make_blocknum(1)), &v1, &s1, wc, def_fetch, def_pf_req_callback, def_pf_callback, TRUE, NULL); toku_pthread_t mytid; r = toku_pthread_create(&mytid, NULL, run_end_chkpt, NULL); diff --git a/newbrt/tests/cachetable-writer-thread-limit.c b/newbrt/tests/cachetable-writer-thread-limit.c index f38770eb1ae..103c3d99410 100644 --- a/newbrt/tests/cachetable-writer-thread-limit.c +++ b/newbrt/tests/cachetable-writer-thread-limit.c @@ -13,12 +13,14 @@ flush (CACHEFILE f __attribute__((__unused__)), int UU(fd), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), + void** UU(dd), void *e __attribute__((__unused__)), PAIR_ATTR s __attribute__((__unused__)), PAIR_ATTR* new_size __attribute__((__unused__)), BOOL w __attribute__((__unused__)), BOOL keep __attribute__((__unused__)), - BOOL c __attribute__((__unused__)) + BOOL c __attribute__((__unused__)), + BOOL UU(is_clone) ) { if (w) { int curr_size = __sync_fetch_and_add(&total_size, -1); diff --git a/newbrt/tests/test-checkpoint-during-flush.c b/newbrt/tests/test-checkpoint-during-flush.c index cae9fe3e919..9d26953e033 100644 --- a/newbrt/tests/test-checkpoint-during-flush.c +++ b/newbrt/tests/test-checkpoint-during-flush.c @@ -145,6 +145,7 @@ doit (BOOL after_child_pin) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -163,6 +164,7 @@ doit (BOOL after_child_pin) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -199,6 +201,7 @@ doit (BOOL after_child_pin) { node_root, toku_cachetable_hash(c_brt->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -219,6 +222,7 @@ doit (BOOL after_child_pin) { node_leaf, toku_cachetable_hash(c_brt->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node diff --git a/newbrt/tests/test-checkpoint-during-merge.c b/newbrt/tests/test-checkpoint-during-merge.c index 0d453044ef7..4fdee6446d2 100644 --- a/newbrt/tests/test-checkpoint-during-merge.c +++ b/newbrt/tests/test-checkpoint-during-merge.c @@ -163,6 +163,7 @@ doit (int state) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -180,6 +181,7 @@ doit (int state) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -218,6 +220,7 @@ doit (int state) { node_root, toku_cachetable_hash(c_brt->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -247,6 +250,7 @@ doit (int state) { left_child, toku_cachetable_hash(c_brt->h->cf, left_child), &bfe, + TRUE, 0, NULL, &node @@ -262,6 +266,7 @@ doit (int state) { right_child, toku_cachetable_hash(c_brt->h->cf, right_child), &bfe, + TRUE, 0, NULL, &node @@ -278,6 +283,7 @@ doit (int state) { 
left_child, toku_cachetable_hash(c_brt->h->cf, left_child), &bfe, + TRUE, 0, NULL, &node diff --git a/newbrt/tests/test-checkpoint-during-rebalance.c b/newbrt/tests/test-checkpoint-during-rebalance.c index 7443f7cd1b9..3eda53b9e43 100644 --- a/newbrt/tests/test-checkpoint-during-rebalance.c +++ b/newbrt/tests/test-checkpoint-during-rebalance.c @@ -183,6 +183,7 @@ doit (int state) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -200,6 +201,7 @@ doit (int state) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -238,6 +240,7 @@ doit (int state) { node_root, toku_cachetable_hash(c_brt->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -258,6 +261,7 @@ doit (int state) { left_child, toku_cachetable_hash(c_brt->h->cf, left_child), &bfe, + TRUE, 0, NULL, &node @@ -273,6 +277,7 @@ doit (int state) { right_child, toku_cachetable_hash(c_brt->h->cf, right_child), &bfe, + TRUE, 0, NULL, &node diff --git a/newbrt/tests/test-checkpoint-during-split.c b/newbrt/tests/test-checkpoint-during-split.c index 647b071c446..5fe299a703e 100644 --- a/newbrt/tests/test-checkpoint-during-split.c +++ b/newbrt/tests/test-checkpoint-during-split.c @@ -159,6 +159,7 @@ doit (BOOL after_split) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -176,6 +177,7 @@ doit (BOOL after_split) { node_root, toku_cachetable_hash(t->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -214,6 +216,7 @@ doit (BOOL after_split) { node_root, toku_cachetable_hash(c_brt->h->cf, node_root), &bfe, + TRUE, 0, NULL, &node @@ -241,6 +244,7 @@ doit (BOOL after_split) { left_child, toku_cachetable_hash(c_brt->h->cf, left_child), &bfe, + TRUE, 0, NULL, &node @@ -256,6 +260,7 @@ doit (BOOL after_split) { right_child, toku_cachetable_hash(c_brt->h->cf, right_child), &bfe, + TRUE, 0, NULL, &node @@ -272,6 +277,7 @@ doit (BOOL after_split) { left_child, toku_cachetable_hash(c_brt->h->cf, left_child), &bfe, + TRUE, 0, NULL, &node diff --git a/newbrt/tests/test-dirty-flushes-on-cleaner.c b/newbrt/tests/test-dirty-flushes-on-cleaner.c index f108044b573..a4f6106b9cf 100644 --- a/newbrt/tests/test-dirty-flushes-on-cleaner.c +++ b/newbrt/tests/test-dirty-flushes-on-cleaner.c @@ -166,6 +166,7 @@ doit (void) { node_leaf, toku_cachetable_hash(brt->h->cf, node_leaf), &bfe, + TRUE, 0, NULL, &node @@ -194,6 +195,7 @@ doit (void) { node_leaf, toku_cachetable_hash(brt->h->cf, node_leaf), &bfe, + TRUE, 0, NULL, &node @@ -213,6 +215,7 @@ doit (void) { node_internal, toku_cachetable_hash(brt->h->cf, node_internal), &bfe, + TRUE, 0, NULL, &node @@ -236,6 +239,7 @@ doit (void) { node_internal, toku_cachetable_hash(brt->h->cf, node_internal), &bfe, + TRUE, 0, NULL, &node diff --git a/newbrt/tests/test-flushes-on-cleaner.c b/newbrt/tests/test-flushes-on-cleaner.c index 8946b75a7ba..4cabc901e7a 100644 --- a/newbrt/tests/test-flushes-on-cleaner.c +++ b/newbrt/tests/test-flushes-on-cleaner.c @@ -171,6 +171,7 @@ doit (void) { node_leaf, toku_cachetable_hash(brt->h->cf, node_leaf), &bfe, + TRUE, 0, NULL, &node @@ -206,6 +207,7 @@ doit (void) { node_leaf, toku_cachetable_hash(brt->h->cf, node_leaf), &bfe, + TRUE, 0, NULL, &node @@ -225,6 +227,7 @@ doit (void) { node_internal, toku_cachetable_hash(brt->h->cf, node_internal), &bfe, + TRUE, 0, NULL, &node @@ -248,6 +251,7 @@ doit (void) { node_internal, toku_cachetable_hash(brt->h->cf, node_internal), &bfe, + TRUE, 0, NULL, &node diff --git a/newbrt/tests/test-merges-on-cleaner.c b/newbrt/tests/test-merges-on-cleaner.c 
index a4057785c4b..49c1a99fc47 100644
--- a/newbrt/tests/test-merges-on-cleaner.c
+++ b/newbrt/tests/test-merges-on-cleaner.c
@@ -158,6 +158,7 @@ doit (void) {
         node_internal,
         toku_cachetable_hash(brt->h->cf, node_internal),
         &bfe,
+        TRUE,
         0,
         NULL,
         &node
@@ -180,6 +181,7 @@ doit (void) {
         node_internal,
         toku_cachetable_hash(brt->h->cf, node_internal),
         &bfe,
+        TRUE,
         0,
         NULL,
         &node
diff --git a/newbrt/tests/test.h b/newbrt/tests/test.h
index 33d98e441ec..3097177fad8 100644
--- a/newbrt/tests/test.h
+++ b/newbrt/tests/test.h
@@ -125,18 +125,21 @@ def_flush (CACHEFILE f __attribute__((__unused__)),
        int UU(fd),
        CACHEKEY k __attribute__((__unused__)),
        void *v __attribute__((__unused__)),
+       void **dd __attribute__((__unused__)),
        void *e __attribute__((__unused__)),
        PAIR_ATTR s __attribute__((__unused__)),
        PAIR_ATTR* new_size __attribute__((__unused__)),
        BOOL w __attribute__((__unused__)),
        BOOL keep __attribute__((__unused__)),
-       BOOL c __attribute__((__unused__))
+       BOOL c __attribute__((__unused__)),
+       BOOL UU(is_clone)
        ) {
 }
 
 static UU() void
 def_pe_est_callback(
-    void* UU(brtnode_pv),
+    void* UU(brtnode_pv),
+    void* UU(dd),
     long* bytes_freed_estimate,
     enum partial_eviction_cost *cost,
     void* UU(write_extraargs)
@@ -162,7 +165,7 @@ static UU() BOOL def_pf_req_callback(void* UU(brtnode_pv), void* UU(read_extraar
     return FALSE;
 }
 
-static UU() int def_pf_callback(void* UU(brtnode_pv), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) {
+ static UU() int def_pf_callback(void* UU(brtnode_pv), void* UU(dd), void* UU(read_extraargs), int UU(fd), PAIR_ATTR* UU(sizep)) {
     assert(FALSE);
     return 0;
 }
@@ -173,6 +176,7 @@ def_fetch (CACHEFILE f __attribute__((__unused__)),
        CACHEKEY k __attribute__((__unused__)),
        u_int32_t fullhash __attribute__((__unused__)),
        void **value __attribute__((__unused__)),
+       void **dd __attribute__((__unused__)),
        PAIR_ATTR *sizep __attribute__((__unused__)),
        int *dirtyp,
        void *extraargs __attribute__((__unused__))
@@ -203,6 +207,7 @@ static UU() CACHETABLE_WRITE_CALLBACK def_write_callback(void* write_extraargs)
     wc.pe_callback = def_pe_callback;
     wc.cleaner_callback = def_cleaner_callback;
     wc.write_extraargs = write_extraargs;
+    wc.clone_callback = NULL;
     return wc;
 }
diff --git a/newbrt/tests/test4244.c b/newbrt/tests/test4244.c
index e53b0ba725f..bcb6c86757b 100644
--- a/newbrt/tests/test4244.c
+++ b/newbrt/tests/test4244.c
@@ -75,6 +75,7 @@ doit (void) {
         node_internal,
         toku_cachetable_hash(t->h->cf, node_internal),
         &bfe,
+        TRUE,
         0,
         NULL,
         &node
diff --git a/newbrt/tests/test4302.c b/newbrt/tests/test4302.c
index b6c83d84017..04b0c6ff19d 100644
--- a/newbrt/tests/test4302.c
+++ b/newbrt/tests/test4302.c
@@ -11,12 +11,14 @@ flush (CACHEFILE f __attribute__((__unused__)),
        int UU(fd),
        CACHEKEY k __attribute__((__unused__)),
        void *v __attribute__((__unused__)),
+       void** UU(dd),
        void *e __attribute__((__unused__)),
        PAIR_ATTR s __attribute__((__unused__)),
        PAIR_ATTR* new_size __attribute__((__unused__)),
        BOOL w __attribute__((__unused__)),
        BOOL keep __attribute__((__unused__)),
-       BOOL c __attribute__((__unused__))
+       BOOL c __attribute__((__unused__)),
+       BOOL UU(is_clone)
        ) {
     /* Do nothing */
     if (verbose) { printf("FLUSH: %d\n", (int)k.b); }
@@ -28,6 +30,7 @@ fetch (CACHEFILE f __attribute__((__unused__)),
        CACHEKEY k __attribute__((__unused__)),
        u_int32_t fullhash __attribute__((__unused__)),
        void **value,
+       void** UU(dd),
        PAIR_ATTR *sizep,
        int *dirtyp,
        void *extraargs
@@ -41,6 +44,7 @@ fetch (CACHEFILE f __attribute__((__unused__)),
 static void
 pe_est_callback(
     void* UU(brtnode_pv),
+    void* UU(dd),
     long* bytes_freed_estimate,
     enum partial_eviction_cost *cost,
     void* UU(write_extraargs)
@@ -120,6 +124,7 @@ cachetable_test (void) {
         wc,
         fetch,
         def_pf_req_callback,
         def_pf_callback,
+        TRUE,
         &val1
         );
     r = toku_cachetable_unpin(f1, make_blocknum(1), 1, CACHETABLE_CLEAN, make_pair_attr(8));
diff --git a/newbrt/workqueue.c b/newbrt/workqueue.c
index 2489c8921b3..c9da2dac42a 100644
--- a/newbrt/workqueue.c
+++ b/newbrt/workqueue.c
@@ -16,10 +16,12 @@
 // Create fixed number of worker threads, all waiting on a single queue
 // of work items (WORKQUEUE).
 
-void toku_init_workers(WORKQUEUE wq, THREADPOOL *tpptr) {
+void toku_init_workers(WORKQUEUE wq, THREADPOOL *tpptr, int fraction) {
     workqueue_init(wq);
+    assert(fraction > 0);
     int nprocs = toku_os_get_number_active_processors();
-    int nthreads = nprocs*2;
+    int nthreads = (nprocs*2)/fraction;
+    if (nthreads == 0) nthreads = 1;
     toku_thread_pool_create(tpptr, nthreads);
     toku_thread_pool_run(*tpptr, 0, &nthreads, toku_worker, wq);
 }
diff --git a/newbrt/workqueue.h b/newbrt/workqueue.h
index 59e68d3331f..4d6597204cb 100644
--- a/newbrt/workqueue.h
+++ b/newbrt/workqueue.h
@@ -205,7 +205,9 @@ static int workqueue_n_in_queue (WORKQUEUE wq, int dolock) {
 #include "threadpool.h"
 
 // initialize the work queue and worker
-void toku_init_workers(WORKQUEUE wq, THREADPOOL *tpptr);
+void toku_init_workers(WORKQUEUE wq, THREADPOOL *tpptr, int fraction);
+
+void toku_init_workers_with_num_threads(WORKQUEUE wq, THREADPOOL *tpptr, int num_threads);
 
 // destroy the work queue and worker
 void toku_destroy_workers(WORKQUEUE wq, THREADPOOL *tpptr);
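The workqueue change above makes the default pool size tunable: toku_init_workers still starts from two worker threads per processor, but now divides that count by the caller-supplied fraction and clamps the result to at least one thread. A minimal standalone sketch of just that sizing rule; nprocs stands in for toku_os_get_number_active_processors(), and the compute_nthreads helper and main() harness are illustrative, not part of the patch:

    #include <assert.h>
    #include <stdio.h>

    /* Sizing rule from toku_init_workers above: (nprocs*2)/fraction, minimum 1. */
    static int compute_nthreads(int nprocs, int fraction) {
        assert(fraction > 0);             /* mirrors the assert added in workqueue.c */
        int nthreads = (nprocs * 2) / fraction;
        if (nthreads == 0) nthreads = 1;  /* integer division can round down to zero */
        return nthreads;
    }

    int main(void) {
        printf("%d\n", compute_nthreads(8, 1));  /* 16: fraction==1 keeps the old behavior */
        printf("%d\n", compute_nthreads(8, 4));  /* 4: a quarter-sized pool */
        printf("%d\n", compute_nthreads(1, 4));  /* 1: clamped, never zero */
        return 0;
    }

Callers that need an exact count rather than a fraction of the default can use the toku_init_workers_with_num_threads entry point declared in workqueue.h above; its definition is not shown in this hunk.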
get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_FETCHED_PREFETCH") + + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_FETCHED_WRITE"); + + num_pivots_fetched = + get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_QUERY") + + get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_PREFETCH") + + get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_WRITE"); + + printf("basements decompressed %"PRIu64" \n", num_basements_decompressed - old_num_basements_decompressed); + printf("buffers decompressed %"PRIu64" \n", num_buffers_decompressed- old_num_buffers_decompressed); + printf("basements fetched %"PRIu64" \n", num_basements_fetched - old_num_basements_fetched); + printf("buffers fetched %"PRIu64" \n", num_buffers_fetched - old_num_buffers_fetched); + printf("pivots fetched %"PRIu64" \n", num_pivots_fetched - old_num_pivots_fetched); + printf("************************************************************\n"); +} + +static void checkpoint_callback_2(void * extra) { + DB_ENV* env = extra; + num_basements_decompressed = + get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_NORMAL") + + get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE") + + get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH") + + get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_WRITE"); + + num_buffers_decompressed = + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL") + + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE") + + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH") + + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE"); + + num_basements_fetched = + get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_NORMAL") + + get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE") + + get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_PREFETCH") + + get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_WRITE"); + + num_buffers_fetched = + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_FETCHED_NORMAL") + + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE") + + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_FETCHED_PREFETCH") + + get_engine_status_val(env, "BRT_NUM_MSG_BUFFER_FETCHED_WRITE"); + + num_pivots_fetched = + get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_QUERY") + + get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_PREFETCH") + + get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_WRITE"); +} + + + // // This test is a form of stress that does operations on a single dictionary: // We create a dictionary bigger than the cachetable (around 4x greater). @@ -69,6 +154,8 @@ cleanup: static void stress_table(DB_ENV* env, DB** dbp, struct cli_args *cli_args) { + db_env_set_checkpoint_callback(checkpoint_callback_1, env); + db_env_set_checkpoint_callback2(checkpoint_callback_2, env); int n = cli_args->num_elements; // // the threads that we want: