diff --git a/buildheader/db.h_4_1 b/buildheader/db.h_4_1 index 8555d9796b2..051077d8079 100644 --- a/buildheader/db.h_4_1 +++ b/buildheader/db.h_4_1 @@ -105,6 +105,7 @@ typedef struct __toku_engine_status { u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ + u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ @@ -181,6 +182,16 @@ typedef struct __toku_engine_status { uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */ + uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */ + uint64_t flush_in_memory; /* number of in memory flushes */ + uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */ + uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */ + uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */ + uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */ + uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */ + uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */ + uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */ + uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */ u_int64_t point_queries; /* ydb point queries */ u_int64_t sequential_queries; /* ydb sequential queries */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ diff --git a/buildheader/db.h_4_3 b/buildheader/db.h_4_3 index 572f8b4b2f6..7060acc8469 100644 --- a/buildheader/db.h_4_3 +++ b/buildheader/db.h_4_3 @@ -105,6 +105,7 @@ typedef struct __toku_engine_status { u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ + u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ @@ -181,6 +182,16 @@ typedef struct __toku_engine_status { uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */ + uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */ + uint64_t flush_in_memory; /* number of in memory flushes */ + uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */ + uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */ + uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */ + uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */ + uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */ + uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */ + uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */ + uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */ u_int64_t point_queries; /* ydb point queries */ u_int64_t sequential_queries; /* ydb sequential queries */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ diff --git a/buildheader/db.h_4_4 b/buildheader/db.h_4_4 index 88a5534c55a..801a903213f 100644 --- a/buildheader/db.h_4_4 +++ b/buildheader/db.h_4_4 @@ -105,6 +105,7 @@ typedef struct __toku_engine_status { u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ + u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ @@ -181,6 +182,16 @@ typedef struct __toku_engine_status { uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */ + uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */ + uint64_t flush_in_memory; /* number of in memory flushes */ + uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */ + uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */ + uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */ + uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */ + uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */ + uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */ + uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */ + uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */ u_int64_t point_queries; /* ydb point queries */ u_int64_t sequential_queries; /* ydb sequential queries */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ diff --git a/buildheader/db.h_4_5 b/buildheader/db.h_4_5 index ef01fbbc135..4c604581044 100644 --- a/buildheader/db.h_4_5 +++ b/buildheader/db.h_4_5 @@ -105,6 +105,7 @@ typedef struct __toku_engine_status { u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ + u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ @@ -181,6 +182,16 @@ typedef struct __toku_engine_status { uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */ + uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */ + uint64_t flush_in_memory; /* number of in memory flushes */ + uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */ + uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */ + uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */ + uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */ + uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */ + uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */ + uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */ + uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */ u_int64_t point_queries; /* ydb point queries */ u_int64_t sequential_queries; /* ydb sequential queries */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ diff --git a/buildheader/db.h_4_6 b/buildheader/db.h_4_6 index 3005e199276..2b7ae8075e2 100644 --- a/buildheader/db.h_4_6 +++ b/buildheader/db.h_4_6 @@ -105,6 +105,7 @@ typedef struct __toku_engine_status { u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ + u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ @@ -181,6 +182,16 @@ typedef struct __toku_engine_status { uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */ + uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */ + uint64_t flush_in_memory; /* number of in memory flushes */ + uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */ + uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */ + uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */ + uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */ + uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */ + uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */ + uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */ + uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */ u_int64_t point_queries; /* ydb point queries */ u_int64_t sequential_queries; /* ydb sequential queries */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ diff --git a/buildheader/make_db_h.c b/buildheader/make_db_h.c index e02e36b0097..8cd9989e3f3 100644 --- a/buildheader/make_db_h.c +++ b/buildheader/make_db_h.c @@ -499,6 +499,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__ printf(" u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ \n"); printf(" u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ \n"); printf(" u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ \n"); + printf(" u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ \n"); printf(" u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ \n"); printf(" u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ \n"); printf(" u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ \n"); @@ -575,6 +576,16 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__ printf(" uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */\n"); printf(" uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */\n"); printf(" uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */\n"); + printf(" uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */\n"); + printf(" uint64_t flush_in_memory; /* number of in memory flushes */\n"); + printf(" uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */\n"); + printf(" uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */\n"); + printf(" uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */\n"); + printf(" uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */\n"); + printf(" uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */\n"); + printf(" uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */\n"); + printf(" uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */\n"); + printf(" uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */\n"); printf(" u_int64_t point_queries; /* ydb point queries */ \n"); printf(" u_int64_t sequential_queries; /* ydb sequential queries */ \n"); printf(" u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ \n"); diff --git a/buildheader/tdb.h b/buildheader/tdb.h index 83f7c557711..6e358fc4783 100644 --- a/buildheader/tdb.h +++ b/buildheader/tdb.h @@ -105,6 +105,7 @@ typedef struct __toku_engine_status { u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ + u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ @@ -181,6 +182,16 @@ typedef struct __toku_engine_status { uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */ + uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */ + uint64_t flush_in_memory; /* number of in memory flushes */ + uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */ + uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */ + uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */ + uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */ + uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */ + uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */ + uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */ + uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */ u_int64_t point_queries; /* ydb point queries */ u_int64_t sequential_queries; /* ydb sequential queries */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ diff --git a/include/db.h b/include/db.h index 83f7c557711..6e358fc4783 100644 --- a/include/db.h +++ b/include/db.h @@ -105,6 +105,7 @@ typedef struct __toku_engine_status { u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ + u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ @@ -181,6 +182,16 @@ typedef struct __toku_engine_status { uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */ uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */ + uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */ + uint64_t flush_in_memory; /* number of in memory flushes */ + uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */ + uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */ + uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */ + uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */ + uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */ + uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */ + uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */ + uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */ u_int64_t point_queries; /* ydb point queries */ u_int64_t sequential_queries; /* ydb sequential queries */ u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ diff --git a/newbrt/brt-internal.h b/newbrt/brt-internal.h index f76ff500e33..b10d4e0102a 100644 --- a/newbrt/brt-internal.h +++ b/newbrt/brt-internal.h @@ -653,7 +653,6 @@ int toku_pin_brtnode (BRT brt, BLOCKNUM blocknum, u_int32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, struct pivot_bounds const * const pbounds, struct brtnode_fetch_extra *bfe, - BOOL apply_ancestor_messages, // this BOOL is probably temporary, for #3972, once we know how range query estimates work, will revisit this BRTNODE *node_p) __attribute__((__warn_unused_result__)); void toku_pin_brtnode_holding_lock (BRT brt, BLOCKNUM blocknum, u_int32_t fullhash, @@ -750,12 +749,6 @@ typedef struct brt_status { uint64_t search_root_retries; // number of searches that required the root node to be fetched more than once uint64_t search_tries_gt_height; // number of searches that required more tries than the height of the tree uint64_t search_tries_gt_heightplus3; // number of searches that required more tries than the height of the tree plus three - uint64_t disk_flush_leaf; // number of leaf nodes flushed to disk, not for checkpoint - uint64_t disk_flush_nonleaf; // number of nonleaf nodes flushed to disk, not for checkpoint - uint64_t disk_flush_leaf_for_checkpoint; // number of leaf nodes flushed to disk for checkpoint - uint64_t disk_flush_nonleaf_for_checkpoint; // number of nonleaf nodes flushed to disk for checkpoint - uint64_t destroy_leaf; // number of leaf nodes destroyed - uint64_t destroy_nonleaf; // number of nonleaf nodes destroyed uint64_t cleaner_total_nodes; // total number of nodes whose buffers are potentially flushed by cleaner thread uint64_t cleaner_h1_nodes; // number of nodes of height one whose message buffers are flushed by cleaner thread uint64_t cleaner_hgt1_nodes; // number of nodes of height > 1 whose message buffers are flushed by cleaner thread @@ -767,6 +760,16 @@ typedef struct brt_status { uint64_t cleaner_max_buffer_workdone; // max workdone value of any message buffer flushed by cleaner thread uint64_t cleaner_min_buffer_workdone; uint64_t cleaner_total_buffer_workdone; + uint64_t flush_total; // total number of flushes done by flusher threads or cleaner threads + uint64_t flush_in_memory; // number of in memory flushes + uint64_t flush_needed_io; // number of flushes that had to read a child (or part) off disk + uint64_t flush_cascades; // number of flushes that triggered another flush in the child + uint64_t flush_cascades_1; // number of flushes that triggered 1 cascading flush + uint64_t flush_cascades_2; // number of flushes that triggered 2 cascading flushes + uint64_t flush_cascades_3; // number of flushes that triggered 3 cascading flushes + uint64_t flush_cascades_4; // number of flushes that triggered 4 cascading flushes + uint64_t flush_cascades_5; // number of flushes that triggered 5 cascading flushes + uint64_t flush_cascades_gt_5; // number of flushes that triggered more than 5 cascading flushes } BRT_STATUS_S, *BRT_STATUS; void toku_brt_get_status(BRT_STATUS); diff --git a/newbrt/brt.c b/newbrt/brt.c index 9782730e1e6..09087f3dad3 100644 --- a/newbrt/brt.c +++ b/newbrt/brt.c @@ -235,6 +235,10 @@ nonleaf_node_is_gorged (BRTNODE node) { (!buffers_are_empty)); } +static inline void add_to_brt_status(u_int64_t* val, u_int64_t data) { + (*val) += data; +} + static void brtnode_put_cmd ( brt_compare_func compare_fun, brt_update_func update_fun, @@ -285,7 +289,6 @@ int toku_pin_brtnode (BRT brt, BLOCKNUM blocknum, u_int32_t fullhash, UNLOCKERS unlockers, ANCESTORS ancestors, struct pivot_bounds const * const bounds, struct brtnode_fetch_extra *bfe, - BOOL apply_ancestor_messages, // this BOOL is probably temporary, for #3972, once we know how range query estimates work, will revisit this BRTNODE *node_p) { void *node_v; int r = toku_cachetable_get_and_pin_nonblocking( @@ -306,7 +309,7 @@ int toku_pin_brtnode (BRT brt, BLOCKNUM blocknum, u_int32_t fullhash, unlockers); if (r==0) { BRTNODE node = node_v; - if (apply_ancestor_messages) maybe_apply_ancestors_messages_to_node(brt, node, ancestors, bounds); + maybe_apply_ancestors_messages_to_node(brt, node, ancestors, bounds); *node_p = node; // printf("%*sPin %ld\n", 8-node->height, "", blocknum.b); } else { @@ -491,7 +494,7 @@ long toku_bnc_memory_size(NONLEAF_CHILDINFO bnc) { return (sizeof(*bnc) + - toku_fifo_memory_size_in_use(bnc->buffer) + + toku_fifo_memory_size(bnc->buffer) + toku_omt_memory_size(bnc->fresh_message_tree) + toku_omt_memory_size(bnc->stale_message_tree) + toku_omt_memory_size(bnc->broadcast_list)); @@ -671,7 +674,6 @@ void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename struct brt_header *h = extraargs; BRTNODE brtnode = brtnode_v; assert(brtnode->thisnodename.b==nodename.b); - int height = brtnode->height; //printf("%s:%d %p->mdict[0]=%p\n", __FILE__, __LINE__, brtnode, brtnode->mdicts[0]); if (write_me) { if (!h->panic) { // if the brt panicked, stop writing, otherwise try to write it. @@ -690,18 +692,6 @@ void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename } } } - if (height == 0) { // statistics incremented only when disk I/O is done, so worth the threadsafe count - if (for_checkpoint) - (void) toku_sync_fetch_and_increment_uint64(&brt_status.disk_flush_leaf_for_checkpoint); - else - (void) toku_sync_fetch_and_increment_uint64(&brt_status.disk_flush_leaf); - } - else { - if (for_checkpoint) - (void) toku_sync_fetch_and_increment_uint64(&brt_status.disk_flush_nonleaf_for_checkpoint); - else - (void) toku_sync_fetch_and_increment_uint64(&brt_status.disk_flush_nonleaf); - } } //printf("%s:%d %p->mdict[0]=%p\n", __FILE__, __LINE__, brtnode, brtnode->mdicts[0]); *new_size = make_brtnode_pair_attr(brtnode); @@ -870,7 +860,7 @@ exit: } -static void flush_some_child(struct brt_header* h, BRTNODE node); +static void flush_some_child(struct brt_header* h, BRTNODE node, int *n_dirtied, int cascades); static void bring_node_fully_into_memory(BRTNODE node, struct brt_header *h); // TODO 3988 Leif set cleaner_nodes_dirtied @@ -879,31 +869,31 @@ update_cleaner_status(BRTNODE node, int childnum) { brt_status.cleaner_total_nodes++; if (node->height == 1) { - brt_status.cleaner_h1_nodes++; + brt_status.cleaner_h1_nodes++; } else { - brt_status.cleaner_hgt1_nodes++; + brt_status.cleaner_hgt1_nodes++; } - + unsigned int nbytesinbuf = toku_bnc_nbytesinbuf(BNC(node, childnum)); if (nbytesinbuf == 0) { - brt_status.cleaner_empty_nodes++; + brt_status.cleaner_empty_nodes++; } else { - if (nbytesinbuf > brt_status.cleaner_max_buffer_size) { - brt_status.cleaner_max_buffer_size = nbytesinbuf; - } - if (nbytesinbuf < brt_status.cleaner_min_buffer_size) { - brt_status.cleaner_min_buffer_size = nbytesinbuf; - } - brt_status.cleaner_total_buffer_size += nbytesinbuf; + if (nbytesinbuf > brt_status.cleaner_max_buffer_size) { + brt_status.cleaner_max_buffer_size = nbytesinbuf; + } + if (nbytesinbuf < brt_status.cleaner_min_buffer_size) { + brt_status.cleaner_min_buffer_size = nbytesinbuf; + } + brt_status.cleaner_total_buffer_size += nbytesinbuf; - uint64_t workdone = BP_WORKDONE(node, childnum); - if (workdone > brt_status.cleaner_max_buffer_workdone) { - brt_status.cleaner_max_buffer_workdone = workdone; - } - if (workdone < brt_status.cleaner_min_buffer_workdone) { - brt_status.cleaner_min_buffer_workdone = workdone; - } - brt_status.cleaner_total_buffer_workdone += workdone; + uint64_t workdone = BP_WORKDONE(node, childnum); + if (workdone > brt_status.cleaner_max_buffer_workdone) { + brt_status.cleaner_max_buffer_workdone = workdone; + } + if (workdone < brt_status.cleaner_min_buffer_workdone) { + brt_status.cleaner_min_buffer_workdone = workdone; + } + brt_status.cleaner_total_buffer_workdone += workdone; } } @@ -924,7 +914,9 @@ toku_brtnode_cleaner_callback(void *brtnode_pv, BLOCKNUM blocknum, u_int32_t ful // Either flush_some_child will unlock the node, or we do it here. if (toku_bnc_nbytesinbuf(BNC(node, childnum)) > 0) { - flush_some_child(h, node); + int n_dirtied = 0; + flush_some_child(h, node, &n_dirtied, 0); + brt_status.cleaner_nodes_dirtied += n_dirtied; } else { toku_unpin_brtnode_off_client_thread(h, node); } @@ -1168,10 +1160,7 @@ void toku_brtnode_free (BRTNODE *nodep) { toku_mempool_destroy(mp); } } - toku_sync_fetch_and_increment_uint64(&brt_status.destroy_leaf); } - else - toku_sync_fetch_and_increment_uint64(&brt_status.destroy_nonleaf); toku_destroy_brtnode_internals(node); toku_free(node); *nodep=0; @@ -2006,11 +1995,11 @@ brt_split_child (struct brt_header* h, BRTNODE node, int childnum, BRTNODE child toku_unpin_brtnode_off_client_thread(h, node); if (nodea->height > 0 && nonleaf_node_is_gorged(nodea)) { toku_unpin_brtnode_off_client_thread(h, nodeb); - flush_some_child(h, nodea); + flush_some_child(h, nodea, NULL, 0); } else if (nodeb->height > 0 && nonleaf_node_is_gorged(nodeb)) { toku_unpin_brtnode_off_client_thread(h, nodea); - flush_some_child(h, nodeb); + flush_some_child(h, nodeb, NULL, 0); } else { toku_unpin_brtnode_off_client_thread(h, nodea); @@ -2188,7 +2177,7 @@ static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE b if (cmd->type == BRT_UPDATE) { // key is passed in with command (should be same as from le) // update function extra is passed in with command - toku_sync_fetch_and_increment_uint64(&brt_status.updates); + add_to_brt_status(&brt_status.updates,1); keyp = cmd->u.id.key; update_function_extra = cmd->u.id.val; } else if (cmd->type == BRT_UPDATE_BROADCAST_ALL) { @@ -2197,7 +2186,7 @@ static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BASEMENTNODE b assert(le); // for broadcast updates, we just hit all leafentries // so this cannot be null assert(cmd->u.id.key->size == 0); - toku_sync_fetch_and_increment_uint64(&brt_status.updates_broadcast); + add_to_brt_status(&brt_status.updates_broadcast,1); keyp = toku_fill_dbt(&key, le_key(le), le_keylen(le)); update_function_extra = cmd->u.id.val; } else { @@ -3013,7 +3002,7 @@ brt_merge_child (struct brt_header* h, BRTNODE node, int childnum_to_merge, BOOL toku_unpin_brtnode_off_client_thread(h, childb); } if (childa->height > 0 && nonleaf_node_is_gorged(childa)) { - flush_some_child(h, childa); + flush_some_child(h, childa, NULL, 0); } else { toku_unpin_brtnode_off_client_thread(h, childa); @@ -3174,7 +3163,41 @@ maybe_destroy_child_blbs(BRTNODE node, BRTNODE child) } static void -flush_some_child (struct brt_header* h, BRTNODE parent) +update_flush_status(BRTNODE UU(parent), BRTNODE child, int cascades) +{ + __sync_fetch_and_add(&brt_status.flush_total, 1); + if (cascades > 0) { + __sync_fetch_and_add(&brt_status.flush_cascades, 1); + switch (cascades) { + case 1: + __sync_fetch_and_add(&brt_status.flush_cascades_1, 1); break; + case 2: + __sync_fetch_and_add(&brt_status.flush_cascades_2, 1); break; + case 3: + __sync_fetch_and_add(&brt_status.flush_cascades_3, 1); break; + case 4: + __sync_fetch_and_add(&brt_status.flush_cascades_4, 1); break; + case 5: + __sync_fetch_and_add(&brt_status.flush_cascades_5, 1); break; + default: + __sync_fetch_and_add(&brt_status.flush_cascades_gt_5, 1); break; + } + } + bool flush_needs_io = false; + for (int i = 0; !flush_needs_io && i < child->n_children; ++i) { + if (BP_STATE(child, i) == PT_ON_DISK) { + flush_needs_io = true; + } + } + if (flush_needs_io) { + __sync_fetch_and_add(&brt_status.flush_needed_io, 1); + } else { + __sync_fetch_and_add(&brt_status.flush_in_memory, 1); + } +} + +static void +flush_some_child (struct brt_header* h, BRTNODE parent, int *n_dirtied, int cascades) // Effect: This function does the following: // - Pick a child of parent (the heaviest child), // - flush from parent to child, @@ -3187,6 +3210,9 @@ flush_some_child (struct brt_header* h, BRTNODE parent) bool parent_unpinned = false; assert(parent->height>0); toku_assert_entire_node_in_memory(parent); + if (n_dirtied && !parent->dirty) { + (*n_dirtied)++; + } // pick the child we want to flush to int childnum; @@ -3208,6 +3234,11 @@ flush_some_child (struct brt_header* h, BRTNODE parent) fill_bfe_for_min_read(&bfe, h); toku_pin_brtnode_off_client_thread(h, targetchild, childfullhash, &bfe, 1, &parent, &child); + if (n_dirtied && !child->dirty) { + (*n_dirtied)++; + } + update_flush_status(parent, child, cascades); + // for test call_flusher_thread_callback(ft_flush_after_child_pin); @@ -3287,7 +3318,7 @@ flush_some_child (struct brt_header* h, BRTNODE parent) // it is the responsibility of flush_some_child to unpin parent // if (child->height > 0 && nonleaf_node_is_gorged(child)) { - flush_some_child(h, child); + flush_some_child(h, child, n_dirtied, cascades+1); } else { toku_unpin_brtnode_off_client_thread(h, child); @@ -3324,6 +3355,7 @@ static void flush_this_child (struct brt_header* h, BRTNODE node, BRTNODE child, int childnum) // Effect: Push everything in the CHILDNUMth buffer of node down into the child. { + update_flush_status(node, child, 0); int r; toku_assert_entire_node_in_memory(node); maybe_destroy_child_blbs(node, child); @@ -3426,7 +3458,7 @@ void toku_apply_cmd_to_leaf( snapshot_txnids, live_list_reverse); } else { - toku_sync_fetch_and_increment_uint64(&brt_status.msn_discards); + add_to_brt_status(&brt_status.msn_discards,1); } } } @@ -3449,7 +3481,7 @@ void toku_apply_cmd_to_leaf( live_list_reverse); if (bn_made_change) *made_change = 1; } else { - toku_sync_fetch_and_increment_uint64(&brt_status.msn_discards); + add_to_brt_status(&brt_status.msn_discards,1); } } } @@ -3597,7 +3629,7 @@ static void flush_node_fun(void *fe_v) // of flush_some_child to unlock the node // otherwise, we unlock the node here. if (fe->node->height > 0 && nonleaf_node_is_gorged(fe->node)) { - flush_some_child(fe->h, fe->node); + flush_some_child(fe->h, fe->node, NULL, 0); } else { toku_unpin_brtnode_off_client_thread(fe->h,fe->node); @@ -3608,7 +3640,7 @@ static void flush_node_fun(void *fe_v) // bnc, which means we are tasked with flushing some // buffer in the node. // It is the responsibility of flush_some_child to unlock the node - flush_some_child(fe->h, fe->node); + flush_some_child(fe->h, fe->node, NULL, 0); } remove_background_job(fe->h->cf, false); toku_free(fe); @@ -5740,7 +5772,7 @@ do_brt_leaf_put_cmd(BRT t, BASEMENTNODE bn, BRTNODE ancestor, int childnum, OMT } brt_leaf_put_cmd(t->compare_fun, t->update_fun, &t->h->descriptor, bn, &brtcmd, &made_change, &BP_WORKDONE(ancestor, childnum), snapshot_txnids, live_list_reverse); } else { - toku_sync_fetch_and_increment_uint64(&brt_status.msn_discards); + add_to_brt_status(&brt_status.msn_discards,1); } } @@ -6258,7 +6290,6 @@ brt_search_child(BRT brt, BRTNODE node, int childnum, brt_search_t *search, BRT_ unlockers, &next_ancestors, bounds, &bfe, - TRUE, &childnode); if (rr==TOKUDB_TRY_AGAIN) return rr; assert(rr==0); @@ -6499,7 +6530,7 @@ try_again: brtcursor->left_is_neg_infty, brtcursor->right_is_pos_infty ); - r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, TRUE, &node); + r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, &node); assert(r==0 || r== TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { root_tries++; @@ -7053,7 +7084,7 @@ static int toku_brt_keyrange_internal (BRT brt, BRTNODE node, BLOCKNUM childblocknum = BP_BLOCKNUM(node, child_number); u_int32_t fullhash = compute_child_fullhash(brt->cf, node, child_number); BRTNODE childnode; - r = toku_pin_brtnode(brt, childblocknum, fullhash, unlockers, &next_ancestors, bounds, bfe, FALSE, &childnode); + r = toku_pin_brtnode(brt, childblocknum, fullhash, unlockers, &next_ancestors, bounds, bfe, &childnode); if (r!=TOKUDB_TRY_AGAIN) { assert(r==0); struct unlock_brtnode_extra unlock_extra = {brt,childnode}; @@ -7101,7 +7132,7 @@ int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less_p, u_int64_t *equal_p, BRTNODE node; { - int r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, FALSE, &node); + int r = toku_pin_brtnode(brt, *rootp, fullhash,(UNLOCKERS)NULL,(ANCESTORS)NULL, &infinite_bounds, &bfe, &node); assert(r==0 || r== TOKUDB_TRY_AGAIN); if (r == TOKUDB_TRY_AGAIN) { goto try_again; diff --git a/newbrt/cachetable.c b/newbrt/cachetable.c index d186c88aa4a..b17c7c0dfa3 100644 --- a/newbrt/cachetable.c +++ b/newbrt/cachetable.c @@ -54,6 +54,7 @@ static u_int64_t local_checkpoint; // number of times a local checkpoint static u_int64_t local_checkpoint_files; // number of files subject to local checkpoint taken for a commit (2440) static u_int64_t local_checkpoint_during_checkpoint; // number of times a local checkpoint happened during normal checkpoint (2440) static u_int64_t cachetable_evictions; +static u_int64_t cleaner_executions; // number of times the cleaner thread's loop has executed enum ctpair_state { @@ -3589,6 +3590,7 @@ void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s) { s->local_checkpoint_files = local_checkpoint_files; s->local_checkpoint_during_checkpoint = local_checkpoint_during_checkpoint; s->evictions = cachetable_evictions; + s->cleaner_executions = cleaner_executions; s->size_nonleaf = ct->size_nonleaf; s->size_leaf = ct->size_leaf; s->size_rollback = ct->size_rollback; @@ -3657,6 +3659,7 @@ cleaner_thread (void *cachetable_v) assert(ct); u_int32_t num_iterations = toku_get_cleaner_iterations(ct); for (u_int32_t i = 0; i < num_iterations; ++i) { + cleaner_executions++; cachetable_lock(ct); PAIR best_pair = NULL; int n_seen = 0; diff --git a/newbrt/cachetable.h b/newbrt/cachetable.h index 256148908e2..038c0a90931 100644 --- a/newbrt/cachetable.h +++ b/newbrt/cachetable.h @@ -513,6 +513,7 @@ typedef struct cachetable_status { uint64_t local_checkpoint_files; // number of files subject to local checkpoint taken for a commit (2440) uint64_t local_checkpoint_during_checkpoint; // number of times a local checkpoint happened during normal checkpoint (2440) u_int64_t evictions; + u_int64_t cleaner_executions; // number of times the cleaner thread's loop has executed int64_t size_nonleaf; // number of bytes in cachetable belonging to nonleaf nodes int64_t size_leaf; // number of bytes in cachetable belonging to leaf nodes int64_t size_rollback; // number of bytes in cachetable belonging to rollback nodes diff --git a/src/ydb.c b/src/ydb.c index 57f0fc609c8..bde5b5e52b9 100644 --- a/src/ydb.c +++ b/src/ydb.c @@ -79,10 +79,6 @@ static u_int64_t num_multi_updates; static u_int64_t num_multi_updates_fail; static u_int64_t num_point_queries; static u_int64_t num_sequential_queries; -static u_int64_t num_db_open; -static u_int64_t num_db_close; -static u_int64_t max_db_open; -static u_int64_t num_open_dbs; static u_int64_t directory_read_locks; /* total directory read locks taken */ static u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */ @@ -1979,6 +1975,7 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st engstat->local_checkpoint_files = ctstat.local_checkpoint_files; engstat->local_checkpoint_during_checkpoint = ctstat.local_checkpoint_during_checkpoint; engstat->cachetable_evictions = ctstat.evictions; + engstat->cleaner_executions = ctstat.cleaner_executions; engstat->cachetable_size_leaf = ctstat.size_leaf; engstat->cachetable_size_nonleaf = ctstat.size_nonleaf; engstat->cachetable_size_rollback = ctstat.size_rollback; @@ -2059,6 +2056,16 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat, char * env_panic_st engstat->cleaner_max_buffer_workdone = brt_stat.cleaner_max_buffer_workdone; engstat->cleaner_min_buffer_workdone = brt_stat.cleaner_min_buffer_workdone; engstat->cleaner_total_buffer_workdone = brt_stat.cleaner_total_buffer_workdone; + engstat->flush_total = brt_stat.flush_total; + engstat->flush_in_memory = brt_stat.flush_in_memory; + engstat->flush_needed_io = brt_stat.flush_needed_io; + engstat->flush_cascades = brt_stat.flush_cascades; + engstat->flush_cascades_1 = brt_stat.flush_cascades_1; + engstat->flush_cascades_2 = brt_stat.flush_cascades_2; + engstat->flush_cascades_3 = brt_stat.flush_cascades_3; + engstat->flush_cascades_4 = brt_stat.flush_cascades_4; + engstat->flush_cascades_5 = brt_stat.flush_cascades_5; + engstat->flush_cascades_gt_5 = brt_stat.flush_cascades_gt_5; } { u_int64_t fsync_count, fsync_time; @@ -2225,6 +2232,7 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) { n += snprintf(buff + n, bufsiz - n, "cachetable_wait_reading %"PRIu64"\n", engstat.cachetable_wait_reading); n += snprintf(buff + n, bufsiz - n, "cachetable_wait_writing %"PRIu64"\n", engstat.cachetable_wait_writing); n += snprintf(buff + n, bufsiz - n, "cachetable_evictions %"PRIu64"\n", engstat.cachetable_evictions); + n += snprintf(buff + n, bufsiz - n, "cleaner_executions %"PRIu64"\n", engstat.cleaner_executions); n += snprintf(buff + n, bufsiz - n, "puts %"PRIu64"\n", engstat.puts); n += snprintf(buff + n, bufsiz - n, "prefetches %"PRIu64"\n", engstat.prefetches); n += snprintf(buff + n, bufsiz - n, "maybe_get_and_pins %"PRIu64"\n", engstat.maybe_get_and_pins); @@ -2290,6 +2298,15 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) { n += snprintf(buff + n, bufsiz - n, "cleaner_max_buffer_workdone %"PRIu64"\n", engstat.cleaner_max_buffer_workdone); n += snprintf(buff + n, bufsiz - n, "cleaner_min_buffer_workdone %"PRIu64"\n", engstat.cleaner_min_buffer_workdone); n += snprintf(buff + n, bufsiz - n, "cleaner_total_buffer_workdone %"PRIu64"\n", engstat.cleaner_total_buffer_workdone); + n += snprintf(buff + n, bufsiz - n, "flush_total %"PRIu64"\n", engstat.flush_total); + n += snprintf(buff + n, bufsiz - n, "flush_needed_io %"PRIu64"\n", engstat.flush_needed_io); + n += snprintf(buff + n, bufsiz - n, "flush_cascades %"PRIu64"\n", engstat.flush_cascades); + n += snprintf(buff + n, bufsiz - n, "flush_cascades_1 %"PRIu64"\n", engstat.flush_cascades_1); + n += snprintf(buff + n, bufsiz - n, "flush_cascades_2 %"PRIu64"\n", engstat.flush_cascades_2); + n += snprintf(buff + n, bufsiz - n, "flush_cascades_3 %"PRIu64"\n", engstat.flush_cascades_3); + n += snprintf(buff + n, bufsiz - n, "flush_cascades_4 %"PRIu64"\n", engstat.flush_cascades_4); + n += snprintf(buff + n, bufsiz - n, "flush_cascades_5 %"PRIu64"\n", engstat.flush_cascades_5); + n += snprintf(buff + n, bufsiz - n, "flush_cascades_gt_5 %"PRIu64"\n", engstat.flush_cascades_gt_5); n += snprintf(buff + n, bufsiz - n, "cleaner_period %"PRIu32"\n", engstat.cleaner_period); n += snprintf(buff + n, bufsiz - n, "cleaner_iterations %"PRIu32"\n", engstat.cleaner_iterations); n += snprintf(buff + n, bufsiz - n, "multi_inserts %"PRIu64"\n", engstat.multi_inserts); @@ -2999,10 +3016,6 @@ env_note_db_opened(DB_ENV *env, DB *db) { OMTVALUE dbv; uint32_t idx; env->i->num_open_dbs++; - num_open_dbs = env->i->num_open_dbs; - num_db_open++; - if (num_open_dbs > max_db_open) - max_db_open = num_open_dbs; r = toku_omt_find_zero(env->i->open_dbs, find_db_by_db, db, &dbv, &idx); assert(r==DB_NOTFOUND); //Must not already be there. r = toku_omt_insert_at(env->i->open_dbs, db, idx); @@ -3018,8 +3031,6 @@ env_note_db_closed(DB_ENV *env, DB *db) { OMTVALUE dbv; uint32_t idx; env->i->num_open_dbs--; - num_open_dbs = env->i->num_open_dbs; - num_db_close++; r = toku_omt_find_zero(env->i->open_dbs, find_db_by_db, db, &dbv, &idx); assert(r==0); //Must already be there. assert((DB*)dbv == db);