[t:2949] Merge from tokudb.2949 with command, executed at sandbox/toku {{{svn merge -r39023:HEAD tokudb.2949 tokudb}}}. Refs #2949.

git-svn-id: file:///svn/toku/tokudb@39376 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Barry Perlman 2013-04-17 00:00:08 -04:00 committed by Yoni Fogel
parent 717ff6662f
commit e37c5d33de
46 changed files with 2266 additions and 3461 deletions

View file

@ -67,256 +67,28 @@ struct __toku_indexer {
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
/*
 * ENGINE_STATUS: flat record of engine-wide status values (timestamps,
 * counters, high-water marks) covering the ydb lock, checkpoints,
 * transactions, the cachetable, range locks, row operations, the cleaner
 * and flusher threads, the logger, ENOSPC handling, loaders, indexers,
 * upgrade state and memory allocation.
 *
 * All 64-bit counters use the C99 uint64_t spelling (the struct previously
 * mixed it with the non-standard u_int64_t); width and signedness are
 * unchanged, so the layout is the same.
 *
 * Time-of-day fields are char[26] text buffers.
 * NOTE(review): 26 is presumably sized for ctime_r() output -- confirm.
 */
typedef struct __toku_engine_status {
    /* --- environment times --- */
    char creationtime[26]; /* time of environment creation */
    char startuptime[26]; /* time of engine startup */
    char now[26]; /* time of engine status query (i.e. now) */

    /* --- ydb lock --- */
    uint64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */
    uint64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */
    uint64_t max_waiters; /* The maximum of num_waiters_now. */
    uint64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */
    uint64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */
    uint64_t total_time_ydb_lock_held; /* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */
    uint64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */

    /* --- checkpoints --- */
    uint64_t checkpoint_period; /* delay between automatic checkpoints */
    uint64_t checkpoint_footprint; /* state of checkpoint procedure */
    char checkpoint_time_begin[26]; /* time of last checkpoint begin */
    char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
    char checkpoint_time_end[26]; /* time of last checkpoint end */
    uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */
    uint64_t checkpoint_count; /* number of checkpoints taken */
    uint64_t checkpoint_count_fail; /* number of checkpoints failed */
    uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */
    uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */
    uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */
    uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */
    uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */
    uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */
    uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commitcheckpoint waited for the checkpoint_safe lock */
    uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */
    uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */
    uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */
    uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commitcheckpoint waited for the multi_operation lock */
    uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */

    /* --- cleaner configuration --- */
    uint64_t cleaner_period; /* delay between executions of cleaner */
    uint64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */

    /* --- transactions --- */
    uint64_t txn_begin; /* number of transactions ever begun */
    uint64_t txn_commit; /* txn commit operations */
    uint64_t txn_abort; /* txn abort operations */
    uint64_t txn_close; /* txn completions (should equal commit+abort) */
    uint64_t txn_num_open; /* should be begin - close */
    uint64_t txn_max_open; /* max value of num_open */
    uint64_t txn_oldest_live; /* oldest extant txn txnid */
    char txn_oldest_live_starttime[26]; /* oldest extant txn start time */
    uint64_t next_lsn; /* lsn that will be assigned to next log entry */

    /* --- cachetable --- */
    uint64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
    uint64_t cachetable_lock_released; /* how many times has cachetable lock been released */
    uint64_t cachetable_hit; /* how many cache hits */
    uint64_t cachetable_miss; /* how many cache misses */
    uint64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
    uint64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
    uint64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
    uint64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
    uint64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint */
    uint64_t puts; /* how many times has a newly created node been put into the cachetable */
    uint64_t prefetches; /* how many times has a block been prefetched into the cachetable */
    uint64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
    uint64_t maybe_get_and_pin_hits; /* how many times has maybe_get_and_pin(_clean) returned with a node */
    uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
    uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
    uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */
    uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
    uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */
    uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */
    uint64_t cachetable_size_rollback; /* the number of bytes of rollback nodes */
    uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */
    uint64_t cachetable_evictions; /* how many cache table blocks are evicted */
    uint64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */

    /* --- range locks and locktrees --- */
    uint64_t range_locks_max; /* max total number of range locks */
    uint64_t range_locks_curr; /* total range locks currently in use */
    uint64_t range_locks_max_memory; /* max total bytes of range locks */
    uint64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
    uint64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
    uint64_t range_lock_escalation_failures; /* number of times range locks escalation failed */
    uint64_t range_read_locks; /* total range read locks taken */
    uint64_t range_read_locks_fail; /* total range read locks unable to be taken */
    uint64_t range_out_of_read_locks; /* total times range read locks exhausted */
    uint64_t range_write_locks; /* total range write locks taken */
    uint64_t range_write_locks_fail; /* total range write locks unable to be taken */
    uint64_t range_out_of_write_locks; /* total times range write locks exhausted */
    uint64_t range_lt_create; /* number of locktrees created */
    uint64_t range_lt_create_fail; /* number of locktree create failures */
    uint64_t range_lt_destroy; /* number of locktrees destroyed */
    uint64_t range_lt_num; /* number of locktrees (should be created - destroyed) */
    uint64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */

    /* --- directory locks --- */
    uint64_t directory_read_locks; /* total directory read locks taken */
    uint64_t directory_read_locks_fail; /* total directory read locks unable to be taken */
    uint64_t directory_write_locks; /* total directory write locks taken */
    uint64_t directory_write_locks_fail; /* total directory write locks unable to be taken */

    /* --- ydb row operations --- */
    uint64_t inserts; /* ydb row insert operations */
    uint64_t inserts_fail; /* ydb row insert operations that failed */
    uint64_t deletes; /* ydb row delete operations */
    uint64_t deletes_fail; /* ydb row delete operations that failed */
    uint64_t updates; /* ydb row update operations */
    uint64_t updates_fail; /* ydb row update operations that failed */
    uint64_t updates_broadcast; /* ydb row update broadcast operations */
    uint64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */
    uint64_t multi_inserts; /* ydb multi_row insert operations, dictionary count */
    uint64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */
    uint64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */
    uint64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */
    uint64_t multi_updates; /* ydb multi_row update operations, dictionary count */
    uint64_t multi_updates_fail; /* ydb multi_row update operations that failed, dictionary count */
    uint64_t point_queries; /* ydb point queries */
    uint64_t sequential_queries; /* ydb sequential queries */
    uint64_t num_db_open; /* number of db_open operations */
    uint64_t num_db_close; /* number of db_close operations */
    uint64_t num_open_dbs; /* number of currently open dbs */
    uint64_t max_open_dbs; /* max number of simultaneously open dbs */

    /* --- leafentry / node partition activity --- */
    uint64_t le_updates; /* leafentry update operations */
    uint64_t le_updates_broadcast; /* leafentry update broadcast operations */
    uint64_t descriptor_set; /* descriptor set operations */
    uint64_t partial_fetch_hit; /* node partition is present */
    uint64_t partial_fetch_miss; /* node is present but partition is absent */
    uint64_t partial_fetch_compressed; /* node partition is present but compressed */
    uint64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */
    uint64_t partial_evictions_leaf; /* number of leaf node partial evictions */
    uint64_t msn_discards; /* how many messages were ignored by leaf because of msn */
    uint64_t max_workdone; /* max workdone value of any buffer */

    /* --- searches --- */
    uint64_t total_searches; /* total number of searches */
    uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */
    uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */
    uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */
    uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */
    uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */
    uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */

    /* --- cleaner thread --- */
    uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */
    uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */
    uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */
    uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */
    uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */
    uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */
    uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */
    uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */
    uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
    uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
    uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
    uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */
    uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */
    uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */
    uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */

    /* --- flushes --- */
    uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
    uint64_t flush_in_memory; /* number of in memory flushes */
    uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
    uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */
    uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */
    uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */
    uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */
    uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */
    uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */
    uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */
    uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */
    uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */
    uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */
    uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */

    /* --- node lifecycle --- */
    uint64_t create_leaf; /* number of leaf nodes created */
    uint64_t create_nonleaf; /* number of nonleaf nodes created */
    uint64_t destroy_leaf; /* number of leaf nodes destroyed */
    uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */
    uint64_t split_leaf; /* number of leaf nodes split */
    uint64_t split_nonleaf; /* number of nonleaf nodes split */
    uint64_t merge_leaf; /* number of times leaf nodes are merged */
    uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */
    uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
    uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
    uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */

    /* --- HOT operations --- */
    uint64_t hot_num_started; /* number of HOT operations that have begun */
    uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
    uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
    uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */

    /* --- messages --- */
    uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees) */
    uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves */
    uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate) */
    uint64_t msg_bytes_max; /* high-water mark of bytes of messages in trees (estimate) */
    uint64_t msg_num; /* how many messages injected at root */
    uint64_t msg_num_broadcast; /* how many broadcast messages injected at root */

    /* --- decompression / fetch breakdown --- */
    uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
    uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
    uint64_t num_basements_decompressed_prefetch; /* ... for a prefetch */
    uint64_t num_basements_decompressed_write; /* ... for a write */
    uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
    uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
    uint64_t num_msg_buffer_decompressed_prefetch; /* ... for a prefetch */
    uint64_t num_msg_buffer_decompressed_write; /* ... for a write */
    uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
    uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
    uint64_t num_pivots_fetched_write; /* ... for a write */
    uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
    uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
    uint64_t num_basements_fetched_prefetch; /* ... for a prefetch */
    uint64_t num_basements_fetched_write; /* ... for a write */
    uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
    uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
    uint64_t num_msg_buffer_fetched_prefetch; /* ... for a prefetch */
    uint64_t num_msg_buffer_fetched_write; /* ... for a write */

    /* --- packed leafentries --- */
    uint64_t le_max_committed_xr; /* max committed transaction records in any packed le */
    uint64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
    uint64_t le_max_memsize; /* max memsize of any packed le */
    uint64_t le_expanded; /* number of times ule used expanded memory */

    /* --- fsync and logger --- */
    uint64_t fsync_count; /* number of times fsync performed */
    uint64_t fsync_time; /* total time required to fsync */
    uint64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */
    uint64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */
    uint64_t logger_swap_ctr; /* how many times have logger buffers been swapped */

    /* --- ENOSPC handling --- */
    char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
    uint64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
    uint64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */
    uint64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */
    uint64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */

    /* --- loaders --- */
    uint64_t loader_create; /* number of loaders created */
    uint64_t loader_create_fail; /* number of failed loader creations */
    uint64_t loader_put; /* number of loader puts (success) */
    uint64_t loader_put_fail; /* number of loader puts that failed */
    uint64_t loader_close; /* number of loaders closed (succeed or fail) */
    uint64_t loader_close_fail; /* number of loaders closed with error return */
    uint64_t loader_abort; /* number of loaders aborted */
    uint64_t loader_current; /* number of loaders currently existing */
    uint64_t loader_max; /* max number of loaders extant simultaneously */
    uint64_t logsuppress; /* number of times logging is suppressed */
    uint64_t logsuppressfail; /* number of times logging cannot be suppressed */

    /* --- indexers --- */
    uint64_t indexer_create; /* number of indexers created successfully */
    uint64_t indexer_create_fail; /* number of failed indexer creations */
    uint64_t indexer_build; /* number of indexer build calls (succeeded) */
    uint64_t indexer_build_fail; /* number of indexers build calls with error return */
    uint64_t indexer_close; /* number of indexers closed successfully */
    uint64_t indexer_close_fail; /* number of indexers closed with error return */
    uint64_t indexer_abort; /* number of indexers aborted */
    uint64_t indexer_current; /* number of indexers currently existing */
    uint64_t indexer_max; /* max number of indexers extant simultaneously */

    /* --- upgrade state --- */
    uint64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */
    uint64_t upgrade_header; /* how many brt headers were upgraded? */
    uint64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */
    uint64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */
    uint64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */
    uint64_t original_ver; /* original environment version */
    uint64_t ver_at_startup; /* environment version at startup */
    uint64_t last_lsn_v13; /* last lsn of version 13 environment */
    char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */

    /* --- panic state --- */
    uint64_t env_panic; /* non-zero if environment is panicked */
    uint64_t logger_panic; /* non-zero if logger is panicked */
    uint64_t logger_panic_errno; /* errno value associated with the logger panic */

    /* --- memory allocator --- */
    uint64_t malloc_count; /* number of malloc operations */
    uint64_t free_count; /* number of free operations */
    uint64_t realloc_count; /* number of realloc operations */
    uint64_t malloc_fail; /* number of failed malloc operations */
    uint64_t realloc_fail; /* number of failed realloc operations */
    uint64_t mem_requested; /* number of bytes requested via malloc/realloc */
    uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */
    uint64_t mem_freed; /* number of bytes freed */
    uint64_t max_mem_in_use; /* estimated max value of (used - freed) */
    uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */
    const char * mallocator_version; /* version string from malloc lib */
} ENGINE_STATUS;
/*
 * fs_redzone_state: file system space zones, from most to least available
 * space. The numeric values are fixed explicitly.
 */
typedef enum {
    /* green zone (we have lots of space) */
    FS_GREEN = 0,
    /* yellow zone (issue warning but allow operations) */
    FS_YELLOW = 1,
    /* red zone (prevent insert operations) */
    FS_RED = 2,
    /* For reporting engine status, completely blocked */
    FS_BLOCKED = 3
} fs_redzone_state;
/*
 * toku_engine_status_display_type: tag telling a reader how the value of an
 * engine status row is to be interpreted. Values are spelled out explicitly;
 * they match the implicit numbering of the enumerators in declaration order.
 */
typedef enum {
    FS_STATE = 0, /* interpret as file system state (redzone) enum */
    UINT64   = 1, /* interpret as uint64_t */
    CHARSTR  = 2, /* interpret as char * */
    UNIXTIME = 3, /* interpret as time_t */
    TOKUTIME = 4  /* interpret as tokutime_t */
} toku_engine_status_display_type;
/*
 * One row of engine status: a key, a human-readable legend, a display-type
 * tag and the value itself.
 *
 * NOTE(review): presumably value.str is used when type is CHARSTR and
 * value.num for the other display types -- confirm against the producers.
 */
struct __toku_engine_status_row {
    /* info schema key, should not change across revisions without good reason */
    char *keyname;
    /* the text that will appear at user interface */
    char *legend;
    /* how to interpret the value */
    toku_engine_status_display_type type;
    union {
        uint64_t num;
        char *str;
    } value;
};

/* TOKU_ENGINE_STATUS_ROW_S is the row struct itself. */
typedef struct __toku_engine_status_row TOKU_ENGINE_STATUS_ROW_S;
/* TOKU_ENGINE_STATUS_ROW is a pointer to a row. */
typedef struct __toku_engine_status_row *TOKU_ENGINE_STATUS_ROW;
typedef enum {
DB_BTREE=1,
DB_UNKNOWN=5
@ -430,8 +202,9 @@ struct __toku_db_env {
void *app_private; /* 32-bit offset=36 size=4, 64=bit offset=72 size=8 */
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
@ -458,7 +231,7 @@ struct __toku_db_env {
void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
void* __toku_dummy0[6];
void* __toku_dummy0[5];
char __toku_dummy1[64];
void *api1_internal; /* 32-bit offset=212 size=4, 64=bit offset=360 size=8 */
void* __toku_dummy2[7];

View file

@ -67,256 +67,28 @@ struct __toku_indexer {
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
typedef struct __toku_engine_status {
char creationtime[26]; /* time of environment creation */
char startuptime[26]; /* time of engine startup */
char now[26]; /* time of engine status query (i.e. now) */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */
u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */
u_int64_t max_waiters; /* The maximum of num_waiters_now. */
u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */
u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */
u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */
u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */
u_int64_t checkpoint_period; /* delay between automatic checkpoints */
u_int64_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */
uint64_t checkpoint_count; /* number of checkpoints taken */
uint64_t checkpoint_count_fail; /* number of checkpoints failed */
uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */
uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */
uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */
uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commitcheckpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commitcheckpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */
u_int64_t cleaner_period; /* delay between executions of cleaner */
u_int64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */
u_int64_t txn_begin; /* number of transactions ever begun */
u_int64_t txn_commit; /* txn commit operations */
u_int64_t txn_abort; /* txn abort operations */
u_int64_t txn_close; /* txn completions (should equal commit+abort) */
u_int64_t txn_num_open; /* should be begin - close */
u_int64_t txn_max_open; /* max value of num_open */
u_int64_t txn_oldest_live; /* oldest extant txn txnid */
char txn_oldest_live_starttime[26]; /* oldest extant txn start time */
u_int64_t next_lsn; /* lsn that will be assigned to next log entry */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */
uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */
uint64_t cachetable_size_rollback; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */
u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */
u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */
u_int64_t range_locks_max; /* max total number of range locks */
u_int64_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int64_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */
u_int64_t range_read_locks_fail; /* total range read locks unable to be taken */
u_int64_t range_out_of_read_locks; /* total times range read locks exhausted */
u_int64_t range_write_locks; /* total range write locks taken */
u_int64_t range_write_locks_fail; /* total range write locks unable to be taken */
u_int64_t range_out_of_write_locks; /* total times range write locks exhausted */
u_int64_t range_lt_create; /* number of locktrees created */
u_int64_t range_lt_create_fail; /* number of locktree create failures */
u_int64_t range_lt_destroy; /* number of locktrees destroyed */
u_int64_t range_lt_num; /* number of locktrees (should be created - destroyed) */
u_int64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */
u_int64_t directory_read_locks; /* total directory read locks taken */
u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */
u_int64_t directory_write_locks; /* total directory write locks taken */
u_int64_t directory_write_locks_fail; /* total directory write locks unable to be taken */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t inserts_fail; /* ydb row insert operations that failed */
u_int64_t deletes; /* ydb row delete operations */
u_int64_t deletes_fail; /* ydb row delete operations that failed */
u_int64_t updates; /* ydb row update operations */
u_int64_t updates_fail; /* ydb row update operations that failed */
u_int64_t updates_broadcast; /* ydb row update broadcast operations */
u_int64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */
u_int64_t multi_inserts; /* ydb multi_row insert operations, dictionaray count */
u_int64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */
u_int64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */
u_int64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */
u_int64_t multi_updates; /* ydb row update operations, dictionary count */
u_int64_t multi_updates_fail; /* ydb row update operations that failed, dictionary count */
u_int64_t point_queries; /* ydb point queries */
u_int64_t sequential_queries; /* ydb sequential queries */
u_int64_t num_db_open; /* number of db_open operations */
u_int64_t num_db_close; /* number of db_close operations */
u_int64_t num_open_dbs; /* number of currently open dbs */
u_int64_t max_open_dbs; /* max number of simultaneously open dbs */
u_int64_t le_updates; /* leafentry update operations */
u_int64_t le_updates_broadcast; /* leafentry update broadcast operations */
u_int64_t descriptor_set; /* descriptor set operations */
u_int64_t partial_fetch_hit; /* node partition is present */
u_int64_t partial_fetch_miss; /* node is present but partition is absent */
u_int64_t partial_fetch_compressed; /* node partition is present but compressed */
u_int64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */
u_int64_t partial_evictions_leaf; /* number of leaf node partial evictions */
u_int64_t msn_discards; /* how many messages were ignored by leaf because of msn */
u_int64_t max_workdone; /* max workdone value of any buffer */
uint64_t total_searches; /* total number of searches */
uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */
uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */
uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */
uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */
uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */
uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */
uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */
uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */
uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */
uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */
uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */
uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */
uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */
uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */
uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */
uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */
uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */
uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */
uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */
uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */
uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */
uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */
uint64_t create_leaf; /* number of leaf nodes created */
uint64_t create_nonleaf; /* number of nonleaf nodes created */
uint64_t destroy_leaf; /* number of leaf nodes destroyed */
uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */
uint64_t split_leaf; /* number of leaf nodes split */
uint64_t split_nonleaf; /* number of nonleaf nodes split */
uint64_t merge_leaf; /* number of times leaf nodes are merged */
uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */
u_int64_t le_expanded; /* number of times ule used expanded memory */
u_int64_t fsync_count; /* number of times fsync performed */
u_int64_t fsync_time; /* total time required to fsync */
u_int64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */
u_int64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */
u_int64_t logger_swap_ctr; /* how many times have logger buffers been swapped */
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */
u_int64_t loader_create; /* number of loaders created */
u_int64_t loader_create_fail; /* number of failed loader creations */
u_int64_t loader_put; /* number of loader puts (success) */
u_int64_t loader_put_fail; /* number of loader puts that failed */
u_int64_t loader_close; /* number of loaders closed (succeed or fail) */
u_int64_t loader_close_fail; /* number of loaders closed with error return */
u_int64_t loader_abort; /* number of loaders aborted */
u_int64_t loader_current; /* number of loaders currently existing */
u_int64_t loader_max; /* max number of loaders extant simultaneously */
u_int64_t logsuppress; /* number of times logging is suppressed */
u_int64_t logsuppressfail; /* number of times logging cannot be suppressed */
u_int64_t indexer_create; /* number of indexers created successfully */
u_int64_t indexer_create_fail; /* number of failed indexer creations */
u_int64_t indexer_build; /* number of indexer build calls (succeeded) */
u_int64_t indexer_build_fail; /* number of indexers build calls with error return */
u_int64_t indexer_close; /* number of indexers closed successfully) */
u_int64_t indexer_close_fail; /* number of indexers closed with error return */
u_int64_t indexer_abort; /* number of indexers aborted */
u_int64_t indexer_current; /* number of indexers currently existing */
u_int64_t indexer_max; /* max number of indexers extant simultaneously */
u_int64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */
u_int64_t upgrade_header; /* how many brt headers were upgraded? */
u_int64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */
u_int64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */
u_int64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */
u_int64_t original_ver; /* original environment version */
u_int64_t ver_at_startup; /* environment version at startup */
u_int64_t last_lsn_v13; /* last lsn of version 13 environment */
char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */
u_int64_t env_panic; /* non-zero if environment is panicked */
u_int64_t logger_panic; /* non-zero if logger is panicked */
u_int64_t logger_panic_errno; /* non-zero if environment is panicked */
uint64_t malloc_count; /* number of malloc operations */
uint64_t free_count; /* number of free operations */
uint64_t realloc_count; /* number of realloc operations */
uint64_t malloc_fail; /* number of failed malloc operations */
uint64_t realloc_fail; /* number of failed realloc operations */
uint64_t mem_requested; /* number of bytes requested via malloc/realloc */
uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */
uint64_t mem_freed; /* number of bytes freed */
uint64_t max_mem_in_use; /* estimated max value of (used - freed) */
uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */
const char * mallocator_version; /* version string from malloc lib */
} ENGINE_STATUS;
/*
 * File system space zone, as reported through engine status.
 * Every enumerator carries an explicit numeric value.
 */
typedef enum {
    FS_GREEN   = 0,   /* green zone: we have lots of space */
    FS_YELLOW  = 1,   /* yellow zone: issue a warning but allow operations */
    FS_RED     = 2,   /* red zone: prevent insert operations */
    FS_BLOCKED = 3    /* used when reporting engine status: completely blocked */
} fs_redzone_state;
/*
 * Tag describing how the value stored in an engine status row is to be
 * interpreted.  The numeric values are written out in full; they match the
 * sequential numbering that starts at FS_STATE = 0.
 */
typedef enum {
    FS_STATE = 0,   /* value is a file system state (redzone) enum */
    UINT64   = 1,   /* value is a uint64_t */
    CHARSTR  = 2,   /* value is a char * */
    UNIXTIME = 3,   /* value is a time_t */
    TOKUTIME = 4    /* value is a tokutime_t */
} toku_engine_status_display_type;
/*
 * One row of the engine status report: an identifying key, a legend for
 * display, and a value together with the tag saying how to interpret it.
 *
 * TOKU_ENGINE_STATUS_ROW_S names the struct itself;
 * TOKU_ENGINE_STATUS_ROW is a pointer to it.
 */
typedef struct __toku_engine_status_row {
    char *keyname;                         /* info schema key; should not change across revisions without good reason */
    char *legend;                          /* the text that will appear at the user interface */
    toku_engine_status_display_type type;  /* how to interpret the value */
    union {
        uint64_t num;                      /* numeric form of the value */
        char *str;                         /* string form of the value */
    } value;
} TOKU_ENGINE_STATUS_ROW_S, *TOKU_ENGINE_STATUS_ROW;
typedef enum {
DB_BTREE=1,
DB_UNKNOWN=5
@ -432,8 +204,9 @@ struct __toku_db_env {
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
void *app_private; /* 32-bit offset=44 size=4, 64=bit offset=88 size=8 */
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
@ -460,7 +233,7 @@ struct __toku_db_env {
void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
void* __toku_dummy0[6];
void* __toku_dummy0[5];
char __toku_dummy1[96];
void *api1_internal; /* 32-bit offset=244 size=4, 64=bit offset=392 size=8 */
void* __toku_dummy2[7];

View file

@ -67,256 +67,28 @@ struct __toku_indexer {
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
typedef struct __toku_engine_status {
char creationtime[26]; /* time of environment creation */
char startuptime[26]; /* time of engine startup */
char now[26]; /* time of engine status query (i.e. now) */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */
u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */
u_int64_t max_waiters; /* The maximum of num_waiters_now. */
u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */
u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */
u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */
u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */
u_int64_t checkpoint_period; /* delay between automatic checkpoints */
u_int64_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */
uint64_t checkpoint_count; /* number of checkpoints taken */
uint64_t checkpoint_count_fail; /* number of checkpoints failed */
uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */
uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */
uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */
uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commitcheckpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commitcheckpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */
u_int64_t cleaner_period; /* delay between executions of cleaner */
u_int64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */
u_int64_t txn_begin; /* number of transactions ever begun */
u_int64_t txn_commit; /* txn commit operations */
u_int64_t txn_abort; /* txn abort operations */
u_int64_t txn_close; /* txn completions (should equal commit+abort) */
u_int64_t txn_num_open; /* should be begin - close */
u_int64_t txn_max_open; /* max value of num_open */
u_int64_t txn_oldest_live; /* oldest extant txn txnid */
char txn_oldest_live_starttime[26]; /* oldest extant txn start time */
u_int64_t next_lsn; /* lsn that will be assigned to next log entry */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */
uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */
uint64_t cachetable_size_rollback; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */
u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */
u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */
u_int64_t range_locks_max; /* max total number of range locks */
u_int64_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int64_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */
u_int64_t range_read_locks_fail; /* total range read locks unable to be taken */
u_int64_t range_out_of_read_locks; /* total times range read locks exhausted */
u_int64_t range_write_locks; /* total range write locks taken */
u_int64_t range_write_locks_fail; /* total range write locks unable to be taken */
u_int64_t range_out_of_write_locks; /* total times range write locks exhausted */
u_int64_t range_lt_create; /* number of locktrees created */
u_int64_t range_lt_create_fail; /* number of locktree create failures */
u_int64_t range_lt_destroy; /* number of locktrees destroyed */
u_int64_t range_lt_num; /* number of locktrees (should be created - destroyed) */
u_int64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */
u_int64_t directory_read_locks; /* total directory read locks taken */
u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */
u_int64_t directory_write_locks; /* total directory write locks taken */
u_int64_t directory_write_locks_fail; /* total directory write locks unable to be taken */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t inserts_fail; /* ydb row insert operations that failed */
u_int64_t deletes; /* ydb row delete operations */
u_int64_t deletes_fail; /* ydb row delete operations that failed */
u_int64_t updates; /* ydb row update operations */
u_int64_t updates_fail; /* ydb row update operations that failed */
u_int64_t updates_broadcast; /* ydb row update broadcast operations */
u_int64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */
u_int64_t multi_inserts; /* ydb multi_row insert operations, dictionaray count */
u_int64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */
u_int64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */
u_int64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */
u_int64_t multi_updates; /* ydb row update operations, dictionary count */
u_int64_t multi_updates_fail; /* ydb row update operations that failed, dictionary count */
u_int64_t point_queries; /* ydb point queries */
u_int64_t sequential_queries; /* ydb sequential queries */
u_int64_t num_db_open; /* number of db_open operations */
u_int64_t num_db_close; /* number of db_close operations */
u_int64_t num_open_dbs; /* number of currently open dbs */
u_int64_t max_open_dbs; /* max number of simultaneously open dbs */
u_int64_t le_updates; /* leafentry update operations */
u_int64_t le_updates_broadcast; /* leafentry update broadcast operations */
u_int64_t descriptor_set; /* descriptor set operations */
u_int64_t partial_fetch_hit; /* node partition is present */
u_int64_t partial_fetch_miss; /* node is present but partition is absent */
u_int64_t partial_fetch_compressed; /* node partition is present but compressed */
u_int64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */
u_int64_t partial_evictions_leaf; /* number of leaf node partial evictions */
u_int64_t msn_discards; /* how many messages were ignored by leaf because of msn */
u_int64_t max_workdone; /* max workdone value of any buffer */
uint64_t total_searches; /* total number of searches */
uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */
uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */
uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */
uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */
uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */
uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */
uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */
uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */
uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */
uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */
uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */
uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */
uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */
uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */
uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */
uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */
uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */
uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */
uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */
uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */
uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */
uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */
uint64_t create_leaf; /* number of leaf nodes created */
uint64_t create_nonleaf; /* number of nonleaf nodes created */
uint64_t destroy_leaf; /* number of leaf nodes destroyed */
uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */
uint64_t split_leaf; /* number of leaf nodes split */
uint64_t split_nonleaf; /* number of nonleaf nodes split */
uint64_t merge_leaf; /* number of times leaf nodes are merged */
uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */
u_int64_t le_expanded; /* number of times ule used expanded memory */
u_int64_t fsync_count; /* number of times fsync performed */
u_int64_t fsync_time; /* total time required to fsync */
u_int64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */
u_int64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */
u_int64_t logger_swap_ctr; /* how many times have logger buffers been swapped */
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */
u_int64_t loader_create; /* number of loaders created */
u_int64_t loader_create_fail; /* number of failed loader creations */
u_int64_t loader_put; /* number of loader puts (success) */
u_int64_t loader_put_fail; /* number of loader puts that failed */
u_int64_t loader_close; /* number of loaders closed (succeed or fail) */
u_int64_t loader_close_fail; /* number of loaders closed with error return */
u_int64_t loader_abort; /* number of loaders aborted */
u_int64_t loader_current; /* number of loaders currently existing */
u_int64_t loader_max; /* max number of loaders extant simultaneously */
u_int64_t logsuppress; /* number of times logging is suppressed */
u_int64_t logsuppressfail; /* number of times logging cannot be suppressed */
u_int64_t indexer_create; /* number of indexers created successfully */
u_int64_t indexer_create_fail; /* number of failed indexer creations */
u_int64_t indexer_build; /* number of indexer build calls (succeeded) */
u_int64_t indexer_build_fail; /* number of indexers build calls with error return */
u_int64_t indexer_close; /* number of indexers closed successfully) */
u_int64_t indexer_close_fail; /* number of indexers closed with error return */
u_int64_t indexer_abort; /* number of indexers aborted */
u_int64_t indexer_current; /* number of indexers currently existing */
u_int64_t indexer_max; /* max number of indexers extant simultaneously */
u_int64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */
u_int64_t upgrade_header; /* how many brt headers were upgraded? */
u_int64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */
u_int64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */
u_int64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */
u_int64_t original_ver; /* original environment version */
u_int64_t ver_at_startup; /* environment version at startup */
u_int64_t last_lsn_v13; /* last lsn of version 13 environment */
char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */
u_int64_t env_panic; /* non-zero if environment is panicked */
u_int64_t logger_panic; /* non-zero if logger is panicked */
u_int64_t logger_panic_errno; /* non-zero if environment is panicked */
uint64_t malloc_count; /* number of malloc operations */
uint64_t free_count; /* number of free operations */
uint64_t realloc_count; /* number of realloc operations */
uint64_t malloc_fail; /* number of failed malloc operations */
uint64_t realloc_fail; /* number of failed realloc operations */
uint64_t mem_requested; /* number of bytes requested via malloc/realloc */
uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */
uint64_t mem_freed; /* number of bytes freed */
uint64_t max_mem_in_use; /* estimated max value of (used - freed) */
uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */
const char * mallocator_version; /* version string from malloc lib */
} ENGINE_STATUS;
/*
 * File system free-space zones.
 * Each zone's numeric value is given explicitly.
 */
typedef enum {
    FS_GREEN   = 0, /* green zone (we have lots of space) */
    FS_YELLOW  = 1, /* yellow zone (issue warning but allow operations) */
    FS_RED     = 2, /* red zone (prevent insert operations) */
    FS_BLOCKED = 3  /* For reporting engine status, completely blocked */
} fs_redzone_state;
/*
 * Display type tag: says how a status value should be interpreted.
 * The values are consecutive starting at zero (written out explicitly here).
 */
typedef enum {
    FS_STATE = 0, /* interpret as file system state (redzone) enum */
    UINT64   = 1, /* interpret as uint64_t */
    CHARSTR  = 2, /* interpret as char * */
    UNIXTIME = 3, /* interpret as time_t */
    TOKUTIME = 4  /* interpret as tokutime_t */
} toku_engine_status_display_type;
/*
 * One engine status row: a key, a human-readable legend, a display type
 * and the value itself.
 */
struct __toku_engine_status_row {
    char *keyname;                        /* info schema key, should not change across revisions without good reason */
    char *legend;                         /* the text that will appear at user interface */
    toku_engine_status_display_type type; /* how to interpret the value */
    union {
        uint64_t num;
        char *str; /* presumably used when type is CHARSTR -- confirm */
    } value;
};
typedef struct __toku_engine_status_row TOKU_ENGINE_STATUS_ROW_S; /* the row struct itself */
typedef struct __toku_engine_status_row *TOKU_ENGINE_STATUS_ROW;  /* pointer to a row */
typedef enum {
DB_BTREE=1,
DB_UNKNOWN=5
@ -432,8 +204,9 @@ struct __toku_db_env {
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
void *app_private; /* 32-bit offset=44 size=4, 64=bit offset=88 size=8 */
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
@ -460,7 +233,7 @@ struct __toku_db_env {
void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
void* __toku_dummy0[21];
void* __toku_dummy0[20];
char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[7];

View file

@ -67,256 +67,28 @@ struct __toku_indexer {
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
/*
 * Engine status record: one field per statistic.
 * Fields declared char[26] hold time stamps as text; mallocator_version is a
 * string pointer; every other field is a 64-bit unsigned value (the
 * u_int64_t and uint64_t spellings are mixed).
 * NOTE(review): field order and types determine the struct layout -- do not
 * reorder or retype fields without checking every user of this struct.
 */
typedef struct __toku_engine_status {
char creationtime[26]; /* time of environment creation */
char startuptime[26]; /* time of engine startup */
char now[26]; /* time of engine status query (i.e. now) */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */
u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */
u_int64_t max_waiters; /* The maximum of num_waiters_now. */
u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */
u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */
u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */
u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */
u_int64_t checkpoint_period; /* delay between automatic checkpoints */
u_int64_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */
uint64_t checkpoint_count; /* number of checkpoints taken */
uint64_t checkpoint_count_fail; /* number of checkpoints failed */
uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */
uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */
uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */
uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commitcheckpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commitcheckpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */
u_int64_t cleaner_period; /* delay between executions of cleaner */
u_int64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */
u_int64_t txn_begin; /* number of transactions ever begun */
u_int64_t txn_commit; /* txn commit operations */
u_int64_t txn_abort; /* txn abort operations */
u_int64_t txn_close; /* txn completions (should equal commit+abort) */
u_int64_t txn_num_open; /* should be begin - close */
u_int64_t txn_max_open; /* max value of num_open */
u_int64_t txn_oldest_live; /* oldest extant txn txnid */
char txn_oldest_live_starttime[26]; /* oldest extant txn start time */
u_int64_t next_lsn; /* lsn that will be assigned to next log entry */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has maybe_get_and_pin(_clean) returned with a node (NOTE(review): original comment said "get_and_pin(_clean)" -- confirm) */
uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */
uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */
uint64_t cachetable_size_rollback; /* presumably the number of bytes of rollback nodes (NOTE(review): original comment said "nonleaf nodes", apparently copy-pasted -- confirm) */
uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */
u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */
u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */
u_int64_t range_locks_max; /* max total number of range locks */
u_int64_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int64_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */
u_int64_t range_read_locks_fail; /* total range read locks unable to be taken */
u_int64_t range_out_of_read_locks; /* total times range read locks exhausted */
u_int64_t range_write_locks; /* total range write locks taken */
u_int64_t range_write_locks_fail; /* total range write locks unable to be taken */
u_int64_t range_out_of_write_locks; /* total times range write locks exhausted */
u_int64_t range_lt_create; /* number of locktrees created */
u_int64_t range_lt_create_fail; /* number of locktree create failures */
u_int64_t range_lt_destroy; /* number of locktrees destroyed */
u_int64_t range_lt_num; /* number of locktrees (should be created - destroyed) */
u_int64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */
u_int64_t directory_read_locks; /* total directory read locks taken */
u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */
u_int64_t directory_write_locks; /* total directory write locks taken */
u_int64_t directory_write_locks_fail; /* total directory write locks unable to be taken */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t inserts_fail; /* ydb row insert operations that failed */
u_int64_t deletes; /* ydb row delete operations */
u_int64_t deletes_fail; /* ydb row delete operations that failed */
u_int64_t updates; /* ydb row update operations */
u_int64_t updates_fail; /* ydb row update operations that failed */
u_int64_t updates_broadcast; /* ydb row update broadcast operations */
u_int64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */
u_int64_t multi_inserts; /* ydb multi_row insert operations, dictionary count */
u_int64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */
u_int64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */
u_int64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */
u_int64_t multi_updates; /* ydb row update operations, dictionary count */
u_int64_t multi_updates_fail; /* ydb row update operations that failed, dictionary count */
u_int64_t point_queries; /* ydb point queries */
u_int64_t sequential_queries; /* ydb sequential queries */
u_int64_t num_db_open; /* number of db_open operations */
u_int64_t num_db_close; /* number of db_close operations */
u_int64_t num_open_dbs; /* number of currently open dbs */
u_int64_t max_open_dbs; /* max number of simultaneously open dbs */
u_int64_t le_updates; /* leafentry update operations */
u_int64_t le_updates_broadcast; /* leafentry update broadcast operations */
u_int64_t descriptor_set; /* descriptor set operations */
u_int64_t partial_fetch_hit; /* node partition is present */
u_int64_t partial_fetch_miss; /* node is present but partition is absent */
u_int64_t partial_fetch_compressed; /* node partition is present but compressed */
u_int64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */
u_int64_t partial_evictions_leaf; /* number of leaf node partial evictions */
u_int64_t msn_discards; /* how many messages were ignored by leaf because of msn */
u_int64_t max_workdone; /* max workdone value of any buffer */
uint64_t total_searches; /* total number of searches */
uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */
uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */
uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */
uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */
uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */
uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */
uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */
uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */
uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */
uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */
uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */
uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */
uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */
uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */
uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */
uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */
uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */
uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */
uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */
uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */
uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */
uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */
uint64_t create_leaf; /* number of leaf nodes created */
uint64_t create_nonleaf; /* number of nonleaf nodes created */
uint64_t destroy_leaf; /* number of leaf nodes destroyed */
uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */
uint64_t split_leaf; /* number of leaf nodes split */
uint64_t split_nonleaf; /* number of nonleaf nodes split */
uint64_t merge_leaf; /* number of times leaf nodes are merged */
uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees) */
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves */
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate) */
uint64_t msg_bytes_max; /* presumably the max value of msg_bytes_curr (NOTE(review): original comment duplicated the msg_bytes_curr text -- confirm) */
uint64_t msg_num; /* how many messages injected at root */
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root */
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch; /* presumably ... for a prefetch (undocumented in original) -- confirm */
uint64_t num_basements_decompressed_write; /* presumably ... for a write (undocumented in original) -- confirm */
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch; /* presumably ... for a prefetch (undocumented in original) -- confirm */
uint64_t num_msg_buffer_decompressed_write; /* presumably ... for a write (undocumented in original) -- confirm */
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch; /* presumably ... for a prefetch (undocumented in original) -- confirm */
uint64_t num_basements_fetched_write; /* presumably ... for a write (undocumented in original) -- confirm */
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch; /* presumably ... for a prefetch (undocumented in original) -- confirm */
uint64_t num_msg_buffer_fetched_write; /* presumably ... for a write (undocumented in original) -- confirm */
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */
u_int64_t le_expanded; /* number of times ule used expanded memory */
u_int64_t fsync_count; /* number of times fsync performed */
u_int64_t fsync_time; /* total time required to fsync */
u_int64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */
u_int64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */
u_int64_t logger_swap_ctr; /* how many times have logger buffers been swapped */
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */
u_int64_t loader_create; /* number of loaders created */
u_int64_t loader_create_fail; /* number of failed loader creations */
u_int64_t loader_put; /* number of loader puts (success) */
u_int64_t loader_put_fail; /* number of loader puts that failed */
u_int64_t loader_close; /* number of loaders closed (succeed or fail) */
u_int64_t loader_close_fail; /* number of loaders closed with error return */
u_int64_t loader_abort; /* number of loaders aborted */
u_int64_t loader_current; /* number of loaders currently existing */
u_int64_t loader_max; /* max number of loaders extant simultaneously */
u_int64_t logsuppress; /* number of times logging is suppressed */
u_int64_t logsuppressfail; /* number of times logging cannot be suppressed */
u_int64_t indexer_create; /* number of indexers created successfully */
u_int64_t indexer_create_fail; /* number of failed indexer creations */
u_int64_t indexer_build; /* number of indexer build calls (succeeded) */
u_int64_t indexer_build_fail; /* number of indexer build calls with error return */
u_int64_t indexer_close; /* number of indexers closed successfully */
u_int64_t indexer_close_fail; /* number of indexers closed with error return */
u_int64_t indexer_abort; /* number of indexers aborted */
u_int64_t indexer_current; /* number of indexers currently existing */
u_int64_t indexer_max; /* max number of indexers extant simultaneously */
u_int64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */
u_int64_t upgrade_header; /* how many brt headers were upgraded? */
u_int64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */
u_int64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */
u_int64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */
u_int64_t original_ver; /* original environment version */
u_int64_t ver_at_startup; /* environment version at startup */
u_int64_t last_lsn_v13; /* last lsn of version 13 environment */
char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */
u_int64_t env_panic; /* non-zero if environment is panicked */
u_int64_t logger_panic; /* non-zero if logger is panicked */
u_int64_t logger_panic_errno; /* presumably the errno associated with the logger panic (NOTE(review): original comment duplicated the env_panic text -- confirm) */
uint64_t malloc_count; /* number of malloc operations */
uint64_t free_count; /* number of free operations */
uint64_t realloc_count; /* number of realloc operations */
uint64_t malloc_fail; /* number of failed malloc operations */
uint64_t realloc_fail; /* number of failed realloc operations */
uint64_t mem_requested; /* number of bytes requested via malloc/realloc */
uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */
uint64_t mem_freed; /* number of bytes freed */
uint64_t max_mem_in_use; /* estimated max value of (used - freed) */
uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */
const char * mallocator_version; /* version string from malloc lib */
} ENGINE_STATUS;
/*
 * File system free-space zones.
 * Each zone's numeric value is given explicitly.
 */
typedef enum {
    FS_GREEN   = 0, /* green zone (we have lots of space) */
    FS_YELLOW  = 1, /* yellow zone (issue warning but allow operations) */
    FS_RED     = 2, /* red zone (prevent insert operations) */
    FS_BLOCKED = 3  /* For reporting engine status, completely blocked */
} fs_redzone_state;
/*
 * Display type tag: says how a status value should be interpreted.
 * The values are consecutive starting at zero (written out explicitly here).
 */
typedef enum {
    FS_STATE = 0, /* interpret as file system state (redzone) enum */
    UINT64   = 1, /* interpret as uint64_t */
    CHARSTR  = 2, /* interpret as char * */
    UNIXTIME = 3, /* interpret as time_t */
    TOKUTIME = 4  /* interpret as tokutime_t */
} toku_engine_status_display_type;
/*
 * One engine status row: a key, a human-readable legend, a display type
 * and the value itself.
 */
struct __toku_engine_status_row {
    char *keyname;                        /* info schema key, should not change across revisions without good reason */
    char *legend;                         /* the text that will appear at user interface */
    toku_engine_status_display_type type; /* how to interpret the value */
    union {
        uint64_t num;
        char *str; /* presumably used when type is CHARSTR -- confirm */
    } value;
};
typedef struct __toku_engine_status_row TOKU_ENGINE_STATUS_ROW_S; /* the row struct itself */
typedef struct __toku_engine_status_row *TOKU_ENGINE_STATUS_ROW;  /* pointer to a row */
typedef enum {
DB_BTREE=1,
DB_UNKNOWN=5
@ -431,9 +203,10 @@ struct __toku_db_env {
int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */;
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
void *app_private; /* 32-bit offset=52 size=4, 64=bit offset=104 size=8 */
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
@ -460,7 +233,7 @@ struct __toku_db_env {
void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
void* __toku_dummy0[21];
void* __toku_dummy0[20];
char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[8];

View file

@ -67,256 +67,28 @@ struct __toku_indexer {
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
/* Engine status record: the struct that get_engine_status(DB_ENV*, ENGINE_STATUS*, ...) fills in.
 * Fields declared as char[26] hold timestamps as text; the remaining fields are
 * 64-bit counters/gauges, plus one version string pointer at the end. */
typedef struct __toku_engine_status {
char creationtime[26]; /* time of environment creation */
char startuptime[26]; /* time of engine startup */
char now[26]; /* time of engine status query (i.e. now) */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */
u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */
u_int64_t max_waiters; /* The maximum of num_waiters_now. */
u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */
u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */
u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */
u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */
u_int64_t checkpoint_period; /* delay between automatic checkpoints */
u_int64_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */
uint64_t checkpoint_count; /* number of checkpoints taken */
uint64_t checkpoint_count_fail; /* number of checkpoints failed */
uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */
uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */
uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */
uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commit checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commit checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */
u_int64_t cleaner_period; /* delay between executions of cleaner */
u_int64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */
u_int64_t txn_begin; /* number of transactions ever begun */
u_int64_t txn_commit; /* txn commit operations */
u_int64_t txn_abort; /* txn abort operations */
u_int64_t txn_close; /* txn completions (should equal commit+abort) */
u_int64_t txn_num_open; /* should be begin - close */
u_int64_t txn_max_open; /* max value of num_open */
u_int64_t txn_oldest_live; /* oldest extant txn txnid */
char txn_oldest_live_starttime[26]; /* oldest extant txn start time */
u_int64_t next_lsn; /* lsn that will be assigned to next log entry */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint */
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has maybe_get_and_pin(_clean) returned with a node */
uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */
uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */
uint64_t cachetable_size_rollback; /* the number of bytes of rollback nodes */
uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */
u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */
u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */
u_int64_t range_locks_max; /* max total number of range locks */
u_int64_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int64_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */
u_int64_t range_read_locks_fail; /* total range read locks unable to be taken */
u_int64_t range_out_of_read_locks; /* total times range read locks exhausted */
u_int64_t range_write_locks; /* total range write locks taken */
u_int64_t range_write_locks_fail; /* total range write locks unable to be taken */
u_int64_t range_out_of_write_locks; /* total times range write locks exhausted */
u_int64_t range_lt_create; /* number of locktrees created */
u_int64_t range_lt_create_fail; /* number of locktree create failures */
u_int64_t range_lt_destroy; /* number of locktrees destroyed */
u_int64_t range_lt_num; /* number of locktrees (should be created - destroyed) */
u_int64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */
u_int64_t directory_read_locks; /* total directory read locks taken */
u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */
u_int64_t directory_write_locks; /* total directory write locks taken */
u_int64_t directory_write_locks_fail; /* total directory write locks unable to be taken */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t inserts_fail; /* ydb row insert operations that failed */
u_int64_t deletes; /* ydb row delete operations */
u_int64_t deletes_fail; /* ydb row delete operations that failed */
u_int64_t updates; /* ydb row update operations */
u_int64_t updates_fail; /* ydb row update operations that failed */
u_int64_t updates_broadcast; /* ydb row update broadcast operations */
u_int64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */
u_int64_t multi_inserts; /* ydb multi_row insert operations, dictionary count */
u_int64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */
u_int64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */
u_int64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */
u_int64_t multi_updates; /* ydb multi_row update operations, dictionary count (NOTE(review): wording aligned with multi_inserts/multi_deletes - confirm) */
u_int64_t multi_updates_fail; /* ydb multi_row update operations that failed, dictionary count (NOTE(review): wording aligned with multi_inserts/multi_deletes - confirm) */
u_int64_t point_queries; /* ydb point queries */
u_int64_t sequential_queries; /* ydb sequential queries */
u_int64_t num_db_open; /* number of db_open operations */
u_int64_t num_db_close; /* number of db_close operations */
u_int64_t num_open_dbs; /* number of currently open dbs */
u_int64_t max_open_dbs; /* max number of simultaneously open dbs */
u_int64_t le_updates; /* leafentry update operations */
u_int64_t le_updates_broadcast; /* leafentry update broadcast operations */
u_int64_t descriptor_set; /* descriptor set operations */
u_int64_t partial_fetch_hit; /* node partition is present */
u_int64_t partial_fetch_miss; /* node is present but partition is absent */
u_int64_t partial_fetch_compressed; /* node partition is present but compressed */
u_int64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */
u_int64_t partial_evictions_leaf; /* number of leaf node partial evictions */
u_int64_t msn_discards; /* how many messages were ignored by leaf because of msn */
u_int64_t max_workdone; /* max workdone value of any buffer */
uint64_t total_searches; /* total number of searches */
uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */
uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */
uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */
uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */
uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */
uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */
uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */
uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */
uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */
uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */
uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */
uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */
uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */
uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */
uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */
uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */
uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */
uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */
uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */
uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */
uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */
uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */
uint64_t create_leaf; /* number of leaf nodes created */
uint64_t create_nonleaf; /* number of nonleaf nodes created */
uint64_t destroy_leaf; /* number of leaf nodes destroyed */
uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */
uint64_t split_leaf; /* number of leaf nodes split */
uint64_t split_nonleaf; /* number of nonleaf nodes split */
uint64_t merge_leaf; /* number of times leaf nodes are merged */
uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees) */
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves */
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate) */
uint64_t msg_bytes_max; /* presumably the max value of msg_bytes_curr (estimate) - original comment duplicated msg_bytes_curr's; confirm */
uint64_t msg_num; /* how many messages injected at root */
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root */
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch; /* ... presumably for a prefetch (by name; confirm) */
uint64_t num_basements_decompressed_write; /* ... presumably for a write (by name; confirm) */
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch; /* ... presumably for a prefetch (by name; confirm) */
uint64_t num_msg_buffer_decompressed_write; /* ... presumably for a write (by name; confirm) */
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch; /* ... presumably for a prefetch (by name; confirm) */
uint64_t num_basements_fetched_write; /* ... presumably for a write (by name; confirm) */
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch; /* ... presumably for a prefetch (by name; confirm) */
uint64_t num_msg_buffer_fetched_write; /* ... presumably for a write (by name; confirm) */
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */
u_int64_t le_expanded; /* number of times ule used expanded memory */
u_int64_t fsync_count; /* number of times fsync performed */
u_int64_t fsync_time; /* total time required to fsync */
u_int64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */
u_int64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */
u_int64_t logger_swap_ctr; /* how many times have logger buffers been swapped */
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */
u_int64_t loader_create; /* number of loaders created */
u_int64_t loader_create_fail; /* number of failed loader creations */
u_int64_t loader_put; /* number of loader puts (success) */
u_int64_t loader_put_fail; /* number of loader puts that failed */
u_int64_t loader_close; /* number of loaders closed (succeed or fail) */
u_int64_t loader_close_fail; /* number of loaders closed with error return */
u_int64_t loader_abort; /* number of loaders aborted */
u_int64_t loader_current; /* number of loaders currently existing */
u_int64_t loader_max; /* max number of loaders extant simultaneously */
u_int64_t logsuppress; /* number of times logging is suppressed */
u_int64_t logsuppressfail; /* number of times logging cannot be suppressed */
u_int64_t indexer_create; /* number of indexers created successfully */
u_int64_t indexer_create_fail; /* number of failed indexer creations */
u_int64_t indexer_build; /* number of indexer build calls (succeeded) */
u_int64_t indexer_build_fail; /* number of indexer build calls with error return */
u_int64_t indexer_close; /* number of indexers closed successfully */
u_int64_t indexer_close_fail; /* number of indexers closed with error return */
u_int64_t indexer_abort; /* number of indexers aborted */
u_int64_t indexer_current; /* number of indexers currently existing */
u_int64_t indexer_max; /* max number of indexers extant simultaneously */
u_int64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */
u_int64_t upgrade_header; /* how many brt headers were upgraded? */
u_int64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */
u_int64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */
u_int64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */
u_int64_t original_ver; /* original environment version */
u_int64_t ver_at_startup; /* environment version at startup */
u_int64_t last_lsn_v13; /* last lsn of version 13 environment */
char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */
u_int64_t env_panic; /* non-zero if environment is panicked */
u_int64_t logger_panic; /* non-zero if logger is panicked */
u_int64_t logger_panic_errno; /* presumably the errno recorded for the logger panic (original comment duplicated env_panic's; confirm) */
uint64_t malloc_count; /* number of malloc operations */
uint64_t free_count; /* number of free operations */
uint64_t realloc_count; /* number of realloc operations */
uint64_t malloc_fail; /* number of failed malloc operations */
uint64_t realloc_fail; /* number of failed realloc operations */
uint64_t mem_requested; /* number of bytes requested via malloc/realloc */
uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */
uint64_t mem_freed; /* number of bytes freed */
uint64_t max_mem_in_use; /* estimated max value of (used - freed) */
uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */
const char * mallocator_version; /* version string from malloc lib */
} ENGINE_STATUS;
/* File system free-space zones, used when reporting engine status. */
typedef enum {
    FS_GREEN   = 0, /* green zone (we have lots of space) */
    FS_YELLOW  = 1, /* yellow zone (issue warning but allow operations) */
    FS_RED     = 2, /* red zone (prevent insert operations) */
    FS_BLOCKED = 3  /* For reporting engine status, completely blocked */
} fs_redzone_state;
/* Selects how the value stored in an engine status row is to be interpreted. */
typedef enum {
    FS_STATE = 0, /* interpret as file system state (redzone) enum */
    UINT64   = 1, /* interpret as uint64_t */
    CHARSTR  = 2, /* interpret as char * */
    UNIXTIME = 3, /* interpret as time_t */
    TOKUTIME = 4  /* interpret as tokutime_t */
} toku_engine_status_display_type;
/* One row of engine status: a stable key, a display legend, and a typed value. */
struct __toku_engine_status_row {
    char *keyname;                        /* info schema key, should not change across revisions without good reason */
    char *legend;                         /* the text that will appear at user interface */
    toku_engine_status_display_type type; /* how to interpret the value */
    union {
        uint64_t num;
        char *str;
    } value;
};
/* TOKU_ENGINE_STATUS_ROW_S is the row itself; TOKU_ENGINE_STATUS_ROW is a pointer to a row. */
typedef struct __toku_engine_status_row TOKU_ENGINE_STATUS_ROW_S;
typedef struct __toku_engine_status_row *TOKU_ENGINE_STATUS_ROW;
typedef enum {
DB_BTREE=1,
DB_UNKNOWN=5
@ -432,9 +204,10 @@ struct __toku_db_env {
int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */;
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
void *app_private; /* 32-bit offset=52 size=4, 64=bit offset=104 size=8 */
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;
@ -461,7 +234,7 @@ struct __toku_db_env {
void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra));
int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec);
int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec);
void* __toku_dummy0[22];
void* __toku_dummy0[21];
char __toku_dummy1[144];
void *api1_internal; /* 32-bit offset=356 size=4, 64=bit offset=568 size=8 */
void* __toku_dummy2[8];

View file

@ -459,257 +459,34 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */\n");
printf("};\n");
//engine status info
printf("typedef struct __toku_engine_status {\n");
printf(" char creationtime[26]; /* time of environment creation */ \n");
printf(" char startuptime[26]; /* time of engine startup */ \n");
printf(" char now[26]; /* time of engine status query (i.e. now) */ \n");
printf(" u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */\n");
printf(" u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */\n");
printf(" u_int64_t max_waiters; /* The maximum of num_waiters_now. */\n");
printf(" u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */\n");
printf(" u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */\n");
printf(" u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */\n");
printf(" u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */\n");
printf(" u_int64_t checkpoint_period; /* delay between automatic checkpoints */ \n");
printf(" u_int64_t checkpoint_footprint; /* state of checkpoint procedure */ \n");
printf(" char checkpoint_time_begin[26]; /* time of last checkpoint begin */ \n");
printf(" char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */ \n");
printf(" char checkpoint_time_end[26]; /* time of last checkpoint end */ \n");
printf(" uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */ \n");
printf(" uint64_t checkpoint_count; /* number of checkpoints taken */ \n");
printf(" uint64_t checkpoint_count_fail; /* number of checkpoints failed */ \n");
printf(" uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */ \n");
printf(" uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */ \n");
printf(" uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */ \n");
printf(" uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */ \n");
printf(" uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */ \n");
printf(" uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */ \n");
printf(" uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commitcheckpoint waited for the checkpoint_safe lock */ \n");
printf(" uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */ \n");
printf(" uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */ \n");
printf(" uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */ \n");
printf(" uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commitcheckpoint waited for the multi_operation lock */ \n");
printf(" uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */ \n");
printf(" u_int64_t cleaner_period; /* delay between executions of cleaner */ \n");
printf(" u_int64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */ \n");
printf(" u_int64_t txn_begin; /* number of transactions ever begun */ \n");
printf(" u_int64_t txn_commit; /* txn commit operations */ \n");
printf(" u_int64_t txn_abort; /* txn abort operations */ \n");
printf(" u_int64_t txn_close; /* txn completions (should equal commit+abort) */ \n");
printf(" u_int64_t txn_num_open; /* should be begin - close */ \n");
printf(" u_int64_t txn_max_open; /* max value of num_open */ \n");
printf(" u_int64_t txn_oldest_live; /* oldest extant txn txnid */ \n");
printf(" char txn_oldest_live_starttime[26]; /* oldest extant txn start time */ \n");
printf(" u_int64_t next_lsn; /* lsn that will be assigned to next log entry */ \n");
printf(" u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */ \n");
printf(" u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */ \n");
printf(" u_int64_t cachetable_hit; /* how many cache hits */ \n");
printf(" u_int64_t cachetable_miss; /* how many cache misses */ \n");
printf(" u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */ \n");
printf(" u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */ \n");
printf(" u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */ \n");
printf(" u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ \n");
printf(" u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ \n");
printf(" u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ \n");
printf(" u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ \n");
printf(" u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ \n");
printf(" u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */ \n");
printf(" uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */ \n");
printf(" uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */ \n");
printf(" uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */ \n");
printf(" uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */ \n");
printf(" uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */ \n");
printf(" uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */ \n");
printf(" uint64_t cachetable_size_rollback; /* the number of bytes of nonleaf nodes */ \n");
printf(" uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */ \n");
printf(" u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */ \n");
printf(" u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */ \n");
printf(" u_int64_t range_locks_max; /* max total number of range locks */ \n");
printf(" u_int64_t range_locks_curr; /* total range locks currently in use */ \n");
printf(" u_int64_t range_locks_max_memory; /* max total bytes of range locks */ \n");
printf(" u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */ \n");
printf(" u_int64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */ \n");
printf(" u_int64_t range_lock_escalation_failures; /* number of times range locks escalation failed */ \n");
printf(" u_int64_t range_read_locks; /* total range read locks taken */ \n");
printf(" u_int64_t range_read_locks_fail; /* total range read locks unable to be taken */ \n");
printf(" u_int64_t range_out_of_read_locks; /* total times range read locks exhausted */ \n");
printf(" u_int64_t range_write_locks; /* total range write locks taken */ \n");
printf(" u_int64_t range_write_locks_fail; /* total range write locks unable to be taken */ \n");
printf(" u_int64_t range_out_of_write_locks; /* total times range write locks exhausted */ \n");
printf(" u_int64_t range_lt_create; /* number of locktrees created */ \n");
printf(" u_int64_t range_lt_create_fail; /* number of locktree create failures */ \n");
printf(" u_int64_t range_lt_destroy; /* number of locktrees destroyed */ \n");
printf(" u_int64_t range_lt_num; /* number of locktrees (should be created - destroyed) */ \n");
printf(" u_int64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */ \n");
printf(" u_int64_t directory_read_locks; /* total directory read locks taken */ \n");
printf(" u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */ \n");
printf(" u_int64_t directory_write_locks; /* total directory write locks taken */ \n");
printf(" u_int64_t directory_write_locks_fail; /* total directory write locks unable to be taken */ \n");
printf(" u_int64_t inserts; /* ydb row insert operations */ \n");
printf(" u_int64_t inserts_fail; /* ydb row insert operations that failed */ \n");
printf(" u_int64_t deletes; /* ydb row delete operations */ \n");
printf(" u_int64_t deletes_fail; /* ydb row delete operations that failed */ \n");
printf(" u_int64_t updates; /* ydb row update operations */ \n");
printf(" u_int64_t updates_fail; /* ydb row update operations that failed */ \n");
printf(" u_int64_t updates_broadcast; /* ydb row update broadcast operations */ \n");
printf(" u_int64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */ \n");
printf(" u_int64_t multi_inserts; /* ydb multi_row insert operations, dictionaray count */ \n");
printf(" u_int64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */ \n");
printf(" u_int64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */ \n");
printf(" u_int64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */ \n");
printf(" u_int64_t multi_updates; /* ydb row update operations, dictionary count */ \n");
printf(" u_int64_t multi_updates_fail; /* ydb row update operations that failed, dictionary count */ \n");
printf(" u_int64_t point_queries; /* ydb point queries */ \n");
printf(" u_int64_t sequential_queries; /* ydb sequential queries */ \n");
printf(" u_int64_t num_db_open; /* number of db_open operations */\n");
printf(" u_int64_t num_db_close; /* number of db_close operations */\n");
printf(" u_int64_t num_open_dbs; /* number of currently open dbs */\n");
printf(" u_int64_t max_open_dbs; /* max number of simultaneously open dbs */\n");
printf(" u_int64_t le_updates; /* leafentry update operations */ \n");
printf(" u_int64_t le_updates_broadcast; /* leafentry update broadcast operations */ \n");
printf(" u_int64_t descriptor_set; /* descriptor set operations */ \n");
printf(" u_int64_t partial_fetch_hit; /* node partition is present */ \n");
printf(" u_int64_t partial_fetch_miss; /* node is present but partition is absent */ \n");
printf(" u_int64_t partial_fetch_compressed; /* node partition is present but compressed */ \n");
printf(" u_int64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */ \n");
printf(" u_int64_t partial_evictions_leaf; /* number of leaf node partial evictions */ \n");
printf(" u_int64_t msn_discards; /* how many messages were ignored by leaf because of msn */ \n");
printf(" u_int64_t max_workdone; /* max workdone value of any buffer */ \n");
printf(" uint64_t total_searches; /* total number of searches */ \n");
printf(" uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */ \n");
printf(" uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */ \n");
printf(" uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */ \n");
printf(" uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */ \n");
printf(" uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */ \n");
printf(" uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */ \n");
printf(" uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */\n");
printf(" uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */\n");
printf(" uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */\n");
printf(" uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */\n");
printf(" uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */\n");
printf(" uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */\n");
printf(" uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */\n");
printf(" uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */\n");
printf(" uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */\n");
printf(" uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */\n");
printf(" uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */\n");
printf(" uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */\n");
printf(" uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */\n");
printf(" uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */\n");
printf(" uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the \"flush from root\" process to merge a leaf node */\n");
printf(" uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */\n");
printf(" uint64_t flush_in_memory; /* number of in memory flushes */\n");
printf(" uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */\n");
printf(" uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */\n");
printf(" uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */\n");
printf(" uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */\n");
printf(" uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */\n");
printf(" uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */\n");
printf(" uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */\n");
printf(" uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */\n");
printf(" uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */\n");
printf(" uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */\n");
printf(" uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */\n");
printf(" uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */\n");
printf(" uint64_t create_leaf; /* number of leaf nodes created */\n");
printf(" uint64_t create_nonleaf; /* number of nonleaf nodes created */\n");
printf(" uint64_t destroy_leaf; /* number of leaf nodes destroyed */\n");
printf(" uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */\n");
printf(" uint64_t split_leaf; /* number of leaf nodes split */\n");
printf(" uint64_t split_nonleaf; /* number of nonleaf nodes split */\n");
printf(" uint64_t merge_leaf; /* number of times leaf nodes are merged */\n");
printf(" uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */\n");
printf(" uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */\n");
printf(" uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */\n");
printf(" uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */\n");
printf(" uint64_t hot_num_started; /* number of HOT operations that have begun */\n");
printf(" uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */\n");
printf(" uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */\n");
printf(" uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */\n");
printf(" uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/\n");
printf(" uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/\n");
printf(" uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/\n");
printf(" uint64_t msg_bytes_max; /* how many bytes of messages currently in trees (estimate)*/\n");
printf(" uint64_t msg_num; /* how many messages injected at root*/\n");
printf(" uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/\n");
printf(" uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */\n");
printf(" uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_basements_decompressed_prefetch;\n");
printf(" uint64_t num_basements_decompressed_write;\n");
printf(" uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */\n");
printf(" uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_msg_buffer_decompressed_prefetch;\n");
printf(" uint64_t num_msg_buffer_decompressed_write;\n");
printf(" uint64_t num_pivots_fetched_query; /* how many pivots were fetched were fetched for a query */\n");
printf(" uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */\n");
printf(" uint64_t num_pivots_fetched_write; /* ... for a write */\n");
printf(" uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */\n");
printf(" uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_basements_fetched_prefetch;\n");
printf(" uint64_t num_basements_fetched_write;\n");
printf(" uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */\n");
printf(" uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */\n");
printf(" uint64_t num_msg_buffer_fetched_prefetch;\n");
printf(" uint64_t num_msg_buffer_fetched_write;\n");
printf(" u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */ \n");
printf(" u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */ \n");
printf(" u_int64_t le_max_memsize; /* max memsize of any packed le */ \n");
printf(" u_int64_t le_expanded; /* number of times ule used expanded memory */ \n");
printf(" u_int64_t fsync_count; /* number of times fsync performed */ \n");
printf(" u_int64_t fsync_time; /* total time required to fsync */ \n");
printf(" u_int64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */ \n");
printf(" u_int64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */ \n");
printf(" u_int64_t logger_swap_ctr; /* how many times have logger buffers been swapped */ \n");
printf(" char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */ \n");
printf(" u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */ \n");
printf(" u_int64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */ \n");
printf(" u_int64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */ \n");
printf(" u_int64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */ \n");
printf(" u_int64_t loader_create; /* number of loaders created */ \n");
printf(" u_int64_t loader_create_fail; /* number of failed loader creations */ \n");
printf(" u_int64_t loader_put; /* number of loader puts (success) */ \n");
printf(" u_int64_t loader_put_fail; /* number of loader puts that failed */ \n");
printf(" u_int64_t loader_close; /* number of loaders closed (succeed or fail) */ \n");
printf(" u_int64_t loader_close_fail; /* number of loaders closed with error return */ \n");
printf(" u_int64_t loader_abort; /* number of loaders aborted */ \n");
printf(" u_int64_t loader_current; /* number of loaders currently existing */ \n");
printf(" u_int64_t loader_max; /* max number of loaders extant simultaneously */ \n");
printf(" u_int64_t logsuppress; /* number of times logging is suppressed */ \n");
printf(" u_int64_t logsuppressfail; /* number of times logging cannot be suppressed */ \n");
printf(" u_int64_t indexer_create; /* number of indexers created successfully */ \n");
printf(" u_int64_t indexer_create_fail; /* number of failed indexer creations */ \n");
printf(" u_int64_t indexer_build; /* number of indexer build calls (succeeded) */ \n");
printf(" u_int64_t indexer_build_fail; /* number of indexers build calls with error return */ \n");
printf(" u_int64_t indexer_close; /* number of indexers closed successfully) */ \n");
printf(" u_int64_t indexer_close_fail; /* number of indexers closed with error return */ \n");
printf(" u_int64_t indexer_abort; /* number of indexers aborted */ \n");
printf(" u_int64_t indexer_current; /* number of indexers currently existing */ \n");
printf(" u_int64_t indexer_max; /* max number of indexers extant simultaneously */ \n");
printf(" u_int64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */ \n");
printf(" u_int64_t upgrade_header; /* how many brt headers were upgraded? */ \n");
printf(" u_int64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */ \n");
printf(" u_int64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */ \n");
printf(" u_int64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */ \n");
printf(" u_int64_t original_ver; /* original environment version */ \n");
printf(" u_int64_t ver_at_startup; /* environment version at startup */ \n");
printf(" u_int64_t last_lsn_v13; /* last lsn of version 13 environment */ \n");
printf(" char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */ \n");
printf(" u_int64_t env_panic; /* non-zero if environment is panicked */ \n");
printf(" u_int64_t logger_panic; /* non-zero if logger is panicked */ \n");
printf(" u_int64_t logger_panic_errno; /* non-zero if environment is panicked */ \n");
printf(" uint64_t malloc_count; /* number of malloc operations */ \n");
printf(" uint64_t free_count; /* number of free operations */ \n");
printf(" uint64_t realloc_count; /* number of realloc operations */ \n");
printf(" uint64_t malloc_fail; /* number of failed malloc operations */ \n");
printf(" uint64_t realloc_fail; /* number of failed realloc operations */ \n");
printf(" uint64_t mem_requested; /* number of bytes requested via malloc/realloc */ \n");
printf(" uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */ \n");
printf(" uint64_t mem_freed; /* number of bytes freed */ \n");
printf(" uint64_t max_mem_in_use; /* estimated max value of (used - freed) */ \n");
printf(" uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */ \n");
printf(" const char * mallocator_version; /* version string from malloc lib */ \n");
printf("} ENGINE_STATUS;\n");
// Filesystem redzone state
printf("typedef enum { \n");
printf(" FS_GREEN = 0, // green zone (we have lots of space) \n");
printf(" FS_YELLOW = 1, // yellow zone (issue warning but allow operations) \n");
printf(" FS_RED = 2, // red zone (prevent insert operations) \n");
printf(" FS_BLOCKED = 3 // For reporting engine status, completely blocked \n");
printf("} fs_redzone_state;\n");
// engine status info
// engine status is passed to handlerton as an array of TOKU_ENGINE_STATUS_ROW_S[]
printf("typedef enum {\n");
printf(" FS_STATE = 0, // interpret as file system state (redzone) enum \n");
printf(" UINT64, // interpret as uint64_t \n");
printf(" CHARSTR, // interpret as char * \n");
printf(" UNIXTIME, // interpret as time_t \n");
printf(" TOKUTIME // interpret as tokutime_t \n");
printf("} toku_engine_status_display_type; \n");
printf("typedef struct __toku_engine_status_row {\n");
printf(" char * keyname; // info schema key, should not change across revisions without good reason \n");
printf(" char * legend; // the text that will appear at user interface \n");
printf(" toku_engine_status_display_type type; // how to interpret the value \n");
printf(" union { \n");
printf(" uint64_t num; \n");
printf(" char * str; \n");
printf(" } value; \n");
printf("} * TOKU_ENGINE_STATUS_ROW, TOKU_ENGINE_STATUS_ROW_S; \n");
print_dbtype();
// print_db_notices();
@ -725,46 +502,48 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
//print_struct("db_btree_stat", 0, db_btree_stat_fields32, db_btree_stat_fields64, sizeof(db_btree_stat_fields32)/sizeof(db_btree_stat_fields32[0]), 0);
assert(sizeof(db_env_fields32)==sizeof(db_env_fields64));
{
const char *extra[]={
"int (*checkpointing_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic checkpoints. 0 means disabled. */",
"int (*checkpointing_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
"int (*cleaner_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_set_iterations) (DB_ENV*, u_int32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
"int (*cleaner_get_iterations) (DB_ENV*, u_int32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
"int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */",
"int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */",
"int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */",
"int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */",
"int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */",
"int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */",
"int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);",
"int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */",
"int (*create_loader) (DB_ENV *env, DB_TXN *txn, DB_LOADER **blp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t dbt_flags[/*N*/], uint32_t loader_flags)",
"int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags)",
"int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
" const DBT *src_key, const DBT *src_val,\n"
" uint32_t num_dbs, DB **db_array, DBT *keys, DBT *vals, uint32_t *flags_array) /* insert into multiple DBs */",
"int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put)",
"int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
" const DBT *src_key, const DBT *src_val,\n"
" uint32_t num_dbs, DB **db_array, DBT *keys, uint32_t *flags_array) /* delete from multiple DBs */",
"int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del)",
"int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
" DBT *old_src_key, DBT *old_src_data,\n"
" DBT *new_src_key, DBT *new_src_data,\n"
" uint32_t num_dbs, DB **db_array, uint32_t *flags_array,\n"
" uint32_t num_keys, DBT *keys,\n"
" uint32_t num_vals, DBT *vals) /* update multiple DBs */",
"int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */",
"int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */",
"int (*set_lk_max_memory) (DB_ENV *env, uint64_t max)",
"int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max)",
"void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra))",
"int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec)",
"int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec)",
NULL};
const char *extra[]= {
"int (*checkpointing_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic checkpoints. 0 means disabled. */",
"int (*checkpointing_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic checkpoints. 0 means disabled. */",
"int (*cleaner_set_period) (DB_ENV*, u_int32_t) /* Change the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_get_period) (DB_ENV*, u_int32_t*) /* Retrieve the delay between automatic cleaner attempts. 0 means disabled. */",
"int (*cleaner_set_iterations) (DB_ENV*, u_int32_t) /* Change the number of attempts on each cleaner invokation. 0 means disabled. */",
"int (*cleaner_get_iterations) (DB_ENV*, u_int32_t*) /* Retrieve the number of attempts on each cleaner invokation. 0 means disabled. */",
"int (*checkpointing_postpone) (DB_ENV*) /* Use for 'rename table' or any other operation that must be disjoint from a checkpoint */",
"int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */",
"int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */",
"int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */",
"int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */",
"int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */",
"int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */",
"int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */",
"int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);",
"int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */",
"int (*create_loader) (DB_ENV *env, DB_TXN *txn, DB_LOADER **blp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t dbt_flags[/*N*/], uint32_t loader_flags)",
"int (*create_indexer) (DB_ENV *env, DB_TXN *txn, DB_INDEXER **idxrp, DB *src_db, int N, DB *dbs[/*N*/], uint32_t db_flags[/*N*/], uint32_t indexer_flags)",
"int (*put_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
" const DBT *src_key, const DBT *src_val,\n"
" uint32_t num_dbs, DB **db_array, DBT *keys, DBT *vals, uint32_t *flags_array) /* insert into multiple DBs */",
"int (*set_generate_row_callback_for_put) (DB_ENV *env, generate_row_for_put_func generate_row_for_put)",
"int (*del_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
" const DBT *src_key, const DBT *src_val,\n"
" uint32_t num_dbs, DB **db_array, DBT *keys, uint32_t *flags_array) /* delete from multiple DBs */",
"int (*set_generate_row_callback_for_del) (DB_ENV *env, generate_row_for_del_func generate_row_for_del)",
"int (*update_multiple) (DB_ENV *env, DB *src_db, DB_TXN *txn,\n"
" DBT *old_src_key, DBT *old_src_data,\n"
" DBT *new_src_key, DBT *new_src_data,\n"
" uint32_t num_dbs, DB **db_array, uint32_t *flags_array,\n"
" uint32_t num_keys, DBT *keys,\n"
" uint32_t num_vals, DBT *vals) /* update multiple DBs */",
"int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */",
"int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */",
"int (*set_lk_max_memory) (DB_ENV *env, uint64_t max)",
"int (*get_lk_max_memory) (DB_ENV *env, uint64_t *max)",
"void (*set_update) (DB_ENV *env, int (*update_function)(DB *, const DBT *key, const DBT *old_val, const DBT *extra, void (*set_val)(const DBT *new_val, void *set_extra), void *set_extra))",
"int (*set_lock_timeout) (DB_ENV *env, uint64_t lock_wait_time_msec)",
"int (*get_lock_timeout) (DB_ENV *env, uint64_t *lock_wait_time_msec)",
NULL
};
print_struct("db_env", 1, db_env_fields32, db_env_fields64, sizeof(db_env_fields32)/sizeof(db_env_fields32[0]), extra);
}

View file

@ -67,256 +67,28 @@ struct __toku_indexer {
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
/* Engine-wide status snapshot, filled in through
 * DB_ENV->get_engine_status(DB_ENV*, ENGINE_STATUS*, char*, int).
 *
 * Time stamps are carried as char[26] text buffers; everything else is a
 * 64-bit counter or gauge, except mallocator_version (a string pointer).
 * NOTE(review): 26 bytes matches the ctime_r() buffer size, so the time
 * stamps are presumably ctime-formatted -- confirm against the filler code.
 * Both u_int64_t and uint64_t spellings are used below; the mix is kept
 * as-is.
 */
typedef struct __toku_engine_status {
char creationtime[26]; /* time of environment creation */
char startuptime[26]; /* time of engine startup */
char now[26]; /* time of engine status query (i.e. now) */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */
u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */
u_int64_t max_waiters; /* The maximum of num_waiters_now. */
u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */
u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */
u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */
u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */
u_int64_t checkpoint_period; /* delay between automatic checkpoints */
u_int64_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */
uint64_t checkpoint_count; /* number of checkpoints taken */
uint64_t checkpoint_count_fail; /* number of checkpoints failed */
uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */
uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */
uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */
uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commitcheckpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commitcheckpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */
u_int64_t cleaner_period; /* delay between executions of cleaner */
u_int64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */
u_int64_t txn_begin; /* number of transactions ever begun */
u_int64_t txn_commit; /* txn commit operations */
u_int64_t txn_abort; /* txn abort operations */
u_int64_t txn_close; /* txn completions (should equal commit+abort) */
u_int64_t txn_num_open; /* should be begin - close */
u_int64_t txn_max_open; /* max value of num_open */
u_int64_t txn_oldest_live; /* oldest extant txn txnid */
char txn_oldest_live_starttime[26]; /* oldest extant txn start time */
u_int64_t next_lsn; /* lsn that will be assigned to next log entry */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has get_and_pin(_clean) returned with a node */
uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */
uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */
uint64_t cachetable_size_rollback; /* presumably the number of bytes of rollback nodes (the old comment repeated the nonleaf text) -- TODO confirm */
uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */
u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */
u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */
u_int64_t range_locks_max; /* max total number of range locks */
u_int64_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int64_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */
u_int64_t range_read_locks_fail; /* total range read locks unable to be taken */
u_int64_t range_out_of_read_locks; /* total times range read locks exhausted */
u_int64_t range_write_locks; /* total range write locks taken */
u_int64_t range_write_locks_fail; /* total range write locks unable to be taken */
u_int64_t range_out_of_write_locks; /* total times range write locks exhausted */
u_int64_t range_lt_create; /* number of locktrees created */
u_int64_t range_lt_create_fail; /* number of locktree create failures */
u_int64_t range_lt_destroy; /* number of locktrees destroyed */
u_int64_t range_lt_num; /* number of locktrees (should be created - destroyed) */
u_int64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */
u_int64_t directory_read_locks; /* total directory read locks taken */
u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */
u_int64_t directory_write_locks; /* total directory write locks taken */
u_int64_t directory_write_locks_fail; /* total directory write locks unable to be taken */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t inserts_fail; /* ydb row insert operations that failed */
u_int64_t deletes; /* ydb row delete operations */
u_int64_t deletes_fail; /* ydb row delete operations that failed */
u_int64_t updates; /* ydb row update operations */
u_int64_t updates_fail; /* ydb row update operations that failed */
u_int64_t updates_broadcast; /* ydb row update broadcast operations */
u_int64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */
u_int64_t multi_inserts; /* ydb multi_row insert operations, dictionary count */
u_int64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */
u_int64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */
u_int64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */
u_int64_t multi_updates; /* ydb row update operations, dictionary count */
u_int64_t multi_updates_fail; /* ydb row update operations that failed, dictionary count */
u_int64_t point_queries; /* ydb point queries */
u_int64_t sequential_queries; /* ydb sequential queries */
u_int64_t num_db_open; /* number of db_open operations */
u_int64_t num_db_close; /* number of db_close operations */
u_int64_t num_open_dbs; /* number of currently open dbs */
u_int64_t max_open_dbs; /* max number of simultaneously open dbs */
u_int64_t le_updates; /* leafentry update operations */
u_int64_t le_updates_broadcast; /* leafentry update broadcast operations */
u_int64_t descriptor_set; /* descriptor set operations */
u_int64_t partial_fetch_hit; /* node partition is present */
u_int64_t partial_fetch_miss; /* node is present but partition is absent */
u_int64_t partial_fetch_compressed; /* node partition is present but compressed */
u_int64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */
u_int64_t partial_evictions_leaf; /* number of leaf node partial evictions */
u_int64_t msn_discards; /* how many messages were ignored by leaf because of msn */
u_int64_t max_workdone; /* max workdone value of any buffer */
uint64_t total_searches; /* total number of searches */
uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */
uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */
uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */
uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */
uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */
uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */
uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */
uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */
uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */
uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */
uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */
uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */
uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */
uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */
uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */
uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */
uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */
uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */
uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */
uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */
uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */
uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */
uint64_t create_leaf; /* number of leaf nodes created */
uint64_t create_nonleaf; /* number of nonleaf nodes created */
uint64_t destroy_leaf; /* number of leaf nodes destroyed */
uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */
uint64_t split_leaf; /* number of leaf nodes split */
uint64_t split_nonleaf; /* number of nonleaf nodes split */
uint64_t merge_leaf; /* number of times leaf nodes are merged */
uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_bytes_max; /* presumably the max value of msg_bytes_curr (the old comment repeated the msg_bytes_curr text) -- TODO confirm */
uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch; /* presumably ... for a prefetch -- TODO confirm */
uint64_t num_basements_decompressed_write; /* presumably ... for a write -- TODO confirm */
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch; /* presumably ... for a prefetch -- TODO confirm */
uint64_t num_msg_buffer_decompressed_write; /* presumably ... for a write -- TODO confirm */
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch; /* presumably ... for a prefetch -- TODO confirm */
uint64_t num_basements_fetched_write; /* presumably ... for a write -- TODO confirm */
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch; /* presumably ... for a prefetch -- TODO confirm */
uint64_t num_msg_buffer_fetched_write; /* presumably ... for a write -- TODO confirm */
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */
u_int64_t le_expanded; /* number of times ule used expanded memory */
u_int64_t fsync_count; /* number of times fsync performed */
u_int64_t fsync_time; /* total time required to fsync */
u_int64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */
u_int64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */
u_int64_t logger_swap_ctr; /* how many times have logger buffers been swapped */
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */
u_int64_t loader_create; /* number of loaders created */
u_int64_t loader_create_fail; /* number of failed loader creations */
u_int64_t loader_put; /* number of loader puts (success) */
u_int64_t loader_put_fail; /* number of loader puts that failed */
u_int64_t loader_close; /* number of loaders closed (succeed or fail) */
u_int64_t loader_close_fail; /* number of loaders closed with error return */
u_int64_t loader_abort; /* number of loaders aborted */
u_int64_t loader_current; /* number of loaders currently existing */
u_int64_t loader_max; /* max number of loaders extant simultaneously */
u_int64_t logsuppress; /* number of times logging is suppressed */
u_int64_t logsuppressfail; /* number of times logging cannot be suppressed */
u_int64_t indexer_create; /* number of indexers created successfully */
u_int64_t indexer_create_fail; /* number of failed indexer creations */
u_int64_t indexer_build; /* number of indexer build calls (succeeded) */
u_int64_t indexer_build_fail; /* number of indexers build calls with error return */
u_int64_t indexer_close; /* number of indexers closed successfully */
u_int64_t indexer_close_fail; /* number of indexers closed with error return */
u_int64_t indexer_abort; /* number of indexers aborted */
u_int64_t indexer_current; /* number of indexers currently existing */
u_int64_t indexer_max; /* max number of indexers extant simultaneously */
u_int64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */
u_int64_t upgrade_header; /* how many brt headers were upgraded? */
u_int64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */
u_int64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */
u_int64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */
u_int64_t original_ver; /* original environment version */
u_int64_t ver_at_startup; /* environment version at startup */
u_int64_t last_lsn_v13; /* last lsn of version 13 environment */
char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */
u_int64_t env_panic; /* non-zero if environment is panicked */
u_int64_t logger_panic; /* non-zero if logger is panicked */
u_int64_t logger_panic_errno; /* presumably the errno recorded for the logger panic (the old comment repeated the env_panic text) -- TODO confirm */
uint64_t malloc_count; /* number of malloc operations */
uint64_t free_count; /* number of free operations */
uint64_t realloc_count; /* number of realloc operations */
uint64_t malloc_fail; /* number of failed malloc operations */
uint64_t realloc_fail; /* number of failed realloc operations */
uint64_t mem_requested; /* number of bytes requested via malloc/realloc */
uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */
uint64_t mem_freed; /* number of bytes freed */
uint64_t max_mem_in_use; /* estimated max value of (used - freed) */
uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */
const char * mallocator_version; /* version string from malloc lib */
} ENGINE_STATUS;
/* File-system space state ("redzone" state) reported with engine status.
 * Enumerators are numbered consecutively from 0. */
typedef enum {
    FS_GREEN,   /* 0: green zone (we have lots of space) */
    FS_YELLOW,  /* 1: yellow zone (issue warning but allow operations) */
    FS_RED,     /* 2: red zone (prevent insert operations) */
    FS_BLOCKED  /* 3: for reporting engine status, completely blocked */
} fs_redzone_state;
/* Tells a reader of an engine status row how to interpret the row's value. */
typedef enum {
    FS_STATE = 0,  /* interpret as file system state (redzone) enum */
    UINT64   = 1,  /* interpret as uint64_t */
    CHARSTR  = 2,  /* interpret as char * */
    UNIXTIME = 3,  /* interpret as time_t */
    TOKUTIME = 4   /* interpret as tokutime_t */
} toku_engine_status_display_type;
/* One row of engine status: a key, a display legend, and a typed value. */
struct __toku_engine_status_row {
    char *keyname;  /* info schema key, should not change across revisions without good reason */
    char *legend;   /* the text that will appear at user interface */
    toku_engine_status_display_type type;  /* how to interpret the value */
    union {
        uint64_t num;
        char *str;
    } value;        /* presumably `type` selects which member is meaningful -- confirm */
};
/* TOKU_ENGINE_STATUS_ROW_S is the row itself; TOKU_ENGINE_STATUS_ROW is a pointer to a row. */
typedef struct __toku_engine_status_row TOKU_ENGINE_STATUS_ROW_S;
typedef struct __toku_engine_status_row *TOKU_ENGINE_STATUS_ROW;
typedef enum {
DB_BTREE=1,
DB_UNKNOWN=5
@ -432,9 +204,10 @@ struct __toku_db_env {
int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */;
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
void *app_private;
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;

View file

@ -67,256 +67,28 @@ struct __toku_indexer {
int (*close)(DB_INDEXER *indexer); /* finish indexing, free memory */
int (*abort)(DB_INDEXER *indexer); /* abort indexing, free memory */
};
typedef struct __toku_engine_status {
char creationtime[26]; /* time of environment creation */
char startuptime[26]; /* time of engine startup */
char now[26]; /* time of engine status query (i.e. now) */
u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released? */
u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder if any)? */
u_int64_t max_waiters; /* The maximum of num_waiters_now. */
u_int64_t total_sleep_time; /* Total time spent (since the system was booted) sleeping (by the indexer) to give foreground threads a chance to work. */
u_int64_t max_time_ydb_lock_held; /* Maximum time that the ydb lock was held (tokutime_t). */
u_int64_t total_time_ydb_lock_held;/* Total time client threads held the ydb lock (really tokutime_t, convert to seconds with tokutime_to_seconds()) */
u_int64_t total_time_since_start; /* Total time since the lock was created (tokutime_t). Use this as total_time_ydb_lock_held/total_time_since_start to get a ratio. */
u_int64_t checkpoint_period; /* delay between automatic checkpoints */
u_int64_t checkpoint_footprint; /* state of checkpoint procedure */
char checkpoint_time_begin[26]; /* time of last checkpoint begin */
char checkpoint_time_begin_complete[26]; /* time of last complete checkpoint begin */
char checkpoint_time_end[26]; /* time of last checkpoint end */
uint64_t checkpoint_last_lsn; /* LSN of last complete checkpoint */
uint64_t checkpoint_count; /* number of checkpoints taken */
uint64_t checkpoint_count_fail; /* number of checkpoints failed */
uint64_t checkpoint_waiters_now; /* number of threads currently waiting to perform a checkpoint */
uint64_t checkpoint_waiters_max; /* max threads ever simultaneously waiting to perform a checkpoint */
uint64_t checkpoint_client_wait_on_mo; /* how many times a client thread waited for the multi_operation lock */
uint64_t checkpoint_client_wait_on_cs; /* how many times a client thread waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_cs; /* how many times a scheduled checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_client_cs; /* how many times a client checkpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_txn_cs; /* how many times a txn_commitcheckpoint waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_other_cs; /* how many times a checkpoint for another purpose waited for the checkpoint_safe lock */
uint64_t checkpoint_wait_sched_mo; /* how many times a scheduled checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_client_mo; /* how many times a client checkpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_txn_mo; /* how many times a txn_commitcheckpoint waited for the multi_operation lock */
uint64_t checkpoint_wait_other_mo; /* how many times a checkpoint for another purpose waited for the multi_operation lock */
u_int64_t cleaner_period; /* delay between executions of cleaner */
u_int64_t cleaner_iterations; /* number of nodes to flush per cleaner execution */
u_int64_t txn_begin; /* number of transactions ever begun */
u_int64_t txn_commit; /* txn commit operations */
u_int64_t txn_abort; /* txn abort operations */
u_int64_t txn_close; /* txn completions (should equal commit+abort) */
u_int64_t txn_num_open; /* should be begin - close */
u_int64_t txn_max_open; /* max value of num_open */
u_int64_t txn_oldest_live; /* oldest extant txn txnid */
char txn_oldest_live_starttime[26]; /* oldest extant txn start time */
u_int64_t next_lsn; /* lsn that will be assigned to next log entry */
u_int64_t cachetable_lock_taken; /* how many times has cachetable lock been taken */
u_int64_t cachetable_lock_released;/* how many times has cachetable lock been released */
u_int64_t cachetable_hit; /* how many cache hits */
u_int64_t cachetable_miss; /* how many cache misses */
u_int64_t cachetable_misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
u_int64_t maybe_get_and_pin_hits; /* how many times has maybe_get_and_pin(_clean) returned with a node */
uint64_t cachetable_size_current; /* sum of the sizes of the nodes represented in the cachetable */
uint64_t cachetable_size_limit; /* the limit to the sum of the node sizes */
uint64_t cachetable_size_max; /* the max value (high water mark) of cachetable_size_current */
uint64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
uint64_t cachetable_size_nonleaf; /* the number of bytes of nonleaf nodes */
uint64_t cachetable_size_leaf; /* the number of bytes of leaf nodes */
uint64_t cachetable_size_rollback; /* the number of bytes of rollback nodes */
uint64_t cachetable_size_cachepressure; /* number of bytes causing cache pressure (sum of buffers and workdone counters) */
u_int64_t cachetable_evictions; /* how many cache table blocks are evicted */
u_int64_t cleaner_executions; /* how many times the loop in cleaner_thread has executed */
u_int64_t range_locks_max; /* max total number of range locks */
u_int64_t range_locks_curr; /* total range locks currently in use */
u_int64_t range_locks_max_memory; /* max total bytes of range locks */
u_int64_t range_locks_curr_memory; /* total bytes of range locks currently in use */
u_int64_t range_lock_escalation_successes; /* number of times range locks escalation succeeded */
u_int64_t range_lock_escalation_failures; /* number of times range locks escalation failed */
u_int64_t range_read_locks; /* total range read locks taken */
u_int64_t range_read_locks_fail; /* total range read locks unable to be taken */
u_int64_t range_out_of_read_locks; /* total times range read locks exhausted */
u_int64_t range_write_locks; /* total range write locks taken */
u_int64_t range_write_locks_fail; /* total range write locks unable to be taken */
u_int64_t range_out_of_write_locks; /* total times range write locks exhausted */
u_int64_t range_lt_create; /* number of locktrees created */
u_int64_t range_lt_create_fail; /* number of locktree create failures */
u_int64_t range_lt_destroy; /* number of locktrees destroyed */
u_int64_t range_lt_num; /* number of locktrees (should be created - destroyed) */
u_int64_t range_lt_num_max; /* max number of locktrees that have existed simultaneously */
u_int64_t directory_read_locks; /* total directory read locks taken */
u_int64_t directory_read_locks_fail; /* total directory read locks unable to be taken */
u_int64_t directory_write_locks; /* total directory write locks taken */
u_int64_t directory_write_locks_fail; /* total directory write locks unable to be taken */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t inserts_fail; /* ydb row insert operations that failed */
u_int64_t deletes; /* ydb row delete operations */
u_int64_t deletes_fail; /* ydb row delete operations that failed */
u_int64_t updates; /* ydb row update operations */
u_int64_t updates_fail; /* ydb row update operations that failed */
u_int64_t updates_broadcast; /* ydb row update broadcast operations */
u_int64_t updates_broadcast_fail; /* ydb row update broadcast operations that failed */
u_int64_t multi_inserts; /* ydb multi_row insert operations, dictionary count */
u_int64_t multi_inserts_fail; /* ydb multi_row insert operations that failed, dictionary count */
u_int64_t multi_deletes; /* ydb multi_row delete operations, dictionary count */
u_int64_t multi_deletes_fail; /* ydb multi_row delete operations that failed, dictionary count */
u_int64_t multi_updates; /* ydb row update operations, dictionary count */
u_int64_t multi_updates_fail; /* ydb row update operations that failed, dictionary count */
u_int64_t point_queries; /* ydb point queries */
u_int64_t sequential_queries; /* ydb sequential queries */
u_int64_t num_db_open; /* number of db_open operations */
u_int64_t num_db_close; /* number of db_close operations */
u_int64_t num_open_dbs; /* number of currently open dbs */
u_int64_t max_open_dbs; /* max number of simultaneously open dbs */
u_int64_t le_updates; /* leafentry update operations */
u_int64_t le_updates_broadcast; /* leafentry update broadcast operations */
u_int64_t descriptor_set; /* descriptor set operations */
u_int64_t partial_fetch_hit; /* node partition is present */
u_int64_t partial_fetch_miss; /* node is present but partition is absent */
u_int64_t partial_fetch_compressed; /* node partition is present but compressed */
u_int64_t partial_evictions_nonleaf; /* number of nonleaf node partial evictions */
u_int64_t partial_evictions_leaf; /* number of leaf node partial evictions */
u_int64_t msn_discards; /* how many messages were ignored by leaf because of msn */
u_int64_t max_workdone; /* max workdone value of any buffer */
uint64_t total_searches; /* total number of searches */
uint64_t total_retries; /* total number of search retries due to TRY_AGAIN */
uint64_t max_search_excess_retries; /* max number of excess search retries (retries - treeheight) due to TRY_AGAIN */
uint64_t max_search_root_tries; /* max number of times root node was fetched in a single search */
uint64_t search_root_retries; /* number of searches that required the root node to be fetched more than once */
uint64_t search_tries_gt_height; /* number of searches that required more tries than the height of the tree */
uint64_t search_tries_gt_heightplus3; /* number of searches that required more tries than the height of the tree plus three */
uint64_t cleaner_total_nodes; /* total number of nodes whose buffers are potentially flushed by cleaner thread */
uint64_t cleaner_h1_nodes; /* number of nodes of height one whose message buffers are flushed by cleaner thread */
uint64_t cleaner_hgt1_nodes; /* number of nodes of height > 1 whose message buffers are flushed by cleaner thread */
uint64_t cleaner_empty_nodes; /* number of nodes that are selected by cleaner, but whose buffers are empty */
uint64_t cleaner_nodes_dirtied; /* number of nodes that are made dirty by the cleaner thread */
uint64_t cleaner_max_buffer_size; /* max number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_size; /* min number of bytes in message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_size; /* total number of bytes in message buffers flushed by cleaner thread */
uint64_t cleaner_max_buffer_workdone; /* max workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_min_buffer_workdone; /* min workdone value of any message buffer flushed by cleaner thread */
uint64_t cleaner_total_buffer_workdone; /* total workdone value of message buffers flushed by cleaner thread */
uint64_t cleaner_num_leaf_merges_started; /* number of times cleaner thread tries to merge a leaf */
uint64_t cleaner_num_leaf_merges_running; /* number of cleaner thread leaf merges in progress */
uint64_t cleaner_num_leaf_merges_completed; /* number of times cleaner thread successfully merges a leaf */
uint64_t cleaner_num_dirtied_for_leaf_merge; /* nodes dirtied by the "flush from root" process to merge a leaf node */
uint64_t flush_total; /* total number of flushes done by flusher threads or cleaner threads */
uint64_t flush_in_memory; /* number of in memory flushes */
uint64_t flush_needed_io; /* number of flushes that had to read a child (or part) off disk */
uint64_t flush_cascades; /* number of flushes that triggered another flush in the child */
uint64_t flush_cascades_1; /* number of flushes that triggered 1 cascading flush */
uint64_t flush_cascades_2; /* number of flushes that triggered 2 cascading flushes */
uint64_t flush_cascades_3; /* number of flushes that triggered 3 cascading flushes */
uint64_t flush_cascades_4; /* number of flushes that triggered 4 cascading flushes */
uint64_t flush_cascades_5; /* number of flushes that triggered 5 cascading flushes */
uint64_t flush_cascades_gt_5; /* number of flushes that triggered more than 5 cascading flushes */
uint64_t disk_flush_leaf; /* number of leaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_nonleaf; /* number of nonleaf nodes flushed to disk, not for checkpoint */
uint64_t disk_flush_leaf_for_checkpoint; /* number of leaf nodes flushed to disk for checkpoint */
uint64_t disk_flush_nonleaf_for_checkpoint; /* number of nonleaf nodes flushed to disk for checkpoint */
uint64_t create_leaf; /* number of leaf nodes created */
uint64_t create_nonleaf; /* number of nonleaf nodes created */
uint64_t destroy_leaf; /* number of leaf nodes destroyed */
uint64_t destroy_nonleaf; /* number of nonleaf nodes destroyed */
uint64_t split_leaf; /* number of leaf nodes split */
uint64_t split_nonleaf; /* number of nonleaf nodes split */
uint64_t merge_leaf; /* number of times leaf nodes are merged */
uint64_t merge_nonleaf; /* number of times nonleaf nodes are merged */
uint64_t dirty_leaf; /* number of times leaf nodes are dirtied when previously clean */
uint64_t dirty_nonleaf; /* number of times nonleaf nodes are dirtied when previously clean */
uint64_t balance_leaf; /* number of times a leaf node is balanced inside brt */
uint64_t hot_num_started; /* number of HOT operations that have begun */
uint64_t hot_num_completed; /* number of HOT operations that have successfully completed */
uint64_t hot_num_aborted; /* number of HOT operations that have been aborted */
uint64_t hot_max_root_flush_count; /* max number of flushes from root ever required to optimize a tree */
uint64_t msg_bytes_in; /* how many bytes of messages injected at root (for all trees)*/
uint64_t msg_bytes_out; /* how many bytes of messages flushed from h1 nodes to leaves*/
uint64_t msg_bytes_curr; /* how many bytes of messages currently in trees (estimate)*/
uint64_t msg_bytes_max; /* max number of bytes of messages ever in trees (estimate)*/
uint64_t msg_num; /* how many messages injected at root*/
uint64_t msg_num_broadcast; /* how many broadcast messages injected at root*/
uint64_t num_basements_decompressed_normal; /* how many basement nodes were decompressed because they were the target of a query */
uint64_t num_basements_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; /* how many msg buffers were decompressed because they were the target of a query */
uint64_t num_msg_buffer_decompressed_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; /* how many pivots were fetched for a query */
uint64_t num_pivots_fetched_prefetch; /* ... for a prefetch */
uint64_t num_pivots_fetched_write; /* ... for a write */
uint64_t num_basements_fetched_normal; /* how many basement nodes were fetched because they were the target of a query */
uint64_t num_basements_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; /* how many msg buffers were fetched because they were the target of a query */
uint64_t num_msg_buffer_fetched_aggressive; /* ... because they were between lc and rc */
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
u_int64_t le_max_committed_xr; /* max committed transaction records in any packed le */
u_int64_t le_max_provisional_xr; /* max provisional transaction records in any packed le */
u_int64_t le_max_memsize; /* max memsize of any packed le */
u_int64_t le_expanded; /* number of times ule used expanded memory */
u_int64_t fsync_count; /* number of times fsync performed */
u_int64_t fsync_time; /* total time required to fsync */
u_int64_t logger_ilock_ctr; /* how many times has logger input lock been taken or released */
u_int64_t logger_olock_ctr; /* how many times has logger output condition lock been taken or released */
u_int64_t logger_swap_ctr; /* how many times have logger buffers been swapped */
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_ctr; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_redzone_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_state; /* state of ydb-level ENOSPC prevention (0 = green, 1 = yellow, 2 = red) */
u_int64_t loader_create; /* number of loaders created */
u_int64_t loader_create_fail; /* number of failed loader creations */
u_int64_t loader_put; /* number of loader puts (success) */
u_int64_t loader_put_fail; /* number of loader puts that failed */
u_int64_t loader_close; /* number of loaders closed (succeed or fail) */
u_int64_t loader_close_fail; /* number of loaders closed with error return */
u_int64_t loader_abort; /* number of loaders aborted */
u_int64_t loader_current; /* number of loaders currently existing */
u_int64_t loader_max; /* max number of loaders extant simultaneously */
u_int64_t logsuppress; /* number of times logging is suppressed */
u_int64_t logsuppressfail; /* number of times logging cannot be suppressed */
u_int64_t indexer_create; /* number of indexers created successfully */
u_int64_t indexer_create_fail; /* number of failed indexer creations */
u_int64_t indexer_build; /* number of indexer build calls (succeeded) */
u_int64_t indexer_build_fail; /* number of indexers build calls with error return */
u_int64_t indexer_close; /* number of indexers closed successfully */
u_int64_t indexer_close_fail; /* number of indexers closed with error return */
u_int64_t indexer_abort; /* number of indexers aborted */
u_int64_t indexer_current; /* number of indexers currently existing */
u_int64_t indexer_max; /* max number of indexers extant simultaneously */
u_int64_t upgrade_env_status; /* Was an environment upgrade done? What was done? */
u_int64_t upgrade_header; /* how many brt headers were upgraded? */
u_int64_t upgrade_nonleaf; /* how many brt nonleaf nodes were upgraded? */
u_int64_t upgrade_leaf; /* how many brt leaf nodes were upgraded? */
u_int64_t optimized_for_upgrade; /* how many optimized_for_upgrade messages were broadcast */
u_int64_t original_ver; /* original environment version */
u_int64_t ver_at_startup; /* environment version at startup */
u_int64_t last_lsn_v13; /* last lsn of version 13 environment */
char upgrade_v14_time[26]; /* timestamp of when upgrade to version 14 environment was done */
u_int64_t env_panic; /* non-zero if environment is panicked */
u_int64_t logger_panic; /* non-zero if logger is panicked */
u_int64_t logger_panic_errno; /* errno of logger panic (non-zero if logger is panicked) */
uint64_t malloc_count; /* number of malloc operations */
uint64_t free_count; /* number of free operations */
uint64_t realloc_count; /* number of realloc operations */
uint64_t malloc_fail; /* number of failed malloc operations */
uint64_t realloc_fail; /* number of failed realloc operations */
uint64_t mem_requested; /* number of bytes requested via malloc/realloc */
uint64_t mem_used; /* number of bytes used (obtained from malloc_usable_size()) */
uint64_t mem_freed; /* number of bytes freed */
uint64_t max_mem_in_use; /* estimated max value of (used - freed) */
uint64_t malloc_mmap_threshold; /* threshold for malloc to use mmap */
const char * mallocator_version; /* version string from malloc lib */
} ENGINE_STATUS;
/* File system free-space zones, ordered from least to most restrictive. */
typedef enum {
    FS_GREEN   = 0,  /* green zone: lots of space available */
    FS_YELLOW  = 1,  /* yellow zone: a warning is issued, operations are still allowed */
    FS_RED     = 2,  /* red zone: insert operations are prevented */
    FS_BLOCKED = 3   /* completely blocked; used when reporting engine status */
} fs_redzone_state;
/* Tells a reader of an engine status row how the row's value is to be interpreted. */
typedef enum {
    FS_STATE = 0,  /* file system state (redzone) enum */
    UINT64   = 1,  /* uint64_t */
    CHARSTR  = 2,  /* char * */
    UNIXTIME = 3,  /* time_t */
    TOKUTIME = 4   /* tokutime_t */
} toku_engine_status_display_type;
/*
 * One row of engine status: a key, a human-readable legend, and a value
 * together with the tag saying how that value is to be interpreted.
 */
typedef struct __toku_engine_status_row {
    char * keyname;                        /* info schema key; should not change across revisions without good reason */
    char * legend;                         /* text that appears at the user interface */
    toku_engine_status_display_type type;  /* how to interpret the value */
    union {
        uint64_t num;
        char * str;
    } value;
} TOKU_ENGINE_STATUS_ROW_S, * TOKU_ENGINE_STATUS_ROW;
typedef enum {
DB_BTREE=1,
DB_UNKNOWN=5
@ -432,9 +204,10 @@ struct __toku_db_env {
int (*checkpointing_resume) (DB_ENV*) /* Alert tokudb 'postpone' is no longer necessary */;
int (*checkpointing_begin_atomic_operation) (DB_ENV*) /* Begin a set of operations (that must be atomic as far as checkpoints are concerned). i.e. inserting into every index in one table */;
int (*checkpointing_end_atomic_operation) (DB_ENV*) /* End a set of operations (that must be atomic as far as checkpoints are concerned). */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status) (DB_ENV*, ENGINE_STATUS*, char*, int) /* Fill in status struct, possibly env panic string */;
int (*set_default_bt_compare) (DB_ENV*,int (*bt_compare) (DB *, const DBT *, const DBT *)) /* Set default (key) comparison function for all DBs in this environment. Required for RECOVERY since you cannot open the DBs manually. */;
int (*get_engine_status_num_rows) (DB_ENV*, uint64_t*) /* return number of rows in engine status */;
void *app_private;
int (*get_engine_status) (DB_ENV*, TOKU_ENGINE_STATUS_ROW, uint64_t, fs_redzone_state*, uint64_t*, char*, int) /* Fill in status struct and redzone state, possibly env panic string */;
int (*get_engine_status_text) (DB_ENV*, char*, int) /* Fill in status text */;
int (*crash) (DB_ENV*, const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/);;
int (*get_iname) (DB_ENV* env, DBT* dname_dbt, DBT* iname_dbt) /* FOR TEST ONLY: lookup existing iname */;

View file

@ -2,7 +2,7 @@
.DEFAULT_GOAL=install
TOKUROOT=../
INCLUDEDIRS=-I.
INCLUDEDIRS=-I. -I$(TOKUROOT)/include
include $(TOKUROOT)toku_include/Makefile.include
OPT_AROPT=-qnoipo #Disable ipo for lib creation even when optimization is on.

View file

@ -2,6 +2,8 @@
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#include <toku_portability.h>
#include "db.h" // get Toku-specific version of db.h
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
@ -16,7 +18,51 @@ static free_fun_t t_free = 0;
static realloc_fun_t t_realloc = 0;
static realloc_fun_t t_xrealloc = 0;
static MEMORY_STATUS_S status;
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
static MEMORY_STATUS_S memory_status;
static volatile uint64_t max_in_use; // maximum memory footprint (used - freed), approximate (not worth threadsafety overhead for exact, but worth keeping as volatile)
// Fill in the descriptive fields of row k of memory_status:
//   keyname <- the spelling of k itself (stringified),
//   type    <- t,
//   legend  <- the string literal l prefixed with "memory: " (l must be a literal,
//              since it is joined by string-literal concatenation).
// Wrapped in do { } while (0) so that "STATUS_INIT(...);" is exactly one statement
// and stays correct when used as the body of an if/else.
#define STATUS_INIT(k,t,l) do { \
        memory_status.status[k].keyname = #k; \
        memory_status.status[k].type    = (t); \
        memory_status.status[k].legend  = "memory: " l; \
    } while (0)
// Fill in the descriptive part (keyname, type, legend) of every memory status row
// in memory_status, then mark memory_status as initialized.
// Each legend is stored with the "memory: " prefix supplied by STATUS_INIT.
// Called from toku_memory_get_status() the first time status is requested.
static void
status_init(void) {
// Note, this function initializes the keyname, type, and legend fields.
// Value fields are initialized to zero by compiler.
// Operation counters.
STATUS_INIT(MEMORY_MALLOC_COUNT, UINT64, "number of malloc operations");
STATUS_INIT(MEMORY_FREE_COUNT, UINT64, "number of free operations");
STATUS_INIT(MEMORY_REALLOC_COUNT, UINT64, "number of realloc operations");
STATUS_INIT(MEMORY_MALLOC_FAIL, UINT64, "number of malloc operations that failed");
STATUS_INIT(MEMORY_REALLOC_FAIL, UINT64, "number of realloc operations that failed" );
// Byte counters.
STATUS_INIT(MEMORY_REQUESTED, UINT64, "number of bytes requested");
STATUS_INIT(MEMORY_USED, UINT64, "number of bytes used (requested + overhead)");
STATUS_INIT(MEMORY_FREED, UINT64, "number of bytes freed");
STATUS_INIT(MEMORY_MAX_IN_USE, UINT64, "estimated maximum memory footprint");
// Allocator description; the version row is the only one displayed as a string.
STATUS_INIT(MEMORY_MALLOCATOR_VERSION, CHARSTR, "mallocator version");
STATUS_INIT(MEMORY_MMAP_THRESHOLD, UINT64, "mmap threshold");
memory_status.initialized = 1; // TODO 2949 Make this a bool, set to true
}
#undef STATUS_INIT
#define STATUS_VALUE(x) memory_status.status[x].value.num
// Copy the current memory status into *statp.
// The row descriptions are filled in on the first call; the max-in-use row is
// refreshed from the max_in_use counter just before the copy is made.
void
toku_memory_get_status(MEMORY_STATUS statp) {
    if (memory_status.initialized == 0) {
        status_init();
    }
    memory_status.status[MEMORY_MAX_IN_USE].value.num = max_in_use;
    *statp = memory_status;
}
#define STATUS_VERSION_STRING memory_status.status[MEMORY_MALLOCATOR_VERSION].value.str
int
toku_memory_startup(void) {
@ -26,8 +72,8 @@ toku_memory_startup(void) {
size_t mmap_threshold = 64 * 1024; // 64K and larger should be malloced with mmap().
int success = mallopt(M_MMAP_THRESHOLD, mmap_threshold);
if (success) {
status.mallocator_version = "libc";
status.mmap_threshold = mmap_threshold;
STATUS_VERSION_STRING = "libc";
STATUS_VALUE(MEMORY_MMAP_THRESHOLD) = mmap_threshold;
} else
result = EINVAL;
@ -38,14 +84,14 @@ toku_memory_startup(void) {
mallctl_fun_t mallctl_f;
mallctl_f = (mallctl_fun_t) dlsym(RTLD_DEFAULT, "mallctl");
if (mallctl_f) { // jemalloc is loaded
size_t version_length = sizeof status.mallocator_version;
result = mallctl_f("version", &status.mallocator_version, &version_length, NULL, 0);
size_t version_length = sizeof STATUS_VERSION_STRING;
result = mallctl_f("version", &STATUS_VERSION_STRING, &version_length, NULL, 0);
if (result == 0) {
size_t lg_chunk; // log2 of the mmap threshold
size_t lg_chunk_length = sizeof lg_chunk;
result = mallctl_f("opt.lg_chunk", &lg_chunk, &lg_chunk_length, NULL, 0);
if (result == 0)
status.mmap_threshold = 1 << lg_chunk;
STATUS_VALUE(MEMORY_MMAP_THRESHOLD) = 1 << lg_chunk;
}
}
@ -56,11 +102,6 @@ void
toku_memory_shutdown(void) {
}
void
toku_memory_get_status(MEMORY_STATUS s) {
*s = status;
}
// jemalloc's malloc_usable_size does not work with a NULL pointer, so we implement a version that works
static size_t
my_malloc_usable_size(void *p) {
@ -71,16 +112,16 @@ my_malloc_usable_size(void *p) {
// It is not worth the overhead to make it completely accurate, but
// this logic is intended to guarantee that it increases monotonically.
// Note that status.sum_used and status.sum_freed increase monotonically
// and that status.max_in_use is declared volatile.
// and that max_in_use is declared volatile.
static inline void
set_max(uint64_t sum_used, uint64_t sum_freed) {
if (sum_used >= sum_freed) {
uint64_t in_use = sum_used - sum_freed;
uint64_t old_max;
do {
old_max = status.max_in_use;
old_max = max_in_use;
} while (old_max < in_use &&
!__sync_bool_compare_and_swap(&status.max_in_use, old_max, in_use));
!__sync_bool_compare_and_swap(&max_in_use, old_max, in_use));
}
}
@ -92,7 +133,7 @@ toku_memory_footprint(void * p, size_t touched) {
pagesize = sysconf(_SC_PAGESIZE);
if (p) {
size_t usable = my_malloc_usable_size(p);
if (usable >= status.mmap_threshold) {
if (usable >= STATUS_VALUE(MEMORY_MMAP_THRESHOLD)) {
int num_pages = (touched + pagesize) / pagesize;
rval = num_pages * pagesize;
}
@ -108,12 +149,12 @@ toku_malloc(size_t size) {
void *p = t_malloc ? t_malloc(size) : os_malloc(size);
if (p) {
size_t used = my_malloc_usable_size(p);
__sync_add_and_fetch(&status.malloc_count, 1);
__sync_add_and_fetch(&status.requested,size);
__sync_add_and_fetch(&status.used, used);
set_max(status.used, status.freed);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_MALLOC_COUNT), 1);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_REQUESTED), size);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_USED), used);
set_max(STATUS_VALUE(MEMORY_USED), STATUS_VALUE(MEMORY_FREED));
} else {
__sync_add_and_fetch(&status.malloc_fail, 1);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_MALLOC_FAIL), 1);
}
return p;
}
@ -132,13 +173,13 @@ toku_realloc(void *p, size_t size) {
void *q = t_realloc ? t_realloc(p, size) : os_realloc(p, size);
if (q) {
size_t used = my_malloc_usable_size(q);
__sync_add_and_fetch(&status.realloc_count, 1);
__sync_add_and_fetch(&status.requested, size);
__sync_add_and_fetch(&status.used, used);
__sync_add_and_fetch(&status.freed, used_orig);
set_max(status.used, status.freed);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_REALLOC_COUNT), 1);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_REQUESTED), size);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_USED), used);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_FREED), used_orig);
set_max(STATUS_VALUE(MEMORY_USED), STATUS_VALUE(MEMORY_FREED));
} else {
__sync_add_and_fetch(&status.realloc_fail, 1);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_REALLOC_FAIL), 1);
}
return q;
}
@ -159,8 +200,8 @@ void
toku_free(void *p) {
if (p) {
size_t used = my_malloc_usable_size(p);
__sync_add_and_fetch(&status.free_count, 1);
__sync_add_and_fetch(&status.freed, used);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_FREE_COUNT), 1);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_FREED), used);
if (t_free)
t_free(p);
else
@ -179,10 +220,10 @@ toku_xmalloc(size_t size) {
if (p == NULL) // avoid function call in common case
resource_assert(p);
size_t used = my_malloc_usable_size(p);
__sync_add_and_fetch(&status.malloc_count, 1);
__sync_add_and_fetch(&status.requested, size);
__sync_add_and_fetch(&status.used, used);
set_max(status.used, status.freed);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_MALLOC_COUNT), 1);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_REQUESTED), size);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_USED), used);
set_max(STATUS_VALUE(MEMORY_USED), STATUS_VALUE(MEMORY_FREED));
return p;
}
@ -201,11 +242,11 @@ toku_xrealloc(void *v, size_t size) {
if (p == 0) // avoid function call in common case
resource_assert(p);
size_t used = my_malloc_usable_size(p);
__sync_add_and_fetch(&status.realloc_count, 1);
__sync_add_and_fetch(&status.requested, size);
__sync_add_and_fetch(&status.used, used);
__sync_add_and_fetch(&status.freed, used_orig);
set_max(status.used, status.freed);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_REALLOC_COUNT), 1);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_REQUESTED), size);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_USED), used);
__sync_add_and_fetch(&STATUS_VALUE(MEMORY_FREED), used_orig);
set_max(STATUS_VALUE(MEMORY_USED), STATUS_VALUE(MEMORY_FREED));
return p;
}
@ -268,5 +309,5 @@ toku_set_func_free(free_fun_t f) {
void __attribute__((constructor)) toku_memory_drd_ignore(void);
void
toku_memory_drd_ignore(void) {
DRD_IGNORE_VAR(status);
DRD_IGNORE_VAR(memory_status);
}

View file

@ -18,14 +18,18 @@
static void *backtrace_pointers[N_POINTERS];
#endif
static uint64_t engine_status_num_rows = 0;
// Function pointers are zero by default so asserts can be used by brt-layer tests without an environment.
static int (*toku_maybe_get_engine_status_text_p)(char* buff, int buffsize) = 0;
static void (*toku_maybe_set_env_panic_p)(int code, char* msg) = 0;
void toku_assert_set_fpointers(int (*toku_maybe_get_engine_status_text_pointer)(char*, int),
void (*toku_maybe_set_env_panic_pointer)(int, char*)) {
void (*toku_maybe_set_env_panic_pointer)(int, char*),
uint64_t num_rows) {
toku_maybe_get_engine_status_text_p = toku_maybe_get_engine_status_text_pointer;
toku_maybe_set_env_panic_p = toku_maybe_set_env_panic_pointer;
engine_status_num_rows = num_rows;
}
void (*do_assert_hook)(void) = NULL;
@ -44,10 +48,9 @@ toku_do_backtrace_abort(void) {
fflush(stderr);
if (toku_maybe_get_engine_status_text_p) {
int buffsize = 1024 * 32;
char buff[buffsize];
if (engine_status_num_rows && toku_maybe_get_engine_status_text_p) {
int buffsize = engine_status_num_rows * 128; // assume 128 characters per row (gross overestimate, should be safe)
char buff[buffsize];
toku_maybe_get_engine_status_text_p(buff, buffsize);
fprintf(stderr, "Engine status:\n%s\n", buff);
}

View file

@ -1,4 +1,4 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
@ -8,14 +8,63 @@
#include <brt-flusher-internal.h>
#include <brt-cachetable-wrappers.h>
static BRT_FLUSHER_STATUS_S brt_flusher_status;
/* Status is intended for display to humans to help understand system behavior.
* It does not need to be perfectly thread-safe.
*/
static volatile BRT_FLUSHER_STATUS_S brt_flusher_status;
#define STATUS_INIT(k, t, l) { \
brt_flusher_status.status[k].keyname = #k; \
brt_flusher_status.status[k].type = t; \
brt_flusher_status.status[k].legend = "brt flusher: " l; \
}
#define STATUS_VALUE(x) brt_flusher_status.status[x].value.num
void toku_brt_flusher_status_init(void) {
brt_flusher_status.cleaner_min_buffer_size = UINT64_MAX;
brt_flusher_status.cleaner_min_buffer_workdone = UINT64_MAX;
// Note, this function initializes the keyname, type, and legend fields.
// Value fields are initialized to zero by compiler.
STATUS_INIT(BRT_FLUSHER_CLEANER_TOTAL_NODES, UINT64, "total nodes potentially flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_H1_NODES, UINT64, "height-one nodes flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_HGT1_NODES, UINT64, "height-greater-than-one nodes flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_EMPTY_NODES, UINT64, "nodes cleaned which had empty buffers");
STATUS_INIT(BRT_FLUSHER_CLEANER_NODES_DIRTIED, UINT64, "nodes dirtied by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_MAX_BUFFER_SIZE, UINT64, "max bytes in a buffer flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_MIN_BUFFER_SIZE, UINT64, "min bytes in a buffer flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE, UINT64, "total bytes in buffers flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, UINT64, "max workdone in a buffer flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE, UINT64, "min workdone in a buffer flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE, UINT64, "total workdone in buffers flushed by cleaner thread");
STATUS_INIT(BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, UINT64, "times cleaner thread tries to merge a leaf");
STATUS_INIT(BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, UINT64, "cleaner thread leaf merges in progress");
STATUS_INIT(BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, UINT64, "cleaner thread leaf merges successful");
STATUS_INIT(BRT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, UINT64, "nodes dirtied by cleaner thread leaf merges");
STATUS_INIT(BRT_FLUSHER_FLUSH_TOTAL, UINT64, "total number of flushes done by flusher threads or cleaner threads");
STATUS_INIT(BRT_FLUSHER_FLUSH_IN_MEMORY, UINT64, "number of in memory flushes");
STATUS_INIT(BRT_FLUSHER_FLUSH_NEEDED_IO, UINT64, "number of flushes that read something off disk");
STATUS_INIT(BRT_FLUSHER_FLUSH_CASCADES, UINT64, "number of flushes that triggered another flush in child");
STATUS_INIT(BRT_FLUSHER_FLUSH_CASCADES_1, UINT64, "number of flushes that triggered 1 cascading flush");
STATUS_INIT(BRT_FLUSHER_FLUSH_CASCADES_2, UINT64, "number of flushes that triggered 2 cascading flushes");
STATUS_INIT(BRT_FLUSHER_FLUSH_CASCADES_3, UINT64, "number of flushes that triggered 3 cascading flushes");
STATUS_INIT(BRT_FLUSHER_FLUSH_CASCADES_4, UINT64, "number of flushes that triggered 4 cascading flushes");
STATUS_INIT(BRT_FLUSHER_FLUSH_CASCADES_5, UINT64, "number of flushes that triggered 5 cascading flushes");
STATUS_INIT(BRT_FLUSHER_FLUSH_CASCADES_GT_5, UINT64, "number of flushes that triggered over 5 cascading flushes");
STATUS_INIT(BRT_FLUSHER_SPLIT_LEAF, UINT64, "leaf node splits");
STATUS_INIT(BRT_FLUSHER_SPLIT_NONLEAF, UINT64, "nonleaf node splits");
STATUS_INIT(BRT_FLUSHER_MERGE_LEAF, UINT64, "leaf node merges");
STATUS_INIT(BRT_FLUSHER_MERGE_NONLEAF, UINT64, "nonleaf node merges");
STATUS_INIT(BRT_FLUSHER_BALANCE_LEAF, UINT64, "leaf node balances");
STATUS_VALUE(BRT_FLUSHER_CLEANER_MIN_BUFFER_SIZE) = UINT64_MAX;
STATUS_VALUE(BRT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE) = UINT64_MAX;
brt_flusher_status.initialized = true;
}
#undef STATUS_INIT
/* Copy the whole flusher status table into *status.
 * If the table has not been marked initialized, its row descriptions are
 * filled in first so the caller never sees rows without keyname/legend.
 */
void toku_brt_flusher_get_status(BRT_FLUSHER_STATUS status) {
    const int already_set_up = brt_flusher_status.initialized;
    if (!already_set_up) {
        toku_brt_flusher_status_init();
    }
    *status = brt_flusher_status;
}
@ -65,22 +114,22 @@ find_heaviest_child(BRTNODE node)
static void
update_flush_status(BRTNODE child, int cascades) {
brt_flusher_status.flush_total++;
STATUS_VALUE(BRT_FLUSHER_FLUSH_TOTAL)++;
if (cascades > 0) {
brt_flusher_status.flush_cascades++;
STATUS_VALUE(BRT_FLUSHER_FLUSH_CASCADES)++;
switch (cascades) {
case 1:
brt_flusher_status.flush_cascades_1++; break;
STATUS_VALUE(BRT_FLUSHER_FLUSH_CASCADES_1)++; break;
case 2:
brt_flusher_status.flush_cascades_2++; break;
STATUS_VALUE(BRT_FLUSHER_FLUSH_CASCADES_2)++; break;
case 3:
brt_flusher_status.flush_cascades_3++; break;
STATUS_VALUE(BRT_FLUSHER_FLUSH_CASCADES_3)++; break;
case 4:
brt_flusher_status.flush_cascades_4++; break;
STATUS_VALUE(BRT_FLUSHER_FLUSH_CASCADES_4)++; break;
case 5:
brt_flusher_status.flush_cascades_5++; break;
STATUS_VALUE(BRT_FLUSHER_FLUSH_CASCADES_5)++; break;
default:
brt_flusher_status.flush_cascades_gt_5++; break;
STATUS_VALUE(BRT_FLUSHER_FLUSH_CASCADES_GT_5)++; break;
}
}
bool flush_needs_io = false;
@ -90,9 +139,9 @@ update_flush_status(BRTNODE child, int cascades) {
}
}
if (flush_needs_io) {
brt_flusher_status.flush_needed_io++;
STATUS_VALUE(BRT_FLUSHER_FLUSH_NEEDED_IO)++;
} else {
brt_flusher_status.flush_in_memory++;
STATUS_VALUE(BRT_FLUSHER_FLUSH_IN_MEMORY)++;
}
}
@ -275,7 +324,7 @@ ctm_update_status(
void* UU(extra)
)
{
brt_flusher_status.cleaner_num_dirtied_for_leaf_merge += dirtied;
STATUS_VALUE(BRT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE) += dirtied;
}
static void
@ -287,7 +336,7 @@ ctm_maybe_merge_child(struct flusher_advice *fa,
void *extra)
{
if (child->height == 0) {
(void) __sync_fetch_and_add(&brt_flusher_status.cleaner_num_leaf_merges_completed, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED), 1);
}
default_merge_child(fa, h, parent, childnum, child, extra);
}
@ -357,12 +406,12 @@ ct_maybe_merge_child(struct flusher_advice *fa,
toku_brtheader_release_treelock(h);
}
(void) __sync_fetch_and_add(&brt_flusher_status.cleaner_num_leaf_merges_started, 1);
(void) __sync_fetch_and_add(&brt_flusher_status.cleaner_num_leaf_merges_running, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED), 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), 1);
flush_some_child(h, root_node, &new_fa);
(void) __sync_fetch_and_add(&brt_flusher_status.cleaner_num_leaf_merges_running, -1);
(void) __sync_fetch_and_add(&STATUS_VALUE(BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING), -1);
toku_free(buf);
}
@ -375,7 +424,7 @@ ct_update_status(BRTNODE child,
{
struct flush_status_update_extra* fste = extra;
update_flush_status(child, fste->cascades);
brt_flusher_status.cleaner_nodes_dirtied += dirtied;
STATUS_VALUE(BRT_FLUSHER_CLEANER_NODES_DIRTIED) += dirtied;
// Incrementing this in case `flush_some_child` decides to recurse.
fste->cascades++;
}
@ -669,7 +718,7 @@ brtleaf_split(
{
invariant(node->height == 0);
brt_flusher_status.split_leaf++;
STATUS_VALUE(BRT_FLUSHER_SPLIT_LEAF)++;
if (node->n_children) {
// First move all the accumulated stat64info deltas into the first basement.
// After the split, either both nodes or neither node will be included in the next checkpoint.
@ -874,7 +923,7 @@ brt_nonleaf_split(
BRTNODE* dependent_nodes)
{
//VERIFY_NODE(t,node);
brt_flusher_status.split_nonleaf++;
STATUS_VALUE(BRT_FLUSHER_SPLIT_NONLEAF)++;
toku_assert_entire_node_in_memory(node);
int old_n_children = node->n_children;
int n_children_in_a = old_n_children/2;
@ -1036,7 +1085,7 @@ flush_this_child(
static void
merge_leaf_nodes(BRTNODE a, BRTNODE b)
{
brt_flusher_status.merge_leaf++;
STATUS_VALUE(BRT_FLUSHER_MERGE_LEAF)++;
toku_assert_entire_node_in_memory(a);
toku_assert_entire_node_in_memory(b);
assert(a->height == 0);
@ -1116,7 +1165,7 @@ balance_leaf_nodes(
// If b is bigger then move stuff from b to a until b is the smaller.
// If a is bigger then move stuff from a to b until a is the smaller.
{
brt_flusher_status.balance_leaf++;
STATUS_VALUE(BRT_FLUSHER_BALANCE_LEAF)++;
DBT splitk_dbt;
// first merge all the data into a
merge_leaf_nodes(a,b);
@ -1205,7 +1254,7 @@ maybe_merge_pinned_nonleaf_nodes(
*did_rebalance = FALSE;
*splitk = NULL;
brt_flusher_status.merge_nonleaf++;
STATUS_VALUE(BRT_FLUSHER_MERGE_NONLEAF)++;
}
static void
@ -1613,33 +1662,33 @@ update_cleaner_status(
BRTNODE node,
int childnum)
{
brt_flusher_status.cleaner_total_nodes++;
STATUS_VALUE(BRT_FLUSHER_CLEANER_TOTAL_NODES)++;
if (node->height == 1) {
brt_flusher_status.cleaner_h1_nodes++;
STATUS_VALUE(BRT_FLUSHER_CLEANER_H1_NODES)++;
} else {
brt_flusher_status.cleaner_hgt1_nodes++;
STATUS_VALUE(BRT_FLUSHER_CLEANER_HGT1_NODES)++;
}
unsigned int nbytesinbuf = toku_bnc_nbytesinbuf(BNC(node, childnum));
if (nbytesinbuf == 0) {
brt_flusher_status.cleaner_empty_nodes++;
STATUS_VALUE(BRT_FLUSHER_CLEANER_EMPTY_NODES)++;
} else {
if (nbytesinbuf > brt_flusher_status.cleaner_max_buffer_size) {
brt_flusher_status.cleaner_max_buffer_size = nbytesinbuf;
if (nbytesinbuf > STATUS_VALUE(BRT_FLUSHER_CLEANER_MAX_BUFFER_SIZE)) {
STATUS_VALUE(BRT_FLUSHER_CLEANER_MAX_BUFFER_SIZE) = nbytesinbuf;
}
if (nbytesinbuf < brt_flusher_status.cleaner_min_buffer_size) {
brt_flusher_status.cleaner_min_buffer_size = nbytesinbuf;
if (nbytesinbuf < STATUS_VALUE(BRT_FLUSHER_CLEANER_MIN_BUFFER_SIZE)) {
STATUS_VALUE(BRT_FLUSHER_CLEANER_MIN_BUFFER_SIZE) = nbytesinbuf;
}
brt_flusher_status.cleaner_total_buffer_size += nbytesinbuf;
STATUS_VALUE(BRT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE) += nbytesinbuf;
uint64_t workdone = BP_WORKDONE(node, childnum);
if (workdone > brt_flusher_status.cleaner_max_buffer_workdone) {
brt_flusher_status.cleaner_max_buffer_workdone = workdone;
if (workdone > STATUS_VALUE(BRT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE)) {
STATUS_VALUE(BRT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE) = workdone;
}
if (workdone < brt_flusher_status.cleaner_min_buffer_workdone) {
brt_flusher_status.cleaner_min_buffer_workdone = workdone;
if (workdone < STATUS_VALUE(BRT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE)) {
STATUS_VALUE(BRT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE) = workdone;
}
brt_flusher_status.cleaner_total_buffer_workdone += workdone;
STATUS_VALUE(BRT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE) += workdone;
}
}
@ -1844,3 +1893,5 @@ void
toku_brt_flusher_drd_ignore(void) {
DRD_IGNORE_VAR(brt_flusher_status);
}
#undef STATUS_VALUE

View file

@ -1,4 +1,4 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ifndef BRT_FLUSHER
#define BRT_FLUSHER
#ident "$Id$"
@ -11,37 +11,43 @@
C_BEGIN
typedef struct brt_flusher_status {
uint64_t cleaner_total_nodes; // total number of nodes whose buffers are potentially flushed by cleaner thread
uint64_t cleaner_h1_nodes; // number of nodes of height one whose message buffers are flushed by cleaner thread
uint64_t cleaner_hgt1_nodes; // number of nodes of height > 1 whose message buffers are flushed by cleaner thread
uint64_t cleaner_empty_nodes; // number of nodes that are selected by cleaner, but whose buffers are empty
uint64_t cleaner_nodes_dirtied; // number of nodes that are made dirty by the cleaner thread
uint64_t cleaner_max_buffer_size; // max number of bytes in message buffer flushed by cleaner thread
uint64_t cleaner_min_buffer_size;
uint64_t cleaner_total_buffer_size;
uint64_t cleaner_max_buffer_workdone; // max workdone value of any message buffer flushed by cleaner thread
uint64_t cleaner_min_buffer_workdone;
uint64_t cleaner_total_buffer_workdone;
uint64_t cleaner_num_leaf_merges_started; // number of times cleaner thread tries to merge a leaf
uint64_t cleaner_num_leaf_merges_running; // number of cleaner thread leaf merges in progress
uint64_t cleaner_num_leaf_merges_completed; // number of times cleaner thread successfully merges a leaf
uint64_t cleaner_num_dirtied_for_leaf_merge; // nodes dirtied by the "flush from root" process to merge a leaf node
uint64_t flush_total; // total number of flushes done by flusher threads or cleaner threads
uint64_t flush_in_memory; // number of in memory flushes
uint64_t flush_needed_io; // number of flushes that had to read a child (or part) off disk
uint64_t flush_cascades; // number of flushes that triggered another flush in the child
uint64_t flush_cascades_1; // number of flushes that triggered 1 cascading flush
uint64_t flush_cascades_2; // number of flushes that triggered 2 cascading flushes
uint64_t flush_cascades_3; // number of flushes that triggered 3 cascading flushes
uint64_t flush_cascades_4; // number of flushes that triggered 4 cascading flushes
uint64_t flush_cascades_5; // number of flushes that triggered 5 cascading flushes
uint64_t flush_cascades_gt_5; // number of flushes that triggered more than 5 cascading flushes
uint64_t split_leaf; // number of leaf nodes split
uint64_t split_nonleaf; // number of nonleaf nodes split
uint64_t merge_leaf; // number of times leaf nodes are merged
uint64_t merge_nonleaf; // number of times nonleaf nodes are merged
uint64_t balance_leaf; // number of times a leaf node is balanced inside brt
// Row indices into BRT_FLUSHER_STATUS_S.status[]. Each enumerator names one
// flusher/cleaner counter; toku_brt_flusher_status_init() attaches a keyname,
// type and legend to every row.
typedef enum {
BRT_FLUSHER_CLEANER_TOTAL_NODES = 0, // total number of nodes whose buffers are potentially flushed by cleaner thread
BRT_FLUSHER_CLEANER_H1_NODES, // number of nodes of height one whose message buffers are flushed by cleaner thread
BRT_FLUSHER_CLEANER_HGT1_NODES, // number of nodes of height > 1 whose message buffers are flushed by cleaner thread
BRT_FLUSHER_CLEANER_EMPTY_NODES, // number of nodes that are selected by cleaner, but whose buffers are empty
BRT_FLUSHER_CLEANER_NODES_DIRTIED, // number of nodes that are made dirty by the cleaner thread
BRT_FLUSHER_CLEANER_MAX_BUFFER_SIZE, // max number of bytes in message buffer flushed by cleaner thread
BRT_FLUSHER_CLEANER_MIN_BUFFER_SIZE, // min number of bytes in a message buffer flushed by cleaner thread (seeded with UINT64_MAX at init)
BRT_FLUSHER_CLEANER_TOTAL_BUFFER_SIZE, // total bytes in message buffers flushed by cleaner thread
BRT_FLUSHER_CLEANER_MAX_BUFFER_WORKDONE, // max workdone value of any message buffer flushed by cleaner thread
BRT_FLUSHER_CLEANER_MIN_BUFFER_WORKDONE, // min workdone value of any message buffer flushed by cleaner thread (seeded with UINT64_MAX at init)
BRT_FLUSHER_CLEANER_TOTAL_BUFFER_WORKDONE, // total workdone over message buffers flushed by cleaner thread
BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_STARTED, // number of times cleaner thread tries to merge a leaf
BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_RUNNING, // number of cleaner thread leaf merges in progress
BRT_FLUSHER_CLEANER_NUM_LEAF_MERGES_COMPLETED, // number of times cleaner thread successfully merges a leaf
BRT_FLUSHER_CLEANER_NUM_DIRTIED_FOR_LEAF_MERGE, // nodes dirtied by the "flush from root" process to merge a leaf node
BRT_FLUSHER_FLUSH_TOTAL, // total number of flushes done by flusher threads or cleaner threads
BRT_FLUSHER_FLUSH_IN_MEMORY, // number of in memory flushes
BRT_FLUSHER_FLUSH_NEEDED_IO, // number of flushes that had to read a child (or part) off disk
BRT_FLUSHER_FLUSH_CASCADES, // number of flushes that triggered another flush in the child
BRT_FLUSHER_FLUSH_CASCADES_1, // number of flushes that triggered 1 cascading flush
BRT_FLUSHER_FLUSH_CASCADES_2, // number of flushes that triggered 2 cascading flushes
BRT_FLUSHER_FLUSH_CASCADES_3, // number of flushes that triggered 3 cascading flushes
BRT_FLUSHER_FLUSH_CASCADES_4, // number of flushes that triggered 4 cascading flushes
BRT_FLUSHER_FLUSH_CASCADES_5, // number of flushes that triggered 5 cascading flushes
BRT_FLUSHER_FLUSH_CASCADES_GT_5, // number of flushes that triggered more than 5 cascading flushes
BRT_FLUSHER_SPLIT_LEAF, // number of leaf nodes split
BRT_FLUSHER_SPLIT_NONLEAF, // number of nonleaf nodes split
BRT_FLUSHER_MERGE_LEAF, // number of times leaf nodes are merged
BRT_FLUSHER_MERGE_NONLEAF, // number of times nonleaf nodes are merged
BRT_FLUSHER_BALANCE_LEAF, // number of times a leaf node is balanced inside brt
BRT_FLUSHER_STATUS_NUM_ROWS // row count, not a row: must stay last because it sizes status[] below
} brt_flusher_status_entry;
// Flusher status table: one engine-status row per brt_flusher_status_entry.
typedef struct {
bool initialized; // set to true at the end of toku_brt_flusher_status_init()
TOKU_ENGINE_STATUS_ROW_S status[BRT_FLUSHER_STATUS_NUM_ROWS]; // indexed by brt_flusher_status_entry
} BRT_FLUSHER_STATUS_S, *BRT_FLUSHER_STATUS;
void toku_brt_flusher_status_init(void) __attribute__((__constructor__));
@ -132,11 +138,17 @@ brt_nonleaf_split(
************************************************************************
*/
typedef struct brt_hot_status {
uint64_t num_started; // number of HOT operations that have begun
uint64_t num_completed; // number of HOT operations that have successfully completed
uint64_t num_aborted; // number of HOT operations that have been aborted
uint64_t max_root_flush_count; // max number of flushes from root ever required to optimize a tree
// Row indices into BRT_HOT_STATUS_S.status[], one per HOT (hot optimize) counter.
typedef enum {
BRT_HOT_NUM_STARTED = 0, // number of HOT operations that have begun
BRT_HOT_NUM_COMPLETED, // number of HOT operations that have successfully completed
BRT_HOT_NUM_ABORTED, // number of HOT operations that have been aborted
BRT_HOT_MAX_ROOT_FLUSH_COUNT, // max number of flushes from root ever required to optimize a tree
BRT_HOT_STATUS_NUM_ROWS // row count, not a row: must stay last because it sizes status[] below
} brt_hot_status_entry;
// HOT status table: one engine-status row per brt_hot_status_entry.
typedef struct {
bool initialized; // set to true at the end of toku_brt_hot_status_init()
TOKU_ENGINE_STATUS_ROW_S status[BRT_HOT_STATUS_NUM_ROWS]; // indexed by brt_hot_status_entry
} BRT_HOT_STATUS_S, *BRT_HOT_STATUS;
void toku_brt_hot_status_init(void) __attribute__((__constructor__));

View file

@ -1,4 +1,4 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2011 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
@ -28,16 +28,34 @@ struct hot_flusher_extra {
bool rightmost_leaf_seen;
};
static BRT_HOT_STATUS_S hot_status;
static volatile BRT_HOT_STATUS_S hot_status;
/* Fill in the descriptive (non-value) fields of one HOT status row.
 *   k: row index into hot_status.status[]; also stringified into keyname
 *   t: value stored in the row's `type` field
 *   l: string literal appended to the "hot: " legend prefix
 * The body is wrapped in do { } while (0) so that `STATUS_INIT(...);` expands
 * to exactly one statement and stays correct inside an unbraced if/else.
 */
#define STATUS_INIT(k,t,l) do {                         \
        hot_status.status[k].keyname = #k;              \
        hot_status.status[k].type    = t;               \
        hot_status.status[k].legend  = "hot: " l;       \
    } while (0)

/* Lvalue naming the numeric value of status row x. */
#define STATUS_VALUE(x) hot_status.status[x].value.num
/* Describe every row of hot_status (keyname, type, legend) and mark the
 * table as initialized. Value fields are left untouched here; hot_status has
 * static storage, so they start at zero.
 */
void
toku_brt_hot_status_init(void)
{
    STATUS_INIT(BRT_HOT_NUM_STARTED, UINT64, "operations ever started");
    STATUS_INIT(BRT_HOT_NUM_COMPLETED, UINT64, "operations successfully completed");
    STATUS_INIT(BRT_HOT_NUM_ABORTED, UINT64, "operations aborted");
    STATUS_INIT(BRT_HOT_MAX_ROOT_FLUSH_COUNT, UINT64, "max number of flushes from root ever required to optimize a tree");

    // The max-root-flush row is updated with a plain compare-and-store in
    // toku_brt_hot_optimize(), unlike the other rows, which use atomic adds.
    // NOTE(review): DRD_IGNORE_VAR presumably tells the DRD race detector not
    // to report that access - confirm against the macro's definition.
    DRD_IGNORE_VAR(STATUS_VALUE(BRT_HOT_MAX_ROOT_FLUSH_COUNT));

    hot_status.initialized = true;
}
#undef STATUS_INIT
/* Copy the whole HOT status table into *s.
 * If the table has not been marked initialized, its row descriptions are
 * filled in first.
 */
void
toku_brt_hot_get_status(BRT_HOT_STATUS s) {
    const int already_set_up = hot_status.initialized;
    if (!already_set_up) {
        toku_brt_hot_status_init();
    }
    *s = hot_status;
}
@ -229,7 +247,7 @@ toku_brt_hot_optimize(BRT brt,
uint64_t loop_count = 0;
MSN msn_at_start_of_hot = ZERO_MSN; // capture msn from root at
// start of HOT operation
(void) __sync_fetch_and_add(&hot_status.num_started, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(BRT_HOT_NUM_STARTED), 1);
{
toku_cachetable_call_ydb_lock(brt->h->cf);
@ -273,8 +291,8 @@ toku_brt_hot_optimize(BRT brt,
loop_count++;
if (loop_count > hot_status.max_root_flush_count) {
hot_status.max_root_flush_count = loop_count;
if (loop_count > STATUS_VALUE(BRT_HOT_MAX_ROOT_FLUSH_COUNT)) {
STATUS_VALUE(BRT_HOT_MAX_ROOT_FLUSH_COUNT) = loop_count;
}
// Initialize the maximum current key. We need to do this for
@ -338,10 +356,12 @@ toku_brt_hot_optimize(BRT brt,
}
if (success) {
(void) __sync_fetch_and_add(&hot_status.num_completed, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(BRT_HOT_NUM_COMPLETED), 1);
} else {
(void) __sync_fetch_and_add(&hot_status.num_aborted, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(BRT_HOT_NUM_ABORTED), 1);
}
}
return r;
}
#undef STATUS_VALUE

View file

@ -798,78 +798,97 @@ int toku_db_badformat(void) __attribute__((__warn_unused_result__));
int toku_brt_remove_on_commit(TOKUTXN child, DBT* iname_dbt_p) __attribute__((__warn_unused_result__));
int toku_brt_remove_now(CACHETABLE ct, DBT* iname_dbt_p) __attribute__((__warn_unused_result__));
typedef struct brt_upgrade_status {
u_int64_t header_13; // how many headers were upgraded from version 13
u_int64_t nonleaf_13;
u_int64_t leaf_13;
u_int64_t optimized_for_upgrade; // how many optimize_for_upgrade messages were sent
// Row indices into BRT_UPGRADE_STATUS_S.status[], one per upgrade counter.
typedef enum {
BRT_UPGRADE_FOOTPRINT = 0, // refreshed from toku_log_upgrade_get_footprint() on every status query
BRT_UPGRADE_HEADER_13, // how many headers were upgraded from version 13
BRT_UPGRADE_NONLEAF_13, // V13 nonleaf nodes
BRT_UPGRADE_LEAF_13, // V13 leaf nodes
BRT_UPGRADE_OPTIMIZED_FOR_UPGRADE, // how many optimize_for_upgrade messages were sent
BRT_UPGRADE_STATUS_NUM_ROWS // row count, not a row: must stay last because it sizes status[] below
} brt_upgrade_status_entry;
// Upgrade status table: one engine-status row per brt_upgrade_status_entry.
typedef struct {
// NOTE(review): declared BOOL here while the flusher/hot/brt status structs use bool - confirm whether the difference is intended.
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[BRT_UPGRADE_STATUS_NUM_ROWS]; // indexed by brt_upgrade_status_entry
} BRT_UPGRADE_STATUS_S, *BRT_UPGRADE_STATUS;
void toku_brt_get_upgrade_status(BRT_UPGRADE_STATUS);
void toku_brt_upgrade_get_status(BRT_UPGRADE_STATUS);
typedef struct le_status {
u_int64_t max_committed_xr;
u_int64_t max_provisional_xr;
u_int64_t expanded;
u_int64_t max_memsize;
// Row indices into LE_STATUS_S.status[].
// NOTE(review): the legends for these rows are defined elsewhere; the
// per-row meanings below are inferred from the names only - confirm.
typedef enum {
LE_MAX_COMMITTED_XR = 0, // presumably a maximum count of committed transaction records
LE_MAX_PROVISIONAL_XR, // presumably a maximum count of provisional transaction records
LE_EXPANDED, // presumably a count of expansions - TODO confirm what is expanded
LE_MAX_MEMSIZE, // presumably the largest in-memory size observed
LE_STATUS_NUM_ROWS // row count, not a row: must stay last because it sizes status[] below
} le_status_entry;
// Status table: one engine-status row per le_status_entry.
typedef struct {
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[LE_STATUS_NUM_ROWS]; // indexed by le_status_entry
} LE_STATUS_S, *LE_STATUS;
void toku_le_get_status(LE_STATUS);
// Flat layout of the brt status counters, one named field per counter.
// NOTE(review): the brt_status_entry enum lists the same counters as row
// indices of BRT_STATUS_S - confirm which representation is current.
struct brt_status {
u_int64_t updates;
u_int64_t updates_broadcast;
u_int64_t descriptor_set;
u_int64_t partial_fetch_hit; // node partition is present
u_int64_t partial_fetch_miss; // node is present but partition is absent
u_int64_t partial_fetch_compressed; // node partition is present but compressed
u_int64_t partial_evictions_nonleaf; // number of nonleaf node partial evictions
u_int64_t partial_evictions_leaf; // number of leaf node partial evictions
u_int64_t msn_discards; // how many messages were ignored by leaf because of msn
u_int64_t max_workdone; // max workdone value of any buffer
uint64_t total_searches; // total number of searches
uint64_t total_retries; // total number of search retries due to TRY_AGAIN
uint64_t max_search_excess_retries; // max number of excess search retries (retries - treeheight) due to TRY_AGAIN
uint64_t max_search_root_tries; // max number of times root node was fetched in a single search
uint64_t search_root_retries; // number of searches that required the root node to be fetched more than once
uint64_t search_tries_gt_height; // number of searches that required more tries than the height of the tree
uint64_t search_tries_gt_heightplus3; // number of searches that required more tries than the height of the tree plus three
uint64_t disk_flush_leaf; // number of leaf nodes flushed to disk, not for checkpoint
uint64_t disk_flush_nonleaf; // number of nonleaf nodes flushed to disk, not for checkpoint
uint64_t disk_flush_leaf_for_checkpoint; // number of leaf nodes flushed to disk for checkpoint
uint64_t disk_flush_nonleaf_for_checkpoint; // number of nonleaf nodes flushed to disk for checkpoint
uint64_t create_leaf; // number of leaf nodes created
uint64_t create_nonleaf; // number of nonleaf nodes created
uint64_t destroy_leaf; // number of leaf nodes destroyed
uint64_t destroy_nonleaf; // number of nonleaf nodes destroyed
uint64_t dirty_leaf; // number of times leaf nodes are dirtied when previously clean
uint64_t dirty_nonleaf; // number of times nonleaf nodes are dirtied when previously clean
uint64_t msg_bytes_in; // how many bytes of messages injected at root (for all trees)
uint64_t msg_bytes_out; // how many bytes of messages flushed from h1 nodes to leaves
uint64_t msg_bytes_curr; // how many bytes of messages currently in trees (estimate)
uint64_t msg_bytes_max; // presumably the largest msg_bytes_curr has ever been (estimate); the earlier comment was a copy of the msg_bytes_curr one - confirm
uint64_t msg_num; // how many messages injected at root
uint64_t msg_num_broadcast; // how many broadcast messages injected at root
uint64_t num_basements_decompressed_normal; // how many basement nodes were decompressed because they were the target of a query
uint64_t num_basements_decompressed_aggressive; // ... because they were between lc and rc
uint64_t num_basements_decompressed_prefetch;
uint64_t num_basements_decompressed_write;
uint64_t num_msg_buffer_decompressed_normal; // how many msg buffers were decompressed because they were the target of a query
uint64_t num_msg_buffer_decompressed_aggressive; // ... because they were between lc and rc
uint64_t num_msg_buffer_decompressed_prefetch;
uint64_t num_msg_buffer_decompressed_write;
uint64_t num_pivots_fetched_query; // how many pivots were fetched for a query
uint64_t num_pivots_fetched_prefetch; // ... for a prefetch
uint64_t num_pivots_fetched_write; // ... for a write
uint64_t num_basements_fetched_normal; // how many basement nodes were fetched because they were the target of a query
uint64_t num_basements_fetched_aggressive; // ... because they were between lc and rc
uint64_t num_basements_fetched_prefetch;
uint64_t num_basements_fetched_write;
uint64_t num_msg_buffer_fetched_normal; // how many msg buffers were fetched because they were the target of a query
uint64_t num_msg_buffer_fetched_aggressive; // ... because they were between lc and rc
uint64_t num_msg_buffer_fetched_prefetch;
uint64_t num_msg_buffer_fetched_write;
};
// Row indices into BRT_STATUS_S.status[], one per brt counter.
typedef enum {
BRT_UPDATES = 0, // dictionary updates
BRT_UPDATES_BROADCAST, // dictionary broadcast updates
BRT_DESCRIPTOR_SET, // descriptor set
BRT_PARTIAL_FETCH_HIT, // node partition is present
BRT_PARTIAL_FETCH_MISS, // node is present but partition is absent
BRT_PARTIAL_FETCH_COMPRESSED, // node partition is present but compressed
BRT_PARTIAL_EVICTIONS_NONLEAF, // number of nonleaf node partial evictions
BRT_PARTIAL_EVICTIONS_LEAF, // number of leaf node partial evictions
BRT_MSN_DISCARDS, // how many messages were ignored by leaf because of msn
BRT_MAX_WORKDONE, // max workdone value of any buffer
BRT_TOTAL_SEARCHES, // total number of searches
BRT_TOTAL_RETRIES, // total number of search retries due to TRY_AGAIN
BRT_MAX_SEARCH_EXCESS_RETRIES, // max number of excess search retries (retries - treeheight) due to TRY_AGAIN
BRT_MAX_SEARCH_ROOT_TRIES, // max number of times root node was fetched in a single search
BRT_SEARCH_ROOT_RETRIES, // number of searches that required the root node to be fetched more than once
BRT_SEARCH_TRIES_GT_HEIGHT, // number of searches that required more tries than the height of the tree
BRT_SEARCH_TRIES_GT_HEIGHTPLUS3, // number of searches that required more tries than the height of the tree plus three
BRT_DISK_FLUSH_LEAF, // number of leaf nodes flushed to disk, not for checkpoint
BRT_DISK_FLUSH_NONLEAF, // number of nonleaf nodes flushed to disk, not for checkpoint
BRT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, // number of leaf nodes flushed to disk for checkpoint
BRT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, // number of nonleaf nodes flushed to disk for checkpoint
BRT_CREATE_LEAF, // number of leaf nodes created
BRT_CREATE_NONLEAF, // number of nonleaf nodes created
BRT_DESTROY_LEAF, // number of leaf nodes destroyed
BRT_DESTROY_NONLEAF, // number of nonleaf nodes destroyed
BRT_DIRTY_LEAF, // number of times leaf nodes are dirtied when previously clean
BRT_DIRTY_NONLEAF, // number of times nonleaf nodes are dirtied when previously clean
BRT_MSG_BYTES_IN, // how many bytes of messages injected at root (for all trees)
BRT_MSG_BYTES_OUT, // how many bytes of messages flushed from h1 nodes to leaves
BRT_MSG_BYTES_CURR, // how many bytes of messages currently in trees (estimate)
BRT_MSG_BYTES_MAX, // presumably the largest BRT_MSG_BYTES_CURR has ever been (estimate); the earlier comment was a copy of the _CURR one - confirm
BRT_MSG_NUM, // how many messages injected at root
BRT_MSG_NUM_BROADCAST, // how many broadcast messages injected at root
BRT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, // how many basement nodes were decompressed because they were the target of a query
BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, // ... because they were between lc and rc
BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH,
BRT_NUM_BASEMENTS_DECOMPRESSED_WRITE,
BRT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, // how many msg buffers were decompressed because they were the target of a query
BRT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, // ... because they were between lc and rc
BRT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH,
BRT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE,
BRT_NUM_PIVOTS_FETCHED_QUERY, // how many pivots were fetched for a query
BRT_NUM_PIVOTS_FETCHED_PREFETCH, // ... for a prefetch
BRT_NUM_PIVOTS_FETCHED_WRITE, // ... for a write
BRT_NUM_BASEMENTS_FETCHED_NORMAL, // how many basement nodes were fetched because they were the target of a query
BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
BRT_NUM_BASEMENTS_FETCHED_PREFETCH,
BRT_NUM_BASEMENTS_FETCHED_WRITE,
BRT_NUM_MSG_BUFFER_FETCHED_NORMAL, // how many msg buffers were fetched because they were the target of a query
BRT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, // ... because they were between lc and rc
BRT_NUM_MSG_BUFFER_FETCHED_PREFETCH,
BRT_NUM_MSG_BUFFER_FETCHED_WRITE,
BRT_STATUS_NUM_ROWS // row count, not a row: must stay last because it sizes status[] below
} brt_status_entry;
// brt status table: one engine-status row per brt_status_entry.
typedef struct {
bool initialized;
TOKU_ENGINE_STATUS_ROW_S status[BRT_STATUS_NUM_ROWS]; // indexed by brt_status_entry
} BRT_STATUS_S, *BRT_STATUS;
void toku_brt_get_status(BRT_STATUS);

View file

@ -18,12 +18,41 @@
#define cilk_worker_count 1
#endif
static BRT_UPGRADE_STATUS_S upgrade_status; // accountability, used in backwards_x.c
void
toku_brt_get_upgrade_status (BRT_UPGRADE_STATUS s) {
*s = upgrade_status;
static BRT_UPGRADE_STATUS_S brt_upgrade_status;
/* Fill in the descriptive (non-value) fields of one upgrade status row.
 *   k: row index into brt_upgrade_status.status[]; also stringified into keyname
 *   t: value stored in the row's `type` field
 *   l: string literal appended to the "brt upgrade: " legend prefix
 * The body is wrapped in do { } while (0) so that `UPGRADE_STATUS_INIT(...);`
 * expands to exactly one statement and stays correct inside an unbraced if/else.
 */
#define UPGRADE_STATUS_INIT(k,t,l) do {                                 \
        brt_upgrade_status.status[k].keyname = #k;                      \
        brt_upgrade_status.status[k].type    = t;                       \
        brt_upgrade_status.status[k].legend  = "brt upgrade: " l;       \
    } while (0)
// Describe every row of brt_upgrade_status and mark the table initialized.
// Called from toku_brt_upgrade_get_status() when the table has not been
// marked initialized yet.
static void
status_init(void)
{
// Note, this function initializes the keyname, type, and legend fields.
// Value fields are initialized to zero by compiler.
UPGRADE_STATUS_INIT(BRT_UPGRADE_FOOTPRINT, UINT64, "footprint");
UPGRADE_STATUS_INIT(BRT_UPGRADE_HEADER_13, UINT64, "V13 headers");
UPGRADE_STATUS_INIT(BRT_UPGRADE_NONLEAF_13, UINT64, "V13 nonleaf nodes");
UPGRADE_STATUS_INIT(BRT_UPGRADE_LEAF_13, UINT64, "V13 leaf nodes");
UPGRADE_STATUS_INIT(BRT_UPGRADE_OPTIMIZED_FOR_UPGRADE, UINT64, "optimized for upgrade");
// NOTE(review): `initialized` is declared BOOL in BRT_UPGRADE_STATUS_S but is assigned `true` here - confirm the two are compatible.
brt_upgrade_status.initialized = true;
}
#undef UPGRADE_STATUS_INIT
/* Lvalue naming the numeric value of upgrade status row x. */
#define UPGRADE_STATUS_VALUE(x) brt_upgrade_status.status[x].value.num

/* Copy the whole upgrade status table into *s.
 * Row descriptions are filled in on first use, and the footprint row is
 * re-read from toku_log_upgrade_get_footprint() on every call before the copy.
 */
void
toku_brt_upgrade_get_status(BRT_UPGRADE_STATUS s) {
    if (brt_upgrade_status.initialized) {
        /* rows already described; nothing to set up */
    } else {
        status_init();
    }
    UPGRADE_STATUS_VALUE(BRT_UPGRADE_FOOTPRINT) = toku_log_upgrade_get_footprint();
    *s = brt_upgrade_status;
}
// performance tracing
#define DO_TOKU_TRACE 0
@ -1764,7 +1793,7 @@ toku_maybe_upgrade_brt(BRT t) { // possibly do some work to complete the version
if (r == 0 && upgrade) {
r = toku_brt_optimize_for_upgrade(t);
if (r==0)
__sync_fetch_and_add(&upgrade_status.optimized_for_upgrade, 1);
__sync_fetch_and_add(&UPGRADE_STATUS_VALUE(BRT_UPGRADE_OPTIMIZED_FOR_UPGRADE), 1);
}
if (r == 0) {
t->h->upgrade_brt_performed = TRUE; // no further upgrade necessary
@ -2228,7 +2257,7 @@ deserialize_brtheader_versioned (int fd, struct rbuf *rb, struct brt_header **br
h->flags &= ~TOKU_DB_VALCMP_BUILTIN_13;
}
h->layout_version++;
__sync_fetch_and_add(&upgrade_status.header_13, 1); // how many header nodes upgraded from v13
__sync_fetch_and_add(&UPGRADE_STATUS_VALUE(BRT_UPGRADE_HEADER_13), 1); // how many header nodes upgraded from v13
upgrade++;
//Fall through on purpose
case BRT_LAYOUT_VERSION_14:
@ -2871,3 +2900,4 @@ cleanup:
}
#undef UPGRADE_STATUS_VALUE

View file

@ -1,4 +1,4 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
@ -158,13 +158,89 @@ Lookup:
static const uint32_t this_version = BRT_LAYOUT_VERSION;
static BRT_STATUS_S brt_status;
/* Status is intended for display to humans to help understand system behavior.
* It does not need to be perfectly thread-safe.
*/
static volatile BRT_STATUS_S brt_status;
void
/* Fill in the descriptive (non-value) fields of one brt status row.
 *   k: row index into brt_status.status[]; also stringified into keyname
 *   t: value stored in the row's `type` field
 *   l: string literal appended to the "brt: " legend prefix
 * The body is wrapped in do { } while (0) so that `STATUS_INIT(...);` expands
 * to exactly one statement and stays correct inside an unbraced if/else.
 */
#define STATUS_INIT(k,t,l) do {                         \
        brt_status.status[k].keyname = #k;              \
        brt_status.status[k].type    = t;               \
        brt_status.status[k].legend  = "brt: " l;       \
    } while (0)
// Fill in the keyname, type, and legend of every row of brt_status, then mark it initialized.
// Invoked lazily by toku_brt_get_status() the first time status is requested.
static void
status_init(void)
{
// Note, this function initializes only the keyname, type, and legend fields.
// The value fields are deliberately not written here: brt_status is a file-scope static,
// so they start out zero without any explicit initialization.
STATUS_INIT(BRT_UPDATES, UINT64, "dictionary updates");
STATUS_INIT(BRT_UPDATES_BROADCAST, UINT64, "dictionary broadcast updates");
STATUS_INIT(BRT_DESCRIPTOR_SET, UINT64, "descriptor set");
STATUS_INIT(BRT_PARTIAL_FETCH_HIT, UINT64, "partial fetch hit, node partition is present");
STATUS_INIT(BRT_PARTIAL_FETCH_MISS, UINT64, "partial fetch miss, node present but partition absent");
STATUS_INIT(BRT_PARTIAL_FETCH_COMPRESSED, UINT64, "partial fetch, node partition is present but compressed");
STATUS_INIT(BRT_PARTIAL_EVICTIONS_NONLEAF, UINT64, "nonleaf node partial evictions");
STATUS_INIT(BRT_PARTIAL_EVICTIONS_LEAF, UINT64, "leaf node partial evictions");
STATUS_INIT(BRT_MSN_DISCARDS, UINT64, "messages ignored by leaf due to msn");
STATUS_INIT(BRT_MAX_WORKDONE, UINT64, "max workdone over all buffers");
STATUS_INIT(BRT_TOTAL_SEARCHES, UINT64, "total searches");
STATUS_INIT(BRT_TOTAL_RETRIES, UINT64, "total search retries due to TRY_AGAIN");
STATUS_INIT(BRT_MAX_SEARCH_EXCESS_RETRIES, UINT64, "max excess search retries (retries - tree height) due to TRY_AGAIN");
STATUS_INIT(BRT_MAX_SEARCH_ROOT_TRIES, UINT64, "max times root fetched in a single search");
STATUS_INIT(BRT_SEARCH_ROOT_RETRIES, UINT64, "searches requiring root to be fetched more than once");
STATUS_INIT(BRT_SEARCH_TRIES_GT_HEIGHT, UINT64, "searches requiring more tries than the height of the tree");
STATUS_INIT(BRT_SEARCH_TRIES_GT_HEIGHTPLUS3, UINT64, "searches requiring more tries than the height of the tree plus three");
STATUS_INIT(BRT_DISK_FLUSH_LEAF, UINT64, "leaf nodes flushed to disk (not for checkpoint)");
STATUS_INIT(BRT_DISK_FLUSH_NONLEAF, UINT64, "nonleaf nodes flushed to disk (not for checkpoint)");
STATUS_INIT(BRT_DISK_FLUSH_LEAF_FOR_CHECKPOINT, UINT64, "leaf nodes flushed to disk (for checkpoint)");
STATUS_INIT(BRT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT, UINT64, "nonleaf nodes flushed to disk (for checkpoint)");
STATUS_INIT(BRT_CREATE_LEAF, UINT64, "leaf nodes created");
STATUS_INIT(BRT_CREATE_NONLEAF, UINT64, "nonleaf nodes created");
STATUS_INIT(BRT_DESTROY_LEAF, UINT64, "leaf nodes destroyed");
STATUS_INIT(BRT_DESTROY_NONLEAF, UINT64, "nonleaf nodes destroyed");
STATUS_INIT(BRT_DIRTY_LEAF, UINT64, "leaf node transitions clean -> dirty");
STATUS_INIT(BRT_DIRTY_NONLEAF, UINT64, "nonleaf node transitions clean -> dirty");
STATUS_INIT(BRT_MSG_BYTES_IN, UINT64, "bytes of messages injected at root (all trees)");
STATUS_INIT(BRT_MSG_BYTES_OUT, UINT64, "bytes of messages flushed from h1 nodes to leaves");
STATUS_INIT(BRT_MSG_BYTES_CURR, UINT64, "bytes of messages currently in trees (estimate)");
STATUS_INIT(BRT_MSG_BYTES_MAX, UINT64, "max bytes of messages ever in trees (estimate)");
STATUS_INIT(BRT_MSG_NUM, UINT64, "messages injected at root");
STATUS_INIT(BRT_MSG_NUM_BROADCAST, UINT64, "broadcast messages injected at root");
STATUS_INIT(BRT_NUM_BASEMENTS_DECOMPRESSED_NORMAL, UINT64, "basements decompressed as a target of a query");
STATUS_INIT(BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE, UINT64, "basements decompressed for prelocked range");
STATUS_INIT(BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH, UINT64, "basements decompressed for prefetch");
STATUS_INIT(BRT_NUM_BASEMENTS_DECOMPRESSED_WRITE, UINT64, "basements decompressed for write");
STATUS_INIT(BRT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL, UINT64, "buffers decompressed as a target of a query");
STATUS_INIT(BRT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE, UINT64, "buffers decompressed for prelocked range");
STATUS_INIT(BRT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH, UINT64, "buffers decompressed for prefetch");
STATUS_INIT(BRT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE, UINT64, "buffers decompressed for write");
STATUS_INIT(BRT_NUM_PIVOTS_FETCHED_QUERY, UINT64, "pivots fetched for query");
STATUS_INIT(BRT_NUM_PIVOTS_FETCHED_PREFETCH, UINT64, "pivots fetched for prefetch");
STATUS_INIT(BRT_NUM_PIVOTS_FETCHED_WRITE, UINT64, "pivots fetched for write");
STATUS_INIT(BRT_NUM_BASEMENTS_FETCHED_NORMAL, UINT64, "basements fetched as a target of a query");
STATUS_INIT(BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE, UINT64, "basements fetched for prelocked range");
STATUS_INIT(BRT_NUM_BASEMENTS_FETCHED_PREFETCH, UINT64, "basements fetched for prefetch");
STATUS_INIT(BRT_NUM_BASEMENTS_FETCHED_WRITE, UINT64, "basements fetched for write");
STATUS_INIT(BRT_NUM_MSG_BUFFER_FETCHED_NORMAL, UINT64, "buffers fetched as a target of a query");
STATUS_INIT(BRT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE, UINT64, "buffers fetched for prelocked range");
STATUS_INIT(BRT_NUM_MSG_BUFFER_FETCHED_PREFETCH, UINT64, "buffers fetched for prefetch");
STATUS_INIT(BRT_NUM_MSG_BUFFER_FETCHED_WRITE, UINT64, "buffers fetched for write");
// Set last, so toku_brt_get_status() skips this function on later calls.
brt_status.initialized = true;
}
#undef STATUS_INIT
// Copy the current BRT engine status (row descriptions and counter values) into *s.
// The row descriptions are filled in by status_init() the first time this is called.
void
toku_brt_get_status(BRT_STATUS s) {
    // First query only: set keyname/type/legend for every row.
    if (!brt_status.initialized)
        status_init();
    // Whole-struct copy; the counters are read without any locking.
    *s = brt_status;
}
#define STATUS_VALUE(x) brt_status.status[x].value.num
void
toku_brt_header_suppress_rollbacks(struct brt_header *h, TOKUTXN txn) {
TXNID txnid = toku_txn_get_txnid(txn);
@ -270,11 +346,6 @@ toku_brt_nonleaf_is_gorged (BRTNODE node) {
(!buffers_are_empty));
}
// FIXME this is not used
static inline void add_to_brt_status(u_int64_t* val, u_int64_t data) {
(*val) += data;
}
static void brt_verify_flags(BRT brt, BRTNODE node) {
assert(brt->flags == node->flags);
}
@ -546,16 +617,16 @@ toku_mark_node_dirty(BRTNODE node) {
// If node is a leafnode, and if it has any basements, and if it is clean, then:
// update the header with the aggregate of the deltas in the basements (do NOT clear the deltas).
if (!node->dirty) {
if (node->height == 0) {
brt_status.dirty_leaf++;
struct brt_header *h = node->h;
for (int i = 0; i < node->n_children; i++) {
STAT64INFO delta = &(BLB(node,i)->stat64_delta);
update_header_stats(&h->in_memory_stats, delta);
}
}
else
brt_status.dirty_nonleaf++;
if (node->height == 0) {
STATUS_VALUE(BRT_DIRTY_LEAF)++;
struct brt_header *h = node->h;
for (int i = 0; i < node->n_children; i++) {
STAT64INFO delta = &(BLB(node,i)->stat64_delta);
update_header_stats(&h->in_memory_stats, delta);
}
}
else
STATUS_VALUE(BRT_DIRTY_NONLEAF)++;
}
node->dirty = 1;
}
@ -589,22 +660,22 @@ void toku_brtnode_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM nodename
}
}
if (height == 0) {
struct brt_header * header_in_node = brtnode->h;
invariant(header_in_node == h);
update_header_stats(&(h->on_disk_stats), &deltas);
if (for_checkpoint) {
update_header_stats(&(h->checkpoint_staging_stats), &deltas);
}
struct brt_header * header_in_node = brtnode->h;
invariant(header_in_node == h);
update_header_stats(&(h->on_disk_stats), &deltas);
if (for_checkpoint) {
update_header_stats(&(h->checkpoint_staging_stats), &deltas);
}
if (for_checkpoint)
__sync_fetch_and_add(&brt_status.disk_flush_leaf_for_checkpoint, 1);
__sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_LEAF_FOR_CHECKPOINT), 1);
else
__sync_fetch_and_add(&brt_status.disk_flush_leaf, 1);
__sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_LEAF), 1);
}
else {
if (for_checkpoint)
__sync_fetch_and_add(&brt_status.disk_flush_nonleaf_for_checkpoint, 1);
__sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_NONLEAF_FOR_CHECKPOINT), 1);
else
__sync_fetch_and_add(&brt_status.disk_flush_nonleaf, 1);
__sync_fetch_and_add(&STATUS_VALUE(BRT_DISK_FLUSH_NONLEAF), 1);
}
}
//printf("%s:%d %p->mdict[0]=%p\n", __FILE__, __LINE__, brtnode, brtnode->mdicts[0]);
@ -619,11 +690,11 @@ void
toku_brt_status_update_pivot_fetch_reason(struct brtnode_fetch_extra *bfe)
{
if (bfe->type == brtnode_fetch_prefetch) {
brt_status.num_pivots_fetched_prefetch++;
STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_PREFETCH)++;
} else if (bfe->type == brtnode_fetch_all) {
brt_status.num_pivots_fetched_write++;
STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_WRITE)++;
} else if (bfe->type == brtnode_fetch_subset) {
brt_status.num_pivots_fetched_query++;
STATUS_VALUE(BRT_NUM_PIVOTS_FETCHED_QUERY)++;
}
}
@ -724,7 +795,7 @@ int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATT
for (int i = 0; i < node->n_children; i++) {
if (BP_STATE(node,i) == PT_AVAIL) {
if (BP_SHOULD_EVICT(node,i)) {
brt_status.partial_evictions_nonleaf++;
STATUS_VALUE(BRT_PARTIAL_EVICTIONS_NONLEAF)++;
cilk_spawn compress_internal_node_partition(node, i);
}
else {
@ -746,7 +817,7 @@ int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATT
for (int i = 0; i < node->n_children; i++) {
// Get rid of compressed stuff no matter what.
if (BP_STATE(node,i) == PT_COMPRESSED) {
brt_status.partial_evictions_leaf++;
STATUS_VALUE(BRT_PARTIAL_EVICTIONS_LEAF)++;
SUB_BLOCK sb = BSB(node, i);
toku_free(sb->compressed_ptr);
toku_free(sb);
@ -755,7 +826,7 @@ int toku_brtnode_pe_callback (void *brtnode_pv, PAIR_ATTR UU(old_attr), PAIR_ATT
}
else if (BP_STATE(node,i) == PT_AVAIL) {
if (BP_SHOULD_EVICT(node,i)) {
brt_status.partial_evictions_leaf++;
STATUS_VALUE(BRT_PARTIAL_EVICTIONS_LEAF)++;
// free the basement node
BASEMENTNODE bn = BLB(node, i);
struct mempool * mp = &bn->buffer_mempool;
@ -786,13 +857,13 @@ static inline void
brt_status_update_partial_fetch(u_int8_t state)
{
if (state == PT_AVAIL) {
brt_status.partial_fetch_hit++;
STATUS_VALUE(BRT_PARTIAL_FETCH_HIT)++;
}
else if (state == PT_COMPRESSED) {
brt_status.partial_fetch_compressed++;
STATUS_VALUE(BRT_PARTIAL_FETCH_COMPRESSED)++;
}
else if (state == PT_ON_DISK){
brt_status.partial_fetch_miss++;
STATUS_VALUE(BRT_PARTIAL_FETCH_MISS)++;
}
else {
invariant(FALSE);
@ -888,54 +959,54 @@ brt_status_update_partial_fetch_reason(
if (is_leaf) {
if (bfe->type == brtnode_fetch_prefetch) {
if (state == PT_COMPRESSED) {
brt_status.num_basements_decompressed_prefetch++;
STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH)++;
} else {
brt_status.num_basements_fetched_prefetch++;
STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_PREFETCH)++;
}
} else if (bfe->type == brtnode_fetch_all) {
if (state == PT_COMPRESSED) {
brt_status.num_basements_decompressed_write++;
STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_WRITE)++;
} else {
brt_status.num_basements_fetched_write++;
STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_WRITE)++;
}
} else if (i == bfe->child_to_read) {
if (state == PT_COMPRESSED) {
brt_status.num_basements_decompressed_normal++;
STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_NORMAL)++;
} else {
brt_status.num_basements_fetched_normal++;
STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_NORMAL)++;
}
} else {
if (state == PT_COMPRESSED) {
brt_status.num_basements_decompressed_aggressive++;
STATUS_VALUE(BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE)++;
} else {
brt_status.num_basements_fetched_aggressive++;
STATUS_VALUE(BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE)++;
}
}
}
else {
if (bfe->type == brtnode_fetch_prefetch) {
if (state == PT_COMPRESSED) {
brt_status.num_msg_buffer_decompressed_prefetch++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_PREFETCH)++;
} else {
brt_status.num_msg_buffer_fetched_prefetch++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_PREFETCH)++;
}
} else if (bfe->type == brtnode_fetch_all) {
if (state == PT_COMPRESSED) {
brt_status.num_msg_buffer_decompressed_write++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_WRITE)++;
} else {
brt_status.num_msg_buffer_fetched_write++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_WRITE)++;
}
} else if (i == bfe->child_to_read) {
if (state == PT_COMPRESSED) {
brt_status.num_msg_buffer_decompressed_normal++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_NORMAL)++;
} else {
brt_status.num_msg_buffer_fetched_normal++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_NORMAL)++;
}
} else {
if (state == PT_COMPRESSED) {
brt_status.num_msg_buffer_decompressed_aggressive++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_DECOMPRESSED_AGGRESSIVE)++;
} else {
brt_status.num_msg_buffer_fetched_aggressive++;
STATUS_VALUE(BRT_NUM_MSG_BUFFER_FETCHED_AGGRESSIVE)++;
}
}
}
@ -1082,9 +1153,9 @@ void toku_brtnode_free (BRTNODE *nodep) {
toku_mempool_destroy(mp);
}
}
brt_status.destroy_leaf++;
STATUS_VALUE(BRT_DESTROY_LEAF)++;
} else {
brt_status.destroy_nonleaf++;
STATUS_VALUE(BRT_DESTROY_NONLEAF)++;
}
toku_destroy_brtnode_internals(node);
toku_free(node);
@ -1178,9 +1249,9 @@ toku_initialize_empty_brtnode (BRTNODE n, BLOCKNUM nodename, int height, int num
assert(height >= 0);
if (height == 0)
brt_status.create_leaf++;
STATUS_VALUE(BRT_CREATE_LEAF)++;
else
brt_status.create_nonleaf++;
STATUS_VALUE(BRT_CREATE_NONLEAF)++;
n->max_msn_applied_to_node_on_disk = MIN_MSN; // correct value for root node, harmless for others
n->h = h;
@ -1382,8 +1453,8 @@ brt_leaf_apply_cmd_once (
}
if (workdone) { // test programs may call with NULL
*workdone += workdone_this_le;
if (*workdone > brt_status.max_workdone)
brt_status.max_workdone = *workdone;
if (*workdone > STATUS_VALUE(BRT_MAX_WORKDONE))
STATUS_VALUE(BRT_MAX_WORKDONE) = *workdone;
}
// if we created a new mempool buffer, free the old one
@ -1472,7 +1543,7 @@ static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafno
if (cmd->type == BRT_UPDATE) {
// key is passed in with command (should be same as from le)
// update function extra is passed in with command
brt_status.updates++;
STATUS_VALUE(BRT_UPDATES)++;
keyp = cmd->u.id.key;
update_function_extra = cmd->u.id.val;
} else if (cmd->type == BRT_UPDATE_BROADCAST_ALL) {
@ -1481,7 +1552,7 @@ static int do_update(brt_update_func update_fun, DESCRIPTOR desc, BRTNODE leafno
assert(le); // for broadcast updates, we just hit all leafentries
// so this cannot be null
assert(cmd->u.id.key->size == 0);
brt_status.updates_broadcast++;
STATUS_VALUE(BRT_UPDATES_BROADCAST)++;
keyp = toku_fill_dbt(&key, le_key(le), le_keylen(le));
update_function_extra = cmd->u.id.val;
} else {
@ -2070,8 +2141,8 @@ toku_bnc_flush_to_child(
r = toku_omt_clone_pool(&live_list_reverse, logger->live_list_reverse, sizeof(XID_PAIR_S));
assert_zero(r);
size_t buffsize = bnc->n_bytes_in_buffer;
brt_status.msg_bytes_out += buffsize; // take advantage of surrounding mutex
brt_status.msg_bytes_curr -= buffsize; // may be misleading if there's a broadcast message in there
STATUS_VALUE(BRT_MSG_BYTES_OUT) += buffsize; // take advantage of surrounding mutex
STATUS_VALUE(BRT_MSG_BYTES_CURR) -= buffsize; // may be misleading if there's a broadcast message in there
toku_pthread_mutex_unlock(&logger->txn_list_lock);
} else {
snapshot_txnids = NULL;
@ -2223,7 +2294,7 @@ void toku_apply_cmd_to_leaf(
snapshot_txnids,
live_list_reverse);
} else {
brt_status.msn_discards++;
STATUS_VALUE(BRT_MSN_DISCARDS)++;
}
}
else if (brt_msg_applies_all(cmd)) {
@ -2243,7 +2314,7 @@ void toku_apply_cmd_to_leaf(
live_list_reverse);
if (bn_made_change) *made_change = 1;
} else {
brt_status.msn_discards++;
STATUS_VALUE(BRT_MSN_DISCARDS)++;
}
}
}
@ -2284,14 +2355,14 @@ static void push_something_at_root (BRT brt, BRTNODE *nodep, BRT_MSG cmd)
// update some status variables
if (node->height != 0) {
uint64_t msgsize = brt_msg_size(cmd);
brt_status.msg_bytes_in += msgsize;
brt_status.msg_bytes_curr += msgsize;
if (brt_status.msg_bytes_curr > brt_status.msg_bytes_max) {
brt_status.msg_bytes_max = brt_status.msg_bytes_curr;
STATUS_VALUE(BRT_MSG_BYTES_IN) += msgsize;
STATUS_VALUE(BRT_MSG_BYTES_CURR) += msgsize;
if (STATUS_VALUE(BRT_MSG_BYTES_CURR) > STATUS_VALUE(BRT_MSG_BYTES_MAX)) {
STATUS_VALUE(BRT_MSG_BYTES_MAX) = STATUS_VALUE(BRT_MSG_BYTES_CURR);
}
brt_status.msg_num++;
STATUS_VALUE(BRT_MSG_NUM)++;
if (brt_msg_applies_all(cmd)) {
brt_status.msg_num_broadcast++;
STATUS_VALUE(BRT_MSG_NUM_BROADCAST)++;
}
}
}
@ -3213,7 +3284,7 @@ toku_brt_change_descriptor(
fd = toku_cachefile_get_and_pin_fd (t->cf);
r = toku_update_descriptor(t->h, &new_d, fd);
if (r == 0) // very infrequent operation, worth precise threadsafe count
brt_status.descriptor_set++;
STATUS_VALUE(BRT_DESCRIPTOR_SET)++;
toku_cachefile_unpin_fd(t->cf);
if (r!=0) goto cleanup;
@ -4422,7 +4493,7 @@ do_brt_leaf_put_cmd(BRT t, BRTNODE leafnode, BASEMENTNODE bn, BRTNODE ancestor,
bool made_change;
brt_leaf_put_cmd(t->compare_fun, t->update_fun, &t->h->descriptor, leafnode, bn, &brtcmd, &made_change, &BP_WORKDONE(ancestor, childnum), snapshot_txnids, live_list_reverse);
} else {
brt_status.msn_discards++;
STATUS_VALUE(BRT_MSN_DISCARDS)++;
}
}
@ -5333,34 +5404,34 @@ try_again:
//which can mean not found, but keep looking in another leaf.
if (r==TOKUDB_FOUND_BUT_REJECTED) r = DB_NOTFOUND;
else if (r==DB_NOTFOUND) {
//We truly did not find an answer to the query.
//Therefore, the BRT_GET_CALLBACK_FUNCTION has NOT been called.
//The contract specifies that the callback function must be called
//for 'r= (0|DB_NOTFOUND|TOKUDB_FOUND_BUT_REJECTED)'
//TODO: #1378 This is not the ultimate location of this call to the
//callback. It is surely wrong for node-level locking, and probably
//wrong for the STRADDLE callback for heaviside function(two sets of key/vals)
int r2 = getf(0,NULL, 0,NULL, getf_v, false);
if (r2!=0) r = r2;
//We truly did not find an answer to the query.
//Therefore, the BRT_GET_CALLBACK_FUNCTION has NOT been called.
//The contract specifies that the callback function must be called
//for 'r= (0|DB_NOTFOUND|TOKUDB_FOUND_BUT_REJECTED)'
//TODO: #1378 This is not the ultimate location of this call to the
//callback. It is surely wrong for node-level locking, and probably
//wrong for the STRADDLE callback for heaviside function(two sets of key/vals)
int r2 = getf(0,NULL, 0,NULL, getf_v, false);
if (r2!=0) r = r2;
}
{ // accounting (to detect and measure thrashing)
uint retrycount = trycount - 1; // how many retries were needed?
brt_status.total_searches++;
brt_status.total_retries += retrycount;
if (root_tries > 1) { // if root was read from disk more than once
brt_status.search_root_retries++;
if (root_tries > brt_status.max_search_root_tries)
brt_status.max_search_root_tries = root_tries;
}
if (retrycount > tree_height) { // if at least one node was read from disk more than once
brt_status.search_tries_gt_height++;
uint excess_tries = retrycount - tree_height;
if (excess_tries > brt_status.max_search_excess_retries)
brt_status.max_search_excess_retries = excess_tries;
if (retrycount > (tree_height+3))
brt_status.search_tries_gt_heightplus3++;
}
uint retrycount = trycount - 1; // how many retries were needed?
STATUS_VALUE(BRT_TOTAL_SEARCHES)++;
STATUS_VALUE(BRT_TOTAL_RETRIES) += retrycount;
if (root_tries > 1) { // if root was read from disk more than once
STATUS_VALUE(BRT_SEARCH_ROOT_RETRIES)++;
if (root_tries > STATUS_VALUE(BRT_MAX_SEARCH_ROOT_TRIES))
STATUS_VALUE(BRT_MAX_SEARCH_ROOT_TRIES) = root_tries;
}
if (retrycount > tree_height) { // if at least one node was read from disk more than once
STATUS_VALUE(BRT_SEARCH_TRIES_GT_HEIGHT)++;
uint excess_tries = retrycount - tree_height;
if (excess_tries > STATUS_VALUE(BRT_MAX_SEARCH_EXCESS_RETRIES))
STATUS_VALUE(BRT_MAX_SEARCH_EXCESS_RETRIES) = excess_tries;
if (retrycount > (tree_height+3))
STATUS_VALUE(BRT_SEARCH_TRIES_GT_HEIGHTPLUS3)++;
}
}
return r;
@ -6107,8 +6178,6 @@ int toku_brt_init(void (*ydb_lock_callback)(void),
r = toku_brt_serialize_init();
if (r==0)
callback_db_set_brt = db_set_brt;
toku_brt_flusher_status_init();
toku_brt_hot_status_init();
return r;
}
@ -6447,3 +6516,5 @@ void
toku_brt_drd_ignore(void) {
DRD_IGNORE_VAR(brt_status);
}
#undef STATUS_VALUE

View file

@ -19,7 +19,6 @@
extern "C" {
#endif
typedef struct brt_status BRT_STATUS_S, *BRT_STATUS;
typedef struct brt *BRT;
typedef struct brtnode *BRTNODE;
typedef struct brtnode_leaf_basement_node *BASEMENTNODE;

View file

@ -37,7 +37,15 @@ static void cachetable_partial_reader(WORKITEM);
#define WHEN_TRACE_CT(x) ((void)0)
#endif
// these should be in the cachetable object, but we make them file-wide so that gdb can get them easily
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
// These should be in the cachetable object, but we make them file-wide so that gdb can get them easily.
// They were left here after the engine status cleanup (#2949), rather than being moved into the status struct,
// so that they remain easily available to the debugger and to save lots of typing.
static u_int64_t cachetable_lock_taken = 0;
static u_int64_t cachetable_lock_released = 0;
static u_int64_t cachetable_hit;
@ -54,6 +62,54 @@ static u_int64_t cachetable_maybe_get_and_pin_hits; // how many times has get_a
static u_int64_t cachetable_evictions;
static u_int64_t cleaner_executions; // number of times the cleaner thread's loop has executed
static CACHETABLE_STATUS_S ct_status;
// Note, toku_cachetable_get_status() is below, after declaration of cachetable.
// Fill in the descriptive (non-value) fields of row k of ct_status:
//   keyname <- the spelling of the enumerator itself (#k)
//   type    <- t
//   legend  <- "cachetable: " followed by l
// l must be a string literal, because it is concatenated with the prefix at compile time.
// The body is wrapped in do { } while (0) so that an invocation followed by ';' is exactly one
// statement and can be used safely as the body of an unbraced if/else.
#define STATUS_INIT(k,t,l) do { \
    ct_status.status[k].keyname = #k; \
    ct_status.status[k].type    = (t); \
    ct_status.status[k].legend  = "cachetable: " l; \
} while (0)
// Fill in the keyname, type, and legend of every row of ct_status, then mark it initialized.
// Invoked lazily by toku_cachetable_get_status() the first time status is requested.
static void
status_init(void) {
// Note, this function initializes only the keyname, type, and legend fields.
// The value fields are not written here: ct_status is a file-scope static, so they start out zero,
// and toku_cachetable_get_status() overwrites each of them on every query.
STATUS_INIT(CT_LOCK_TAKEN, UINT64, "lock taken");
STATUS_INIT(CT_LOCK_RELEASED, UINT64, "lock released");
STATUS_INIT(CT_HIT, UINT64, "hit");
STATUS_INIT(CT_MISS, UINT64, "miss");
STATUS_INIT(CT_MISSTIME, UINT64, "miss time");
STATUS_INIT(CT_WAITTIME, UINT64, "wait time");
STATUS_INIT(CT_WAIT_READING, UINT64, "wait reading");
STATUS_INIT(CT_WAIT_WRITING, UINT64, "wait writing");
STATUS_INIT(CT_WAIT_CHECKPOINT, UINT64, "wait checkpoint");
STATUS_INIT(CT_PUTS, UINT64, "puts (new nodes created)");
STATUS_INIT(CT_PREFETCHES, UINT64, "prefetches");
STATUS_INIT(CT_MAYBE_GET_AND_PINS, UINT64, "maybe_get_and_pin");
STATUS_INIT(CT_MAYBE_GET_AND_PIN_HITS, UINT64, "maybe_get_and_pin hits");
STATUS_INIT(CT_SIZE_CURRENT, UINT64, "size current");
STATUS_INIT(CT_SIZE_LIMIT, UINT64, "size limit");
STATUS_INIT(CT_SIZE_MAX, UINT64, "size max");
STATUS_INIT(CT_SIZE_WRITING, UINT64, "size writing");
STATUS_INIT(CT_SIZE_NONLEAF, UINT64, "size nonleaf");
STATUS_INIT(CT_SIZE_LEAF, UINT64, "size leaf");
STATUS_INIT(CT_SIZE_ROLLBACK, UINT64, "size rollback");
STATUS_INIT(CT_SIZE_CACHEPRESSURE, UINT64, "size cachepressure");
STATUS_INIT(CT_EVICTIONS, UINT64, "evictions");
STATUS_INIT(CT_CLEANER_EXECUTIONS, UINT64, "cleaner executions");
STATUS_INIT(CT_CLEANER_PERIOD, UINT64, "cleaner period");
STATUS_INIT(CT_CLEANER_ITERATIONS, UINT64, "cleaner iterations");
// Set last, so toku_cachetable_get_status() skips this function on later calls.
ct_status.initialized = true;
}
#undef STATUS_INIT
#define STATUS_VALUE(x) ct_status.status[x].value.num
enum ctpair_state {
CTPAIR_IDLE = 1, // in memory
@ -199,6 +255,42 @@ struct cachetable {
int64_t size_cachepressure;
};
// Snapshot the cachetable engine status into *statp.
// ct    - cachetable whose size fields and cleaner settings are reported.
// statp - receives a copy of the whole status table (row descriptions and values).
// Every value row is refreshed on each call, from the file-wide counters or from ct,
// before the table is copied out.
void
toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS statp) {
// First query only: fill in keyname/type/legend for every row.
if (!ct_status.initialized)
status_init();
// Rows fed from the file-wide static counters.
STATUS_VALUE(CT_LOCK_TAKEN) = cachetable_lock_taken;
STATUS_VALUE(CT_LOCK_RELEASED) = cachetable_lock_released;
STATUS_VALUE(CT_HIT) = cachetable_hit;
STATUS_VALUE(CT_MISS) = cachetable_miss;
STATUS_VALUE(CT_MISSTIME) = cachetable_misstime;
STATUS_VALUE(CT_WAITTIME) = cachetable_waittime;
STATUS_VALUE(CT_WAIT_READING) = cachetable_wait_reading;
STATUS_VALUE(CT_WAIT_WRITING) = cachetable_wait_writing;
STATUS_VALUE(CT_WAIT_CHECKPOINT) = cachetable_wait_checkpoint;
STATUS_VALUE(CT_PUTS) = cachetable_puts;
STATUS_VALUE(CT_PREFETCHES) = cachetable_prefetches;
STATUS_VALUE(CT_MAYBE_GET_AND_PINS) = cachetable_maybe_get_and_pins;
STATUS_VALUE(CT_MAYBE_GET_AND_PIN_HITS) = cachetable_maybe_get_and_pin_hits;
// Rows fed from fields of this particular cachetable.
STATUS_VALUE(CT_SIZE_CURRENT) = ct->size_current;
STATUS_VALUE(CT_SIZE_LIMIT) = ct->size_limit;
STATUS_VALUE(CT_SIZE_MAX) = ct->size_max;
// Note the name mismatch: the "size writing" row reports ct->size_evicting.
STATUS_VALUE(CT_SIZE_WRITING) = ct->size_evicting;
STATUS_VALUE(CT_SIZE_NONLEAF) = ct->size_nonleaf;
STATUS_VALUE(CT_SIZE_LEAF) = ct->size_leaf;
STATUS_VALUE(CT_SIZE_ROLLBACK) = ct->size_rollback;
STATUS_VALUE(CT_SIZE_CACHEPRESSURE) = ct->size_cachepressure;
STATUS_VALUE(CT_EVICTIONS) = cachetable_evictions;
STATUS_VALUE(CT_CLEANER_EXECUTIONS) = cleaner_executions;
// NOTE(review): the *_unlocked accessors presumably read without taking the cachetable lock -- confirm.
STATUS_VALUE(CT_CLEANER_PERIOD) = toku_get_cleaner_period_unlocked(ct);
STATUS_VALUE(CT_CLEANER_ITERATIONS) = toku_get_cleaner_iterations_unlocked(ct);
// Whole-struct copy to the caller.
*statp = ct_status;
}
// Code bracketed with {BEGIN_CRITICAL_REGION; ... END_CRITICAL_REGION;} macros
// are critical regions in which a checkpoint is not permitted to begin.
// Must increment checkpoint_prohibited before testing checkpoint_is_beginning
@ -3799,32 +3891,6 @@ toku_cachefile_size_in_memory(CACHEFILE cf)
return result;
}
void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s) {
s->lock_taken = cachetable_lock_taken;
s->lock_released = cachetable_lock_released;
s->hit = cachetable_hit;
s->miss = cachetable_miss;
s->misstime = cachetable_misstime;
s->waittime = cachetable_waittime;
s->wait_reading = cachetable_wait_reading;
s->wait_writing = cachetable_wait_writing;
s->wait_checkpoint = cachetable_wait_checkpoint;
s->puts = cachetable_puts;
s->prefetches = cachetable_prefetches;
s->maybe_get_and_pins = cachetable_maybe_get_and_pins;
s->maybe_get_and_pin_hits = cachetable_maybe_get_and_pin_hits;
s->size_current = ct->size_current;
s->size_limit = ct->size_limit;
s->size_max = ct->size_max;
s->size_writing = ct->size_evicting;
s->size_nonleaf = ct->size_nonleaf;
s->size_leaf = ct->size_leaf;
s->size_rollback = ct->size_rollback;
s->size_cachepressure = ct->size_cachepressure;
s->evictions = cachetable_evictions;
s->cleaner_executions = cleaner_executions;
}
char *
toku_construct_full_name(int count, ...) {
va_list ap;
@ -4005,7 +4071,9 @@ void __attribute__((__constructor__)) toku_cachetable_drd_ignore(void);
void
toku_cachetable_drd_ignore(void) {
// incremented only while lock is held, but read by engine status asynchronously.
DRD_IGNORE_VAR(cachetable_lock_taken);
DRD_IGNORE_VAR(cachetable_lock_released);
DRD_IGNORE_VAR(cachetable_evictions);
DRD_IGNORE_VAR(STATUS_VALUE(CT_LOCK_TAKEN));
DRD_IGNORE_VAR(STATUS_VALUE(CT_LOCK_RELEASED));
DRD_IGNORE_VAR(STATUS_VALUE(CT_EVICTIONS));
}
#undef STATUS_VALUE

View file

@ -485,31 +485,38 @@ void toku_cachetable_maybe_flush_some(CACHETABLE ct);
u_int64_t toku_cachefile_size_in_memory(CACHEFILE cf);
// Row indices into the status[] array of CACHETABLE_STATUS_S.
// CT_STATUS_NUM_ROWS is not a row: it must stay last, because it is used as the array length.
typedef enum {
CT_LOCK_TAKEN = 0,
CT_LOCK_RELEASED,
CT_HIT,
CT_MISS,
CT_MISSTIME, // how many usec spent waiting for disk read because of cache miss
CT_WAITTIME, // how many usec spent waiting for another thread to release cache line
CT_WAIT_READING,
CT_WAIT_WRITING,
CT_WAIT_CHECKPOINT, // number of times get_and_pin waits for a node to be written for a checkpoint
CT_PUTS, // how many times has a newly created node been put into the cachetable?
CT_PREFETCHES, // how many times has a block been prefetched into the cachetable?
CT_MAYBE_GET_AND_PINS, // how many times has maybe_get_and_pin(_clean) been called?
CT_MAYBE_GET_AND_PIN_HITS, // how many times has maybe_get_and_pin(_clean) returned with a node?
CT_SIZE_CURRENT, // the sum of the sizes of the nodes represented in the cachetable
CT_SIZE_LIMIT, // the limit to the sum of the node sizes
CT_SIZE_MAX, // high water mark of size_current (max value size_current ever had)
CT_SIZE_WRITING, // the sum of the sizes of the nodes being written
CT_SIZE_NONLEAF, // number of bytes in cachetable belonging to nonleaf nodes
CT_SIZE_LEAF, // number of bytes in cachetable belonging to leaf nodes
CT_SIZE_ROLLBACK, // number of bytes in cachetable belonging to rollback nodes
CT_SIZE_CACHEPRESSURE, // number of bytes causing cache pressure (sum of buffers and workdone counters)
CT_EVICTIONS,
CT_CLEANER_EXECUTIONS, // number of times the cleaner thread's loop has executed
CT_CLEANER_PERIOD,
CT_CLEANER_ITERATIONS, // number of times the cleaner thread runs the cleaner per period
CT_STATUS_NUM_ROWS // count of rows above; keep last
} ct_status_entry;
typedef struct cachetable_status {
u_int64_t lock_taken;
u_int64_t lock_released;
u_int64_t hit;
u_int64_t miss;
u_int64_t misstime; /* how many usec spent waiting for disk read because of cache miss */
u_int64_t waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t wait_reading;
u_int64_t wait_writing;
u_int64_t wait_checkpoint; // number of times get_and_pin waits for a node to be written for a checkpoint
u_int64_t puts; // how many times has a newly created node been put into the cachetable?
u_int64_t prefetches; // how many times has a block been prefetched into the cachetable?
u_int64_t maybe_get_and_pins; // how many times has maybe_get_and_pin(_clean) been called?
u_int64_t maybe_get_and_pin_hits; // how many times has maybe_get_and_pin(_clean) returned with a node?
uint64_t size_current; // the sum of the sizes of the nodes represented in the cachetable
uint64_t size_limit; // the limit to the sum of the node sizes
uint64_t size_max; // high water mark of size_current (max value size_current ever had)
uint64_t size_writing; // the sum of the sizes of the nodes being written
uint64_t size_nonleaf; // number of bytes in cachetable belonging to nonleaf nodes
uint64_t size_leaf; // number of bytes in cachetable belonging to leaf nodes
uint64_t size_rollback; // number of bytes in cachetable belonging to rollback nodes
uint64_t size_cachepressure; // number of bytes causing cache pressure (sum of buffers and workdone counters)
u_int64_t evictions;
u_int64_t cleaner_executions; // number of times the cleaner thread's loop has executed
typedef struct {
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[CT_STATUS_NUM_ROWS];
} CACHETABLE_STATUS_S, *CACHETABLE_STATUS;
void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s);

View file

@ -57,7 +57,61 @@
#include "logger.h"
#include "checkpoint.h"
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
static CHECKPOINT_STATUS_S cp_status;
// Fill in the descriptive (non-value) fields of row k of cp_status:
//   keyname <- the spelling of the enumerator itself (#k)
//   type    <- t
//   legend  <- "checkpoint: " followed by l
// l must be a string literal, because it is concatenated with the prefix at compile time.
// The body is wrapped in do { } while (0) so that an invocation followed by ';' is exactly one
// statement and can be used safely as the body of an unbraced if/else.
#define STATUS_INIT(k,t,l) do { \
    cp_status.status[k].keyname = #k; \
    cp_status.status[k].type    = (t); \
    cp_status.status[k].legend  = "checkpoint: " l; \
} while (0)
// Fill in the keyname, type, and legend of every row of cp_status, then mark it initialized.
// Invoked lazily by toku_checkpoint_get_status() the first time status is requested.
static void
status_init(void) {
// Note, this function initializes only the keyname, type, and legend fields.
// The value fields are deliberately not written here: cp_status is a file-scope static,
// so they start out zero without any explicit initialization.
STATUS_INIT(CP_PERIOD, UINT64, "period");
STATUS_INIT(CP_FOOTPRINT, UINT64, "footprint");
STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN, UNIXTIME, "last checkpoint began ");
STATUS_INIT(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, UNIXTIME, "last complete checkpoint began ");
STATUS_INIT(CP_TIME_LAST_CHECKPOINT_END, UNIXTIME, "last complete checkpoint ended");
STATUS_INIT(CP_LAST_LSN, UINT64, "last complete checkpoint LSN");
STATUS_INIT(CP_CHECKPOINT_COUNT, UINT64, "checkpoints taken ");
STATUS_INIT(CP_CHECKPOINT_COUNT_FAIL, UINT64, "checkpoints failed");
STATUS_INIT(CP_WAITERS_NOW, UINT64, "waiters now");
STATUS_INIT(CP_WAITERS_MAX, UINT64, "waiters max");
STATUS_INIT(CP_CLIENT_WAIT_ON_MO, UINT64, "non-checkpoint client wait on mo lock");
STATUS_INIT(CP_CLIENT_WAIT_ON_CS, UINT64, "non-checkpoint client wait on cs lock");
STATUS_INIT(CP_WAIT_SCHED_CS, UINT64, "sched wait on cs lock");
STATUS_INIT(CP_WAIT_CLIENT_CS, UINT64, "client wait on cs lock");
STATUS_INIT(CP_WAIT_TXN_CS, UINT64, "txn wait on cs lock");
STATUS_INIT(CP_WAIT_OTHER_CS, UINT64, "other wait on cs lock");
STATUS_INIT(CP_WAIT_SCHED_MO, UINT64, "sched wait on mo lock");
STATUS_INIT(CP_WAIT_CLIENT_MO, UINT64, "client wait on mo lock");
STATUS_INIT(CP_WAIT_TXN_MO, UINT64, "txn wait on mo lock");
STATUS_INIT(CP_WAIT_OTHER_MO, UINT64, "other wait on mo lock");
// Set last, so toku_checkpoint_get_status() skips this function on later calls.
cp_status.initialized = true;
}
#undef STATUS_INIT
#define STATUS_VALUE(x) cp_status.status[x].value.num
// Snapshot the checkpoint engine status into *statp.
// ct    - cachetable from which the current checkpoint period is read.
// statp - receives a copy of the whole status table (row descriptions and values).
// Only CP_PERIOD is refreshed here; the other rows hold whatever was last stored in cp_status.
void
toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS statp) {
    // First query only: fill in keyname/type/legend for every row.
    if (!cp_status.initialized) {
        status_init();
    }
    STATUS_VALUE(CP_PERIOD) = toku_get_checkpoint_period_unlocked(ct);
    // Whole-struct copy to the caller.
    *statp = cp_status;
}
static LSN last_completed_checkpoint_lsn;
static toku_pthread_rwlock_t checkpoint_safe_lock;
@ -145,7 +199,7 @@ checkpoint_safe_checkpoint_unlock(void) {
void
toku_multi_operation_client_lock(void) {
if (locked_mo)
(void) __sync_fetch_and_add(&cp_status.client_wait_on_mo, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_MO), 1);
int r = toku_pthread_rwlock_rdlock(&multi_operation_lock);
assert(r == 0);
}
@ -159,7 +213,7 @@ toku_multi_operation_client_unlock(void) {
void
toku_checkpoint_safe_client_lock(void) {
if (locked_cs)
(void) __sync_fetch_and_add(&cp_status.client_wait_on_cs, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_CLIENT_WAIT_ON_CS), 1);
int r = toku_pthread_rwlock_rdlock(&checkpoint_safe_lock);
assert(r == 0);
toku_multi_operation_client_lock();
@ -173,12 +227,6 @@ toku_checkpoint_safe_client_unlock(void) {
}
void
toku_checkpoint_get_status(CHECKPOINT_STATUS s) {
*s = cp_status;
}
// Initialize the checkpoint mechanism, must be called before any client operations.
int
@ -206,7 +254,7 @@ toku_checkpoint_destroy(void) {
return r;
}
#define SET_CHECKPOINT_FOOTPRINT(x) cp_status.footprint = footprint_offset + x
#define SET_CHECKPOINT_FOOTPRINT(x) STATUS_VALUE(CP_FOOTPRINT) = footprint_offset + x
// Take a checkpoint of all currently open dictionaries
@ -222,39 +270,39 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
if (locked_cs) {
if (caller_id == SCHEDULED_CHECKPOINT)
(void) __sync_fetch_and_add(&cp_status.cp_wait_sched_cs, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_SCHED_CS), 1);
else if (caller_id == CLIENT_CHECKPOINT)
(void) __sync_fetch_and_add(&cp_status.cp_wait_client_cs, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_CLIENT_CS), 1);
else if (caller_id == TXN_COMMIT_CHECKPOINT)
(void) __sync_fetch_and_add(&cp_status.cp_wait_txn_cs, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_TXN_CS), 1);
else
(void) __sync_fetch_and_add(&cp_status.cp_wait_other_cs, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAIT_OTHER_CS), 1);
}
(void) __sync_fetch_and_add(&cp_status.waiters_now, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(CP_WAITERS_NOW), 1);
checkpoint_safe_checkpoint_lock();
(void) __sync_fetch_and_sub(&cp_status.waiters_now, 1);
(void) __sync_fetch_and_sub(&STATUS_VALUE(CP_WAITERS_NOW), 1);
if (cp_status.waiters_now > cp_status.waiters_max)
cp_status.waiters_max = cp_status.waiters_now; // threadsafe, within checkpoint_safe lock
if (STATUS_VALUE(CP_WAITERS_NOW) > STATUS_VALUE(CP_WAITERS_MAX))
STATUS_VALUE(CP_WAITERS_MAX) = STATUS_VALUE(CP_WAITERS_NOW); // threadsafe, within checkpoint_safe lock
SET_CHECKPOINT_FOOTPRINT(10);
if (locked_mo) {
if (caller_id == SCHEDULED_CHECKPOINT)
cp_status.cp_wait_sched_mo++; // threadsafe, within checkpoint_safe lock
STATUS_VALUE(CP_WAIT_SCHED_MO)++; // threadsafe, within checkpoint_safe lock
else if (caller_id == CLIENT_CHECKPOINT)
cp_status.cp_wait_client_mo++;
STATUS_VALUE(CP_WAIT_CLIENT_MO)++;
else if (caller_id == TXN_COMMIT_CHECKPOINT)
cp_status.cp_wait_txn_mo++;
STATUS_VALUE(CP_WAIT_TXN_MO)++;
else
cp_status.cp_wait_other_mo++;
STATUS_VALUE(CP_WAIT_OTHER_MO)++;
}
multi_operation_checkpoint_lock();
SET_CHECKPOINT_FOOTPRINT(20);
ydb_lock();
SET_CHECKPOINT_FOOTPRINT(30);
cp_status.time_last_checkpoint_begin = time(NULL);
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN) = time(NULL);
r = toku_cachetable_begin_checkpoint(ct, logger);
multi_operation_checkpoint_unlock();
@ -270,26 +318,23 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
if (r==0 && logger) {
last_completed_checkpoint_lsn = logger->last_completed_checkpoint_lsn;
r = toku_logger_maybe_trim_log(logger, last_completed_checkpoint_lsn);
cp_status.last_lsn = last_completed_checkpoint_lsn.lsn;
STATUS_VALUE(CP_LAST_LSN) = last_completed_checkpoint_lsn.lsn;
}
SET_CHECKPOINT_FOOTPRINT(60);
cp_status.time_last_checkpoint_end = time(NULL);
cp_status.time_last_checkpoint_begin_complete = cp_status.time_last_checkpoint_begin;
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_END) = time(NULL);
STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE) = STATUS_VALUE(CP_TIME_LAST_CHECKPOINT_BEGIN);
if (r == 0)
cp_status.checkpoint_count++;
STATUS_VALUE(CP_CHECKPOINT_COUNT)++;
else
cp_status.checkpoint_count_fail++;
STATUS_VALUE(CP_CHECKPOINT_COUNT_FAIL)++;
cp_status.footprint = 0;
STATUS_VALUE(CP_FOOTPRINT) = 0;
checkpoint_safe_checkpoint_unlock();
return r;
}
#undef SET_CHECKPOINT_FOOTPRINT
#include <valgrind/drd.h>
void __attribute__((__constructor__)) toku_checkpoint_drd_ignore(void);
void
@ -298,3 +343,6 @@ toku_checkpoint_drd_ignore(void) {
DRD_IGNORE_VAR(locked_mo);
DRD_IGNORE_VAR(locked_cs);
}
#undef SET_CHECKPOINT_FOOTPRINT
#undef STATUS_VALUE

View file

@ -80,35 +80,42 @@ int toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
/******
/******
* These functions are called from the ydb level.
* They return status information and have no side effects.
* Some status information may be incorrect because no locks are taken to collect status.
* (If checkpoint is in progress, it may overwrite status info while it is being read.)
*****/
// Row indices into the status[] array of CHECKPOINT_STATUS_S.
// The enumerators are used directly as array subscripts, so their order
// defines the row order; CP_STATUS_NUM_ROWS must stay last because it is
// used as the array length.
typedef enum {
CP_PERIOD, // legend "period"
CP_FOOTPRINT, // legend "footprint"
CP_TIME_LAST_CHECKPOINT_BEGIN, // legend "last checkpoint began"
CP_TIME_LAST_CHECKPOINT_BEGIN_COMPLETE, // legend "last complete checkpoint began"
CP_TIME_LAST_CHECKPOINT_END, // legend "last complete checkpoint ended"
CP_LAST_LSN, // legend "last complete checkpoint LSN"
CP_CHECKPOINT_COUNT, // legend "checkpoints taken"
CP_CHECKPOINT_COUNT_FAIL, // legend "checkpoints failed"
CP_WAITERS_NOW, // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint
CP_WAITERS_MAX, // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint
CP_CLIENT_WAIT_ON_MO, // how many times a client thread waited to take the multi_operation lock, not for checkpoint
CP_CLIENT_WAIT_ON_CS, // how many times a client thread waited for the checkpoint_safe lock, not for checkpoint
CP_WAIT_SCHED_CS, // how many times a scheduled checkpoint waited for the checkpoint_safe lock
CP_WAIT_CLIENT_CS, // how many times a client checkpoint waited for the checkpoint_safe lock
CP_WAIT_TXN_CS, // how many times a txn_commit checkpoint waited for the checkpoint_safe lock
CP_WAIT_OTHER_CS, // how many times a checkpoint for another purpose waited for the checkpoint_safe lock
CP_WAIT_SCHED_MO, // how many times a scheduled checkpoint waited for the multi_operation lock
CP_WAIT_CLIENT_MO, // how many times a client checkpoint waited for the multi_operation lock
CP_WAIT_TXN_MO, // how many times a txn_commit checkpoint waited for the multi_operation lock
CP_WAIT_OTHER_MO, // how many times a checkpoint for another purpose waited for the multi_operation lock
CP_STATUS_NUM_ROWS // number of rows in this status array
} cp_status_entry;
typedef struct {
uint64_t footprint;
time_t time_last_checkpoint_begin_complete;
time_t time_last_checkpoint_begin;
time_t time_last_checkpoint_end;
uint64_t last_lsn;
uint64_t checkpoint_count;
uint64_t checkpoint_count_fail;
uint64_t waiters_now; // how many threads are currently waiting for the checkpoint_safe lock to perform a checkpoint
uint64_t waiters_max; // max threads ever simultaneously waiting for the checkpoint_safe lock to perform a checkpoint
uint64_t client_wait_on_mo; // how many times a client thread waited for the multi_operation lock
uint64_t client_wait_on_cs; // how many times a client thread waited for the checkpoint_safe lock
uint64_t cp_wait_sched_cs; // how many times a scheduled checkpoint waited for the checkpoint_safe lock
uint64_t cp_wait_client_cs; // how many times a client checkpoint waited for the checkpoint_safe lock
uint64_t cp_wait_txn_cs; // how many times a txn_commit checkpoint waited for the checkpoint_safe lock
uint64_t cp_wait_other_cs; // how many times a checkpoint for another purpose waited for the checkpoint_safe lock
uint64_t cp_wait_sched_mo; // how many times a scheduled checkpoint waited for the multi_operation lock
uint64_t cp_wait_client_mo; // how many times a client checkpoint waited for the multi_operation lock
uint64_t cp_wait_txn_mo; // how many times a txn_commit checkpoint waited for the multi_operation lock
uint64_t cp_wait_other_mo; // how many times a checkpoint for another purpose waited for the multi_operation lock
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[CP_STATUS_NUM_ROWS];
} CHECKPOINT_STATUS_S, *CHECKPOINT_STATUS;
void toku_checkpoint_get_status(CHECKPOINT_STATUS stat);
void toku_checkpoint_get_status(CACHETABLE ct, CHECKPOINT_STATUS stat);
#if defined(__cplusplus) || defined(__cilkplusplus)
};

View file

@ -1,4 +1,4 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
@ -1350,25 +1350,54 @@ toku_logger_call_remove_finalize_callback(TOKULOGGER logger, DICTIONARY_ID dict_
logger->remove_finalize_callback(dict_id, logger->remove_finalize_callback_extra);
}
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
void
toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s) {
static LOGGER_STATUS_S logger_status;
// STATUS_INIT(k, t, l): fill in the descriptive fields of row k of logger_status.
//   k - row index; its spelling is stringized and stored as the row's keyname
//   t - value stored in the row's type field
//   l - string literal; stored as the legend with the "logger: " prefix pasted on
// Wrapped in do { } while (0) so that an invocation followed by ';' is exactly
// one statement, even as the unbraced body of an if/else.
#define STATUS_INIT(k,t,l) do { \
        logger_status.status[k].keyname = #k; \
        logger_status.status[k].type    = (t); \
        logger_status.status[k].legend  = "logger: " l; \
    } while (0)
// Fill in the keyname, type and legend of every row of logger_status and
// mark the table as initialized. toku_logger_get_status() calls this when
// logger_status.initialized is still false.
static void
status_init(void) {
// Note, this function initializes the keyname, type, and legend fields only.
// Value fields are not touched here; logger_status is a file-scope static,
// so they start out as zero.
STATUS_INIT(LOGGER_NEXT_LSN, UINT64, "next LSN");
STATUS_INIT(LOGGER_ILOCK_CTR, UINT64, "ilock count");
STATUS_INIT(LOGGER_OLOCK_CTR, UINT64, "olock count");
STATUS_INIT(LOGGER_SWAP_CTR, UINT64, "swap count");
STATUS_INIT(LOGGER_PANICKED, UINT64, "panic");
STATUS_INIT(LOGGER_PANIC_ERRNO, UINT64, "panic errno");
logger_status.initialized = true;
}
#undef STATUS_INIT
#define STATUS_VALUE(x) logger_status.status[x].value.num
void
toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS statp) {
if (!logger_status.initialized)
status_init();
if (logger) {
s->ilock_ctr = logger->input_lock_ctr;
s->olock_ctr = logger->output_condition_lock_ctr;
s->swap_ctr = logger->swap_ctr;
s->panicked = logger->is_panicked;
s->panic_errno = logger->panic_errno;
}
else {
s->ilock_ctr = 0;
s->olock_ctr = 0;
s->swap_ctr = 0;
s->panicked = 0;
s->panic_errno = 0;
STATUS_VALUE(LOGGER_NEXT_LSN) = logger->lsn.lsn;
STATUS_VALUE(LOGGER_ILOCK_CTR) = logger->input_lock_ctr;
STATUS_VALUE(LOGGER_OLOCK_CTR) = logger->output_condition_lock_ctr;
STATUS_VALUE(LOGGER_SWAP_CTR) = logger->swap_ctr;
STATUS_VALUE(LOGGER_PANICKED) = logger->is_panicked;
STATUS_VALUE(LOGGER_PANIC_ERRNO) = logger->panic_errno;
}
*statp = logger_status;
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Used for upgrade:
// if any valid log files exist in log_dir, then
// set *found_any_logs to TRUE and set *version_found to version number of latest log
@ -1411,3 +1440,4 @@ toku_get_version_of_logs_on_disk(const char *log_dir, BOOL *found_any_logs, uint
return r;
}
#undef STATUS_VALUE

View file

@ -164,14 +164,23 @@ toku_logger_maybe_fsync (TOKULOGGER logger, LSN lsn, int do_fsync);
// fsync
// release the outlock
typedef struct logger_status {
u_int64_t ilock_ctr;
u_int64_t olock_ctr;
u_int64_t swap_ctr;
u_int64_t panicked;
u_int64_t panic_errno;
// Row indices into the status[] array of LOGGER_STATUS_S.
// LOGGER_STATUS_NUM_ROWS must stay last: it is used as the array length.
typedef enum {
LOGGER_NEXT_LSN = 0, // legend "next LSN"
LOGGER_ILOCK_CTR, // legend "ilock count"
LOGGER_OLOCK_CTR, // legend "olock count"
LOGGER_SWAP_CTR, // legend "swap count"
LOGGER_PANICKED, // legend "panic"
LOGGER_PANIC_ERRNO, // legend "panic errno"
LOGGER_STATUS_NUM_ROWS // number of rows in the status array
} logger_status_entry;
// Logger status table as handed out by toku_logger_get_status().
typedef struct {
BOOL initialized; // set once the rows' keyname/type/legend have been filled in
TOKU_ENGINE_STATUS_ROW_S status[LOGGER_STATUS_NUM_ROWS]; // one row per logger_status_entry value
} LOGGER_STATUS_S, *LOGGER_STATUS;
void toku_logger_get_status(TOKULOGGER logger, LOGGER_STATUS s);
int toku_get_version_of_logs_on_disk(const char *log_dir, BOOL *found_any_logs, uint32_t *version_found);

View file

@ -10,6 +10,8 @@
toku_pthread_mutex_t attr_mutex;
// used to access engine status variables
#define STATUS_VALUE(x) ct_status.status[x].value.num
const PAIR_ATTR attrs[] = {
{ .size = 20, .nonleaf_size = 13, .leaf_size = 900, .rollback_size = 123, .cache_pressure_size = 403 },
@ -62,12 +64,12 @@ run_test (void) {
CACHEFILE f1;
r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
CACHETABLE_STATUS_S ct_stat;
toku_cachetable_get_status(ct, &ct_stat);
assert(ct_stat.size_nonleaf == 0);
assert(ct_stat.size_leaf == 0);
assert(ct_stat.size_rollback == 0);
assert(ct_stat.size_cachepressure == 0);
CACHETABLE_STATUS_S ct_status;
toku_cachetable_get_status(ct, &ct_status);
assert(STATUS_VALUE(CT_SIZE_NONLEAF) == 0);
assert(STATUS_VALUE(CT_SIZE_LEAF) == 0);
assert(STATUS_VALUE(CT_SIZE_ROLLBACK) == 0);
assert(STATUS_VALUE(CT_SIZE_CACHEPRESSURE) == 0);
void* vs[n_pairs];
//void* v2;
@ -94,11 +96,11 @@ run_test (void) {
expect.cache_pressure_size += attrs[i].cache_pressure_size;
}
toku_cachetable_get_status(ct, &ct_stat);
assert(ct_stat.size_nonleaf == (uint64_t) expect.nonleaf_size);
assert(ct_stat.size_leaf == (uint64_t) expect.leaf_size);
assert(ct_stat.size_rollback == (uint64_t) expect.rollback_size);
assert(ct_stat.size_cachepressure == (uint64_t) expect.cache_pressure_size);
toku_cachetable_get_status(ct, &ct_status);
assert(STATUS_VALUE(CT_SIZE_NONLEAF ) == (uint64_t) expect.nonleaf_size);
assert(STATUS_VALUE(CT_SIZE_LEAF ) == (uint64_t) expect.leaf_size);
assert(STATUS_VALUE(CT_SIZE_ROLLBACK ) == (uint64_t) expect.rollback_size);
assert(STATUS_VALUE(CT_SIZE_CACHEPRESSURE) == (uint64_t) expect.cache_pressure_size);
void *big_v;
long big_s;
@ -116,11 +118,11 @@ run_test (void) {
usleep(2*1024*1024);
toku_cachetable_get_status(ct, &ct_stat);
assert(ct_stat.size_nonleaf == (uint64_t) expect.nonleaf_size);
assert(ct_stat.size_leaf == (uint64_t) expect.leaf_size);
assert(ct_stat.size_rollback == (uint64_t) expect.rollback_size);
assert(ct_stat.size_cachepressure == (uint64_t) expect.cache_pressure_size);
toku_cachetable_get_status(ct, &ct_status);
assert(STATUS_VALUE(CT_SIZE_NONLEAF ) == (uint64_t) expect.nonleaf_size);
assert(STATUS_VALUE(CT_SIZE_LEAF ) == (uint64_t) expect.leaf_size);
assert(STATUS_VALUE(CT_SIZE_ROLLBACK ) == (uint64_t) expect.rollback_size);
assert(STATUS_VALUE(CT_SIZE_CACHEPRESSURE) == (uint64_t) expect.cache_pressure_size);
toku_cachetable_verify(ct);
r = toku_cachefile_close(&f1, 0, FALSE, ZERO_LSN); assert(r == 0 && f1 == 0);
@ -133,3 +135,5 @@ test_main(int argc, const char *argv[]) {
run_test();
return 0;
}
#undef STATUS_VALUE

View file

@ -1,4 +1,4 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "$Id$"
#ident "Copyright (c) 2007-2010 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
@ -14,13 +14,48 @@ BOOL garbage_collection_debug = FALSE;
static void verify_snapshot_system(TOKULOGGER logger);
// accountability
static TXN_STATUS_S status;
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
static TXN_STATUS_S txn_status;
// STATUS_INIT(k, t, l): fill in the descriptive fields of row k of txn_status.
//   k - row index; its spelling is stringized and stored as the row's keyname
//   t - value stored in the row's type field
//   l - string literal; stored as the legend with the "txn: " prefix pasted on
// Wrapped in do { } while (0) so that an invocation followed by ';' is exactly
// one statement, even as the unbraced body of an if/else.
#define STATUS_INIT(k,t,l) do { \
        txn_status.status[k].keyname = #k; \
        txn_status.status[k].type    = (t); \
        txn_status.status[k].legend  = "txn: " l; \
    } while (0)
// Fill in the keyname, type and legend of every row of txn_status and mark
// the table as initialized. toku_txn_get_status() calls this when
// txn_status.initialized is still false.
static void
status_init(void) {
// Note, this function initializes the keyname, type, and legend fields only.
// Value fields are not touched here; txn_status is a file-scope static,
// so they start out as zero.
STATUS_INIT(TXN_BEGIN, UINT64, "begin");
STATUS_INIT(TXN_COMMIT, UINT64, "successful commits");
STATUS_INIT(TXN_ABORT, UINT64, "aborts");
STATUS_INIT(TXN_CLOSE, UINT64, "close (should be sum of aborts and commits)");
STATUS_INIT(TXN_NUM_OPEN, UINT64, "number currently open (should be begin - close)");
STATUS_INIT(TXN_MAX_OPEN, UINT64, "max number open simultaneously");
STATUS_INIT(TXN_OLDEST_LIVE, UINT64, "xid of oldest live transaction");
STATUS_INIT(TXN_OLDEST_STARTTIME, UNIXTIME, "start time of oldest live transaction");
txn_status.initialized = true;
}
#undef STATUS_INIT
#define STATUS_VALUE(x) txn_status.status[x].value.num
void
toku_txn_get_status(TXN_STATUS s) {
*s = status;
toku_txn_get_status(TOKULOGGER logger, TXN_STATUS s) {
if (!txn_status.initialized)
status_init();
{
time_t oldest_starttime;
STATUS_VALUE(TXN_OLDEST_LIVE) = toku_logger_get_oldest_living_xid(logger, &oldest_starttime);
STATUS_VALUE(TXN_OLDEST_STARTTIME) = (uint64_t) oldest_starttime;
}
*s = txn_status;
}
@ -274,10 +309,10 @@ int toku_txn_begin_with_xid (
if (r != 0) goto died;
*tokutxn = result;
status.begin++;
status.num_open++;
if (status.num_open > status.max_open)
status.max_open = status.num_open;
STATUS_VALUE(TXN_BEGIN)++;
STATUS_VALUE(TXN_NUM_OPEN)++;
if (STATUS_VALUE(TXN_NUM_OPEN) > STATUS_VALUE(TXN_MAX_OPEN))
STATUS_VALUE(TXN_MAX_OPEN) = STATUS_VALUE(TXN_NUM_OPEN);
if (garbage_collection_debug) {
verify_snapshot_system(logger);
}
@ -383,7 +418,7 @@ int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, YIELDF yield, void *yieldv
}
if (r==0) {
r = toku_rollback_commit(txn, yield, yieldv, oplsn);
status.commit++;
STATUS_VALUE(TXN_COMMIT)++;
}
// Make sure we release that lock (even if there was an error)
if (release_multi_operation_client_lock) toku_multi_operation_client_unlock();
@ -420,7 +455,7 @@ int toku_txn_abort_with_lsn(TOKUTXN txn, YIELDF yield, void *yieldv, LSN oplsn,
r = toku_log_xabort(txn->logger, &txn->do_fsync_lsn, 0, txn->txnid64);
if (r==0) {
r = toku_rollback_abort(txn, yield, yieldv, oplsn);
status.abort++;
STATUS_VALUE(TXN_ABORT)++;
}
// Make sure the multi_operation client lock is released even if there is an error
if (release_multi_operation_client_lock) toku_multi_operation_client_unlock();
@ -463,8 +498,8 @@ void toku_txn_close_txn(TOKUTXN txn) {
if (garbage_collection_debug)
verify_snapshot_system(logger);
status.close++;
status.num_open--;
STATUS_VALUE(TXN_CLOSE)++;
STATUS_VALUE(TXN_NUM_OPEN)--;
return;
}
@ -741,3 +776,5 @@ TOKUTXN_STATE
toku_txn_get_state(TOKUTXN txn) {
return txn->state;
}
#undef STATUS_VALUE

View file

@ -1,3 +1,4 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ifndef TOKUTXN_H
#define TOKUTXN_H
@ -65,16 +66,24 @@ BOOL toku_txnid_newer(TXNID a, TXNID b);
void toku_txn_force_fsync_on_commit(TOKUTXN txn);
typedef struct txn_status {
u_int64_t begin; // total number of transactions begun (does not include recovered txns)
u_int64_t commit; // successful commits
u_int64_t abort;
u_int64_t close; // should be sum of aborts and commits
u_int64_t num_open; // should be begin - close
u_int64_t max_open; // max value of num_open
// Row indices into the status[] array of TXN_STATUS_S.
// TXN_STATUS_NUM_ROWS must stay last: it is used as the array length.
typedef enum {
TXN_BEGIN, // total number of transactions begun (does not include recovered txns)
TXN_COMMIT, // successful commits
TXN_ABORT, // legend "aborts"
TXN_CLOSE, // should be sum of aborts and commits
TXN_NUM_OPEN, // should be begin - close
TXN_MAX_OPEN, // max value of num_open
TXN_OLDEST_LIVE, // xid of oldest live transaction
TXN_OLDEST_STARTTIME, // start time of oldest live txn
TXN_STATUS_NUM_ROWS // number of rows in the status array
} txn_status_entry;
// Transaction status table as handed out by toku_txn_get_status().
typedef struct {
BOOL initialized; // set once the rows' keyname/type/legend have been filled in
TOKU_ENGINE_STATUS_ROW_S status[TXN_STATUS_NUM_ROWS]; // one row per txn_status_entry value
} TXN_STATUS_S, *TXN_STATUS;
void toku_txn_get_status(TXN_STATUS s);
void toku_txn_get_status(TOKULOGGER logger, TXN_STATUS s);
BOOL toku_is_txn_in_live_root_txn_list(TOKUTXN txn, TXNID xid);

View file

@ -31,11 +31,45 @@
#define ULE_DEBUG 0
static LE_STATUS_S status;
static uint32_t ule_get_innermost_numbytes(ULE ule);
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
static LE_STATUS_S le_status;
// STATUS_INIT(k, t, l): fill in the descriptive fields of row k of le_status.
//   k - row index; its spelling is stringized and stored as the row's keyname
//   t - value stored in the row's type field
//   l - string literal; stored as the legend with the "le: " prefix pasted on
// Wrapped in do { } while (0) so that an invocation followed by ';' is exactly
// one statement, even as the unbraced body of an if/else.
#define STATUS_INIT(k,t,l) do { \
        le_status.status[k].keyname = #k; \
        le_status.status[k].type    = (t); \
        le_status.status[k].legend  = "le: " l; \
    } while (0)
// Fill in the keyname, type and legend of every row of le_status and mark
// the table as initialized. toku_le_get_status() calls this when
// le_status.initialized is still false.
static void
status_init(void) {
// Note, this function initializes the keyname, type, and legend fields only.
// Value fields are not touched here; le_status is a file-scope static,
// so they start out as zero.
STATUS_INIT(LE_MAX_COMMITTED_XR, UINT64, "max committed xr");
STATUS_INIT(LE_MAX_PROVISIONAL_XR, UINT64, "max provisional xr");
STATUS_INIT(LE_EXPANDED, UINT64, "expanded");
STATUS_INIT(LE_MAX_MEMSIZE, UINT64, "max memsize");
le_status.initialized = true;
}
#undef STATUS_INIT
// Copy the leafentry status table into *statp (caller-owned).
// The row labels are filled in on the first call, i.e. while
// le_status.initialized is still false.
void
toku_le_get_status(LE_STATUS statp) {
    if (!le_status.initialized) {
        status_init();
    }
    *statp = le_status;
}
#define STATUS_VALUE(x) le_status.status[x].value.num
///////////////////////////////////////////////////////////////////////////////////
// Accessor functions used by outside world (e.g. indexer)
//
@ -52,10 +86,6 @@ void toku_ule_free(ULEHANDLE ule_p) {
toku_free(ule_p);
}
void
toku_le_get_status(LE_STATUS s) {
*s = status;
}
///////////////////////////////////////////////////////////////////////////////////
@ -625,15 +655,15 @@ uxr_unpack_data(UXR uxr, uint8_t *p) {
// executed too often to be worth making threadsafe
static inline void
update_le_status(ULE ule, size_t memsize, LE_STATUS s) {
if (ule->num_cuxrs > s->max_committed_xr)
s->max_committed_xr = ule->num_cuxrs;
if (ule->num_puxrs > s->max_provisional_xr)
s->max_provisional_xr = ule->num_puxrs;
update_le_status(ULE ule, size_t memsize) {
if (ule->num_cuxrs > STATUS_VALUE(LE_MAX_COMMITTED_XR))
STATUS_VALUE(LE_MAX_COMMITTED_XR) = ule->num_cuxrs;
if (ule->num_puxrs > STATUS_VALUE(LE_MAX_PROVISIONAL_XR))
STATUS_VALUE(LE_MAX_PROVISIONAL_XR) = ule->num_puxrs;
if (ule->num_cuxrs > MAX_TRANSACTION_RECORDS)
s->expanded++;
if (memsize > s->max_memsize)
s->max_memsize = memsize;
STATUS_VALUE(LE_EXPANDED)++;
if (memsize > STATUS_VALUE(LE_MAX_MEMSIZE))
STATUS_VALUE(LE_MAX_MEMSIZE) = memsize;
}
// Purpose is to return a newly allocated leaf entry in packed format, or
@ -801,7 +831,7 @@ found_insert:;
*new_leafentry_memorysize = memsize;
rval = 0;
cleanup:
update_le_status(ule, memsize, &status);
update_le_status(ule, memsize);
return rval;
}
@ -2260,5 +2290,7 @@ toku_le_upgrade_13_14(LEAFENTRY_13 old_leafentry,
void __attribute__((__constructor__)) toku_ule_drd_ignore(void);
void
toku_ule_drd_ignore(void) {
DRD_IGNORE_VAR(status);
DRD_IGNORE_VAR(le_status);
}
#undef STATUS_VALUE

View file

@ -25,27 +25,46 @@ struct ydb_big_lock {
};
static struct ydb_big_lock ydb_big_lock;
// status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
static SCHEDULE_STATUS_S status;
// Return the larger of two unsigned 64-bit values (b when they are equal).
static inline u_int64_t u64max(u_int64_t a, u_int64_t b) {
    if (a > b) {
        return a;
    }
    return b;
}
static void
init_status(void) {
status.ydb_lock_ctr = 0;
status.num_waiters_now = 0;
status.max_waiters = 0;
status.total_sleep_time = 0;
status.max_time_ydb_lock_held = 0;
status.total_time_ydb_lock_held = 0;
status.total_time_since_start = 0;
/* Status is intended for display to humans to help understand system behavior.
* It does not need to be perfectly thread-safe.
*/
static volatile YDB_LOCK_STATUS_S ydb_lock_status;
// STATUS_INIT(k, t, l): fill in the descriptive fields of row k of ydb_lock_status.
//   k - row index; its spelling is stringized and stored as the row's keyname
//   t - value stored in the row's type field
//   l - string literal; stored as the legend with the "ydb lock: " prefix pasted on
// Wrapped in do { } while (0) so that an invocation followed by ';' is exactly
// one statement, even as the unbraced body of an if/else.
#define STATUS_INIT(k,t,l) do { \
        ydb_lock_status.status[k].keyname = #k; \
        ydb_lock_status.status[k].type    = (t); \
        ydb_lock_status.status[k].legend  = "ydb lock: " l; \
    } while (0)
// Fill in the keyname, type and legend of every row of ydb_lock_status and
// mark the table as initialized. toku_ydb_lock_get_status() calls this when
// ydb_lock_status.initialized is still false.
static void
status_init(void) {
// Note, this function initializes the keyname, type, and legend fields only.
// Value fields are not touched here; ydb_lock_status is a file-scope static,
// so they start out as zero.
STATUS_INIT(YDB_LOCK_TAKEN, UINT64, "taken");
STATUS_INIT(YDB_LOCK_RELEASED, UINT64, "released");
STATUS_INIT(YDB_NUM_WAITERS_NOW, UINT64, "num waiters now");
STATUS_INIT(YDB_MAX_WAITERS, UINT64, "max waiters");
STATUS_INIT(YDB_TOTAL_SLEEP_TIME, UINT64, "total sleep time (usec)");
STATUS_INIT(YDB_MAX_TIME_YDB_LOCK_HELD, TOKUTIME, "max time held (sec)");
STATUS_INIT(YDB_TOTAL_TIME_YDB_LOCK_HELD, TOKUTIME, "total time held (sec)");
STATUS_INIT(YDB_TOTAL_TIME_SINCE_START, TOKUTIME, "total time since start (sec)");
ydb_lock_status.initialized = true;
}
#undef STATUS_INIT
// Copy the ydb-lock status table into *statp (caller-owned).
// The row labels are filled in on the first call, i.e. while
// ydb_lock_status.initialized is still false. No lock is taken in this
// function; the copy is a plain struct assignment.
void
toku_ydb_lock_get_status(YDB_LOCK_STATUS statp) {
    if (!ydb_lock_status.initialized) {
        status_init();
    }
    *statp = ydb_lock_status;
}
void
toku_ydb_lock_get_status(SCHEDULE_STATUS statp) {
*statp = status;
}
#define STATUS_VALUE(x) ydb_lock_status.status[x].value.num
/* End of status section.
*/
int
toku_ydb_lock_init(void) {
@ -53,7 +72,6 @@ toku_ydb_lock_init(void) {
r = toku_pthread_mutex_init(&ydb_big_lock.lock, NULL); resource_assert_zero(r);
ydb_big_lock.starttime = get_tokutime();
ydb_big_lock.acquired_time = 0;
init_status();
return r;
}
@ -66,7 +84,7 @@ toku_ydb_lock_destroy(void) {
void
toku_ydb_lock(void) {
u_int32_t new_num_waiters = __sync_add_and_fetch(&status.num_waiters_now, 1);
u_int32_t new_num_waiters = __sync_add_and_fetch(&STATUS_VALUE(YDB_NUM_WAITERS_NOW), 1);
int r = toku_pthread_mutex_lock(&ydb_big_lock.lock); resource_assert_zero(r);
@ -76,30 +94,29 @@ toku_ydb_lock(void) {
ydb_big_lock.acquired_time = now;
// Update status
status.ydb_lock_ctr++;
if (new_num_waiters > status.max_waiters) status.max_waiters = new_num_waiters;
status.total_time_since_start = now - ydb_big_lock.starttime;
// invariant((status.ydb_lock_ctr & 0x01) == 1);
STATUS_VALUE(YDB_LOCK_TAKEN)++;
if (new_num_waiters > STATUS_VALUE(YDB_MAX_WAITERS))
STATUS_VALUE(YDB_MAX_WAITERS) = new_num_waiters;
STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime;
}
static void
ydb_unlock_internal(unsigned long useconds) {
status.ydb_lock_ctr++;
// invariant((status.ydb_lock_ctr & 0x01) == 0);
STATUS_VALUE(YDB_LOCK_RELEASED)++;
tokutime_t now = get_tokutime();
tokutime_t time_held = now - ydb_big_lock.acquired_time;
status.total_time_ydb_lock_held += time_held;
if (time_held > status.max_time_ydb_lock_held) status.max_time_ydb_lock_held = time_held;
status.total_time_since_start = now - ydb_big_lock.starttime;
STATUS_VALUE(YDB_TOTAL_TIME_YDB_LOCK_HELD) += time_held;
if (time_held > STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD))
STATUS_VALUE(YDB_MAX_TIME_YDB_LOCK_HELD) = time_held;
STATUS_VALUE(YDB_TOTAL_TIME_SINCE_START) = now - ydb_big_lock.starttime;
int r = toku_pthread_mutex_unlock(&ydb_big_lock.lock); resource_assert_zero(r);
int new_num_waiters = __sync_add_and_fetch(&status.num_waiters_now, -1);
int new_num_waiters = __sync_add_and_fetch(&STATUS_VALUE(YDB_NUM_WAITERS_NOW), -1);
if (new_num_waiters > 0 && useconds > 0) {
__sync_add_and_fetch(&status.total_sleep_time, useconds);
__sync_add_and_fetch(&STATUS_VALUE(YDB_TOTAL_SLEEP_TIME), useconds);
usleep(useconds);
}
}
@ -118,3 +135,5 @@ toku_pthread_mutex_t *
toku_ydb_mutex(void) {
return &ydb_big_lock.lock;
}
#undef STATUS_VALUE

View file

@ -24,8 +24,49 @@
#include "xids.h"
#include "log-internal.h"
// for now
static INDEXER_STATUS_S status;
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
static INDEXER_STATUS_S indexer_status;
// STATUS_INIT(k, t, l): fill in the descriptive fields of row k of indexer_status.
//   k - row index; its spelling is stringized and stored as the row's keyname
//   t - value stored in the row's type field
//   l - string literal; stored as the legend with the "indexer: " prefix pasted on
// Wrapped in do { } while (0) so that an invocation followed by ';' is exactly
// one statement, even as the unbraced body of an if/else.
#define STATUS_INIT(k,t,l) do { \
        indexer_status.status[k].keyname = #k; \
        indexer_status.status[k].type    = (t); \
        indexer_status.status[k].legend  = "indexer: " l; \
    } while (0)
// Fill in the keyname, type and legend of every row of indexer_status and
// mark the table as initialized. toku_indexer_get_status() calls this when
// indexer_status.initialized is still false.
static void
status_init(void) {
// Note, this function initializes the keyname, type, and legend fields only.
// Value fields are not touched here; indexer_status is a file-scope static,
// so they start out as zero.
STATUS_INIT(INDEXER_CREATE, UINT64, "number of indexers successfully created");
STATUS_INIT(INDEXER_CREATE_FAIL, UINT64, "number of calls to toku_indexer_create_indexer() that failed");
STATUS_INIT(INDEXER_BUILD, UINT64, "number of calls to indexer->build() succeeded");
STATUS_INIT(INDEXER_BUILD_FAIL, UINT64, "number of calls to indexer->build() failed");
STATUS_INIT(INDEXER_CLOSE, UINT64, "number of calls to indexer->close() that succeeded");
STATUS_INIT(INDEXER_CLOSE_FAIL, UINT64, "number of calls to indexer->close() that failed");
STATUS_INIT(INDEXER_ABORT, UINT64, "number of calls to indexer->abort()");
STATUS_INIT(INDEXER_CURRENT, UINT64, "number of indexers currently in existence");
STATUS_INIT(INDEXER_MAX, UINT64, "max number of indexers that ever existed simultaneously");
indexer_status.initialized = true;
}
#undef STATUS_INIT
// Copy the indexer status table into *statp (caller-owned).
// The row labels are filled in on the first call, i.e. while
// indexer_status.initialized is still false.
void
toku_indexer_get_status(INDEXER_STATUS statp) {
    if (!indexer_status.initialized) {
        status_init();
    }
    *statp = indexer_status;
}
#define STATUS_VALUE(x) indexer_status.status[x].value.num
#include "indexer-internal.h"
@ -175,13 +216,13 @@ create_exit:
*indexerp = indexer;
(void) __sync_fetch_and_add(&status.create, 1);
(void) __sync_fetch_and_add(&status.current, 1);
if ( status.current > status.max )
status.max = status.current; // not worth a lock to make threadsafe, may be inaccurate
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_CREATE), 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_CURRENT), 1);
if ( STATUS_VALUE(INDEXER_CURRENT) > STATUS_VALUE(INDEXER_MAX) )
STATUS_VALUE(INDEXER_MAX) = STATUS_VALUE(INDEXER_CURRENT); // not worth a lock to make threadsafe, may be inaccurate
} else {
(void) __sync_fetch_and_add(&status.create_fail, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_CREATE_FAIL), 1);
free_indexer(indexer);
}
@ -270,9 +311,9 @@ build_index(DB_INDEXER *indexer) {
// - unique checks?
if ( result == 0 ) {
(void) __sync_fetch_and_add(&status.build, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_BUILD), 1);
} else {
(void) __sync_fetch_and_add(&status.build_fail, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_BUILD_FAIL), 1);
}
@ -282,7 +323,7 @@ build_index(DB_INDEXER *indexer) {
static int
close_indexer(DB_INDEXER *indexer) {
int r = 0;
(void) __sync_fetch_and_sub(&status.current, 1);
(void) __sync_fetch_and_sub(&STATUS_VALUE(INDEXER_CURRENT), 1);
toku_ydb_lock();
{
@ -307,17 +348,17 @@ close_indexer(DB_INDEXER *indexer) {
toku_ydb_unlock();
if ( r == 0 ) {
(void) __sync_fetch_and_add(&status.close, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_CLOSE), 1);
} else {
(void) __sync_fetch_and_add(&status.close_fail, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_CLOSE_FAIL), 1);
}
return r;
}
static int
abort_indexer(DB_INDEXER *indexer) {
(void) __sync_fetch_and_sub(&status.current, 1);
(void) __sync_fetch_and_add(&status.abort, 1);
(void) __sync_fetch_and_sub(&STATUS_VALUE(INDEXER_CURRENT), 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(INDEXER_ABORT), 1);
toku_ydb_lock();
{
@ -391,10 +432,6 @@ maybe_call_poll_func(DB_INDEXER *indexer, uint64_t loop_count) {
return result;
}
void
toku_indexer_get_status(INDEXER_STATUS s) {
*s = status;
}
// this allows us to force errors under test. Flags are defined in indexer.h
void
@ -407,3 +444,7 @@ DB *
toku_indexer_get_src_db(DB_INDEXER *indexer) {
return indexer->i->src_db;
}
#undef STATUS_VALUE

View file

@ -1,3 +1,4 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ifndef TOKU_INDEXER_H
#define TOKU_INDEXER_H
@ -70,16 +71,22 @@ void toku_indexer_set_test_only_flags(DB_INDEXER *indexer, int flags) __attribut
#define INDEXER_TEST_ONLY_ERROR_CALLBACK 1
typedef struct indexer_status {
uint64_t create; // number of indexers successfully created
uint64_t create_fail; // number of calls to toku_indexer_create_indexer() that failed
uint64_t build; // number of calls to indexer->build() succeeded
uint64_t build_fail; // number of calls to indexer->build() failed
uint64_t close; // number of calls to indexer->close() that succeeded
uint64_t close_fail; // number of calls to indexer->close() that failed
uint64_t abort; // number of calls to indexer->abort()
uint32_t current; // number of indexers currently in existence
uint32_t max; // max number of indexers that ever existed simultaneously
// Row indices into the status[] array of INDEXER_STATUS_S.
// INDEXER_STATUS_NUM_ROWS must stay last: it is used as the array length.
typedef enum {
INDEXER_CREATE = 0, // number of indexers successfully created
INDEXER_CREATE_FAIL, // number of calls to toku_indexer_create_indexer() that failed
INDEXER_BUILD, // number of calls to indexer->build() succeeded
INDEXER_BUILD_FAIL, // number of calls to indexer->build() failed
INDEXER_CLOSE, // number of calls to indexer->close() that succeeded
INDEXER_CLOSE_FAIL, // number of calls to indexer->close() that failed
INDEXER_ABORT, // number of calls to indexer->abort()
INDEXER_CURRENT, // number of indexers currently in existence
INDEXER_MAX, // max number of indexers that ever existed simultaneously
INDEXER_STATUS_NUM_ROWS // number of rows in the status array
} indexer_status_entry;
// Indexer status table as handed out by toku_indexer_get_status().
typedef struct {
BOOL initialized; // set once the rows' keyname/type/legend have been filled in
TOKU_ENGINE_STATUS_ROW_S status[INDEXER_STATUS_NUM_ROWS]; // one row per indexer_status_entry value
} INDEXER_STATUS_S, *INDEXER_STATUS;
void toku_indexer_get_status(INDEXER_STATUS s);

View file

@ -28,7 +28,46 @@
enum {MAX_FILE_SIZE=256};
static LOADER_STATUS_S status; // accountability
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
static LOADER_STATUS_S loader_status;
#define STATUS_INIT(k,t,l) { \
loader_status.status[k].keyname = #k; \
loader_status.status[k].type = t; \
loader_status.status[k].legend = "loader: " l; \
}
// Fill in the descriptive fields (keyname, type, legend) of every row of the
// file-scope loader_status table, then mark the table as initialized.
// STATUS_INIT stringifies the enum name into keyname and prefixes each
// legend with "loader: ".
static void
status_init(void) {
// Note, this function initializes the keyname, type, and legend fields.
// Value fields are initialized to zero by compiler.
STATUS_INIT(LOADER_CREATE, UINT64, "number of loaders successfully created");
STATUS_INIT(LOADER_CREATE_FAIL, UINT64, "number of calls to toku_loader_create_loader() that failed");
STATUS_INIT(LOADER_PUT, UINT64, "number of calls to loader->put() succeeded");
STATUS_INIT(LOADER_PUT_FAIL, UINT64, "number of calls to loader->put() failed");
STATUS_INIT(LOADER_CLOSE, UINT64, "number of calls to loader->close() that succeeded");
STATUS_INIT(LOADER_CLOSE_FAIL, UINT64, "number of calls to loader->close() that failed");
STATUS_INIT(LOADER_ABORT, UINT64, "number of calls to loader->abort()");
STATUS_INIT(LOADER_CURRENT, UINT64, "number of loaders currently in existence");
STATUS_INIT(LOADER_MAX, UINT64, "max number of loaders that ever existed simultaneously");
// Set last so readers that test this flag only see a fully described table.
loader_status.initialized = true;
}
#undef STATUS_INIT
// Copy a snapshot of the loader status table into *statp.
// The row descriptions are filled in lazily the first time status is requested.
void
toku_loader_get_status(LOADER_STATUS statp) {
    if (!loader_status.initialized) {
        status_init();
    }
    *statp = loader_status;
}
#define STATUS_VALUE(x) loader_status.status[x].value.num
struct __toku_loader_internal {
DB_ENV *env;
@ -253,13 +292,13 @@ int toku_loader_create_loader(DB_ENV *env,
create_exit:
loader_add_refs(loader);
if (rval == 0) {
(void) __sync_fetch_and_add(&status.create, 1);
(void) __sync_fetch_and_add(&status.current, 1);
if (status.current > status.max)
status.max = status.current; // not worth a lock to make threadsafe, may be inaccurate
(void) __sync_fetch_and_add(&STATUS_VALUE(LOADER_CREATE), 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(LOADER_CURRENT), 1);
if (STATUS_VALUE(LOADER_CURRENT) > STATUS_VALUE(LOADER_MAX) )
STATUS_VALUE(LOADER_MAX) = STATUS_VALUE(LOADER_CURRENT); // not worth a lock to make threadsafe, may be inaccurate
}
else {
(void) __sync_fetch_and_add(&status.create_fail, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(LOADER_CREATE_FAIL), 1);
free_loader(loader);
}
return rval;
@ -336,15 +375,15 @@ int toku_loader_put(DB_LOADER *loader, DBT *key, DBT *val)
}
cleanup:
if (r==0)
status.put++; // executed too often to be worth making threadsafe
STATUS_VALUE(LOADER_PUT)++; // executed too often to be worth making threadsafe
else
status.put_fail++;
STATUS_VALUE(LOADER_PUT_FAIL)++;
return r;
}
int toku_loader_close(DB_LOADER *loader)
{
(void) __sync_fetch_and_sub(&status.current, 1);
(void) __sync_fetch_and_sub(&STATUS_VALUE(LOADER_CURRENT), 1);
int r=0;
if ( loader->i->err_errno != 0 ) {
if ( loader->i->error_callback != NULL ) {
@ -380,16 +419,16 @@ int toku_loader_close(DB_LOADER *loader)
free_loader(loader);
toku_ydb_unlock();
if (r==0)
(void) __sync_fetch_and_add(&status.close, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(LOADER_CLOSE), 1);
else
(void) __sync_fetch_and_add(&status.close_fail, 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(LOADER_CLOSE_FAIL), 1);
return r;
}
int toku_loader_abort(DB_LOADER *loader)
{
(void) __sync_fetch_and_sub(&status.current, 1);
(void) __sync_fetch_and_add(&status.abort, 1);
(void) __sync_fetch_and_sub(&STATUS_VALUE(LOADER_CURRENT), 1);
(void) __sync_fetch_and_add(&STATUS_VALUE(LOADER_ABORT), 1);
int r=0;
if ( loader->i->err_errno != 0 ) {
if ( loader->i->error_callback != NULL ) {
@ -442,7 +481,7 @@ exit:
return result;
}
void
toku_loader_get_status(LOADER_STATUS s) {
*s = status;
}
#undef STATUS_VALUE

View file

@ -1,3 +1,4 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ifndef TOKU_LOADER_H
#define TOKU_LOADER_H
@ -108,18 +109,26 @@ int toku_loader_abort(DB_LOADER *loader);
// Remove any loader temp files that may have been left from a crashed system
int toku_loader_cleanup_temp_files(DB_ENV *env);
typedef struct loader_status {
uint64_t create; // number of loaders succefully created
uint64_t create_fail; // number of calls to toku_loader_create_loader() that failed
uint64_t put; // number of calls to toku_loader_put() that succeeded
uint64_t put_fail; // number of calls to toku_loader_put() that failed
uint64_t close; // number of calls to toku_loader_close()
uint64_t close_fail; // number of calls to toku_loader_close() that failed
uint64_t abort; // number of calls to toku_loader_abort()
uint32_t current; // number of loaders currently in existence
uint32_t max; // max number of loaders that ever existed simultaneously
// Row indexes of the loader status table.  Each enumerator names one counter;
// LOADER_STATUS_NUM_ROWS is the row count used to size the table.
typedef enum {
LOADER_CREATE = 0, // number of loaders successfully created
LOADER_CREATE_FAIL, // number of calls to toku_loader_create_loader() that failed
LOADER_PUT, // number of calls to toku_loader_put() that succeeded
LOADER_PUT_FAIL, // number of calls to toku_loader_put() that failed
LOADER_CLOSE, // number of calls to toku_loader_close() that succeeded
LOADER_CLOSE_FAIL, // number of calls to toku_loader_close() that failed
LOADER_ABORT, // number of calls to toku_loader_abort()
LOADER_CURRENT, // number of loaders currently in existence
LOADER_MAX, // max number of loaders that ever existed simultaneously
LOADER_STATUS_NUM_ROWS // number of rows in the status table (not a counter)
} loader_status_entry;
typedef struct {
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[LOADER_STATUS_NUM_ROWS];
} LOADER_STATUS_S, *LOADER_STATUS;
void toku_loader_get_status(LOADER_STATUS s);

View file

@ -36,6 +36,58 @@
static int toku_lt_debug = 0;
#endif
///////////////////////////////////////////////////////////////////////////////////
// Engine status
//
// Status is intended for display to humans to help understand system behavior.
// It does not need to be perfectly thread-safe.
#define STATUS_INIT(k,t,l) { \
mgr->status.status[k].keyname = #k; \
mgr->status.status[k].type = t; \
mgr->status.status[k].legend = "row locks: " l; \
}
// Fill in the descriptive fields (keyname, type, legend) of every row of
// mgr's status table, then mark the table as initialized.
// STATUS_INIT stringifies the enum name into keyname and prefixes each
// legend with "row locks: ".
static void
status_init(toku_ltm* mgr) {
    // Note, this function initializes the keyname, type, and legend fields.
    // Value fields are not touched here.
    STATUS_INIT(LTM_LOCKS_LIMIT,               UINT64, "number of locks allowed");
    STATUS_INIT(LTM_LOCKS_CURR,                UINT64, "number of locks in existence");
    STATUS_INIT(LTM_LOCK_MEMORY_LIMIT,         UINT64, "maximum amount of memory allowed for locks");
    // This row reports current usage (it is fed from curr_lock_memory), so its
    // legend must not repeat the text of the LIMIT row above.
    STATUS_INIT(LTM_LOCK_MEMORY_CURR,          UINT64, "amount of memory currently in use for locks");
    STATUS_INIT(LTM_LOCK_ESCALATION_SUCCESSES, UINT64, "number of times lock escalation succeeded");
    STATUS_INIT(LTM_LOCK_ESCALATION_FAILURES,  UINT64, "number of times lock escalation failed");
    STATUS_INIT(LTM_READ_LOCK,                 UINT64, "number of times read lock taken successfully");
    STATUS_INIT(LTM_READ_LOCK_FAIL,            UINT64, "number of times read lock denied");
    STATUS_INIT(LTM_OUT_OF_READ_LOCKS,         UINT64, "number of times read lock denied for out_of_locks");
    STATUS_INIT(LTM_WRITE_LOCK,                UINT64, "number of times write lock taken successfully");
    STATUS_INIT(LTM_WRITE_LOCK_FAIL,           UINT64, "number of times write lock denied");
    STATUS_INIT(LTM_OUT_OF_WRITE_LOCKS,        UINT64, "number of times write lock denied for out_of_locks");
    STATUS_INIT(LTM_LT_CREATE,                 UINT64, "number of locktrees created");
    STATUS_INIT(LTM_LT_CREATE_FAIL,            UINT64, "number of locktrees unable to be created");
    STATUS_INIT(LTM_LT_DESTROY,                UINT64, "number of locktrees destroyed");
    STATUS_INIT(LTM_LT_NUM,                    UINT64, "number of locktrees (should be created - destroyed)");
    STATUS_INIT(LTM_LT_NUM_MAX,                UINT64, "max number of locktrees that have existed simultaneously");
    mgr->status.initialized = true;
}
#undef STATUS_INIT
#define STATUS_VALUE(x) status.status[x].value.num
// Copy a snapshot of the lock tree manager's status table into *statp.
// Row descriptions are filled in lazily on first use.  The limit and
// current-usage rows are refreshed from the manager's live fields just
// before the copy is taken.
void
toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS statp) {
    if (!mgr->status.initialized) {
        status_init(mgr);
    }
    // Configured limits.
    mgr->STATUS_VALUE(LTM_LOCKS_LIMIT)       = mgr->locks_limit;
    mgr->STATUS_VALUE(LTM_LOCK_MEMORY_LIMIT) = mgr->lock_memory_limit;
    // Current usage.
    mgr->STATUS_VALUE(LTM_LOCKS_CURR)        = mgr->curr_locks;
    mgr->STATUS_VALUE(LTM_LOCK_MEMORY_CURR)  = mgr->curr_lock_memory;
    *statp = mgr->status;
}
static inline int lt_panic(toku_lock_tree *tree, int r) {
return tree->panic(tree->db, r);
}
@ -179,14 +231,14 @@ toku_lt_point_cmp(const toku_point* x, const toku_point* y) {
/* Lock tree manager functions begin here */
int
toku_ltm_create(toku_ltm** pmgr,
uint32_t max_locks,
uint64_t max_lock_memory,
uint32_t locks_limit,
uint64_t lock_memory_limit,
int (*panic)(DB*, int),
toku_dbt_cmp (*get_compare_fun_from_db)(DB*)) {
int r = ENOSYS;
toku_ltm* tmp_mgr = NULL;
if (!pmgr || !max_locks) {
if (!pmgr || !locks_limit) {
r = EINVAL; goto cleanup;
}
assert(panic && get_compare_fun_from_db);
@ -197,10 +249,10 @@ toku_ltm_create(toku_ltm** pmgr,
}
memset(tmp_mgr, 0, sizeof(toku_ltm));
r = toku_ltm_set_max_locks(tmp_mgr, max_locks);
r = toku_ltm_set_max_locks(tmp_mgr, locks_limit);
if (r != 0)
goto cleanup;
r = toku_ltm_set_max_lock_memory(tmp_mgr, max_lock_memory);
r = toku_ltm_set_max_lock_memory(tmp_mgr, lock_memory_limit);
if (r != 0)
goto cleanup;
tmp_mgr->panic = panic;
@ -262,50 +314,40 @@ cleanup:
return r;
}
void
toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
uint64_t *max_lock_memory, uint64_t *curr_lock_memory,
LTM_STATUS s) {
*max_locks = mgr->max_locks;
*curr_locks = mgr->curr_locks;
*max_lock_memory = mgr->max_lock_memory;
*curr_lock_memory = mgr->curr_lock_memory;
*s = mgr->status;
}
int
toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* max_locks) {
if (!mgr || !max_locks)
toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit) {
if (!mgr || !locks_limit)
return EINVAL;
*max_locks = mgr->max_locks;
*locks_limit = mgr->locks_limit;
return 0;
}
int
toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t max_locks) {
if (!mgr || !max_locks)
toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t locks_limit) {
if (!mgr || !locks_limit)
return EINVAL;
if (max_locks < mgr->curr_locks)
if (locks_limit < mgr->curr_locks)
return EDOM;
mgr->max_locks = max_locks;
mgr->locks_limit = locks_limit;
return 0;
}
int
toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* max_lock_memory) {
if (!mgr || !max_lock_memory)
toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit) {
if (!mgr || !lock_memory_limit)
return EINVAL;
*max_lock_memory = mgr->max_lock_memory;
*lock_memory_limit = mgr->lock_memory_limit;
return 0;
}
// Set the maximum amount of memory the lock tree manager may use for locks.
// Returns:
//   0       on success
//   EINVAL  if mgr is NULL or lock_memory_limit is 0
//   EDOM    if lock_memory_limit is below the lock memory currently in use
int
toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit) {
    if (!mgr || !lock_memory_limit)
        return EINVAL;
    // Compare against the memory currently in use, not the number of locks:
    // the two quantities have different units.
    if (lock_memory_limit < mgr->curr_lock_memory)
        return EDOM;
    mgr->lock_memory_limit = lock_memory_limit;
    return 0;
}
@ -326,7 +368,7 @@ ltm_decr_locks(toku_ltm* tree_mgr, uint32_t locks) {
static int
ltm_out_of_locks(toku_ltm *mgr) {
int r = 0;
if (mgr->curr_locks >= mgr->max_locks || mgr->curr_lock_memory >= mgr->max_lock_memory)
if (mgr->curr_locks >= mgr->locks_limit || mgr->curr_lock_memory >= mgr->lock_memory_limit)
r = TOKUDB_OUT_OF_LOCKS;
return r;
}
@ -1459,10 +1501,10 @@ toku_ltm_get_lt(toku_ltm* mgr, toku_lock_tree** ptree,
r = 0;
cleanup:
if (r == 0) {
mgr->status.lt_create++;
mgr->status.lt_num++;
if (mgr->status.lt_num > mgr->status.lt_num_max)
mgr->status.lt_num_max = mgr->status.lt_num;
mgr->STATUS_VALUE(LTM_LT_CREATE)++;
mgr->STATUS_VALUE(LTM_LT_NUM)++;
if (mgr->STATUS_VALUE(LTM_LT_NUM) > mgr->STATUS_VALUE(LTM_LT_NUM_MAX))
mgr->STATUS_VALUE(LTM_LT_NUM_MAX) = mgr->STATUS_VALUE(LTM_LT_NUM);
}
else {
if (tree != NULL) {
@ -1474,7 +1516,7 @@ cleanup:
lt_remove_db(tree, db);
toku_lt_close(tree);
}
mgr->status.lt_create_fail++;
mgr->STATUS_VALUE(LTM_LT_CREATE_FAIL)++;
}
return r;
}
@ -1486,8 +1528,8 @@ toku_lt_close(toku_lock_tree* tree) {
if (!tree) {
r = EINVAL; goto cleanup;
}
tree->mgr->status.lt_destroy++;
tree->mgr->status.lt_num--;
tree->mgr->STATUS_VALUE(LTM_LT_DESTROY)++;
tree->mgr->STATUS_VALUE(LTM_LT_NUM)--;
toku_lock_request_tree_destroy(tree);
r = toku_rt_close(tree->borderwrite);
if (!first_error && r != 0)
@ -1841,23 +1883,22 @@ toku_lt_acquire_range_read_lock(toku_lock_tree* tree, DB* db, TXNID txn, const D
r = lt_try_acquire_range_read_lock(tree, db, txn,
key_left, key_right);
if (r == 0) {
tree->mgr->status.lock_escalation_successes++;
tree->mgr->STATUS_VALUE(LTM_LOCK_ESCALATION_SUCCESSES)++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
tree->mgr->status.lock_escalation_failures++;
tree->mgr->STATUS_VALUE(LTM_LOCK_ESCALATION_FAILURES)++;
}
}
}
if (tree) {
LTM_STATUS s = &(tree->mgr->status);
if (r == 0) {
s->read_lock++;
tree->mgr->STATUS_VALUE(LTM_READ_LOCK)++;
}
else {
s->read_lock_fail++;
tree->mgr->STATUS_VALUE(LTM_READ_LOCK_FAIL)++;
if (r == TOKUDB_OUT_OF_LOCKS)
s->out_of_read_locks++;
tree->mgr->STATUS_VALUE(LTM_OUT_OF_READ_LOCKS)++;
}
}
return r;
@ -1959,23 +2000,22 @@ toku_lt_acquire_range_write_lock(toku_lock_tree* tree, DB* db, TXNID txn, const
if (r == 0) {
r = lt_try_acquire_range_write_lock(tree, db, txn, key_left, key_right);
if (r == 0) {
tree->mgr->status.lock_escalation_successes++;
tree->mgr->STATUS_VALUE(LTM_LOCK_ESCALATION_SUCCESSES)++;
}
else if (r==TOKUDB_OUT_OF_LOCKS) {
tree->mgr->status.lock_escalation_failures++;
tree->mgr->STATUS_VALUE(LTM_LOCK_ESCALATION_FAILURES)++;
}
}
}
if (tree) {
LTM_STATUS s = &(tree->mgr->status);
if (r == 0) {
s->write_lock++;
tree->mgr->STATUS_VALUE(LTM_WRITE_LOCK)++;
}
else {
s->write_lock_fail++;
tree->mgr->STATUS_VALUE(LTM_WRITE_LOCK_FAIL)++;
if (r == TOKUDB_OUT_OF_LOCKS)
s->out_of_write_locks++;
tree->mgr->STATUS_VALUE(LTM_OUT_OF_WRITE_LOCKS)++;
}
}
return r;
@ -2785,3 +2825,5 @@ toku_lt_verify(toku_lock_tree *lt, DB *db) {
lt_verify(lt);
lt_clear_comparison_functions(lt);
}
#undef STATUS_VALUE

View file

@ -118,32 +118,39 @@ struct __toku_lock_tree {
};
// accountability
typedef struct ltm_status {
uint32_t lock_escalation_successes; // number of times lock escalation succeeded
uint32_t lock_escalation_failures; // number of times lock escalation failed
uint64_t read_lock; // number of times read lock taken successfully
uint64_t read_lock_fail; // number of times read lock denied
uint64_t out_of_read_locks; // number of times read lock denied for out_of_locks
uint64_t write_lock; // number of times write lock taken successfully
uint64_t write_lock_fail; // number of times write lock denied
uint64_t out_of_write_locks; // number of times write lock denied for out_of_locks
uint64_t lt_create; // number of locktrees created
uint64_t lt_create_fail; // number of locktrees unable to be created
uint64_t lt_destroy; // number of locktrees destroyed
uint64_t lt_num; // number of locktrees (should be created - destroyed)
uint64_t lt_num_max; // max number of locktrees that have existed simultaneously
// Row indexes of the lock tree manager status table.  Each enumerator names
// one row; LTM_STATUS_NUM_ROWS is the row count used to size the table.
typedef enum {
LTM_LOCKS_LIMIT, // number of locks allowed (obsolete)
LTM_LOCKS_CURR, // number of locks in existence
LTM_LOCK_MEMORY_LIMIT, // maximum amount of memory allowed for locks
LTM_LOCK_MEMORY_CURR, // amount of memory currently in use for locks
LTM_LOCK_ESCALATION_SUCCESSES, // number of times lock escalation succeeded
LTM_LOCK_ESCALATION_FAILURES, // number of times lock escalation failed
LTM_READ_LOCK, // number of times read lock taken successfully
LTM_READ_LOCK_FAIL, // number of times read lock denied
LTM_OUT_OF_READ_LOCKS, // number of times read lock denied for out_of_locks
LTM_WRITE_LOCK, // number of times write lock taken successfully
LTM_WRITE_LOCK_FAIL, // number of times write lock denied
LTM_OUT_OF_WRITE_LOCKS, // number of times write lock denied for out_of_locks
LTM_LT_CREATE, // number of locktrees created
LTM_LT_CREATE_FAIL, // number of locktrees unable to be created
LTM_LT_DESTROY, // number of locktrees destroyed
LTM_LT_NUM, // number of locktrees (should be created - destroyed)
LTM_LT_NUM_MAX, // max number of locktrees that have existed simultaneously
LTM_STATUS_NUM_ROWS // number of rows in the status table (not a counter)
} ltm_status_entry;
typedef struct {
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[LTM_STATUS_NUM_ROWS];
} LTM_STATUS_S, *LTM_STATUS;
struct __toku_ltm {
/** The maximum number of locks allowed for the environment. */
uint32_t max_locks;
uint64_t locks_limit;
/** The current number of locks for the environment. */
uint32_t curr_locks;
uint64_t curr_locks;
/** The maximum amount of memory for locks allowed for the environment. */
uint64_t max_lock_memory;
uint64_t lock_memory_limit;
/** The current amount of memory for locks for the environment. */
uint64_t curr_lock_memory;
/** Status / accountability information */
@ -398,7 +405,7 @@ int toku_lt_unlock(toku_lock_tree* tree, TXNID txn);
Creates a lock tree manager..
\param pmgr A buffer for the new lock tree manager.
\param max_locks The maximum number of locks.
\param locks_limit The maximum number of locks.
\return
- 0 on success.
@ -406,8 +413,8 @@ int toku_lt_unlock(toku_lock_tree* tree, TXNID txn);
- May return other errors due to system calls.
*/
int toku_ltm_create(toku_ltm** pmgr,
uint32_t max_locks,
uint64_t max_lock_memory,
uint32_t locks_limit,
uint64_t lock_memory_limit,
int (*panic)(DB*, int),
toku_dbt_cmp (*get_compare_fun_from_db)(DB*));
@ -426,26 +433,24 @@ int toku_ltm_close(toku_ltm* mgr);
/**
Sets the maximum number of locks on the lock tree manager.
\param mgr The lock tree manager to which to set max_locks.
\param max_locks The new maximum number of locks.
\param mgr The lock tree manager to which to set locks_limit.
\param locks_limit The new maximum number of locks.
\return
- 0 on success.
- EINVAL if tree is NULL or max_locks is 0
- EDOM if max_locks is less than the number of locks held by any lock tree
- EINVAL if tree is NULL or locks_limit is 0
- EDOM if locks_limit is less than the number of locks held by any lock tree
held by the manager
*/
int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t max_locks);
int toku_ltm_set_max_locks(toku_ltm* mgr, uint32_t locks_limit);
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* max_lock_memory);
int toku_ltm_get_max_lock_memory(toku_ltm* mgr, uint64_t* lock_memory_limit);
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t max_lock_memory);
int toku_ltm_set_max_lock_memory(toku_ltm* mgr, uint64_t lock_memory_limit);
void toku_ltm_get_status(toku_ltm* mgr, uint32_t * max_locks, uint32_t * curr_locks,
uint64_t *max_lock_memory, uint64_t *curr_lock_memory,
LTM_STATUS s);
void toku_ltm_get_status(toku_ltm* mgr, LTM_STATUS s);
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* max_locks);
int toku_ltm_get_max_locks(toku_ltm* mgr, uint32_t* locks_limit);
void toku_ltm_incr_lock_memory(void *extra, size_t s);
void toku_ltm_decr_lock_memory(void *extra, size_t s);

View file

@ -19,11 +19,15 @@ static uint64_t htonl64(uint64_t x) {
struct my_ltm_status {
uint32_t max_locks, curr_locks;
uint64_t max_lock_memory, curr_lock_memory;
LTM_STATUS_S status;
};
// Fetch the lock tree manager's status table and copy the four limit/usage
// rows into the test's own flat my_ltm_status structure.
static void my_ltm_get_status(toku_ltm *ltm, struct my_ltm_status *my_status) {
    LTM_STATUS_S status;
    toku_ltm_get_status(ltm, &status);
    // Each row's value is an aggregate; the numeric payload lives in .num.
    // The lock counts are stored as 32-bit in my_ltm_status, hence the casts.
    my_status->max_locks        = (uint32_t) status.status[LTM_LOCKS_LIMIT].value.num;
    my_status->curr_locks       = (uint32_t) status.status[LTM_LOCKS_CURR].value.num;
    my_status->max_lock_memory  = status.status[LTM_LOCK_MEMORY_LIMIT].value.num;
    my_status->curr_lock_memory = status.status[LTM_LOCK_MEMORY_CURR].value.num;
}
static void *my_malloc(size_t s) {

View file

@ -19,11 +19,15 @@ static uint64_t htonl64(uint64_t x) {
struct my_ltm_status {
uint32_t max_locks, curr_locks;
uint64_t max_lock_memory, curr_lock_memory;
LTM_STATUS_S status;
};
// Fetch the lock tree manager's status table and copy the four limit/usage
// rows into the test's own flat my_ltm_status structure.
static void my_ltm_get_status(toku_ltm *ltm, struct my_ltm_status *my_status) {
    LTM_STATUS_S status;
    toku_ltm_get_status(ltm, &status);
    // Each row's value is an aggregate; the numeric payload lives in .num.
    // The lock counts are stored as 32-bit in my_ltm_status, hence the casts.
    my_status->max_locks        = (uint32_t) status.status[LTM_LOCKS_LIMIT].value.num;
    my_status->curr_locks       = (uint32_t) status.status[LTM_LOCKS_CURR].value.num;
    my_status->max_lock_memory  = status.status[LTM_LOCK_MEMORY_LIMIT].value.num;
    my_status->curr_lock_memory = status.status[LTM_LOCK_MEMORY_CURR].value.num;
}
static void *my_malloc(size_t s) {

View file

@ -5,14 +5,12 @@
enum { MAX_LOCKS = 1000, MAX_LOCK_MEMORY = MAX_LOCKS * 64 };
// Check that a freshly created lock tree manager reports the configured
// limits and no locks or lock memory in use.
static void do_ltm_status(toku_ltm *ltm) {
    LTM_STATUS_S s;
    toku_ltm_get_status(ltm, &s);
    // Each row's value is an aggregate; compare its numeric payload (.num).
    assert(s.status[LTM_LOCKS_LIMIT].value.num == MAX_LOCKS);
    assert(s.status[LTM_LOCKS_CURR].value.num == 0);
    assert(s.status[LTM_LOCK_MEMORY_LIMIT].value.num == MAX_LOCK_MEMORY);
    assert(s.status[LTM_LOCK_MEMORY_CURR].value.num == 0);
}
int main(int argc, const char *argv[]) {

View file

@ -94,10 +94,9 @@ run_test(void) {
r = db->put(db, txn, &k, &v, 0); CKERR(r);
}
r = txn->commit(txn, 0); CKERR(r);
ENGINE_STATUS es;
r = env->get_engine_status(env, &es, NULL, 0);
CKERR(r);
if (es.merge_leaf > 0) {
uint64_t merge_leaf = get_engine_status_val(env, "BRT_FLUSHER_MERGE_LEAF");
if (merge_leaf > 0) {
if (verbose) printf("t=%d\n", t);
break;
}

View file

@ -0,0 +1,144 @@
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ident "Copyright (c) 2009 Tokutek Inc. All rights reserved."
#ident "$Id$"
/* Purpose of this test is to verify the basic functioning
* of the engine status functions.
*/
#include "test.h"
#include <db.h>
#include "toku_time.h"
static DB_ENV *env;
#define FLAGS_NOLOG DB_INIT_LOCK|DB_INIT_MPOOL|DB_CREATE|DB_PRIVATE
#define FLAGS_LOG FLAGS_NOLOG|DB_INIT_TXN|DB_INIT_LOG
static int mode = S_IRWXU+S_IRWXG+S_IRWXO;
static void test_shutdown(void);
// Close the global test environment and clear the handle so that setup()
// knows there is nothing left to tear down.
static void
test_shutdown(void) {
    int r = env->close(env, 0);
    CKERR(r);
    env = NULL;
}
// Create a fresh environment directory and open the global test environment
// in it with the given open flags.  Any environment left open by a previous
// call is closed first.
static void
setup (u_int32_t flags) {
    if (env != NULL) {
        test_shutdown();
    }

    int r;
    // Start from an empty directory.
    r = system("rm -rf " ENVDIR);
    CKERR(r);
    r = toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
    CKERR(r);

    // Create the handle, send its error output to stderr, and open it.
    r = db_env_create(&env, 0);
    CKERR(r);
    env->set_errfile(env, stderr);
    r = env->open(env, ENVDIR, flags, mode);
    CKERR(r);
}
// Print the descriptive fields of one status row (its value is not printed).
// Used as a fallback for rows whose type the caller does not recognize.
static void
print_raw(TOKU_ENGINE_STATUS_ROW row) {
    const char *name   = row->keyname;
    const char *legend = row->legend;
    printf("keyname is %s, type is %d, legend is %s\n", name, row->type, legend);
}
// Format *timer as a ctime()-style string in buf with the trailing line
// terminator removed.
//   timer  time to format
//   buf    output buffer; must hold at least 26 bytes (ctime_r requirement)
// If the time cannot be converted, buf is set to the empty string.
static void
status_format_time(const time_t *timer, char *buf) {
    // ctime_r returns NULL on failure and then leaves buf unspecified, so do
    // not run strlen() over it in that case.
    if (ctime_r(timer, buf) == NULL) {
        buf[0] = '\0';
        return;
    }
    size_t len = strlen(buf);
    assert(len < 26);
    // Strip trailing '\n' / '\r'; stop cleanly if the string becomes empty.
    while (len > 0 && (buf[len-1] == '\n' || buf[len-1] == '\r')) {
        buf[--len] = '\0';
    }
}
// Test entry point: open an environment with logging enabled, take a
// checkpoint, fetch the engine status rows and require that the fetch
// succeeds.  When verbose, the rows are also printed three ways: raw, via
// get_engine_status_text(), and via the print_engine_status() test helper.
int
test_main (int argc, char * const argv[]) {
    uint64_t nrows;
    fs_redzone_state redzone_state;
    uint64_t panic;
    const int panic_string_len = 1024;
    char panic_string[panic_string_len];

    parse_args(argc, argv);
    setup(FLAGS_LOG);
    env->txn_checkpoint(env, 0, 0, 0);
    env->get_engine_status_num_rows(env, &nrows);
    TOKU_ENGINE_STATUS_ROW_S mystat[nrows];
    int r = env->get_engine_status (env, mystat, nrows, &redzone_state, &panic, panic_string, panic_string_len);
    assert(r==0);

    if (verbose) {
        printf("First all the raw fields:\n");
        for (uint64_t i = 0; i < nrows; i++) {
            printf("%s ", mystat[i].keyname);
            printf("%s ", mystat[i].legend);
            printf("type=%d val = ", mystat[i].type);
            switch(mystat[i].type) {
            case FS_STATE:
                printf("fs_state not supported yet, code is %"PRIu64"\n", mystat[i].value.num);
                break;
            case UINT64:
                printf("%"PRIu64"\n", mystat[i].value.num);
                break;
            case CHARSTR:
                printf("%s\n", mystat[i].value.str);
                break;
            case UNIXTIME:
                {
                    char tbuf[26];
                    // Convert by value: time_t need not have the same size or
                    // representation as the uint64_t the row stores.
                    time_t when = (time_t) mystat[i].value.num;
                    status_format_time(&when, tbuf);
                    printf("%s\n", tbuf);
                }
                break;
            case TOKUTIME:
                {
                    double t = tokutime_to_seconds(mystat[i].value.num);
                    printf("%.6f\n", t);
                }
                break;
            default:
                printf("UNKNOWN STATUS TYPE:\n");
                print_raw(&mystat[i]);
                break;
            }
        }

        printf("\n\n\n\n\nNow as reported by get_engine_status_text():\n\n");
        int bufsiz = nrows * 128;   // assume 128 characters per row
        char buff[bufsiz];
        r = env->get_engine_status_text(env, buff, bufsiz);
        // The status text is data, not a format string: any '%' in it must be
        // printed literally rather than interpreted by printf.
        printf("%s", buff);

        printf("\n\n\n\n\nFinally, print as reported by test utility print_engine_status()\n");
        print_engine_status(env);

        printf("That's all, folks.\n");
    }

    test_shutdown();
    return 0;
}

View file

@ -117,15 +117,43 @@ static __attribute__((__unused__)) void
print_engine_status(DB_ENV * UU(env)) {
#ifdef USE_TDB
if (verbose) { // verbose declared statically in this file
int buffsize = 1024 * 32;
char buff[buffsize];
env->get_engine_status_text(env, buff, buffsize);
printf("Engine status:\n");
printf("%s", buff);
uint64_t nrows;
env->get_engine_status_num_rows(env, &nrows);
int bufsiz = nrows * 128; // assume 128 characters per row
char buff[bufsiz];
env->get_engine_status_text(env, buff, bufsiz);
printf("Engine status:\n");
printf("%s", buff);
}
#endif
}
// Look up one engine status row by key name and return its numeric value.
//   env      environment to query
//   keyname  exact keyname of the row; string literals are accepted
// The test fails (via CKERR / CKERR2) if the status cannot be fetched or no
// row with that keyname exists.  When built without USE_TDB there is no
// engine status to query and 0 is returned.
static __attribute__((__unused__)) uint64_t
get_engine_status_val(DB_ENV * UU(env), const char * keyname) {
#ifdef USE_TDB
    uint64_t nrows;
    env->get_engine_status_num_rows(env, &nrows);
    TOKU_ENGINE_STATUS_ROW_S mystat[nrows];
    fs_redzone_state redzone_state;
    uint64_t panic;
    uint32_t panic_string_len = 1024;
    char panic_string[panic_string_len];
    int r = env->get_engine_status (env, mystat, nrows, &redzone_state, &panic, panic_string, panic_string_len);
    CKERR(r);

    // Linear scan; stop at the first row whose keyname matches.
    int found = 0;
    uint64_t rval = 0;
    for (uint64_t i = 0; i < nrows; i++) {
        if (strcmp(keyname, mystat[i].keyname) == 0) {
            found = 1;
            rval = mystat[i].value.num;
            break;
        }
    }
    CKERR2(found, 1);
    return rval;
#else
    // Without this branch control would fall off the end of a non-void function.
    (void) keyname;
    return 0;
#endif
}
static __attribute__((__unused__)) DBT *
dbt_init(DBT *dbt, const void *data, u_int32_t size) {

View file

@ -60,38 +60,29 @@ u_int64_t num_basements_fetched_prefetch;
static void
init_eng_stat_vars(DB_ENV* env) {
ENGINE_STATUS engstat;
int r = env->get_engine_status(env, &engstat, NULL, 0);
CKERR(r);
num_pivots_fetched_prefetch = engstat.num_pivots_fetched_prefetch;
num_basements_decompressed_aggressive = engstat.num_basements_decompressed_aggressive;
num_basements_decompressed_prefetch = engstat.num_basements_decompressed_prefetch;
num_basements_fetched_aggressive = engstat.num_basements_fetched_aggressive;
num_basements_fetched_prefetch = engstat.num_basements_fetched_prefetch;
num_pivots_fetched_prefetch = get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_PREFETCH");
num_basements_decompressed_aggressive = get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE");
num_basements_decompressed_prefetch = get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH");
num_basements_fetched_aggressive = get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE");
num_basements_fetched_prefetch = get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_PREFETCH");
}
static void
check_eng_stat_vars_unchanged(DB_ENV* env) {
ENGINE_STATUS engstat;
int r = env->get_engine_status(env, &engstat, NULL, 0);
CKERR(r);
assert(num_pivots_fetched_prefetch == engstat.num_pivots_fetched_prefetch);
assert(num_basements_decompressed_aggressive == engstat.num_basements_decompressed_aggressive);
assert(num_basements_decompressed_prefetch == engstat.num_basements_decompressed_prefetch);
assert(num_basements_fetched_aggressive == engstat.num_basements_fetched_aggressive);
assert(num_basements_fetched_prefetch == engstat.num_basements_fetched_prefetch);
assert(num_pivots_fetched_prefetch == get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_PREFETCH"));
assert(num_basements_decompressed_aggressive == get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE"));
assert(num_basements_decompressed_prefetch == get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH"));
assert(num_basements_fetched_aggressive == get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE"));
assert(num_basements_fetched_prefetch == get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_PREFETCH"));
}
static void
print_relevant_eng_stat_vars(DB_ENV* env) {
ENGINE_STATUS engstat;
int r = env->get_engine_status(env, &engstat, NULL, 0);
CKERR(r);
printf("num_pivots_fetched_prefetch %"PRId64" \n", engstat.num_pivots_fetched_prefetch);
printf("num_basements_decompressed_aggressive %"PRId64" \n", engstat.num_basements_decompressed_aggressive);
printf("num_basements_decompressed_prefetch %"PRId64" \n", engstat.num_basements_decompressed_prefetch);
printf("num_basements_fetched_aggressive %"PRId64" \n", engstat.num_basements_fetched_aggressive);
printf("num_basements_fetched_prefetch %"PRId64" \n", engstat.num_basements_fetched_prefetch);
printf("num_pivots_fetched_prefetch %"PRId64" \n", get_engine_status_val(env, "BRT_NUM_PIVOTS_FETCHED_PREFETCH"));
printf("num_basements_decompressed_aggressive %"PRId64" \n", get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_AGGRESSIVE"));
printf("num_basements_decompressed_prefetch %"PRId64" \n", get_engine_status_val(env, "BRT_NUM_BASEMENTS_DECOMPRESSED_PREFETCH"));
printf("num_basements_fetched_aggressive %"PRId64" \n", get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_AGGRESSIVE"));
printf("num_basements_fetched_prefetch %"PRId64" \n", get_engine_status_val(env, "BRT_NUM_BASEMENTS_FETCHED_PREFETCH"));
}
static void

View file

@ -1,4 +1,4 @@
/* -*- mode: C; c-basic-offset: 4 -*- */
/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
#ifndef YDB_INTERNAL_H
#define YDB_INTERNAL_H
@ -80,11 +80,7 @@ struct __toku_db_env_internal {
char *real_log_dir; // log dir used when the env is opened (relative to cwd, or absolute with leading /)
char *real_tmp_dir; // tmp dir used for temporary files (relative to cwd, or absoulte with leading /)
enum {
FS_GREEN = 0, // green zone (we have lots of space)
FS_YELLOW = 1, // yellow zone (issue warning but allow operations)
FS_RED = 2, // red zone (prevent operations)
} fs_state;
fs_redzone_state fs_state;
uint64_t fs_seq; // how many times has fs_poller run?
uint64_t last_seq_entered_red;
uint64_t last_seq_entered_yellow;
@ -107,15 +103,22 @@ struct __toku_db_env_internal {
********************************************************* */
typedef enum {YDB_LOCK_TAKEN = 0, /* how many times has ydb lock been taken. This is precise since it is updated only when the lock is held. */
YDB_LOCK_RELEASED, /* how many times has ydb lock been released. This is precise since it is updated only when the lock is held. */
YDB_NUM_WAITERS_NOW, /* How many are waiting on the ydb lock right now (including the current lock holder). This is precise since it is updated with a fetch-and-add. */
YDB_MAX_WAITERS, /* max number of simultaneous client threads kept waiting for ydb lock. This is precise (updated only when the lock is held) but may be running a little behind (while waiting for the lock it hasn't been updated). */
YDB_TOTAL_SLEEP_TIME, /* total time spent sleeping for ydb lock scheduling (useconds). This adds up over many clients. This is precise since it is updated with an atomic fetch-and-add. */
YDB_MAX_TIME_YDB_LOCK_HELD, /* max time the ydb lock was held (in microseconds). This is precise since it is updated only when the lock is held. */
YDB_TOTAL_TIME_YDB_LOCK_HELD, /* total time the ydb lock has been held. */
YDB_TOTAL_TIME_SINCE_START, /* total time since the ydb lock was initialized. This is only updated when the lock is accessed (so if you don't acquire the lock this doesn't increase), and it is updated precisely (even though it isn't updated continuously). */
YDB_LOCK_STATUS_NUM_ROWS /* number of rows in this status array */
} ydb_lock_status_entry;
typedef struct {
volatile u_int64_t ydb_lock_ctr; /* how many times has ydb lock been taken/released. This is precise since it is updated only when the lock is held. */
volatile u_int64_t num_waiters_now; /* How many are waiting on the ydb lock right now (including the current lock holder). This is precise since it is updated with a fetch-and-add. */
volatile u_int64_t max_waiters; /* max number of simultaneous client threads kept waiting for ydb lock. This is precise (updated only when the lock is held) but may be running a little behind (while waiting for the lock it hasn't been updated). */
volatile u_int64_t total_sleep_time; /* total time spent sleeping for ydb lock scheduling (useconds). This adds up over many clients. This is precise since it is updated with an atomic fetch-and-add. */
volatile tokutime_t max_time_ydb_lock_held; /* max time the ydb lock was held (in microseconds). This is precise since it is updated only when the lock is held. */
volatile tokutime_t total_time_ydb_lock_held;/* total time the ydb lock has been held. */
volatile tokutime_t total_time_since_start; /* total time since the ydb lock was initialized. This is only updated when the lock is accessed (so if you don't acquire the lock this doesn't increase), and it is updated precisely (even though it isn't updated continuously). */
} SCHEDULE_STATUS_S, *SCHEDULE_STATUS;
BOOL initialized;
TOKU_ENGINE_STATUS_ROW_S status[YDB_LOCK_STATUS_NUM_ROWS];
} YDB_LOCK_STATUS_S, *YDB_LOCK_STATUS;
@ -126,7 +129,7 @@ void toku_ydb_unlock(void);
void toku_ydb_unlock_and_yield(unsigned long useconds);
toku_pthread_mutex_t *toku_ydb_mutex(void);
void toku_ydb_lock_get_status(SCHEDULE_STATUS statp);
void toku_ydb_lock_get_status(YDB_LOCK_STATUS statp);
int toku_ydb_check_avail_fs_space(DB_ENV *env);

1134
src/ydb.c

File diff suppressed because it is too large Load diff

View file

@ -5,6 +5,7 @@
#include <stdlib.h>
#include <toku_portability.h>
#include <db.h>
#if defined(__cplusplus) || defined(__cilkplusplus)
extern "C" {
@ -97,20 +98,27 @@ void toku_set_func_xrealloc_only(realloc_fun_t f);
void toku_set_func_realloc_only(realloc_fun_t f);
void toku_set_func_free(free_fun_t f);
typedef struct memory_status {
uint64_t malloc_count; // number of malloc operations
uint64_t free_count; // number of free operations
uint64_t realloc_count; // number of realloc operations
uint64_t malloc_fail; // number of malloc operations that failed
uint64_t realloc_fail; // number of realloc operations that failed
uint64_t requested; // number of bytes requested
uint64_t used; // number of bytes used (requested + overhead), obtained from malloc_usable_size()
uint64_t freed; // number of bytes freed;
volatile uint64_t max_in_use; // maximum memory footprint (used - freed), approximate (not worth threadsafety overhead for exact)
const char *mallocator_version;
uint64_t mmap_threshold;
// Names for the rows of the status[] array in MEMORY_STATUS_S.
// Enumerators count up from 0, so the final one, MEMORY_STATUS_NUM_ROWS, equals the number of rows before it.
// NOTE(review): the per-row notes are carried over from the like-named fields of struct memory_status;
// confirm that each row still has that meaning.
typedef enum {
MEMORY_MALLOC_COUNT = 0, // number of malloc operations
MEMORY_FREE_COUNT, // number of free operations
MEMORY_REALLOC_COUNT, // number of realloc operations
MEMORY_MALLOC_FAIL, // number of malloc operations that failed
MEMORY_REALLOC_FAIL, // number of realloc operations that failed
MEMORY_REQUESTED, // number of bytes requested
MEMORY_USED, // number of bytes used (requested + overhead), obtained from malloc_usable_size()
MEMORY_FREED, // number of bytes freed
MEMORY_MAX_IN_USE, // maximum memory footprint (used - freed), approximate
MEMORY_MALLOCATOR_VERSION, // presumably the row for the former mallocator_version string -- TODO confirm how it is stored in a status row
MEMORY_MMAP_THRESHOLD, // presumably the row for the former mmap_threshold value -- TODO confirm
MEMORY_STATUS_NUM_ROWS // number of rows; used as the length of MEMORY_STATUS_S.status[], keep it last
} memory_status_entry;
// Status block handed to toku_memory_get_status(): an initialized flag plus an array of status rows.
typedef struct {
int initialized; // TODO 2949 make this a bool. NOTE(review): presumably nonzero once the rows have been set up -- confirm
TOKU_ENGINE_STATUS_ROW_S status[MEMORY_STATUS_NUM_ROWS]; // sized by MEMORY_STATUS_NUM_ROWS, the final member of memory_status_entry; presumably indexed by that enum
} MEMORY_STATUS_S, *MEMORY_STATUS;
void toku_memory_get_status(MEMORY_STATUS s);
size_t toku_memory_footprint(void * p, size_t touched);

View file

@ -16,7 +16,8 @@ C_BEGIN
void toku_assert_set_fpointers(int (*toku_maybe_get_engine_status_text_pointer)(char*, int),
void (*toku_maybe_set_env_panic_pointer)(int, char*));
void (*toku_maybe_set_env_panic_pointer)(int, char*),
uint64_t num_rows);
void toku_do_assert(int /*expr*/,const char*/*expr_as_string*/,const char */*fun*/,const char*/*file*/,int/*line*/, int/*errno*/) __attribute__((__visibility__("default")));