Merge from 3.1.0 using the following commands:

> svn merge --accept=postpone -r18405:HEAD https://svn.tokutek.com/tokudb/mysql.branches/3.0.5/tokudb
> svn merge --accept=postpone -r18484:HEAD https://svn.tokutek.com/tokudb/mysql.branches/3.1.0/tokudb
> svn merge --accept=postpone -c18403 https://svn.tokutek.com/tokudb/mysql.branches/3.0.5/tokudb
> all executed inside directory tokudb/toku/tokudb

git-svn-id: file:///svn/toku/tokudb@18877 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
Barry Perlman 2013-04-16 23:59:04 -04:00 committed by Yoni Fogel
parent 5500a39946
commit 13c324ed06
17 changed files with 406 additions and 82 deletions

View file

@ -84,6 +84,7 @@ typedef struct __toku_engine_status {
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
@ -93,7 +94,7 @@ typedef struct __toku_engine_status {
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t deletes; /* ydb row delete operations */
@ -109,6 +110,8 @@ typedef struct __toku_engine_status {
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */
} ENGINE_STATUS;
typedef enum {
DB_BTREE=1,
@ -233,7 +236,9 @@ struct __toku_db_env {
DBT *dest_key,
const DBT *src_key, const DBT *src_val,
void *extra));
void* __toku_dummy0[19];
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[17];
char __toku_dummy1[64];
void *api1_internal; /* 32-bit offset=212 size=4, 64=bit offset=360 size=8 */
void* __toku_dummy2[7];

View file

@ -84,6 +84,7 @@ typedef struct __toku_engine_status {
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
@ -93,7 +94,7 @@ typedef struct __toku_engine_status {
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t deletes; /* ydb row delete operations */
@ -109,6 +110,8 @@ typedef struct __toku_engine_status {
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */
} ENGINE_STATUS;
typedef enum {
DB_BTREE=1,
@ -235,7 +238,9 @@ struct __toku_db_env {
DBT *dest_key,
const DBT *src_key, const DBT *src_val,
void *extra));
void* __toku_dummy0[19];
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[17];
char __toku_dummy1[96];
void *api1_internal; /* 32-bit offset=244 size=4, 64=bit offset=392 size=8 */
void* __toku_dummy2[7];

View file

@ -84,6 +84,7 @@ typedef struct __toku_engine_status {
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
@ -93,7 +94,7 @@ typedef struct __toku_engine_status {
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t deletes; /* ydb row delete operations */
@ -109,6 +110,8 @@ typedef struct __toku_engine_status {
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */
} ENGINE_STATUS;
typedef enum {
DB_BTREE=1,
@ -236,7 +239,9 @@ struct __toku_db_env {
DBT *dest_key,
const DBT *src_key, const DBT *src_val,
void *extra));
void* __toku_dummy0[34];
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[32];
char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[7];

View file

@ -84,6 +84,7 @@ typedef struct __toku_engine_status {
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
@ -93,7 +94,7 @@ typedef struct __toku_engine_status {
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t deletes; /* ydb row delete operations */
@ -109,6 +110,8 @@ typedef struct __toku_engine_status {
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */
} ENGINE_STATUS;
typedef enum {
DB_BTREE=1,
@ -236,7 +239,9 @@ struct __toku_db_env {
DBT *dest_key,
const DBT *src_key, const DBT *src_val,
void *extra));
void* __toku_dummy0[34];
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[32];
char __toku_dummy1[128];
void *api1_internal; /* 32-bit offset=336 size=4, 64=bit offset=544 size=8 */
void* __toku_dummy2[8];

View file

@ -84,6 +84,7 @@ typedef struct __toku_engine_status {
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
@ -93,7 +94,7 @@ typedef struct __toku_engine_status {
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t deletes; /* ydb row delete operations */
@ -109,6 +110,8 @@ typedef struct __toku_engine_status {
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */
} ENGINE_STATUS;
typedef enum {
DB_BTREE=1,
@ -238,7 +241,9 @@ struct __toku_db_env {
DBT *dest_key,
const DBT *src_key, const DBT *src_val,
void *extra));
void* __toku_dummy0[35];
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void* __toku_dummy0[33];
char __toku_dummy1[144];
void *api1_internal; /* 32-bit offset=356 size=4, 64=bit offset=568 size=8 */
void* __toku_dummy2[8];

View file

@ -440,6 +440,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */ \n");
printf(" u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */ \n");
printf(" u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */ \n");
printf(" u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/ \n");
printf(" u_int64_t puts; /* how many times has a newly created node been put into the cachetable */ \n");
printf(" u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */ \n");
printf(" u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */ \n");
@ -449,7 +450,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */ \n");
printf(" int64_t get_and_pin_footprint; /* state of get_and_pin procedure */ \n");
printf(" u_int32_t range_locks_max; /* max total number of range locks */ \n");
printf(" u_int32_t range_locks_max_per_db; /* max range locks per dictionary */ \n");
printf(" u_int32_t range_locks_max_per_index; /* max range locks per dictionary */ \n");
printf(" u_int32_t range_locks_curr; /* total range locks currently in use */ \n");
printf(" u_int64_t inserts; /* ydb row insert operations */ \n");
printf(" u_int64_t deletes; /* ydb row delete operations */ \n");
@ -465,7 +466,8 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
printf(" char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */ \n");
printf(" u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */ \n");
printf(" u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */ \n");
printf(" u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */ \n");
printf(" u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */ \n");
printf("} ENGINE_STATUS;\n");
print_dbtype();
@ -510,6 +512,8 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
" DBT *dest_key,\n"
" const DBT *src_key, const DBT *src_val,\n"
" void *extra))",
"int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */",
"int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */",
NULL};
print_struct("db_env", 1, db_env_fields32, db_env_fields64, sizeof(db_env_fields32)/sizeof(db_env_fields32[0]), extra);
}

View file

@ -84,6 +84,7 @@ typedef struct __toku_engine_status {
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
@ -93,7 +94,7 @@ typedef struct __toku_engine_status {
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t deletes; /* ydb row delete operations */
@ -109,6 +110,8 @@ typedef struct __toku_engine_status {
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */
} ENGINE_STATUS;
typedef enum {
DB_BTREE=1,
@ -238,6 +241,8 @@ struct __toku_db_env {
DBT *dest_key,
const DBT *src_key, const DBT *src_val,
void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void *api1_internal;
int (*close) (DB_ENV *, u_int32_t);
int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t);

View file

@ -66,6 +66,8 @@ int do_checkpoint_period = 0;
u_int32_t checkpoint_period = 0;
static const char *log_dir = NULL;
static int commitflags = 0;
static int redzone = 0;
static int redzone_set = 0;
static int use_random = 0;
enum { MAX_RANDOM_C = 16000057 }; // prime-numbers.org
@ -192,6 +194,13 @@ static void benchmark_setup (void) {
}
#endif
#if defined(TOKUDB)
if (redzone_set) {
r = dbenv->set_redzone(dbenv, redzone);
assert(r == 0);
}
#endif
r = dbenv->open(dbenv, dbdir, env_open_flags, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
assert(r == 0);
@ -486,15 +495,13 @@ static int print_usage (const char *argv0) {
fprintf(stderr, " --log_dir LOGDIR Put the logs in LOGDIR\n");
fprintf(stderr, " --env DIR\n");
fprintf(stderr, " --periter N how many insertions per iteration (default=%d)\n", DEFAULT_ITEMS_TO_INSERT_PER_ITERATION);
// fprintf(stderr, " --DB_INIT_TXN (1|0) turn on or off the DB_INIT_TXN env_open_flag\n");
// fprintf(stderr, " --DB_INIT_LOG (1|0) turn on or off the DB_INIT_LOG env_open_flag\n");
// fprintf(stderr, " --DB_INIT_LOCK (1|0) turn on or off the DB_INIT_LOCK env_open_flag\n");
fprintf(stderr, " --1514 do a point query for something not there at end. See #1514. (Requires --norandom)\n");
fprintf(stderr, " --append append to an existing file\n");
fprintf(stderr, " --userandom use random()\n");
fprintf(stderr, " --checkpoint-period %"PRIu32" checkpoint period\n", checkpoint_period);
fprintf(stderr, " --numdbs N Insert same items into N dbs (1 to %d)\n", MAX_DBS);
fprintf(stderr, " --insertmultiple Use DB_ENV->put_multiple api. Requires transactions.\n");
fprintf(stderr, " --redzone N redzone in percent\n");
fprintf(stderr, " n_iterations how many iterations (default %lld)\n", default_n_items/DEFAULT_ITEMS_TO_INSERT_PER_ITERATION);
return 1;
@ -641,22 +648,10 @@ int main (int argc, const char *const argv[]) {
} else if (strcmp(arg, "--log_dir") == 0) {
if (i+1 >= argc) return print_usage(argv[0]);
log_dir = argv[++i];
} else if (strcmp(arg, "--DB_INIT_TXN") == 0) {
if (i+1 >= argc) return print_usage(argv[0]);
if (atoi(argv[++i]))
env_open_flags |= DB_INIT_TXN;
else
env_open_flags &= ~DB_INIT_TXN;
} else if (strcmp(arg, "--DB_INIT_LOG") == 0) {
if (atoi(argv[++i]))
env_open_flags |= DB_INIT_LOG;
else
env_open_flags &= ~DB_INIT_LOG;
} else if (strcmp(arg, "--DB_INIT_LOCK") == 0) {
if (atoi(argv[++i]))
env_open_flags |= DB_INIT_LOCK;
else
env_open_flags &= ~DB_INIT_LOCK;
} else if (strcmp(arg, "--redzone") == 0) {
if (i+1 >= argc) return print_usage(argv[0]);
redzone_set = 1;
redzone = atoi(argv[++i]);
} else {
return print_usage(argv[0]);
}

View file

@ -84,6 +84,7 @@ typedef struct __toku_engine_status {
u_int64_t cachetable_waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t cachetable_wait_reading; /* how many times get_and_pin waits for a node to be read */
u_int64_t cachetable_wait_writing; /* how many times get_and_pin waits for a node to be written */
u_int64_t cachetable_wait_checkpoint; /* how many times get_and_pin waits for a node to be written for a checkpoint*/
u_int64_t puts; /* how many times has a newly created node been put into the cachetable */
u_int64_t prefetches; /* how many times has a block been prefetched into the cachetable */
u_int64_t maybe_get_and_pins; /* how many times has maybe_get_and_pin(_clean) been called */
@ -93,7 +94,7 @@ typedef struct __toku_engine_status {
int64_t cachetable_size_writing; /* the sum of the sizes of the nodes being written */
int64_t get_and_pin_footprint; /* state of get_and_pin procedure */
u_int32_t range_locks_max; /* max total number of range locks */
u_int32_t range_locks_max_per_db; /* max range locks per dictionary */
u_int32_t range_locks_max_per_index; /* max range locks per dictionary */
u_int32_t range_locks_curr; /* total range locks currently in use */
u_int64_t inserts; /* ydb row insert operations */
u_int64_t deletes; /* ydb row delete operations */
@ -109,6 +110,8 @@ typedef struct __toku_engine_status {
char enospc_most_recent[26]; /* time of most recent ENOSPC error return from disk write */
u_int64_t enospc_threads_blocked; /* how many threads are currently blocked by ENOSPC */
u_int64_t enospc_total; /* how many times has ENOSPC been returned by disk write */
u_int64_t enospc_seal_ctr; /* how many times has ENOSPC been returned to user (red zone) */
u_int64_t enospc_seal_state; /* state of ydb-level seal (0 = green, 1 = yellow, 2 = red) */
} ENGINE_STATUS;
typedef enum {
DB_BTREE=1,
@ -238,6 +241,8 @@ struct __toku_db_env {
DBT *dest_key,
const DBT *src_key, const DBT *src_val,
void *extra));
int (*get_redzone) (DB_ENV *env, int *redzone) /* get the redzone limit */;
int (*set_redzone) (DB_ENV *env, int redzone) /* set the redzone limit in percent of total space */;
void *api1_internal;
int (*close) (DB_ENV *, u_int32_t);
int (*dbremove) (DB_ENV *, DB_TXN *, const char *, const char *, u_int32_t);

View file

@ -219,6 +219,7 @@ toku_file_fsync_without_accounting (int fd) {
return r;
}
int
toku_fsync_dirfd_without_accounting(DIR *dirp) {
int r;

View file

@ -6,15 +6,16 @@
#include <fcntl.h>
#include <errno.h>
#include <toku_assert.h>
#include <malloc.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <syscall.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/statvfs.h>
#include "toku_portability.h"
#include "toku_os.h"
#include <malloc.h>
static int
toku_mallopt_init(void) {
@ -281,6 +282,27 @@ toku_os_get_processor_frequency(uint64_t *hzret) {
return r;
}
// Query the sizes, in bytes, of the file system that contains path.
//   avail_size (optional) receives f_bavail scaled to bytes
//   free_size  (optional) receives f_bfree scaled to bytes
//   total_size (optional) receives f_blocks scaled to bytes
// Any of the three output pointers may be NULL, in which case that value is not stored.
// Returns 0 on success, otherwise the errno left by statvfs.
int
toku_get_filesystem_sizes(const char *path, uint64_t *avail_size, uint64_t *free_size, uint64_t *total_size) {
    struct statvfs fs_info;
    int r = statvfs(path, &fs_info);
    if (r == -1)
        return errno;

    // block counts are scaled by f_frsize; fall back to f_bsize when f_frsize is zero
    uint64_t unit = fs_info.f_bsize;
    if (fs_info.f_frsize)
        unit = fs_info.f_frsize;

    if (avail_size)
        *avail_size = (uint64_t) fs_info.f_bavail * unit;
    if (free_size)
        *free_size = (uint64_t) fs_info.f_bfree * unit;
    if (total_size)
        *total_size = (uint64_t) fs_info.f_blocks * unit;
    return r;
}
int
toku_dup2(int fd, int fd2) {
int r;
@ -298,3 +320,5 @@ uint64_t toku_sync_fetch_and_add_uint64(volatile uint64_t *a, uint64_t b) {
}
#endif

View file

@ -0,0 +1,47 @@
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <toku_stdint.h>
#include <unistd.h>
#include <toku_assert.h>
#include "toku_os.h"
// Exercise toku_get_filesystem_sizes against the current directory.
//   -v / --verbose  print the sizes that were read
//   --timeit        repeat the query 100000 times instead of once
// Unrecognized arguments are ignored.
int main(int argc, const char *const argv[]) {
    int verbose = 0;
    int limit = 1;

    for (int argi = 1; argi < argc; argi++) {
        const char *arg = argv[argi];
        if (strcmp(arg, "-v") == 0 || strcmp(arg, "--verbose") == 0)
            verbose = 1;
        else if (strcmp(arg, "--timeit") == 0)
            limit = 100000;
    }

    int r;

#if 0
    r = toku_get_filesystem_sizes(NULL, NULL, NULL, NULL);
    assert(r == EFAULT);
#endif

    // every output pointer is optional
    r = toku_get_filesystem_sizes(".", NULL, NULL, NULL);
    assert(r == 0);

    uint64_t free_size = 0, avail_size = 0, total_size = 0;
    int remaining = limit;
    while (remaining-- > 0) {
        r = toku_get_filesystem_sizes(".", &avail_size, &free_size, &total_size);
        assert(r == 0);
        // sanity check on the relationship between the three sizes
        assert(avail_size <= free_size && free_size <= total_size);
    }

    if (verbose) {
        printf("avail=%"PRIu64"\n", avail_size);
        printf("free=%"PRIu64"\n", free_size);
        printf("total=%"PRIu64"\n", total_size);
    }
    return 0;
}

View file

@ -2348,6 +2348,7 @@ void toku_cachetable_get_status(CACHETABLE ct, CACHETABLE_STATUS s) {
s->waittime = cachetable_waittime;
s->wait_reading = cachetable_wait_reading;
s->wait_writing = cachetable_wait_writing;
s->wait_checkpoint = cachetable_wait_checkpoint;
s->puts = cachetable_puts;
s->prefetches = cachetable_prefetches;
s->maybe_get_and_pins = cachetable_maybe_get_and_pins;
@ -2388,3 +2389,8 @@ toku_cachetable_get_fname_in_cwd(CACHETABLE ct, const char * fname_in_env) {
return toku_construct_full_name(2, ct->env_dir, fname_in_env);
}
// Accessor: report the size limit stored in the cachetable (ct->size_limit).
uint64_t toku_cachetable_get_size_limit(CACHETABLE ct) {
    uint64_t limit = ct->size_limit;
    return limit;
}

View file

@ -75,6 +75,10 @@ void toku_cachetable_get_miss_times(CACHETABLE ct, uint64_t *misscount, uint64_t
// Open a file and bind the file to a new cachefile object. (For use by test programs only.)
int toku_cachetable_openf (CACHEFILE *,CACHETABLE, const char */*fname_in_env*/, int flags, mode_t mode);
// Returns the limit on the cachetable size
uint64_t toku_cachetable_get_size_limit(CACHETABLE ct);
// Bind a file to a new cachefile object.
int toku_cachetable_openfd (CACHEFILE *,CACHETABLE, int /*fd*/,
const char *fname_relative_to_env); /*(used for logging)*/
@ -298,6 +302,7 @@ typedef struct cachetable_status {
u_int64_t waittime; /* how many usec spent waiting for another thread to release cache line */
u_int64_t wait_reading;
u_int64_t wait_writing;
u_int64_t wait_checkpoint; // number of times get_and_pin waits for a node to be written for a checkpoint
u_int64_t puts; // how many times has a newly created node been put into the cachetable?
u_int64_t prefetches; // how many times has a block been prefetched into the cachetable?
u_int64_t maybe_get_and_pins; // how many times has maybe_get_and_pin(_clean) been called?

View file

@ -10,6 +10,7 @@
#include "toku_list.h"
#include "./lock_tree/locktree.h"
#include "./lock_tree/idlth.h"
#include "../newbrt/minicron.h"
#include <limits.h>
struct __toku_lock_tree;
@ -61,9 +62,26 @@ struct __toku_db_env_internal {
TOKULOGGER logger;
toku_ltm* ltm;
struct toku_list open_txns;
DB *directory; //Maps dnames to inames
DB *persistent_environment; //Stores environment settings, can be used for upgrade
OMT open_dbs; //Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location)
DB *directory; // Maps dnames to inames
DB *persistent_environment; // Stores environment settings, can be used for upgrade
OMT open_dbs; // Stores open db handles, sorted first by dname and then by numerical value of pointer to the db (arbitrarily assigned memory location)
char *real_data_dir; // data dir used when the env is opened (relative to cwd)
char *real_log_dir; // log dir used when the env is opened (relative to cwd)
enum {
FS_GREEN = 0, // green zone (we have lots of space)
FS_YELLOW = 1, // yellow zone (issue warning but allow operations)
FS_RED = 2, // red zone (prevent operations)
} fs_state;
uint64_t fs_seq; // how many times has fs_poller run?
uint64_t last_seq_entered_red;
uint64_t last_seq_entered_yellow;
int redzone; // percent of total fs space that marks boundary between yellow and red zones
int enospc_seal_ctr; // number of operations rejected by enospc seal (red zone)
int fs_poll_time; // Time in seconds between statfs calls
struct minicron fs_poller; // Poll the file systems
BOOL fs_poller_is_init;
};
/* *********************************************************

271
src/ydb.c
View file

@ -140,6 +140,146 @@ static void env_remove_open_txn(DB_ENV *UU(env), DB_TXN *txn) {
static int toku_txn_abort(DB_TXN * txn, TXN_PROGRESS_POLL_FUNCTION, void*);
// Write a timestamped "space is low" warning to stderr (yellow zone).
// The env argument is unused.
static void
env_fs_report_in_yellow(DB_ENV *UU(env)) {
    time_t tnow = time(NULL);
    char tbuf[26];
    const char *stamp = ctime_r(&tnow, tbuf);
    // only the first 24 characters of the timestamp are printed
    fprintf(stderr, "%.24s Tokudb file system space is low\n", stamp);
    fflush(stderr);
}
// Write a timestamped "space is really low" warning to stderr (red zone).
// The env argument is unused.
static void
env_fs_report_in_red(DB_ENV *UU(env)) {
    time_t tnow = time(NULL);
    char tbuf[26];
    const char *stamp = ctime_r(&tnow, tbuf);
    // only the first 24 characters of the timestamp are printed
    fprintf(stderr, "%.24s Tokudb file system space is really low and access is restricted\n", stamp);
    fflush(stderr);
}
// Compute the red zone reserve in bytes: env->i->redzone percent of total.
static inline uint64_t
env_fs_redzone(DB_ENV *env, uint64_t total) {
    uint64_t scaled = total * env->i->redzone;
    return scaled / 100;
}
#define ZONEREPORTLIMIT 12

// Probe the file system holding dir and add the result to the zone counters:
// *in_yellow is bumped when available space is below twice the redzone reserve,
// *in_red is bumped when it is below the reserve itself.
static void
env_fs_poll_one_dir(DB_ENV *env, const char *dir, int *in_yellow, int *in_red) {
    uint64_t avail_size, total_size;
    int r = toku_get_filesystem_sizes(dir, &avail_size, NULL, &total_size);
    assert(r == 0);
    if (0) fprintf(stderr, "%s %"PRIu64" %"PRIu64"\n", dir, avail_size, total_size);
    *in_yellow += (avail_size < 2 * env_fs_redzone(env, total_size));
    *in_red += (avail_size < env_fs_redzone(env, total_size));
}

// A zone report is due when more than ZONEREPORTLIMIT polls have passed since the zone
// was last entered, or while fewer than ZONEREPORTLIMIT polls have run at all.
static inline int
env_fs_report_is_due(uint64_t now, uint64_t last_entered) {
    return (now - last_entered > ZONEREPORTLIMIT) || (now < ZONEREPORTLIMIT);
}

// Check the available space in the file systems used by tokudb and erect barriers when available space gets low.
// arg is the DB_ENV being monitored.  The home dir is always probed; the data dir and log dir
// are probed only when their paths differ from the directories already probed.
// The results drive the fs_state machine (green / yellow / red).  Always returns 0.
static int
env_fs_poller(void *arg) {
    if (0) printf("%s:%d %p\n", __FUNCTION__, __LINE__, arg);

    DB_ENV *env = (DB_ENV *) arg;

#if 0
    // get the cachetable size limit (not yet needed)
    uint64_t cs = toku_cachetable_get_size_limit(env->i->cachetable);
#endif

    int in_yellow = 0; // nonzero to issue warning to user
    int in_red = 0;    // nonzero to seal off certain operations (returning ENOSPC)

    // home dir
    env_fs_poll_one_dir(env, env->i->dir, &in_yellow, &in_red);

    // data dir, if different than the home dir
    if (strcmp(env->i->dir, env->i->real_data_dir) != 0)
        env_fs_poll_one_dir(env, env->i->real_data_dir, &in_yellow, &in_red);

    // log dir, if different than both the home dir and the data dir
    if (strcmp(env->i->dir, env->i->real_log_dir) != 0 && strcmp(env->i->real_data_dir, env->i->real_log_dir) != 0)
        env_fs_poll_one_dir(env, env->i->real_log_dir, &in_yellow, &in_red);

    // sequence number of this pass through the poller
    env->i->fs_seq++;
    uint64_t now = env->i->fs_seq;

    // Reports are throttled: none is issued if the zone was entered recently, unless we're at system startup
    if (env->i->fs_state == FS_RED) {
        // leaving red is silent
        if (!in_red)
            env->i->fs_state = in_yellow ? FS_YELLOW : FS_GREEN;
    } else if (env->i->fs_state == FS_YELLOW || env->i->fs_state == FS_GREEN) {
        if (in_red) {
            // entering red is handled the same way from yellow and from green
            if (env_fs_report_is_due(now, env->i->last_seq_entered_red))
                env_fs_report_in_red(env);
            env->i->fs_state = FS_RED;
            env->i->last_seq_entered_red = now;
        } else if (env->i->fs_state == FS_YELLOW) {
            // yellow drops back to green silently
            if (!in_yellow)
                env->i->fs_state = FS_GREEN;
        } else if (in_yellow) {
            // green -> yellow
            if (env_fs_report_is_due(now, env->i->last_seq_entered_yellow))
                env_fs_report_in_yellow(env);
            env->i->fs_state = FS_YELLOW;
            env->i->last_seq_entered_yellow = now;
        }
    }
    return 0;
}
#undef ZONEREPORTLIMIT
// Establish the default file system monitoring settings for an environment.
// The poller minicron itself is not started here; it is only flagged as
// not yet initialized.
static void
env_fs_init(DB_ENV *env) {
    // No poller exists yet and the file system is presumed healthy.
    env->i->fs_poller_is_init = FALSE;
    env->i->fs_state          = FS_GREEN;

    // Tunable defaults.
    env->i->redzone      = 5;  // percent of total space
    env->i->fs_poll_time = 5;  // seconds
}
// Start the minicron that polls file system space.
// The minicron is set up to call env_fs_poller with env as its argument,
// using env->i->fs_poll_time as the period.
// Asserts that setup succeeded, records that the poller is initialized,
// and returns the setup result.
static int
env_fs_init_minicron(DB_ENV *env) {
    int r;

    r = toku_minicron_setup(&env->i->fs_poller,
                            env->i->fs_poll_time,
                            env_fs_poller,
                            env);
    assert(r == 0);

    env->i->fs_poller_is_init = TRUE;
    return r;
}
// Shut down the file system space minicron, if it was started.
// Does nothing when the poller was never initialized (or was already destroyed).
static void
env_fs_destroy(DB_ENV *env) {
    if (!env->i->fs_poller_is_init)
        return;

    int r = toku_minicron_shutdown(&env->i->fs_poller);
    assert(r == 0);
    env->i->fs_poller_is_init = FALSE;
}
// Check whether the file system state recorded for this environment is red.
// Returns ENOSPC if the state is FS_RED, otherwise 0.
// Every rejection is counted in env->i->enospc_seal_ctr.
static inline int
env_check_avail_fs_space(DB_ENV *env) {
    if (env->i->fs_state != FS_RED)
        return 0;

    env->i->enospc_seal_ctr++;
    return ENOSPC;
}
/* db methods */
static inline int db_opened(DB *db) {
return db->i->opened != 0;
@ -318,46 +458,53 @@ static int toku_c_del(DBC *c, u_int32_t flags);
static int toku_c_count(DBC *cursor, db_recno_t *count, u_int32_t flags);
static int toku_c_close(DBC * c);
static int delete_rolltmp_files(DB_ENV *env) {
const char *datadir=env->i->dir;
char *logdir;
// (Re)compute env->i->real_data_dir from the environment home directory.
// Any previous value is freed first.  When a data_dir is configured the
// result is built from env->i->dir and env->i->data_dir; otherwise it is a
// copy of env->i->dir.  Requires env->i->dir to be set.
static void
env_setup_real_data_dir(DB_ENV *env) {
    toku_free(env->i->real_data_dir);
    env->i->real_data_dir = NULL;

    assert(env->i->dir);
    env->i->real_data_dir = env->i->data_dir
        ? toku_construct_full_name(2, env->i->dir, env->i->data_dir)
        : toku_strdup(env->i->dir);
}
static void
env_setup_real_log_dir(DB_ENV *env) {
toku_free(env->i->real_log_dir);
env->i->real_log_dir = NULL;
if (env->i->lg_dir) {
logdir = toku_construct_full_name(2, env->i->dir, env->i->lg_dir);
assert(env->i->dir);
env->i->real_log_dir = toku_construct_full_name(2, env->i->dir, env->i->lg_dir);
} else {
logdir = toku_strdup(env->i->dir);
assert(env->i->dir);
env->i->real_log_dir = toku_strdup(env->i->dir);
}
int r = tokudb_recover_delete_rolltmp_files(datadir, logdir);
toku_free(logdir);
}
// Delete rolltmp files via tokudb_recover_delete_rolltmp_files, passing the
// environment's resolved data and log directories.
// Both directories must already be set up; returns the helper's result.
static int delete_rolltmp_files(DB_ENV *env) {
    assert(env->i->real_data_dir);
    assert(env->i->real_log_dir);
    return tokudb_recover_delete_rolltmp_files(env->i->real_data_dir,
                                               env->i->real_log_dir);
}
static int
ydb_do_recovery (DB_ENV *env) {
const char *envdir=env->i->dir;
char *logdir;
if (env->i->lg_dir) {
logdir = toku_construct_full_name(2, env->i->dir, env->i->lg_dir);
} else {
logdir = toku_strdup(env->i->dir);
}
assert(env->i->real_log_dir);
toku_ydb_unlock();
int r = tokudb_recover(envdir, logdir, env->i->bt_compare, env->i->dup_compare,
int r = tokudb_recover(env->i->dir, env->i->real_log_dir, env->i->bt_compare, env->i->dup_compare,
env->i->generate_row_for_put, env->i->generate_row_for_del,
env->i->cachetable_size);
toku_ydb_lock();
toku_free(logdir);
return r;
}
static int needs_recovery (DB_ENV *env) {
char *logdir;
if (env->i->lg_dir) {
logdir = toku_construct_full_name(2, env->i->dir, env->i->lg_dir);
} else {
logdir = toku_strdup(env->i->dir);
}
int recovery_needed = tokudb_needs_recovery(logdir, TRUE);
toku_free(logdir);
assert(env->i->real_log_dir);
int recovery_needed = tokudb_needs_recovery(env->i->real_log_dir, TRUE);
return recovery_needed ? DB_RUNRECOVERY : 0;
}
@ -442,14 +589,7 @@ upgrade_env(DB_ENV * env, DB_TXN * txn) {
// return 0 if log exists or ENOENT if log does not exist
static int
ydb_recover_log_exists(DB_ENV *env) {
char *logdir;
if (env->i->lg_dir) {
logdir = toku_construct_full_name(2, env->i->dir, env->i->lg_dir);
} else {
logdir = toku_strdup(env->i->dir);
}
int r = tokudb_recover_log_exists(logdir);
toku_free(logdir);
int r = tokudb_recover_log_exists(env->i->real_log_dir);
return r;
}
@ -596,6 +736,9 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
env->i->open_flags = flags;
env->i->open_mode = mode;
env_setup_real_data_dir(env);
env_setup_real_log_dir(env);
r = validate_env(env, &newenv); // make sure that environment is either new or complete
if (r != 0) return r;
@ -623,15 +766,9 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
}
if (flags & (DB_INIT_TXN | DB_INIT_LOG)) {
char* full_dir = NULL;
if (env->i->lg_dir) {
full_dir = toku_construct_full_name(2, env->i->dir, env->i->lg_dir);
assert(full_dir);
}
assert(env->i->logger);
toku_logger_write_log_files(env->i->logger, (BOOL)((flags & DB_INIT_LOG) != 0));
r = toku_logger_open(full_dir ? full_dir : env->i->dir, env->i->logger);
if (full_dir) toku_free(full_dir);
r = toku_logger_open(env->i->real_log_dir, env->i->logger);
if (r!=0) {
toku_ydb_do_error(env, r, "Could not open logger\n");
died2:
@ -721,6 +858,8 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
r = toku_checkpoint(env->i->cachetable, env->i->logger, NULL, NULL, NULL, NULL);
assert(r==0);
toku_ydb_lock();
env_fs_poller(env); // get the file system state at startup
env_fs_init_minicron(env);
return 0;
}
@ -796,15 +935,21 @@ static int toku_env_close(DB_ENV * env, u_int32_t flags) {
else
assert(env->i->panic_string==0);
env_fs_destroy(env);
if (env->i->data_dir)
toku_free(env->i->data_dir);
if (env->i->lg_dir)
toku_free(env->i->lg_dir);
if (env->i->tmp_dir)
toku_free(env->i->tmp_dir);
if (env->i->real_data_dir)
toku_free(env->i->real_data_dir);
if (env->i->real_log_dir)
toku_free(env->i->real_log_dir);
if (env->i->open_dbs)
toku_omt_destroy(&env->i->open_dbs);
toku_free(env->i->dir);
if (env->i->dir)
toku_free(env->i->dir);
toku_ltm_close(env->i->ltm);
toku_free(env->i);
env->i = NULL;
@ -1290,6 +1435,27 @@ locked_env_del_multiple(DB_ENV *env, DB *src_db, DB_TXN *txn, const DBT *key, co
}
// Set the file system red zone for an environment.
//   redzone: percent of total file system space (0..100).
// Returns 0 on success.
// Returns EINVAL if the environment is already open, or if redzone is not
// a valid percentage.
// NOTE(review): the range check is new.  The value is later scaled against
// an unsigned 64-bit file system size by the poller, so a negative percent
// would presumably turn into a huge threshold -- confirm against
// env_fs_redzone.
static int
env_set_redzone(DB_ENV *env, int redzone) {
    HANDLE_PANICKED_ENV(env);
    int r;
    if (env_opened(env))
        r = EINVAL;   // the red zone may only be changed before the env is opened
    else if (redzone < 0 || redzone > 100)
        r = EINVAL;   // not a meaningful percentage
    else {
        env->i->redzone = redzone;
        r = 0;
    }
    return r;
}
// Wrapper around env_set_redzone that holds the ydb lock for the
// duration of the call and passes its result through.
static int
locked_env_set_redzone(DB_ENV *env, int redzone) {
    int r;

    toku_ydb_lock();
    r = env_set_redzone(env, redzone);
    toku_ydb_unlock();

    return r;
}
static void
format_time(const time_t *timer, char *buf) {
ctime_r(timer, buf);
@ -1359,6 +1525,7 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
engstat->cachetable_waittime = ctstat.waittime;
engstat->cachetable_wait_reading = ctstat.wait_reading;
engstat->cachetable_wait_writing = ctstat.wait_writing;
engstat->cachetable_wait_checkpoint = ctstat.wait_checkpoint;
engstat->puts = ctstat.puts;
engstat->prefetches = ctstat.prefetches;
engstat->maybe_get_and_pins = ctstat.maybe_get_and_pins;
@ -1370,9 +1537,9 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
}
{
toku_ltm* ltm = env->i->ltm;
r = toku_ltm_get_max_locks(ltm, &(engstat->range_locks_max)); assert(r==0);
r = toku_ltm_get_max_locks_per_db(ltm, &(engstat->range_locks_max_per_db)); assert(r==0);
r = toku_ltm_get_curr_locks(ltm, &(engstat->range_locks_curr)); assert(r==0);
r = toku_ltm_get_max_locks(ltm, &(engstat->range_locks_max)); assert(r==0);
r = toku_ltm_get_max_locks_per_db(ltm, &(engstat->range_locks_max_per_index)); assert(r==0);
r = toku_ltm_get_curr_locks(ltm, &(engstat->range_locks_curr)); assert(r==0);
}
{
engstat->inserts = num_inserts;
@ -1404,6 +1571,10 @@ env_get_engine_status(DB_ENV * env, ENGINE_STATUS * engstat) {
engstat->enospc_threads_blocked = enospc_threads_blocked;
engstat->enospc_total = enospc_total;
}
{
engstat->enospc_seal_ctr = env->i->enospc_seal_ctr; // number of operations rejected by enospc seal (red zone)
engstat->enospc_seal_state = env->i->fs_state;
}
}
return r;
}
@ -1452,7 +1623,7 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
n += snprintf(buff + n, bufsiz - n, "cachetable_size_writing %"PRId64"\n", engstat.cachetable_size_writing);
n += snprintf(buff + n, bufsiz - n, "get_and_pin_footprint %"PRId64"\n", engstat.get_and_pin_footprint);
n += snprintf(buff + n, bufsiz - n, "range_locks_max %"PRIu32"\n", engstat.range_locks_max);
n += snprintf(buff + n, bufsiz - n, "range_locks_max_per_db %"PRIu32"\n", engstat.range_locks_max_per_db);
n += snprintf(buff + n, bufsiz - n, "range_locks_max_per_index %"PRIu32"\n", engstat.range_locks_max_per_index);
n += snprintf(buff + n, bufsiz - n, "range_locks_curr %"PRIu32"\n", engstat.range_locks_curr);
n += snprintf(buff + n, bufsiz - n, "inserts %"PRIu64"\n", engstat.inserts);
n += snprintf(buff + n, bufsiz - n, "deletes %"PRIu64"\n", engstat.deletes);
@ -1468,6 +1639,8 @@ env_get_engine_status_text(DB_ENV * env, char * buff, int bufsiz) {
n += snprintf(buff + n, bufsiz - n, "enospc_most_recent %s \n", engstat.enospc_most_recent);
n += snprintf(buff + n, bufsiz - n, "enospc threads blocked %"PRIu64"\n", engstat.enospc_threads_blocked);
n += snprintf(buff + n, bufsiz - n, "enospc total %"PRIu64"\n", engstat.enospc_total);
n += snprintf(buff + n, bufsiz - n, "enospc seal ctr %"PRIu64"\n", engstat.enospc_seal_ctr);
n += snprintf(buff + n, bufsiz - n, "enospc seal state %"PRIu64"\n", engstat.enospc_seal_state);
if (n > bufsiz) {
char * errmsg = "BUFFER TOO SMALL\n";
@ -1542,6 +1715,7 @@ static int toku_env_create(DB_ENV ** envp, u_int32_t flags) {
SENV(log_archive);
SENV(txn_stat);
result->txn_begin = locked_txn_begin;
SENV(set_redzone);
#undef SENV
result->create_loader = toku_loader_create_loader;
@ -1549,6 +1723,7 @@ static int toku_env_create(DB_ENV ** envp, u_int32_t flags) {
if (result->i == 0) { r = ENOMEM; goto cleanup; }
memset(result->i, 0, sizeof *result->i);
env_init_open_txn(result);
env_fs_init(result);
r = toku_ltm_create(&result->i->ltm, __toku_env_default_max_locks,
toku_db_lt_panic,
@ -4981,7 +5156,13 @@ static inline int autotxn_db_put(DB* db, DB_TXN* txn, DBT* key, DBT* data,
}
static int locked_db_put(DB * db, DB_TXN * txn, DBT * key, DBT * data, u_int32_t flags) {
toku_ydb_lock(); int r = autotxn_db_put(db, txn, key, data, flags); toku_ydb_unlock(); return r;
int r = env_check_avail_fs_space(db->dbenv);
if (r == 0) {
toku_ydb_lock();
r = autotxn_db_put(db, txn, key, data, flags);
toku_ydb_unlock();
}
return r;
}
static int locked_db_remove(DB * db, const char *fname, const char *dbname, u_int32_t flags) {

View file

@ -77,8 +77,16 @@ void toku_set_assert_on_write_enospc(int do_assert) __attribute__((__visibility_
// *enospc_total is the number of times ENOSPC was returned by write or pwrite
void toku_fs_get_write_info(time_t *enospc_last_time, uint64_t *enospc_current, uint64_t *enospc_total);
int toku_fsync_dirfd_without_accounting(DIR *dirp);
// Get the file system free and total space for the file system that contains a path name
// *avail_size is set to the bytes of free space in the file system available for non-root
// *free_size is set to the bytes of free space in the file system
// *total_size is set to the total bytes in the file system
// Return 0 on success, otherwise an error number
int toku_get_filesystem_sizes(const char *path, uint64_t *avail_size, uint64_t *free_size, uint64_t *total_size);
#if TOKU_WINDOWS
#include <sys/types.h>
#include <sys/stat.h>