mirror of
https://github.com/MariaDB/server.git
synced 2025-01-22 06:44:16 +01:00
closes[t:2449] [t:2484] Merge #2449 changes to main.
Rollback logs are now checkpointed. There are no rolltmp files. git-svn-id: file:///svn/toku/tokudb@19167 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
parent
bf8e181e9e
commit
1bf7a7a403
64 changed files with 2018 additions and 1700 deletions
|
@ -379,7 +379,7 @@ typedef struct __toku_txn_progress {
|
||||||
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
||||||
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
||||||
struct txn_stat {
|
struct txn_stat {
|
||||||
u_int64_t rolltmp_raw_count;
|
u_int64_t rollback_raw_count;
|
||||||
};
|
};
|
||||||
struct __toku_db_txn {
|
struct __toku_db_txn {
|
||||||
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
||||||
|
|
|
@ -395,7 +395,7 @@ typedef struct __toku_txn_progress {
|
||||||
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
||||||
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
||||||
struct txn_stat {
|
struct txn_stat {
|
||||||
u_int64_t rolltmp_raw_count;
|
u_int64_t rollback_raw_count;
|
||||||
};
|
};
|
||||||
struct __toku_db_txn {
|
struct __toku_db_txn {
|
||||||
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
||||||
|
|
|
@ -403,7 +403,7 @@ typedef struct __toku_txn_progress {
|
||||||
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
||||||
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
||||||
struct txn_stat {
|
struct txn_stat {
|
||||||
u_int64_t rolltmp_raw_count;
|
u_int64_t rollback_raw_count;
|
||||||
};
|
};
|
||||||
struct __toku_db_txn {
|
struct __toku_db_txn {
|
||||||
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
||||||
|
|
|
@ -403,7 +403,7 @@ typedef struct __toku_txn_progress {
|
||||||
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
||||||
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
||||||
struct txn_stat {
|
struct txn_stat {
|
||||||
u_int64_t rolltmp_raw_count;
|
u_int64_t rollback_raw_count;
|
||||||
};
|
};
|
||||||
struct __toku_db_txn {
|
struct __toku_db_txn {
|
||||||
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
||||||
|
|
|
@ -407,7 +407,7 @@ typedef struct __toku_txn_progress {
|
||||||
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
||||||
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
||||||
struct txn_stat {
|
struct txn_stat {
|
||||||
u_int64_t rolltmp_raw_count;
|
u_int64_t rollback_raw_count;
|
||||||
};
|
};
|
||||||
struct __toku_db_txn {
|
struct __toku_db_txn {
|
||||||
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/; /* 32-bit offset=0 size=4, 64=bit offset=0 size=8 */
|
||||||
|
|
|
@ -585,7 +585,7 @@ int main (int argc __attribute__((__unused__)), char *const argv[] __attribute__
|
||||||
printf("} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;\n");
|
printf("} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;\n");
|
||||||
printf("typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);\n");
|
printf("typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);\n");
|
||||||
|
|
||||||
printf("struct txn_stat {\n u_int64_t rolltmp_raw_count;\n};\n");
|
printf("struct txn_stat {\n u_int64_t rollback_raw_count;\n};\n");
|
||||||
const char *extra[] = {
|
const char *extra[] = {
|
||||||
"int (*txn_stat)(DB_TXN *, struct txn_stat **)",
|
"int (*txn_stat)(DB_TXN *, struct txn_stat **)",
|
||||||
"struct { void *next, *prev; } open_txns",
|
"struct { void *next, *prev; } open_txns",
|
||||||
|
|
|
@ -354,7 +354,7 @@ typedef struct __toku_txn_progress {
|
||||||
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
||||||
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
||||||
struct txn_stat {
|
struct txn_stat {
|
||||||
u_int64_t rolltmp_raw_count;
|
u_int64_t rollback_raw_count;
|
||||||
};
|
};
|
||||||
struct __toku_db_txn {
|
struct __toku_db_txn {
|
||||||
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/;
|
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/;
|
||||||
|
|
|
@ -64,7 +64,7 @@ build: build.tdb build.bdb
|
||||||
build.bdb: $(TARGET_BDB) $(SCANSCAN_BDB) $(WINDOWS_BDB_LIB_NAME)
|
build.bdb: $(TARGET_BDB) $(SCANSCAN_BDB) $(WINDOWS_BDB_LIB_NAME)
|
||||||
build.tdb: $(TARGET_TDB) $(SCANSCAN_TDB)
|
build.tdb: $(TARGET_TDB) $(SCANSCAN_TDB)
|
||||||
|
|
||||||
check: check-default check-rowsize-dup check-rowsize check-xfast check-x check-no-rolltmp check-4G
|
check: check-default check-rowsize-dup check-rowsize check-xfast check-x check-no-rollback check-4G child.benchmark.dir
|
||||||
|
|
||||||
SUPPORT_KEYSIZE=$$((3*1024)) # at least 3KiB
|
SUPPORT_KEYSIZE=$$((3*1024)) # at least 3KiB
|
||||||
SUPPORT_ROWSIZE=$$((80*1024)) # at least 80KiB
|
SUPPORT_ROWSIZE=$$((80*1024)) # at least 80KiB
|
||||||
|
@ -96,8 +96,8 @@ check-xfast: $(TARGET_TDB)
|
||||||
./$(TARGET_TDB) $(VERBVERBOSE) --noserial -x --valsize 1000 --cachesize 8000000 --xcount 1000 --periter 20000 --env xfast.dir 1 $(SUMMARIZE_CMD)
|
./$(TARGET_TDB) $(VERBVERBOSE) --noserial -x --valsize 1000 --cachesize 8000000 --xcount 1000 --periter 20000 --env xfast.dir 1 $(SUMMARIZE_CMD)
|
||||||
|
|
||||||
# A relatively fast test that detects #853 (don't log changes to a dictionary created in the same txn)
|
# A relatively fast test that detects #853 (don't log changes to a dictionary created in the same txn)
|
||||||
check-no-rolltmp: $(TARGET_TDB)
|
check-no-rollback: $(TARGET_TDB)
|
||||||
./$(TARGET_TDB) $(VERBVERBOSE) --env no-rolltmp.dir --singlex --nolog --check_small_rolltmp $(SUMMARIZE_CMD)
|
./$(TARGET_TDB) $(VERBVERBOSE) --env no-rollback.dir --singlex --nolog --check_small_rollback $(SUMMARIZE_CMD)
|
||||||
|
|
||||||
# Check to make sure that if we make a file that's bigger than 4GB that we can read the file back out and get all the rows.
|
# Check to make sure that if we make a file that's bigger than 4GB that we can read the file back out and get all the rows.
|
||||||
ifeq ($(TOKU_SKIP_4G),1)
|
ifeq ($(TOKU_SKIP_4G),1)
|
||||||
|
|
|
@ -53,7 +53,7 @@ int singlex_child = 0; // Do a single transaction, but do all work with a child
|
||||||
int singlex = 0; // Do a single transaction
|
int singlex = 0; // Do a single transaction
|
||||||
int singlex_create = 0; // Create the db using the single transaction (only valid if singlex)
|
int singlex_create = 0; // Create the db using the single transaction (only valid if singlex)
|
||||||
int insert1first = 0; // insert 1 before doing the rest
|
int insert1first = 0; // insert 1 before doing the rest
|
||||||
int check_small_rolltmp = 0; // verify that the rollback logs are small (only valid if singlex)
|
int check_small_rollback = 0; // verify that the rollback logs are small (only valid if singlex)
|
||||||
int do_transactions = 0;
|
int do_transactions = 0;
|
||||||
int if_transactions_do_logging = DB_INIT_LOG; // set this to zero if we want no logging when transactions are used
|
int if_transactions_do_logging = DB_INIT_LOG; // set this to zero if we want no logging when transactions are used
|
||||||
int do_abort = 0;
|
int do_abort = 0;
|
||||||
|
@ -294,14 +294,14 @@ static void benchmark_shutdown (void) {
|
||||||
#endif
|
#endif
|
||||||
if (do_transactions && singlex && !insert1first && (singlex_create || prelock)) {
|
if (do_transactions && singlex && !insert1first && (singlex_create || prelock)) {
|
||||||
#if defined(TOKUDB)
|
#if defined(TOKUDB)
|
||||||
//There should be a single 'truncate' in the rolltmp instead of many 'insert' entries.
|
//There should be a single 'truncate' in the rollback instead of many 'insert' entries.
|
||||||
struct txn_stat *s;
|
struct txn_stat *s;
|
||||||
r = tid->txn_stat(tid, &s);
|
r = tid->txn_stat(tid, &s);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
//TODO: #1125 Always do the test after performance testing is done.
|
//TODO: #1125 Always do the test after performance testing is done.
|
||||||
if (singlex_child) fprintf(stderr, "SKIPPED 'small rolltmp' test for child txn\n");
|
if (singlex_child) fprintf(stderr, "SKIPPED 'small rollback' test for child txn\n");
|
||||||
else
|
else
|
||||||
assert(s->rolltmp_raw_count < 100); // gross test, not worth investigating details
|
assert(s->rollback_raw_count < 100); // gross test, not worth investigating details
|
||||||
os_free(s);
|
os_free(s);
|
||||||
//system("ls -l bench.tokudb");
|
//system("ls -l bench.tokudb");
|
||||||
#endif
|
#endif
|
||||||
|
@ -487,7 +487,7 @@ static int print_usage (const char *argv0) {
|
||||||
fprintf(stderr, " --singlex-child (implies -x) Run the whole job as a single transaction, do all work a child of that transaction.\n");
|
fprintf(stderr, " --singlex-child (implies -x) Run the whole job as a single transaction, do all work a child of that transaction.\n");
|
||||||
fprintf(stderr, " --finish-child-first Commit/abort child before doing so to parent (no effect if no child).\n");
|
fprintf(stderr, " --finish-child-first Commit/abort child before doing so to parent (no effect if no child).\n");
|
||||||
fprintf(stderr, " --singlex-create (implies --singlex) Create the file using the single transaction (Default is to use a different transaction to create.)\n");
|
fprintf(stderr, " --singlex-create (implies --singlex) Create the file using the single transaction (Default is to use a different transaction to create.)\n");
|
||||||
fprintf(stderr, " --check_small_rolltmp (Only valid in --singlex mode) Verify that very little data was saved in the rollback logs.\n");
|
fprintf(stderr, " --check_small_rollback (Only valid in --singlex mode) Verify that very little data was saved in the rollback logs.\n");
|
||||||
fprintf(stderr, " --prelock Prelock the database.\n");
|
fprintf(stderr, " --prelock Prelock the database.\n");
|
||||||
fprintf(stderr, " --prelockflag Prelock the database and send the DB_PRELOCKED_WRITE flag.\n");
|
fprintf(stderr, " --prelockflag Prelock the database and send the DB_PRELOCKED_WRITE flag.\n");
|
||||||
fprintf(stderr, " --abort Abort the singlex after the transaction is over. (Requires --singlex.)\n");
|
fprintf(stderr, " --abort Abort the singlex after the transaction is over. (Requires --singlex.)\n");
|
||||||
|
@ -589,8 +589,8 @@ int main (int argc, const char *const argv[]) {
|
||||||
singlex = 1;
|
singlex = 1;
|
||||||
} else if (strcmp(arg, "--insert1first") == 0) {
|
} else if (strcmp(arg, "--insert1first") == 0) {
|
||||||
insert1first = 1;
|
insert1first = 1;
|
||||||
} else if (strcmp(arg, "--check_small_rolltmp") == 0) {
|
} else if (strcmp(arg, "--check_small_rollback") == 0) {
|
||||||
check_small_rolltmp = 1;
|
check_small_rollback = 1;
|
||||||
} else if (strcmp(arg, "--xcount") == 0) {
|
} else if (strcmp(arg, "--xcount") == 0) {
|
||||||
if (i+1 >= argc) return print_usage(argv[0]);
|
if (i+1 >= argc) return print_usage(argv[0]);
|
||||||
items_per_transaction = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0);
|
items_per_transaction = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0);
|
||||||
|
@ -685,8 +685,8 @@ int main (int argc, const char *const argv[]) {
|
||||||
fprintf(stderr, "--insert_multiple only works on the TokuDB (not BDB)\n");
|
fprintf(stderr, "--insert_multiple only works on the TokuDB (not BDB)\n");
|
||||||
return print_usage(argv[0]);
|
return print_usage(argv[0]);
|
||||||
}
|
}
|
||||||
if (check_small_rolltmp) {
|
if (check_small_rollback) {
|
||||||
fprintf(stderr, "--check_small_rolltmp only works on the TokuDB (not BDB)\n");
|
fprintf(stderr, "--check_small_rollback only works on the TokuDB (not BDB)\n");
|
||||||
return print_usage(argv[0]);
|
return print_usage(argv[0]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -697,8 +697,8 @@ int main (int argc, const char *const argv[]) {
|
||||||
put_flagss[i] = put_flags;
|
put_flagss[i] = put_flags;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (check_small_rolltmp && !singlex) {
|
if (check_small_rollback && !singlex) {
|
||||||
fprintf(stderr, "--check_small_rolltmp requires --singlex\n");
|
fprintf(stderr, "--check_small_rollback requires --singlex\n");
|
||||||
return print_usage(argv[0]);
|
return print_usage(argv[0]);
|
||||||
}
|
}
|
||||||
if (!do_transactions && insert_multiple) {
|
if (!do_transactions && insert_multiple) {
|
||||||
|
|
|
@ -354,7 +354,7 @@ typedef struct __toku_txn_progress {
|
||||||
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
} *TOKU_TXN_PROGRESS, TOKU_TXN_PROGRESS_S;
|
||||||
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
typedef void(*TXN_PROGRESS_POLL_FUNCTION)(TOKU_TXN_PROGRESS, void*);
|
||||||
struct txn_stat {
|
struct txn_stat {
|
||||||
u_int64_t rolltmp_raw_count;
|
u_int64_t rollback_raw_count;
|
||||||
};
|
};
|
||||||
struct __toku_db_txn {
|
struct __toku_db_txn {
|
||||||
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/;
|
DB_ENV *mgrp /*In TokuDB, mgrp is a DB_ENV not a DB_TXNMGR*/;
|
||||||
|
|
|
@ -41,7 +41,6 @@ local: bins libs $(TEST_NEWBRT);
|
||||||
BRT_SOURCES = \
|
BRT_SOURCES = \
|
||||||
block_allocator \
|
block_allocator \
|
||||||
block_table \
|
block_table \
|
||||||
bread \
|
|
||||||
brt-serialize \
|
brt-serialize \
|
||||||
brt-verify \
|
brt-verify \
|
||||||
brt \
|
brt \
|
||||||
|
|
|
@ -596,6 +596,19 @@ toku_block_verify_no_free_blocknums(BLOCK_TABLE bt) {
|
||||||
assert(bt->current.blocknum_freelist_head.b == freelist_null.b);
|
assert(bt->current.blocknum_freelist_head.b == freelist_null.b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Verify there are no data blocks except root.
|
||||||
|
void
|
||||||
|
toku_block_verify_no_data_blocks_except_root_unlocked(BLOCK_TABLE bt, BLOCKNUM root) {
|
||||||
|
//Relies on checkpoint having used optimize_translation
|
||||||
|
assert(root.b >= RESERVED_BLOCKNUMS);
|
||||||
|
assert(bt->current.smallest_never_used_blocknum.b == root.b + 1);
|
||||||
|
int64_t i;
|
||||||
|
for (i=RESERVED_BLOCKNUMS; i < root.b; i++) {
|
||||||
|
BLOCKNUM b = make_blocknum(i);
|
||||||
|
assert(bt->current.block_translation[b.b].size == size_is_free);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//Verify a blocknum is currently allocated.
|
//Verify a blocknum is currently allocated.
|
||||||
void
|
void
|
||||||
toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b) {
|
toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b) {
|
||||||
|
|
|
@ -35,6 +35,7 @@ void toku_allocate_blocknum(BLOCK_TABLE bt, BLOCKNUM *res, struct brt_header * h
|
||||||
void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, struct brt_header * h);
|
void toku_allocate_blocknum_unlocked(BLOCK_TABLE bt, BLOCKNUM *res, struct brt_header * h);
|
||||||
void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, struct brt_header * h);
|
void toku_free_blocknum(BLOCK_TABLE bt, BLOCKNUM *b, struct brt_header * h);
|
||||||
void toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b);
|
void toku_verify_blocknum_allocated(BLOCK_TABLE bt, BLOCKNUM b);
|
||||||
|
void toku_block_verify_no_data_blocks_except_root_unlocked(BLOCK_TABLE bt, BLOCKNUM root);
|
||||||
void toku_block_verify_no_free_blocknums(BLOCK_TABLE bt);
|
void toku_block_verify_no_free_blocknums(BLOCK_TABLE bt);
|
||||||
void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, struct brt_header * h);
|
void toku_realloc_descriptor_on_disk(BLOCK_TABLE bt, DISKOFF size, DISKOFF *offset, struct brt_header * h);
|
||||||
void toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size);
|
void toku_get_descriptor_offset_size(BLOCK_TABLE bt, DISKOFF *offset, DISKOFF *size);
|
||||||
|
|
|
@ -1,80 +0,0 @@
|
||||||
/* Buffered read. */
|
|
||||||
|
|
||||||
#ident "$Id$"
|
|
||||||
#ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
#include "includes.h"
|
|
||||||
|
|
||||||
struct bread {
|
|
||||||
int64_t fileoff; // The byte before this offset is the next byte we will read (since we are reading backward)
|
|
||||||
int fd;
|
|
||||||
int bufoff; // The current offset in the buf. The next byte we will read is buf[bufoff-1] (assuming that bufoff>0).
|
|
||||||
char *buf; // A buffer with at least bufoff bytes in it.
|
|
||||||
};
|
|
||||||
|
|
||||||
BREAD create_bread_from_fd_initialize_at(int fd) {
|
|
||||||
BREAD XMALLOC(result);
|
|
||||||
int r = toku_os_get_file_size(fd, &result->fileoff);
|
|
||||||
assert(r==0);
|
|
||||||
result->fd=fd;
|
|
||||||
result->bufoff=0;
|
|
||||||
result->buf = 0;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
int close_bread_without_closing_fd(BREAD br) {
|
|
||||||
toku_free(br->buf);
|
|
||||||
toku_free(br);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
ssize_t bread_backwards(BREAD br, void *vbuf, size_t nbytes) {
|
|
||||||
char *buf=vbuf;
|
|
||||||
ssize_t result=0;
|
|
||||||
const int i4 = sizeof(u_int32_t);
|
|
||||||
while (nbytes > 0) {
|
|
||||||
// read whatever we can out of the buffer.
|
|
||||||
if (br->bufoff>0) {
|
|
||||||
size_t to_copy = ((size_t)br->bufoff >= nbytes) ? nbytes : (size_t)br->bufoff;
|
|
||||||
memcpy(buf+nbytes-to_copy, &br->buf[br->bufoff-to_copy], to_copy);
|
|
||||||
nbytes -= to_copy;
|
|
||||||
result += to_copy;
|
|
||||||
br->bufoff -= to_copy;
|
|
||||||
}
|
|
||||||
if (nbytes>0) {
|
|
||||||
assert(br->bufoff==0);
|
|
||||||
u_int32_t compressed_length_n, uncompressed_length_n;
|
|
||||||
assert(br->fileoff>=i4); // there better be the three lengths plus the compressed data.
|
|
||||||
{ ssize_t r = pread(br->fd, &compressed_length_n, i4, br->fileoff- i4); assert(r==i4); }
|
|
||||||
u_int32_t compressed_length = toku_dtoh32(compressed_length_n);
|
|
||||||
assert(br->fileoff >= compressed_length + 3*i4);
|
|
||||||
{ ssize_t r = pread(br->fd, &uncompressed_length_n, i4, br->fileoff-2*i4); assert(r==i4); }
|
|
||||||
u_int32_t uncompressed_length = toku_dtoh32(uncompressed_length_n);
|
|
||||||
char *XMALLOC_N(compressed_length, zbuf);
|
|
||||||
{
|
|
||||||
ssize_t r = pread(br->fd, zbuf, compressed_length, br->fileoff- compressed_length -2*i4);
|
|
||||||
assert(r==(ssize_t)compressed_length);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
u_int32_t compressed_length_n_again;
|
|
||||||
ssize_t r = pread(br->fd, &compressed_length_n_again, i4, br->fileoff-compressed_length-3*i4); assert(r==i4);
|
|
||||||
assert(compressed_length_n_again == compressed_length_n);
|
|
||||||
}
|
|
||||||
uLongf destlen = uncompressed_length;
|
|
||||||
XREALLOC_N(uncompressed_length, br->buf);
|
|
||||||
uncompress((Bytef*)br->buf, &destlen, (Bytef*)zbuf, compressed_length);
|
|
||||||
assert(destlen==uncompressed_length);
|
|
||||||
toku_free(zbuf);
|
|
||||||
|
|
||||||
br->bufoff = uncompressed_length;
|
|
||||||
br->fileoff -= (compressed_length + 3*i4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
int bread_has_more(BREAD br) {
|
|
||||||
return (br->fileoff>0) || (br->bufoff>0);
|
|
||||||
}
|
|
|
@ -1,30 +0,0 @@
|
||||||
#ifndef BREAD_H
|
|
||||||
#define BREAD_H
|
|
||||||
#ident "$Id$"
|
|
||||||
#ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved."
|
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
|
||||||
|
|
||||||
// A BREAD reads a file backwards using buffered I/O. BREAD stands for Buffered Read or Backwards Read.
|
|
||||||
// Conceivably, we could read forward too.
|
|
||||||
// The buffered I/O is buffered using a large buffer (e.g., something like a megabyte).
|
|
||||||
// Furthermore, data is compressed into blocks. Each block is a 4-byte compressed length (in network order), followed by compressed data, followed by a 4-byte uncompressed-length (in network order), followed by a 4-byte compressed length
|
|
||||||
// The compressed-length appears twice so that the file can be read backward or forward.
|
|
||||||
// If not for the large-buffer requirement, as well as compression, as well as reading backward, we could have used a FILE.
|
|
||||||
|
|
||||||
#include <sys/types.h>
|
|
||||||
typedef struct bread *BREAD;
|
|
||||||
|
|
||||||
BREAD create_bread_from_fd_initialize_at(int fd);
|
|
||||||
// Effect: Given a file descriptor, fd, create a BREAD.
|
|
||||||
// Requires: The fd must be an open fd.
|
|
||||||
|
|
||||||
int close_bread_without_closing_fd(BREAD);
|
|
||||||
// Effect: Close the BREAD, but don't close the underlying fd.
|
|
||||||
|
|
||||||
ssize_t bread_backwards(BREAD, void *buf, size_t nbytes);
|
|
||||||
// Read nbytes into buf, reading backwards.
|
|
||||||
|
|
||||||
int bread_has_more(BREAD);
|
|
||||||
// Is there more to read?
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -217,6 +217,10 @@ int toku_serialize_brtnode_to_memory (BRTNODE node, int n_workitems, int n_threa
|
||||||
/*out*/ size_t *n_bytes_to_write,
|
/*out*/ size_t *n_bytes_to_write,
|
||||||
/*out*/ char **bytes_to_write);
|
/*out*/ char **bytes_to_write);
|
||||||
int toku_serialize_brtnode_to(int fd, BLOCKNUM, BRTNODE node, struct brt_header *h, int n_workitems, int n_threads, BOOL for_checkpoint);
|
int toku_serialize_brtnode_to(int fd, BLOCKNUM, BRTNODE node, struct brt_header *h, int n_workitems, int n_threads, BOOL for_checkpoint);
|
||||||
|
int toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE log,
|
||||||
|
struct brt_header *h, int n_workitems, int n_threads,
|
||||||
|
BOOL for_checkpoint);
|
||||||
|
int toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, ROLLBACK_LOG_NODE *logp, TOKUTXN txn, struct brt_header *h);
|
||||||
int toku_deserialize_brtnode_from (int fd, BLOCKNUM off, u_int32_t /*fullhash*/, BRTNODE *brtnode, struct brt_header *h);
|
int toku_deserialize_brtnode_from (int fd, BLOCKNUM off, u_int32_t /*fullhash*/, BRTNODE *brtnode, struct brt_header *h);
|
||||||
unsigned int toku_serialize_brtnode_size(BRTNODE node); /* How much space will it take? */
|
unsigned int toku_serialize_brtnode_size(BRTNODE node); /* How much space will it take? */
|
||||||
int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
|
int toku_keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
|
||||||
|
|
|
@ -168,7 +168,7 @@ toku_full_pwrite_extend (int fd, const void *buf, size_t count, toku_off_t offse
|
||||||
// Overhead calculated in same order fields are written to wbuf
|
// Overhead calculated in same order fields are written to wbuf
|
||||||
enum {
|
enum {
|
||||||
|
|
||||||
node_header_overhead = (8+ // magic "tokunode" or "tokuleaf"
|
node_header_overhead = (8+ // magic "tokunode" or "tokuleaf" or "tokuroll"
|
||||||
4+ // layout_version
|
4+ // layout_version
|
||||||
4), // layout_version_original
|
4), // layout_version_original
|
||||||
|
|
||||||
|
@ -430,48 +430,27 @@ serialize_node(BRTNODE node, char *buf, size_t calculated_size, int n_sub_blocks
|
||||||
assert(calculated_size==wb.ndone);
|
assert(calculated_size==wb.ndone);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
|
||||||
toku_serialize_brtnode_to_memory (BRTNODE node, int UU(n_workitems), int UU(n_threads), /*out*/ size_t *n_bytes_to_write, /*out*/ char **bytes_to_write) {
|
|
||||||
|
|
||||||
// get the size of the serialized node
|
static void
|
||||||
unsigned int calculated_size = toku_serialize_brtnode_size(node);
|
serialize_uncompressed_block_to_memory(char * uncompressed_buf,
|
||||||
|
int n_sub_blocks,
|
||||||
// choose sub block parameters
|
struct sub_block sub_block[n_sub_blocks],
|
||||||
int n_sub_blocks = 0, sub_block_size = 0;
|
/*out*/ size_t *n_bytes_to_write,
|
||||||
size_t data_size = calculated_size - node_header_overhead;
|
/*out*/ char **bytes_to_write) {
|
||||||
choose_sub_block_size(data_size, max_sub_blocks, &sub_block_size, &n_sub_blocks);
|
// allocate space for the compressed uncompressed_buf
|
||||||
assert(0 < n_sub_blocks && n_sub_blocks <= max_sub_blocks);
|
|
||||||
assert(sub_block_size > 0);
|
|
||||||
|
|
||||||
// set the initial sub block size for all of the sub blocks
|
|
||||||
struct sub_block sub_block[n_sub_blocks];
|
|
||||||
for (int i = 0; i < n_sub_blocks; i++)
|
|
||||||
sub_block_init(&sub_block[i]);
|
|
||||||
set_all_sub_block_sizes(data_size, sub_block_size, n_sub_blocks, sub_block);
|
|
||||||
|
|
||||||
// alloocate space for the serialized node
|
|
||||||
char *MALLOC_N(calculated_size, buf);
|
|
||||||
//toku_verify_counts(node);
|
|
||||||
//assert(size>0);
|
|
||||||
//printf("%s:%d serializing %lld w height=%d p0=%p\n", __FILE__, __LINE__, off, node->height, node->mdicts[0]);
|
|
||||||
|
|
||||||
// serialize the node into buf
|
|
||||||
serialize_node(node, buf, calculated_size, n_sub_blocks, sub_block);
|
|
||||||
|
|
||||||
// allocate space for the compressed buf
|
|
||||||
size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block);
|
size_t compressed_len = get_sum_compressed_size_bound(n_sub_blocks, sub_block);
|
||||||
size_t sub_block_header_len = sub_block_header_size(n_sub_blocks);
|
size_t sub_block_header_len = sub_block_header_size(n_sub_blocks);
|
||||||
size_t header_len = node_header_overhead + sub_block_header_len + sizeof (uint32_t); // node + sub_block + checksum
|
size_t header_len = node_header_overhead + sub_block_header_len + sizeof (uint32_t); // node + sub_block + checksum
|
||||||
char *MALLOC_N(header_len + compressed_len, compressed_buf);
|
char *MALLOC_N(header_len + compressed_len, compressed_buf);
|
||||||
|
|
||||||
// copy the header
|
// copy the header
|
||||||
memcpy(compressed_buf, buf, node_header_overhead);
|
memcpy(compressed_buf, uncompressed_buf, node_header_overhead);
|
||||||
if (0) printf("First 4 bytes before compressing data are %02x%02x%02x%02x\n",
|
if (0) printf("First 4 bytes before compressing data are %02x%02x%02x%02x\n",
|
||||||
buf[node_header_overhead], buf[node_header_overhead+1],
|
uncompressed_buf[node_header_overhead], uncompressed_buf[node_header_overhead+1],
|
||||||
buf[node_header_overhead+2], buf[node_header_overhead+3]);
|
uncompressed_buf[node_header_overhead+2], uncompressed_buf[node_header_overhead+3]);
|
||||||
|
|
||||||
// compress all of the sub blocks
|
// compress all of the sub blocks
|
||||||
char *uncompressed_ptr = buf + node_header_overhead;
|
char *uncompressed_ptr = uncompressed_buf + node_header_overhead;
|
||||||
char *compressed_ptr = compressed_buf + header_len;
|
char *compressed_ptr = compressed_buf + header_len;
|
||||||
compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores);
|
compressed_len = compress_all_sub_blocks(n_sub_blocks, sub_block, uncompressed_ptr, compressed_ptr, num_cores);
|
||||||
|
|
||||||
|
@ -494,9 +473,40 @@ toku_serialize_brtnode_to_memory (BRTNODE node, int UU(n_workitems), int UU(n_th
|
||||||
|
|
||||||
*n_bytes_to_write = header_len + compressed_len;
|
*n_bytes_to_write = header_len + compressed_len;
|
||||||
*bytes_to_write = compressed_buf;
|
*bytes_to_write = compressed_buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
toku_serialize_brtnode_to_memory (BRTNODE node, int UU(n_workitems), int UU(n_threads), /*out*/ size_t *n_bytes_to_write, /*out*/ char **bytes_to_write) {
|
||||||
|
|
||||||
|
// get the size of the serialized node
|
||||||
|
size_t calculated_size = toku_serialize_brtnode_size(node);
|
||||||
|
|
||||||
|
// choose sub block parameters
|
||||||
|
int n_sub_blocks = 0, sub_block_size = 0;
|
||||||
|
size_t data_size = calculated_size - node_header_overhead;
|
||||||
|
choose_sub_block_size(data_size, max_sub_blocks, &sub_block_size, &n_sub_blocks);
|
||||||
|
assert(0 < n_sub_blocks && n_sub_blocks <= max_sub_blocks);
|
||||||
|
assert(sub_block_size > 0);
|
||||||
|
|
||||||
|
// set the initial sub block size for all of the sub blocks
|
||||||
|
struct sub_block sub_block[n_sub_blocks];
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++)
|
||||||
|
sub_block_init(&sub_block[i]);
|
||||||
|
set_all_sub_block_sizes(data_size, sub_block_size, n_sub_blocks, sub_block);
|
||||||
|
|
||||||
|
// allocate space for the serialized node
|
||||||
|
char *MALLOC_N(calculated_size, buf);
|
||||||
|
//toku_verify_counts(node);
|
||||||
|
//assert(size>0);
|
||||||
|
//printf("%s:%d serializing %lld w height=%d p0=%p\n", __FILE__, __LINE__, off, node->height, node->mdicts[0]);
|
||||||
|
|
||||||
|
// serialize the node into buf
|
||||||
|
serialize_node(node, buf, calculated_size, n_sub_blocks, sub_block);
|
||||||
|
|
||||||
|
//Compress and malloc buffer to write
|
||||||
|
serialize_uncompressed_block_to_memory(buf, n_sub_blocks, sub_block,
|
||||||
|
n_bytes_to_write, bytes_to_write);
|
||||||
toku_free(buf);
|
toku_free(buf);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -522,9 +532,8 @@ toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct brt_h
|
||||||
//printf("%s:%d bt=%p\n", __FILE__, __LINE__, h->block_translation);
|
//printf("%s:%d bt=%p\n", __FILE__, __LINE__, h->block_translation);
|
||||||
DISKOFF offset;
|
DISKOFF offset;
|
||||||
|
|
||||||
//h will be dirtied
|
|
||||||
toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset,
|
toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset,
|
||||||
h, for_checkpoint);
|
h, for_checkpoint); //dirties h
|
||||||
lock_for_pwrite();
|
lock_for_pwrite();
|
||||||
toku_full_pwrite_extend(fd, compressed_buf, n_to_write, offset);
|
toku_full_pwrite_extend(fd, compressed_buf, n_to_write, offset);
|
||||||
unlock_for_pwrite();
|
unlock_for_pwrite();
|
||||||
|
@ -852,7 +861,7 @@ deserialize_brtnode_from_rbuf (BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *b
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
decompress_brtnode_from_raw_block_into_rbuf(u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum) {
|
decompress_from_raw_block_into_rbuf(u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum) {
|
||||||
toku_trace("decompress");
|
toku_trace("decompress");
|
||||||
int r;
|
int r;
|
||||||
|
|
||||||
|
@ -914,14 +923,14 @@ decompress_brtnode_from_raw_block_into_rbuf(u_int8_t *raw_block, struct rbuf *rb
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
decompress_brtnode_from_raw_block_into_rbuf_versioned(u_int32_t version, u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum) {
|
decompress_from_raw_block_into_rbuf_versioned(u_int32_t version, u_int8_t *raw_block, struct rbuf *rb, BLOCKNUM blocknum) {
|
||||||
int r;
|
int r;
|
||||||
switch (version) {
|
switch (version) {
|
||||||
case BRT_LAYOUT_VERSION_10:
|
case BRT_LAYOUT_VERSION_10:
|
||||||
r = decompress_brtnode_from_raw_block_into_rbuf_10(raw_block, rb, blocknum);
|
r = decompress_brtnode_from_raw_block_into_rbuf_10(raw_block, rb, blocknum);
|
||||||
break;
|
break;
|
||||||
case BRT_LAYOUT_VERSION:
|
case BRT_LAYOUT_VERSION:
|
||||||
r = decompress_brtnode_from_raw_block_into_rbuf(raw_block, rb, blocknum);
|
r = decompress_from_raw_block_into_rbuf(raw_block, rb, blocknum);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
assert(FALSE);
|
assert(FALSE);
|
||||||
|
@ -959,19 +968,16 @@ deserialize_brtnode_from_rbuf_versioned (u_int32_t version, BLOCKNUM blocknum, u
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
// Read brt node from file into struct. Perform version upgrade if necessary.
|
read_and_decompress_block_from_fd_into_rbuf(int fd, BLOCKNUM blocknum,
|
||||||
int
|
struct brt_header *h,
|
||||||
toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h) {
|
struct rbuf *rb,
|
||||||
toku_trace("deserial start");
|
/* out */ int *layout_version_p) {
|
||||||
|
|
||||||
int r;
|
int r;
|
||||||
struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0};
|
|
||||||
|
|
||||||
if (0) printf("Deserializing Block %" PRId64 "\n", blocknum.b);
|
if (0) printf("Deserializing Block %" PRId64 "\n", blocknum.b);
|
||||||
if (h->panic) return h->panic;
|
if (h->panic) return h->panic;
|
||||||
|
|
||||||
toku_trace("deserial start");
|
toku_trace("deserial start nopanic");
|
||||||
|
|
||||||
// get the file offset and block size for the block
|
// get the file offset and block size for the block
|
||||||
DISKOFF offset, size;
|
DISKOFF offset, size;
|
||||||
|
@ -986,7 +992,9 @@ toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BR
|
||||||
int layout_version;
|
int layout_version;
|
||||||
{
|
{
|
||||||
u_int8_t *magic = raw_block + uncompressed_magic_offset;
|
u_int8_t *magic = raw_block + uncompressed_magic_offset;
|
||||||
if (memcmp(magic, "tokuleaf", 8)!=0 && memcmp(magic, "tokunode", 8)!=0) {
|
if (memcmp(magic, "tokuleaf", 8)!=0 &&
|
||||||
|
memcmp(magic, "tokunode", 8)!=0 &&
|
||||||
|
memcmp(magic, "tokuroll", 8)!=0) {
|
||||||
r = toku_db_badformat();
|
r = toku_db_badformat();
|
||||||
goto cleanup;
|
goto cleanup;
|
||||||
}
|
}
|
||||||
|
@ -1006,16 +1014,47 @@ toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BR
|
||||||
u_int32_t stored_xsum = toku_dtoh32(*(u_int32_t *)(raw_block + header_length));
|
u_int32_t stored_xsum = toku_dtoh32(*(u_int32_t *)(raw_block + header_length));
|
||||||
assert(xsum == stored_xsum);
|
assert(xsum == stored_xsum);
|
||||||
|
|
||||||
r = decompress_brtnode_from_raw_block_into_rbuf_versioned(layout_version, raw_block, &rb, blocknum);
|
r = decompress_from_raw_block_into_rbuf_versioned(layout_version, raw_block, rb, blocknum);
|
||||||
if (r!=0) goto cleanup;
|
if (r!=0) goto cleanup;
|
||||||
|
|
||||||
|
*layout_version_p = layout_version;
|
||||||
|
cleanup:
|
||||||
|
if (r!=0) {
|
||||||
|
if (rb->buf) toku_free(rb->buf);
|
||||||
|
rb->buf = NULL;
|
||||||
|
}
|
||||||
|
if (raw_block) toku_free(raw_block);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read brt node from file into struct. Perform version upgrade if necessary.
|
||||||
|
int
|
||||||
|
toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash,
|
||||||
|
BRTNODE *brtnode, struct brt_header *h) {
|
||||||
|
toku_trace("deserial start");
|
||||||
|
|
||||||
|
int r;
|
||||||
|
struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0};
|
||||||
|
|
||||||
|
int layout_version;
|
||||||
|
r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, h, &rb, &layout_version);
|
||||||
|
if (r!=0) goto cleanup;
|
||||||
|
|
||||||
|
{
|
||||||
|
u_int8_t *magic = rb.buf + uncompressed_magic_offset;
|
||||||
|
if (memcmp(magic, "tokuleaf", 8)!=0 &&
|
||||||
|
memcmp(magic, "tokunode", 8)!=0) {
|
||||||
|
r = toku_db_badformat();
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
r = deserialize_brtnode_from_rbuf_versioned(layout_version, blocknum, fullhash, brtnode, h, &rb);
|
r = deserialize_brtnode_from_rbuf_versioned(layout_version, blocknum, fullhash, brtnode, h, &rb);
|
||||||
|
|
||||||
toku_trace("deserial done");
|
toku_trace("deserial done");
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
if (rb.buf) toku_free(rb.buf);
|
if (rb.buf) toku_free(rb.buf);
|
||||||
if (raw_block) toku_free(raw_block);
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1603,5 +1642,245 @@ toku_db_badformat(void) {
|
||||||
return DB_BADFORMAT;
|
return DB_BADFORMAT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
serialize_rollback_log_size(ROLLBACK_LOG_NODE log) {
|
||||||
|
size_t size = node_header_overhead //8 "tokuroll", 4 version, 4 version_original
|
||||||
|
+8 //TXNID
|
||||||
|
+8 //sequence
|
||||||
|
+8 //thislogname
|
||||||
|
+8 //older (blocknum)
|
||||||
|
+8 //resident_bytecount
|
||||||
|
+8 //memarena_size_needed_to_load
|
||||||
|
+log->rollentry_resident_bytecount;
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calculated_size, int UU(n_sub_blocks), struct sub_block UU(sub_block[])) {
|
||||||
|
struct wbuf wb;
|
||||||
|
wbuf_init(&wb, buf, calculated_size);
|
||||||
|
{ //Serialize rollback log to local wbuf
|
||||||
|
wbuf_nocrc_literal_bytes(&wb, "tokuroll", 8);
|
||||||
|
assert(log->layout_version == BRT_LAYOUT_VERSION);
|
||||||
|
wbuf_nocrc_int(&wb, log->layout_version);
|
||||||
|
wbuf_nocrc_int(&wb, log->layout_version_original);
|
||||||
|
wbuf_nocrc_TXNID(&wb, log->txnid);
|
||||||
|
wbuf_nocrc_ulonglong(&wb, log->sequence);
|
||||||
|
wbuf_nocrc_BLOCKNUM(&wb, log->thislogname);
|
||||||
|
wbuf_nocrc_BLOCKNUM(&wb, log->older);
|
||||||
|
wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount);
|
||||||
|
//Write down memarena size needed to restore
|
||||||
|
wbuf_nocrc_ulonglong(&wb, memarena_total_size_in_use(log->rollentry_arena));
|
||||||
|
|
||||||
|
{
|
||||||
|
//Store rollback logs
|
||||||
|
struct roll_entry *item;
|
||||||
|
size_t done_before = wb.ndone;
|
||||||
|
for (item = log->newest_logentry; item; item = item->prev) {
|
||||||
|
toku_logger_rollback_wbuf_nocrc_write(&wb, item);
|
||||||
|
}
|
||||||
|
assert(done_before + log->rollentry_resident_bytecount == wb.ndone);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(wb.ndone == wb.size);
|
||||||
|
assert(calculated_size==wb.ndone);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
toku_serialize_rollback_log_to_memory (ROLLBACK_LOG_NODE log,
|
||||||
|
int UU(n_workitems), int UU(n_threads),
|
||||||
|
/*out*/ size_t *n_bytes_to_write,
|
||||||
|
/*out*/ char **bytes_to_write) {
|
||||||
|
// get the size of the serialized node
|
||||||
|
size_t calculated_size = serialize_rollback_log_size(log);
|
||||||
|
|
||||||
|
// choose sub block parameters
|
||||||
|
int n_sub_blocks = 0, sub_block_size = 0;
|
||||||
|
size_t data_size = calculated_size - node_header_overhead;
|
||||||
|
choose_sub_block_size(data_size, max_sub_blocks, &sub_block_size, &n_sub_blocks);
|
||||||
|
assert(0 < n_sub_blocks && n_sub_blocks <= max_sub_blocks);
|
||||||
|
assert(sub_block_size > 0);
|
||||||
|
|
||||||
|
// set the initial sub block size for all of the sub blocks
|
||||||
|
struct sub_block sub_block[n_sub_blocks];
|
||||||
|
for (int i = 0; i < n_sub_blocks; i++)
|
||||||
|
sub_block_init(&sub_block[i]);
|
||||||
|
set_all_sub_block_sizes(data_size, sub_block_size, n_sub_blocks, sub_block);
|
||||||
|
|
||||||
|
// allocate space for the serialized node
|
||||||
|
char *XMALLOC_N(calculated_size, buf);
|
||||||
|
// serialize the node into buf
|
||||||
|
serialize_rollback_log_node_to_buf(log, buf, calculated_size, n_sub_blocks, sub_block);
|
||||||
|
|
||||||
|
//Compress and malloc buffer to write
|
||||||
|
serialize_uncompressed_block_to_memory(buf, n_sub_blocks, sub_block,
|
||||||
|
n_bytes_to_write, bytes_to_write);
|
||||||
|
toku_free(buf);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
toku_serialize_rollback_log_to (int fd, BLOCKNUM blocknum, ROLLBACK_LOG_NODE log,
|
||||||
|
struct brt_header *h, int n_workitems, int n_threads,
|
||||||
|
BOOL for_checkpoint) {
|
||||||
|
size_t n_to_write;
|
||||||
|
char *compressed_buf;
|
||||||
|
{
|
||||||
|
int r = toku_serialize_rollback_log_to_memory(log, n_workitems, n_threads, &n_to_write, &compressed_buf);
|
||||||
|
if (r!=0) return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
assert(blocknum.b>=0);
|
||||||
|
DISKOFF offset;
|
||||||
|
toku_blocknum_realloc_on_disk(h->blocktable, blocknum, n_to_write, &offset,
|
||||||
|
h, for_checkpoint); //dirties h
|
||||||
|
lock_for_pwrite();
|
||||||
|
toku_full_pwrite_extend(fd, compressed_buf, n_to_write, offset);
|
||||||
|
unlock_for_pwrite();
|
||||||
|
}
|
||||||
|
toku_free(compressed_buf);
|
||||||
|
log->dirty = 0; // See #1957. Must set the node to be clean after serializing it so that it doesn't get written again on the next checkpoint or eviction.
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, u_int32_t fullhash, ROLLBACK_LOG_NODE *log_p,
|
||||||
|
TOKUTXN txn, struct brt_header *h, struct rbuf *rb) {
|
||||||
|
TAGMALLOC(ROLLBACK_LOG_NODE, result);
|
||||||
|
int r;
|
||||||
|
if (result==NULL) {
|
||||||
|
r=errno;
|
||||||
|
if (0) { died0: toku_free(result); }
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
//printf("Deserializing %lld datasize=%d\n", off, datasize);
|
||||||
|
bytevec magic;
|
||||||
|
rbuf_literal_bytes(rb, &magic, 8);
|
||||||
|
assert(!memcmp(magic, "tokuroll", 8));
|
||||||
|
|
||||||
|
result->layout_version = rbuf_int(rb);
|
||||||
|
assert(result->layout_version == BRT_LAYOUT_VERSION);
|
||||||
|
result->layout_version_original = rbuf_int(rb);
|
||||||
|
result->layout_version_read_from_disk = result->layout_version;
|
||||||
|
result->dirty = FALSE;
|
||||||
|
//TODO: Maybe add descriptor (or just descriptor version) here eventually?
|
||||||
|
//TODO: This is hard.. everything is shared in a single dictionary.
|
||||||
|
rbuf_TXNID(rb, &result->txnid);
|
||||||
|
result->sequence = rbuf_ulonglong(rb);
|
||||||
|
if (result->txnid == txn->txnid64 && result->sequence > txn->num_rollback_nodes) {
|
||||||
|
r = toku_db_badformat();
|
||||||
|
goto died0;
|
||||||
|
}
|
||||||
|
result->thislogname = rbuf_blocknum(rb);
|
||||||
|
if (result->thislogname.b != blocknum.b) {
|
||||||
|
r = toku_db_badformat();
|
||||||
|
goto died0;
|
||||||
|
}
|
||||||
|
result->thishash = toku_cachetable_hash(h->cf, result->thislogname);
|
||||||
|
if (result->thishash != fullhash) {
|
||||||
|
r = toku_db_badformat();
|
||||||
|
goto died0;
|
||||||
|
}
|
||||||
|
result->older = rbuf_blocknum(rb);
|
||||||
|
result->older_hash = toku_cachetable_hash(h->cf, result->older);
|
||||||
|
result->rollentry_resident_bytecount = rbuf_ulonglong(rb);
|
||||||
|
|
||||||
|
size_t arena_initial_size = rbuf_ulonglong(rb);
|
||||||
|
result->rollentry_arena = memarena_create_presized(arena_initial_size);
|
||||||
|
if (0) { died1: memarena_close(&result->rollentry_arena); goto died0; }
|
||||||
|
|
||||||
|
//Load rollback entries
|
||||||
|
assert(rb->size > 4);
|
||||||
|
//Start with empty list
|
||||||
|
result->oldest_logentry = result->newest_logentry = NULL;
|
||||||
|
while (rb->ndone < rb->size) {
|
||||||
|
struct roll_entry *item;
|
||||||
|
uint32_t rollback_fsize = rbuf_int(rb); //Already read 4. Rest is 4 smaller
|
||||||
|
bytevec item_vec;
|
||||||
|
rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4);
|
||||||
|
unsigned char* item_buf = (unsigned char*)item_vec;
|
||||||
|
r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, result->rollentry_arena);
|
||||||
|
if (r!=0) {
|
||||||
|
r = toku_db_badformat();
|
||||||
|
goto died1;
|
||||||
|
}
|
||||||
|
//Add to head of list
|
||||||
|
if (result->oldest_logentry) {
|
||||||
|
result->oldest_logentry->prev = item;
|
||||||
|
result->oldest_logentry = item;
|
||||||
|
item->prev = NULL;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
result->oldest_logentry = result->newest_logentry = item;
|
||||||
|
item->prev = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
toku_free(rb->buf);
|
||||||
|
rb->buf = NULL;
|
||||||
|
*log_p = result;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
deserialize_rollback_log_from_rbuf_versioned (u_int32_t version, BLOCKNUM blocknum, u_int32_t fullhash,
|
||||||
|
ROLLBACK_LOG_NODE *log,
|
||||||
|
TOKUTXN txn, struct brt_header *h, struct rbuf *rb) {
|
||||||
|
int r = 0;
|
||||||
|
ROLLBACK_LOG_NODE rollback_log_node = NULL;
|
||||||
|
|
||||||
|
int upgrade = 0;
|
||||||
|
switch (version) {
|
||||||
|
case BRT_LAYOUT_VERSION:
|
||||||
|
if (!upgrade)
|
||||||
|
r = deserialize_rollback_log_from_rbuf(blocknum, fullhash, &rollback_log_node, txn, h, rb);
|
||||||
|
if (r==0) {
|
||||||
|
assert(rollback_log_node);
|
||||||
|
*log = rollback_log_node;
|
||||||
|
}
|
||||||
|
if (upgrade && r == 0) (*log)->dirty = 1;
|
||||||
|
break; // this is the only break
|
||||||
|
default:
|
||||||
|
assert(FALSE);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read rollback log node from file into struct. Perform version upgrade if necessary.
|
||||||
|
int
|
||||||
|
toku_deserialize_rollback_log_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash,
|
||||||
|
ROLLBACK_LOG_NODE *logp, TOKUTXN txn, struct brt_header *h) {
|
||||||
|
toku_trace("deserial start");
|
||||||
|
|
||||||
|
int r;
|
||||||
|
struct rbuf rb = {.buf = NULL, .size = 0, .ndone = 0};
|
||||||
|
|
||||||
|
int layout_version;
|
||||||
|
r = read_and_decompress_block_from_fd_into_rbuf(fd, blocknum, h, &rb, &layout_version);
|
||||||
|
if (r!=0) goto cleanup;
|
||||||
|
|
||||||
|
{
|
||||||
|
u_int8_t *magic = rb.buf + uncompressed_magic_offset;
|
||||||
|
if (memcmp(magic, "tokuroll", 8)!=0) {
|
||||||
|
r = toku_db_badformat();
|
||||||
|
goto cleanup;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
r = deserialize_rollback_log_from_rbuf_versioned(layout_version, blocknum, fullhash, logp, txn, h, &rb);
|
||||||
|
|
||||||
|
toku_trace("deserial done");
|
||||||
|
|
||||||
|
cleanup:
|
||||||
|
if (rb.buf) toku_free(rb.buf);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// NOTE: Backwards compatibility functions are in the included .c file(s):
|
// NOTE: Backwards compatibility functions are in the included .c file(s):
|
||||||
#include "backwards_10.c"
|
#include "backwards_10.c"
|
||||||
|
|
||||||
|
|
128
newbrt/brt.c
128
newbrt/brt.c
|
@ -2650,13 +2650,6 @@ int toku_brt_insert (BRT brt, DBT *key, DBT *val, TOKUTXN txn) {
|
||||||
return toku_brt_maybe_insert(brt, key, val, txn, FALSE, ZERO_LSN, TRUE, BRT_INSERT);
|
return toku_brt_maybe_insert(brt, key, val, txn, FALSE, ZERO_LSN, TRUE, BRT_INSERT);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
|
||||||
txn_note_doing_work(TOKUTXN txn) {
|
|
||||||
if (txn)
|
|
||||||
txn->has_done_work = TRUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_brt_load_recovery(TOKUTXN txn, char const * old_iname, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn) {
|
toku_brt_load_recovery(TOKUTXN txn, char const * old_iname, char const * new_iname, int do_fsync, int do_log, LSN *load_lsn) {
|
||||||
int r = 0;
|
int r = 0;
|
||||||
|
@ -2665,12 +2658,9 @@ toku_brt_load_recovery(TOKUTXN txn, char const * old_iname, char const * new_ina
|
||||||
//before the (old) file is actually unlinked
|
//before the (old) file is actually unlinked
|
||||||
TOKULOGGER logger = toku_txn_logger(txn);
|
TOKULOGGER logger = toku_txn_logger(txn);
|
||||||
|
|
||||||
BYTESTRING old_iname_bs = {.len=strlen(old_iname),
|
BYTESTRING old_iname_bs = {.len=strlen(old_iname), .data=(char*)old_iname};
|
||||||
.data=toku_memdup_in_rollback(txn, old_iname, strlen(old_iname))};
|
BYTESTRING new_iname_bs = {.len=strlen(new_iname), .data=(char*)new_iname};
|
||||||
BYTESTRING new_iname_bs = {.len=strlen(new_iname),
|
r = toku_logger_save_rollback_load(txn, &old_iname_bs, &new_iname_bs);
|
||||||
.data=toku_memdup_in_rollback(txn, new_iname, strlen(new_iname))};
|
|
||||||
|
|
||||||
r = toku_logger_save_rollback_load(txn, old_iname_bs, new_iname_bs);
|
|
||||||
if (r==0 && do_log && logger) {
|
if (r==0 && do_log && logger) {
|
||||||
TXNID xid = toku_txn_get_txnid(txn);
|
TXNID xid = toku_txn_get_txnid(txn);
|
||||||
r = toku_log_load(logger, load_lsn, do_fsync, xid, old_iname_bs, new_iname_bs);
|
r = toku_log_load(logger, load_lsn, do_fsync, xid, old_iname_bs, new_iname_bs);
|
||||||
|
@ -2715,15 +2705,14 @@ int toku_brt_maybe_insert (BRT brt, DBT *key, DBT *val, TOKUTXN txn, BOOL oplsn_
|
||||||
int r = 0;
|
int r = 0;
|
||||||
XIDS message_xids;
|
XIDS message_xids;
|
||||||
TXNID xid = toku_txn_get_txnid(txn);
|
TXNID xid = toku_txn_get_txnid(txn);
|
||||||
txn_note_doing_work(txn);
|
|
||||||
if (txn && (brt->h->txnid_that_created_or_locked_when_empty != xid)) {
|
if (txn && (brt->h->txnid_that_created_or_locked_when_empty != xid)) {
|
||||||
BYTESTRING keybs = {key->size, toku_memdup_in_rollback(txn, key->data, key->size)};
|
BYTESTRING keybs = {key->size, key->data};
|
||||||
int need_data = (brt->flags&TOKU_DB_DUPSORT)!=0; // dupsorts don't need the data part
|
int need_data = (brt->flags&TOKU_DB_DUPSORT)!=0; // dupsorts don't need the data part
|
||||||
if (need_data) {
|
if (need_data) {
|
||||||
BYTESTRING databs = {val->size, toku_memdup_in_rollback(txn, val->data, val->size)};
|
BYTESTRING databs = {val->size, val->data};
|
||||||
r = toku_logger_save_rollback_cmdinsertboth(txn, toku_cachefile_filenum(brt->cf), keybs, databs);
|
r = toku_logger_save_rollback_cmdinsertboth(txn, toku_cachefile_filenum(brt->cf), &keybs, &databs);
|
||||||
} else {
|
} else {
|
||||||
r = toku_logger_save_rollback_cmdinsert (txn, toku_cachefile_filenum(brt->cf), keybs);
|
r = toku_logger_save_rollback_cmdinsert (txn, toku_cachefile_filenum(brt->cf), &keybs);
|
||||||
}
|
}
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
r = toku_txn_note_brt(txn, brt);
|
r = toku_txn_note_brt(txn, brt);
|
||||||
|
@ -2788,10 +2777,9 @@ int toku_brt_maybe_delete(BRT brt, DBT *key, TOKUTXN txn, BOOL oplsn_valid, LSN
|
||||||
int r;
|
int r;
|
||||||
XIDS message_xids;
|
XIDS message_xids;
|
||||||
TXNID xid = toku_txn_get_txnid(txn);
|
TXNID xid = toku_txn_get_txnid(txn);
|
||||||
txn_note_doing_work(txn);
|
|
||||||
if (txn && (brt->h->txnid_that_created_or_locked_when_empty != xid)) {
|
if (txn && (brt->h->txnid_that_created_or_locked_when_empty != xid)) {
|
||||||
BYTESTRING keybs = {key->size, toku_memdup_in_rollback(txn, key->data, key->size)};
|
BYTESTRING keybs = {key->size, key->data};
|
||||||
r = toku_logger_save_rollback_cmddelete(txn, toku_cachefile_filenum(brt->cf), keybs);
|
r = toku_logger_save_rollback_cmddelete(txn, toku_cachefile_filenum(brt->cf), &keybs);
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
r = toku_txn_note_brt(txn, brt);
|
r = toku_txn_note_brt(txn, brt);
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
|
@ -2975,6 +2963,20 @@ brtheader_log_fassociate_during_checkpoint (CACHEFILE cf, void *header_v) {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
brtheader_log_suppress_rollback_during_checkpoint (CACHEFILE cf, void *header_v) {
|
||||||
|
int r = 0;
|
||||||
|
struct brt_header *h = header_v;
|
||||||
|
TXNID xid = h->txnid_that_created_or_locked_when_empty;
|
||||||
|
if (xid != TXNID_NONE) {
|
||||||
|
//Only log if useful.
|
||||||
|
TOKULOGGER logger = toku_cachefile_logger(cf);
|
||||||
|
FILENUM filenum = toku_cachefile_filenum (cf);
|
||||||
|
r = toku_log_suppress_rollback(logger, NULL, 0, filenum, xid);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int brtheader_note_pin_by_checkpoint (CACHEFILE cachefile, void *header_v);
|
static int brtheader_note_pin_by_checkpoint (CACHEFILE cachefile, void *header_v);
|
||||||
static int brtheader_note_unpin_by_checkpoint (CACHEFILE cachefile, void *header_v);
|
static int brtheader_note_unpin_by_checkpoint (CACHEFILE cachefile, void *header_v);
|
||||||
|
@ -2997,6 +2999,7 @@ brt_init_header_partial (BRT t) {
|
||||||
toku_cachefile_set_userdata(t->cf,
|
toku_cachefile_set_userdata(t->cf,
|
||||||
t->h,
|
t->h,
|
||||||
brtheader_log_fassociate_during_checkpoint,
|
brtheader_log_fassociate_during_checkpoint,
|
||||||
|
brtheader_log_suppress_rollback_during_checkpoint,
|
||||||
toku_brtheader_close,
|
toku_brtheader_close,
|
||||||
toku_brtheader_checkpoint,
|
toku_brtheader_checkpoint,
|
||||||
toku_brtheader_begin_checkpoint,
|
toku_brtheader_begin_checkpoint,
|
||||||
|
@ -3074,6 +3077,7 @@ int toku_read_brt_header_and_store_in_cachefile (CACHEFILE cf, struct brt_header
|
||||||
toku_cachefile_set_userdata(cf,
|
toku_cachefile_set_userdata(cf,
|
||||||
(void*)h,
|
(void*)h,
|
||||||
brtheader_log_fassociate_during_checkpoint,
|
brtheader_log_fassociate_during_checkpoint,
|
||||||
|
brtheader_log_suppress_rollback_during_checkpoint,
|
||||||
toku_brtheader_close,
|
toku_brtheader_close,
|
||||||
toku_brtheader_checkpoint,
|
toku_brtheader_checkpoint,
|
||||||
toku_brtheader_begin_checkpoint,
|
toku_brtheader_begin_checkpoint,
|
||||||
|
@ -3129,7 +3133,7 @@ verify_builtin_comparisons_consistent(BRT t, u_int32_t flags) {
|
||||||
// This is the actual open, used for various purposes, such as normal use, recovery, and redirect.
|
// This is the actual open, used for various purposes, such as normal use, recovery, and redirect.
|
||||||
// fname_in_env is the iname, relative to the env_dir (data_dir is already in iname as prefix)
|
// fname_in_env is the iname, relative to the env_dir (data_dir is already in iname as prefix)
|
||||||
static int
|
static int
|
||||||
brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, DB *db, int recovery_force_fcreate, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id) {
|
brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, DB *db, FILENUM use_filenum, DICTIONARY_ID use_dictionary_id) {
|
||||||
int r;
|
int r;
|
||||||
BOOL txn_created = FALSE;
|
BOOL txn_created = FALSE;
|
||||||
|
|
||||||
|
@ -3147,11 +3151,11 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
|
||||||
assert(is_create || !only_create);
|
assert(is_create || !only_create);
|
||||||
t->db = db;
|
t->db = db;
|
||||||
BOOL log_fopen = FALSE; // set true if we're opening a pre-existing file
|
BOOL log_fopen = FALSE; // set true if we're opening a pre-existing file
|
||||||
|
BOOL did_create = FALSE;
|
||||||
|
FILENUM reserved_filenum = use_filenum;
|
||||||
{
|
{
|
||||||
int fd = -1;
|
int fd = -1;
|
||||||
BOOL did_create = FALSE;
|
|
||||||
r = brt_open_file(fname_in_cwd, &fd);
|
r = brt_open_file(fname_in_cwd, &fd);
|
||||||
FILENUM reserved_filenum = use_filenum;
|
|
||||||
int use_reserved_filenum = reserved_filenum.fileid != FILENUM_NONE.fileid;
|
int use_reserved_filenum = reserved_filenum.fileid != FILENUM_NONE.fileid;
|
||||||
if (r==ENOENT && is_create) {
|
if (r==ENOENT && is_create) {
|
||||||
toku_cachetable_reserve_filenum(cachetable, &reserved_filenum, use_reserved_filenum, reserved_filenum);
|
toku_cachetable_reserve_filenum(cachetable, &reserved_filenum, use_reserved_filenum, reserved_filenum);
|
||||||
|
@ -3164,6 +3168,12 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
|
||||||
if (use_reserved_filenum) assert(reserved_filenum.fileid == use_filenum.fileid);
|
if (use_reserved_filenum) assert(reserved_filenum.fileid == use_filenum.fileid);
|
||||||
did_create = TRUE;
|
did_create = TRUE;
|
||||||
mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO;
|
mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO;
|
||||||
|
if (txn) {
|
||||||
|
BYTESTRING bs = { .len=strlen(fname_in_env), .data = (char*)fname_in_env };
|
||||||
|
r = toku_logger_save_rollback_fcreate(txn, reserved_filenum, &bs); // bs is a copy of the fname relative to the environment
|
||||||
|
if (r != 0) goto died1;
|
||||||
|
}
|
||||||
|
txn_created = (BOOL)(txn!=NULL);
|
||||||
r = toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, mode, t->flags, &(t->temp_descriptor));
|
r = toku_logger_log_fcreate(txn, fname_in_env, reserved_filenum, mode, t->flags, &(t->temp_descriptor));
|
||||||
if (r!=0) goto died1;
|
if (r!=0) goto died1;
|
||||||
r = brt_create_file(t, fname_in_cwd, &fd);
|
r = brt_create_file(t, fname_in_cwd, &fd);
|
||||||
|
@ -3176,14 +3186,7 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
|
||||||
fname_in_env,
|
fname_in_env,
|
||||||
use_reserved_filenum||did_create, reserved_filenum, did_create);
|
use_reserved_filenum||did_create, reserved_filenum, did_create);
|
||||||
if (r != 0) goto died1;
|
if (r != 0) goto died1;
|
||||||
if (did_create || recovery_force_fcreate) {
|
if (!did_create)
|
||||||
if (txn) {
|
|
||||||
BYTESTRING bs = { .len=strlen(fname_in_env), .data = toku_strdup_in_rollback(txn, fname_in_env) };
|
|
||||||
r = toku_logger_save_rollback_fcreate(txn, toku_cachefile_filenum(t->cf), bs); // bs is a copy of the fname relative to the environment
|
|
||||||
if (r != 0) goto died_after_open;
|
|
||||||
}
|
|
||||||
txn_created = (BOOL)(txn!=NULL);
|
|
||||||
} else
|
|
||||||
log_fopen = TRUE; //Log of fopen must be delayed till flags are available
|
log_fopen = TRUE; //Log of fopen must be delayed till flags are available
|
||||||
}
|
}
|
||||||
if (r!=0) {
|
if (r!=0) {
|
||||||
|
@ -3294,7 +3297,7 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
|
||||||
if (t->db) t->db->descriptor = &t->h->descriptor.dbt;
|
if (t->db) t->db->descriptor = &t->h->descriptor.dbt;
|
||||||
if (txn_created) {
|
if (txn_created) {
|
||||||
assert(txn);
|
assert(txn);
|
||||||
assert(t->h->txnid_that_created_or_locked_when_empty == 0); // Uses 0 for no transaction.
|
assert(t->h->txnid_that_created_or_locked_when_empty == TXNID_NONE);
|
||||||
t->h->txnid_that_created_or_locked_when_empty = toku_txn_get_txnid(txn);
|
t->h->txnid_that_created_or_locked_when_empty = toku_txn_get_txnid(txn);
|
||||||
r = toku_txn_note_brt(txn, t);
|
r = toku_txn_note_brt(txn, t);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
@ -3312,11 +3315,11 @@ brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHET
|
||||||
|
|
||||||
// Open a brt for the purpose of recovery, which requires that the brt be open to a pre-determined FILENUM. (dict_id is assigned by the brt_open() function.)
|
// Open a brt for the purpose of recovery, which requires that the brt be open to a pre-determined FILENUM. (dict_id is assigned by the brt_open() function.)
|
||||||
int
|
int
|
||||||
toku_brt_open_recovery(BRT t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, DB *db, int recovery_force_fcreate, FILENUM use_filenum) {
|
toku_brt_open_recovery(BRT t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, DB *db, FILENUM use_filenum) {
|
||||||
int r;
|
int r;
|
||||||
assert(use_filenum.fileid != FILENUM_NONE.fileid);
|
assert(use_filenum.fileid != FILENUM_NONE.fileid);
|
||||||
r = brt_open(t, fname_in_env, is_create, only_create, cachetable,
|
r = brt_open(t, fname_in_env, is_create, only_create, cachetable,
|
||||||
txn, db, recovery_force_fcreate, use_filenum, DICTIONARY_ID_NONE);
|
txn, db, use_filenum, DICTIONARY_ID_NONE);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3324,7 +3327,7 @@ toku_brt_open_recovery(BRT t, const char *fname_in_env, int is_create, int only_
|
||||||
int
|
int
|
||||||
toku_brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, DB *db) {
|
toku_brt_open(BRT t, const char *fname_in_env, int is_create, int only_create, CACHETABLE cachetable, TOKUTXN txn, DB *db) {
|
||||||
int r;
|
int r;
|
||||||
r = brt_open(t, fname_in_env, is_create, only_create, cachetable, txn, db, FALSE, FILENUM_NONE, DICTIONARY_ID_NONE);
|
r = brt_open(t, fname_in_env, is_create, only_create, cachetable, txn, db, FILENUM_NONE, DICTIONARY_ID_NONE);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3359,7 +3362,7 @@ brt_open_for_redirect(BRT *new_brtp, const char *fname_in_env, TOKUTXN txn, BRT
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
}
|
}
|
||||||
CACHETABLE ct = toku_cachefile_get_cachetable(old_brt->cf);
|
CACHETABLE ct = toku_cachefile_get_cachetable(old_brt->cf);
|
||||||
r = brt_open(t, fname_in_env, 0, 0, ct, txn, old_brt->db, FALSE, FILENUM_NONE, old_h->dict_id);
|
r = brt_open(t, fname_in_env, 0, 0, ct, txn, old_brt->db, FILENUM_NONE, old_h->dict_id);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
if (old_h->descriptor.version==0) {
|
if (old_h->descriptor.version==0) {
|
||||||
assert(t->h->descriptor.version == 0);
|
assert(t->h->descriptor.version == 0);
|
||||||
|
@ -3400,7 +3403,7 @@ brt_redirect_db (BRT brt_to, BRT brt_from) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
redirect_brt_close_delayed(DB *db, u_int32_t UU(flags)) {
|
fake_db_brt_close_delayed(DB *db, u_int32_t UU(flags)) {
|
||||||
BRT brt_to_close = db->api_internal;
|
BRT brt_to_close = db->api_internal;
|
||||||
char *error_string = NULL;
|
char *error_string = NULL;
|
||||||
int r = toku_close_brt(brt_to_close, &error_string);
|
int r = toku_close_brt(brt_to_close, &error_string);
|
||||||
|
@ -3435,7 +3438,7 @@ toku_brt_header_close_redirected_brts(struct brt_header * h) {
|
||||||
assert(which == num_brts);
|
assert(which == num_brts);
|
||||||
for (which = 0; which < num_brts; which++) {
|
for (which = 0; which < num_brts; which++) {
|
||||||
int r;
|
int r;
|
||||||
r = toku_brt_db_delay_closed(brts[which], dbs[which], redirect_brt_close_delayed, 0);
|
r = toku_brt_db_delay_closed(brts[which], dbs[which], fake_db_brt_close_delayed, 0);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -3590,7 +3593,6 @@ toku_dictionary_redirect (const char *dst_fname_in_env, BRT old_brt, TOKUTXN txn
|
||||||
}
|
}
|
||||||
|
|
||||||
if (txn) {
|
if (txn) {
|
||||||
txn_note_doing_work(txn);
|
|
||||||
r = toku_txn_note_brt(txn, old_brt); // mark old brt as touched by this txn
|
r = toku_txn_note_brt(txn, old_brt); // mark old brt as touched by this txn
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
}
|
}
|
||||||
|
@ -3618,6 +3620,8 @@ toku_dictionary_redirect (const char *dst_fname_in_env, BRT old_brt, TOKUTXN txn
|
||||||
assert(new_h->txnid_that_created_or_locked_when_empty == TXNID_NONE);
|
assert(new_h->txnid_that_created_or_locked_when_empty == TXNID_NONE);
|
||||||
TXNID xid = toku_txn_get_txnid(txn);
|
TXNID xid = toku_txn_get_txnid(txn);
|
||||||
new_h->txnid_that_created_or_locked_when_empty = xid;
|
new_h->txnid_that_created_or_locked_when_empty = xid;
|
||||||
|
r = toku_log_suppress_rollback(txn->logger, NULL, 0, new_filenum, xid);
|
||||||
|
assert(r==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
cleanup:
|
cleanup:
|
||||||
|
@ -3856,6 +3860,8 @@ toku_brtheader_close (CACHEFILE cachefile, int fd, void *header_v, char **malloc
|
||||||
if (h->panic) {
|
if (h->panic) {
|
||||||
r = h->panic;
|
r = h->panic;
|
||||||
} else if (h->dictionary_opened) { //Otherwise header has never fully been created.
|
} else if (h->dictionary_opened) { //Otherwise header has never fully been created.
|
||||||
|
assert(h->cf == cachefile);
|
||||||
|
TOKULOGGER logger = toku_cachefile_logger(cachefile);
|
||||||
LSN lsn = ZERO_LSN;
|
LSN lsn = ZERO_LSN;
|
||||||
//Get LSN
|
//Get LSN
|
||||||
if (oplsn_valid) {
|
if (oplsn_valid) {
|
||||||
|
@ -3868,17 +3874,19 @@ toku_brtheader_close (CACHEFILE cachefile, int fd, void *header_v, char **malloc
|
||||||
else {
|
else {
|
||||||
//Get LSN from logger
|
//Get LSN from logger
|
||||||
lsn = ZERO_LSN; // if there is no logger, we use zero for the lsn
|
lsn = ZERO_LSN; // if there is no logger, we use zero for the lsn
|
||||||
TOKULOGGER logger = toku_cachefile_logger(cachefile);
|
|
||||||
if (logger) {
|
if (logger) {
|
||||||
//NEED NAME
|
|
||||||
char* fname_in_env = toku_cachefile_fname_in_env(cachefile);
|
char* fname_in_env = toku_cachefile_fname_in_env(cachefile);
|
||||||
assert(fname_in_env);
|
assert(fname_in_env);
|
||||||
BYTESTRING bs = {.len=strlen(fname_in_env), .data=fname_in_env};
|
BYTESTRING bs = {.len=strlen(fname_in_env), .data=fname_in_env};
|
||||||
r = toku_log_fclose(logger, &lsn, h->dirty, bs, toku_cachefile_filenum(cachefile), h->flags); // flush the log on close (if new header is being written), otherwise it might not make it out.
|
r = toku_log_fclose(logger, &lsn, h->dirty, bs, toku_cachefile_filenum(cachefile)); // flush the log on close (if new header is being written), otherwise it might not make it out.
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (h->dirty) { // this is the only place this bit is tested (in currentheader)
|
if (h->dirty) { // this is the only place this bit is tested (in currentheader)
|
||||||
|
if (logger) { //Rollback cachefile MUST NOT BE CLOSED DIRTY
|
||||||
|
//It can be checkpointed only via 'checkpoint'
|
||||||
|
assert(logger->rollback_cachefile != cachefile);
|
||||||
|
}
|
||||||
int r2;
|
int r2;
|
||||||
//assert(lsn.lsn!=0);
|
//assert(lsn.lsn!=0);
|
||||||
r2 = toku_brtheader_begin_checkpoint(cachefile, fd, lsn, header_v);
|
r2 = toku_brtheader_begin_checkpoint(cachefile, fd, lsn, header_v);
|
||||||
|
@ -5315,11 +5323,10 @@ int toku_brt_maybe_delete_both(BRT brt, DBT *key, DBT *val, TOKUTXN txn, BOOL op
|
||||||
int r;
|
int r;
|
||||||
XIDS message_xids;
|
XIDS message_xids;
|
||||||
TXNID xid = toku_txn_get_txnid(txn);
|
TXNID xid = toku_txn_get_txnid(txn);
|
||||||
txn_note_doing_work(txn);
|
|
||||||
if (txn && (brt->h->txnid_that_created_or_locked_when_empty != xid)) {
|
if (txn && (brt->h->txnid_that_created_or_locked_when_empty != xid)) {
|
||||||
BYTESTRING keybs = {key->size, toku_memdup_in_rollback(txn, key->data, key->size)};
|
BYTESTRING keybs = {key->size, key->data};
|
||||||
BYTESTRING databs = {val->size, toku_memdup_in_rollback(txn, val->data, val->size)};
|
BYTESTRING databs = {val->size, val->data};
|
||||||
r = toku_logger_save_rollback_cmddeleteboth(txn, toku_cachefile_filenum(brt->cf), keybs, databs);
|
r = toku_logger_save_rollback_cmddeleteboth(txn, toku_cachefile_filenum(brt->cf), &keybs, &databs);
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
r = toku_txn_note_brt(txn, brt);
|
r = toku_txn_note_brt(txn, brt);
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
|
@ -5671,8 +5678,8 @@ int toku_brt_destroy(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
//Return TRUE if empty, FALSE if not empty.
|
//Return TRUE if empty, FALSE if not empty.
|
||||||
static BOOL
|
BOOL
|
||||||
brt_is_empty (BRT brt) {
|
toku_brt_is_empty (BRT brt) {
|
||||||
BRT_CURSOR cursor;
|
BRT_CURSOR cursor;
|
||||||
int r, r2;
|
int r, r2;
|
||||||
BOOL is_empty;
|
BOOL is_empty;
|
||||||
|
@ -5687,12 +5694,12 @@ brt_is_empty (BRT brt) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_brt_note_table_lock (BRT brt, TOKUTXN txn)
|
toku_brt_note_table_lock (BRT brt, TOKUTXN txn, BOOL ignore_not_empty) {
|
||||||
{
|
|
||||||
int r = 0;
|
int r = 0;
|
||||||
if (brt->h->txnid_that_created_or_locked_when_empty != toku_txn_get_txnid(txn) &&
|
if (brt->h->txnid_that_created_or_locked_when_empty != toku_txn_get_txnid(txn) &&
|
||||||
brt_is_empty(brt) &&
|
(ignore_not_empty || toku_brt_is_empty(brt)) &&
|
||||||
brt->h->txnid_that_created_or_locked_when_empty == 0) {
|
brt->h->txnid_that_created_or_locked_when_empty == TXNID_NONE)
|
||||||
|
{
|
||||||
brt->h->txnid_that_created_or_locked_when_empty = toku_txn_get_txnid(txn);
|
brt->h->txnid_that_created_or_locked_when_empty = toku_txn_get_txnid(txn);
|
||||||
r = toku_txn_note_brt(txn, brt);
|
r = toku_txn_note_brt(txn, brt);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
@ -5711,7 +5718,7 @@ LSN toku_brt_checkpoint_lsn(BRT brt) {
|
||||||
return brt->h->checkpoint_lsn;
|
return brt->h->checkpoint_lsn;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int toku_brt_header_set_panic(struct brt_header *h, int panic, char *panic_string) {
|
int toku_brt_header_set_panic(struct brt_header *h, int panic, char *panic_string) {
|
||||||
if (h->panic == 0) {
|
if (h->panic == 0) {
|
||||||
h->panic = panic;
|
h->panic = panic;
|
||||||
if (h->panic_string)
|
if (h->panic_string)
|
||||||
|
@ -5743,7 +5750,7 @@ int toku_logger_log_fdelete (TOKUTXN txn, const char *fname, FILENUM filenum, u_
|
||||||
// Prepare to remove a dictionary from the database when this transaction is committed:
|
// Prepare to remove a dictionary from the database when this transaction is committed:
|
||||||
// - if cachetable has file open, mark it as in use so that cf remains valid until we're done
|
// - if cachetable has file open, mark it as in use so that cf remains valid until we're done
|
||||||
// - mark transaction as NEED fsync on commit
|
// - mark transaction as NEED fsync on commit
|
||||||
// - make entry in rolltmp log
|
// - make entry in rollback log
|
||||||
// - make fdelete entry in recovery log
|
// - make fdelete entry in recovery log
|
||||||
int toku_brt_remove_on_commit(TOKUTXN txn, DBT* iname_in_env_dbt_p) {
|
int toku_brt_remove_on_commit(TOKUTXN txn, DBT* iname_in_env_dbt_p) {
|
||||||
assert(txn);
|
assert(txn);
|
||||||
|
@ -5779,12 +5786,9 @@ int toku_brt_remove_on_commit(TOKUTXN txn, DBT* iname_in_env_dbt_p) {
|
||||||
toku_txn_force_fsync_on_commit(txn); //If the txn commits, the commit MUST be in the log
|
toku_txn_force_fsync_on_commit(txn); //If the txn commits, the commit MUST be in the log
|
||||||
//before the file is actually unlinked
|
//before the file is actually unlinked
|
||||||
{
|
{
|
||||||
BYTESTRING iname_in_env_bs = {
|
BYTESTRING iname_in_env_bs = { .len=strlen(iname_in_env), .data = (char*)iname_in_env };
|
||||||
.len=strlen(iname_in_env),
|
// make entry in rollback log
|
||||||
.data = toku_strdup_in_rollback(txn, iname_in_env)
|
r = toku_logger_save_rollback_fdelete(txn, was_open, filenum, &iname_in_env_bs);
|
||||||
};
|
|
||||||
// make entry in rolltmp log
|
|
||||||
r = toku_logger_save_rollback_fdelete(txn, was_open, filenum, iname_in_env_bs);
|
|
||||||
assert(r==0); //On error we would need to remove the CF reference, which is complicated.
|
assert(r==0); //On error we would need to remove the CF reference, which is complicated.
|
||||||
}
|
}
|
||||||
if (r==0)
|
if (r==0)
|
||||||
|
@ -5794,7 +5798,7 @@ int toku_brt_remove_on_commit(TOKUTXN txn, DBT* iname_in_env_dbt_p) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//
|
// Non-transaction version of fdelete
|
||||||
int toku_brt_remove_now(CACHETABLE ct, DBT* iname_in_env_dbt_p) {
|
int toku_brt_remove_now(CACHETABLE ct, DBT* iname_in_env_dbt_p) {
|
||||||
int r;
|
int r;
|
||||||
const char *iname_in_env = iname_in_env_dbt_p->data;
|
const char *iname_in_env = iname_in_env_dbt_p->data;
|
||||||
|
|
|
@ -52,7 +52,7 @@ int brt_set_cachetable(BRT, CACHETABLE);
|
||||||
int toku_brt_open(BRT, const char *fname_in_env,
|
int toku_brt_open(BRT, const char *fname_in_env,
|
||||||
int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, DB *db);
|
int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, DB *db);
|
||||||
int toku_brt_open_recovery(BRT, const char *fname_in_env,
|
int toku_brt_open_recovery(BRT, const char *fname_in_env,
|
||||||
int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, DB *db, int recovery_force_fcreate, FILENUM use_filenum);
|
int is_create, int only_create, CACHETABLE ct, TOKUTXN txn, DB *db, FILENUM use_filenum);
|
||||||
|
|
||||||
int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags);
|
int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags);
|
||||||
|
|
||||||
|
@ -206,12 +206,14 @@ void toku_maybe_truncate_cachefile (CACHEFILE cf, int fd, u_int64_t size_used);
|
||||||
int maybe_preallocate_in_file (int fd, u_int64_t size);
|
int maybe_preallocate_in_file (int fd, u_int64_t size);
|
||||||
// Effect: If file size is less than SIZE, make it bigger by either doubling it or growing by 16MB whichever is less.
|
// Effect: If file size is less than SIZE, make it bigger by either doubling it or growing by 16MB whichever is less.
|
||||||
|
|
||||||
int toku_brt_note_table_lock (BRT brt, TOKUTXN txn);
|
int toku_brt_note_table_lock (BRT brt, TOKUTXN txn, BOOL ignore_not_empty);
|
||||||
// Effect: Record the fact that the BRT has a table lock (and thus no other txn will modify it until this txn completes. As a result, we can limit the amount of information in the rollback data structure.
|
// Effect: Record the fact that the BRT has a table lock (and thus no other txn will modify it until this txn completes. As a result, we can limit the amount of information in the rollback data structure.
|
||||||
|
|
||||||
int toku_brt_zombie_needed (BRT brt);
|
int toku_brt_zombie_needed (BRT brt);
|
||||||
|
|
||||||
int toku_brt_get_fragmentation(BRT brt, TOKU_DB_FRAGMENTATION report);
|
int toku_brt_get_fragmentation(BRT brt, TOKU_DB_FRAGMENTATION report);
|
||||||
|
int toku_brt_header_set_panic(struct brt_header *h, int panic, char *panic_string);
|
||||||
|
BOOL toku_brt_is_empty (BRT brt);
|
||||||
|
|
||||||
double get_tdiff(void) __attribute__((__visibility__("default")));
|
double get_tdiff(void) __attribute__((__visibility__("default")));
|
||||||
|
|
||||||
|
|
|
@ -33,6 +33,7 @@ typedef u_int64_t TXNID;
|
||||||
#define TXNID_NONE ((TXNID)0)
|
#define TXNID_NONE ((TXNID)0)
|
||||||
|
|
||||||
typedef struct s_blocknum { int64_t b; } BLOCKNUM; // make a struct so that we will notice type problems.
|
typedef struct s_blocknum { int64_t b; } BLOCKNUM; // make a struct so that we will notice type problems.
|
||||||
|
#define ROLLBACK_NONE ((BLOCKNUM){0})
|
||||||
|
|
||||||
static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; }
|
static inline BLOCKNUM make_blocknum(int64_t b) { BLOCKNUM result={b}; return result; }
|
||||||
|
|
||||||
|
@ -70,6 +71,7 @@ typedef enum __toku_bool { FALSE=0, TRUE=1} BOOL;
|
||||||
typedef struct tokulogger *TOKULOGGER;
|
typedef struct tokulogger *TOKULOGGER;
|
||||||
#define NULL_LOGGER ((TOKULOGGER)0)
|
#define NULL_LOGGER ((TOKULOGGER)0)
|
||||||
typedef struct tokutxn *TOKUTXN;
|
typedef struct tokutxn *TOKUTXN;
|
||||||
|
typedef struct txninfo *TXNINFO;
|
||||||
#define NULL_TXN ((TOKUTXN)0)
|
#define NULL_TXN ((TOKUTXN)0)
|
||||||
|
|
||||||
struct logged_btt_pair {
|
struct logged_btt_pair {
|
||||||
|
@ -121,5 +123,8 @@ typedef int (*generate_row_for_del_func)(DB *dest_db, DB *src_db, DBT *dest_val,
|
||||||
|
|
||||||
#define UU(x) x __attribute__((__unused__))
|
#define UU(x) x __attribute__((__unused__))
|
||||||
|
|
||||||
|
typedef struct memarena *MEMARENA;
|
||||||
|
typedef struct rollback_log_node *ROLLBACK_LOG_NODE;
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -100,8 +100,6 @@ struct ctpair {
|
||||||
PAIR next,prev; // In LRU list.
|
PAIR next,prev; // In LRU list.
|
||||||
PAIR hash_chain;
|
PAIR hash_chain;
|
||||||
|
|
||||||
LSN modified_lsn; // What was the LSN when modified (undefined if not dirty)
|
|
||||||
LSN written_lsn; // What was the LSN when written (we need to get this information when we fetch)
|
|
||||||
|
|
||||||
BOOL checkpoint_pending; // If this is on, then we have got to write the pair out to disk before modifying it.
|
BOOL checkpoint_pending; // If this is on, then we have got to write the pair out to disk before modifying it.
|
||||||
PAIR pending_next;
|
PAIR pending_next;
|
||||||
|
@ -155,6 +153,8 @@ struct cachetable {
|
||||||
struct workqueue wq; // async work queue
|
struct workqueue wq; // async work queue
|
||||||
THREADPOOL threadpool; // pool of worker threads
|
THREADPOOL threadpool; // pool of worker threads
|
||||||
LSN lsn_of_checkpoint_in_progress;
|
LSN lsn_of_checkpoint_in_progress;
|
||||||
|
u_int32_t checkpoint_num_files; // how many cachefiles are in the checkpoint
|
||||||
|
u_int32_t checkpoint_num_txns; // how many transactions are in the checkpoint
|
||||||
PAIR pending_head; // list of pairs marked with checkpoint_pending
|
PAIR pending_head; // list of pairs marked with checkpoint_pending
|
||||||
struct rwlock pending_lock; // multiple writer threads, single checkpoint thread
|
struct rwlock pending_lock; // multiple writer threads, single checkpoint thread
|
||||||
struct minicron checkpointer; // the periodic checkpointing thread
|
struct minicron checkpointer; // the periodic checkpointing thread
|
||||||
|
@ -165,7 +165,6 @@ struct cachetable {
|
||||||
BOOL set_env_dir; //Can only set env_dir once
|
BOOL set_env_dir; //Can only set env_dir once
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Lock the cachetable
|
// Lock the cachetable
|
||||||
static inline void cachefiles_lock(CACHETABLE ct) {
|
static inline void cachefiles_lock(CACHETABLE ct) {
|
||||||
int r = toku_pthread_mutex_lock(&ct->cachefiles_mutex); assert(r == 0);
|
int r = toku_pthread_mutex_lock(&ct->cachefiles_mutex); assert(r == 0);
|
||||||
|
@ -224,6 +223,7 @@ struct cachefile {
|
||||||
|
|
||||||
void *userdata;
|
void *userdata;
|
||||||
int (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files.
|
int (*log_fassociate_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log all open files.
|
||||||
|
int (*log_suppress_rollback_during_checkpoint)(CACHEFILE cf, void *userdata); // When starting a checkpoint we must log which files need rollbacks suppressed
|
||||||
int (*close_userdata)(CACHEFILE cf, int fd, void *userdata, char **error_string, BOOL lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function.
|
int (*close_userdata)(CACHEFILE cf, int fd, void *userdata, char **error_string, BOOL lsnvalid, LSN); // when closing the last reference to a cachefile, first call this function.
|
||||||
int (*begin_checkpoint_userdata)(CACHEFILE cf, int fd, LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function.
|
int (*begin_checkpoint_userdata)(CACHEFILE cf, int fd, LSN lsn_of_checkpoint, void *userdata); // before checkpointing cachefiles call this function.
|
||||||
int (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function.
|
int (*checkpoint_userdata)(CACHEFILE cf, int fd, void *userdata); // when checkpointing a cachefile, call this function.
|
||||||
|
@ -1239,8 +1239,7 @@ static PAIR cachetable_insert_at(CACHETABLE ct,
|
||||||
CACHETABLE_FLUSH_CALLBACK flush_callback,
|
CACHETABLE_FLUSH_CALLBACK flush_callback,
|
||||||
CACHETABLE_FETCH_CALLBACK fetch_callback,
|
CACHETABLE_FETCH_CALLBACK fetch_callback,
|
||||||
void *extraargs,
|
void *extraargs,
|
||||||
enum cachetable_dirty dirty,
|
enum cachetable_dirty dirty) {
|
||||||
LSN written_lsn) {
|
|
||||||
TAGMALLOC(PAIR, p);
|
TAGMALLOC(PAIR, p);
|
||||||
assert(p);
|
assert(p);
|
||||||
memset(p, 0, sizeof *p);
|
memset(p, 0, sizeof *p);
|
||||||
|
@ -1255,8 +1254,6 @@ static PAIR cachetable_insert_at(CACHETABLE ct,
|
||||||
p->flush_callback = flush_callback;
|
p->flush_callback = flush_callback;
|
||||||
p->fetch_callback = fetch_callback;
|
p->fetch_callback = fetch_callback;
|
||||||
p->extraargs = extraargs;
|
p->extraargs = extraargs;
|
||||||
p->modified_lsn.lsn = 0;
|
|
||||||
p->written_lsn = written_lsn;
|
|
||||||
p->fullhash = fullhash;
|
p->fullhash = fullhash;
|
||||||
p->next = p->prev = 0;
|
p->next = p->prev = 0;
|
||||||
rwlock_init(&p->rwlock);
|
rwlock_init(&p->rwlock);
|
||||||
|
@ -1321,7 +1318,7 @@ int toku_cachetable_put(CACHEFILE cachefile, CACHEKEY key, u_int32_t fullhash, v
|
||||||
}
|
}
|
||||||
// flushing could change the table size, but wont' change the fullhash
|
// flushing could change the table size, but wont' change the fullhash
|
||||||
cachetable_puts++;
|
cachetable_puts++;
|
||||||
PAIR p = cachetable_insert_at(ct, cachefile, key, value, CTPAIR_IDLE, fullhash, size, flush_callback, fetch_callback, extraargs, CACHETABLE_DIRTY, ZERO_LSN);
|
PAIR p = cachetable_insert_at(ct, cachefile, key, value, CTPAIR_IDLE, fullhash, size, flush_callback, fetch_callback, extraargs, CACHETABLE_DIRTY);
|
||||||
assert(p);
|
assert(p);
|
||||||
rwlock_read_lock(&p->rwlock, ct->mutex);
|
rwlock_read_lock(&p->rwlock, ct->mutex);
|
||||||
note_hash_count(count);
|
note_hash_count(count);
|
||||||
|
@ -1465,7 +1462,7 @@ int toku_cachetable_get_and_pin(CACHEFILE cachefile, CACHEKEY key, u_int32_t ful
|
||||||
int r;
|
int r;
|
||||||
// Note. hashit(t,key) may have changed as a result of flushing. But fullhash won't have changed.
|
// Note. hashit(t,key) may have changed as a result of flushing. But fullhash won't have changed.
|
||||||
{
|
{
|
||||||
p = cachetable_insert_at(ct, cachefile, key, zero_value, CTPAIR_READING, fullhash, zero_size, flush_callback, fetch_callback, extraargs, CACHETABLE_CLEAN, ZERO_LSN);
|
p = cachetable_insert_at(ct, cachefile, key, zero_value, CTPAIR_READING, fullhash, zero_size, flush_callback, fetch_callback, extraargs, CACHETABLE_CLEAN);
|
||||||
assert(p);
|
assert(p);
|
||||||
get_and_pin_footprint = 10;
|
get_and_pin_footprint = 10;
|
||||||
rwlock_write_lock(&p->rwlock, ct->mutex);
|
rwlock_write_lock(&p->rwlock, ct->mutex);
|
||||||
|
@ -1619,7 +1616,7 @@ int toku_cachefile_prefetch(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash,
|
||||||
// if not found then create a pair in the READING state and fetch it
|
// if not found then create a pair in the READING state and fetch it
|
||||||
if (p == 0) {
|
if (p == 0) {
|
||||||
cachetable_prefetches++;
|
cachetable_prefetches++;
|
||||||
p = cachetable_insert_at(ct, cf, key, zero_value, CTPAIR_READING, fullhash, zero_size, flush_callback, fetch_callback, extraargs, CACHETABLE_CLEAN, ZERO_LSN);
|
p = cachetable_insert_at(ct, cf, key, zero_value, CTPAIR_READING, fullhash, zero_size, flush_callback, fetch_callback, extraargs, CACHETABLE_CLEAN);
|
||||||
assert(p);
|
assert(p);
|
||||||
rwlock_write_lock(&p->rwlock, ct->mutex);
|
rwlock_write_lock(&p->rwlock, ct->mutex);
|
||||||
#if DO_WORKER_THREAD
|
#if DO_WORKER_THREAD
|
||||||
|
@ -1906,18 +1903,53 @@ int toku_cachetable_unpin_and_remove (CACHEFILE cachefile, CACHEKEY key) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
log_open_txn (OMTVALUE txnv, u_int32_t UU(index), void *loggerv) {
|
set_filenum_in_array(OMTVALUE brtv, u_int32_t index, void*arrayv) {
|
||||||
TOKUTXN txn = txnv;
|
FILENUM *array = arrayv;
|
||||||
TOKULOGGER logger = loggerv;
|
BRT brt = brtv;
|
||||||
if (toku_logger_txn_parent(txn)==NULL) { // only have to log the open root transactions
|
array[index] = toku_cachefile_filenum(brt->cf);
|
||||||
int r = toku_log_xstillopen(logger, NULL, 0,
|
|
||||||
toku_txn_get_txnid(txn),
|
|
||||||
toku_txn_get_txnid(toku_logger_txn_parent(txn)));
|
|
||||||
assert(r==0);
|
|
||||||
}
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
log_open_txn (OMTVALUE txnv, u_int32_t UU(index), void *UU(extra)) {
|
||||||
|
TOKUTXN txn = txnv;
|
||||||
|
TOKULOGGER logger = txn->logger;
|
||||||
|
FILENUMS open_filenums;
|
||||||
|
uint32_t num_filenums = toku_omt_size(txn->open_brts);
|
||||||
|
FILENUM array[num_filenums];
|
||||||
|
{
|
||||||
|
open_filenums.num = num_filenums;
|
||||||
|
open_filenums.filenums = array;
|
||||||
|
//Fill in open_filenums
|
||||||
|
int r = toku_omt_iterate(txn->open_brts, set_filenum_in_array, array);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
int r = toku_log_xstillopen(logger, NULL, 0,
|
||||||
|
toku_txn_get_txnid(txn),
|
||||||
|
toku_txn_get_txnid(toku_logger_txn_parent(txn)),
|
||||||
|
txn->rollentry_raw_count,
|
||||||
|
open_filenums,
|
||||||
|
txn->force_fsync_on_commit,
|
||||||
|
txn->num_rollback_nodes,
|
||||||
|
txn->num_rollentries,
|
||||||
|
txn->spilled_rollback_head,
|
||||||
|
txn->spilled_rollback_tail,
|
||||||
|
txn->current_rollback);
|
||||||
|
assert(r==0);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
unpin_rollback_log_for_checkpoint (OMTVALUE txnv, u_int32_t UU(index), void *UU(extra)) {
|
||||||
|
int r = 0;
|
||||||
|
TOKUTXN txn = txnv;
|
||||||
|
if (txn->pinned_inprogress_rollback_log) {
|
||||||
|
r = toku_rollback_log_unpin(txn, txn->pinned_inprogress_rollback_log);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: #1510 locking of cachetable is suspect
|
// TODO: #1510 locking of cachetable is suspect
|
||||||
// verify correct algorithm overall
|
// verify correct algorithm overall
|
||||||
|
|
||||||
|
@ -1931,7 +1963,17 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
|
||||||
|
|
||||||
{
|
{
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
if (logger) { // Unpin all 'inprogress rollback log nodes' pinned by transactions
|
||||||
|
int r = toku_omt_iterate(logger->live_txns,
|
||||||
|
unpin_rollback_log_for_checkpoint,
|
||||||
|
NULL);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
cachetable_lock(ct);
|
cachetable_lock(ct);
|
||||||
|
//Initialize accountability counters
|
||||||
|
ct->checkpoint_num_files = 0;
|
||||||
|
ct->checkpoint_num_txns = 0;
|
||||||
|
|
||||||
//Make list of cachefiles to be included in checkpoint.
|
//Make list of cachefiles to be included in checkpoint.
|
||||||
//If refcount is 0, the cachefile is closing (performing a local checkpoint)
|
//If refcount is 0, the cachefile is closing (performing a local checkpoint)
|
||||||
{
|
{
|
||||||
|
@ -1960,11 +2002,6 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
ct->lsn_of_checkpoint_in_progress = begin_lsn;
|
ct->lsn_of_checkpoint_in_progress = begin_lsn;
|
||||||
}
|
}
|
||||||
// Log all the open transactions
|
|
||||||
{
|
|
||||||
int r = toku_omt_iterate(logger->live_txns, log_open_txn, logger);
|
|
||||||
assert(r==0);
|
|
||||||
}
|
|
||||||
// Log all the open files
|
// Log all the open files
|
||||||
{
|
{
|
||||||
//Must loop through ALL open files (even if not included in checkpoint).
|
//Must loop through ALL open files (even if not included in checkpoint).
|
||||||
|
@ -1973,6 +2010,26 @@ toku_cachetable_begin_checkpoint (CACHETABLE ct, TOKULOGGER logger) {
|
||||||
for (cf = ct->cachefiles; cf; cf=cf->next) {
|
for (cf = ct->cachefiles; cf; cf=cf->next) {
|
||||||
if (cf->log_fassociate_during_checkpoint) {
|
if (cf->log_fassociate_during_checkpoint) {
|
||||||
int r = cf->log_fassociate_during_checkpoint(cf, cf->userdata);
|
int r = cf->log_fassociate_during_checkpoint(cf, cf->userdata);
|
||||||
|
ct->checkpoint_num_files++;
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cachefiles_unlock(ct);
|
||||||
|
}
|
||||||
|
// Log all the open transactions MUST BE AFTER OPEN FILES
|
||||||
|
{
|
||||||
|
ct->checkpoint_num_txns = toku_omt_size(logger->live_txns);
|
||||||
|
int r = toku_omt_iterate(logger->live_txns, log_open_txn, NULL);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
// Log rollback suppression for all the open files MUST BE AFTER TXNS
|
||||||
|
{
|
||||||
|
//Must loop through ALL open files (even if not included in checkpoint).
|
||||||
|
CACHEFILE cf;
|
||||||
|
cachefiles_lock(ct);
|
||||||
|
for (cf = ct->cachefiles; cf; cf=cf->next) {
|
||||||
|
if (cf->log_suppress_rollback_during_checkpoint) {
|
||||||
|
int r = cf->log_suppress_rollback_during_checkpoint(cf, cf->userdata);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2115,7 +2172,10 @@ toku_cachetable_end_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
||||||
if (logger) {
|
if (logger) {
|
||||||
int r = toku_log_end_checkpoint(logger, NULL,
|
int r = toku_log_end_checkpoint(logger, NULL,
|
||||||
1, // want the end_checkpoint to be fsync'd
|
1, // want the end_checkpoint to be fsync'd
|
||||||
ct->lsn_of_checkpoint_in_progress.lsn, 0);
|
ct->lsn_of_checkpoint_in_progress.lsn,
|
||||||
|
0,
|
||||||
|
ct->checkpoint_num_files,
|
||||||
|
ct->checkpoint_num_txns);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
toku_logger_note_checkpoint(logger, ct->lsn_of_checkpoint_in_progress);
|
toku_logger_note_checkpoint(logger, ct->lsn_of_checkpoint_in_progress);
|
||||||
}
|
}
|
||||||
|
@ -2262,6 +2322,7 @@ void
|
||||||
toku_cachefile_set_userdata (CACHEFILE cf,
|
toku_cachefile_set_userdata (CACHEFILE cf,
|
||||||
void *userdata,
|
void *userdata,
|
||||||
int (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
|
int (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
|
||||||
|
int (*log_suppress_rollback_during_checkpoint)(CACHEFILE, void*),
|
||||||
int (*close_userdata)(CACHEFILE, int, void*, char**, BOOL, LSN),
|
int (*close_userdata)(CACHEFILE, int, void*, char**, BOOL, LSN),
|
||||||
int (*checkpoint_userdata)(CACHEFILE, int, void*),
|
int (*checkpoint_userdata)(CACHEFILE, int, void*),
|
||||||
int (*begin_checkpoint_userdata)(CACHEFILE, int, LSN, void*),
|
int (*begin_checkpoint_userdata)(CACHEFILE, int, LSN, void*),
|
||||||
|
@ -2270,6 +2331,7 @@ toku_cachefile_set_userdata (CACHEFILE cf,
|
||||||
int (*note_unpin_by_checkpoint)(CACHEFILE, void*)) {
|
int (*note_unpin_by_checkpoint)(CACHEFILE, void*)) {
|
||||||
cf->userdata = userdata;
|
cf->userdata = userdata;
|
||||||
cf->log_fassociate_during_checkpoint = log_fassociate_during_checkpoint;
|
cf->log_fassociate_during_checkpoint = log_fassociate_during_checkpoint;
|
||||||
|
cf->log_suppress_rollback_during_checkpoint = log_suppress_rollback_during_checkpoint;
|
||||||
cf->close_userdata = close_userdata;
|
cf->close_userdata = close_userdata;
|
||||||
cf->checkpoint_userdata = checkpoint_userdata;
|
cf->checkpoint_userdata = checkpoint_userdata;
|
||||||
cf->begin_checkpoint_userdata = begin_checkpoint_userdata;
|
cf->begin_checkpoint_userdata = begin_checkpoint_userdata;
|
||||||
|
|
|
@ -123,6 +123,7 @@ typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, int fd, CACHEKEY key, u_int3
|
||||||
|
|
||||||
void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata,
|
void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata,
|
||||||
int (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
|
int (*log_fassociate_during_checkpoint)(CACHEFILE, void*),
|
||||||
|
int (*log_suppress_rollback_during_checkpoint)(CACHEFILE, void*),
|
||||||
int (*close_userdata)(CACHEFILE, int, void*, char **/*error_string*/, BOOL, LSN),
|
int (*close_userdata)(CACHEFILE, int, void*, char **/*error_string*/, BOOL, LSN),
|
||||||
int (*checkpoint_userdata)(CACHEFILE, int, void*),
|
int (*checkpoint_userdata)(CACHEFILE, int, void*),
|
||||||
int (*begin_checkpoint_userdata)(CACHEFILE, int, LSN, void*),
|
int (*begin_checkpoint_userdata)(CACHEFILE, int, LSN, void*),
|
||||||
|
|
|
@ -218,7 +218,6 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
||||||
checkpoint_footprint = 40;
|
checkpoint_footprint = 40;
|
||||||
time_last_checkpoint_begin = time(NULL);
|
time_last_checkpoint_begin = time(NULL);
|
||||||
r = toku_cachetable_begin_checkpoint(ct, logger);
|
r = toku_cachetable_begin_checkpoint(ct, logger);
|
||||||
LSN oldest_live_lsn = toku_logger_get_oldest_living_lsn(logger);
|
|
||||||
|
|
||||||
multi_operation_checkpoint_unlock();
|
multi_operation_checkpoint_unlock();
|
||||||
ydb_unlock();
|
ydb_unlock();
|
||||||
|
@ -230,7 +229,7 @@ toku_checkpoint(CACHETABLE ct, TOKULOGGER logger,
|
||||||
r = toku_cachetable_end_checkpoint(ct, logger, ydb_lock, ydb_unlock, callback2_f, extra2);
|
r = toku_cachetable_end_checkpoint(ct, logger, ydb_lock, ydb_unlock, callback2_f, extra2);
|
||||||
}
|
}
|
||||||
if (r==0 && logger) {
|
if (r==0 && logger) {
|
||||||
LSN trim_lsn = (oldest_live_lsn.lsn < logger->checkpoint_lsn.lsn) ? oldest_live_lsn : logger->checkpoint_lsn;
|
LSN trim_lsn = logger->last_completed_checkpoint_lsn;
|
||||||
r = toku_logger_maybe_trim_log(logger, trim_lsn);
|
r = toku_logger_maybe_trim_log(logger, trim_lsn);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,7 @@ struct tokulogger {
|
||||||
// To access these, you must have the output condition lock.
|
// To access these, you must have the output condition lock.
|
||||||
LSN written_lsn; // the last lsn written
|
LSN written_lsn; // the last lsn written
|
||||||
LSN fsynced_lsn; // What is the LSN of the highest fsynced log entry (accessed only while holding the output lock, and updated only when the output lock and output permission are held)
|
LSN fsynced_lsn; // What is the LSN of the highest fsynced log entry (accessed only while holding the output lock, and updated only when the output lock and output permission are held)
|
||||||
LSN checkpoint_lsn; // What is the LSN of the most recent completed checkpoint.
|
LSN last_completed_checkpoint_lsn; // What is the LSN of the most recent completed checkpoint.
|
||||||
long long next_log_file_number;
|
long long next_log_file_number;
|
||||||
struct logbuf outbuf; // data being written to the file
|
struct logbuf outbuf; // data being written to the file
|
||||||
int n_in_file; // The amount of data in the current file
|
int n_in_file; // The amount of data in the current file
|
||||||
|
@ -101,6 +101,7 @@ struct tokulogger {
|
||||||
u_int64_t swap_ctr; // how many times have input/output log buffers been swapped
|
u_int64_t swap_ctr; // how many times have input/output log buffers been swapped
|
||||||
void (*remove_finalize_callback) (DICTIONARY_ID, void*); // ydb-level callback to be called when a transaction that ...
|
void (*remove_finalize_callback) (DICTIONARY_ID, void*); // ydb-level callback to be called when a transaction that ...
|
||||||
void * remove_finalize_callback_extra; // ... deletes a file is committed or when one that creates a file is aborted.
|
void * remove_finalize_callback_extra; // ... deletes a file is committed or when one that creates a file is aborted.
|
||||||
|
CACHEFILE rollback_cachefile;
|
||||||
};
|
};
|
||||||
|
|
||||||
int toku_logger_find_next_unused_log_file(const char *directory, long long *result);
|
int toku_logger_find_next_unused_log_file(const char *directory, long long *result);
|
||||||
|
@ -116,25 +117,36 @@ struct tokutxn {
|
||||||
u_int64_t txnid64; /* this happens to be the first lsn */
|
u_int64_t txnid64; /* this happens to be the first lsn */
|
||||||
TOKULOGGER logger;
|
TOKULOGGER logger;
|
||||||
TOKUTXN parent;
|
TOKUTXN parent;
|
||||||
LSN last_lsn; /* Everytime anything is logged, update the LSN. (We need to atomically record the LSN along with writing into the log.) */
|
|
||||||
LSN first_lsn; /* The first lsn in the transaction. */
|
|
||||||
struct roll_entry *oldest_logentry,*newest_logentry; /* Only logentries with rollbacks are here. There is a list going from newest to oldest. */
|
|
||||||
|
|
||||||
MEMARENA rollentry_arena;
|
|
||||||
|
|
||||||
size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory.
|
|
||||||
char *rollentry_filename;
|
|
||||||
int rollentry_fd; // If we spill the roll_entries, we write them into this fd.
|
|
||||||
toku_off_t rollentry_filesize; // How many bytes are in the rollentry file (this is the uncompressed bytes. If the file is compressed it may actually be smaller (or even larger with header information))
|
|
||||||
u_int64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children.
|
u_int64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children.
|
||||||
OMT open_brts; // a collection of the brts that we touched. Indexed by filenum.
|
OMT open_brts; // a collection of the brts that we touched. Indexed by filenum.
|
||||||
XIDS xids; //Represents the xid list
|
XIDS xids; //Represents the xid list
|
||||||
BOOL force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn)
|
BOOL force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn)
|
||||||
BOOL has_done_work; //If this transaction has not done work, there is no need to fsync.
|
|
||||||
TXN_PROGRESS_POLL_FUNCTION progress_poll_fun;
|
TXN_PROGRESS_POLL_FUNCTION progress_poll_fun;
|
||||||
void * progress_poll_fun_extra;
|
void * progress_poll_fun_extra;
|
||||||
|
uint64_t num_rollback_nodes;
|
||||||
uint64_t num_rollentries;
|
uint64_t num_rollentries;
|
||||||
uint64_t num_rollentries_processed;
|
uint64_t num_rollentries_processed;
|
||||||
|
BLOCKNUM spilled_rollback_head;
|
||||||
|
uint32_t spilled_rollback_head_hash;
|
||||||
|
BLOCKNUM spilled_rollback_tail;
|
||||||
|
uint32_t spilled_rollback_tail_hash;
|
||||||
|
BLOCKNUM current_rollback;
|
||||||
|
uint32_t current_rollback_hash;
|
||||||
|
BOOL recovered_from_checkpoint;
|
||||||
|
ROLLBACK_LOG_NODE pinned_inprogress_rollback_log;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct txninfo {
|
||||||
|
uint64_t rollentry_raw_count; // the total count of every byte in the transaction and all its children.
|
||||||
|
uint32_t num_brts;
|
||||||
|
BRT *open_brts;
|
||||||
|
BOOL force_fsync_on_commit; //This transaction NEEDS an fsync once (if) it commits. (commit means root txn)
|
||||||
|
uint64_t num_rollback_nodes;
|
||||||
|
uint64_t num_rollentries;
|
||||||
|
BLOCKNUM spilled_rollback_head;
|
||||||
|
BLOCKNUM spilled_rollback_tail;
|
||||||
|
BLOCKNUM current_rollback;
|
||||||
};
|
};
|
||||||
|
|
||||||
static inline int toku_logsizeof_u_int8_t (u_int32_t v __attribute__((__unused__))) {
|
static inline int toku_logsizeof_u_int8_t (u_int32_t v __attribute__((__unused__))) {
|
||||||
|
@ -180,5 +192,4 @@ static inline char *fixup_fname(BYTESTRING *f) {
|
||||||
return fname;
|
return fname;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_read_rollback_backwards(BREAD, struct roll_entry **item, MEMARENA);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -11,7 +11,6 @@
|
||||||
#include "../include/db.h"
|
#include "../include/db.h"
|
||||||
#include "brttypes.h"
|
#include "brttypes.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "bread.h"
|
|
||||||
#include "x1764.h"
|
#include "x1764.h"
|
||||||
|
|
||||||
typedef void(*voidfp)(void);
|
typedef void(*voidfp)(void);
|
||||||
|
|
|
@ -41,8 +41,6 @@ struct logtype {
|
||||||
|
|
||||||
// In the fields, don't mention the command, the LSN, the CRC or the trailing LEN.
|
// In the fields, don't mention the command, the LSN, the CRC or the trailing LEN.
|
||||||
|
|
||||||
int logformat_version_number = 0;
|
|
||||||
|
|
||||||
const struct logtype rollbacks[] = {
|
const struct logtype rollbacks[] = {
|
||||||
//TODO: #2037 Add dname
|
//TODO: #2037 Add dname
|
||||||
{"fdelete", 'U', FA{{"u_int8_t", "file_was_open", 0},
|
{"fdelete", 'U', FA{{"u_int8_t", "file_was_open", 0},
|
||||||
|
@ -72,7 +70,12 @@ const struct logtype rollbacks[] = {
|
||||||
{"FILENUM", "filenum", 0},
|
{"FILENUM", "filenum", 0},
|
||||||
{"BYTESTRING", "key", 0},
|
{"BYTESTRING", "key", 0},
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
{"rollinclude", 'r', FA{{"BYTESTRING", "fname", 0},
|
{"rollinclude", 'r', FA{{"TXNID", "xid", 0},
|
||||||
|
{"u_int64_t", "num_nodes", 0},
|
||||||
|
{"BLOCKNUM", "spilled_head", 0},
|
||||||
|
{"u_int32_t", "spilled_head_hash", 0},
|
||||||
|
{"BLOCKNUM", "spilled_tail", 0},
|
||||||
|
{"u_int32_t", "spilled_tail_hash", 0},
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
{"tablelock_on_empty_table", 'L', FA{{"FILENUM", "filenum", 0},
|
{"tablelock_on_empty_table", 'L', FA{{"FILENUM", "filenum", 0},
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
|
@ -82,46 +85,44 @@ const struct logtype rollbacks[] = {
|
||||||
{"dictionary_redirect", 'R', FA{{"FILENUM", "old_filenum", 0},
|
{"dictionary_redirect", 'R', FA{{"FILENUM", "old_filenum", 0},
|
||||||
{"FILENUM", "new_filenum", 0},
|
{"FILENUM", "new_filenum", 0},
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
// {"fclose", 'c', FA{{"FILENUM", "filenum", 0},
|
|
||||||
// {"BYTESTRING", "fname", 0},
|
|
||||||
// NULLFIELD}},
|
|
||||||
// {"deleteatleaf", 'd', FA{{"FILENUM", "filenum", 0}, // Note a delete for rollback. The delete takes place in a leaf.
|
|
||||||
// {"BYTESTRING", "key", 0},
|
|
||||||
// {"BYTESTRING", "data", 0},
|
|
||||||
// NULLFIELD}},
|
|
||||||
// {"insertatleaf", 'i', FA{{"FILENUM", "filenum", 0}, // Note an insert for rollback. The insert takes place in a leaf.
|
|
||||||
// {"BYTESTRING", "key", 0},
|
|
||||||
// {"BYTESTRING", "data", 0},
|
|
||||||
// NULLFIELD}},
|
|
||||||
// {"xactiontouchednonleaf", 'n', FA{{"FILENUM", "filenum", 0},
|
|
||||||
// {"DISKOFFARRAY", "parents", 0},
|
|
||||||
// {"DISKOFF", "diskoff", 0},
|
|
||||||
// NULLFIELD}},
|
|
||||||
{0,0,FA{NULLFIELD}}
|
{0,0,FA{NULLFIELD}}
|
||||||
};
|
};
|
||||||
|
|
||||||
const struct logtype logtypes[] = {
|
const struct logtype logtypes[] = {
|
||||||
// Records produced by checkpoints
|
// Records produced by checkpoints
|
||||||
{"begin_checkpoint", 'x', FA{{"u_int64_t", "timestamp", 0}, NULLFIELD}},
|
{"begin_checkpoint", 'x', FA{{"u_int64_t", "timestamp", 0}, NULLFIELD}},
|
||||||
{"end_checkpoint", 'X', FA{{"TXNID", "txnid", 0}, {"u_int64_t", "timestamp", 0}, NULLFIELD}}, // TXNID is LSN of begin_checkpoint
|
{"end_checkpoint", 'X', FA{{"TXNID", "xid", 0}, // xid is LSN of begin_checkpoint
|
||||||
|
{"u_int64_t", "timestamp", 0},
|
||||||
|
{"u_int32_t", "num_fassociate_entries", 0}, // how many files were checkpointed
|
||||||
|
{"u_int32_t", "num_xstillopen_entries", 0}, // how many txns were checkpointed
|
||||||
|
NULLFIELD}},
|
||||||
//TODO: #2037 Add dname
|
//TODO: #2037 Add dname
|
||||||
{"fassociate", 'f', FA{{"FILENUM", "filenum", 0},
|
{"fassociate", 'f', FA{{"FILENUM", "filenum", 0},
|
||||||
{"u_int32_t", "treeflags", 0},
|
{"u_int32_t", "treeflags", 0},
|
||||||
{"BYTESTRING", "iname", 0}, // pathname of file
|
{"BYTESTRING", "iname", 0}, // pathname of file
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
{"xstillopen", 's', FA{{"TXNID", "txnid", 0},
|
//We do not use a TXNINFO struct since recovery log has
|
||||||
{"TXNID", "parent", 0},
|
//FILENUMS and TOKUTXN has BRTs (for open_brts)
|
||||||
NULLFIELD}}, // only record root transactions
|
{"xstillopen", 's', FA{{"TXNID", "xid", 0},
|
||||||
|
{"TXNID", "parentxid", 0},
|
||||||
|
{"u_int64_t", "rollentry_raw_count", 0},
|
||||||
|
{"FILENUMS", "open_filenums", 0},
|
||||||
|
{"u_int8_t", "force_fsync_on_commit", 0},
|
||||||
|
{"u_int64_t", "num_rollback_nodes", 0},
|
||||||
|
{"u_int64_t", "num_rollentries", 0},
|
||||||
|
{"BLOCKNUM", "spilled_rollback_head", 0},
|
||||||
|
{"BLOCKNUM", "spilled_rollback_tail", 0},
|
||||||
|
{"BLOCKNUM", "current_rollback", 0},
|
||||||
|
NULLFIELD}}, // record all transactions
|
||||||
|
{"suppress_rollback", 'S', FA{{"FILENUM", "filenum", 0},
|
||||||
|
{"TXNID", "xid", 0},
|
||||||
|
NULLFIELD}},
|
||||||
// Records produced by transactions
|
// Records produced by transactions
|
||||||
{"commit", 'C', FA{{"TXNID", "txnid", 0},NULLFIELD}},
|
{"xbegin", 'b', FA{{"TXNID", "parentxid", 0},NULLFIELD}},
|
||||||
{"xabort", 'q', FA{{"TXNID", "txnid", 0},NULLFIELD}},
|
{"xcommit",'C', FA{{"TXNID", "xid", 0},NULLFIELD}},
|
||||||
{"xbegin", 'b', FA{{"TXNID", "parenttxnid", 0},NULLFIELD}},
|
{"xabort", 'q', FA{{"TXNID", "xid", 0},NULLFIELD}},
|
||||||
//TODO: #2037 Add dname
|
//TODO: #2037 Add dname
|
||||||
{"fdelete", 'U', FA{{"TXNID", "txnid", 0},
|
{"fcreate", 'F', FA{{"TXNID", "xid", 0},
|
||||||
{"BYTESTRING", "iname", 0},
|
|
||||||
NULLFIELD}},
|
|
||||||
//TODO: #2037 Add dname
|
|
||||||
{"fcreate", 'F', FA{{"TXNID", "txnid", 0},
|
|
||||||
{"FILENUM", "filenum", 0},
|
{"FILENUM", "filenum", 0},
|
||||||
{"BYTESTRING", "iname", 0},
|
{"BYTESTRING", "iname", 0},
|
||||||
{"u_int32_t", "mode", "0%o"},
|
{"u_int32_t", "mode", "0%o"},
|
||||||
|
@ -137,21 +138,24 @@ const struct logtype logtypes[] = {
|
||||||
//TODO: #2037 Add dname
|
//TODO: #2037 Add dname
|
||||||
{"fclose", 'e', FA{{"BYTESTRING", "iname", 0},
|
{"fclose", 'e', FA{{"BYTESTRING", "iname", 0},
|
||||||
{"FILENUM", "filenum", 0},
|
{"FILENUM", "filenum", 0},
|
||||||
{"u_int32_t", "treeflags", 0},
|
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
|
//TODO: #2037 Add dname
|
||||||
|
{"fdelete", 'U', FA{{"TXNID", "xid", 0},
|
||||||
|
{"BYTESTRING", "iname", 0},
|
||||||
|
NULLFIELD}},
|
||||||
{"tablelock_on_empty_table", 'L', FA{{"FILENUM", "filenum", 0},
|
{"tablelock_on_empty_table", 'L', FA{{"FILENUM", "filenum", 0},
|
||||||
{"TXNID", "xid", 0},
|
{"TXNID", "xid", 0},
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
|
{"enq_insert", 'I', FA{{"FILENUM", "filenum", 0},
|
||||||
|
{"TXNID", "xid", 0},
|
||||||
|
{"BYTESTRING", "key", 0},
|
||||||
|
{"BYTESTRING", "value", 0},
|
||||||
|
NULLFIELD}},
|
||||||
{"enq_insert_no_overwrite", 'i', FA{{"FILENUM", "filenum", 0},
|
{"enq_insert_no_overwrite", 'i', FA{{"FILENUM", "filenum", 0},
|
||||||
{"TXNID", "xid", 0},
|
{"TXNID", "xid", 0},
|
||||||
{"BYTESTRING", "key", 0},
|
{"BYTESTRING", "key", 0},
|
||||||
{"BYTESTRING", "value", 0},
|
{"BYTESTRING", "value", 0},
|
||||||
NULLFIELD}},
|
NULLFIELD}},
|
||||||
{"enq_insert", 'I', FA{{"FILENUM", "filenum", 0},
|
|
||||||
{"TXNID", "xid", 0},
|
|
||||||
{"BYTESTRING", "key", 0},
|
|
||||||
{"BYTESTRING", "value", 0},
|
|
||||||
NULLFIELD}},
|
|
||||||
{"enq_delete_both", 'D', FA{{"FILENUM", "filenum", 0},
|
{"enq_delete_both", 'D', FA{{"FILENUM", "filenum", 0},
|
||||||
{"TXNID", "xid", 0},
|
{"TXNID", "xid", 0},
|
||||||
{"BYTESTRING", "key", 0},
|
{"BYTESTRING", "key", 0},
|
||||||
|
@ -277,11 +281,10 @@ generate_log_struct (void) {
|
||||||
|
|
||||||
fprintf(hf, "struct roll_entry {\n");
|
fprintf(hf, "struct roll_entry {\n");
|
||||||
fprintf(hf, " enum rt_cmd cmd;\n");
|
fprintf(hf, " enum rt_cmd cmd;\n");
|
||||||
|
fprintf(hf, " struct roll_entry *prev; /* for in-memory list of log entries. Threads from newest to oldest. */\n");
|
||||||
fprintf(hf, " union {\n");
|
fprintf(hf, " union {\n");
|
||||||
DO_ROLLBACKS(lt, fprintf(hf," struct rolltype_%s %s;\n", lt->name, lt->name));
|
DO_ROLLBACKS(lt, fprintf(hf," struct rolltype_%s %s;\n", lt->name, lt->name));
|
||||||
fprintf(hf, " } u;\n");
|
fprintf(hf, " } u;\n");
|
||||||
fprintf(hf, " struct roll_entry *prev; /* for in-memory list of log entries. Threads from newest to oldest. */\n");
|
|
||||||
fprintf(hf, " struct roll_entry *next; /* Points to a newer logentry. Needed for flushing to disk, since we want to write the oldest one first. */\n");
|
|
||||||
fprintf(hf, "};\n");
|
fprintf(hf, "};\n");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -532,47 +535,76 @@ static void
|
||||||
generate_rollbacks (void) {
|
generate_rollbacks (void) {
|
||||||
DO_ROLLBACKS(lt, {
|
DO_ROLLBACKS(lt, {
|
||||||
fprintf2(cf, hf, "int toku_logger_save_rollback_%s (TOKUTXN txn", lt->name);
|
fprintf2(cf, hf, "int toku_logger_save_rollback_%s (TOKUTXN txn", lt->name);
|
||||||
DO_FIELDS(ft, lt, fprintf2(cf, hf, ", %s %s", ft->type, ft->name));
|
DO_FIELDS(ft, lt, {
|
||||||
|
if ( strcmp(ft->type, "BYTESTRING") == 0 ) {
|
||||||
|
fprintf2(cf, hf, ", BYTESTRING *%s_ptr", ft->name);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fprintf2(cf, hf, ", %s %s", ft->type, ft->name);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
fprintf(hf, ");\n");
|
fprintf(hf, ");\n");
|
||||||
fprintf(cf, ") {\n");
|
fprintf(cf, ") {\n");
|
||||||
|
fprintf(cf, " int r;\n");
|
||||||
|
fprintf(cf, " ROLLBACK_LOG_NODE log;\n");
|
||||||
|
fprintf(cf, " r = toku_get_and_pin_rollback_log_for_new_entry(txn, &log);\n");
|
||||||
|
fprintf(cf, " assert(r==0);\n");
|
||||||
|
// 'memdup' all BYTESTRINGS here
|
||||||
|
DO_FIELDS(ft, lt, {
|
||||||
|
if ( strcmp(ft->type, "BYTESTRING") == 0 ) {
|
||||||
|
fprintf(cf, " BYTESTRING %s = {\n"
|
||||||
|
" .len = %s_ptr->len,\n"
|
||||||
|
" .data = toku_memdup_in_rollback(log, %s_ptr->data, %s_ptr->len)\n"
|
||||||
|
" };\n",
|
||||||
|
ft->name, ft->name, ft->name, ft->name);
|
||||||
|
}
|
||||||
|
});
|
||||||
{
|
{
|
||||||
int count=0;
|
int count=0;
|
||||||
fprintf(cf, " u_int32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name);
|
fprintf(cf, " u_int32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name);
|
||||||
DO_FIELDS(ft, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", ft->name));
|
DO_FIELDS(ft, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", ft->name));
|
||||||
fprintf(cf, ");\n");
|
fprintf(cf, ");\n");
|
||||||
}
|
}
|
||||||
fprintf(cf, " struct roll_entry *v = toku_malloc_in_rollback(txn, sizeof(*v));\n");
|
fprintf(cf, " struct roll_entry *v;\n");
|
||||||
|
fprintf(cf, " size_t mem_needed = sizeof(v->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name);
|
||||||
|
fprintf(cf, " v = toku_malloc_in_rollback(log, mem_needed);\n");
|
||||||
fprintf(cf, " if (v==0) return errno;\n");
|
fprintf(cf, " if (v==0) return errno;\n");
|
||||||
fprintf(cf, " v->cmd = (enum rt_cmd)%u;\n", lt->command_and_flags&0xff);
|
fprintf(cf, " v->cmd = (enum rt_cmd)%u;\n", lt->command_and_flags&0xff);
|
||||||
DO_FIELDS(ft, lt, fprintf(cf, " v->u.%s.%s = %s;\n", lt->name, ft->name, ft->name));
|
DO_FIELDS(ft, lt, fprintf(cf, " v->u.%s.%s = %s;\n", lt->name, ft->name, ft->name));
|
||||||
fprintf(cf, " v->prev = txn->newest_logentry;\n");
|
fprintf(cf, " v->prev = log->newest_logentry;\n");
|
||||||
fprintf(cf, " v->next = 0;\n");
|
fprintf(cf, " if (log->oldest_logentry==NULL) log->oldest_logentry=v;\n");
|
||||||
fprintf(cf, " if (txn->oldest_logentry==0) txn->oldest_logentry=v;\n");
|
fprintf(cf, " log->newest_logentry = v;\n");
|
||||||
fprintf(cf, " else txn->newest_logentry->next = v;\n");
|
fprintf(cf, " log->rollentry_resident_bytecount += rollback_fsize;\n");
|
||||||
fprintf(cf, " txn->newest_logentry = v;\n");
|
|
||||||
fprintf(cf, " txn->rollentry_resident_bytecount += rollback_fsize;\n");
|
|
||||||
fprintf(cf, " txn->rollentry_raw_count += rollback_fsize;\n");
|
fprintf(cf, " txn->rollentry_raw_count += rollback_fsize;\n");
|
||||||
fprintf(cf, " txn->num_rollentries++;\n");
|
fprintf(cf, " txn->num_rollentries++;\n");
|
||||||
fprintf(cf, " return toku_maybe_spill_rollbacks(txn);\n}\n");
|
fprintf(cf, " log->dirty = TRUE;\n");
|
||||||
|
fprintf(cf, " return toku_maybe_spill_rollbacks(txn, log);\n}\n");
|
||||||
});
|
});
|
||||||
|
|
||||||
DO_ROLLBACKS(lt, {
|
DO_ROLLBACKS(lt, {
|
||||||
fprintf2(cf, hf, "void toku_logger_rollback_wbufwrite_%s (struct wbuf *wbuf", lt->name);
|
fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write_%s (struct wbuf *wbuf", lt->name);
|
||||||
DO_FIELDS(ft, lt, fprintf2(cf, hf, ", %s %s", ft->type, ft->name));
|
DO_FIELDS(ft, lt, fprintf2(cf, hf, ", %s %s", ft->type, ft->name));
|
||||||
fprintf2(cf, hf, ")");
|
fprintf2(cf, hf, ")");
|
||||||
fprintf(hf, ";\n");
|
fprintf(hf, ";\n");
|
||||||
fprintf(cf, " {\n");
|
fprintf(cf, " {\n");
|
||||||
fprintf(cf, " u_int32_t ndone_at_start = wbuf->ndone;\n");
|
|
||||||
fprintf(cf, " wbuf_char(wbuf, '%c');\n", (char)(0xff&lt->command_and_flags));
|
{
|
||||||
DO_FIELDS(ft, lt, fprintf(cf, " wbuf_%s(wbuf, %s);\n", ft->type, ft->name));
|
int count=0;
|
||||||
fprintf(cf, " wbuf_int(wbuf, 4+wbuf->ndone - ndone_at_start);\n");
|
fprintf(cf, " u_int32_t rollback_fsize = toku_logger_rollback_fsize_%s(", lt->name);
|
||||||
|
DO_FIELDS(ft, lt, fprintf(cf, "%s%s", (count++>0)?", ":"", ft->name));
|
||||||
|
fprintf(cf, ");\n");
|
||||||
|
fprintf(cf, " wbuf_nocrc_int(wbuf, rollback_fsize);\n");
|
||||||
|
}
|
||||||
|
fprintf(cf, " wbuf_nocrc_char(wbuf, '%c');\n", (char)(0xff&lt->command_and_flags));
|
||||||
|
DO_FIELDS(ft, lt, fprintf(cf, " wbuf_nocrc_%s(wbuf, %s);\n", ft->type, ft->name));
|
||||||
fprintf(cf, "}\n");
|
fprintf(cf, "}\n");
|
||||||
});
|
});
|
||||||
fprintf2(cf, hf, "void toku_logger_rollback_wbufwrite (struct wbuf *wbuf, struct roll_entry *r)");
|
fprintf2(cf, hf, "void toku_logger_rollback_wbuf_nocrc_write (struct wbuf *wbuf, struct roll_entry *r)");
|
||||||
fprintf(hf, ";\n");
|
fprintf(hf, ";\n");
|
||||||
fprintf(cf, " {\n switch (r->cmd) {\n");
|
fprintf(cf, " {\n switch (r->cmd) {\n");
|
||||||
DO_ROLLBACKS(lt, {
|
DO_ROLLBACKS(lt, {
|
||||||
fprintf(cf, " case RT_%s: toku_logger_rollback_wbufwrite_%s(wbuf", lt->name, lt->name);
|
fprintf(cf, " case RT_%s: toku_logger_rollback_wbuf_nocrc_write_%s(wbuf", lt->name, lt->name);
|
||||||
DO_FIELDS(ft, lt, fprintf(cf, ", r->u.%s.%s", lt->name, ft->name));
|
DO_FIELDS(ft, lt, fprintf(cf, ", r->u.%s.%s", lt->name, ft->name));
|
||||||
fprintf(cf, "); return;\n");
|
fprintf(cf, "); return;\n");
|
||||||
});
|
});
|
||||||
|
@ -604,12 +636,15 @@ generate_rollbacks (void) {
|
||||||
|
|
||||||
fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, u_int32_t n_bytes, struct roll_entry **itemp, MEMARENA ma)");
|
fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, u_int32_t n_bytes, struct roll_entry **itemp, MEMARENA ma)");
|
||||||
fprintf(hf, ";\n");
|
fprintf(hf, ";\n");
|
||||||
fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item = malloc_in_memarena(ma, sizeof(*item));\n item->cmd=(enum rt_cmd)(buf[0]);\n");
|
fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n");
|
||||||
fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n");
|
fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n");
|
||||||
fprintf(cf, " switch(item->cmd) {\n");
|
fprintf(cf, " switch(cmd) {\n");
|
||||||
DO_ROLLBACKS(lt, {
|
DO_ROLLBACKS(lt, {
|
||||||
fprintf(cf, " case RT_%s:\n", lt->name);
|
fprintf(cf, " case RT_%s:\n", lt->name);
|
||||||
DO_FIELDS(ft, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", ft->type, lt->name, ft->name));
|
fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name);
|
||||||
|
fprintf(cf, " item = malloc_in_memarena(ma, mem_needed);\n");
|
||||||
|
fprintf(cf, " item->cmd = cmd;\n");
|
||||||
|
DO_FIELDS(ft, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", ft->type, lt->name, ft->name));
|
||||||
fprintf(cf, " *itemp = item;\n");
|
fprintf(cf, " *itemp = item;\n");
|
||||||
fprintf(cf, " return 0;\n");
|
fprintf(cf, " return 0;\n");
|
||||||
});
|
});
|
||||||
|
|
113
newbrt/logger.c
113
newbrt/logger.c
|
@ -55,7 +55,7 @@ int toku_logger_create (TOKULOGGER *resultp) {
|
||||||
result->outbuf = (struct logbuf) {0, LOGGER_MIN_BUF_SIZE, toku_xmalloc(LOGGER_MIN_BUF_SIZE), ZERO_LSN};
|
result->outbuf = (struct logbuf) {0, LOGGER_MIN_BUF_SIZE, toku_xmalloc(LOGGER_MIN_BUF_SIZE), ZERO_LSN};
|
||||||
// written_lsn is uninitialized
|
// written_lsn is uninitialized
|
||||||
// fsynced_lsn is uninitialized
|
// fsynced_lsn is uninitialized
|
||||||
result->checkpoint_lsn = ZERO_LSN;
|
result->last_completed_checkpoint_lsn = ZERO_LSN;
|
||||||
// next_log_file_number is uninitialized
|
// next_log_file_number is uninitialized
|
||||||
// n_in_file is uninitialized
|
// n_in_file is uninitialized
|
||||||
result->write_block_size = BRT_DEFAULT_NODE_SIZE; // default logging size is the same as the default brt block size
|
result->write_block_size = BRT_DEFAULT_NODE_SIZE; // default logging size is the same as the default brt block size
|
||||||
|
@ -68,6 +68,7 @@ int toku_logger_create (TOKULOGGER *resultp) {
|
||||||
result->input_lock_ctr = 0;
|
result->input_lock_ctr = 0;
|
||||||
result->output_condition_lock_ctr = 0;
|
result->output_condition_lock_ctr = 0;
|
||||||
result->swap_ctr = 0;
|
result->swap_ctr = 0;
|
||||||
|
result->rollback_cachefile = NULL;
|
||||||
result->output_is_available = TRUE;
|
result->output_is_available = TRUE;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -136,6 +137,68 @@ int toku_logger_open (const char *directory, TOKULOGGER logger) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, BOOL create) {
|
||||||
|
assert(logger->is_open);
|
||||||
|
assert(!logger->is_panicked);
|
||||||
|
assert(!logger->rollback_cachefile);
|
||||||
|
|
||||||
|
int r;
|
||||||
|
BRT t = NULL; // Note, there is no DB associated with this BRT.
|
||||||
|
|
||||||
|
r = toku_brt_create(&t);
|
||||||
|
assert(r==0);
|
||||||
|
r = toku_brt_open(t, ROLLBACK_CACHEFILE_NAME, create, create, cachetable, NULL_TXN, NULL);
|
||||||
|
assert(r==0);
|
||||||
|
logger->rollback_cachefile = t->cf;
|
||||||
|
toku_brtheader_lock(t->h);
|
||||||
|
//Verify it is empty
|
||||||
|
assert(!t->h->panic);
|
||||||
|
//Must have no data blocks (rollback logs or otherwise).
|
||||||
|
toku_block_verify_no_data_blocks_except_root_unlocked(t->h->blocktable, t->h->root);
|
||||||
|
toku_brtheader_unlock(t->h);
|
||||||
|
assert(toku_brt_is_empty(t));
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Requires: Rollback cachefile can only be closed immediately after a checkpoint,
|
||||||
|
// so it will always be clean (!h->dirty) when about to be closed.
|
||||||
|
// Rollback log can only be closed when there are no open transactions,
|
||||||
|
// so it will always be empty (no data blocks) when about to be closed.
|
||||||
|
int
|
||||||
|
toku_logger_close_rollback(TOKULOGGER logger, BOOL recovery_failed) {
|
||||||
|
int r = 0;
|
||||||
|
CACHEFILE cf = logger->rollback_cachefile; // stored in logger at rollback cachefile open
|
||||||
|
if (!logger->is_panicked && cf) {
|
||||||
|
BRT brt_to_close;
|
||||||
|
{ //Find "brt"
|
||||||
|
struct brt_header *h = toku_cachefile_get_userdata(cf);
|
||||||
|
toku_brtheader_lock(h);
|
||||||
|
if (!h->panic && recovery_failed) {
|
||||||
|
toku_brt_header_set_panic(h, EINVAL, "Recovery failed");
|
||||||
|
}
|
||||||
|
//Verify it is safe to close it.
|
||||||
|
if (!h->panic) { //If paniced, it is safe to close.
|
||||||
|
assert(!h->dirty); //Must not be dirty.
|
||||||
|
//Must have no data blocks (rollback logs or otherwise).
|
||||||
|
toku_block_verify_no_data_blocks_except_root_unlocked(h->blocktable, h->root);
|
||||||
|
}
|
||||||
|
assert(!toku_list_empty(&h->live_brts)); // there is always one brt associated with the header
|
||||||
|
brt_to_close = toku_list_struct(toku_list_head(&h->live_brts), struct brt, live_brt_link);
|
||||||
|
assert(brt_to_close);
|
||||||
|
toku_brtheader_unlock(h);
|
||||||
|
assert(toku_brt_is_empty(brt_to_close));
|
||||||
|
}
|
||||||
|
|
||||||
|
char *error_string_ignore = NULL;
|
||||||
|
r = toku_close_brt(brt_to_close, &error_string_ignore);
|
||||||
|
//Set as dealt with already.
|
||||||
|
logger->rollback_cachefile = NULL;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
// No locks held on entry
|
// No locks held on entry
|
||||||
// No locks held on exit.
|
// No locks held on exit.
|
||||||
// No locks are needed, since you cannot legally close the log concurrently with doing anything else.
|
// No locks are needed, since you cannot legally close the log concurrently with doing anything else.
|
||||||
|
@ -183,7 +246,8 @@ int toku_logger_shutdown(TOKULOGGER logger) {
|
||||||
if (logger->is_open) {
|
if (logger->is_open) {
|
||||||
if (toku_omt_size(logger->live_txns) == 0) {
|
if (toku_omt_size(logger->live_txns) == 0) {
|
||||||
BYTESTRING comment = { strlen("shutdown"), "shutdown" };
|
BYTESTRING comment = { strlen("shutdown"), "shutdown" };
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, comment);
|
int r2 = toku_log_comment(logger, NULL, TRUE, 0, comment);
|
||||||
|
if (!r) r = r2;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
|
@ -787,6 +851,10 @@ int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, u_int32_t *le
|
||||||
return toku_fread_u_int64_t (f, &lsn->lsn, checksum, len);
|
return toku_fread_u_int64_t (f, &lsn->lsn, checksum, len);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *b, struct x1764 *checksum, u_int32_t *len) {
|
||||||
|
return toku_fread_u_int64_t (f, (u_int64_t*)&b->b, checksum, len);
|
||||||
|
}
|
||||||
|
|
||||||
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, u_int32_t *len) {
|
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, u_int32_t *len) {
|
||||||
return toku_fread_u_int32_t (f, &filenum->fileid, checksum, len);
|
return toku_fread_u_int32_t (f, &filenum->fileid, checksum, len);
|
||||||
}
|
}
|
||||||
|
@ -903,6 +971,11 @@ int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, stru
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format) {
|
||||||
|
return toku_logprint_u_int64_t(outf, inf, fieldname, checksum, len, format);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format) {
|
int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format) {
|
||||||
return toku_logprint_u_int32_t(outf, inf, fieldname, checksum, len, format);
|
return toku_logprint_u_int32_t(outf, inf, fieldname, checksum, len, format);
|
||||||
|
|
||||||
|
@ -982,11 +1055,6 @@ TXNID toku_txn_get_txnid (TOKUTXN txn) {
|
||||||
else return txn->txnid64;
|
else return txn->txnid64;
|
||||||
}
|
}
|
||||||
|
|
||||||
LSN toku_txn_get_last_lsn (TOKUTXN txn) {
|
|
||||||
if (txn==0) return (LSN){0};
|
|
||||||
return txn->last_lsn;
|
|
||||||
}
|
|
||||||
|
|
||||||
LSN toku_logger_last_lsn(TOKULOGGER logger) {
|
LSN toku_logger_last_lsn(TOKULOGGER logger) {
|
||||||
return logger->lsn;
|
return logger->lsn;
|
||||||
}
|
}
|
||||||
|
@ -1083,32 +1151,20 @@ int toku_logger_log_archive (TOKULOGGER logger, char ***logs_p, int flags) {
|
||||||
// get them into increasing order
|
// get them into increasing order
|
||||||
qsort(all_logs, all_n_logs, sizeof(all_logs[0]), logfilenamecompare);
|
qsort(all_logs, all_n_logs, sizeof(all_logs[0]), logfilenamecompare);
|
||||||
|
|
||||||
LSN oldest_live_txn_lsn;
|
LSN save_lsn = logger->last_completed_checkpoint_lsn;
|
||||||
{
|
|
||||||
TXNID oldest_living_xid = toku_logger_get_oldest_living_xid(logger);
|
|
||||||
if (oldest_living_xid == TXNID_NONE_LIVING)
|
|
||||||
oldest_live_txn_lsn = MAX_LSN;
|
|
||||||
else
|
|
||||||
oldest_live_txn_lsn.lsn = oldest_living_xid;
|
|
||||||
}
|
|
||||||
|
|
||||||
//printf("%s:%d Oldest txn is %lld\n", __FILE__, __LINE__, (long long)oldest_live_txn_lsn.lsn);
|
|
||||||
|
|
||||||
// Now starting at the last one, look for archivable ones.
|
// Now starting at the last one, look for archivable ones.
|
||||||
// Count the total number of bytes, because we have to return a single big array. (That's the BDB interface. Bleah...)
|
// Count the total number of bytes, because we have to return a single big array. (That's the BDB interface. Bleah...)
|
||||||
LSN earliest_lsn_in_logfile={(unsigned long long)(-1LL)};
|
LSN earliest_lsn_in_logfile={(unsigned long long)(-1LL)};
|
||||||
r = peek_at_log(logger, all_logs[all_n_logs-1], &earliest_lsn_in_logfile); // try to find the lsn that's in the most recent log
|
r = peek_at_log(logger, all_logs[all_n_logs-1], &earliest_lsn_in_logfile); // try to find the lsn that's in the most recent log
|
||||||
if ((earliest_lsn_in_logfile.lsn <= logger->checkpoint_lsn.lsn)&&
|
if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) {
|
||||||
(earliest_lsn_in_logfile.lsn <= oldest_live_txn_lsn.lsn)) {
|
|
||||||
i=all_n_logs-1;
|
i=all_n_logs-1;
|
||||||
} else {
|
} else {
|
||||||
for (i=all_n_logs-2; i>=0; i--) { // start at all_n_logs-2 because we never archive the most recent log
|
for (i=all_n_logs-2; i>=0; i--) { // start at all_n_logs-2 because we never archive the most recent log
|
||||||
r = peek_at_log(logger, all_logs[i], &earliest_lsn_in_logfile);
|
r = peek_at_log(logger, all_logs[i], &earliest_lsn_in_logfile);
|
||||||
if (r!=0) continue; // In case of error, just keep going
|
if (r!=0) continue; // In case of error, just keep going
|
||||||
|
|
||||||
//printf("%s:%d file=%s firstlsn=%lld checkpoint_lsns={%lld %lld}\n", __FILE__, __LINE__, all_logs[i], (long long)earliest_lsn_in_logfile.lsn, (long long)logger->checkpoint_lsns[0].lsn, (long long)logger->checkpoint_lsns[1].lsn);
|
if (earliest_lsn_in_logfile.lsn <= save_lsn.lsn) {
|
||||||
if ((earliest_lsn_in_logfile.lsn <= logger->checkpoint_lsn.lsn)&&
|
|
||||||
(earliest_lsn_in_logfile.lsn <= oldest_live_txn_lsn.lsn)) {
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1148,7 +1204,7 @@ TOKUTXN toku_logger_txn_parent (TOKUTXN txn) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) {
|
void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn) {
|
||||||
logger->checkpoint_lsn = lsn;
|
logger->last_completed_checkpoint_lsn = lsn;
|
||||||
}
|
}
|
||||||
|
|
||||||
TXNID toku_logger_get_oldest_living_xid(TOKULOGGER logger) {
|
TXNID toku_logger_get_oldest_living_xid(TOKULOGGER logger) {
|
||||||
|
@ -1158,17 +1214,6 @@ TXNID toku_logger_get_oldest_living_xid(TOKULOGGER logger) {
|
||||||
return rval;
|
return rval;
|
||||||
}
|
}
|
||||||
|
|
||||||
LSN toku_logger_get_oldest_living_lsn(TOKULOGGER logger) {
|
|
||||||
LSN lsn = {0};
|
|
||||||
if (logger) {
|
|
||||||
if (logger->oldest_living_xid == TXNID_NONE_LIVING)
|
|
||||||
lsn = MAX_LSN;
|
|
||||||
else
|
|
||||||
lsn.lsn = logger->oldest_living_xid;
|
|
||||||
}
|
|
||||||
return lsn;
|
|
||||||
}
|
|
||||||
|
|
||||||
LSN
|
LSN
|
||||||
toku_logger_get_next_lsn(TOKULOGGER logger) {
|
toku_logger_get_next_lsn(TOKULOGGER logger) {
|
||||||
return logger->lsn;
|
return logger->lsn;
|
||||||
|
|
|
@ -5,12 +5,20 @@
|
||||||
#ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved."
|
#ident "Copyright (c) 2007, 2008, 2009 Tokutek Inc. All rights reserved."
|
||||||
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
|
||||||
|
|
||||||
enum { TOKU_LOG_VERSION = 1 };
|
enum {
|
||||||
|
TOKU_LOG_VERSION_1 = 1,
|
||||||
|
TOKU_LOG_VERSION_2 = 2,
|
||||||
|
TOKU_LOG_NEXT_VERSION, // the version after the current version
|
||||||
|
TOKU_LOG_VERSION = TOKU_LOG_NEXT_VERSION-1, // A hack so I don't have to change this line.
|
||||||
|
};
|
||||||
|
#define ROLLBACK_CACHEFILE_NAME "tokudb.rollback"
|
||||||
|
|
||||||
int toku_logger_create (TOKULOGGER *resultp);
|
int toku_logger_create (TOKULOGGER *resultp);
|
||||||
int toku_logger_open (const char *directory, TOKULOGGER logger);
|
int toku_logger_open (const char *directory, TOKULOGGER logger);
|
||||||
int toku_logger_shutdown(TOKULOGGER logger);
|
int toku_logger_shutdown(TOKULOGGER logger);
|
||||||
int toku_logger_close(TOKULOGGER *loggerp);
|
int toku_logger_close(TOKULOGGER *loggerp);
|
||||||
|
int toku_logger_open_rollback(TOKULOGGER logger, CACHETABLE cachetable, BOOL create);
|
||||||
|
int toku_logger_close_rollback(TOKULOGGER logger, BOOL recovery_failed);
|
||||||
|
|
||||||
int toku_logger_fsync (TOKULOGGER logger);
|
int toku_logger_fsync (TOKULOGGER logger);
|
||||||
void toku_logger_panic (TOKULOGGER logger, int err);
|
void toku_logger_panic (TOKULOGGER logger, int err);
|
||||||
|
@ -49,6 +57,7 @@ int toku_fread_u_int32_t_nocrclen (FILE *f, u_int32_t *v);
|
||||||
int toku_fread_u_int32_t (FILE *f, u_int32_t *v, struct x1764 *checksum, u_int32_t *len);
|
int toku_fread_u_int32_t (FILE *f, u_int32_t *v, struct x1764 *checksum, u_int32_t *len);
|
||||||
int toku_fread_u_int64_t (FILE *f, u_int64_t *v, struct x1764 *checksum, u_int32_t *len);
|
int toku_fread_u_int64_t (FILE *f, u_int64_t *v, struct x1764 *checksum, u_int32_t *len);
|
||||||
int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, u_int32_t *len);
|
int toku_fread_LSN (FILE *f, LSN *lsn, struct x1764 *checksum, u_int32_t *len);
|
||||||
|
int toku_fread_BLOCKNUM (FILE *f, BLOCKNUM *lsn, struct x1764 *checksum, u_int32_t *len);
|
||||||
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, u_int32_t *len);
|
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct x1764 *checksum, u_int32_t *len);
|
||||||
int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, u_int32_t *len);
|
int toku_fread_TXNID (FILE *f, TXNID *txnid, struct x1764 *checksum, u_int32_t *len);
|
||||||
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, u_int32_t *len);
|
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct x1764 *checksum, u_int32_t *len);
|
||||||
|
@ -58,6 +67,7 @@ int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct x176
|
||||||
int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__)));
|
int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__)));
|
||||||
int toku_logprint_u_int8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
|
int toku_logprint_u_int8_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
|
||||||
int toku_logprint_u_int32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
|
int toku_logprint_u_int32_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
|
||||||
|
int toku_logprint_BLOCKNUM (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
|
||||||
int toku_logprint_u_int64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
|
int toku_logprint_u_int64_t (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format);
|
||||||
void toku_print_BYTESTRING (FILE *outf, u_int32_t len, char *data);
|
void toku_print_BYTESTRING (FILE *outf, u_int32_t len, char *data);
|
||||||
int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__)));
|
int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct x1764 *checksum, u_int32_t *len, const char *format __attribute__((__unused__)));
|
||||||
|
@ -67,7 +77,6 @@ int toku_read_and_print_logmagic (FILE *f, u_int32_t *versionp);
|
||||||
int toku_read_logmagic (FILE *f, u_int32_t *versionp);
|
int toku_read_logmagic (FILE *f, u_int32_t *versionp);
|
||||||
|
|
||||||
TXNID toku_txn_get_txnid (TOKUTXN txn);
|
TXNID toku_txn_get_txnid (TOKUTXN txn);
|
||||||
LSN toku_txn_get_last_lsn (TOKUTXN txn);
|
|
||||||
LSN toku_logger_last_lsn(TOKULOGGER logger);
|
LSN toku_logger_last_lsn(TOKULOGGER logger);
|
||||||
TOKULOGGER toku_txn_logger (TOKUTXN txn);
|
TOKULOGGER toku_txn_logger (TOKUTXN txn);
|
||||||
|
|
||||||
|
@ -81,7 +90,6 @@ TOKUTXN toku_logger_txn_parent (TOKUTXN txn);
|
||||||
void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn);
|
void toku_logger_note_checkpoint(TOKULOGGER logger, LSN lsn);
|
||||||
|
|
||||||
TXNID toku_logger_get_oldest_living_xid(TOKULOGGER logger);
|
TXNID toku_logger_get_oldest_living_xid(TOKULOGGER logger);
|
||||||
LSN toku_logger_get_oldest_living_lsn(TOKULOGGER logger);
|
|
||||||
LSN toku_logger_get_next_lsn(TOKULOGGER logger);
|
LSN toku_logger_get_next_lsn(TOKULOGGER logger);
|
||||||
void toku_logger_set_remove_finalize_callback(TOKULOGGER logger, void (*funcp)(DICTIONARY_ID, void *), void * extra);
|
void toku_logger_set_remove_finalize_callback(TOKULOGGER logger, void (*funcp)(DICTIONARY_ID, void *), void * extra);
|
||||||
void toku_logger_call_remove_finalize_callback(TOKULOGGER logger, DICTIONARY_ID dict_id);
|
void toku_logger_call_remove_finalize_callback(TOKULOGGER logger, DICTIONARY_ID dict_id);
|
||||||
|
|
|
@ -12,9 +12,9 @@ struct memarena {
|
||||||
int n_other_bufs;
|
int n_other_bufs;
|
||||||
};
|
};
|
||||||
|
|
||||||
MEMARENA memarena_create (void) {
|
MEMARENA memarena_create_presized (size_t initial_size) {
|
||||||
MEMARENA MALLOC(result); assert(result);
|
MEMARENA XMALLOC(result);
|
||||||
result->buf_size = 1024;
|
result->buf_size = initial_size;
|
||||||
result->buf_used = 0;
|
result->buf_used = 0;
|
||||||
result->other_bufs = NULL;
|
result->other_bufs = NULL;
|
||||||
result->size_of_other_bufs = 0;
|
result->size_of_other_bufs = 0;
|
||||||
|
@ -23,6 +23,10 @@ MEMARENA memarena_create (void) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MEMARENA memarena_create (void) {
|
||||||
|
return memarena_create_presized(1024);
|
||||||
|
}
|
||||||
|
|
||||||
void memarena_clear (MEMARENA ma) {
|
void memarena_clear (MEMARENA ma) {
|
||||||
// Free the other bufs.
|
// Free the other bufs.
|
||||||
int i;
|
int i;
|
||||||
|
|
|
@ -19,10 +19,11 @@
|
||||||
|
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
typedef struct memarena *MEMARENA;
|
MEMARENA memarena_create_presized (size_t initial_size);
|
||||||
|
// Effect: Create a memarena with initial size. In case of ENOMEM, aborts.
|
||||||
|
|
||||||
MEMARENA memarena_create (void);
|
MEMARENA memarena_create (void);
|
||||||
// Effect: Create a memarena. In case of ENOMEM, aborts.
|
// Effect: Create a memarena with default initial size. In case of ENOMEM, aborts.
|
||||||
|
|
||||||
void memarena_clear (MEMARENA ma);
|
void memarena_clear (MEMARENA ma);
|
||||||
// Effect: Reset the internal state so that the allocated memory can be used again.
|
// Effect: Reset the internal state so that the allocated memory can be used again.
|
||||||
|
|
|
@ -100,6 +100,18 @@ static inline BLOCKNUM rbuf_blocknum (struct rbuf *r) {
|
||||||
BLOCKNUM result = make_blocknum(rbuf_longlong(r));
|
BLOCKNUM result = make_blocknum(rbuf_longlong(r));
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), BLOCKNUM *blocknum) {
|
||||||
|
*blocknum = rbuf_blocknum(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void rbuf_ma_u_int32_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), u_int32_t *num) {
|
||||||
|
*num = rbuf_int(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void rbuf_ma_u_int64_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), u_int64_t *num) {
|
||||||
|
*num = rbuf_ulonglong(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline void rbuf_TXNID (struct rbuf *r, TXNID *txnid) {
|
static inline void rbuf_TXNID (struct rbuf *r, TXNID *txnid) {
|
||||||
*txnid = rbuf_ulonglong(r);
|
*txnid = rbuf_ulonglong(r);
|
||||||
|
@ -119,7 +131,7 @@ static inline void rbuf_ma_FILENUM (struct rbuf *r, MEMARENA ma __attribute__((_
|
||||||
static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) {
|
static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) {
|
||||||
bs->len = rbuf_int(r);
|
bs->len = rbuf_int(r);
|
||||||
u_int32_t newndone = r->ndone + bs->len;
|
u_int32_t newndone = r->ndone + bs->len;
|
||||||
assert(newndone < r->size);
|
assert(newndone <= r->size);
|
||||||
bs->data = toku_memdup(&r->buf[r->ndone], (size_t)bs->len);
|
bs->data = toku_memdup(&r->buf[r->ndone], (size_t)bs->len);
|
||||||
assert(bs->data);
|
assert(bs->data);
|
||||||
r->ndone = newndone;
|
r->ndone = newndone;
|
||||||
|
@ -128,7 +140,7 @@ static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) {
|
||||||
static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING *bs) {
|
static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING *bs) {
|
||||||
bs->len = rbuf_int(r);
|
bs->len = rbuf_int(r);
|
||||||
u_int32_t newndone = r->ndone + bs->len;
|
u_int32_t newndone = r->ndone + bs->len;
|
||||||
assert(newndone < r->size);
|
assert(newndone <= r->size);
|
||||||
bs->data = memarena_memdup(ma, &r->buf[r->ndone], (size_t)bs->len);
|
bs->data = memarena_memdup(ma, &r->buf[r->ndone], (size_t)bs->len);
|
||||||
assert(bs->data);
|
assert(bs->data);
|
||||||
r->ndone = newndone;
|
r->ndone = newndone;
|
||||||
|
|
1075
newbrt/recover.c
1075
newbrt/recover.c
File diff suppressed because it is too large
Load diff
|
@ -11,7 +11,6 @@
|
||||||
#include "../include/db.h"
|
#include "../include/db.h"
|
||||||
#include "brttypes.h"
|
#include "brttypes.h"
|
||||||
#include "memory.h"
|
#include "memory.h"
|
||||||
#include "bread.h"
|
|
||||||
#include "x1764.h"
|
#include "x1764.h"
|
||||||
|
|
||||||
// Run tokudb recovery from the log
|
// Run tokudb recovery from the log
|
||||||
|
@ -29,10 +28,6 @@ int tokudb_recover (const char *env_dir, const char *log_dir,
|
||||||
// Returns: TRUE if we need recovery, otherwise FALSE.
|
// Returns: TRUE if we need recovery, otherwise FALSE.
|
||||||
int tokudb_needs_recovery(const char *logdir, BOOL ignore_empty_log);
|
int tokudb_needs_recovery(const char *logdir, BOOL ignore_empty_log);
|
||||||
|
|
||||||
// Delete the rolltmp files
|
|
||||||
// Ruturns 0 if success
|
|
||||||
int tokudb_recover_delete_rolltmp_files(const char *datadir, const char *logdir);
|
|
||||||
|
|
||||||
// Return 0 if recovery log exists, ENOENT if log is missing
|
// Return 0 if recovery log exists, ENOENT if log is missing
|
||||||
int tokudb_recover_log_exists(const char * log_dir);
|
int tokudb_recover_log_exists(const char * log_dir);
|
||||||
|
|
||||||
|
|
390
newbrt/roll.c
390
newbrt/roll.c
|
@ -12,11 +12,11 @@
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_commit_fdelete (u_int8_t file_was_open,
|
toku_commit_fdelete (u_int8_t file_was_open,
|
||||||
FILENUM filenum, // valid if file_was_open
|
FILENUM filenum, // valid if file_was_open
|
||||||
BYTESTRING bs_fname, // cwd/iname
|
BYTESTRING bs_fname, // cwd/iname
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void *UU(yield_v),
|
void *UU(yield_v),
|
||||||
LSN UU(oplsn)) //oplsn is the lsn of the commit
|
LSN UU(oplsn)) //oplsn is the lsn of the commit
|
||||||
{
|
{
|
||||||
//TODO: #2037 verify the file is (user) closed
|
//TODO: #2037 verify the file is (user) closed
|
||||||
|
@ -24,18 +24,23 @@ toku_commit_fdelete (u_int8_t file_was_open,
|
||||||
CACHEFILE cf;
|
CACHEFILE cf;
|
||||||
int r;
|
int r;
|
||||||
if (file_was_open) { // file was open when toku_brt_remove_on_commit() was called
|
if (file_was_open) { // file was open when toku_brt_remove_on_commit() was called
|
||||||
r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
||||||
assert(r == 0); // must still be open (toku_brt_remove_on_commit() incremented refcount)
|
if (r==ENOENT) { //Missing file on recovered transaction is not an error
|
||||||
{
|
assert(txn->recovered_from_checkpoint);
|
||||||
(void)toku_cachefile_get_and_pin_fd(cf);
|
r = 0;
|
||||||
assert(!toku_cachefile_is_dev_null_unlocked(cf));
|
goto done;
|
||||||
struct brt_header *h = toku_cachefile_get_userdata(cf);
|
}
|
||||||
DICTIONARY_ID dict_id = h->dict_id;
|
assert(r == 0); // must still be open (toku_brt_remove_on_commit() incremented refcount)
|
||||||
toku_logger_call_remove_finalize_callback(txn->logger, dict_id);
|
{
|
||||||
|
(void)toku_cachefile_get_and_pin_fd(cf);
|
||||||
|
assert(!toku_cachefile_is_dev_null_unlocked(cf));
|
||||||
|
struct brt_header *h = toku_cachefile_get_userdata(cf);
|
||||||
|
DICTIONARY_ID dict_id = h->dict_id;
|
||||||
|
toku_logger_call_remove_finalize_callback(txn->logger, dict_id);
|
||||||
toku_cachefile_unpin_fd(cf);
|
toku_cachefile_unpin_fd(cf);
|
||||||
}
|
}
|
||||||
r = toku_cachefile_redirect_nullfd(cf);
|
r = toku_cachefile_redirect_nullfd(cf);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
}
|
}
|
||||||
char *fname_in_env = fixup_fname(&bs_fname);
|
char *fname_in_env = fixup_fname(&bs_fname);
|
||||||
char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(txn->logger->ct, fname_in_env);
|
char *fname_in_cwd = toku_cachetable_get_fname_in_cwd(txn->logger->ct, fname_in_env);
|
||||||
|
@ -44,16 +49,17 @@ toku_commit_fdelete (u_int8_t file_was_open,
|
||||||
assert(r==0 || errno==ENOENT);
|
assert(r==0 || errno==ENOENT);
|
||||||
toku_free(fname_in_env);
|
toku_free(fname_in_env);
|
||||||
toku_free(fname_in_cwd);
|
toku_free(fname_in_cwd);
|
||||||
|
done:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_fdelete (u_int8_t UU(file_was_open),
|
toku_rollback_fdelete (u_int8_t UU(file_was_open),
|
||||||
FILENUM UU(filenum),
|
FILENUM UU(filenum),
|
||||||
BYTESTRING UU(bs_fname),
|
BYTESTRING UU(bs_fname),
|
||||||
TOKUTXN UU(txn),
|
TOKUTXN UU(txn),
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void* UU(yield_v),
|
void* UU(yield_v),
|
||||||
LSN UU(oplsn)) //oplsn is the lsn of the abort
|
LSN UU(oplsn)) //oplsn is the lsn of the abort
|
||||||
{
|
{
|
||||||
//Rolling back an fdelete is an no-op.
|
//Rolling back an fdelete is an no-op.
|
||||||
|
@ -62,10 +68,10 @@ toku_rollback_fdelete (u_int8_t UU(file_was_open),
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_commit_fcreate (FILENUM UU(filenum),
|
toku_commit_fcreate (FILENUM UU(filenum),
|
||||||
BYTESTRING UU(bs_fname),
|
BYTESTRING UU(bs_fname),
|
||||||
TOKUTXN UU(txn),
|
TOKUTXN UU(txn),
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void *UU(yield_v),
|
void *UU(yield_v),
|
||||||
LSN UU(oplsn))
|
LSN UU(oplsn))
|
||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -73,10 +79,10 @@ toku_commit_fcreate (FILENUM UU(filenum),
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_fcreate (FILENUM filenum,
|
toku_rollback_fcreate (FILENUM filenum,
|
||||||
BYTESTRING bs_fname, // cwd/iname
|
BYTESTRING bs_fname, // cwd/iname
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void* UU(yield_v),
|
void* UU(yield_v),
|
||||||
LSN UU(oplsn))
|
LSN UU(oplsn))
|
||||||
{
|
{
|
||||||
//TODO: #2037 verify the file is (user) closed
|
//TODO: #2037 verify the file is (user) closed
|
||||||
|
@ -84,13 +90,18 @@ toku_rollback_fcreate (FILENUM filenum,
|
||||||
//Remove reference to the fd in the cachetable
|
//Remove reference to the fd in the cachetable
|
||||||
CACHEFILE cf = NULL;
|
CACHEFILE cf = NULL;
|
||||||
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
||||||
|
if (r==ENOENT) { //Missing file on recovered transaction is not an error
|
||||||
|
assert(txn->recovered_from_checkpoint);
|
||||||
|
r = 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
assert(r == 0);
|
assert(r == 0);
|
||||||
{
|
{
|
||||||
(void)toku_cachefile_get_and_pin_fd(cf);
|
(void)toku_cachefile_get_and_pin_fd(cf);
|
||||||
assert(!toku_cachefile_is_dev_null_unlocked(cf));
|
assert(!toku_cachefile_is_dev_null_unlocked(cf));
|
||||||
struct brt_header *h = toku_cachefile_get_userdata(cf);
|
struct brt_header *h = toku_cachefile_get_userdata(cf);
|
||||||
DICTIONARY_ID dict_id = h->dict_id;
|
DICTIONARY_ID dict_id = h->dict_id;
|
||||||
toku_logger_call_remove_finalize_callback(txn->logger, dict_id);
|
toku_logger_call_remove_finalize_callback(txn->logger, dict_id);
|
||||||
toku_cachefile_unpin_fd(cf);
|
toku_cachefile_unpin_fd(cf);
|
||||||
}
|
}
|
||||||
r = toku_cachefile_redirect_nullfd(cf);
|
r = toku_cachefile_redirect_nullfd(cf);
|
||||||
|
@ -103,6 +114,7 @@ toku_rollback_fcreate (FILENUM filenum,
|
||||||
assert(r==0 || errno==ENOENT);
|
assert(r==0 || errno==ENOENT);
|
||||||
toku_free(fname_in_env);
|
toku_free(fname_in_env);
|
||||||
toku_free(fname_in_cwd);
|
toku_free(fname_in_cwd);
|
||||||
|
done:
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,6 +131,11 @@ static int do_insertion (enum brt_msg_type type, FILENUM filenum, BYTESTRING key
|
||||||
CACHEFILE cf;
|
CACHEFILE cf;
|
||||||
//printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data);
|
//printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data);
|
||||||
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
||||||
|
if (r==ENOENT) { //Missing file on recovered transaction is not an error
|
||||||
|
assert(txn->recovered_from_checkpoint);
|
||||||
|
r = 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
|
||||||
(void)toku_cachefile_get_and_pin_fd(cf);
|
(void)toku_cachefile_get_and_pin_fd(cf);
|
||||||
|
@ -146,6 +163,7 @@ static int do_insertion (enum brt_msg_type type, FILENUM filenum, BYTESTRING key
|
||||||
}
|
}
|
||||||
cleanup:
|
cleanup:
|
||||||
toku_cachefile_unpin_fd(cf);
|
toku_cachefile_unpin_fd(cf);
|
||||||
|
done:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -166,11 +184,11 @@ int toku_commit_cmdinsert (FILENUM filenum, BYTESTRING key, TOKUTXN txn, YIELDF
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_commit_cmdinsertboth (FILENUM filenum,
|
toku_commit_cmdinsertboth (FILENUM filenum,
|
||||||
BYTESTRING key,
|
BYTESTRING key,
|
||||||
BYTESTRING data,
|
BYTESTRING data,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yieldv),
|
void * UU(yieldv),
|
||||||
LSN oplsn)
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
#if TOKU_DO_COMMIT_CMD_INSERT
|
#if TOKU_DO_COMMIT_CMD_INSERT
|
||||||
|
@ -183,10 +201,10 @@ toku_commit_cmdinsertboth (FILENUM filenum,
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_cmdinsert (FILENUM filenum,
|
toku_rollback_cmdinsert (FILENUM filenum,
|
||||||
BYTESTRING key,
|
BYTESTRING key,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yieldv),
|
void * UU(yieldv),
|
||||||
LSN oplsn)
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
return do_insertion (BRT_ABORT_ANY, filenum, key, 0, txn, oplsn);
|
return do_insertion (BRT_ABORT_ANY, filenum, key, 0, txn, oplsn);
|
||||||
|
@ -194,11 +212,11 @@ toku_rollback_cmdinsert (FILENUM filenum,
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_cmdinsertboth (FILENUM filenum,
|
toku_rollback_cmdinsertboth (FILENUM filenum,
|
||||||
BYTESTRING key,
|
BYTESTRING key,
|
||||||
BYTESTRING data,
|
BYTESTRING data,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yieldv),
|
void * UU(yieldv),
|
||||||
LSN oplsn)
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
return do_insertion (BRT_ABORT_BOTH, filenum, key, &data, txn, oplsn);
|
return do_insertion (BRT_ABORT_BOTH, filenum, key, &data, txn, oplsn);
|
||||||
|
@ -206,11 +224,11 @@ toku_rollback_cmdinsertboth (FILENUM filenum,
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_commit_cmddeleteboth (FILENUM filenum,
|
toku_commit_cmddeleteboth (FILENUM filenum,
|
||||||
BYTESTRING key,
|
BYTESTRING key,
|
||||||
BYTESTRING data,
|
BYTESTRING data,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yieldv),
|
void * UU(yieldv),
|
||||||
LSN oplsn)
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
#if TOKU_DO_COMMIT_CMD_DELETE_BOTH
|
#if TOKU_DO_COMMIT_CMD_DELETE_BOTH
|
||||||
|
@ -223,11 +241,11 @@ toku_commit_cmddeleteboth (FILENUM filenum,
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_cmddeleteboth (FILENUM filenum,
|
toku_rollback_cmddeleteboth (FILENUM filenum,
|
||||||
BYTESTRING key,
|
BYTESTRING key,
|
||||||
BYTESTRING data,
|
BYTESTRING data,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yieldv),
|
void * UU(yieldv),
|
||||||
LSN oplsn)
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
return do_insertion (BRT_ABORT_BOTH, filenum, key, &data, txn, oplsn);
|
return do_insertion (BRT_ABORT_BOTH, filenum, key, &data, txn, oplsn);
|
||||||
|
@ -235,10 +253,10 @@ toku_rollback_cmddeleteboth (FILENUM filenum,
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_commit_cmddelete (FILENUM filenum,
|
toku_commit_cmddelete (FILENUM filenum,
|
||||||
BYTESTRING key,
|
BYTESTRING key,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yieldv),
|
void * UU(yieldv),
|
||||||
LSN oplsn)
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
#if TOKU_DO_COMMIT_CMD_DELETE
|
#if TOKU_DO_COMMIT_CMD_DELETE
|
||||||
|
@ -251,113 +269,114 @@ toku_commit_cmddelete (FILENUM filenum,
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_cmddelete (FILENUM filenum,
|
toku_rollback_cmddelete (FILENUM filenum,
|
||||||
BYTESTRING key,
|
BYTESTRING key,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yieldv),
|
void * UU(yieldv),
|
||||||
LSN oplsn)
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
return do_insertion (BRT_ABORT_ANY, filenum, key, 0, txn, oplsn);
|
return do_insertion (BRT_ABORT_ANY, filenum, key, 0, txn, oplsn);
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
static int
|
||||||
toku_commit_fileentries (int fd,
|
toku_apply_rollinclude (TXNID xid,
|
||||||
TOKUTXN txn,
|
uint64_t num_nodes,
|
||||||
YIELDF yield,
|
BLOCKNUM spilled_head,
|
||||||
void * yieldv,
|
uint32_t spilled_head_hash,
|
||||||
LSN oplsn)
|
BLOCKNUM spilled_tail,
|
||||||
{
|
uint32_t spilled_tail_hash,
|
||||||
BREAD f = create_bread_from_fd_initialize_at(fd);
|
TOKUTXN txn,
|
||||||
int r=0;
|
YIELDF yield,
|
||||||
MEMARENA ma = memarena_create();
|
void * yieldv,
|
||||||
|
LSN oplsn,
|
||||||
|
apply_rollback_item func) {
|
||||||
|
int r;
|
||||||
|
struct roll_entry *item;
|
||||||
int count=0;
|
int count=0;
|
||||||
while (bread_has_more(f)) {
|
|
||||||
struct roll_entry *item;
|
BLOCKNUM next_log = spilled_tail;
|
||||||
r = toku_read_rollback_backwards(f, &item, ma);
|
uint32_t next_log_hash = spilled_tail_hash;
|
||||||
if (r!=0) goto finish;
|
uint64_t last_sequence = num_nodes;
|
||||||
r = toku_commit_rollback_item(txn, item, yield, yieldv, oplsn);
|
|
||||||
if (r!=0) goto finish;
|
BOOL found_head = FALSE;
|
||||||
memarena_clear(ma);
|
assert(next_log.b != ROLLBACK_NONE.b);
|
||||||
count++;
|
while (next_log.b != ROLLBACK_NONE.b) {
|
||||||
if (count%2==0) yield(NULL, yieldv);
|
ROLLBACK_LOG_NODE log;
|
||||||
|
//pin log
|
||||||
|
r = toku_get_and_pin_rollback_log(txn, xid, last_sequence - 1, next_log, next_log_hash, &log);
|
||||||
|
assert(r==0);
|
||||||
|
last_sequence = log->sequence;
|
||||||
|
|
||||||
|
while ((item=log->newest_logentry)) {
|
||||||
|
log->newest_logentry = item->prev;
|
||||||
|
r = func(txn, item, yield, yieldv, oplsn);
|
||||||
|
if (r!=0) return r;
|
||||||
|
count++;
|
||||||
|
if (count%2 == 0) yield(NULL, yieldv);
|
||||||
|
}
|
||||||
|
if (next_log.b == spilled_head.b) {
|
||||||
|
assert(!found_head);
|
||||||
|
found_head = TRUE;
|
||||||
|
assert(log->sequence == 0);
|
||||||
|
}
|
||||||
|
next_log = log->older;
|
||||||
|
next_log_hash = log->older_hash;
|
||||||
|
{
|
||||||
|
//Clean up transaction structure to prevent
|
||||||
|
//toku_txn_close from double-freeing
|
||||||
|
spilled_tail = next_log;
|
||||||
|
spilled_tail_hash = next_log_hash;
|
||||||
|
if (found_head) {
|
||||||
|
assert(next_log.b == ROLLBACK_NONE.b);
|
||||||
|
spilled_head = next_log;
|
||||||
|
spilled_head_hash = next_log_hash;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Unpins log
|
||||||
|
r = toku_delete_rollback_log(txn, log);
|
||||||
|
assert(r==0);
|
||||||
}
|
}
|
||||||
finish:
|
|
||||||
{ int r2 = close_bread_without_closing_fd(f); assert(r2==0); }
|
|
||||||
memarena_close(&ma);
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_fileentries (int fd,
|
toku_commit_rollinclude (TXNID xid,
|
||||||
TOKUTXN txn,
|
uint64_t num_nodes,
|
||||||
YIELDF yield,
|
BLOCKNUM spilled_head,
|
||||||
void * yieldv,
|
uint32_t spilled_head_hash,
|
||||||
LSN oplsn)
|
BLOCKNUM spilled_tail,
|
||||||
{
|
uint32_t spilled_tail_hash,
|
||||||
BREAD f = create_bread_from_fd_initialize_at(fd);
|
TOKUTXN txn,
|
||||||
assert(f);
|
YIELDF yield,
|
||||||
int r=0;
|
void * yieldv,
|
||||||
MEMARENA ma = memarena_create();
|
|
||||||
int count=0;
|
|
||||||
while (bread_has_more(f)) {
|
|
||||||
struct roll_entry *item;
|
|
||||||
r = toku_read_rollback_backwards(f, &item, ma);
|
|
||||||
if (r!=0) goto finish;
|
|
||||||
r = toku_abort_rollback_item(txn, item, yield, yieldv, oplsn);
|
|
||||||
if (r!=0) goto finish;
|
|
||||||
memarena_clear(ma);
|
|
||||||
count++;
|
|
||||||
if (count%2==0) yield(NULL, yieldv);
|
|
||||||
}
|
|
||||||
finish:
|
|
||||||
{ int r2 = close_bread_without_closing_fd(f); assert(r2==0); }
|
|
||||||
memarena_close(&ma);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
toku_commit_rollinclude (BYTESTRING bs,
|
|
||||||
TOKUTXN txn,
|
|
||||||
YIELDF yield,
|
|
||||||
void * yieldv,
|
|
||||||
LSN oplsn) {
|
LSN oplsn) {
|
||||||
int r;
|
int r;
|
||||||
char *fname_in_logger = fixup_fname(&bs);
|
r = toku_apply_rollinclude(xid, num_nodes,
|
||||||
char *fname_in_cwd = toku_construct_full_name(2, txn->logger->directory, fname_in_logger);
|
spilled_head, spilled_head_hash,
|
||||||
int fd = open(fname_in_cwd, O_RDONLY+O_BINARY);
|
spilled_tail, spilled_tail_hash,
|
||||||
assert(fd>=0);
|
txn, yield, yieldv, oplsn,
|
||||||
r = toku_commit_fileentries(fd, txn, yield, yieldv, oplsn);
|
toku_commit_rollback_item);
|
||||||
assert(r==0);
|
return r;
|
||||||
r = close(fd);
|
|
||||||
assert(r==0);
|
|
||||||
r = unlink(fname_in_cwd);
|
|
||||||
assert(r==0);
|
|
||||||
toku_free(fname_in_logger);
|
|
||||||
toku_free(fname_in_cwd);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_rollinclude (BYTESTRING bs,
|
toku_rollback_rollinclude (TXNID xid,
|
||||||
TOKUTXN txn,
|
uint64_t num_nodes,
|
||||||
YIELDF yield,
|
BLOCKNUM spilled_head,
|
||||||
void * yieldv,
|
uint32_t spilled_head_hash,
|
||||||
LSN oplsn)
|
BLOCKNUM spilled_tail,
|
||||||
{
|
uint32_t spilled_tail_hash,
|
||||||
|
TOKUTXN txn,
|
||||||
|
YIELDF yield,
|
||||||
|
void * yieldv,
|
||||||
|
LSN oplsn) {
|
||||||
int r;
|
int r;
|
||||||
char *fname_in_logger = fixup_fname(&bs);
|
r = toku_apply_rollinclude(xid, num_nodes,
|
||||||
char *fname_in_cwd = toku_construct_full_name(2, txn->logger->directory, fname_in_logger);
|
spilled_head, spilled_head_hash,
|
||||||
int fd = open(fname_in_cwd, O_RDONLY+O_BINARY);
|
spilled_tail, spilled_tail_hash,
|
||||||
assert(fd>=0);
|
txn, yield, yieldv, oplsn,
|
||||||
r = toku_rollback_fileentries(fd, txn, yield, yieldv, oplsn);
|
toku_abort_rollback_item);
|
||||||
assert(r==0);
|
return r;
|
||||||
r = close(fd);
|
|
||||||
assert(r==0);
|
|
||||||
r = unlink(fname_in_cwd);
|
|
||||||
assert(r==0);
|
|
||||||
toku_free(fname_in_logger);
|
|
||||||
toku_free(fname_in_cwd);
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
|
@ -365,7 +384,7 @@ toku_rollback_tablelock_on_empty_table (FILENUM filenum,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF yield,
|
YIELDF yield,
|
||||||
void* yield_v,
|
void* yield_v,
|
||||||
LSN UU(oplsn))
|
LSN oplsn)
|
||||||
{
|
{
|
||||||
//TODO: Replace truncate function with something that doesn't need to mess with checkpoints.
|
//TODO: Replace truncate function with something that doesn't need to mess with checkpoints.
|
||||||
// on rollback we have to make the file be empty, since we locked an empty table, and then may have done things to it.
|
// on rollback we have to make the file be empty, since we locked an empty table, and then may have done things to it.
|
||||||
|
@ -373,22 +392,33 @@ toku_rollback_tablelock_on_empty_table (FILENUM filenum,
|
||||||
CACHEFILE cf;
|
CACHEFILE cf;
|
||||||
//printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data);
|
//printf("%s:%d committing insert %s %s\n", __FILE__, __LINE__, key.data, data.data);
|
||||||
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
int r = toku_cachefile_of_filenum(txn->logger->ct, filenum, &cf);
|
||||||
|
if (r==ENOENT) { //Missing file on recovered transaction is not an error
|
||||||
|
assert(txn->recovered_from_checkpoint);
|
||||||
|
r = 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
|
||||||
OMTVALUE brtv=NULL;
|
OMTVALUE brtv=NULL;
|
||||||
r = toku_omt_find_zero(txn->open_brts, find_brt_from_filenum, &filenum, &brtv, NULL, NULL);
|
r = toku_omt_find_zero(txn->open_brts, find_brt_from_filenum, &filenum, &brtv, NULL, NULL);
|
||||||
if (r==0) {
|
assert(r==0);
|
||||||
// If r!=0 it could be because we grabbed a log on an empty table that doesn't even exist, and we never put anything into it.
|
BRT brt = brtv;
|
||||||
// So, just don't do anything in this case.
|
{ //Do NOT truncate the file if
|
||||||
BRT brt = brtv;
|
//the file already survived the truncate and was checkpointed.
|
||||||
toku_poll_txn_progress_function(txn, FALSE, TRUE);
|
LSN treelsn = toku_brt_checkpoint_lsn(brt);
|
||||||
yield(toku_checkpoint_safe_client_lock, yield_v);
|
if (oplsn.lsn != 0 && oplsn.lsn <= treelsn.lsn) {
|
||||||
toku_poll_txn_progress_function(txn, FALSE, FALSE);
|
r = 0;
|
||||||
r = toku_brt_truncate(brt);
|
goto done;
|
||||||
assert(r==0);
|
}
|
||||||
toku_checkpoint_safe_client_unlock();
|
|
||||||
}
|
}
|
||||||
|
toku_poll_txn_progress_function(txn, FALSE, TRUE);
|
||||||
|
yield(toku_checkpoint_safe_client_lock, yield_v);
|
||||||
|
toku_poll_txn_progress_function(txn, FALSE, FALSE);
|
||||||
|
r = toku_brt_truncate(brt);
|
||||||
|
assert(r==0);
|
||||||
|
toku_checkpoint_safe_client_unlock();
|
||||||
|
|
||||||
|
done:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -411,7 +441,7 @@ toku_commit_load (BYTESTRING old_iname,
|
||||||
char *fname_in_env = fixup_fname(&old_iname); //Delete old file
|
char *fname_in_env = fixup_fname(&old_iname); //Delete old file
|
||||||
r = toku_cachefile_of_iname_in_env(txn->logger->ct, fname_in_env, &cf);
|
r = toku_cachefile_of_iname_in_env(txn->logger->ct, fname_in_env, &cf);
|
||||||
if (r==0) {
|
if (r==0) {
|
||||||
r = toku_cachefile_redirect_nullfd(cf);
|
r = toku_cachefile_redirect_nullfd(cf);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -438,7 +468,7 @@ toku_rollback_load (BYTESTRING UU(old_iname),
|
||||||
char *fname_in_env = fixup_fname(&new_iname); //Delete new file
|
char *fname_in_env = fixup_fname(&new_iname); //Delete new file
|
||||||
r = toku_cachefile_of_iname_in_env(txn->logger->ct, fname_in_env, &cf);
|
r = toku_cachefile_of_iname_in_env(txn->logger->ct, fname_in_env, &cf);
|
||||||
if (r==0) {
|
if (r==0) {
|
||||||
r = toku_cachefile_redirect_nullfd(cf);
|
r = toku_cachefile_redirect_nullfd(cf);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -455,38 +485,44 @@ toku_rollback_load (BYTESTRING UU(old_iname),
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_commit_dictionary_redirect (FILENUM UU(old_filenum),
|
toku_commit_dictionary_redirect (FILENUM UU(old_filenum),
|
||||||
FILENUM UU(new_filenum),
|
FILENUM UU(new_filenum),
|
||||||
TOKUTXN UU(txn),
|
TOKUTXN UU(txn),
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yield_v),
|
void * UU(yield_v),
|
||||||
LSN UU(oplsn)) //oplsn is the lsn of the commit
|
LSN UU(oplsn)) //oplsn is the lsn of the commit
|
||||||
{
|
{
|
||||||
//NO-OP
|
//Redirect only has meaning during normal operation (NOT during recovery).
|
||||||
|
if (!txn->recovered_from_checkpoint) {
|
||||||
|
//NO-OP
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
toku_rollback_dictionary_redirect (FILENUM old_filenum,
|
toku_rollback_dictionary_redirect (FILENUM old_filenum,
|
||||||
FILENUM new_filenum,
|
FILENUM new_filenum,
|
||||||
TOKUTXN txn,
|
TOKUTXN txn,
|
||||||
YIELDF UU(yield),
|
YIELDF UU(yield),
|
||||||
void * UU(yield_v),
|
void * UU(yield_v),
|
||||||
LSN UU(oplsn)) //oplsn is the lsn of the abort
|
LSN UU(oplsn)) //oplsn is the lsn of the abort
|
||||||
{
|
{
|
||||||
int r = 0;
|
int r = 0;
|
||||||
CACHEFILE new_cf = NULL;
|
//Redirect only has meaning during normal operation (NOT during recovery).
|
||||||
r = toku_cachefile_of_filenum(txn->logger->ct, new_filenum, &new_cf);
|
if (!txn->recovered_from_checkpoint) {
|
||||||
assert(r == 0);
|
CACHEFILE new_cf = NULL;
|
||||||
struct brt_header *new_h = toku_cachefile_get_userdata(new_cf);
|
r = toku_cachefile_of_filenum(txn->logger->ct, new_filenum, &new_cf);
|
||||||
|
assert(r == 0);
|
||||||
|
struct brt_header *new_h = toku_cachefile_get_userdata(new_cf);
|
||||||
|
|
||||||
CACHEFILE old_cf = NULL;
|
CACHEFILE old_cf = NULL;
|
||||||
r = toku_cachefile_of_filenum(txn->logger->ct, old_filenum, &old_cf);
|
r = toku_cachefile_of_filenum(txn->logger->ct, old_filenum, &old_cf);
|
||||||
assert(r == 0);
|
assert(r == 0);
|
||||||
struct brt_header *old_h = toku_cachefile_get_userdata(old_cf);
|
struct brt_header *old_h = toku_cachefile_get_userdata(old_cf);
|
||||||
|
|
||||||
//Redirect back from new to old.
|
//Redirect back from new to old.
|
||||||
r = toku_dictionary_redirect_abort(old_h, new_h, txn);
|
r = toku_dictionary_redirect_abort(old_h, new_h, txn);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -37,18 +37,114 @@ int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, YIELDF yield
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
void toku_rollback_txn_close (TOKUTXN txn) {
|
static inline int
|
||||||
memarena_close(&txn->rollentry_arena);
|
txn_has_inprogress_rollback_log(TOKUTXN txn) {
|
||||||
if (txn->rollentry_filename!=0) {
|
return txn->current_rollback.b != ROLLBACK_NONE.b;
|
||||||
int r = close(txn->rollentry_fd);
|
}
|
||||||
assert(r==0);
|
|
||||||
char *fname_in_cwd = toku_construct_full_name(2, txn->logger->directory, txn->rollentry_filename);
|
static inline int
|
||||||
r = unlink(fname_in_cwd);
|
txn_has_spilled_rollback_logs(TOKUTXN txn) {
|
||||||
assert(r==0);
|
return txn->spilled_rollback_tail.b != ROLLBACK_NONE.b;
|
||||||
toku_free(txn->rollentry_filename);
|
}
|
||||||
toku_free(fname_in_cwd);
|
|
||||||
|
int
|
||||||
|
toku_delete_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
|
||||||
|
int r;
|
||||||
|
CACHEFILE cf = txn->logger->rollback_cachefile;
|
||||||
|
struct brt_header *h = toku_cachefile_get_userdata(cf);
|
||||||
|
BLOCKNUM to_free = log->thislogname;
|
||||||
|
if (txn->pinned_inprogress_rollback_log == log) {
|
||||||
|
txn->pinned_inprogress_rollback_log = NULL;
|
||||||
|
}
|
||||||
|
r = toku_cachetable_unpin_and_remove (cf, log->thislogname);
|
||||||
|
assert(r==0);
|
||||||
|
toku_free_blocknum(h->blocktable, &to_free, h);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
toku_apply_txn (TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn,
|
||||||
|
apply_rollback_item func) {
|
||||||
|
int r = 0;
|
||||||
|
// do the commit/abort calls and free everything
|
||||||
|
// we do the commit/abort calls in reverse order too.
|
||||||
|
struct roll_entry *item;
|
||||||
|
//printf("%s:%d abort\n", __FILE__, __LINE__);
|
||||||
|
int count=0;
|
||||||
|
|
||||||
|
BLOCKNUM next_log = ROLLBACK_NONE;
|
||||||
|
uint32_t next_log_hash = 0;
|
||||||
|
|
||||||
|
BOOL is_current = FALSE;
|
||||||
|
if (txn_has_inprogress_rollback_log(txn)) {
|
||||||
|
next_log = txn->current_rollback;
|
||||||
|
next_log_hash = txn->current_rollback_hash;
|
||||||
|
is_current = TRUE;
|
||||||
|
}
|
||||||
|
else if (txn_has_spilled_rollback_logs(txn)) {
|
||||||
|
next_log = txn->spilled_rollback_tail;
|
||||||
|
next_log_hash = txn->spilled_rollback_tail_hash;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint64_t last_sequence = txn->num_rollback_nodes;
|
||||||
|
BOOL found_head = FALSE;
|
||||||
|
while (next_log.b != ROLLBACK_NONE.b) {
|
||||||
|
ROLLBACK_LOG_NODE log;
|
||||||
|
//pin log
|
||||||
|
r = toku_get_and_pin_rollback_log(txn, txn->txnid64, last_sequence-1, next_log, next_log_hash, &log);
|
||||||
|
assert(r==0);
|
||||||
|
last_sequence = log->sequence;
|
||||||
|
if (func) {
|
||||||
|
while ((item=log->newest_logentry)) {
|
||||||
|
log->newest_logentry = item->prev;
|
||||||
|
r = func(txn, item, yield, yieldv, lsn);
|
||||||
|
if (r!=0) return r;
|
||||||
|
count++;
|
||||||
|
if (count%2 == 0) yield(NULL, yieldv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (next_log.b == txn->spilled_rollback_head.b) {
|
||||||
|
assert(!found_head);
|
||||||
|
found_head = TRUE;
|
||||||
|
assert(log->sequence == 0);
|
||||||
|
}
|
||||||
|
next_log = log->older;
|
||||||
|
next_log_hash = log->older_hash;
|
||||||
|
{
|
||||||
|
//Clean up transaction structure to prevent
|
||||||
|
//toku_txn_close from double-freeing
|
||||||
|
if (is_current) {
|
||||||
|
txn->current_rollback = ROLLBACK_NONE;
|
||||||
|
txn->current_rollback_hash = 0;
|
||||||
|
is_current = FALSE;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
txn->spilled_rollback_tail = next_log;
|
||||||
|
txn->spilled_rollback_tail_hash = next_log_hash;
|
||||||
|
}
|
||||||
|
if (found_head) {
|
||||||
|
assert(next_log.b == ROLLBACK_NONE.b);
|
||||||
|
txn->spilled_rollback_head = next_log;
|
||||||
|
txn->spilled_rollback_head_hash = next_log_hash;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//Unpins log
|
||||||
|
r = toku_delete_rollback_log(txn, log);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
void toku_rollback_txn_close (TOKUTXN txn) {
|
||||||
|
{
|
||||||
|
//Clean up all rollback logs if they exist.
|
||||||
|
//Note: This will NOT cleanup any rollback logs as in 'rollinclude'
|
||||||
|
int r = toku_apply_txn(txn, NULL, NULL, ZERO_LSN, NULL);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
assert(txn->spilled_rollback_head.b == ROLLBACK_NONE.b);
|
||||||
|
assert(txn->spilled_rollback_tail.b == ROLLBACK_NONE.b);
|
||||||
|
assert(txn->current_rollback.b == ROLLBACK_NONE.b);
|
||||||
{
|
{
|
||||||
//Remove txn from list (omt) of live transactions
|
//Remove txn from list (omt) of live transactions
|
||||||
OMTVALUE txnagain;
|
OMTVALUE txnagain;
|
||||||
|
@ -86,20 +182,16 @@ void toku_rollback_txn_close (TOKUTXN txn) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void* toku_malloc_in_rollback(TOKUTXN txn, size_t size) {
|
void* toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) {
|
||||||
return malloc_in_memarena(txn->rollentry_arena, size);
|
return malloc_in_memarena(log->rollentry_arena, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void *toku_memdup_in_rollback(TOKUTXN txn, const void *v, size_t len) {
|
void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) {
|
||||||
void *r=toku_malloc_in_rollback(txn, len);
|
void *r=toku_malloc_in_rollback(log, len);
|
||||||
memcpy(r,v,len);
|
memcpy(r,v,len);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
char *toku_strdup_in_rollback(TOKUTXN txn, const char *s) {
|
|
||||||
return toku_memdup_in_rollback(txn, s, strlen(s)+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int note_brt_used_in_txns_parent(OMTVALUE brtv, u_int32_t UU(index), void*txnv) {
|
static int note_brt_used_in_txns_parent(OMTVALUE brtv, u_int32_t UU(index), void*txnv) {
|
||||||
TOKUTXN child = txnv;
|
TOKUTXN child = txnv;
|
||||||
TOKUTXN parent = child->parent;
|
TOKUTXN parent = child->parent;
|
||||||
|
@ -107,215 +199,246 @@ static int note_brt_used_in_txns_parent(OMTVALUE brtv, u_int32_t UU(index), void
|
||||||
int r = toku_txn_note_brt(parent, brt);
|
int r = toku_txn_note_brt(parent, brt);
|
||||||
if (r==0 &&
|
if (r==0 &&
|
||||||
brt->h->txnid_that_created_or_locked_when_empty == toku_txn_get_txnid(child)) {
|
brt->h->txnid_that_created_or_locked_when_empty == toku_txn_get_txnid(child)) {
|
||||||
//Pass magic "no rolltmp needed" flag to parent.
|
//Pass magic "no rollback needed" flag to parent.
|
||||||
brt->h->txnid_that_created_or_locked_when_empty = toku_txn_get_txnid(parent);
|
brt->h->txnid_that_created_or_locked_when_empty = toku_txn_get_txnid(parent);
|
||||||
}
|
}
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Commit each entry in the rollback (rolltmp) log.
|
//Commit each entry in the rollback log.
|
||||||
//If the transaction has a parent, it just promotes its information to its parent.
|
//If the transaction has a parent, it just promotes its information to its parent.
|
||||||
int toku_rollback_commit(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn) {
|
int toku_rollback_commit(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn) {
|
||||||
int r=0;
|
int r=0;
|
||||||
if (txn->parent!=0) {
|
if (txn->parent!=0) {
|
||||||
// First we must put a rollinclude entry into the parent if we have a rollentry file.
|
// First we must put a rollinclude entry into the parent if we spilled
|
||||||
if (txn->rollentry_filename) {
|
|
||||||
int len = strlen(txn->rollentry_filename);
|
if (txn_has_spilled_rollback_logs(txn)) {
|
||||||
// Don't have to strdup the rollentry_filename because
|
uint64_t num_nodes = txn->num_rollback_nodes;
|
||||||
// we take ownership of it.
|
if (txn_has_inprogress_rollback_log(txn)) {
|
||||||
BYTESTRING fname = {len, toku_strdup_in_rollback(txn, txn->rollentry_filename)};
|
num_nodes--; //Don't count the in-progress rollback log.
|
||||||
r = toku_logger_save_rollback_rollinclude(txn->parent, fname);
|
}
|
||||||
|
r = toku_logger_save_rollback_rollinclude(txn->parent, txn->txnid64, num_nodes,
|
||||||
|
txn->spilled_rollback_head, txn->spilled_rollback_head_hash,
|
||||||
|
txn->spilled_rollback_tail, txn->spilled_rollback_tail_hash);
|
||||||
if (r!=0) return r;
|
if (r!=0) return r;
|
||||||
r = close(txn->rollentry_fd);
|
//Remove ownership from child.
|
||||||
if (r!=0) {
|
txn->spilled_rollback_head = ROLLBACK_NONE;
|
||||||
//TODO: #2249.. this is a panic/crash situation
|
txn->spilled_rollback_head_hash = 0;
|
||||||
// If the rolltmp file is necessary for a checkpoint
|
txn->spilled_rollback_tail = ROLLBACK_NONE;
|
||||||
// we CANNOT delete it!
|
txn->spilled_rollback_tail_hash = 0;
|
||||||
// For now.. delete it, but figure out how to deal with this later.
|
}
|
||||||
// Maybe we should just assert that the close succeeds?
|
if (txn_has_inprogress_rollback_log(txn)) {
|
||||||
// We have to do the unlink ourselves, and then
|
ROLLBACK_LOG_NODE parent_log;
|
||||||
// set txn->rollentry_filename=0 so that the cleanup
|
//Pin parent log
|
||||||
// won't try to close the fd again.
|
r = toku_get_and_pin_rollback_log_for_new_entry(txn->parent, &parent_log);
|
||||||
char *fname_in_cwd = toku_construct_full_name(2, txn->logger->directory, txn->rollentry_filename);
|
assert(r==0);
|
||||||
r = unlink(fname_in_cwd);
|
|
||||||
assert(r==0); //Can we assert this at this point?
|
ROLLBACK_LOG_NODE child_log;
|
||||||
unlink(txn->rollentry_filename);
|
//Pin child log
|
||||||
toku_free(txn->rollentry_filename);
|
r = toku_get_and_pin_rollback_log(txn, txn->txnid64, txn->num_rollback_nodes - 1,
|
||||||
toku_free(fname_in_cwd);
|
txn->current_rollback, txn->current_rollback_hash,
|
||||||
txn->rollentry_filename = 0;
|
&child_log);
|
||||||
return r;
|
assert(r==0);
|
||||||
|
|
||||||
|
// Append the list to the front of the parent.
|
||||||
|
if (child_log->oldest_logentry) {
|
||||||
|
// There are some entries, so link them in.
|
||||||
|
child_log->oldest_logentry->prev = parent_log->newest_logentry;
|
||||||
|
if (!parent_log->oldest_logentry) {
|
||||||
|
parent_log->oldest_logentry = child_log->oldest_logentry;
|
||||||
|
}
|
||||||
|
parent_log->newest_logentry = child_log->newest_logentry;
|
||||||
|
parent_log->rollentry_resident_bytecount += child_log->rollentry_resident_bytecount;
|
||||||
|
txn->parent->rollentry_raw_count += txn->rollentry_raw_count;
|
||||||
|
child_log->rollentry_resident_bytecount = 0;
|
||||||
}
|
}
|
||||||
// Stop the cleanup from closing and unlinking the file.
|
if (parent_log->oldest_logentry==NULL) {
|
||||||
toku_free(txn->rollentry_filename);
|
parent_log->oldest_logentry = child_log->oldest_logentry;
|
||||||
txn->rollentry_filename = 0;
|
|
||||||
}
|
|
||||||
// Append the list to the front of the parent.
|
|
||||||
if (txn->oldest_logentry) {
|
|
||||||
// There are some entries, so link them in.
|
|
||||||
txn->oldest_logentry->prev = txn->parent->newest_logentry;
|
|
||||||
if (txn->parent->newest_logentry) {
|
|
||||||
txn->parent->newest_logentry->next = txn->oldest_logentry;
|
|
||||||
} else {
|
|
||||||
txn->parent->oldest_logentry = txn->oldest_logentry;
|
|
||||||
}
|
}
|
||||||
txn->parent->newest_logentry = txn->newest_logentry;
|
child_log->newest_logentry = child_log->oldest_logentry = 0;
|
||||||
txn->parent->rollentry_resident_bytecount += txn->rollentry_resident_bytecount;
|
// Put all the memarena data into the parent.
|
||||||
txn->parent->rollentry_raw_count += txn->rollentry_raw_count;
|
if (memarena_total_size_in_use(child_log->rollentry_arena) > 0) {
|
||||||
txn->rollentry_resident_bytecount = 0;
|
// If there are no bytes to move, then just leave things alone, and let the memory be reclaimed on txn is closed.
|
||||||
}
|
memarena_move_buffers(parent_log->rollentry_arena, child_log->rollentry_arena);
|
||||||
if (txn->parent->oldest_logentry==0) {
|
}
|
||||||
txn->parent->oldest_logentry = txn->oldest_logentry;
|
//Delete child log (unpins child_log)
|
||||||
}
|
r = toku_delete_rollback_log(txn, child_log);
|
||||||
txn->newest_logentry = txn->oldest_logentry = 0;
|
assert(r==0);
|
||||||
// Put all the memarena data into the parent.
|
txn->current_rollback = ROLLBACK_NONE;
|
||||||
if (memarena_total_size_in_use(txn->rollentry_arena) > 0) {
|
txn->current_rollback_hash = 0;
|
||||||
// If there are no bytes to move, then just leave things alone, and let the memory be reclaimed on txn is closed.
|
|
||||||
memarena_move_buffers(txn->parent->rollentry_arena, txn->rollentry_arena);
|
r = toku_maybe_spill_rollbacks(txn->parent, parent_log); //unpins parent_log
|
||||||
|
assert(r==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note the open brts, the omts must be merged
|
// Note the open brts, the omts must be merged
|
||||||
r = toku_omt_iterate(txn->open_brts, note_brt_used_in_txns_parent, txn);
|
r = toku_omt_iterate(txn->open_brts, note_brt_used_in_txns_parent, txn);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
|
|
||||||
r = toku_maybe_spill_rollbacks(txn->parent);
|
|
||||||
assert(r==0);
|
|
||||||
|
|
||||||
//If this transaction needs an fsync (if it commits)
|
//If this transaction needs an fsync (if it commits)
|
||||||
//save that in the parent. Since the commit really happens in the root txn.
|
//save that in the parent. Since the commit really happens in the root txn.
|
||||||
txn->parent->force_fsync_on_commit |= txn->force_fsync_on_commit;
|
txn->parent->force_fsync_on_commit |= txn->force_fsync_on_commit;
|
||||||
txn->parent->has_done_work |= txn->has_done_work;
|
|
||||||
txn->parent->num_rollentries += txn->num_rollentries;
|
txn->parent->num_rollentries += txn->num_rollentries;
|
||||||
} else {
|
} else {
|
||||||
// do the commit calls and free everything
|
r = toku_apply_txn(txn, yield, yieldv, lsn, toku_commit_rollback_item);
|
||||||
// we do the commit calls in reverse order too.
|
assert(r==0);
|
||||||
{
|
|
||||||
struct roll_entry *item;
|
|
||||||
//printf("%s:%d abort\n", __FILE__, __LINE__);
|
|
||||||
int count=0;
|
|
||||||
while ((item=txn->newest_logentry)) {
|
|
||||||
txn->newest_logentry = item->prev;
|
|
||||||
r = toku_commit_rollback_item(txn, item, yield, yieldv, lsn);
|
|
||||||
if (r!=0) return r;
|
|
||||||
count++;
|
|
||||||
if (count%2 == 0) yield(NULL, yieldv);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read stuff out of the file and execute it.
|
|
||||||
if (txn->rollentry_filename) {
|
|
||||||
r = toku_commit_fileentries(txn->rollentry_fd, txn, yield, yieldv, lsn);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_rollback_abort(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn) {
|
int toku_rollback_abort(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn) {
|
||||||
struct roll_entry *item;
|
int r;
|
||||||
int count=0;
|
r = toku_apply_txn(txn, yield, yieldv, lsn, toku_abort_rollback_item);
|
||||||
int r=0;
|
assert(r==0);
|
||||||
while ((item=txn->newest_logentry)) {
|
return r;
|
||||||
txn->newest_logentry = item->prev;
|
|
||||||
r = toku_abort_rollback_item(txn, item, yield, yieldv, lsn);
|
|
||||||
if (r!=0)
|
|
||||||
return r;
|
|
||||||
count++;
|
|
||||||
if (count%2 == 0) yield(NULL, yieldv);
|
|
||||||
}
|
|
||||||
// Read stuff out of the file and roll it back.
|
|
||||||
if (txn->rollentry_filename) {
|
|
||||||
r = toku_rollback_fileentries(txn->rollentry_fd, txn, yield, yieldv, lsn);
|
|
||||||
assert(r==0);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write something out. Keep trying even if partial writes occur.
|
// Write something out. Keep trying even if partial writes occur.
|
||||||
// On error: Return negative with errno set.
|
// On error: Return negative with errno set.
|
||||||
// On success return nbytes.
|
// On success return nbytes.
|
||||||
|
|
||||||
// NOTE : duplicated from logger.c - FIX THIS!!!
|
static size_t
|
||||||
static int write_it (int fd, const void *bufv, int nbytes) {
|
rollback_memory_size(ROLLBACK_LOG_NODE log) {
|
||||||
toku_os_full_write(fd, bufv, nbytes);
|
size_t size = sizeof(*log);
|
||||||
return nbytes;
|
size += memarena_total_memory_size(log->rollentry_arena);
|
||||||
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_maybe_spill_rollbacks (TOKUTXN txn) {
|
static void
|
||||||
// Previously:
|
toku_rollback_log_free(ROLLBACK_LOG_NODE *log_p) {
|
||||||
// if (txn->rollentry_resident_bytecount>txn->logger->write_block_size) {
|
ROLLBACK_LOG_NODE log = *log_p;
|
||||||
// But now we use t
|
*log_p = NULL; //Sanitize
|
||||||
if (memarena_total_memory_size(txn->rollentry_arena) > txn->logger->write_block_size) {
|
|
||||||
struct roll_entry *item;
|
|
||||||
ssize_t bufsize = txn->rollentry_resident_bytecount;
|
|
||||||
char *MALLOC_N(bufsize, buf);
|
|
||||||
if (bufsize==0) return errno;
|
|
||||||
struct wbuf w;
|
|
||||||
wbuf_init(&w, buf, bufsize);
|
|
||||||
while ((item=txn->oldest_logentry)) {
|
|
||||||
assert(item->prev==0);
|
|
||||||
u_int32_t rollback_fsize = toku_logger_rollback_fsize(item);
|
|
||||||
txn->rollentry_resident_bytecount -= rollback_fsize;
|
|
||||||
txn->oldest_logentry = item->next;
|
|
||||||
if (item->next) { item->next->prev=0; }
|
|
||||||
toku_logger_rollback_wbufwrite(&w, item);
|
|
||||||
}
|
|
||||||
assert(txn->rollentry_resident_bytecount==0);
|
|
||||||
assert((ssize_t)w.ndone==bufsize);
|
|
||||||
txn->oldest_logentry = txn->newest_logentry = 0;
|
|
||||||
if (txn->rollentry_fd<0) {
|
|
||||||
char filenamepart[sizeof("__tokudb_rolltmp.") + 16];
|
|
||||||
snprintf(filenamepart, sizeof(filenamepart), "__tokudb_rolltmp.%.16"PRIx64, txn->txnid64);
|
|
||||||
txn->rollentry_filename = toku_xstrdup(filenamepart);
|
|
||||||
char *rollentry_filename_in_cwd = toku_construct_full_name(2, txn->logger->directory, filenamepart);
|
|
||||||
txn->rollentry_fd = open(rollentry_filename_in_cwd, O_CREAT+O_RDWR+O_EXCL+O_BINARY, 0600);
|
|
||||||
int r = errno;
|
|
||||||
toku_free(rollentry_filename_in_cwd);
|
|
||||||
if (txn->rollentry_fd == -1) return r;
|
|
||||||
}
|
|
||||||
uLongf compressed_len = compressBound(w.ndone);
|
|
||||||
char *MALLOC_N(compressed_len, compressed_buf);
|
|
||||||
{
|
|
||||||
int r = compress2((Bytef*)compressed_buf, &compressed_len,
|
|
||||||
(Bytef*)buf, w.ndone,
|
|
||||||
1);
|
|
||||||
assert(r==Z_OK);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
u_int32_t v = toku_htod32(compressed_len);
|
|
||||||
ssize_t r = write_it(txn->rollentry_fd, &v, sizeof(v)); assert(r==sizeof(v));
|
|
||||||
}
|
|
||||||
{
|
|
||||||
ssize_t r = write_it(txn->rollentry_fd, compressed_buf, compressed_len);
|
|
||||||
if (r<0) return r;
|
|
||||||
assert(r==(ssize_t)compressed_len);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
u_int32_t v = toku_htod32(w.ndone);
|
|
||||||
ssize_t r = write_it(txn->rollentry_fd, &v, sizeof(v)); assert(r==sizeof(v));
|
|
||||||
}
|
|
||||||
{
|
|
||||||
u_int32_t v = toku_htod32(compressed_len);
|
|
||||||
ssize_t r = write_it(txn->rollentry_fd, &v, sizeof(v)); assert(r==sizeof(v));
|
|
||||||
}
|
|
||||||
toku_free(compressed_buf);
|
|
||||||
txn->rollentry_filesize+=w.ndone;
|
|
||||||
toku_free(buf);
|
|
||||||
|
|
||||||
// Cleanup the rollback memory
|
// Cleanup the rollback memory
|
||||||
memarena_clear(txn->rollentry_arena);
|
memarena_close(&log->rollentry_arena);
|
||||||
|
toku_free(log);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void toku_rollback_flush_callback (CACHEFILE cachefile, int fd, BLOCKNUM logname,
|
||||||
|
void *rollback_v, void *extraargs, long UU(size),
|
||||||
|
BOOL write_me, BOOL keep_me, BOOL for_checkpoint) {
|
||||||
|
assert(extraargs);
|
||||||
|
int r;
|
||||||
|
TOKUTXN txn = extraargs;
|
||||||
|
ROLLBACK_LOG_NODE log = rollback_v;
|
||||||
|
CACHEFILE rollback_cachefile = txn->logger->rollback_cachefile;
|
||||||
|
struct brt_header *h = toku_cachefile_get_userdata(rollback_cachefile);
|
||||||
|
|
||||||
|
assert(log->thislogname.b==logname.b);
|
||||||
|
assert(rollback_cachefile == cachefile);
|
||||||
|
if (write_me && !h->panic) {
|
||||||
|
int n_workitems, n_threads;
|
||||||
|
toku_cachefile_get_workqueue_load(cachefile, &n_workitems, &n_threads);
|
||||||
|
|
||||||
|
r = toku_serialize_rollback_log_to(fd, log->thislogname, log, h, n_workitems, n_threads, for_checkpoint);
|
||||||
|
if (r) {
|
||||||
|
if (h->panic==0) {
|
||||||
|
char *e = strerror(r);
|
||||||
|
int l = 200 + strlen(e);
|
||||||
|
char s[l];
|
||||||
|
h->panic=r;
|
||||||
|
snprintf(s, l-1, "While writing data to disk, error %d (%s)", r, e);
|
||||||
|
h->panic_string = toku_strdup(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
if (!keep_me) {
|
||||||
|
toku_rollback_log_free(&log);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int toku_rollback_fetch_callback (CACHEFILE cachefile, int fd, BLOCKNUM logname, u_int32_t fullhash,
|
||||||
|
void **rollback_pv, long *sizep, void *extraargs) {
|
||||||
|
assert(extraargs);
|
||||||
|
int r;
|
||||||
|
TOKUTXN txn = extraargs;
|
||||||
|
CACHEFILE rollback_cachefile = txn->logger->rollback_cachefile;
|
||||||
|
struct brt_header *h = toku_cachefile_get_userdata(rollback_cachefile);
|
||||||
|
assert(rollback_cachefile == cachefile);
|
||||||
|
|
||||||
|
ROLLBACK_LOG_NODE *result = (ROLLBACK_LOG_NODE*)rollback_pv;
|
||||||
|
r = toku_deserialize_rollback_log_from(fd, logname, fullhash, result, txn, h);
|
||||||
|
if (r==0) {
|
||||||
|
*sizep = rollback_memory_size(*result);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int toku_create_new_rollback_log (TOKUTXN txn, BLOCKNUM older, uint32_t older_hash, ROLLBACK_LOG_NODE *result) {
|
||||||
|
TAGMALLOC(ROLLBACK_LOG_NODE, log);
|
||||||
|
assert(log);
|
||||||
|
|
||||||
|
int r;
|
||||||
|
CACHEFILE cf = txn->logger->rollback_cachefile;
|
||||||
|
struct brt_header *h = toku_cachefile_get_userdata(cf);
|
||||||
|
|
||||||
|
log->layout_version = BRT_LAYOUT_VERSION;
|
||||||
|
log->layout_version_original = BRT_LAYOUT_VERSION;
|
||||||
|
log->layout_version_read_from_disk = BRT_LAYOUT_VERSION;
|
||||||
|
log->dirty = TRUE;
|
||||||
|
log->txnid = txn->txnid64;
|
||||||
|
log->sequence = txn->num_rollback_nodes++;
|
||||||
|
toku_allocate_blocknum(h->blocktable, &log->thislogname, h);
|
||||||
|
log->thishash = toku_cachetable_hash(cf, log->thislogname);
|
||||||
|
log->older = older;
|
||||||
|
log->older_hash = older_hash;
|
||||||
|
log->oldest_logentry = NULL;
|
||||||
|
log->newest_logentry = NULL;
|
||||||
|
log->rollentry_arena = memarena_create();
|
||||||
|
log->rollentry_resident_bytecount = 0;
|
||||||
|
|
||||||
|
*result = log;
|
||||||
|
r=toku_cachetable_put(cf, log->thislogname, log->thishash,
|
||||||
|
log, rollback_memory_size(log),
|
||||||
|
toku_rollback_flush_callback, toku_rollback_fetch_callback,
|
||||||
|
txn);
|
||||||
|
assert(r==0);
|
||||||
|
txn->current_rollback = log->thislogname;
|
||||||
|
txn->current_rollback_hash = log->thishash;
|
||||||
|
txn->pinned_inprogress_rollback_log = log;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_read_rollback_backwards(BREAD br, struct roll_entry **item, MEMARENA ma) {
|
int
|
||||||
u_int32_t nbytes_n; ssize_t sr;
|
toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log) {
|
||||||
if ((sr=bread_backwards(br, &nbytes_n, 4))!=4) { assert(sr<0); return errno; }
|
int r;
|
||||||
u_int32_t n_bytes=toku_dtoh32(nbytes_n);
|
CACHEFILE cf = txn->logger->rollback_cachefile;
|
||||||
unsigned char *buf = malloc_in_memarena(ma, n_bytes);
|
if (txn->pinned_inprogress_rollback_log == log) {
|
||||||
if (buf==0) return errno;
|
txn->pinned_inprogress_rollback_log = NULL;
|
||||||
if ((sr=bread_backwards(br, buf, n_bytes-4))!=(ssize_t)n_bytes-4) { assert(sr<0); return errno; }
|
}
|
||||||
int r = toku_parse_rollback(buf, n_bytes, item, ma);
|
r = toku_cachetable_unpin(cf, log->thislogname, log->thishash,
|
||||||
if (r!=0) return r;
|
(enum cachetable_dirty)log->dirty, rollback_memory_size(log));
|
||||||
return 0;
|
assert(r==0);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
//Requires: log is pinned
|
||||||
|
// log is current
|
||||||
|
//After:
|
||||||
|
// log is unpinned if a spill happened
|
||||||
|
// Maybe there is no current after (if it spilled)
|
||||||
|
int toku_maybe_spill_rollbacks (TOKUTXN txn, ROLLBACK_LOG_NODE log) {
|
||||||
|
int r = 0;
|
||||||
|
if (log->rollentry_resident_bytecount > txn->logger->write_block_size) {
|
||||||
|
assert(log->thislogname.b == txn->current_rollback.b);
|
||||||
|
//spill
|
||||||
|
if (!txn_has_spilled_rollback_logs(txn)) {
|
||||||
|
//First spilled. Copy to head.
|
||||||
|
txn->spilled_rollback_head = txn->current_rollback;
|
||||||
|
txn->spilled_rollback_head_hash = txn->current_rollback_hash;
|
||||||
|
}
|
||||||
|
//Unconditionally copy to tail. Old tail does not need to be cached anymore.
|
||||||
|
txn->spilled_rollback_tail = txn->current_rollback;
|
||||||
|
txn->spilled_rollback_tail_hash = txn->current_rollback_hash;
|
||||||
|
|
||||||
|
txn->current_rollback = ROLLBACK_NONE;
|
||||||
|
txn->current_rollback_hash = 0;
|
||||||
|
//Unpin
|
||||||
|
r = toku_rollback_log_unpin(txn, log);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
|
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
|
||||||
|
@ -452,7 +575,7 @@ static void note_txn_closing (TOKUTXN txn) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return the number of bytes that went into the rollback data structure (the uncompressed count if there is compression)
|
// Return the number of bytes that went into the rollback data structure (the uncompressed count if there is compression)
|
||||||
int toku_logger_txn_rolltmp_raw_count(TOKUTXN txn, u_int64_t *raw_count)
|
int toku_logger_txn_rollback_raw_count(TOKUTXN txn, u_int64_t *raw_count)
|
||||||
{
|
{
|
||||||
*raw_count = txn->rollentry_raw_count;
|
*raw_count = txn->rollentry_raw_count;
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -466,3 +589,60 @@ int toku_txn_find_by_xid (BRT brt, TXNID xid, TOKUTXN *txnptr) {
|
||||||
if (r == 0) *txnptr = txnv;
|
if (r == 0) *txnptr = txnv;
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int toku_get_and_pin_rollback_log(TOKUTXN txn, TXNID xid, uint64_t sequence, BLOCKNUM name, uint32_t hash, ROLLBACK_LOG_NODE *result) {
|
||||||
|
BOOL save_inprogress_node = FALSE;
|
||||||
|
assert(name.b != ROLLBACK_NONE.b);
|
||||||
|
int r = 0;
|
||||||
|
ROLLBACK_LOG_NODE log = NULL;
|
||||||
|
|
||||||
|
if (name.b == txn->current_rollback.b) {
|
||||||
|
assert(hash == txn->current_rollback_hash);
|
||||||
|
log = txn->pinned_inprogress_rollback_log;
|
||||||
|
save_inprogress_node = TRUE;
|
||||||
|
}
|
||||||
|
if (!log) {
|
||||||
|
CACHEFILE cf = txn->logger->rollback_cachefile;
|
||||||
|
void * log_v;
|
||||||
|
r = toku_cachetable_get_and_pin(cf, name, hash,
|
||||||
|
&log_v, NULL,
|
||||||
|
toku_rollback_flush_callback, toku_rollback_fetch_callback,
|
||||||
|
txn);
|
||||||
|
assert(r==0);
|
||||||
|
log = (ROLLBACK_LOG_NODE)log_v;
|
||||||
|
}
|
||||||
|
if (r==0) {
|
||||||
|
assert(log->thislogname.b == name.b);
|
||||||
|
assert(log->txnid == xid);
|
||||||
|
assert(log->sequence == sequence);
|
||||||
|
if (save_inprogress_node) {
|
||||||
|
txn->pinned_inprogress_rollback_log = log;
|
||||||
|
}
|
||||||
|
*result = log;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return in *result a rollback log node belonging to `txn`.
// If txn_has_inprogress_rollback_log(txn) is true, the current node is fetched with toku_get_and_pin_rollback_log()
// using txn->current_rollback / txn->current_rollback_hash and sequence number txn->num_rollback_nodes-1.
// Otherwise a new node is created with toku_create_new_rollback_log(), passing the spilled tail block and its hash.
// NOTE(review): judging by the name, this is the node the caller appends its next rollback entry to; confirm against callers.
// Both paths assert success, and the node is asserted to carry txn->txnid64 and a block number other than ROLLBACK_NONE.
// Returns r (0 whenever the asserts are enabled and the function returns).
int toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE *result) {
|
||||||
|
int r;
|
||||||
|
ROLLBACK_LOG_NODE log;
|
||||||
|
if (txn_has_inprogress_rollback_log(txn)) {
|
||||||
|
r = toku_get_and_pin_rollback_log(txn, txn->txnid64, txn->num_rollback_nodes-1,
|
||||||
|
txn->current_rollback, txn->current_rollback_hash, &log);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//Generate new one.
|
||||||
|
//tail will be ROLLBACK_NONE if this is the very first
|
||||||
|
r = toku_create_new_rollback_log(txn, txn->spilled_rollback_tail, txn->spilled_rollback_tail_hash, &log);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
if (r==0) {
|
||||||
|
assert(log->txnid == txn->txnid64);
|
||||||
|
assert(log->thislogname.b != ROLLBACK_NONE.b);
|
||||||
|
*result = log;
|
||||||
|
}
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -13,19 +13,24 @@ void toku_poll_txn_progress_function(TOKUTXN txn, uint8_t is_commit, uint8_t sta
|
||||||
int toku_rollback_commit(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn);
|
int toku_rollback_commit(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn);
|
||||||
int toku_rollback_abort(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn);
|
int toku_rollback_abort(TOKUTXN txn, YIELDF yield, void*yieldv, LSN lsn);
|
||||||
void toku_rollback_txn_close (TOKUTXN txn);
|
void toku_rollback_txn_close (TOKUTXN txn);
|
||||||
|
int toku_get_and_pin_rollback_log_for_new_entry (TOKUTXN txn, ROLLBACK_LOG_NODE *result);
|
||||||
|
int toku_get_and_pin_rollback_log(TOKUTXN txn, TXNID xid, uint64_t sequence, BLOCKNUM name, uint32_t hash, ROLLBACK_LOG_NODE *result);
|
||||||
|
int toku_rollback_log_unpin(TOKUTXN txn, ROLLBACK_LOG_NODE log);
|
||||||
|
int toku_delete_rollback_log(TOKUTXN txn, ROLLBACK_LOG_NODE log);
|
||||||
|
|
||||||
|
typedef int(*apply_rollback_item)(TOKUTXN txn, struct roll_entry *item, YIELDF yield, void*yieldv, LSN lsn);
|
||||||
|
|
||||||
int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, YIELDF yield, void*yieldv, LSN lsn);
|
int toku_commit_rollback_item (TOKUTXN txn, struct roll_entry *item, YIELDF yield, void*yieldv, LSN lsn);
|
||||||
int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, YIELDF yield, void*yieldv, LSN lsn);
|
int toku_abort_rollback_item (TOKUTXN txn, struct roll_entry *item, YIELDF yield, void*yieldv, LSN lsn);
|
||||||
|
|
||||||
void *toku_malloc_in_rollback(TOKUTXN txn, size_t size);
|
void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size);
|
||||||
void *toku_memdup_in_rollback(TOKUTXN txn, const void *v, size_t len);
|
void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len);
|
||||||
char *toku_strdup_in_rollback(TOKUTXN txn, const char *s);
|
int toku_maybe_spill_rollbacks (TOKUTXN txn, ROLLBACK_LOG_NODE log);
|
||||||
int toku_maybe_spill_rollbacks (TOKUTXN txn);
|
|
||||||
|
|
||||||
int toku_txn_note_brt (TOKUTXN txn, BRT brt);
|
int toku_txn_note_brt (TOKUTXN txn, BRT brt);
|
||||||
int toku_txn_note_swap_brt (BRT live, BRT zombie);
|
int toku_txn_note_swap_brt (BRT live, BRT zombie);
|
||||||
int toku_txn_note_close_brt (BRT brt);
|
int toku_txn_note_close_brt (BRT brt);
|
||||||
int toku_logger_txn_rolltmp_raw_count(TOKUTXN txn, u_int64_t *raw_count);
|
int toku_logger_txn_rollback_raw_count(TOKUTXN txn, u_int64_t *raw_count);
|
||||||
|
|
||||||
int toku_txn_find_by_xid (BRT brt, TXNID xid, TOKUTXN *txnptr);
|
int toku_txn_find_by_xid (BRT brt, TXNID xid, TOKUTXN *txnptr);
|
||||||
|
|
||||||
|
@ -36,4 +41,23 @@ int toku_commit_fileentries (int fd, TOKUTXN txn, YIELDF yield,void *yieldv, LSN
|
||||||
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
|
//Heaviside function to find a TOKUTXN by TOKUTXN (used to find the index)
|
||||||
int find_xid (OMTVALUE v, void *txnv);
|
int find_xid (OMTVALUE v, void *txnv);
|
||||||
|
|
||||||
|
// One chunk of a transaction's rollback log. Each node records which transaction made it, its position in that
// transaction's sequence of chunks, its own block number, and the block number of the next-older chunk.
struct rollback_log_node {
|
||||||
|
enum typ_tag tag;
|
||||||
|
int layout_version;
|
||||||
|
int layout_version_original;
|
||||||
|
int layout_version_read_from_disk;
|
||||||
|
int dirty;
|
||||||
|
TXNID txnid; // Which transaction made this?
|
||||||
|
uint64_t sequence; // Which rollback log in the sequence is this?
|
||||||
|
BLOCKNUM thislogname; // Which block number is this chunk of the log?
|
||||||
|
// NOTE(review): presumably the cachetable hash that goes with thislogname -- confirm where it is assigned.
uint32_t thishash;
|
||||||
|
BLOCKNUM older; // Which block number is the next oldest chunk of the log?
|
||||||
|
// NOTE(review): presumably the cachetable hash that goes with older -- confirm where it is assigned.
uint32_t older_hash;
|
||||||
|
// NOTE(review): oldest/newest look like the two ends of this node's list of roll entries -- confirm.
struct roll_entry *oldest_logentry;
|
||||||
|
struct roll_entry *newest_logentry;
|
||||||
|
MEMARENA rollentry_arena;
|
||||||
|
size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory.
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -58,15 +58,6 @@ check_ok:
|
||||||
test 0 = 0 $(SUMMARIZE_CMD)
|
test 0 = 0 $(SUMMARIZE_CMD)
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(TOKU_SKIP_1305),1)
|
|
||||||
check_test1305:
|
|
||||||
@echo SKIPPED SLOW TEST $@
|
|
||||||
else
|
|
||||||
# Don't run 1305 under valgrind. It takes way too long.
|
|
||||||
check_test1305$(BINSUF): test1305$(BINSUF)
|
|
||||||
./$< $(VERBVERBOSE) $(SUMMARIZE_CMD)
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(TOKU_SKIP_MINICRON),1)
|
ifeq ($(TOKU_SKIP_MINICRON),1)
|
||||||
check_minicron-test$(BINSUF):
|
check_minicron-test$(BINSUF):
|
||||||
@echo "SKIPPED TEST $@ (Fails in win64 vm due to timing issues)"
|
@echo "SKIPPED TEST $@ (Fails in win64 vm due to timing issues)"
|
||||||
|
|
|
@ -1,80 +0,0 @@
|
||||||
/* Test bread by writing random data and then reading it using bread_backwards() to see if it gives the right answer.
|
|
||||||
* See test_1305 for another bread test (testing to see if it can read 1GB files) */
|
|
||||||
|
|
||||||
#include "test.h"
|
|
||||||
#include <toku_portability.h>
|
|
||||||
|
|
||||||
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <zlib.h>
|
|
||||||
|
|
||||||
#include "../brttypes.h"
|
|
||||||
#include "../bread.h"
|
|
||||||
|
|
||||||
#define FNAME "bread-test.data"
|
|
||||||
|
|
||||||
#define RECORDS 20
|
|
||||||
#define RECORDLEN 100
|
|
||||||
|
|
||||||
char buf[RECORDS][RECORDLEN];
|
|
||||||
int sizes[RECORDS];
|
|
||||||
int sizesn[RECORDS];
|
|
||||||
int nwrote=0;
|
|
||||||
char wrotedata[RECORDS*RECORDLEN];
|
|
||||||
|
|
||||||
static void
|
|
||||||
test (int seed) {
|
|
||||||
srandom(seed);
|
|
||||||
unlink(FNAME);
|
|
||||||
int i;
|
|
||||||
{
|
|
||||||
int fd = open(FNAME, O_CREAT+O_RDWR+O_BINARY, 0777);
|
|
||||||
assert(fd>=0);
|
|
||||||
for (i=0; i<RECORDS; i++) {
|
|
||||||
sizes[i] = 1+ random()%RECORDLEN;
|
|
||||||
sizesn[i] = toku_htod32(sizes[i]);
|
|
||||||
int j;
|
|
||||||
for (j=0; j<sizes[i]; j++) {
|
|
||||||
buf[i][j] = wrotedata[nwrote++] = (char)random();
|
|
||||||
}
|
|
||||||
uLongf compressed_size = compressBound(sizes[i]);
|
|
||||||
Bytef compressed_buf[compressed_size];
|
|
||||||
{ int r = compress2(compressed_buf, &compressed_size, (Bytef*)(buf[i]), sizes[i], 1); assert(r==Z_OK); }
|
|
||||||
u_int32_t compressed_size_n = toku_htod32(compressed_size);
|
|
||||||
{ toku_os_full_write(fd, &compressed_size_n, 4); }
|
|
||||||
{ toku_os_full_write(fd, compressed_buf, compressed_size); }
|
|
||||||
{ toku_os_full_write(fd, &sizesn[i], 4); } // the uncompressed size
|
|
||||||
{ toku_os_full_write(fd, &compressed_size_n, 4); }
|
|
||||||
}
|
|
||||||
{ int r=close(fd); assert(r==0); }
|
|
||||||
}
|
|
||||||
int fd = open(FNAME, O_RDONLY+O_BINARY); assert(fd>=0);
|
|
||||||
// Now read it all backward
|
|
||||||
BREAD br = create_bread_from_fd_initialize_at(fd);
|
|
||||||
while (bread_has_more(br)) {
|
|
||||||
assert(nwrote>0);
|
|
||||||
int to_read = 1+(random()%RECORDLEN); // read from 1 to 100 (if RECORDLEN is 100)
|
|
||||||
if (to_read>nwrote) to_read=nwrote;
|
|
||||||
char rbuf[to_read];
|
|
||||||
int r = bread_backwards(br, rbuf, to_read);
|
|
||||||
assert(r==to_read);
|
|
||||||
assert(memcmp(rbuf, &wrotedata[nwrote-to_read], to_read)==0);
|
|
||||||
nwrote-=to_read;
|
|
||||||
}
|
|
||||||
assert(nwrote==0);
|
|
||||||
|
|
||||||
{ int r=close_bread_without_closing_fd(br); assert(r==0); }
|
|
||||||
{ int r=close(fd); assert(r==0); }
|
|
||||||
unlink(FNAME);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute__((__unused__))) {
|
|
||||||
int i;
|
|
||||||
for (i=0; i<10; i++) test(i);
|
|
||||||
return 0;
|
|
||||||
}
|
|
|
@ -102,7 +102,7 @@ static void checkpoint_pending(void) {
|
||||||
char fname1[] = __FILE__ "test1.dat";
|
char fname1[] = __FILE__ "test1.dat";
|
||||||
r = unlink(fname1); if (r!=0) CKERR2(errno, ENOENT);
|
r = unlink(fname1); if (r!=0) CKERR2(errno, ENOENT);
|
||||||
r = toku_cachetable_openf(&cf, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
|
r = toku_cachetable_openf(&cf, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
|
||||||
toku_cachefile_set_userdata(cf, NULL, NULL, NULL, NULL, NULL, NULL,
|
toku_cachefile_set_userdata(cf, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
dummy_pin_unpin, dummy_pin_unpin);
|
dummy_pin_unpin, dummy_pin_unpin);
|
||||||
|
|
||||||
// Insert items into the cachetable. All dirty.
|
// Insert items into the cachetable. All dirty.
|
||||||
|
|
|
@ -60,7 +60,7 @@ static void cachetable_checkpoint_test(int n, enum cachetable_dirty dirty) {
|
||||||
unlink(fname1);
|
unlink(fname1);
|
||||||
CACHEFILE f1;
|
CACHEFILE f1;
|
||||||
r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
|
r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
|
||||||
toku_cachefile_set_userdata(f1, NULL, NULL, NULL, NULL, NULL, NULL,
|
toku_cachefile_set_userdata(f1, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
dummy_pin_unpin, dummy_pin_unpin);
|
dummy_pin_unpin, dummy_pin_unpin);
|
||||||
|
|
||||||
// insert items into the cachetable. all should be dirty
|
// insert items into the cachetable. all should be dirty
|
||||||
|
|
|
@ -46,7 +46,7 @@ static void cachetable_prefetch_checkpoint_test(int n, enum cachetable_dirty dir
|
||||||
unlink(fname1);
|
unlink(fname1);
|
||||||
CACHEFILE f1;
|
CACHEFILE f1;
|
||||||
r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
|
r = toku_cachetable_openf(&f1, ct, fname1, O_RDWR|O_CREAT, S_IRWXU|S_IRWXG|S_IRWXO); assert(r == 0);
|
||||||
toku_cachefile_set_userdata(f1, NULL, NULL, NULL, NULL, NULL, NULL,
|
toku_cachefile_set_userdata(f1, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
dummy_pin_unpin, dummy_pin_unpin);
|
dummy_pin_unpin, dummy_pin_unpin);
|
||||||
|
|
||||||
// prefetch block n+1. this will take 10 seconds.
|
// prefetch block n+1. this will take 10 seconds.
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
#define TESTDIR "dir." __FILE__
|
#define TESTDIR "dir." __FILE__
|
||||||
|
|
||||||
static const int magic_begin_end_checkpoint_sz = 77; // leave this many bytes in file
|
static const int magic_begin_end_checkpoint_sz = 85; // leave this many bytes in file
|
||||||
|
|
||||||
static int
|
static int
|
||||||
run_test(void) {
|
run_test(void) {
|
||||||
|
@ -28,12 +28,12 @@ run_test(void) {
|
||||||
LSN beginlsn;
|
LSN beginlsn;
|
||||||
// all logs must contain a valid checkpoint
|
// all logs must contain a valid checkpoint
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
|
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, world); assert(r == 0);
|
r = toku_log_comment(logger, NULL, TRUE, 0, world); assert(r == 0);
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, there); assert(r == 0);
|
r = toku_log_comment(logger, NULL, TRUE, 0, there); assert(r == 0);
|
||||||
r = toku_logger_close(&logger); assert(r == 0);
|
r = toku_logger_close(&logger); assert(r == 0);
|
||||||
|
|
|
@ -21,7 +21,7 @@ run_test(void) {
|
||||||
// add begin checkpoint, end checkpoint
|
// add begin checkpoint, end checkpoint
|
||||||
LSN beginlsn;
|
LSN beginlsn;
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, FALSE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, FALSE, 0); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
r = toku_logger_close(&logger); assert(r == 0);
|
r = toku_logger_close(&logger); assert(r == 0);
|
||||||
|
|
||||||
// add hello
|
// add hello
|
||||||
|
|
|
@ -21,7 +21,7 @@ run_test(void) {
|
||||||
|
|
||||||
r = toku_logger_create(&logger); assert(r == 0);
|
r = toku_logger_create(&logger); assert(r == 0);
|
||||||
r = toku_logger_open(TESTDIR, logger); assert(r == 0);
|
r = toku_logger_open(TESTDIR, logger); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
r = toku_logger_close(&logger); assert(r == 0);
|
r = toku_logger_close(&logger); assert(r == 0);
|
||||||
|
|
||||||
// run recovery
|
// run recovery
|
||||||
|
|
|
@ -17,7 +17,7 @@ run_test(void) {
|
||||||
r = toku_logger_open(TESTDIR, logger); assert(r == 0);
|
r = toku_logger_open(TESTDIR, logger); assert(r == 0);
|
||||||
LSN beginlsn;
|
LSN beginlsn;
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
||||||
r = toku_logger_close(&logger); assert(r == 0);
|
r = toku_logger_close(&logger); assert(r == 0);
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ run_test(void) {
|
||||||
LSN firstbegin = ZERO_LSN;
|
LSN firstbegin = ZERO_LSN;
|
||||||
r = toku_log_begin_checkpoint(logger, &firstbegin, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &firstbegin, TRUE, 0); assert(r == 0);
|
||||||
assert(firstbegin.lsn != ZERO_LSN.lsn);
|
assert(firstbegin.lsn != ZERO_LSN.lsn);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, FALSE, firstbegin.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, FALSE, firstbegin.lsn, 0, 0, 0); assert(r == 0);
|
||||||
r = toku_log_begin_checkpoint(logger, NULL, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, NULL, TRUE, 0); assert(r == 0);
|
||||||
r = toku_logger_close(&logger); assert(r == 0);
|
r = toku_logger_close(&logger); assert(r == 0);
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ run_test(void) {
|
||||||
r = toku_logger_open(TESTDIR, logger); assert(r == 0);
|
r = toku_logger_open(TESTDIR, logger); assert(r == 0);
|
||||||
LSN beginlsn;
|
LSN beginlsn;
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
|
|
||||||
BYTESTRING iname = { strlen("missing_tokudb_file"), "missing_tokudb_file" };
|
BYTESTRING iname = { strlen("missing_tokudb_file"), "missing_tokudb_file" };
|
||||||
FILENUM filenum = {42};
|
FILENUM filenum = {42};
|
||||||
|
|
|
@ -21,7 +21,7 @@ run_test(void) {
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
||||||
LSN beginlsn;
|
LSN beginlsn;
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
r = toku_log_comment(logger, NULL, TRUE, 0, hello); assert(r == 0);
|
||||||
BYTESTRING there = { strlen("there"), "there" };
|
BYTESTRING there = { strlen("there"), "there" };
|
||||||
r = toku_log_comment(logger, NULL, TRUE, 0, there); assert(r == 0);
|
r = toku_log_comment(logger, NULL, TRUE, 0, there); assert(r == 0);
|
||||||
|
|
|
@ -30,7 +30,7 @@ run_test(void) {
|
||||||
|
|
||||||
LSN beginlsn;
|
LSN beginlsn;
|
||||||
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
r = toku_log_begin_checkpoint(logger, &beginlsn, TRUE, 0); assert(r == 0);
|
||||||
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0); assert(r == 0);
|
r = toku_log_end_checkpoint(logger, NULL, TRUE, beginlsn.lsn, 0, 0, 0); assert(r == 0);
|
||||||
|
|
||||||
r = toku_logger_close(&logger); assert(r == 0);
|
r = toku_logger_close(&logger); assert(r == 0);
|
||||||
|
|
||||||
|
|
|
@ -1,106 +0,0 @@
|
||||||
/* Test bread_backwards to make sure it can read backwards even for large files. */
|
|
||||||
|
|
||||||
#include <toku_portability.h>
|
|
||||||
|
|
||||||
#include <fcntl.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <zlib.h>
|
|
||||||
|
|
||||||
#include "../brttypes.h"
|
|
||||||
#include "../bread.h"
|
|
||||||
#include "test.h"
|
|
||||||
|
|
||||||
#define FNAME "test1305.data"
|
|
||||||
|
|
||||||
// The buffer size in units of 64-bit integers.
|
|
||||||
#define N_BIGINTS (1<<20)
|
|
||||||
#define BIGINT_SIZE (sizeof(u_int64_t))
|
|
||||||
// How big is the readback buffer (in 8-bit integers)?
|
|
||||||
#define READBACK_BUFSIZE (1<<20)
|
|
||||||
|
|
||||||
|
|
||||||
static void
|
|
||||||
test (u_int64_t fsize) {
|
|
||||||
unlink(FNAME);
|
|
||||||
// Create a file of size fsize. Fill it with 8-byte values which are integers, in order)
|
|
||||||
assert(fsize%(N_BIGINTS*sizeof(u_int64_t)) == 0); // Make sure the fsize is a multiple of the buffer size.
|
|
||||||
u_int64_t i = 0;
|
|
||||||
{
|
|
||||||
int fd = open(FNAME, O_CREAT+O_RDWR+O_BINARY, 0777);
|
|
||||||
assert(fd>=0);
|
|
||||||
static u_int64_t buf[N_BIGINTS]; //windows cannot handle this on the stack
|
|
||||||
static char compressed_buf[N_BIGINTS*2 + 1000]; // this is more than compressbound returns
|
|
||||||
uLongf compressed_len;
|
|
||||||
while (i*BIGINT_SIZE < fsize) {
|
|
||||||
if (verbose>0 && i % (1<<25) == 0) {
|
|
||||||
printf(" %s:test (%"PRIu64") forwards [%"PRIu64"%%]\n", __FILE__, fsize, 100*BIGINT_SIZE*((u_int64_t)i) / fsize);
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
|
|
||||||
int j;
|
|
||||||
for (j=0; j<N_BIGINTS; j++) {
|
|
||||||
buf[j] = i++;
|
|
||||||
}
|
|
||||||
assert(sizeof(buf) == N_BIGINTS * BIGINT_SIZE);
|
|
||||||
{
|
|
||||||
compressed_len = sizeof(compressed_buf);
|
|
||||||
int r = compress2((Bytef*)compressed_buf, &compressed_len, (Bytef*)buf, sizeof(buf), 1);
|
|
||||||
assert(r==Z_OK);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
u_int32_t v = toku_htod32(compressed_len);
|
|
||||||
toku_os_full_write(fd, &v, sizeof(v));
|
|
||||||
}
|
|
||||||
{
|
|
||||||
toku_os_full_write(fd, compressed_buf, compressed_len);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
u_int32_t v = toku_htod32(sizeof(buf));
|
|
||||||
toku_os_full_write(fd, &v, sizeof(v));
|
|
||||||
}
|
|
||||||
{
|
|
||||||
u_int32_t v = toku_htod32(compressed_len);
|
|
||||||
toku_os_full_write(fd, &v, sizeof(v));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
{ int r = close(fd); assert(r==0); }
|
|
||||||
}
|
|
||||||
assert(i*BIGINT_SIZE == fsize);
|
|
||||||
// Now read it all backward
|
|
||||||
{
|
|
||||||
int fd = open(FNAME, O_RDONLY+O_BINARY); assert(fd>=0);
|
|
||||||
BREAD br = create_bread_from_fd_initialize_at(fd);
|
|
||||||
while (bread_has_more(br)) {
|
|
||||||
if (verbose>0 && (fsize/BIGINT_SIZE - i) % (1<<25) == 0) {
|
|
||||||
printf(" %s:test (%"PRIu64") backwards [%"PRIu64"%%]\n", __FILE__, fsize, 100*BIGINT_SIZE*((u_int64_t)i) / fsize);
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
assert(i>0);
|
|
||||||
i--;
|
|
||||||
u_int64_t storedi;
|
|
||||||
{ int r = bread_backwards(br, &storedi, sizeof(storedi)); assert(r==sizeof(storedi)); }
|
|
||||||
assert(storedi==i);
|
|
||||||
}
|
|
||||||
assert(i==0);
|
|
||||||
{ int r=close_bread_without_closing_fd(br); assert(r==0); }
|
|
||||||
{ int r=close(fd); assert(r==0); }
|
|
||||||
}
|
|
||||||
//printf("Did %" PRIu64 "\n", fsize);
|
|
||||||
//system("ls -l " FNAME);
|
|
||||||
unlink(FNAME);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
test_main (int argc, const char *argv[]) {
|
|
||||||
default_parse_args(argc, argv);
|
|
||||||
test(1LL<<23);
|
|
||||||
test(1LL<<30);
|
|
||||||
test(1LL<<31);
|
|
||||||
test(1LL<<32);
|
|
||||||
test(1LL<<33);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
|
@ -153,6 +153,9 @@ int create_logfiles() {
|
||||||
LSN lsn = {0};
|
LSN lsn = {0};
|
||||||
TXNID txnid = 0;
|
TXNID txnid = 0;
|
||||||
TXNID cp_txnid = 0;
|
TXNID cp_txnid = 0;
|
||||||
|
|
||||||
|
u_int32_t num_fassociate = 0;
|
||||||
|
u_int32_t num_xstillopen = 0;
|
||||||
|
|
||||||
bs_aname.len = 4; bs_aname.data="a.db";
|
bs_aname.len = 4; bs_aname.data="a.db";
|
||||||
bs_bname.len = 4; bs_bname.data="b.db";
|
bs_bname.len = 4; bs_bname.data="b.db";
|
||||||
|
@ -171,31 +174,40 @@ int create_logfiles() {
|
||||||
//fcreate 'F': lsn=2 txnid=1 filenum=0 fname={len=4 data="a.db"} mode=0777 treeflags=0 crc=18a3d525 len=49
|
//fcreate 'F': lsn=2 txnid=1 filenum=0 fname={len=4 data="a.db"} mode=0777 treeflags=0 crc=18a3d525 len=49
|
||||||
r = toku_log_fcreate(logger, &lsn, NO_FSYNC, txnid, fn_aname, bs_aname, 0x0777, 0, 0, bs_empty); assert(r==0);
|
r = toku_log_fcreate(logger, &lsn, NO_FSYNC, txnid, fn_aname, bs_aname, 0x0777, 0, 0, bs_empty); assert(r==0);
|
||||||
//commit 'C': lsn=3 txnid=1 crc=00001f1e len=29
|
//commit 'C': lsn=3 txnid=1 crc=00001f1e len=29
|
||||||
r = toku_log_commit(logger, &lsn, FSYNC, txnid); assert(r==0);
|
r = toku_log_xcommit(logger, &lsn, FSYNC, txnid); assert(r==0);
|
||||||
//xbegin 'b': lsn=4 parenttxnid=0 crc=00000a1f len=29
|
//xbegin 'b': lsn=4 parenttxnid=0 crc=00000a1f len=29
|
||||||
r = toku_log_xbegin(logger, &lsn, NO_FSYNC, 0); assert(r==0); txnid = lsn.lsn;
|
r = toku_log_xbegin(logger, &lsn, NO_FSYNC, 0); assert(r==0); txnid = lsn.lsn;
|
||||||
//fcreate 'F': lsn=5 txnid=4 filenum=1 fname={len=4 data="b.db"} mode=0777 treeflags=0 crc=14a47925 len=49
|
//fcreate 'F': lsn=5 txnid=4 filenum=1 fname={len=4 data="b.db"} mode=0777 treeflags=0 crc=14a47925 len=49
|
||||||
r = toku_log_fcreate(logger, &lsn, NO_FSYNC, txnid, fn_bname, bs_bname, 0x0777, 0, 0, bs_empty); assert(r==0);
|
r = toku_log_fcreate(logger, &lsn, NO_FSYNC, txnid, fn_bname, bs_bname, 0x0777, 0, 0, bs_empty); assert(r==0);
|
||||||
//commit 'C': lsn=6 txnid=4 crc=0000c11e len=29
|
//commit 'C': lsn=6 txnid=4 crc=0000c11e len=29
|
||||||
r = toku_log_commit(logger, &lsn, FSYNC, txnid); assert(r==0);
|
r = toku_log_xcommit(logger, &lsn, FSYNC, txnid); assert(r==0);
|
||||||
//xbegin 'b': lsn=7 parenttxnid=0 crc=0000f91f len=29
|
//xbegin 'b': lsn=7 parenttxnid=0 crc=0000f91f len=29
|
||||||
r = toku_log_xbegin(logger, &lsn, NO_FSYNC, 0); assert(r==0); txnid = lsn.lsn;
|
r = toku_log_xbegin(logger, &lsn, NO_FSYNC, 0); assert(r==0); txnid = lsn.lsn;
|
||||||
//enq_insert 'I': lsn=8 filenum=0 xid=7 key={len=2 data="a\000"} value={len=2 data="b\000"} crc=40b863e4 len=45
|
//enq_insert 'I': lsn=8 filenum=0 xid=7 key={len=2 data="a\000"} value={len=2 data="b\000"} crc=40b863e4 len=45
|
||||||
r = toku_log_enq_insert(logger, &lsn, NO_FSYNC, fn_aname, txnid, bs_a, bs_b); assert(r==0);
|
r = toku_log_enq_insert(logger, &lsn, NO_FSYNC, fn_aname, txnid, bs_a, bs_b); assert(r==0);
|
||||||
//begin_checkpoint 'x': lsn=9 timestamp=1251309957584197 crc=cd067878 len=29
|
//begin_checkpoint 'x': lsn=9 timestamp=1251309957584197 crc=cd067878 len=29
|
||||||
r = toku_log_begin_checkpoint(logger, &lsn, NO_FSYNC, 1251309957584197); assert(r==0); cp_txnid = lsn.lsn;
|
r = toku_log_begin_checkpoint(logger, &lsn, NO_FSYNC, 1251309957584197); assert(r==0); cp_txnid = lsn.lsn;
|
||||||
//xstillopen 's': lsn=10 txnid=7 parent=0 crc=00061816 len=37
|
|
||||||
r = toku_log_xstillopen(logger, &lsn, NO_FSYNC, txnid, 0); assert(r==0);
|
|
||||||
//fassociate 'f': lsn=11 filenum=1 fname={len=4 data="b.db"} crc=a7126035 len=33
|
//fassociate 'f': lsn=11 filenum=1 fname={len=4 data="b.db"} crc=a7126035 len=33
|
||||||
r = toku_log_fassociate(logger, &lsn, NO_FSYNC, fn_bname, 0, bs_bname); assert(r==0);
|
r = toku_log_fassociate(logger, &lsn, NO_FSYNC, fn_bname, 0, bs_bname); assert(r==0);
|
||||||
|
num_fassociate++;
|
||||||
//fassociate 'f': lsn=12 filenum=0 fname={len=4 data="a.db"} crc=a70c5f35 len=33
|
//fassociate 'f': lsn=12 filenum=0 fname={len=4 data="a.db"} crc=a70c5f35 len=33
|
||||||
r = toku_log_fassociate(logger, &lsn, NO_FSYNC, fn_aname, 0, bs_aname); assert(r==0);
|
r = toku_log_fassociate(logger, &lsn, NO_FSYNC, fn_aname, 0, bs_aname); assert(r==0);
|
||||||
|
num_fassociate++;
|
||||||
|
//xstillopen 's': lsn=10 txnid=7 parent=0 crc=00061816 len=37 <- obsolete
|
||||||
|
{
|
||||||
|
FILENUMS filenums = {0, NULL};
|
||||||
|
r = toku_log_xstillopen(logger, &lsn, NO_FSYNC, txnid, 0,
|
||||||
|
0, filenums, 0, 0, 0,
|
||||||
|
ROLLBACK_NONE, ROLLBACK_NONE, ROLLBACK_NONE);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
num_xstillopen++;
|
||||||
//end_checkpoint 'X': lsn=13 txnid=9 timestamp=1251309957586872 crc=cd285c30 len=37
|
//end_checkpoint 'X': lsn=13 txnid=9 timestamp=1251309957586872 crc=cd285c30 len=37
|
||||||
r = toku_log_end_checkpoint(logger, &lsn, FSYNC, cp_txnid, 1251309957586872); assert(r==0);
|
r = toku_log_end_checkpoint(logger, &lsn, FSYNC, cp_txnid, 1251309957586872, num_fassociate, num_xstillopen); assert(r==0);
|
||||||
//enq_insert 'I': lsn=14 filenum=1 xid=7 key={len=2 data="b\000"} value={len=2 data="a\000"} crc=40388be4 len=45
|
//enq_insert 'I': lsn=14 filenum=1 xid=7 key={len=2 data="b\000"} value={len=2 data="a\000"} crc=40388be4 len=45
|
||||||
r = toku_log_enq_insert(logger, &lsn, NO_FSYNC, fn_bname, txnid, bs_b, bs_a); assert(r==0);
|
r = toku_log_enq_insert(logger, &lsn, NO_FSYNC, fn_bname, txnid, bs_b, bs_a); assert(r==0);
|
||||||
//commit 'C': lsn=15 txnid=7 crc=00016d1e len=29
|
//commit 'C': lsn=15 txnid=7 crc=00016d1e len=29
|
||||||
r = toku_log_commit(logger, &lsn, FSYNC, txnid); assert(r==0);
|
r = toku_log_xcommit(logger, &lsn, FSYNC, txnid); assert(r==0);
|
||||||
|
|
||||||
// close logger
|
// close logger
|
||||||
r = toku_logger_close(&logger); assert(r==0);
|
r = toku_logger_close(&logger); assert(r==0);
|
||||||
|
|
58
newbrt/txn.c
58
newbrt/txn.c
|
@ -12,18 +12,20 @@ int toku_txn_begin_txn (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGER log
|
||||||
|
|
||||||
int toku_txn_begin_with_xid (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGER logger, TXNID xid) {
|
int toku_txn_begin_with_xid (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGER logger, TXNID xid) {
|
||||||
if (logger->is_panicked) return EINVAL;
|
if (logger->is_panicked) return EINVAL;
|
||||||
|
assert(logger->rollback_cachefile);
|
||||||
TAGMALLOC(TOKUTXN, result);
|
TAGMALLOC(TOKUTXN, result);
|
||||||
if (result==0)
|
if (result==0)
|
||||||
return errno;
|
return errno;
|
||||||
int r;
|
int r;
|
||||||
|
LSN first_lsn;
|
||||||
if (xid == 0) {
|
if (xid == 0) {
|
||||||
r = toku_log_xbegin(logger, &result->first_lsn, 0, parent_tokutxn ? parent_tokutxn->txnid64 : 0);
|
r = toku_log_xbegin(logger, &first_lsn, 0, parent_tokutxn ? parent_tokutxn->txnid64 : 0);
|
||||||
if (r!=0) goto died;
|
if (r!=0) goto died;
|
||||||
} else
|
} else
|
||||||
result->first_lsn.lsn = xid;
|
first_lsn.lsn = xid;
|
||||||
r = toku_omt_create(&result->open_brts);
|
r = toku_omt_create(&result->open_brts);
|
||||||
if (r!=0) goto died;
|
if (r!=0) goto died;
|
||||||
result->txnid64 = result->first_lsn.lsn;
|
result->txnid64 = first_lsn.lsn;
|
||||||
XIDS parent_xids;
|
XIDS parent_xids;
|
||||||
if (parent_tokutxn==NULL)
|
if (parent_tokutxn==NULL)
|
||||||
parent_xids = xids_get_root_xids();
|
parent_xids = xids_get_root_xids();
|
||||||
|
@ -33,13 +35,19 @@ int toku_txn_begin_with_xid (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGE
|
||||||
goto died;
|
goto died;
|
||||||
result->logger = logger;
|
result->logger = logger;
|
||||||
result->parent = parent_tokutxn;
|
result->parent = parent_tokutxn;
|
||||||
result->oldest_logentry = result->newest_logentry = 0;
|
|
||||||
|
|
||||||
result->rollentry_arena = memarena_create();
|
|
||||||
result->num_rollentries = 0;
|
result->num_rollentries = 0;
|
||||||
result->num_rollentries_processed = 0;
|
result->num_rollentries_processed = 0;
|
||||||
result->progress_poll_fun = NULL;
|
result->progress_poll_fun = NULL;
|
||||||
result->progress_poll_fun_extra = NULL;
|
result->progress_poll_fun_extra = NULL;
|
||||||
|
result->spilled_rollback_head = ROLLBACK_NONE;
|
||||||
|
result->spilled_rollback_tail = ROLLBACK_NONE;
|
||||||
|
result->spilled_rollback_head_hash = 0;
|
||||||
|
result->spilled_rollback_tail_hash = 0;
|
||||||
|
result->current_rollback = ROLLBACK_NONE;
|
||||||
|
result->current_rollback_hash = 0;
|
||||||
|
result->num_rollback_nodes = 0;
|
||||||
|
result->pinned_inprogress_rollback_log = NULL;
|
||||||
|
|
||||||
if (toku_omt_size(logger->live_txns) == 0) {
|
if (toku_omt_size(logger->live_txns) == 0) {
|
||||||
assert(logger->oldest_living_xid == TXNID_NONE_LIVING);
|
assert(logger->oldest_living_xid == TXNID_NONE_LIVING);
|
||||||
|
@ -59,13 +67,9 @@ int toku_txn_begin_with_xid (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGE
|
||||||
assert(idx > 0);
|
assert(idx > 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
result->rollentry_resident_bytecount=0;
|
|
||||||
result->rollentry_raw_count = 0;
|
result->rollentry_raw_count = 0;
|
||||||
result->rollentry_filename = 0;
|
|
||||||
result->rollentry_fd = -1;
|
|
||||||
result->rollentry_filesize = 0;
|
|
||||||
result->force_fsync_on_commit = FALSE;
|
result->force_fsync_on_commit = FALSE;
|
||||||
result->has_done_work = FALSE;
|
result->recovered_from_checkpoint = FALSE;
|
||||||
*tokutxn = result;
|
*tokutxn = result;
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
|
@ -75,6 +79,36 @@ died:
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Used on recovery to recover a transaction: fills in `txn` from the saved state in `info`.
// Copies rollentry_raw_count, force_fsync_on_commit, num_rollback_nodes and num_rollentries, registers every BRT
// listed in info->open_brts with the transaction, and copies the three rollback block numbers
// (spilled head, spilled tail, current). The matching hashes are not copied; they are computed with
// toku_cachetable_hash() against the logger's rollback cachefile.
// Finally marks the transaction as recovered_from_checkpoint. Always returns 0.
|
||||||
|
int
|
||||||
|
toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info) {
|
||||||
|
#define COPY_FROM_INFO(field) txn->field = info->field
|
||||||
|
COPY_FROM_INFO(rollentry_raw_count);
|
||||||
|
uint32_t i;
|
||||||
|
// Register each BRT recorded in info with the transaction; a failure here is asserted, not returned.
for (i = 0; i < info->num_brts; i++) {
|
||||||
|
BRT brt = info->open_brts[i];
|
||||||
|
int r = toku_txn_note_brt(txn, brt);
|
||||||
|
assert(r==0);
|
||||||
|
}
|
||||||
|
COPY_FROM_INFO(force_fsync_on_commit );
|
||||||
|
COPY_FROM_INFO(num_rollback_nodes);
|
||||||
|
COPY_FROM_INFO(num_rollentries);
|
||||||
|
|
||||||
|
CACHEFILE rollback_cachefile = txn->logger->rollback_cachefile;
|
||||||
|
|
||||||
|
// For each rollback block number: copy it from info, then compute its hash for this cachefile.
COPY_FROM_INFO(spilled_rollback_head);
|
||||||
|
txn->spilled_rollback_head_hash = toku_cachetable_hash(rollback_cachefile,
|
||||||
|
txn->spilled_rollback_head);
|
||||||
|
COPY_FROM_INFO(spilled_rollback_tail);
|
||||||
|
txn->spilled_rollback_tail_hash = toku_cachetable_hash(rollback_cachefile,
|
||||||
|
txn->spilled_rollback_tail);
|
||||||
|
COPY_FROM_INFO(current_rollback);
|
||||||
|
txn->current_rollback_hash = toku_cachetable_hash(rollback_cachefile,
|
||||||
|
txn->current_rollback);
|
||||||
|
#undef COPY_FROM_INFO
|
||||||
|
txn->recovered_from_checkpoint = TRUE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Doesn't close the txn, just performs the commit operations.
|
// Doesn't close the txn, just performs the commit operations.
|
||||||
int toku_txn_commit_txn(TOKUTXN txn, int nosync, YIELDF yield, void *yieldv,
|
int toku_txn_commit_txn(TOKUTXN txn, int nosync, YIELDF yield, void *yieldv,
|
||||||
|
@ -92,13 +126,13 @@ int toku_txn_commit_with_lsn(TOKUTXN txn, int nosync, YIELDF yield, void *yieldv
|
||||||
// panic handled in log_commit
|
// panic handled in log_commit
|
||||||
|
|
||||||
//Child transactions do not actually 'commit'. They promote their changes to parent, so no need to fsync if this txn has a parent.
|
//Child transactions do not actually 'commit'. They promote their changes to parent, so no need to fsync if this txn has a parent.
|
||||||
int do_fsync = !txn->parent && (txn->force_fsync_on_commit || (!nosync && txn->has_done_work));
|
int do_fsync = !txn->parent && (txn->force_fsync_on_commit || (!nosync && txn->num_rollentries>0));
|
||||||
|
|
||||||
txn->progress_poll_fun = poll;
|
txn->progress_poll_fun = poll;
|
||||||
txn->progress_poll_fun_extra = poll_extra;
|
txn->progress_poll_fun_extra = poll_extra;
|
||||||
|
|
||||||
if (release_locks) release_locks(locks_thunk);
|
if (release_locks) release_locks(locks_thunk);
|
||||||
r = toku_log_commit(txn->logger, (LSN*)0, do_fsync, txn->txnid64); // exits holding neither of the tokulogger locks.
|
r = toku_log_xcommit(txn->logger, (LSN*)0, do_fsync, txn->txnid64); // exits holding neither of the tokulogger locks.
|
||||||
if (reacquire_locks) reacquire_locks(locks_thunk);
|
if (reacquire_locks) reacquire_locks(locks_thunk);
|
||||||
if (r!=0)
|
if (r!=0)
|
||||||
return r;
|
return r;
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
int toku_txn_begin_txn (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGER logger);
|
int toku_txn_begin_txn (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGER logger);
|
||||||
int toku_txn_begin_with_xid (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGER logger, TXNID xid);
|
int toku_txn_begin_with_xid (TOKUTXN parent_tokutxn, TOKUTXN *tokutxn, TOKULOGGER logger, TXNID xid);
|
||||||
|
int toku_txn_load_txninfo (TOKUTXN txn, TXNINFO info);
|
||||||
|
|
||||||
int toku_txn_commit_txn (TOKUTXN txn, int nosync, YIELDF yield, void *yieldv,
|
int toku_txn_commit_txn (TOKUTXN txn, int nosync, YIELDF yield, void *yieldv,
|
||||||
TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra,
|
TXN_PROGRESS_POLL_FUNCTION poll, void *poll_extra,
|
||||||
|
|
|
@ -170,7 +170,6 @@ static inline void wbuf_DISKOFF (struct wbuf *w, DISKOFF off) {
|
||||||
static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
|
static inline void wbuf_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
|
||||||
wbuf_ulonglong(w, b.b);
|
wbuf_ulonglong(w, b.b);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
|
static inline void wbuf_nocrc_BLOCKNUM (struct wbuf *w, BLOCKNUM b) {
|
||||||
wbuf_nocrc_ulonglong(w, b.b);
|
wbuf_nocrc_ulonglong(w, b.b);
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,7 +60,7 @@ int singlex_child = 0; // Do a single transaction, but do all work with a child
|
||||||
int singlex = 0; // Do a single transaction
|
int singlex = 0; // Do a single transaction
|
||||||
int singlex_create = 0; // Create the db using the single transaction (only valid if singlex)
|
int singlex_create = 0; // Create the db using the single transaction (only valid if singlex)
|
||||||
int insert1first = 0; // insert 1 before doing the rest
|
int insert1first = 0; // insert 1 before doing the rest
|
||||||
int check_small_rolltmp = 0; // verify that the rollback logs are small (only valid if singlex)
|
int check_small_rollback = 0; // verify that the rollback logs are small (only valid if singlex)
|
||||||
int do_transactions = 0;
|
int do_transactions = 0;
|
||||||
int if_transactions_do_logging = DB_INIT_LOG; // set this to zero if we want no logging when transactions are used
|
int if_transactions_do_logging = DB_INIT_LOG; // set this to zero if we want no logging when transactions are used
|
||||||
int do_abort = 0;
|
int do_abort = 0;
|
||||||
|
@ -210,14 +210,14 @@ static void benchmark_shutdown (void) {
|
||||||
#endif
|
#endif
|
||||||
if (do_transactions && singlex && !insert1first && (singlex_create || prelock)) {
|
if (do_transactions && singlex && !insert1first && (singlex_create || prelock)) {
|
||||||
#if defined(TOKUDB)
|
#if defined(TOKUDB)
|
||||||
//There should be a single 'truncate' in the rolltmp instead of many 'insert' entries.
|
//There should be a single 'truncate' in the rollback instead of many 'insert' entries.
|
||||||
struct txn_stat *s;
|
struct txn_stat *s;
|
||||||
r = tid->txn_stat(tid, &s);
|
r = tid->txn_stat(tid, &s);
|
||||||
assert(r==0);
|
assert(r==0);
|
||||||
//TODO: #1125 Always do the test after performance testing is done.
|
//TODO: #1125 Always do the test after performance testing is done.
|
||||||
if (singlex_child) fprintf(stderr, "SKIPPED 'small rolltmp' test for child txn\n");
|
if (singlex_child) fprintf(stderr, "SKIPPED 'small rollback' test for child txn\n");
|
||||||
else
|
else
|
||||||
assert(s->rolltmp_raw_count < 100); // gross test, not worth investigating details
|
assert(s->rollback_raw_count < 100); // gross test, not worth investigating details
|
||||||
free(s);
|
free(s);
|
||||||
//system("ls -l bench.tokudb");
|
//system("ls -l bench.tokudb");
|
||||||
#endif
|
#endif
|
||||||
|
@ -374,7 +374,7 @@ static int print_usage (const char *argv0) {
|
||||||
fprintf(stderr, " --singlex-child (implies -x) Run the whole job as a single transaction, do all work a child of that transaction.\n");
|
fprintf(stderr, " --singlex-child (implies -x) Run the whole job as a single transaction, do all work a child of that transaction.\n");
|
||||||
fprintf(stderr, " --finish-child-first Commit/abort child before doing so to parent (no effect if no child).\n");
|
fprintf(stderr, " --finish-child-first Commit/abort child before doing so to parent (no effect if no child).\n");
|
||||||
fprintf(stderr, " --singlex-create (implies --singlex) Create the file using the single transaction (Default is to use a different transaction to create.)\n");
|
fprintf(stderr, " --singlex-create (implies --singlex) Create the file using the single transaction (Default is to use a different transaction to create.)\n");
|
||||||
fprintf(stderr, " --check_small_rolltmp (Only valid in --singlex mode) Verify that very little data was saved in the rollback logs.\n");
|
fprintf(stderr, " --check_small_rollback (Only valid in --singlex mode) Verify that very little data was saved in the rollback logs.\n");
|
||||||
fprintf(stderr, " --prelock Prelock the database.\n");
|
fprintf(stderr, " --prelock Prelock the database.\n");
|
||||||
fprintf(stderr, " --prelockflag Prelock the database and send the DB_PRELOCKED_WRITE flag.\n");
|
fprintf(stderr, " --prelockflag Prelock the database and send the DB_PRELOCKED_WRITE flag.\n");
|
||||||
fprintf(stderr, " --abort Abort the singlex after the transaction is over. (Requires --singlex.)\n");
|
fprintf(stderr, " --abort Abort the singlex after the transaction is over. (Requires --singlex.)\n");
|
||||||
|
@ -463,8 +463,8 @@ int main (int argc, const char *argv[]) {
|
||||||
singlex = 1;
|
singlex = 1;
|
||||||
} else if (strcmp(arg, "--insert1first") == 0) {
|
} else if (strcmp(arg, "--insert1first") == 0) {
|
||||||
insert1first = 1;
|
insert1first = 1;
|
||||||
} else if (strcmp(arg, "--check_small_rolltmp") == 0) {
|
} else if (strcmp(arg, "--check_small_rollback") == 0) {
|
||||||
check_small_rolltmp = 1;
|
check_small_rollback = 1;
|
||||||
} else if (strcmp(arg, "--xcount") == 0) {
|
} else if (strcmp(arg, "--xcount") == 0) {
|
||||||
if (i+1 >= argc) return print_usage(argv[0]);
|
if (i+1 >= argc) return print_usage(argv[0]);
|
||||||
items_per_transaction = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0);
|
items_per_transaction = strtoll(argv[++i], &endptr, 10); assert(*endptr == 0);
|
||||||
|
@ -560,13 +560,13 @@ int main (int argc, const char *argv[]) {
|
||||||
printf("insertions of %d per batch%s\n", items_per_iteration, do_transactions ? " (with transactions)" : "");
|
printf("insertions of %d per batch%s\n", items_per_iteration, do_transactions ? " (with transactions)" : "");
|
||||||
}
|
}
|
||||||
#if !defined TOKUDB
|
#if !defined TOKUDB
|
||||||
if (check_small_rolltmp) {
|
if (check_small_rollback) {
|
||||||
fprintf(stderr, "--check_small_rolltmp only works on the TokuDB (not BDB)\n");
|
fprintf(stderr, "--check_small_rollback only works on the TokuDB (not BDB)\n");
|
||||||
return print_usage(argv[0]);
|
return print_usage(argv[0]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
if (check_small_rolltmp && !singlex) {
|
if (check_small_rollback && !singlex) {
|
||||||
fprintf(stderr, "--check_small_rolltmp requires --singlex\n");
|
fprintf(stderr, "--check_small_rollback requires --singlex\n");
|
||||||
return print_usage(argv[0]);
|
return print_usage(argv[0]);
|
||||||
}
|
}
|
||||||
benchmark_setup();
|
benchmark_setup();
|
||||||
|
|
|
@ -332,6 +332,7 @@ endif
|
||||||
mkdir dir.$*.c.tdb.recover && \
|
mkdir dir.$*.c.tdb.recover && \
|
||||||
cp dir.$*.c.tdb/tokudb.directory dir.$*.c.tdb.recover/ && \
|
cp dir.$*.c.tdb/tokudb.directory dir.$*.c.tdb.recover/ && \
|
||||||
cp dir.$*.c.tdb/tokudb.environment dir.$*.c.tdb.recover/ && \
|
cp dir.$*.c.tdb/tokudb.environment dir.$*.c.tdb.recover/ && \
|
||||||
|
cp dir.$*.c.tdb/tokudb.rollback dir.$*.c.tdb.recover/ && \
|
||||||
cp dir.$*.c.tdb/*.tokulog dir.$*.c.tdb.recover/ && \
|
cp dir.$*.c.tdb/*.tokulog dir.$*.c.tdb.recover/ && \
|
||||||
echo doing recovery &&\
|
echo doing recovery &&\
|
||||||
$(VGRIND) ../../newbrt/tdb-recover dir.$*.c.tdb.recover dir.$*.c.tdb.recover && \
|
$(VGRIND) ../../newbrt/tdb-recover dir.$*.c.tdb.recover dir.$*.c.tdb.recover && \
|
||||||
|
|
|
@ -60,13 +60,13 @@ static void do_1381_maybe_lock (int do_table_lock, u_int64_t *raw_count) {
|
||||||
}
|
}
|
||||||
|
|
||||||
r = txn->txn_stat(txn, &s2); CKERR(r);
|
r = txn->txn_stat(txn, &s2); CKERR(r);
|
||||||
//printf("Raw counts = %" PRId64 ", %" PRId64 "\n", s1->rolltmp_raw_count, s2->rolltmp_raw_count);
|
//printf("Raw counts = %" PRId64 ", %" PRId64 "\n", s1->rollback_raw_count, s2->rollback_raw_count);
|
||||||
|
|
||||||
*raw_count = s2->rolltmp_raw_count - s1->rolltmp_raw_count;
|
*raw_count = s2->rollback_raw_count - s1->rollback_raw_count;
|
||||||
if (do_table_lock) {
|
if (do_table_lock) {
|
||||||
assert(s1->rolltmp_raw_count == s2->rolltmp_raw_count);
|
assert(s1->rollback_raw_count == s2->rollback_raw_count);
|
||||||
} else {
|
} else {
|
||||||
assert(s1->rolltmp_raw_count < s2->rolltmp_raw_count);
|
assert(s1->rollback_raw_count < s2->rollback_raw_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
toku_free(s1); toku_free(s2);
|
toku_free(s1); toku_free(s2);
|
||||||
|
|
|
@ -91,7 +91,7 @@ do_db_work(void) {
|
||||||
}
|
}
|
||||||
if (did_fail) goto shutdown2;
|
if (did_fail) goto shutdown2;
|
||||||
|
|
||||||
// Put an extra item in so that the rolltmp file will be created.
|
// Put an extra item in
|
||||||
r=env->txn_begin(env, 0, &tid, 0); assert(r==0);
|
r=env->txn_begin(env, 0, &tid, 0); assert(r==0);
|
||||||
r=db->put(db, tid, dbt_init(&key, "a", 2), dbt_init(&data, "b", 2), 0); DOERR(r);
|
r=db->put(db, tid, dbt_init(&key, "a", 2), dbt_init(&data, "b", 2), 0); DOERR(r);
|
||||||
if (did_fail) {
|
if (did_fail) {
|
||||||
|
|
|
@ -21,7 +21,7 @@ test_stat64 (unsigned int N) {
|
||||||
DB_TXN *txn;
|
DB_TXN *txn;
|
||||||
r = db_env_create(&env, 0); CKERR(r);
|
r = db_env_create(&env, 0); CKERR(r);
|
||||||
|
|
||||||
r = env->set_cachesize(env, 0, 10*1000000, 1);
|
r = env->set_cachesize(env, 0, 20*1000000, 1);
|
||||||
r = env->open(env, ENVDIR, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
|
r = env->open(env, ENVDIR, DB_INIT_LOCK|DB_INIT_LOG|DB_INIT_MPOOL|DB_INIT_TXN|DB_CREATE|DB_PRIVATE, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
|
||||||
r = db_create(&db, env, 0); CKERR(r);
|
r = db_create(&db, env, 0); CKERR(r);
|
||||||
|
|
||||||
|
@ -38,6 +38,10 @@ test_stat64 (unsigned int N) {
|
||||||
unsigned int i;
|
unsigned int i;
|
||||||
u_int64_t dsize=0;
|
u_int64_t dsize=0;
|
||||||
for (i=0; i<N; i++) {
|
for (i=0; i<N; i++) {
|
||||||
|
if (verbose>1 && i % (1<<14) == 0) {
|
||||||
|
printf("%s(total=%u) inserted %u so far\n", __FILE__, N, i);
|
||||||
|
fflush(stdout);
|
||||||
|
}
|
||||||
char hello[30], there[30];
|
char hello[30], there[30];
|
||||||
snprintf(hello, sizeof(hello), "hello%8d", i);
|
snprintf(hello, sizeof(hello), "hello%8d", i);
|
||||||
snprintf(there, sizeof(there), "there%d", i);
|
snprintf(there, sizeof(there), "there%d", i);
|
||||||
|
|
|
@ -1,85 +0,0 @@
|
||||||
/* -*- mode: C; c-basic-offset: 4 -*- */
|
|
||||||
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
|
|
||||||
#include "test.h"
|
|
||||||
/* Test for #1324. Make sure rolltmp files are removed. */
|
|
||||||
#include <db.h>
|
|
||||||
#include <fcntl.h>
|
|
||||||
|
|
||||||
#ifndef USE_TDB
|
|
||||||
#error This test only works for TokuDB.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void mkfile (const char *fname) {
|
|
||||||
mode_t mode = S_IRWXU|S_IRWXG|S_IRWXO;
|
|
||||||
int fd = open(fname, O_WRONLY | O_CREAT | O_BINARY, mode);
|
|
||||||
if (fd<0) perror("opening");
|
|
||||||
assert(fd>=0);
|
|
||||||
toku_os_full_write(fd, "hello\n", 6);
|
|
||||||
int r = close(fd); assert(r==0);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
do_1324 (int moreflags)
|
|
||||||
{
|
|
||||||
const char fname[] = ENVDIR "/__tokudb_rolltmp.12345";
|
|
||||||
const char fnamekeep[] = ENVDIR "/keepme";
|
|
||||||
|
|
||||||
system("rm -rf " ENVDIR);
|
|
||||||
toku_os_mkdir(ENVDIR, S_IRWXU+S_IRWXG+S_IRWXO);
|
|
||||||
mkfile(fname);
|
|
||||||
mkfile(fnamekeep);
|
|
||||||
|
|
||||||
const int envflags = DB_CREATE|DB_INIT_MPOOL|DB_INIT_TXN|DB_INIT_LOCK |DB_THREAD |DB_PRIVATE | DB_RECOVER | moreflags;
|
|
||||||
|
|
||||||
{
|
|
||||||
DB_ENV *env;
|
|
||||||
int r;
|
|
||||||
|
|
||||||
if (moreflags & DB_INIT_LOG) {
|
|
||||||
// create the log
|
|
||||||
r = db_env_create(&env, 0); CKERR(r);
|
|
||||||
r = env->open(env, ENVDIR, envflags & ~DB_RECOVER, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
|
|
||||||
r = env->close(env, 0); CKERR(r);
|
|
||||||
}
|
|
||||||
|
|
||||||
r = db_env_create(&env, 0); CKERR(r);
|
|
||||||
r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
|
|
||||||
|
|
||||||
{
|
|
||||||
toku_struct_stat sbuf;
|
|
||||||
r = toku_stat(fname, &sbuf);
|
|
||||||
if (r==0) {
|
|
||||||
fprintf(stderr, "The rolltmp file %s should have been deleted, but was not.\n", fname);
|
|
||||||
}
|
|
||||||
assert(r!=0);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
toku_struct_stat sbuf;
|
|
||||||
r = toku_stat(fnamekeep, &sbuf);
|
|
||||||
if (r!=0) {
|
|
||||||
fprintf(stderr, "The keepme file %s should NOT have been deleted, but was not.\n", fnamekeep);
|
|
||||||
}
|
|
||||||
assert(r==0);
|
|
||||||
}
|
|
||||||
|
|
||||||
r = env->close(env, 0); CKERR(r);
|
|
||||||
}
|
|
||||||
system("ls -l " ENVDIR);
|
|
||||||
// make sure we can open the env again.
|
|
||||||
{
|
|
||||||
DB_ENV *env;
|
|
||||||
int r;
|
|
||||||
r = db_env_create(&env, 0); CKERR(r);
|
|
||||||
r = env->open(env, ENVDIR, envflags, S_IRWXU+S_IRWXG+S_IRWXO); CKERR(r);
|
|
||||||
r = env->close(env, 0); CKERR(r);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
test_main (int argc, char *const argv[])
|
|
||||||
{
|
|
||||||
parse_args(argc, argv);
|
|
||||||
do_1324(DB_INIT_LOG);
|
|
||||||
do_1324(0);
|
|
||||||
return 0;
|
|
||||||
}
|
|
70
src/ydb.c
70
src/ydb.c
|
@ -484,13 +484,6 @@ env_setup_real_log_dir(DB_ENV *env) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int delete_rolltmp_files(DB_ENV *env) {
|
|
||||||
assert(env->i->real_data_dir);
|
|
||||||
assert(env->i->real_log_dir);
|
|
||||||
int r = tokudb_recover_delete_rolltmp_files(env->i->real_data_dir, env->i->real_log_dir);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
ydb_do_recovery (DB_ENV *env) {
|
ydb_do_recovery (DB_ENV *env) {
|
||||||
assert(env->i->real_log_dir);
|
assert(env->i->real_log_dir);
|
||||||
|
@ -600,9 +593,9 @@ ydb_recover_log_exists(DB_ENV *env) {
|
||||||
// Set *valid_newenv if creating a new environment (all files missing).
|
// Set *valid_newenv if creating a new environment (all files missing).
|
||||||
// (Note, if special dictionaries exist, then they were created transactionally and log should exist.)
|
// (Note, if special dictionaries exist, then they were created transactionally and log should exist.)
|
||||||
static int
|
static int
|
||||||
validate_env(DB_ENV * env, BOOL * valid_newenv) {
|
validate_env(DB_ENV * env, BOOL * valid_newenv, BOOL need_rollback_cachefile) {
|
||||||
int r;
|
int r;
|
||||||
BOOL expect_newenv; // set true if we expect to create a new env
|
BOOL expect_newenv = FALSE; // set true if we expect to create a new env
|
||||||
toku_struct_stat buf;
|
toku_struct_stat buf;
|
||||||
char* path = NULL;
|
char* path = NULL;
|
||||||
|
|
||||||
|
@ -610,11 +603,12 @@ validate_env(DB_ENV * env, BOOL * valid_newenv) {
|
||||||
path = toku_construct_full_name(2, env->i->dir, environmentdictionary);
|
path = toku_construct_full_name(2, env->i->dir, environmentdictionary);
|
||||||
assert(path);
|
assert(path);
|
||||||
r = toku_stat(path, &buf);
|
r = toku_stat(path, &buf);
|
||||||
|
int stat_errno = errno;
|
||||||
toku_free(path);
|
toku_free(path);
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
expect_newenv = FALSE; // persistent info exists
|
expect_newenv = FALSE; // persistent info exists
|
||||||
}
|
}
|
||||||
else if (errno == ENOENT) {
|
else if (stat_errno == ENOENT) {
|
||||||
expect_newenv = TRUE;
|
expect_newenv = TRUE;
|
||||||
r = 0;
|
r = 0;
|
||||||
}
|
}
|
||||||
|
@ -623,17 +617,41 @@ validate_env(DB_ENV * env, BOOL * valid_newenv) {
|
||||||
assert(r);
|
assert(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test for rollback cachefile
|
||||||
|
if (r == 0 && need_rollback_cachefile) {
|
||||||
|
path = toku_construct_full_name(2, env->i->dir, ROLLBACK_CACHEFILE_NAME);
|
||||||
|
assert(path);
|
||||||
|
r = toku_stat(path, &buf);
|
||||||
|
stat_errno = errno;
|
||||||
|
toku_free(path);
|
||||||
|
if (r == 0) {
|
||||||
|
if (expect_newenv) // rollback cachefile exists, but persistent env is missing
|
||||||
|
r = toku_ydb_do_error(env, ENOENT, "Persistent environment is missing\n");
|
||||||
|
}
|
||||||
|
else if (stat_errno == ENOENT) {
|
||||||
|
if (!expect_newenv) // rollback cachefile is missing but persistent env exists
|
||||||
|
r = toku_ydb_do_error(env, ENOENT, "rollback cachefile directory is missing\n");
|
||||||
|
else
|
||||||
|
r = 0; // both rollback cachefile and persistent env are missing
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
r = toku_ydb_do_error(env, errno, "Unable to access rollback cachefile\n");
|
||||||
|
assert(r);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Test for fileops directory
|
// Test for fileops directory
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
path = toku_construct_full_name(2, env->i->dir, fileopsdirectory);
|
path = toku_construct_full_name(2, env->i->dir, fileopsdirectory);
|
||||||
assert(path);
|
assert(path);
|
||||||
r = toku_stat(path, &buf);
|
r = toku_stat(path, &buf);
|
||||||
|
stat_errno = errno;
|
||||||
toku_free(path);
|
toku_free(path);
|
||||||
if (r == 0) {
|
if (r == 0) {
|
||||||
if (expect_newenv) // fileops directory exists, but persistent env is missing
|
if (expect_newenv) // fileops directory exists, but persistent env is missing
|
||||||
r = toku_ydb_do_error(env, ENOENT, "Persistent environment is missing\n");
|
r = toku_ydb_do_error(env, ENOENT, "Persistent environment is missing\n");
|
||||||
}
|
}
|
||||||
else if (errno == ENOENT) {
|
else if (stat_errno == ENOENT) {
|
||||||
if (!expect_newenv) // fileops directory is missing but persistent env exists
|
if (!expect_newenv) // fileops directory is missing but persistent env exists
|
||||||
r = toku_ydb_do_error(env, ENOENT, "Fileops directory is missing\n");
|
r = toku_ydb_do_error(env, ENOENT, "Fileops directory is missing\n");
|
||||||
else
|
else
|
||||||
|
@ -739,16 +757,16 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
|
||||||
env_setup_real_data_dir(env);
|
env_setup_real_data_dir(env);
|
||||||
env_setup_real_log_dir(env);
|
env_setup_real_log_dir(env);
|
||||||
|
|
||||||
r = validate_env(env, &newenv); // make sure that environment is either new or complete
|
BOOL need_rollback_cachefile = FALSE;
|
||||||
|
if (flags & (DB_INIT_TXN | DB_INIT_LOG)) {
|
||||||
|
need_rollback_cachefile = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
r = validate_env(env, &newenv, need_rollback_cachefile); // make sure that environment is either new or complete
|
||||||
if (r != 0) return r;
|
if (r != 0) return r;
|
||||||
|
|
||||||
unused_flags &= ~DB_INIT_TXN & ~DB_INIT_LOG;
|
unused_flags &= ~DB_INIT_TXN & ~DB_INIT_LOG;
|
||||||
|
|
||||||
if (flags & DB_INIT_TXN) {
|
|
||||||
r = delete_rolltmp_files(env);
|
|
||||||
if (r != 0) return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
// do recovery only if there exists a log and recovery is requested
|
// do recovery only if there exists a log and recovery is requested
|
||||||
// otherwise, a log is created when the logger is opened later
|
// otherwise, a log is created when the logger is opened later
|
||||||
if (!newenv) {
|
if (!newenv) {
|
||||||
|
@ -805,6 +823,8 @@ toku_env_open(DB_ENV * env, const char *home, u_int32_t flags, int mode) {
|
||||||
assert (using_txns);
|
assert (using_txns);
|
||||||
toku_logger_set_cachetable(env->i->logger, env->i->cachetable);
|
toku_logger_set_cachetable(env->i->logger, env->i->cachetable);
|
||||||
toku_logger_set_remove_finalize_callback(env->i->logger, finalize_file_removal, env->i->ltm);
|
toku_logger_set_remove_finalize_callback(env->i->logger, finalize_file_removal, env->i->ltm);
|
||||||
|
r = toku_logger_open_rollback(env->i->logger, env->i->cachetable, newenv);
|
||||||
|
assert(r==0);
|
||||||
}
|
}
|
||||||
|
|
||||||
DB_TXN *txn=NULL;
|
DB_TXN *txn=NULL;
|
||||||
|
@ -894,7 +914,6 @@ static int toku_env_close(DB_ENV * env, u_int32_t flags) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (env->i->cachetable) {
|
if (env->i->cachetable) {
|
||||||
toku_ydb_unlock(); // ydb lock must not be held when shutting down minicron
|
toku_ydb_unlock(); // ydb lock must not be held when shutting down minicron
|
||||||
toku_cachetable_minicron_shutdown(env->i->cachetable);
|
toku_cachetable_minicron_shutdown(env->i->cachetable);
|
||||||
|
@ -907,6 +926,17 @@ static int toku_env_close(DB_ENV * env, u_int32_t flags) {
|
||||||
toku_ydb_do_error(env, r, "Cannot close environment (error during checkpoint)\n");
|
toku_ydb_do_error(env, r, "Cannot close environment (error during checkpoint)\n");
|
||||||
goto panic_and_quit_early;
|
goto panic_and_quit_early;
|
||||||
}
|
}
|
||||||
|
r = toku_logger_close_rollback(env->i->logger, FALSE);
|
||||||
|
if (r) {
|
||||||
|
toku_ydb_do_error(env, r, "Cannot close environment (error during closing rollback cachefile)\n");
|
||||||
|
goto panic_and_quit_early;
|
||||||
|
}
|
||||||
|
//Do a second checkpoint now that the rollback cachefile is closed.
|
||||||
|
r = toku_checkpoint(env->i->cachetable, env->i->logger, NULL, NULL, NULL, NULL);
|
||||||
|
if (r) {
|
||||||
|
toku_ydb_do_error(env, r, "Cannot close environment (error during checkpoint)\n");
|
||||||
|
goto panic_and_quit_early;
|
||||||
|
}
|
||||||
r = toku_logger_shutdown(env->i->logger);
|
r = toku_logger_shutdown(env->i->logger);
|
||||||
if (r) {
|
if (r) {
|
||||||
toku_ydb_do_error(env, r, "Cannot close environment (error during logger shutdown)\n");
|
toku_ydb_do_error(env, r, "Cannot close environment (error during logger shutdown)\n");
|
||||||
|
@ -1954,7 +1984,7 @@ static u_int32_t locked_txn_id(DB_TXN *txn) {
|
||||||
|
|
||||||
static int toku_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
|
static int toku_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
|
||||||
XMALLOC(*txn_stat);
|
XMALLOC(*txn_stat);
|
||||||
return toku_logger_txn_rolltmp_raw_count(db_txn_struct_i(txn)->tokutxn, &(*txn_stat)->rolltmp_raw_count);
|
return toku_logger_txn_rollback_raw_count(db_txn_struct_i(txn)->tokutxn, &(*txn_stat)->rollback_raw_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int locked_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
|
static int locked_txn_stat (DB_TXN *txn, struct txn_stat **txn_stat) {
|
||||||
|
@ -5018,7 +5048,7 @@ int toku_db_pre_acquire_table_lock(DB *db, DB_TXN *txn) {
|
||||||
toku_lt_neg_infinity, toku_lt_neg_infinity,
|
toku_lt_neg_infinity, toku_lt_neg_infinity,
|
||||||
toku_lt_infinity, toku_lt_infinity);
|
toku_lt_infinity, toku_lt_infinity);
|
||||||
if (r==0) {
|
if (r==0) {
|
||||||
r = toku_brt_note_table_lock(db->i->brt, db_txn_struct_i(txn)->tokutxn); // tell the BRT layer that the table is locked (so that it can reduce the amount of rollback (rolltmp) data.
|
r = toku_brt_note_table_lock(db->i->brt, db_txn_struct_i(txn)->tokutxn, FALSE); // tell the BRT layer that the table is locked (so that it can reduce the amount of rollback data.
|
||||||
}
|
}
|
||||||
|
|
||||||
return r;
|
return r;
|
||||||
|
|
|
@ -21,7 +21,8 @@ enum typ_tag { TYP_BRTNODE = 0xdead0001,
|
||||||
TYP_GPMA,
|
TYP_GPMA,
|
||||||
TYP_TOKULOGGER,
|
TYP_TOKULOGGER,
|
||||||
TYP_TOKUTXN,
|
TYP_TOKUTXN,
|
||||||
TYP_LEAFENTRY
|
TYP_LEAFENTRY,
|
||||||
|
TYP_ROLLBACK_LOG_NODE
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Everything should call toku_malloc() instead of malloc(), and toku_calloc() instead of calloc() */
|
/* Everything should call toku_malloc() instead of malloc(), and toku_calloc() instead of calloc() */
|
||||||
|
|
|
@ -78,7 +78,7 @@ static inline void toku_list_move(struct toku_list *newhead, struct toku_list *o
|
||||||
// Note: Need the extra level of parens in these macros so that
|
// Note: Need the extra level of parens in these macros so that
|
||||||
// toku_list_struct(h, foo, b)->zot
|
// toku_list_struct(h, foo, b)->zot
|
||||||
// will work right. Otherwise the type cast will try to include ->zot, and it will be all messed up.
|
// will work right. Otherwise the type cast will try to include ->zot, and it will be all messed up.
|
||||||
#if defined(__GNUC__) && __GNUC__ >= 4
|
#if (defined(__GNUC__) && __GNUC__ >= 4) || defined(__builtin_offsetof)
|
||||||
#define toku_list_struct(p, t, f) ((t*)((char*)(p) - __builtin_offsetof(t, f)))
|
#define toku_list_struct(p, t, f) ((t*)((char*)(p) - __builtin_offsetof(t, f)))
|
||||||
#else
|
#else
|
||||||
#define toku_list_struct(p, t, f) ((t*)((char*)(p) - ((char*)&((t*)0)->f)))
|
#define toku_list_struct(p, t, f) ((t*)((char*)(p) - ((char*)&((t*)0)->f)))
|
||||||
|
|
|
@ -7,6 +7,7 @@ extern "C" {
|
||||||
|
|
||||||
#include "toku_os.h"
|
#include "toku_os.h"
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
//These are functions that really exist in windows but are named
|
//These are functions that really exist in windows but are named
|
||||||
//something else.
|
//something else.
|
||||||
|
@ -82,6 +83,9 @@ int mkstemp(char * ttemplate);
|
||||||
|
|
||||||
toku_off_t ftello(FILE *stream);
|
toku_off_t ftello(FILE *stream);
|
||||||
|
|
||||||
|
#define __builtin_offsetof(type, member) offsetof(type, member)
|
||||||
|
|
||||||
|
|
||||||
#if defined(__cplusplus)
|
#if defined(__cplusplus)
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Reference in a new issue