mirror of
https://github.com/MariaDB/server.git
synced 2025-01-22 23:04:20 +01:00
free diskblocks. Addresses #1195.
git-svn-id: file:///svn/toku/tokudb.1195@7679 c7de825b-a66e-492c-adef-691d508d4ae1
This commit is contained in:
parent
e9a8a72193
commit
6b654211c0
6 changed files with 78 additions and 22 deletions
|
@ -115,8 +115,8 @@ struct remembered_hash {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct block_translation_pair {
|
// One entry of the block translation table, indexed by block number.
struct block_translation_pair {
|
||||||
DISKOFF diskoff;
|
DISKOFF diskoff; // When in free list, set to the next free block (or to (DISKOFF)-1 at the end of the list). In this case it's really a BLOCKNUM, not a disk offset.
|
||||||
DISKOFF size;
|
DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF (all bits set, i.e. (DISKOFF)-1) while the block number is on the free list
|
||||||
};
|
};
|
||||||
|
|
||||||
// The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
|
// The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
|
||||||
|
|
|
@ -294,7 +294,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct
|
||||||
assert(r==Z_OK);
|
assert(r==Z_OK);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (0) printf("Size before compressing %u, after compression %lu\n", calculated_size-uncompressed_magic_len, compressed_len);
|
if (0) printf("Block %" PRId64 " Size before compressing %u, after compression %lu\n", blocknum.b, calculated_size-uncompressed_magic_len, compressed_len);
|
||||||
|
|
||||||
((int32_t*)(compressed_buf+uncompressed_magic_len))[0] = htonl(compressed_len);
|
((int32_t*)(compressed_buf+uncompressed_magic_len))[0] = htonl(compressed_len);
|
||||||
((int32_t*)(compressed_buf+uncompressed_magic_len))[1] = htonl(uncompressed_len);
|
((int32_t*)(compressed_buf+uncompressed_magic_len))[1] = htonl(uncompressed_len);
|
||||||
|
@ -308,18 +308,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct
|
||||||
//printf("%s:%d translated_blocknum_limit=%lu blocknum.b=%lu\n", __FILE__, __LINE__, h->translated_blocknum_limit, blocknum.b);
|
//printf("%s:%d translated_blocknum_limit=%lu blocknum.b=%lu\n", __FILE__, __LINE__, h->translated_blocknum_limit, blocknum.b);
|
||||||
//printf("%s:%d allocator=%p\n", __FILE__, __LINE__, h->block_allocator);
|
//printf("%s:%d allocator=%p\n", __FILE__, __LINE__, h->block_allocator);
|
||||||
//printf("%s:%d bt=%p\n", __FILE__, __LINE__, h->block_translation);
|
//printf("%s:%d bt=%p\n", __FILE__, __LINE__, h->block_translation);
|
||||||
if (h->translated_blocknum_limit <= (u_int64_t)blocknum.b) {
|
extend_block_translation(blocknum, h);
|
||||||
if (h->block_translation == 0) assert(h->translated_blocknum_limit==0);
|
|
||||||
u_int64_t new_limit = blocknum.b + 1;
|
|
||||||
u_int64_t old_limit = h->translated_blocknum_limit;
|
|
||||||
u_int64_t j;
|
|
||||||
XREALLOC_N(new_limit, h->block_translation);
|
|
||||||
for (j=old_limit; j<new_limit; j++) {
|
|
||||||
h->block_translation[j].diskoff = 0;
|
|
||||||
h->block_translation[j].size = 0;
|
|
||||||
}
|
|
||||||
h->translated_blocknum_limit = new_limit;
|
|
||||||
}
|
|
||||||
if (h->block_translation[blocknum.b].size > 0) {
|
if (h->block_translation[blocknum.b].size > 0) {
|
||||||
block_allocator_free_block(h->block_allocator, h->block_translation[blocknum.b].diskoff);
|
block_allocator_free_block(h->block_allocator, h->block_translation[blocknum.b].diskoff);
|
||||||
h->block_translation[blocknum.b].diskoff = 0;
|
h->block_translation[blocknum.b].diskoff = 0;
|
||||||
|
@ -344,6 +333,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct
|
||||||
}
|
}
|
||||||
|
|
||||||
int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h) {
|
int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h) {
|
||||||
|
if (0) printf("Deserializing Block %" PRId64 "\n", blocknum.b);
|
||||||
assert(0 <= blocknum.b && (u_int64_t)blocknum.b < h->translated_blocknum_limit);
|
assert(0 <= blocknum.b && (u_int64_t)blocknum.b < h->translated_blocknum_limit);
|
||||||
DISKOFF offset = h->block_translation[blocknum.b].diskoff;
|
DISKOFF offset = h->block_translation[blocknum.b].diskoff;
|
||||||
TAGMALLOC(BRTNODE, result);
|
TAGMALLOC(BRTNODE, result);
|
||||||
|
@ -371,8 +361,8 @@ int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash
|
||||||
compressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len]));
|
compressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len]));
|
||||||
if (compressed_size<=0 || compressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; }
|
if (compressed_size<=0 || compressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; }
|
||||||
uncompressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len+4]));
|
uncompressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len+4]));
|
||||||
|
if (0) printf("Block %" PRId64 " Compressed size = %u, uncompressed size=%u\n", blocknum.b, compressed_size, uncompressed_size);
|
||||||
if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; }
|
if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; }
|
||||||
if (0) printf("Compressed size = %u, uncompressed size=%u\n", compressed_size, uncompressed_size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//printf("%s:%d serializing %" PRIu64 " size=%d\n", __FILE__, __LINE__, blocknum.b, uncompressed_size);
|
//printf("%s:%d serializing %" PRIu64 " size=%d\n", __FILE__, __LINE__, blocknum.b, uncompressed_size);
|
||||||
|
|
64
newbrt/brt.c
64
newbrt/brt.c
|
@ -546,13 +546,59 @@ void toku_brtheader_free (struct brt_header *h) {
|
||||||
toku_free(h);
|
toku_free(h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
extend_block_translation (BLOCKNUM blocknum, struct brt_header *h)
|
||||||
|
// Effect: Record a block translation. This means extending the translation table, and setting the diskoff and size to zero in any of the unused spots.
|
||||||
|
{
|
||||||
|
if (h->translated_blocknum_limit <= (u_int64_t)blocknum.b) {
|
||||||
|
if (h->block_translation == 0) assert(h->translated_blocknum_limit==0);
|
||||||
|
u_int64_t new_limit = blocknum.b + 1;
|
||||||
|
u_int64_t old_limit = h->translated_blocknum_limit;
|
||||||
|
u_int64_t j;
|
||||||
|
XREALLOC_N(new_limit, h->block_translation);
|
||||||
|
for (j=old_limit; j<new_limit; j++) {
|
||||||
|
h->block_translation[j].diskoff = 0;
|
||||||
|
h->block_translation[j].size = 0;
|
||||||
|
}
|
||||||
|
h->translated_blocknum_limit = new_limit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sentinel for the free list of block numbers.  h->free_blocks.b is compared against it
// to detect an empty list.
const DISKOFF diskoff_is_null = (DISKOFF)-1; // in a freelist, this indicates end of list
|
||||||
|
// Stored in block_translation[].size to mark a block number as being on the free list.
const DISKOFF size_is_free = (DISKOFF)-1;
|
||||||
|
|
||||||
static int
|
static int
|
||||||
allocate_diskblocknumber (BLOCKNUM *res, BRT brt, TOKULOGGER logger __attribute__((__unused__))) {
|
allocate_diskblocknumber (BLOCKNUM *res, BRT brt, TOKULOGGER logger __attribute__((__unused__))) {
|
||||||
assert(brt->h->free_blocks.b == -1); // no blocks in the free list
|
BLOCKNUM result;
|
||||||
BLOCKNUM result = brt->h->unused_blocks;
|
if (brt->h->free_blocks.b == diskoff_is_null) {
|
||||||
|
// no blocks in the free list
|
||||||
|
result = brt->h->unused_blocks;
|
||||||
brt->h->unused_blocks.b++;
|
brt->h->unused_blocks.b++;
|
||||||
brt->h->dirty = 1;
|
} else {
|
||||||
|
result = brt->h->free_blocks;
|
||||||
|
assert(brt->h->block_translation[result.b].size = size_is_free);
|
||||||
|
brt->h->block_translation[result.b].size = 0;
|
||||||
|
brt->h->free_blocks.b = brt->h->block_translation[result.b].diskoff; // pop the freelist
|
||||||
|
}
|
||||||
|
assert(result.b>0);
|
||||||
*res = result;
|
*res = result;
|
||||||
|
brt->h->dirty = 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
free_diskblocknumber (BLOCKNUM *b, struct brt_header *h, TOKULOGGER logger __attribute__((__unused__)))
|
||||||
|
// Effect: Free a diskblock
|
||||||
|
// Watch out for the case where the disk block was never yet written to disk and is beyond the translated_blocknum_limit.
|
||||||
|
{
|
||||||
|
extend_block_translation(*b, h);
|
||||||
|
assert((u_int64_t)b->b <= h->translated_blocknum_limit);
|
||||||
|
assert(h->block_translation[b->b].size != size_is_free);
|
||||||
|
h->block_translation[b->b].size = size_is_free;
|
||||||
|
h->block_translation[b->b].diskoff = h->free_blocks.b;
|
||||||
|
h->free_blocks.b = b->b;
|
||||||
|
b->b = 0;
|
||||||
|
h->dirty = 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2080,14 +2126,18 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, TOKUL
|
||||||
// Unpin both, and return the first nonzero error code that is found
|
// Unpin both, and return the first nonzero error code that is found
|
||||||
assert(node->dirty);
|
assert(node->dirty);
|
||||||
{
|
{
|
||||||
|
int rrb1 = 0;
|
||||||
int rra = toku_unpin_brtnode(t, childa);
|
int rra = toku_unpin_brtnode(t, childa);
|
||||||
int rrb;
|
int rrb;
|
||||||
if (did_merge) {
|
if (did_merge) {
|
||||||
rrb = toku_cachetable_unpin_and_remove(t->cf, childb->thisnodename);
|
BLOCKNUM bn = childb->thisnodename;
|
||||||
|
rrb = toku_cachetable_unpin_and_remove(t->cf, bn);
|
||||||
|
rrb1 = free_diskblocknumber(&bn, t->h, logger);
|
||||||
} else {
|
} else {
|
||||||
rrb = toku_unpin_brtnode(t, childb);
|
rrb = toku_unpin_brtnode(t, childb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (rrb1) return rrb1;
|
||||||
if (rra) return rra;
|
if (rra) return rra;
|
||||||
if (rrb) return rrb;
|
if (rrb) return rrb;
|
||||||
}
|
}
|
||||||
|
@ -4125,6 +4175,12 @@ int toku_dump_brt (FILE *f, BRT brt) {
|
||||||
CACHEKEY *rootp;
|
CACHEKEY *rootp;
|
||||||
assert(brt->h);
|
assert(brt->h);
|
||||||
u_int32_t fullhash;
|
u_int32_t fullhash;
|
||||||
|
u_int64_t i;
|
||||||
|
fprintf(f, "Block translation:");
|
||||||
|
for (i=0; i<brt->h->translated_blocknum_limit; i++) {
|
||||||
|
fprintf(f, " %"PRIu64": %"PRId64" %"PRId64"", i, brt->h->block_translation[i].diskoff, brt->h->block_translation[i].size);
|
||||||
|
}
|
||||||
|
fprintf(f, "\n");
|
||||||
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
|
rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
|
||||||
return toku_dump_brtnode(f, brt, *rootp, 0, 0, 0, 0, 0);
|
return toku_dump_brtnode(f, brt, *rootp, 0, 0, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -113,4 +113,6 @@ enum brt_header_flags {
|
||||||
|
|
||||||
int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater);
|
int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater);
|
||||||
|
|
||||||
|
// Grow h->block_translation, if needed, so that it has an entry for blocknum; entries added by the call get diskoff and size set to zero.
void extend_block_translation (BLOCKNUM blocknum, struct brt_header *h);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -291,7 +291,14 @@ main (int argc, const char *argv[]) {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
BLOCKNUM blocknum;
|
BLOCKNUM blocknum;
|
||||||
|
printf("Block translation:");
|
||||||
|
for (blocknum.b=0; blocknum.b<h->unused_blocks.b; blocknum.b++) {
|
||||||
|
printf(" %" PRIu64 ":", blocknum.b);
|
||||||
|
if (h->block_translation[blocknum.b].size == -1) printf("free");
|
||||||
|
else printf("%" PRIu64 ":%" PRIu64, h->block_translation[blocknum.b].diskoff, h->block_translation[blocknum.b].size);
|
||||||
|
}
|
||||||
for (blocknum.b=1; blocknum.b<h->unused_blocks.b; blocknum.b++) {
|
for (blocknum.b=1; blocknum.b<h->unused_blocks.b; blocknum.b++) {
|
||||||
|
if (h->block_translation[blocknum.b].size != -1)
|
||||||
dump_node(f, blocknum, h);
|
dump_node(f, blocknum, h);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1089,6 +1089,7 @@ static void test_new_brt_cursor_last(int n, int dup_mode) {
|
||||||
memcpy(&vv, val.data, val.size);
|
memcpy(&vv, val.data, val.size);
|
||||||
assert(vv == (int) htonl(i));
|
assert(vv == (int) htonl(i));
|
||||||
|
|
||||||
|
//if (n==512 && i<=360) { printf("i=%d\n", i); toku_dump_brt(stdout, t); }
|
||||||
r = toku_brt_cursor_delete(cursor, 0, null_txn); assert(r == 0);
|
r = toku_brt_cursor_delete(cursor, 0, null_txn); assert(r == 0);
|
||||||
}
|
}
|
||||||
assert(i == -1);
|
assert(i == -1);
|
||||||
|
|
Loading…
Add table
Reference in a new issue