/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of PerconaFT.

Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.

PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.

PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.

----------------------------------------

PerconaFT is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License, version 3,
as published by the Free Software Foundation.

PerconaFT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with PerconaFT. If not, see <http://www.gnu.org/licenses/>.
======= */

#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."

#include <my_global.h>
#include "portability/memory.h"
#include "portability/toku_assert.h"
#include "portability/toku_portability.h"
#include "portability/toku_pthread.h"

// ugly but pragmatic, need access to dirty bits while holding translation lock
// TODO: Refactor this (possibly with FT-301)
#include "ft/ft-internal.h"

// TODO: reorganize this dependency (FT-303)
#include "ft/ft-ops.h"  // for toku_maybe_truncate_file
#include "ft/serialize/block_table.h"
#include "ft/serialize/rbuf.h"
#include "ft/serialize/wbuf.h"
#include "ft/serialize/block_allocator.h"
#include "util/nb_mutex.h"
#include "util/scoped_malloc.h"

toku_instr_key *block_table_mutex_key;
toku_instr_key *safe_file_size_lock_mutex_key;
toku_instr_key *safe_file_size_lock_rwlock_key;

// indicates the end of a freelist
static const BLOCKNUM freelist_null = {-1};

// value of block_translation_pair.size if blocknum is unused
static const DISKOFF size_is_free = (DISKOFF)-1;

// value of block_translation_pair.u.diskoff if blocknum is used but does not
// yet have a diskblock
static const DISKOFF diskoff_unused = (DISKOFF)-2;

void block_table::_mutex_lock() { toku_mutex_lock(&_mutex); }

void block_table::_mutex_unlock() { toku_mutex_unlock(&_mutex); }

// TODO: Move lock to FT
void toku_ft_lock(FT ft) {
    block_table *bt = &ft->blocktable;
    bt->_mutex_lock();
}

// TODO: Move lock to FT
void toku_ft_unlock(FT ft) {
    block_table *bt = &ft->blocktable;
    toku_mutex_assert_locked(&bt->_mutex);
    bt->_mutex_unlock();
}

// There are two headers: the reserve must fit them both and be suitably
// aligned.
static_assert(BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE %
                      BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT ==
                  0,
              "Block allocator's header reserve must be suitably aligned");
static_assert(
    BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE * 2 ==
        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
    "Block allocator's total header reserve must exactly fit two headers");

// does NOT initialize the block allocator: the caller is responsible
void block_table::_create_internal() {
    memset(&_current, 0, sizeof(struct translation));
    memset(&_inprogress, 0, sizeof(struct translation));
    memset(&_checkpointed, 0, sizeof(struct translation));
    memset(&_mutex, 0, sizeof(_mutex));
    _bt_block_allocator = new BlockAllocator();
    toku_mutex_init(*block_table_mutex_key, &_mutex, nullptr);
    nb_mutex_init(*safe_file_size_lock_mutex_key,
                  *safe_file_size_lock_rwlock_key,
                  &_safe_file_size_lock);
}

// Fill in the checkpointed translation from buffer, and copy checkpointed to
// current.
// The one read from disk is the last known checkpointed one, so we keep it in
// place and then set current (which is never stored on disk) for current use.
// The translation_buffer holds the translation only; we create the rest of
// the block_table here.
int block_table::create_from_buffer(
    int fd,
    DISKOFF location_on_disk,  // Location of translation_buffer
    DISKOFF size_on_disk,
    unsigned char *translation_buffer) {
    // Does not initialize the block allocator
    _create_internal();

    // Deserialize the translation and copy it to current
    int r = _translation_deserialize_from_buffer(
        &_checkpointed, location_on_disk, size_on_disk, translation_buffer);
    if (r != 0) {
        return r;
    }
    _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT);

    // Determine the file size
    int64_t file_size = 0;
    r = toku_os_get_file_size(fd, &file_size);
    lazy_assert_zero(r);
    invariant(file_size >= 0);
    _safe_file_size = file_size;

    // Gather the non-empty translations and use them to create the block
    // allocator
    toku::scoped_malloc pairs_buf(_checkpointed.smallest_never_used_blocknum.b *
                                  sizeof(struct BlockAllocator::BlockPair));
    struct BlockAllocator::BlockPair *CAST_FROM_VOIDP(pairs, pairs_buf.get());
    uint64_t n_pairs = 0;
    for (int64_t i = 0; i < _checkpointed.smallest_never_used_blocknum.b; i++) {
        struct block_translation_pair pair = _checkpointed.block_translation[i];
        if (pair.size > 0) {
            invariant(pair.u.diskoff != diskoff_unused);
            pairs[n_pairs++] =
                BlockAllocator::BlockPair(pair.u.diskoff, pair.size);
        }
    }

    _bt_block_allocator->CreateFromBlockPairs(
        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
        BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT,
        pairs,
        n_pairs);

    return 0;
}

void block_table::create() {
    // Does not initialize the block allocator
    _create_internal();

    _checkpointed.type = TRANSLATION_CHECKPOINTED;
    _checkpointed.smallest_never_used_blocknum =
        make_blocknum(RESERVED_BLOCKNUMS);
    _checkpointed.length_of_array =
        _checkpointed.smallest_never_used_blocknum.b;
    _checkpointed.blocknum_freelist_head = freelist_null;
    XMALLOC_N(_checkpointed.length_of_array, _checkpointed.block_translation);
    for (int64_t i = 0; i < _checkpointed.length_of_array; i++) {
        _checkpointed.block_translation[i].size = 0;
        _checkpointed.block_translation[i].u.diskoff = diskoff_unused;
    }

    // we just created a default checkpointed, now copy it to current.
    _copy_translation(&_current, &_checkpointed, TRANSLATION_CURRENT);

    // Create an empty block allocator.
    _bt_block_allocator->Create(
        BlockAllocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE,
        BlockAllocator::BLOCK_ALLOCATOR_ALIGNMENT);
}
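
// Usage sketch (illustrative): a block_table comes to life in one of two
// ways, depending on whether the FT file is brand new or being reopened:
//
//   block_table bt;
//   bt.create();  // new file: reserved blocknums only, empty allocator
//   // ...or, when opening an existing file whose translation was read into
//   // 'buf' (size 'sz') from offset 'loc':
//   // int r = bt.create_from_buffer(fd, loc, sz, buf);
//   // ...
//   bt.destroy();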

// TODO: Refactor with FT-303
static void ft_set_dirty(FT ft, bool for_checkpoint) {
    invariant(ft->h->type == FT_CURRENT);
    if (for_checkpoint) {
        invariant(ft->checkpoint_header->type == FT_CHECKPOINT_INPROGRESS);
        ft->checkpoint_header->dirty = 1;
    } else {
        ft->h->dirty = 1;
    }
}

void block_table::_maybe_truncate_file(int fd, uint64_t size_needed_before) {
    toku_mutex_assert_locked(&_mutex);
    uint64_t new_size_needed = _bt_block_allocator->AllocatedLimit();
    // Save a call to toku_os_get_file_size (kernel call) if unlikely to be
    // useful.
    if (new_size_needed < size_needed_before &&
        new_size_needed < _safe_file_size) {
        nb_mutex_lock(&_safe_file_size_lock, &_mutex);

        // Must hold _safe_file_size_lock to change _safe_file_size.
        if (new_size_needed < _safe_file_size) {
            int64_t safe_file_size_before = _safe_file_size;
            // Not safe to use the 'to-be-truncated' portion until truncate is
            // done.
            _safe_file_size = new_size_needed;
            _mutex_unlock();

            uint64_t size_after;
            toku_maybe_truncate_file(
                fd, new_size_needed, safe_file_size_before, &size_after);
            _mutex_lock();

            _safe_file_size = size_after;
        }
        nb_mutex_unlock(&_safe_file_size_lock);
    }
}
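
// The repeated 'new_size_needed < _safe_file_size' test above is a
// double-check: the first test (under _mutex only) cheaply skips the common
// case, while the second re-validates after acquiring _safe_file_size_lock,
// since another thread may have changed _safe_file_size in between.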

void block_table::maybe_truncate_file_on_open(int fd) {
    _mutex_lock();
    _maybe_truncate_file(fd, _safe_file_size);
    _mutex_unlock();
}

void block_table::_copy_translation(struct translation *dst,
                                    struct translation *src,
                                    enum translation_type newtype) {
    // We intend to malloc a fresh block, so the incoming translation should be
    // empty
    invariant_null(dst->block_translation);

    invariant(src->length_of_array >= src->smallest_never_used_blocknum.b);
    invariant(newtype == TRANSLATION_DEBUG ||
              (src->type == TRANSLATION_CURRENT &&
               newtype == TRANSLATION_INPROGRESS) ||
              (src->type == TRANSLATION_CHECKPOINTED &&
               newtype == TRANSLATION_CURRENT));
    dst->type = newtype;
    dst->smallest_never_used_blocknum = src->smallest_never_used_blocknum;
    dst->blocknum_freelist_head = src->blocknum_freelist_head;

    // destination btt is of fixed size. Allocate + memcpy the exact length
    // necessary.
    dst->length_of_array = dst->smallest_never_used_blocknum.b;
    XMALLOC_N(dst->length_of_array, dst->block_translation);
    memcpy(dst->block_translation,
           src->block_translation,
           dst->length_of_array * sizeof(*dst->block_translation));

    // New version of btt is not yet stored on disk.
    dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size = 0;
    dst->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff =
        diskoff_unused;
}

int64_t block_table::get_blocks_in_use_unlocked() {
    BLOCKNUM b;
    struct translation *t = &_current;
    int64_t num_blocks = 0;
    {
        // Reserved blocknums do not get upgraded; they are part of the header.
        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
             b.b++) {
            if (t->block_translation[b.b].size != size_is_free) {
                num_blocks++;
            }
        }
    }
    return num_blocks;
}

void block_table::_maybe_optimize_translation(struct translation *t) {
    // Reduce 'smallest_never_used_blocknum.b' (turning the completely free
    // suffix back into never-used blocknums instead of leaving them on the
    // free list). Doing so requires us to regenerate the free list.
    // This is O(n) work, so do it only if we're already doing O(n) work anyway.

    BLOCKNUM b;
    paranoid_invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
    // Calculate how large the free suffix is.
    int64_t freed;
    {
        for (b.b = t->smallest_never_used_blocknum.b; b.b > RESERVED_BLOCKNUMS;
             b.b--) {
            if (t->block_translation[b.b - 1].size != size_is_free) {
                break;
            }
        }
        freed = t->smallest_never_used_blocknum.b - b.b;
    }
    if (freed > 0) {
        t->smallest_never_used_blocknum.b = b.b;
        if (t->length_of_array / 4 > t->smallest_never_used_blocknum.b) {
            // We're using more memory than necessary to represent this now.
            // Reduce.
            uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
            XREALLOC_N(new_length, t->block_translation);
            t->length_of_array = new_length;
            // No need to zero anything out.
        }

        // Regenerate free list.
        t->blocknum_freelist_head.b = freelist_null.b;
        for (b.b = RESERVED_BLOCKNUMS; b.b < t->smallest_never_used_blocknum.b;
             b.b++) {
            if (t->block_translation[b.b].size == size_is_free) {
                t->block_translation[b.b].u.next_free_blocknum =
                    t->blocknum_freelist_head;
                t->blocknum_freelist_head = b;
            }
        }
    }
}
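
// Illustrative example (hypothetical value RESERVED_BLOCKNUMS == 3): given
// sizes [r0 r1 r2 used used FREE FREE FREE] and
// smallest_never_used_blocknum == 8, the free suffix 5..7 is trimmed so that
// smallest_never_used_blocknum becomes 5, and the free list is rebuilt from
// whatever free entries remain below it (here: none).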

// block table must be locked by caller of this function
void block_table::note_start_checkpoint_unlocked() {
    toku_mutex_assert_locked(&_mutex);

    // We're going to do O(n) work to copy the translation, so we
    // can afford to do O(n) work by optimizing the translation
    _maybe_optimize_translation(&_current);

    // Copy current translation to inprogress translation.
    _copy_translation(&_inprogress, &_current, TRANSLATION_INPROGRESS);

    _checkpoint_skipped = false;
}

void block_table::note_skipped_checkpoint() {
    // Purpose: alert the block translation that the checkpoint was skipped,
    // e.g. because of a non-dirty header
    _mutex_lock();
    paranoid_invariant_notnull(_inprogress.block_translation);
    _checkpoint_skipped = true;
    _mutex_unlock();
}

// Purpose: free any disk space used by the previous checkpoint that isn't in
// use by either
//  - the current state, or
//  - the in-progress checkpoint;
// then capture inprogress as the new checkpointed.
// For each entry in the checkpointed BTT:
//   if its offset does not match the offset in inprogress:
//     assert the offset does not match the offset in current
//     free (offset, len) from checkpoint
// move inprogress to checkpoint (resetting type)
// inprogress = NULL
void block_table::note_end_checkpoint(int fd) {
    // Free unused blocks
    _mutex_lock();
    uint64_t allocated_limit_at_start = _bt_block_allocator->AllocatedLimit();
    paranoid_invariant_notnull(_inprogress.block_translation);
    if (_checkpoint_skipped) {
        toku_free(_inprogress.block_translation);
        memset(&_inprogress, 0, sizeof(_inprogress));
        goto end;
    }

    // Make certain inprogress was allocated space on disk
    invariant(
        _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].size > 0);
    invariant(
        _inprogress.block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff >
        0);

    {
        struct translation *t = &_checkpointed;
        for (int64_t i = 0; i < t->length_of_array; i++) {
            struct block_translation_pair *pair = &t->block_translation[i];
            if (pair->size > 0 &&
                !_translation_prevents_freeing(
                    &_inprogress, make_blocknum(i), pair)) {
                invariant(!_translation_prevents_freeing(
                    &_current, make_blocknum(i), pair));
                _bt_block_allocator->FreeBlock(pair->u.diskoff, pair->size);
            }
        }
        toku_free(_checkpointed.block_translation);
        _checkpointed = _inprogress;
        _checkpointed.type = TRANSLATION_CHECKPOINTED;
        memset(&_inprogress, 0, sizeof(_inprogress));
        _maybe_truncate_file(fd, allocated_limit_at_start);
    }
end:
    _mutex_unlock();
}
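
// Checkpoint lifecycle sketch (illustrative): callers drive the three
// translations through a checkpoint roughly as follows:
//
//   bt->note_start_checkpoint_unlocked();  // copy current -> inprogress
//   // ... serialize and write the inprogress translation and blocks ...
//   bt->note_end_checkpoint(fd);           // inprogress becomes checkpointed
//   // or, if nothing needed to be written:
//   // bt->note_skipped_checkpoint();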

bool block_table::_is_valid_blocknum(struct translation *t, BLOCKNUM b) {
    invariant(t->length_of_array >= t->smallest_never_used_blocknum.b);
    return b.b >= 0 && b.b < t->smallest_never_used_blocknum.b;
}

void block_table::_verify_valid_blocknum(struct translation *UU(t),
                                         BLOCKNUM UU(b)) {
    invariant(_is_valid_blocknum(t, b));
}

bool block_table::_is_valid_freeable_blocknum(struct translation *t,
                                              BLOCKNUM b) {
    invariant(t->length_of_array >= t->smallest_never_used_blocknum.b);
    return b.b >= RESERVED_BLOCKNUMS && b.b < t->smallest_never_used_blocknum.b;
}

// should be freeable
void block_table::_verify_valid_freeable_blocknum(struct translation *UU(t),
                                                  BLOCKNUM UU(b)) {
    invariant(_is_valid_freeable_blocknum(t, b));
}

// Also used only in ft-serialize-test.
void block_table::block_free(uint64_t offset, uint64_t size) {
    _mutex_lock();
    _bt_block_allocator->FreeBlock(offset, size);
    _mutex_unlock();
}

int64_t block_table::_calculate_size_on_disk(struct translation *t) {
    return 8 +  // smallest_never_used_blocknum
           8 +  // blocknum_freelist_head
           t->smallest_never_used_blocknum.b * 16 +  // Array
           4;  // 4 for checksum
}
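
// The arithmetic above mirrors the on-disk layout produced by
// serialize_translation_to_wbuf:
//   [smallest_never_used_blocknum : 8 bytes]
//   [blocknum_freelist_head       : 8 bytes]
//   [{u.diskoff, size}            : 16 bytes per blocknum]
//   [x1764 checksum               : 4 bytes]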

// We cannot free the disk space allocated to this blocknum if it is still in
// use by the given translation table.
bool block_table::_translation_prevents_freeing(
    struct translation *t,
    BLOCKNUM b,
    struct block_translation_pair *old_pair) {
    return t->block_translation && b.b < t->smallest_never_used_blocknum.b &&
           old_pair->u.diskoff == t->block_translation[b.b].u.diskoff;
}
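
// Example (illustrative): if the checkpointed translation still maps
// blocknum 7 to diskoff 4096 while the current translation has reallocated
// blocknum 7 to diskoff 8192, the old 4096 extent must not be freed until
// the checkpointed translation stops referencing it; this predicate detects
// exactly that sharing.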

void block_table::_realloc_on_disk_internal(BLOCKNUM b,
                                            DISKOFF size,
                                            DISKOFF *offset,
                                            FT ft,
                                            bool for_checkpoint) {
    toku_mutex_assert_locked(&_mutex);
    ft_set_dirty(ft, for_checkpoint);

    struct translation *t = &_current;
    struct block_translation_pair old_pair = t->block_translation[b.b];
    // Free the old block if it is not still in use by the checkpoint in
    // progress or the previous checkpoint
    bool cannot_free =
        (!for_checkpoint &&
         _translation_prevents_freeing(&_inprogress, b, &old_pair)) ||
        _translation_prevents_freeing(&_checkpointed, b, &old_pair);
    if (!cannot_free && old_pair.u.diskoff != diskoff_unused) {
        _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
    }

    uint64_t allocator_offset = diskoff_unused;
    t->block_translation[b.b].size = size;
    if (size > 0) {
        // Allocate a new block if the size is greater than 0;
        // if the size is just 0, offset will be set to diskoff_unused
        _bt_block_allocator->AllocBlock(size, &allocator_offset);
    }
    t->block_translation[b.b].u.diskoff = allocator_offset;
    *offset = allocator_offset;

    // Update inprogress btt if appropriate (if called because Pending bit is
    // set).
    if (for_checkpoint) {
        paranoid_invariant(b.b < _inprogress.length_of_array);
        _inprogress.block_translation[b.b] = t->block_translation[b.b];
    }
}

void block_table::_ensure_safe_write_unlocked(int fd,
                                              DISKOFF block_size,
                                              DISKOFF block_offset) {
    // Requires: holding _mutex
    uint64_t size_needed = block_size + block_offset;
    if (size_needed > _safe_file_size) {
        // Must hold _safe_file_size_lock to change _safe_file_size.
        nb_mutex_lock(&_safe_file_size_lock, &_mutex);
        if (size_needed > _safe_file_size) {
            _mutex_unlock();

            int64_t size_after;
            toku_maybe_preallocate_in_file(
                fd, size_needed, _safe_file_size, &size_after);

            _mutex_lock();
            _safe_file_size = size_after;
        }
        nb_mutex_unlock(&_safe_file_size_lock);
    }
}

void block_table::realloc_on_disk(BLOCKNUM b,
                                  DISKOFF size,
                                  DISKOFF *offset,
                                  FT ft,
                                  int fd,
                                  bool for_checkpoint) {
    _mutex_lock();
    struct translation *t = &_current;
    _verify_valid_freeable_blocknum(t, b);
    _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint);

    _ensure_safe_write_unlocked(fd, size, *offset);
    _mutex_unlock();
}

bool block_table::_pair_is_unallocated(struct block_translation_pair *pair) {
    return pair->size == 0 && pair->u.diskoff == diskoff_unused;
}

// Effect: figure out where to put the inprogress btt on disk, allocate space
// for it there.
// The space must be 512-byte aligned (both the starting address and the
// size).
// As a result, the allocated space may be a little bit bigger (up to the next
// 512-byte boundary) than the actual btt.
void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
    toku_mutex_assert_locked(&_mutex);

    struct translation *t = &_inprogress;
    paranoid_invariant_notnull(t->block_translation);
    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
    // Each inprogress is allocated only once
    paranoid_invariant(_pair_is_unallocated(&t->block_translation[b.b]));

    // Allocate a new block
    int64_t size = _calculate_size_on_disk(t);
    uint64_t offset;
    _bt_block_allocator->AllocBlock(size, &offset);
    t->block_translation[b.b].u.diskoff = offset;
    t->block_translation[b.b].size = size;
}

// Effect: Serializes the blocktable to a wbuf (which starts uninitialized).
// A clean shutdown runs checkpoint start so that current and inprogress are
// copies.
// The resulting wbuf buffer is guaranteed to be 512-byte aligned and the
// total length is a multiple of 512 (so we pad with zeros at the end if
// needed).
// The address is guaranteed to be 512-byte aligned, but the size is not
// guaranteed.
// It *is* guaranteed that we can read up to the next 512-byte boundary,
// however.
void block_table::serialize_translation_to_wbuf(int fd,
                                                struct wbuf *w,
                                                int64_t *address,
                                                int64_t *size) {
    _mutex_lock();
    struct translation *t = &_inprogress;

    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
    // The allocated block must be 512-byte aligned to make O_DIRECT happy.
    _alloc_inprogress_translation_on_disk_unlocked();
    uint64_t size_translation = _calculate_size_on_disk(t);
    uint64_t size_aligned = roundup_to_multiple(512, size_translation);
    invariant((int64_t)size_translation == t->block_translation[b.b].size);
    {
        // Init wbuf
        if (0)
            printf(
                "%s:%d writing translation table of size_translation %" PRIu64
                " at %" PRId64 "\n",
                __FILE__,
                __LINE__,
                size_translation,
                t->block_translation[b.b].u.diskoff);
        char *XMALLOC_N_ALIGNED(512, size_aligned, buf);
        for (uint64_t i = size_translation; i < size_aligned; i++)
            buf[i] = 0;  // fill in the end of the buffer with zeros.
        wbuf_init(w, buf, size_aligned);
    }
    wbuf_BLOCKNUM(w, t->smallest_never_used_blocknum);
    wbuf_BLOCKNUM(w, t->blocknum_freelist_head);
    int64_t i;
    for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
        if (0)
            printf("%s:%d %" PRId64 ",%" PRId64 "\n",
                   __FILE__,
                   __LINE__,
                   t->block_translation[i].u.diskoff,
                   t->block_translation[i].size);
        wbuf_DISKOFF(w, t->block_translation[i].u.diskoff);
        wbuf_DISKOFF(w, t->block_translation[i].size);
    }
    uint32_t checksum = toku_x1764_finish(&w->checksum);
    wbuf_int(w, checksum);
    *address = t->block_translation[b.b].u.diskoff;
    *size = size_translation;
    invariant((*address) % 512 == 0);

    _ensure_safe_write_unlocked(fd, size_aligned, *address);
    _mutex_unlock();
}
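
// Alignment example (illustrative): a translation of size_translation ==
// 1300 bytes is written through a roundup_to_multiple(512, 1300) == 1536-byte
// zero-padded, 512-byte-aligned buffer so the O_DIRECT write is legal, while
// *size still reports the unpadded 1300.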

// Perhaps rename: purpose is get disk address of a block, given its blocknum
// (blockid?)
void block_table::_translate_blocknum_to_offset_size_unlocked(BLOCKNUM b,
                                                              DISKOFF *offset,
                                                              DISKOFF *size) {
    struct translation *t = &_current;
    _verify_valid_blocknum(t, b);
    if (offset) {
        *offset = t->block_translation[b.b].u.diskoff;
    }
    if (size) {
        *size = t->block_translation[b.b].size;
    }
}

// Perhaps rename: purpose is get disk address of a block, given its blocknum
// (blockid?)
void block_table::translate_blocknum_to_offset_size(BLOCKNUM b,
                                                    DISKOFF *offset,
                                                    DISKOFF *size) {
    _mutex_lock();
    _translate_blocknum_to_offset_size_unlocked(b, offset, size);
    _mutex_unlock();
}

// Only called by toku_allocate_blocknum
// Effect: expand the array to maintain size invariant
// given that one more never-used blocknum will soon be used.
void block_table::_maybe_expand_translation(struct translation *t) {
    if (t->length_of_array <= t->smallest_never_used_blocknum.b) {
        // expansion is necessary
        uint64_t new_length = t->smallest_never_used_blocknum.b * 2;
        XREALLOC_N(new_length, t->block_translation);
        uint64_t i;
        for (i = t->length_of_array; i < new_length; i++) {
            t->block_translation[i].u.next_free_blocknum = freelist_null;
            t->block_translation[i].size = size_is_free;
        }
        t->length_of_array = new_length;
    }
}
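
// Growth is geometric (the array length doubles), so n blocknum allocations
// trigger only O(log n) reallocations and O(n) total copying: the usual
// amortized-constant dynamic-array argument.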

void block_table::_allocate_blocknum_unlocked(BLOCKNUM *res, FT ft) {
    toku_mutex_assert_locked(&_mutex);
    BLOCKNUM result;
    struct translation *t = &_current;
    if (t->blocknum_freelist_head.b == freelist_null.b) {
        // no previously used blocknums are available,
        // so use a never-used blocknum
        _maybe_expand_translation(
            t);  // Ensure a never-used blocknum is available
        result = t->smallest_never_used_blocknum;
        t->smallest_never_used_blocknum.b++;
    } else {  // reuse a previously used blocknum
        result = t->blocknum_freelist_head;
        BLOCKNUM next = t->block_translation[result.b].u.next_free_blocknum;
        t->blocknum_freelist_head = next;
    }
    // Verify the blocknum is free
    paranoid_invariant(t->block_translation[result.b].size == size_is_free);
    // blocknum is not free anymore
    t->block_translation[result.b].u.diskoff = diskoff_unused;
    t->block_translation[result.b].size = 0;
    _verify_valid_freeable_blocknum(t, result);
    *res = result;
    ft_set_dirty(ft, false);
}

void block_table::allocate_blocknum(BLOCKNUM *res, FT ft) {
    _mutex_lock();
    _allocate_blocknum_unlocked(res, ft);
    _mutex_unlock();
}
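
// Usage sketch (illustrative; 'ft' is an open FT handle and 'fd' its file):
//
//   BLOCKNUM b;
//   bt->allocate_blocknum(&b, ft);  // b is now valid: size 0, no disk block
//   DISKOFF offset;
//   bt->realloc_on_disk(b, 4096, &offset, ft, fd, false);  // assign space
//   // ... write 4096 bytes at 'offset' ...
//   bt->free_blocknum(&b, ft, false);  // release blocknum and, if safe, space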

void block_table::_free_blocknum_in_translation(struct translation *t,
                                                BLOCKNUM b) {
    _verify_valid_freeable_blocknum(t, b);
    paranoid_invariant(t->block_translation[b.b].size != size_is_free);

    t->block_translation[b.b].size = size_is_free;
    t->block_translation[b.b].u.next_free_blocknum = t->blocknum_freelist_head;
    t->blocknum_freelist_head = b;
}

// Effect: Free a blocknum.
// If the blocknum holds the only reference to a block on disk, free that block
void block_table::_free_blocknum_unlocked(BLOCKNUM *bp,
                                          FT ft,
                                          bool for_checkpoint) {
    toku_mutex_assert_locked(&_mutex);
    BLOCKNUM b = *bp;
    bp->b = 0;  // Remove caller's reference.

    struct block_translation_pair old_pair = _current.block_translation[b.b];

    _free_blocknum_in_translation(&_current, b);
    if (for_checkpoint) {
        paranoid_invariant(ft->checkpoint_header->type ==
                           FT_CHECKPOINT_INPROGRESS);
        _free_blocknum_in_translation(&_inprogress, b);
    }

    // If the size is 0, no disk block has ever been assigned to this blocknum.
    if (old_pair.size > 0) {
        // Free the old block if it is not still in use by the checkpoint in
        // progress or the previous checkpoint
        bool cannot_free =
            _translation_prevents_freeing(&_inprogress, b, &old_pair) ||
            _translation_prevents_freeing(&_checkpointed, b, &old_pair);
        if (!cannot_free) {
            _bt_block_allocator->FreeBlock(old_pair.u.diskoff, old_pair.size);
        }
    } else {
        paranoid_invariant(old_pair.size == 0);
        paranoid_invariant(old_pair.u.diskoff == diskoff_unused);
    }
    ft_set_dirty(ft, for_checkpoint);
}

void block_table::free_blocknum(BLOCKNUM *bp, FT ft, bool for_checkpoint) {
    _mutex_lock();
    _free_blocknum_unlocked(bp, ft, for_checkpoint);
    _mutex_unlock();
}

// Verify there are no free blocks.
void block_table::verify_no_free_blocknums() {
    invariant(_current.blocknum_freelist_head.b == freelist_null.b);
}

// Frees blocknums that have a size of 0 and unused diskoff
// Currently used for eliminating unused cached rollback log nodes
void block_table::free_unused_blocknums(BLOCKNUM root) {
    _mutex_lock();
    int64_t smallest = _current.smallest_never_used_blocknum.b;
    for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) {
        if (i == root.b) {
            continue;
        }
        BLOCKNUM b = make_blocknum(i);
        if (_current.block_translation[b.b].size == 0) {
            invariant(_current.block_translation[b.b].u.diskoff ==
                      diskoff_unused);
            _free_blocknum_in_translation(&_current, b);
        }
    }
    _mutex_unlock();
}

bool block_table::_no_data_blocks_except_root(BLOCKNUM root) {
    bool ok = true;
    _mutex_lock();
    int64_t smallest = _current.smallest_never_used_blocknum.b;
    if (root.b < RESERVED_BLOCKNUMS) {
        ok = false;
        goto cleanup;
    }
    for (int64_t i = RESERVED_BLOCKNUMS; i < smallest; i++) {
        if (i == root.b) {
            continue;
        }
        BLOCKNUM b = make_blocknum(i);
        if (_current.block_translation[b.b].size != size_is_free) {
            ok = false;
            goto cleanup;
        }
    }
cleanup:
    _mutex_unlock();
    return ok;
}

// Verify there are no data blocks except root.
// TODO(leif): This actually takes a lock, but I don't want to fix all the
// callers right now.
void block_table::verify_no_data_blocks_except_root(BLOCKNUM UU(root)) {
    paranoid_invariant(_no_data_blocks_except_root(root));
}

bool block_table::_blocknum_allocated(BLOCKNUM b) {
    _mutex_lock();
    struct translation *t = &_current;
    _verify_valid_blocknum(t, b);
    bool ok = t->block_translation[b.b].size != size_is_free;
    _mutex_unlock();
    return ok;
}

// Verify a blocknum is currently allocated.
void block_table::verify_blocknum_allocated(BLOCKNUM UU(b)) {
    paranoid_invariant(_blocknum_allocated(b));
}

// Only used by toku_dump_translation table (debug info)
void block_table::_dump_translation_internal(FILE *f, struct translation *t) {
    if (t->block_translation) {
        BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_TRANSLATION);
        fprintf(f, " length_of_array[%" PRId64 "]", t->length_of_array);
        fprintf(f,
                " smallest_never_used_blocknum[%" PRId64 "]",
                t->smallest_never_used_blocknum.b);
        fprintf(f,
                " blocknum_free_list_head[%" PRId64 "]",
                t->blocknum_freelist_head.b);
        fprintf(
            f, " size_on_disk[%" PRId64 "]", t->block_translation[b.b].size);
        fprintf(f,
                " location_on_disk[%" PRId64 "]\n",
                t->block_translation[b.b].u.diskoff);
        int64_t i;
        for (i = 0; i < t->length_of_array; i++) {
            fprintf(f,
                    " %" PRId64 ": %" PRId64 " %" PRId64 "\n",
                    i,
                    t->block_translation[i].u.diskoff,
                    t->block_translation[i].size);
        }
        fprintf(f, "\n");
    } else {
        fprintf(f, " does not exist\n");
    }
}

// Only used by toku_ft_dump, which is only for debugging purposes.
// "pretty" just means we use tabs so we can parse the output more easily later.
void block_table::dump_translation_table_pretty(FILE *f) {
    _mutex_lock();
    struct translation *t = &_checkpointed;
    invariant(t->block_translation != nullptr);
    for (int64_t i = 0; i < t->length_of_array; ++i) {
        fprintf(f,
                "%" PRId64 "\t%" PRId64 "\t%" PRId64 "\n",
                i,
                t->block_translation[i].u.diskoff,
                t->block_translation[i].size);
    }
    _mutex_unlock();
}

// Only used by toku_ft_dump which is only for debugging purposes
void block_table::dump_translation_table(FILE *f) {
    _mutex_lock();
    fprintf(f, "Current block translation:");
    _dump_translation_internal(f, &_current);
    fprintf(f, "Checkpoint in progress block translation:");
    _dump_translation_internal(f, &_inprogress);
    fprintf(f, "Checkpointed block translation:");
    _dump_translation_internal(f, &_checkpointed);
    _mutex_unlock();
}

// Only used by ftdump
void block_table::blocknum_dump_translation(BLOCKNUM b) {
    _mutex_lock();

    struct translation *t = &_current;
    if (b.b < t->length_of_array) {
        struct block_translation_pair *bx = &t->block_translation[b.b];
        printf("%" PRId64 ": %" PRId64 " %" PRId64 "\n",
               b.b,
               bx->u.diskoff,
               bx->size);
    }
    _mutex_unlock();
}

// Must not call this function when anything else is using the blocktable.
// No one may use the blocktable afterwards.
void block_table::destroy(void) {
    // TODO: translation.destroy();
    toku_free(_current.block_translation);
    toku_free(_inprogress.block_translation);
    toku_free(_checkpointed.block_translation);

    _bt_block_allocator->Destroy();
    delete _bt_block_allocator;
    toku_mutex_destroy(&_mutex);
    nb_mutex_destroy(&_safe_file_size_lock);
}

int block_table::_translation_deserialize_from_buffer(
    struct translation *t,
    DISKOFF location_on_disk,
    uint64_t size_on_disk,
    // in: buffer with serialized translation
    unsigned char *translation_buffer) {
    int r = 0;
    invariant(location_on_disk != 0);
    t->type = TRANSLATION_CHECKPOINTED;

    // check the checksum
    uint32_t x1764 = toku_x1764_memory(translation_buffer, size_on_disk - 4);
    uint64_t offset = size_on_disk - 4;
    uint32_t stored_x1764 = toku_dtoh32(*(int *)(translation_buffer + offset));
    if (x1764 != stored_x1764) {
        fprintf(stderr,
                "Translation table checksum failure: calc=0x%08x read=0x%08x\n",
                x1764,
                stored_x1764);
        r = TOKUDB_BAD_CHECKSUM;
        goto exit;
    }

    struct rbuf rb;
    rb.buf = translation_buffer;
    rb.ndone = 0;
    rb.size = size_on_disk - 4;  // 4 == checksum

    t->smallest_never_used_blocknum = rbuf_blocknum(&rb);
    t->length_of_array = t->smallest_never_used_blocknum.b;
    invariant(t->smallest_never_used_blocknum.b >= RESERVED_BLOCKNUMS);
    t->blocknum_freelist_head = rbuf_blocknum(&rb);
    XMALLOC_N(t->length_of_array, t->block_translation);
    for (int64_t i = 0; i < t->length_of_array; i++) {
        t->block_translation[i].u.diskoff = rbuf_DISKOFF(&rb);
        t->block_translation[i].size = rbuf_DISKOFF(&rb);
    }
    invariant(_calculate_size_on_disk(t) == (int64_t)size_on_disk);
    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].size ==
              (int64_t)size_on_disk);
    invariant(t->block_translation[RESERVED_BLOCKNUM_TRANSLATION].u.diskoff ==
              location_on_disk);

exit:
    return r;
}

int block_table::iterate(enum translation_type type,
                         BLOCKTABLE_CALLBACK f,
                         void *extra,
                         bool data_only,
                         bool used_only) {
    struct translation *src;

    int r = 0;
    switch (type) {
        case TRANSLATION_CURRENT:
            src = &_current;
            break;
        case TRANSLATION_INPROGRESS:
            src = &_inprogress;
            break;
        case TRANSLATION_CHECKPOINTED:
            src = &_checkpointed;
            break;
        default:
            r = EINVAL;
    }

    struct translation fakecurrent;
    memset(&fakecurrent, 0, sizeof(struct translation));

    struct translation *t = &fakecurrent;
    if (r == 0) {
        _mutex_lock();
        _copy_translation(t, src, TRANSLATION_DEBUG);
        t->block_translation[RESERVED_BLOCKNUM_TRANSLATION] =
            src->block_translation[RESERVED_BLOCKNUM_TRANSLATION];
        _mutex_unlock();
        int64_t i;
        for (i = 0; i < t->smallest_never_used_blocknum.b; i++) {
            struct block_translation_pair pair = t->block_translation[i];
            if (data_only && i < RESERVED_BLOCKNUMS)
                continue;
            if (used_only && pair.size <= 0)
                continue;
            r = f(make_blocknum(i), pair.size, pair.u.diskoff, extra);
            if (r != 0)
                break;
        }
        toku_free(t->block_translation);
    }
    return r;
}
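
// Callback sketch (illustrative): count the blocks currently in use.
//
//   static int count_cb(BLOCKNUM UU(b), int64_t UU(size), int64_t UU(address),
//                       void *extra) {
//       (*(int64_t *)extra)++;
//       return 0;  // a nonzero return stops the iteration
//   }
//   int64_t n = 0;
//   int r = bt->iterate(TRANSLATION_CURRENT, count_cb, &n, true, true);
//
// frag_helper below is a real in-tree example of the same pattern.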

typedef struct {
    int64_t used_space;
    int64_t total_space;
} frag_extra;

static int frag_helper(BLOCKNUM UU(b),
                       int64_t size,
                       int64_t address,
                       void *extra) {
    frag_extra *info = (frag_extra *)extra;

    if (size + address > info->total_space)
        info->total_space = size + address;
    info->used_space += size;
    return 0;
}

void block_table::internal_fragmentation(int64_t *total_sizep,
                                         int64_t *used_sizep) {
    frag_extra info = {0, 0};
    int r = iterate(TRANSLATION_CHECKPOINTED, frag_helper, &info, false, true);
    invariant_zero(r);

    if (total_sizep)
        *total_sizep = info.total_space;
    if (used_sizep)
        *used_sizep = info.used_space;
}

void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size,
                                                       DISKOFF *offset,
                                                       FT ft) {
    toku_mutex_assert_locked(&_mutex);
    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
    _realloc_on_disk_internal(b, size, offset, ft, false);
}

void block_table::realloc_descriptor_on_disk(DISKOFF size,
                                             DISKOFF *offset,
                                             FT ft,
                                             int fd) {
    _mutex_lock();
    _realloc_descriptor_on_disk_unlocked(size, offset, ft);
    _ensure_safe_write_unlocked(fd, size, *offset);
    _mutex_unlock();
}

void block_table::get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size) {
    _mutex_lock();
    BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
    _translate_blocknum_to_offset_size_unlocked(b, offset, size);
    _mutex_unlock();
}

void block_table::get_fragmentation_unlocked(TOKU_DB_FRAGMENTATION report) {
    // Requires: blocktable lock is held.
    // Requires: report->file_size_bytes is already filled in.

    // Count the headers.
    report->data_bytes = BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
    report->data_blocks = 1;
    report->checkpoint_bytes_additional =
        BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE;
    report->checkpoint_blocks_additional = 1;

    struct translation *current = &_current;
    for (int64_t i = 0; i < current->length_of_array; i++) {
        struct block_translation_pair *pair = &current->block_translation[i];
        if (pair->size > 0) {
            report->data_bytes += pair->size;
            report->data_blocks++;
        }
    }

    struct translation *checkpointed = &_checkpointed;
    for (int64_t i = 0; i < checkpointed->length_of_array; i++) {
        struct block_translation_pair *pair =
            &checkpointed->block_translation[i];
        if (pair->size > 0 &&
            !(i < current->length_of_array &&
              current->block_translation[i].size > 0 &&
              current->block_translation[i].u.diskoff == pair->u.diskoff)) {
            report->checkpoint_bytes_additional += pair->size;
            report->checkpoint_blocks_additional++;
        }
    }

    struct translation *inprogress = &_inprogress;
    for (int64_t i = 0; i < inprogress->length_of_array; i++) {
        struct block_translation_pair *pair = &inprogress->block_translation[i];
        if (pair->size > 0 &&
            !(i < current->length_of_array &&
              current->block_translation[i].size > 0 &&
              current->block_translation[i].u.diskoff == pair->u.diskoff) &&
            !(i < checkpointed->length_of_array &&
              checkpointed->block_translation[i].size > 0 &&
              checkpointed->block_translation[i].u.diskoff ==
                  pair->u.diskoff)) {
            report->checkpoint_bytes_additional += pair->size;
            report->checkpoint_blocks_additional++;
        }
    }

    _bt_block_allocator->UnusedStatistics(report);
}
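
// Accounting note: an extent is charged to checkpoint_bytes_additional only
// when no younger translation (current, and for the inprogress pass also
// checkpointed) maps the same diskoff, i.e. only space held exclusively for
// checkpoint safety counts as "additional".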

void block_table::get_info64(struct ftinfo64 *s) {
    _mutex_lock();

    struct translation *current = &_current;
    s->num_blocks_allocated = current->length_of_array;
    s->num_blocks_in_use = 0;
    s->size_allocated = 0;
    s->size_in_use = 0;

    for (int64_t i = 0; i < current->length_of_array; ++i) {
        struct block_translation_pair *block = &current->block_translation[i];
        if (block->size != size_is_free) {
            ++s->num_blocks_in_use;
            s->size_in_use += block->size;
            if (block->u.diskoff != diskoff_unused) {
                uint64_t limit = block->u.diskoff + block->size;
                if (limit > s->size_allocated) {
                    s->size_allocated = limit;
                }
            }
        }
    }

    _mutex_unlock();
}

int block_table::iterate_translation_tables(
    uint64_t checkpoint_count,
    int (*iter)(uint64_t checkpoint_count,
                int64_t total_num_rows,
                int64_t blocknum,
                int64_t diskoff,
                int64_t size,
                void *extra),
    void *iter_extra) {
    int error = 0;
    _mutex_lock();

    int64_t total_num_rows =
        _current.length_of_array + _checkpointed.length_of_array;
    for (int64_t i = 0; error == 0 && i < _current.length_of_array; ++i) {
        struct block_translation_pair *block = &_current.block_translation[i];
        error = iter(checkpoint_count,
                     total_num_rows,
                     i,
                     block->u.diskoff,
                     block->size,
                     iter_extra);
    }
    for (int64_t i = 0; error == 0 && i < _checkpointed.length_of_array; ++i) {
        struct block_translation_pair *block =
            &_checkpointed.block_translation[i];
        error = iter(checkpoint_count - 1,
                     total_num_rows,
                     i,
                     block->u.diskoff,
                     block->size,
                     iter_extra);
    }

    _mutex_unlock();
    return error;
}