FT-259 Clean up memarena API / code. Use a memarena in the locktree to store

each transaction's ranges instead of a hand-rolled buffer.
This commit is contained in:
John Esmet 2014-06-14 20:40:33 -04:00
parent ac575d01ba
commit b6abf2063c
16 changed files with 770 additions and 522 deletions

View file

@ -2548,7 +2548,7 @@ serialize_rollback_log_node_to_buf(ROLLBACK_LOG_NODE log, char *buf, size_t calc
wbuf_nocrc_BLOCKNUM(&wb, log->previous);
wbuf_nocrc_ulonglong(&wb, log->rollentry_resident_bytecount);
//Write down memarena size needed to restore
wbuf_nocrc_ulonglong(&wb, toku_memarena_total_size_in_use(log->rollentry_arena));
wbuf_nocrc_ulonglong(&wb, log->rollentry_arena.total_size_in_use());
{
//Store rollback logs
@ -2712,8 +2712,8 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p,
result->rollentry_resident_bytecount = rbuf_ulonglong(rb);
size_t arena_initial_size = rbuf_ulonglong(rb);
result->rollentry_arena = toku_memarena_create_presized(arena_initial_size);
if (0) { died1: toku_memarena_destroy(&result->rollentry_arena); goto died0; }
result->rollentry_arena.create(arena_initial_size);
if (0) { died1: result->rollentry_arena.destroy(); goto died0; }
//Load rollback entries
lazy_assert(rb->size > 4);
@ -2725,7 +2725,7 @@ deserialize_rollback_log_from_rbuf (BLOCKNUM blocknum, ROLLBACK_LOG_NODE *log_p,
bytevec item_vec;
rbuf_literal_bytes(rb, &item_vec, rollback_fsize-4);
unsigned char* item_buf = (unsigned char*)item_vec;
r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, result->rollentry_arena);
r = toku_parse_rollback(item_buf, rollback_fsize-4, &item, &result->rollentry_arena);
if (r!=0) {
r = toku_db_badformat();
goto died1;

View file

@ -798,7 +798,7 @@ generate_rollbacks (void) {
fprintf(cf, " }\n assert(0);\n return 0;\n");
fprintf(cf, "}\n");
fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, MEMARENA ma)");
fprintf2(cf, hf, "int toku_parse_rollback(unsigned char *buf, uint32_t n_bytes, struct roll_entry **itemp, memarena *ma)");
fprintf(hf, ";\n");
fprintf(cf, " {\n assert(n_bytes>0);\n struct roll_entry *item;\n enum rt_cmd cmd = (enum rt_cmd)(buf[0]);\n size_t mem_needed;\n");
fprintf(cf, " struct rbuf rc = {buf, n_bytes, 1};\n");
@ -806,7 +806,7 @@ generate_rollbacks (void) {
DO_ROLLBACKS(lt, {
fprintf(cf, " case RT_%s:\n", lt->name);
fprintf(cf, " mem_needed = sizeof(item->u.%s) + __builtin_offsetof(struct roll_entry, u.%s);\n", lt->name, lt->name);
fprintf(cf, " CAST_FROM_VOIDP(item, toku_memarena_malloc(ma, mem_needed));\n");
fprintf(cf, " CAST_FROM_VOIDP(item, ma->malloc_from_arena(mem_needed));\n");
fprintf(cf, " item->cmd = cmd;\n");
DO_FIELDS(field_type, lt, fprintf(cf, " rbuf_ma_%s(&rc, ma, &item->u.%s.%s);\n", field_type->type, lt->name, field_type->name));
fprintf(cf, " *itemp = item;\n");

View file

@ -92,13 +92,14 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <toku_portability.h>
#include "toku_assert.h"
#include "fttypes.h"
#include "memory.h"
#include <toku_htonl.h>
#include <string.h>
#include <util/memarena.h>
#include "ft/fttypes.h"
#include "portability/memory.h"
#include "portability/toku_assert.h"
#include "portability/toku_htonl.h"
#include "portability/toku_portability.h"
#include "util/memarena.h"
struct rbuf {
unsigned char *buf;
@ -122,11 +123,11 @@ static inline unsigned char rbuf_char (struct rbuf *r) {
return r->buf[r->ndone++];
}
static inline void rbuf_ma_uint8_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint8_t *num) {
static inline void rbuf_ma_uint8_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint8_t *num) {
*num = rbuf_char(r);
}
static inline void rbuf_ma_bool (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), bool *b) {
static inline void rbuf_ma_bool (struct rbuf *r, memarena *ma __attribute__((__unused__)), bool *b) {
uint8_t n = rbuf_char(r);
*b = (n!=0);
}
@ -199,15 +200,15 @@ static inline BLOCKNUM rbuf_blocknum (struct rbuf *r) {
BLOCKNUM result = make_blocknum(rbuf_longlong(r));
return result;
}
static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), BLOCKNUM *blocknum) {
static inline void rbuf_ma_BLOCKNUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), BLOCKNUM *blocknum) {
*blocknum = rbuf_blocknum(r);
}
static inline void rbuf_ma_uint32_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint32_t *num) {
static inline void rbuf_ma_uint32_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint32_t *num) {
*num = rbuf_int(r);
}
static inline void rbuf_ma_uint64_t (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), uint64_t *num) {
static inline void rbuf_ma_uint64_t (struct rbuf *r, memarena *ma __attribute__((__unused__)), uint64_t *num) {
*num = rbuf_ulonglong(r);
}
@ -221,18 +222,18 @@ static inline void rbuf_TXNID_PAIR (struct rbuf *r, TXNID_PAIR *txnid) {
txnid->child_id64 = rbuf_ulonglong(r);
}
static inline void rbuf_ma_TXNID (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID *txnid) {
static inline void rbuf_ma_TXNID (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID *txnid) {
rbuf_TXNID(r, txnid);
}
static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), TXNID_PAIR *txnid) {
static inline void rbuf_ma_TXNID_PAIR (struct rbuf *r, memarena *ma __attribute__((__unused__)), TXNID_PAIR *txnid) {
rbuf_TXNID_PAIR(r, txnid);
}
static inline void rbuf_FILENUM (struct rbuf *r, FILENUM *filenum) {
filenum->fileid = rbuf_int(r);
}
static inline void rbuf_ma_FILENUM (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUM *filenum) {
static inline void rbuf_ma_FILENUM (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUM *filenum) {
rbuf_FILENUM(r, filenum);
}
@ -248,9 +249,9 @@ static inline void rbuf_FILENUMS(struct rbuf *r, FILENUMS *filenums) {
}
// 2954
static inline void rbuf_ma_FILENUMS (struct rbuf *r, MEMARENA ma __attribute__((__unused__)), FILENUMS *filenums) {
static inline void rbuf_ma_FILENUMS (struct rbuf *r, memarena *ma __attribute__((__unused__)), FILENUMS *filenums) {
rbuf_ma_uint32_t(r, ma, &(filenums->num));
filenums->filenums = (FILENUM *) toku_memarena_malloc(ma, filenums->num * sizeof(FILENUM) );
filenums->filenums = (FILENUM *) ma->malloc_from_arena(filenums->num * sizeof(FILENUM));
assert(filenums->filenums != NULL);
for (uint32_t i=0; i < filenums->num; i++) {
rbuf_ma_FILENUM(r, ma, &(filenums->filenums[i]));
@ -267,11 +268,12 @@ static inline void rbuf_BYTESTRING (struct rbuf *r, BYTESTRING *bs) {
r->ndone = newndone;
}
static inline void rbuf_ma_BYTESTRING (struct rbuf *r, MEMARENA ma, BYTESTRING *bs) {
static inline void rbuf_ma_BYTESTRING (struct rbuf *r, memarena *ma, BYTESTRING *bs) {
bs->len = rbuf_int(r);
uint32_t newndone = r->ndone + bs->len;
assert(newndone <= r->size);
bs->data = (char *) toku_memarena_memdup(ma, &r->buf[r->ndone], (size_t)bs->len);
bs->data = (char *) ma->malloc_from_arena(bs->len);
assert(bs->data);
memcpy(bs->data, &r->buf[r->ndone], bs->len);
r->ndone = newndone;
}

View file

@ -258,9 +258,9 @@ int toku_rollback_commit(TOKUTXN txn, LSN lsn) {
}
child_log->newest_logentry = child_log->oldest_logentry = 0;
// Put all the memarena data into the parent.
if (toku_memarena_total_size_in_use(child_log->rollentry_arena) > 0) {
if (child_log->rollentry_arena.total_size_in_use() > 0) {
// If there are no bytes to move, then just leave things alone, and let the memory be reclaimed when the txn is closed.
toku_memarena_move_buffers(parent_log->rollentry_arena, child_log->rollentry_arena);
child_log->rollentry_arena.move_memory(&parent_log->rollentry_arena);
}
// each txn tries to give back at most one rollback log node
// to the cache. All other rollback log nodes for this child

View file

@ -120,13 +120,17 @@ toku_find_xid_by_xid (const TXNID &xid, const TXNID &xidfind) {
return 0;
}
// TODO: fix this name
// toku_rollback_malloc
void *toku_malloc_in_rollback(ROLLBACK_LOG_NODE log, size_t size) {
return toku_memarena_malloc(log->rollentry_arena, size);
return log->rollentry_arena.malloc_from_arena(size);
}
// TODO: fix this name
// toku_rollback_memdup
void *toku_memdup_in_rollback(ROLLBACK_LOG_NODE log, const void *v, size_t len) {
void *r=toku_malloc_in_rollback(log, len);
memcpy(r,v,len);
void *r = toku_malloc_in_rollback(log, len);
memcpy(r, v, len);
return r;
}
@ -145,8 +149,8 @@ static inline PAIR_ATTR make_rollback_pair_attr(long size) {
PAIR_ATTR
rollback_memory_size(ROLLBACK_LOG_NODE log) {
size_t size = sizeof(*log);
if (log->rollentry_arena) {
size += toku_memarena_total_footprint(log->rollentry_arena);
if (&log->rollentry_arena) {
size += log->rollentry_arena.total_footprint();
}
return make_rollback_pair_attr(size);
}
@ -175,12 +179,10 @@ void rollback_empty_log_init(ROLLBACK_LOG_NODE log) {
log->previous = make_blocknum(0);
log->oldest_logentry = NULL;
log->newest_logentry = NULL;
log->rollentry_arena = NULL;
log->rollentry_arena.create(0);
log->rollentry_resident_bytecount = 0;
}
static void rollback_initialize_for_txn(
ROLLBACK_LOG_NODE log,
TOKUTXN txn,
@ -192,13 +194,14 @@ static void rollback_initialize_for_txn(
log->previous = previous;
log->oldest_logentry = NULL;
log->newest_logentry = NULL;
log->rollentry_arena = toku_memarena_create();
log->rollentry_arena.create(1024);
log->rollentry_resident_bytecount = 0;
log->dirty = true;
}
// TODO: fix this name
void make_rollback_log_empty(ROLLBACK_LOG_NODE log) {
toku_memarena_destroy(&log->rollentry_arena);
log->rollentry_arena.destroy();
rollback_empty_log_init(log);
}

View file

@ -165,7 +165,7 @@ struct rollback_log_node {
BLOCKNUM previous;
struct roll_entry *oldest_logentry;
struct roll_entry *newest_logentry;
MEMARENA rollentry_arena;
struct memarena rollentry_arena;
size_t rollentry_resident_bytecount; // How many bytes for the rollentries that are stored in main memory.
PAIR ct_pair;
};

View file

@ -258,18 +258,18 @@ void locktree::sto_append(const DBT *left_key, const DBT *right_key) {
keyrange range;
range.create(left_key, right_key);
buffer_mem = m_sto_buffer.get_num_bytes();
buffer_mem = m_sto_buffer.total_memory_size();
m_sto_buffer.append(left_key, right_key);
delta = m_sto_buffer.get_num_bytes() - buffer_mem;
delta = m_sto_buffer.total_memory_size() - buffer_mem;
if (m_mgr != nullptr) {
m_mgr->note_mem_used(delta);
}
}
void locktree::sto_end(void) {
uint64_t num_bytes = m_sto_buffer.get_num_bytes();
uint64_t mem_size = m_sto_buffer.total_memory_size();
if (m_mgr != nullptr) {
m_mgr->note_mem_released(num_bytes);
m_mgr->note_mem_released(mem_size);
}
m_sto_buffer.destroy();
m_sto_buffer.create();
@ -302,9 +302,8 @@ void locktree::sto_migrate_buffer_ranges_to_tree(void *prepared_lkr) {
sto_rangetree.create(m_cmp);
// insert all of the ranges from the single txnid buffer into a new rangetree
range_buffer::iterator iter;
range_buffer::iterator iter(&m_sto_buffer);
range_buffer::iterator::record rec;
iter.create(&m_sto_buffer);
while (iter.current(&rec)) {
sto_lkr.prepare(&sto_rangetree);
int r = acquire_lock_consolidated(&sto_lkr,
@ -575,9 +574,8 @@ void locktree::release_locks(TXNID txnid, const range_buffer *ranges) {
// locks are already released, otherwise we need to do it here.
bool released = sto_try_release(txnid);
if (!released) {
range_buffer::iterator iter;
range_buffer::iterator iter(ranges);
range_buffer::iterator::record rec;
iter.create(ranges);
while (iter.current(&rec)) {
const DBT *left_key = rec.get_left_key();
const DBT *right_key = rec.get_right_key();
@ -647,10 +645,10 @@ struct txnid_range_buffer {
TXNID txnid;
range_buffer buffer;
static int find_by_txnid(const struct txnid_range_buffer &other_buffer, const TXNID &txnid) {
if (txnid < other_buffer.txnid) {
static int find_by_txnid(struct txnid_range_buffer *const &other_buffer, const TXNID &txnid) {
if (txnid < other_buffer->txnid) {
return -1;
} else if (other_buffer.txnid == txnid) {
} else if (other_buffer->txnid == txnid) {
return 0;
} else {
return 1;
@ -666,7 +664,7 @@ struct txnid_range_buffer {
// has locks in a random/alternating order, then this does
// not work so well.
void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_escalate_callback_extra) {
omt<struct txnid_range_buffer, struct txnid_range_buffer *> range_buffers;
omt<struct txnid_range_buffer *, struct txnid_range_buffer *> range_buffers;
range_buffers.create();
// prepare and acquire a locked keyrange on the entire locktree
@ -716,7 +714,6 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
// Try to find a range buffer for the current txnid. Create one if it doesn't exist.
// Then, append the new escalated range to the buffer.
uint32_t idx;
struct txnid_range_buffer new_range_buffer;
struct txnid_range_buffer *existing_range_buffer;
int r = range_buffers.find_zero<TXNID, txnid_range_buffer::find_by_txnid>(
current_txnid,
@ -724,9 +721,10 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
&idx
);
if (r == DB_NOTFOUND) {
new_range_buffer.txnid = current_txnid;
new_range_buffer.buffer.create();
new_range_buffer.buffer.append(escalated_left_key, escalated_right_key);
struct txnid_range_buffer *XMALLOC(new_range_buffer);
new_range_buffer->txnid = current_txnid;
new_range_buffer->buffer.create();
new_range_buffer->buffer.append(escalated_left_key, escalated_right_key);
range_buffers.insert_at(new_range_buffer, idx);
} else {
invariant_zero(r);
@ -754,9 +752,8 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
invariant_zero(r);
const TXNID current_txnid = current_range_buffer->txnid;
range_buffer::iterator iter;
range_buffer::iterator iter(&current_range_buffer->buffer);
range_buffer::iterator::record rec;
iter.create(&current_range_buffer->buffer);
while (iter.current(&rec)) {
keyrange range;
range.create(rec.get_left_key(), rec.get_right_key());
@ -771,6 +768,15 @@ void locktree::escalate(lt_escalate_cb after_escalate_callback, void *after_esca
}
current_range_buffer->buffer.destroy();
}
while (range_buffers.size() > 0) {
struct txnid_range_buffer *buffer;
int r = range_buffers.fetch(0, &buffer);
invariant_zero(r);
r = range_buffers.delete_at(0);
invariant_zero(r);
toku_free(buffer);
}
range_buffers.destroy();
lkr.release();

View file

@ -137,7 +137,6 @@ namespace toku {
class locktree;
class locktree_manager;
class lock_request;
class memory_tracker;
class concurrent_tree;
typedef int (*lt_create_cb)(locktree *lt, void *extra);
@ -246,7 +245,6 @@ namespace toku {
// tracks the current number of locks and lock memory
uint64_t m_max_lock_memory;
uint64_t m_current_lock_memory;
memory_tracker *m_mem_tracker;
struct lt_counters m_lt_counters;

View file

@ -97,207 +97,201 @@ PATENT RIGHTS GRANT:
namespace toku {
bool range_buffer::record_header::left_is_infinite(void) const {
return left_neg_inf || left_pos_inf;
}
bool range_buffer::record_header::right_is_infinite(void) const {
return right_neg_inf || right_pos_inf;
}
void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) {
left_neg_inf = left_key == toku_dbt_negative_infinity();
left_pos_inf = left_key == toku_dbt_positive_infinity();
left_key_size = toku_dbt_is_infinite(left_key) ? 0 : left_key->size;
if (right_key) {
right_neg_inf = right_key == toku_dbt_negative_infinity();
right_pos_inf = right_key == toku_dbt_positive_infinity();
right_key_size = toku_dbt_is_infinite(right_key) ? 0 : right_key->size;
} else {
right_neg_inf = left_neg_inf;
right_pos_inf = left_pos_inf;
right_key_size = 0;
}
}
const DBT *range_buffer::iterator::record::get_left_key(void) const {
if (m_header.left_neg_inf) {
return toku_dbt_negative_infinity();
} else if (m_header.left_pos_inf) {
return toku_dbt_positive_infinity();
} else {
return &m_left_key;
}
}
const DBT *range_buffer::iterator::record::get_right_key(void) const {
if (m_header.right_neg_inf) {
return toku_dbt_negative_infinity();
} else if (m_header.right_pos_inf) {
return toku_dbt_positive_infinity();
} else {
return &m_right_key;
}
}
size_t range_buffer::iterator::record::size(void) const {
return sizeof(record_header) + m_header.left_key_size + m_header.right_key_size;
}
void range_buffer::iterator::record::deserialize(const char *buf) {
size_t current = 0;
// deserialize the header
memcpy(&m_header, buf, sizeof(record_header));
current += sizeof(record_header);
// deserialize the left key if necessary
if (!m_header.left_is_infinite()) {
// point the left DBT's buffer into ours
toku_fill_dbt(&m_left_key, buf + current, m_header.left_key_size);
current += m_header.left_key_size;
bool range_buffer::record_header::left_is_infinite(void) const {
return left_neg_inf || left_pos_inf;
}
// deserialize the right key if necessary
if (!m_header.right_is_infinite()) {
if (m_header.right_key_size == 0) {
toku_copyref_dbt(&m_right_key, m_left_key);
bool range_buffer::record_header::right_is_infinite(void) const {
return right_neg_inf || right_pos_inf;
}
void range_buffer::record_header::init(const DBT *left_key, const DBT *right_key) {
left_neg_inf = left_key == toku_dbt_negative_infinity();
left_pos_inf = left_key == toku_dbt_positive_infinity();
left_key_size = toku_dbt_is_infinite(left_key) ? 0 : left_key->size;
if (right_key) {
right_neg_inf = right_key == toku_dbt_negative_infinity();
right_pos_inf = right_key == toku_dbt_positive_infinity();
right_key_size = toku_dbt_is_infinite(right_key) ? 0 : right_key->size;
} else {
toku_fill_dbt(&m_right_key, buf + current, m_header.right_key_size);
right_neg_inf = left_neg_inf;
right_pos_inf = left_pos_inf;
right_key_size = 0;
}
}
}
void range_buffer::iterator::create(const range_buffer *buffer) {
m_buffer = buffer;
m_current_offset = 0;
m_current_size = 0;
}
bool range_buffer::iterator::current(record *rec) {
if (m_current_offset < m_buffer->m_buf_current) {
rec->deserialize(m_buffer->m_buf + m_current_offset);
m_current_size = rec->size();
return true;
} else {
return false;
}
}
// move the iterator to the next record in the buffer
void range_buffer::iterator::next(void) {
invariant(m_current_offset < m_buffer->m_buf_current);
invariant(m_current_size > 0);
// the next record is m_current_size bytes forward
// now, we don't know how big the current is, set it to 0.
m_current_offset += m_current_size;
m_current_size = 0;
}
void range_buffer::create(void) {
// allocate buffer space lazily instead of on creation. this way,
// no malloc/free is done if the transaction ends up taking no locks.
m_buf = nullptr;
m_buf_size = 0;
m_buf_current = 0;
m_num_ranges = 0;
}
void range_buffer::append(const DBT *left_key, const DBT *right_key) {
// if the keys are equal, then only one copy is stored.
if (toku_dbt_equals(left_key, right_key)) {
append_point(left_key);
} else {
append_range(left_key, right_key);
}
m_num_ranges++;
}
bool range_buffer::is_empty(void) const {
return m_buf == nullptr;
}
uint64_t range_buffer::get_num_bytes(void) const {
return m_buf_current;
}
int range_buffer::get_num_ranges(void) const {
return m_num_ranges;
}
void range_buffer::destroy(void) {
if (m_buf) {
toku_free(m_buf);
}
}
void range_buffer::append_range(const DBT *left_key, const DBT *right_key) {
maybe_grow(sizeof(record_header) + left_key->size + right_key->size);
record_header h;
h.init(left_key, right_key);
// serialize the header
memcpy(m_buf + m_buf_current, &h, sizeof(record_header));
m_buf_current += sizeof(record_header);
// serialize the left key if necessary
if (!h.left_is_infinite()) {
memcpy(m_buf + m_buf_current, left_key->data, left_key->size);
m_buf_current += left_key->size;
}
// serialize the right key if necessary
if (!h.right_is_infinite()) {
memcpy(m_buf + m_buf_current, right_key->data, right_key->size);
m_buf_current += right_key->size;
}
}
void range_buffer::append_point(const DBT *key) {
maybe_grow(sizeof(record_header) + key->size);
record_header h;
h.init(key, nullptr);
// serialize the header
memcpy(m_buf + m_buf_current, &h, sizeof(record_header));
m_buf_current += sizeof(record_header);
// serialize the key if necessary
if (!h.left_is_infinite()) {
memcpy(m_buf + m_buf_current, key->data, key->size);
m_buf_current += key->size;
}
}
void range_buffer::maybe_grow(size_t size) {
static const size_t initial_size = 4096;
static const size_t aggressive_growth_threshold = 128 * 1024;
const size_t needed = m_buf_current + size;
if (m_buf_size < needed) {
if (m_buf_size == 0) {
m_buf_size = initial_size;
const DBT *range_buffer::iterator::record::get_left_key(void) const {
if (_header.left_neg_inf) {
return toku_dbt_negative_infinity();
} else if (_header.left_pos_inf) {
return toku_dbt_positive_infinity();
} else {
return &_left_key;
}
// aggressively grow the range buffer to the threshold,
// but only additively increase the size after that.
while (m_buf_size < needed && m_buf_size < aggressive_growth_threshold) {
m_buf_size <<= 1;
}
while (m_buf_size < needed) {
m_buf_size += aggressive_growth_threshold;
}
XREALLOC(m_buf, m_buf_size);
}
}
size_t range_buffer::get_initial_size(size_t n) const {
size_t r = 4096;
while (r < n) {
r *= 2;
const DBT *range_buffer::iterator::record::get_right_key(void) const {
if (_header.right_neg_inf) {
return toku_dbt_negative_infinity();
} else if (_header.right_pos_inf) {
return toku_dbt_positive_infinity();
} else {
return &_right_key;
}
}
size_t range_buffer::iterator::record::size(void) const {
    // Total serialized length of this record: the fixed-size header plus
    // the left and right key lengths recorded in that header. The iterator
    // uses this value to step to the following record.
    size_t total = sizeof(record_header);
    total += _header.left_key_size;
    total += _header.right_key_size;
    return total;
}
void range_buffer::iterator::record::deserialize(const char *buf) {
    // A serialized record is a record_header followed by the left key
    // bytes (if the left endpoint is finite) and then the right key bytes
    // (if the right endpoint is finite and has a non-zero length).
    memcpy(&_header, buf, sizeof(record_header));
    const char *payload = buf + sizeof(record_header);

    if (!_header.left_is_infinite()) {
        // The DBT is pointed at the caller's buffer; no key bytes are copied.
        toku_fill_dbt(&_left_key, payload, _header.left_key_size);
        payload += _header.left_key_size;
    }

    if (_header.right_is_infinite()) {
        return;
    }

    if (_header.right_key_size != 0) {
        toku_fill_dbt(&_right_key, payload, _header.right_key_size);
    } else {
        // A zero right length is what record_header::init() writes when no
        // right key was supplied, so the right key aliases the left one.
        toku_copyref_dbt(&_right_key, _left_key);
    }
}
// Default constructor: the chunk iterator is built from nullptr and all
// position bookkeeping is zeroed, so current() reports no record
// (offset 0 is not below a chunk maximum of 0).
toku::range_buffer::iterator::iterator()
    : _ma_chunk_iterator(nullptr),
      _current_chunk_base(nullptr),
      _current_chunk_offset(0),
      _current_chunk_max(0),
      _current_rec_size(0) {
}
// Construct an iterator over the records stored in `buffer`. The chunk
// iterator is bound to the buffer's arena, and the position state is then
// loaded from whatever chunk that iterator currently reports.
toku::range_buffer::iterator::iterator(const range_buffer *buffer)
    : _ma_chunk_iterator(&buffer->_arena),
      _current_chunk_base(nullptr),
      _current_chunk_offset(0),
      _current_chunk_max(0),
      _current_rec_size(0) {
    reset_current_chunk();
}
void range_buffer::iterator::reset_current_chunk() {
    // Start reading from the beginning of whatever chunk the arena's chunk
    // iterator is positioned on; current() also reports that chunk's size
    // through its out-parameter, which becomes our upper bound.
    _current_chunk_offset = 0;
    _current_chunk_base = _ma_chunk_iterator.current(&_current_chunk_max);
}
// Fill `rec` from the record at the iterator's current position.
// Returns false, leaving `rec` untouched, when the current chunk has no
// bytes left to read; returns true otherwise.
bool range_buffer::iterator::current(record *rec) {
    if (_current_chunk_offset >= _current_chunk_max) {
        return false;
    }

    const char *chunk_bytes = static_cast<const char *>(_current_chunk_base);
    rec->deserialize(chunk_bytes + _current_chunk_offset);
    // Remember the record's length so next() knows how far to advance.
    _current_rec_size = rec->size();
    return true;
}
// Advance past the record most recently returned by current().
// Requires that current() succeeded since the last call to next(): the
// invariants check that we are inside the chunk and know the record size.
void range_buffer::iterator::next(void) {
    invariant(_current_chunk_offset < _current_chunk_max);
    invariant(_current_rec_size > 0);

    // Step over the current record; the size of the record we land on is
    // unknown until current() deserializes it, so clear the cached size.
    _current_chunk_offset += _current_rec_size;
    _current_rec_size = 0;

    // Once this chunk is used up, move on to the next arena chunk if the
    // chunk iterator has one. Otherwise stay put at the end of this chunk.
    const bool chunk_exhausted = _current_chunk_offset >= _current_chunk_max;
    if (chunk_exhausted && _ma_chunk_iterator.more()) {
        _ma_chunk_iterator.next();
        reset_current_chunk();
    }
}
// Initialize an empty range buffer holding zero ranges.
void range_buffer::create(void) {
    _num_ranges = 0;
    // The arena is created with an initial size of 0 so that buffer space
    // is allocated lazily: the intent is that no malloc/free happens if
    // the transaction ends up taking no locks.
    _arena.create(0);
}
// Append the key range [left_key, right_key] to the buffer and bump the
// range count. When both keys compare equal under toku_dbt_equals(), the
// range is stored as a single point so only one copy of the key is kept.
//
// Key lengths are serialized into the record header's 16-bit length
// fields, so every stored length must be strictly less than MAX_KEY_SIZE
// (1 << 16). A key of exactly MAX_KEY_SIZE bytes would wrap to a stored
// length of 0 and desynchronize the record stream, so it is rejected by
// the invariants below instead of being accepted.
void range_buffer::append(const DBT *left_key, const DBT *right_key) {
    invariant(left_key->size < MAX_KEY_SIZE);
    if (toku_dbt_equals(left_key, right_key)) {
        append_point(left_key);
    } else {
        invariant(right_key->size < MAX_KEY_SIZE);
        append_range(left_key, right_key);
    }
    _num_ranges++;
}
// A range buffer is considered empty when its arena reports no bytes in use.
bool range_buffer::is_empty(void) const {
    const uint64_t bytes_in_use = total_memory_size();
    return bytes_in_use == 0;
}
// Report how much memory this range buffer is using, as given by the
// backing arena's total_size_in_use().
uint64_t range_buffer::total_memory_size(void) const {
return _arena.total_size_in_use();
}
// Number of ranges appended so far; append() increments this count once
// per call, whether the range was stored as a point or as a full range.
int range_buffer::get_num_ranges(void) const {
return _num_ranges;
}
// Tear down the range buffer by destroying its backing arena.
void range_buffer::destroy(void) {
_arena.destroy();
}
void range_buffer::append_range(const DBT *left_key, const DBT *right_key) {
size_t record_length = sizeof(record_header) + left_key->size + right_key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
h.init(left_key, right_key);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
buf += sizeof(record_header);
// serialize the left key if necessary
if (!h.left_is_infinite()) {
memcpy(buf, left_key->data, left_key->size);
buf += left_key->size;
}
// serialize the right key if necessary
if (!h.right_is_infinite()) {
memcpy(buf, right_key->data, right_key->size);
}
}
void range_buffer::append_point(const DBT *key) {
size_t record_length = sizeof(record_header) + key->size;
char *buf = reinterpret_cast<char *>(_arena.malloc_from_arena(record_length));
record_header h;
h.init(key, nullptr);
// serialize the header
memcpy(buf, &h, sizeof(record_header));
buf += sizeof(record_header);
// serialize the key if necessary
if (!h.left_is_infinite()) {
memcpy(buf, key->data, key->size);
}
}
return r;
}
} /* namespace toku */

View file

@ -91,128 +91,120 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <toku_stdint.h>
#include <ft/ybt.h>
#include "ft/ybt.h"
#include "portability/toku_stdint.h"
#include "util/memarena.h"
namespace toku {
// a key range buffer represents a set of key ranges that can
// be stored, iterated over, and then destroyed all at once.
// a key range buffer represents a set of key ranges that can
// be stored, iterated over, and then destroyed all at once.
class range_buffer {
private:
class range_buffer {
// Private in spirit: We fail POD asserts when we try to store range_buffers in an omt.
// So make it all public, but don't touch.
public:
//private:
// the key range buffer is a bunch of records in a row.
// each record has the following header, followed by the
// left key and right key data payload, if applicable.
// we limit keys to be 2^16, since we store lengths as 2 bytes.
static const size_t MAX_KEY_SIZE = 1 << 16;
// the key range buffer is a bunch of records in a row.
// each record has the following header, followed by the
// left key and right key data payload, if applicable.
struct record_header {
bool left_neg_inf;
bool left_pos_inf;
bool right_pos_inf;
bool right_neg_inf;
uint16_t left_key_size;
uint16_t right_key_size;
struct record_header {
bool left_neg_inf;
bool left_pos_inf;
bool right_pos_inf;
bool right_neg_inf;
uint32_t left_key_size;
uint32_t right_key_size;
bool left_is_infinite(void) const;
bool left_is_infinite(void) const;
bool right_is_infinite(void) const;
bool right_is_infinite(void) const;
void init(const DBT *left_key, const DBT *right_key);
};
static_assert(sizeof(record_header) == 12, "record header format is off");
public:
// the iterator abstracts reading over a buffer of variable length
// records one by one until there are no more left.
class iterator {
void init(const DBT *left_key, const DBT *right_key);
};
static_assert(sizeof(record_header) == 8, "record header format is off");
public:
// a record represents the user-view of a serialized key range.
// it handles positive and negative infinity and the optimized
// point range case, where left and right points share memory.
class record {
// the iterator abstracts reading over a buffer of variable length
// records one by one until there are no more left.
class iterator {
public:
// get a read-only pointer to the left key of this record's range
const DBT *get_left_key(void) const;
iterator();
iterator(const range_buffer *buffer);
// get a read-only pointer to the right key of this record's range
const DBT *get_right_key(void) const;
// a record represents the user-view of a serialized key range.
// it handles positive and negative infinity and the optimized
// point range case, where left and right points share memory.
class record {
public:
// get a read-only pointer to the left key of this record's range
const DBT *get_left_key(void) const;
// how big is this record? this tells us where the next record is
size_t size(void) const;
// get a read-only pointer to the right key of this record's range
const DBT *get_right_key(void) const;
// populate a record header and point our DBT's
// buffers into ours if they are not infinite.
void deserialize(const char *buf);
// how big is this record? this tells us where the next record is
size_t size(void) const;
// populate a record header and point our DBT's
// buffers into ours if they are not infinite.
void deserialize(const char *buf);
private:
record_header _header;
DBT _left_key;
DBT _right_key;
};
// populate the given record object with the current record.
// the memory referred to by record is valid for only
// as long as the record exists.
bool current(record *rec);
// move the iterator to the next record in the buffer
void next(void);
private:
record_header m_header;
DBT m_left_key;
DBT m_right_key;
void reset_current_chunk();
// the key range buffer we are iterating over, the current
// offset in that buffer, and the size of the current record.
memarena::chunk_iterator _ma_chunk_iterator;
const void *_current_chunk_base;
size_t _current_chunk_offset;
size_t _current_chunk_max;
size_t _current_rec_size;
};
void create(const range_buffer *buffer);
// allocate buffer space lazily instead of on creation. this way,
// no malloc/free is done if the transaction ends up taking no locks.
void create(void);
// populate the given record object with the current
// the memory referred to by record is valid for only
// as long as the record exists.
bool current(record *rec);
// append a left/right key range to the buffer.
// if the keys are equal, then only one copy is stored.
void append(const DBT *left_key, const DBT *right_key);
// move the iterator to the next record in the buffer
void next(void);
// is this range buffer empty?
bool is_empty(void) const;
// how much memory is being used by this range buffer?
uint64_t total_memory_size(void) const;
// how many ranges are stored in this range buffer?
int get_num_ranges(void) const;
void destroy(void);
private:
// the key range buffer we are iterating over, the current
// offset in that buffer, and the size of the current record.
const range_buffer *m_buffer;
size_t m_current_offset;
size_t m_current_size;
memarena _arena;
int _num_ranges;
void append_range(const DBT *left_key, const DBT *right_key);
// append a point to the buffer. this is the space/time saving
// optimization for key ranges where left == right.
void append_point(const DBT *key);
};
// allocate buffer space lazily instead of on creation. this way,
// no malloc/free is done if the transaction ends up taking no locks.
void create(void);
// append a left/right key range to the buffer.
// if the keys are equal, then only one copy is stored.
void append(const DBT *left_key, const DBT *right_key);
// is this range buffer empty?
bool is_empty(void) const;
// how many bytes are stored in this range buffer?
uint64_t get_num_bytes(void) const;
// how many ranges are stored in this range buffer?
int get_num_ranges(void) const;
void destroy(void);
//private:
char *m_buf;
size_t m_buf_size;
size_t m_buf_current;
int m_num_ranges;
void append_range(const DBT *left_key, const DBT *right_key);
// append a point to the buffer. this is the space/time saving
// optimization for key ranges where left == right.
void append_point(const DBT *key);
void maybe_grow(size_t size);
// the initial size of the buffer is the next power of 2
// greater than the first entry we insert into the buffer.
size_t get_initial_size(size_t n) const;
};
} /* namespace toku */

View file

@ -121,9 +121,8 @@ static void test_points(void) {
}
size_t i = 0;
range_buffer::iterator iter;
range_buffer::iterator iter(&buffer);
range_buffer::iterator::record rec;
iter.create(&buffer);
while (iter.current(&rec)) {
const DBT *expected_point = get_dbt_by_iteration(i);
invariant(compare_dbts(nullptr, expected_point, rec.get_left_key()) == 0);
@ -151,9 +150,8 @@ static void test_ranges(void) {
}
size_t i = 0;
range_buffer::iterator iter;
range_buffer::iterator iter(&buffer);
range_buffer::iterator::record rec;
iter.create(&buffer);
while (iter.current(&rec)) {
const DBT *expected_left = get_dbt_by_iteration(i);
const DBT *expected_right = get_dbt_by_iteration(i + 1);
@ -187,9 +185,8 @@ static void test_mixed(void) {
}
size_t i = 0;
range_buffer::iterator iter;
range_buffer::iterator iter(&buffer);
range_buffer::iterator::record rec;
iter.create(&buffer);
while (iter.current(&rec)) {
const DBT *expected_left = get_dbt_by_iteration(i);
const DBT *expected_right = get_dbt_by_iteration(i + 1);
@ -232,10 +229,10 @@ static void test_small_and_large_points(void) {
// Append a small dbt, the buf should be able to fit it.
buffer.append(&small_dbt, &small_dbt);
invariant(buffer.m_buf_size >= small_dbt.size);
invariant(buffer.total_memory_size() >= small_dbt.size);
// Append a large dbt, the buf should be able to fit it.
buffer.append(&large_dbt, &large_dbt);
invariant(buffer.m_buf_size >= (small_dbt.size + large_dbt.size));
invariant(buffer.total_memory_size() >= (small_dbt.size + large_dbt.size));
toku_free(small_buf);
toku_free(large_buf);

View file

@ -2463,7 +2463,7 @@ struct iter_txn_row_locks_callback_extra {
const int r = lt_map->fetch(which_lt, &ranges);
invariant_zero(r);
current_db = locked_get_db_by_dict_id(env, ranges.lt->get_dict_id());
iter.create(ranges.buffer);
iter = toku::range_buffer::iterator(ranges.buffer);
}
DB_ENV *env;

View file

@ -144,11 +144,11 @@ static void db_txn_note_row_lock(DB *db, DB_TXN *txn, const DBT *left_key, const
}
// add a new lock range to this txn's row lock buffer
size_t old_num_bytes = ranges.buffer->get_num_bytes();
size_t old_mem_size = ranges.buffer->total_memory_size();
ranges.buffer->append(left_key, right_key);
size_t new_num_bytes = ranges.buffer->get_num_bytes();
invariant(new_num_bytes > old_num_bytes);
lt->get_manager()->note_mem_used(new_num_bytes - old_num_bytes);
size_t new_mem_size = ranges.buffer->total_memory_size();
invariant(new_mem_size > old_mem_size);
lt->get_manager()->note_mem_used(new_mem_size - old_mem_size);
toku_mutex_unlock(&db_txn_struct_i(txn)->txn_mutex);
}
@ -201,17 +201,16 @@ void toku_db_txn_escalate_callback(TXNID txnid, const toku::locktree *lt, const
//
// We could theoretically steal the memory from the caller instead of copying
// it, but it's simpler to have a callback API that doesn't transfer memory ownership.
lt->get_manager()->note_mem_released(ranges.buffer->get_num_bytes());
lt->get_manager()->note_mem_released(ranges.buffer->total_memory_size());
ranges.buffer->destroy();
ranges.buffer->create();
toku::range_buffer::iterator iter;
toku::range_buffer::iterator iter(&buffer);
toku::range_buffer::iterator::record rec;
iter.create(&buffer);
while (iter.current(&rec)) {
ranges.buffer->append(rec.get_left_key(), rec.get_right_key());
iter.next();
}
lt->get_manager()->note_mem_used(ranges.buffer->get_num_bytes());
lt->get_manager()->note_mem_used(ranges.buffer->total_memory_size());
} else {
// In rare cases, we may not find the associated locktree, because we are
// racing with the transaction trying to add this locktree to the lt map
@ -315,7 +314,7 @@ void toku_db_release_lt_key_ranges(DB_TXN *txn, txn_lt_key_ranges *ranges) {
// release all of the locks this txn has ever successfully
// acquired and stored in the range buffer for this locktree
lt->release_locks(txnid, ranges->buffer);
lt->get_manager()->note_mem_released(ranges->buffer->get_num_bytes());
lt->get_manager()->note_mem_released(ranges->buffer->total_memory_size());
ranges->buffer->destroy();
toku_free(ranges->buffer);

View file

@ -89,157 +89,142 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#include <algorithm>
#include <string.h>
#include <memory.h>
#include <util/memarena.h>
struct memarena {
char *buf;
size_t buf_used, buf_size;
size_t size_of_other_bufs; // the buf_size of all the other bufs.
size_t footprint_of_other_bufs; // the footprint of all the other bufs.
char **other_bufs;
int n_other_bufs;
};
void memarena::create(size_t initial_size) {
_current_chunk = arena_chunk();
_other_chunks = nullptr;
_size_of_other_chunks = 0;
_footprint_of_other_chunks = 0;
_n_other_chunks = 0;
MEMARENA toku_memarena_create_presized (size_t initial_size) {
MEMARENA XMALLOC(result);
result->buf_size = initial_size;
result->buf_used = 0;
result->other_bufs = NULL;
result->size_of_other_bufs = 0;
result->footprint_of_other_bufs = 0;
result->n_other_bufs = 0;
XMALLOC_N(result->buf_size, result->buf);
return result;
}
MEMARENA toku_memarena_create (void) {
return toku_memarena_create_presized(1024);
}
void toku_memarena_clear (MEMARENA ma) {
// Free the other bufs.
int i;
for (i=0; i<ma->n_other_bufs; i++) {
toku_free(ma->other_bufs[i]);
ma->other_bufs[i]=0;
_current_chunk.size = initial_size;
if (_current_chunk.size > 0) {
XMALLOC_N(_current_chunk.size, _current_chunk.buf);
}
ma->n_other_bufs=0;
// But reuse the main buffer
ma->buf_used = 0;
ma->size_of_other_bufs = 0;
ma->footprint_of_other_bufs = 0;
}
static size_t
round_to_page (size_t size) {
const size_t _PAGE_SIZE = 4096;
const size_t result = _PAGE_SIZE+((size-1)&~(_PAGE_SIZE-1));
assert(0==(result&(_PAGE_SIZE-1))); // make sure it's aligned
assert(result>=size); // make sure it's not too small
assert(result<size+_PAGE_SIZE); // make sure we didn't grow by more than a page.
return result;
}
void* toku_memarena_malloc (MEMARENA ma, size_t size) {
if (ma->buf_size < ma->buf_used + size) {
// The existing block isn't big enough.
// Add the block to the vector of blocks.
if (ma->buf) {
int old_n = ma->n_other_bufs;
REALLOC_N(old_n+1, ma->other_bufs);
assert(ma->other_bufs);
ma->other_bufs[old_n]=ma->buf;
ma->n_other_bufs = old_n+1;
ma->size_of_other_bufs += ma->buf_size;
ma->footprint_of_other_bufs += toku_memory_footprint(ma->buf, ma->buf_used);
}
// Make a new one
{
size_t new_size = 2*ma->buf_size;
if (new_size<size) new_size=size;
new_size=round_to_page(new_size); // at least size, but round to the next page size
XMALLOC_N(new_size, ma->buf);
ma->buf_used = 0;
ma->buf_size = new_size;
}
void memarena::destroy(void) {
if (_current_chunk.buf) {
toku_free(_current_chunk.buf);
}
// allocate in the existing block.
char *result=ma->buf+ma->buf_used;
ma->buf_used+=size;
return result;
for (int i = 0; i < _n_other_chunks; i++) {
toku_free(_other_chunks[i].buf);
}
if (_other_chunks) {
toku_free(_other_chunks);
}
_current_chunk = arena_chunk();
_other_chunks = nullptr;
_n_other_chunks = 0;
}
void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len) {
void *r=toku_memarena_malloc(ma, len);
memcpy(r,v,len);
// Round `size` up to a multiple of the 4096-byte page size. A size that is
// already page-aligned comes back unchanged. The asserts sanity-check the
// result: it is page-aligned, it is not smaller than `size`, and it exceeds
// `size` by less than one page.
static size_t round_to_page(size_t size) {
    const size_t page_size = 4096;
    const size_t page_mask = page_size - 1;
    // Drop (size - 1) down to a page boundary, then step up one full page.
    const size_t below = (size - 1) & ~page_mask;
    const size_t r = below + page_size;
    assert((r & page_mask) == 0);
    assert(r >= size);
    assert(size + page_size > r);
    return r;
}
void toku_memarena_destroy(MEMARENA *map) {
MEMARENA ma=*map;
if (ma->buf) {
toku_free(ma->buf);
ma->buf=0;
static const size_t MEMARENA_MAX_CHUNK_SIZE = 64 * 1024 * 1024;
void *memarena::malloc_from_arena(size_t size) {
if (_current_chunk.buf == nullptr || _current_chunk.size < _current_chunk.used + size) {
// The existing block isn't big enough.
// Add the block to the vector of blocks.
if (_current_chunk.buf) {
invariant(_current_chunk.size > 0);
int old_n = _n_other_chunks;
XREALLOC_N(old_n + 1, _other_chunks);
_other_chunks[old_n] = _current_chunk;
_n_other_chunks = old_n + 1;
_size_of_other_chunks += _current_chunk.size;
_footprint_of_other_chunks += toku_memory_footprint(_current_chunk.buf, _current_chunk.used);
}
// Make a new one. Grow the buffer size exponentially until we hit
// the max chunk size, but make it at least `size' bytes so the
// current allocation always fit.
size_t new_size = std::min(MEMARENA_MAX_CHUNK_SIZE, 2 * _current_chunk.size);
if (new_size < size) {
new_size = size;
}
new_size = round_to_page(new_size); // at least size, but round to the next page size
XMALLOC_N(new_size, _current_chunk.buf);
_current_chunk.used = 0;
_current_chunk.size = new_size;
}
int i;
for (i=0; i<ma->n_other_bufs; i++) {
toku_free(ma->other_bufs[i]);
invariant(_current_chunk.buf != nullptr);
// allocate in the existing block.
char *p = _current_chunk.buf + _current_chunk.used;
_current_chunk.used += size;
return p;
}
void memarena::move_memory(memarena *dest) {
// Move memory to dest
XREALLOC_N(dest->_n_other_chunks + _n_other_chunks + 1, dest->_other_chunks);
dest->_size_of_other_chunks += _size_of_other_chunks + _current_chunk.size;
dest->_footprint_of_other_chunks += _footprint_of_other_chunks + toku_memory_footprint(_current_chunk.buf, _current_chunk.used);
for (int i = 0; i < _n_other_chunks; i++) {
dest->_other_chunks[dest->_n_other_chunks++] = _other_chunks[i];
}
if (ma->other_bufs) toku_free(ma->other_bufs);
ma->other_bufs=0;
ma->n_other_bufs=0;
toku_free(ma);
*map = 0;
dest->_other_chunks[dest->_n_other_chunks++] = _current_chunk;
// Clear out this memarena's memory
toku_free(_other_chunks);
_current_chunk = arena_chunk();
_other_chunks = nullptr;
_size_of_other_chunks = 0;
_footprint_of_other_chunks = 0;
_n_other_chunks = 0;
}
void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source) {
int i;
char **other_bufs = dest->other_bufs;
static int move_counter = 0;
move_counter++;
REALLOC_N(dest->n_other_bufs + source->n_other_bufs + 1, other_bufs);
// Report the memory attributed to this arena: the memarena object itself,
// the bytes counted by total_size_in_use(), and one array entry for each
// element of _other_chunks.
size_t memarena::total_memory_size(void) const {
    const size_t object_bytes = sizeof(*this);
    const size_t in_use_bytes = total_size_in_use();
    const size_t chunk_array_bytes = _n_other_chunks * sizeof(*_other_chunks);
    return object_bytes + in_use_bytes + chunk_array_bytes;
}
dest ->size_of_other_bufs += source->size_of_other_bufs + source->buf_size;
dest ->footprint_of_other_bufs += source->footprint_of_other_bufs + toku_memory_footprint(source->buf, source->buf_used);
source->size_of_other_bufs = 0;
source->footprint_of_other_bufs = 0;
// Sum of the recorded sizes of all the other chunks plus the number of
// bytes handed out so far from the current chunk.
size_t memarena::total_size_in_use(void) const {
    const size_t current_used = _current_chunk.used;
    return _size_of_other_chunks + current_used;
}
assert(other_bufs);
dest->other_bufs = other_bufs;
for (i=0; i<source->n_other_bufs; i++) {
dest->other_bufs[dest->n_other_bufs++] = source->other_bufs[i];
// Footprint-based accounting: the memarena object itself, the accumulated
// footprint of the other chunks, the footprint toku_memory_footprint()
// reports for the used part of the current chunk, and one array entry for
// each element of _other_chunks.
size_t memarena::total_footprint(void) const {
    const size_t object_bytes = sizeof(*this);
    const size_t current_footprint =
        toku_memory_footprint(_current_chunk.buf, _current_chunk.used);
    const size_t chunk_array_bytes = _n_other_chunks * sizeof(*_other_chunks);
    return object_bytes +
           _footprint_of_other_chunks +
           current_footprint +
           chunk_array_bytes;
}
////////////////////////////////////////////////////////////////////////////////
const void *memarena::chunk_iterator::current(size_t *used) const {
if (_chunk_idx < 0) {
*used = _ma->_current_chunk.used;
return _ma->_current_chunk.buf;
} else if (_chunk_idx < _ma->_n_other_chunks) {
*used = _ma->_other_chunks[_chunk_idx].used;
return _ma->_other_chunks[_chunk_idx].buf;
}
dest->other_bufs[dest->n_other_bufs++] = source->buf;
source->n_other_bufs = 0;
toku_free(source->other_bufs);
source->other_bufs = 0;
source->buf = 0;
source->buf_size = 0;
source->buf_used = 0;
*used = 0;
return nullptr;
}
size_t
toku_memarena_total_memory_size (MEMARENA m)
{
return (toku_memarena_total_size_in_use(m) +
sizeof(*m) +
m->n_other_bufs * sizeof(*m->other_bufs));
// Step to the following chunk: from the current chunk (index -1) to the
// first entry of _other_chunks, then through that array in order.
void memarena::chunk_iterator::next() {
    _chunk_idx += 1;
}
size_t
toku_memarena_total_size_in_use (MEMARENA m)
{
return m->size_of_other_bufs + m->buf_used;
}
size_t
toku_memarena_total_footprint (MEMARENA m)
{
return m->footprint_of_other_bufs + toku_memory_footprint(m->buf, m->buf_used) +
sizeof(*m) +
m->n_other_bufs * sizeof(*m->other_bufs);
// Returns true while the iterator is positioned on a chunk the caller should
// still visit.
//
// Index -1 stands for the arena's current chunk; indexes >= 0 refer to
// entries of _other_chunks.
bool memarena::chunk_iterator::more() const {
    if (_chunk_idx < 0) {
        // move_memory() appends every moved chunk to the destination's
        // _other_chunks without giving the destination a current chunk, so
        // an arena can hold data while _current_chunk.buf is still null.
        // Stopping here in that case would hide all of the moved chunks, so
        // keep going whenever there is anything in _other_chunks; current()
        // then simply reports the current chunk's null buffer and its used
        // count before the iteration moves on.
        return _ma->_current_chunk.buf != nullptr || _ma->_n_other_chunks > 0;
    }
    return _chunk_idx < _ma->_n_other_chunks;
}

View file

@ -92,43 +92,85 @@ PATENT RIGHTS GRANT:
#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* We have too many memory management tricks:
* memarena (this code) is for a collection of objects that cannot be moved.
* The pattern is allocate more and more stuff.
* Don't free items as you go.
* Free all the items at once.
* Then reuse the same buffer again.
* Allocated objects never move.
* A memarena (as currently implemented) is not suitable for interprocess memory sharing. No reason it couldn't be made to work though.
/*
* A memarena is used to efficiently store a collection of objects that never move.
* The pattern is allocate more and more stuff and free all of the items at once.
* The underlying memory will store 1 or more objects per chunk. Each chunk is
* contiguously laid out in memory but chunks are not necessarily contiguous with
* each other.
*/
class memarena {
public:
memarena() :
_current_chunk(arena_chunk()),
_other_chunks(nullptr),
_n_other_chunks(0),
_size_of_other_chunks(0),
_footprint_of_other_chunks(0) {
}
struct memarena;
// Effect: Create a memarena with the specified initial size
void create(size_t initial_size);
typedef struct memarena *MEMARENA;
void destroy(void);
MEMARENA toku_memarena_create_presized (size_t initial_size);
// Effect: Create a memarena with initial size. In case of ENOMEM, aborts.
// Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed.
// In case of ENOMEM, aborts.
void *malloc_from_arena(size_t size);
MEMARENA toku_memarena_create (void);
// Effect: Create a memarena with default initial size. In case of ENOMEM, aborts.
// Effect: Move all the memory from this memarena into DEST.
// When this memarena is destroyed, the moved memory won't be freed.
// When DEST is closed, the memory will be freed, unless DEST moves its memory to another memarena...
void move_memory(memarena *dest);
void toku_memarena_clear (MEMARENA ma);
// Effect: Reset the internal state so that the allocated memory can be used again.
// Effect: Calculate the amount of memory used by a memory arena.
size_t total_memory_size(void) const;
void* toku_memarena_malloc (MEMARENA ma, size_t size);
// Effect: Allocate some memory. The returned value remains valid until the memarena is cleared or closed.
// In case of ENOMEM, aborts.
// Effect: Calculate the used space of the memory arena (ie: excludes unused space)
size_t total_size_in_use(void) const;
void *toku_memarena_memdup (MEMARENA ma, const void *v, size_t len);
// Effect: Calculate the amount of memory used, according to toku_memory_footprint(),
// which is a more expensive but more accurate count of memory used.
size_t total_footprint(void) const;
void toku_memarena_destroy(MEMARENA *ma);
// iterator over the underlying chunks that store objects in the memarena.
// a chunk is represented by a pointer to const memory and a usable byte count.
class chunk_iterator {
public:
chunk_iterator(const memarena *ma) :
_ma(ma), _chunk_idx(-1) {
}
void toku_memarena_move_buffers(MEMARENA dest, MEMARENA source);
// Effect: Move all the memory from SOURCE into DEST. When SOURCE is closed the memory won't be freed. When DEST is closed, the memory will be freed. (Unless DEST moves its memory to another memarena...)
// returns: base pointer to the current chunk
// *used set to the number of usable bytes
// if more() is false, returns nullptr and *used = 0
const void *current(size_t *used) const;
size_t toku_memarena_total_memory_size (MEMARENA);
// Effect: Calculate the amount of memory used by a memory arena.
// requires: more() is true
void next();
size_t toku_memarena_total_size_in_use (MEMARENA);
bool more() const;
size_t toku_memarena_total_footprint (MEMARENA);
private:
// -1 represents the 'initial' chunk in a memarena, ie: ma->_current_chunk
// >= 0 represents the i'th chunk in the ma->_other_chunks array
const memarena *_ma;
int _chunk_idx;
};
private:
struct arena_chunk {
arena_chunk() : buf(nullptr), used(0), size(0) { }
char *buf;
size_t used;
size_t size;
};
struct arena_chunk _current_chunk;
struct arena_chunk *_other_chunks;
int _n_other_chunks;
size_t _size_of_other_chunks; // the buf_size of all the other chunks.
size_t _footprint_of_other_chunks; // the footprint of all the other chunks.
friend class memarena_unit_test;
};

230
util/tests/memarena-test.cc Normal file
View file

@ -0,0 +1,230 @@
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
/*
COPYING CONDITIONS NOTICE:
This program is free software; you can redistribute it and/or modify
it under the terms of version 2 of the GNU General Public License as
published by the Free Software Foundation, and provided that the
following conditions are met:
* Redistributions of source code must retain this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below).
* Redistributions in binary form must reproduce this COPYING
CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
PATENT MARKING NOTICE (below), and the PATENT RIGHTS
GRANT (below) in the documentation and/or other materials
provided with the distribution.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
COPYRIGHT NOTICE:
TokuDB, Tokutek Fractal Tree Indexing Library.
Copyright (C) 2007-2013 Tokutek, Inc.
DISCLAIMER:
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
UNIVERSITY PATENT NOTICE:
The technology is licensed by the Massachusetts Institute of
Technology, Rutgers State University of New Jersey, and the Research
Foundation of State University of New York at Stony Brook under
United States of America Serial No. 11/760379 and to the patents
and/or patent applications resulting from it.
PATENT MARKING NOTICE:
This software is covered by US Patent No. 8,185,551.
This software is covered by US Patent No. 8,489,638.
PATENT RIGHTS GRANT:
"THIS IMPLEMENTATION" means the copyrightable works distributed by
Tokutek as part of the Fractal Tree project.
"PATENT CLAIMS" means the claims of patents that are owned or
licensable by Tokutek, both currently or in the future; and that in
the absence of this license would be infringed by THIS
IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
"PATENT CHALLENGE" shall mean a challenge to the validity,
patentability, enforceability and/or non-infringement of any of the
PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
Tokutek hereby grants to you, for the term and geographical scope of
the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and
otherwise run, modify, and propagate the contents of THIS
IMPLEMENTATION, where such license applies only to the PATENT
CLAIMS. This grant does not include claims that would be infringed
only as a consequence of further modifications of THIS
IMPLEMENTATION. If you or your agent or licensee institute or order
or agree to the institution of patent litigation against any entity
(including a cross-claim or counterclaim in a lawsuit) alleging that
THIS IMPLEMENTATION constitutes direct or contributory patent
infringement, or inducement of patent infringement, then any rights
granted to you under this License shall terminate as of the date
such litigation is filed. If you or your agent or exclusive
licensee institute or order or agree to the institution of a PATENT
CHALLENGE, then Tokutek may terminate any rights granted to you
under this License.
*/
#include <string.h>
#include "portability/toku_assert.h"
#include "util/memarena.h"
class memarena_unit_test {
private:
static const int magic = 37;
template <typename F>
void iterate_chunks(memarena *ma, F &fn) {
for (memarena::chunk_iterator it(ma); it.more(); it.next()) {
size_t used = 0;
const void *buf = it.current(&used);
fn(buf, used);
}
}
void test_create(size_t size) {
memarena ma;
ma.create(size);
invariant(ma._current_chunk.size == size);
invariant(ma._current_chunk.used == 0);
if (size == 0) {
invariant_null(ma._current_chunk.buf);
} else {
invariant_notnull(ma._current_chunk.buf);
}
// make sure memory was allocated ok by
// writing to buf and reading it back
memset(ma._current_chunk.buf, magic, size);
for (size_t i = 0; i < size; i++) {
const char *buf = reinterpret_cast<char *>(ma._current_chunk.buf);
invariant(buf[i] == magic);
}
ma.destroy();
}
void test_malloc(size_t size) {
memarena ma;
ma.create(14);
void *v = ma.malloc_from_arena(size);
invariant_notnull(v);
// make sure memory was allocated ok by
// writing to buf and reading it back
memset(ma._current_chunk.buf, magic, size);
for (size_t i = 0; i < size; i++) {
const char *c = reinterpret_cast<char *>(ma._current_chunk.buf);
invariant(c[i] == magic);
}
ma.destroy();
}
static void test_iterate_fn(const void *buf, size_t used) {
for (size_t i = 0; i < used; i++) {
const char *c = reinterpret_cast<const char *>(buf);
invariant(c[i] == (char) ((intptr_t) &c[i]));
}
}
void test_iterate(size_t size) {
memarena ma;
ma.create(14);
for (size_t k = 0; k < size / 64; k += 64) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
}
}
size_t rest = size % 64;
if (rest != 0) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
}
}
iterate_chunks(&ma, test_iterate_fn);
ma.destroy();
}
void test_move_memory(size_t size) {
memarena ma;
ma.create(14);
for (size_t k = 0; k < size / 64; k += 64) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
}
}
size_t rest = size % 64;
if (rest != 0) {
void *v = ma.malloc_from_arena(64);
for (size_t i = 0; i < 64; i++) {
char *c = reinterpret_cast<char *>(v);
c[i] = (char) ((intptr_t) &c[i]);
}
}
memarena ma2;
ma.move_memory(&ma2);
iterate_chunks(&ma2, test_iterate_fn);
ma.destroy();
ma2.destroy();
}
public:
void test() {
test_create(0);
test_create(64);
test_create(128 * 1024 * 1024);
test_malloc(0);
test_malloc(63);
test_malloc(64);
test_malloc(64 * 1024 * 1024);
test_malloc((64 * 1024 * 1024) + 1);
test_iterate(0);
test_iterate(63);
test_iterate(128 * 1024);
test_iterate(64 * 1024 * 1024);
test_iterate((64 * 1024 * 1024) + 1);
test_move_memory(0);
test_move_memory(1);
test_move_memory(63);
test_move_memory(65);
test_move_memory(65 * 1024 * 1024);
test_move_memory(101 * 1024 * 1024);
}
};
int main(void) {
memarena_unit_test test;
test.test();
return 0;
}