/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*
COPYING CONDITIONS NOTICE:

  This program is free software; you can redistribute it and/or modify
  it under the terms of version 2 of the GNU General Public License as
  published by the Free Software Foundation, and provided that the
  following conditions are met:

      * Redistributions of source code must retain this COPYING
        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
        GRANT (below).

      * Redistributions in binary form must reproduce this COPYING
        CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
        DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
        PATENT MARKING NOTICE (below), and the PATENT RIGHTS
        GRANT (below) in the documentation and/or other materials
        provided with the distribution.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  02110-1301, USA.

COPYRIGHT NOTICE:

  TokuDB, Tokutek Fractal Tree Indexing Library.
  Copyright (C) 2007-2013 Tokutek, Inc.

DISCLAIMER:

  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.

UNIVERSITY PATENT NOTICE:

  The technology is licensed by the Massachusetts Institute of
  Technology, Rutgers State University of New Jersey, and the Research
  Foundation of State University of New York at Stony Brook under
  United States of America Serial No. 11/760379 and to the patents
  and/or patent applications resulting from it.

PATENT MARKING NOTICE:

  This software is covered by US Patent No. 8,185,551.
  This software is covered by US Patent No. 8,489,638.

PATENT RIGHTS GRANT:

  "THIS IMPLEMENTATION" means the copyrightable works distributed by
  Tokutek as part of the Fractal Tree project.

  "PATENT CLAIMS" means the claims of patents that are owned or
  licensable by Tokutek, both currently or in the future; and that in
  the absence of this license would be infringed by THIS
  IMPLEMENTATION or by using or running THIS IMPLEMENTATION.

  "PATENT CHALLENGE" shall mean a challenge to the validity,
  patentability, enforceability and/or non-infringement of any of the
  PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.

  Tokutek hereby grants to you, for the term and geographical scope of
  the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
  irrevocable (except as stated in this section) patent license to
  make, have made, use, offer to sell, sell, import, transfer, and
  otherwise run, modify, and propagate the contents of THIS
  IMPLEMENTATION, where such license applies only to the PATENT
  CLAIMS. This grant does not include claims that would be infringed
  only as a consequence of further modifications of THIS
  IMPLEMENTATION. If you or your agent or licensee institute or order
  or agree to the institution of patent litigation against any entity
  (including a cross-claim or counterclaim in a lawsuit) alleging that
  THIS IMPLEMENTATION constitutes direct or contributory patent
  infringement, or inducement of patent infringement, then any rights
  granted to you under this License shall terminate as of the date
  such litigation is filed. If you or your agent or exclusive
  licensee institute or order or agree to the institution of a PATENT
  CHALLENGE, then Tokutek may terminate any rights granted to you
  under this License.
*/

#ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."

#include <bndata.h>

static uint32_t klpair_size(KLPAIR klpair){
    return sizeof(*klpair) + klpair->keylen + leafentry_memsize(get_le_from_klpair(klpair));
}

static uint32_t klpair_disksize(KLPAIR klpair){
    return sizeof(*klpair) + klpair->keylen + leafentry_disksize(get_le_from_klpair(klpair));
}
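
// Layout sketch, inferred from klpair_size()/klpair_disksize() above and from
// the packing code below (a reading aid, not an authoritative spec):
// a KLPAIR is a keylen header (covered by sizeof(*klpair), along with any
// key_le flexible-array bookkeeping), immediately followed by the key bytes,
// with the leafentry packed right after the key. get_le_from_klpair() is
// assumed to return a pointer just past the key bytes.
//
//   [ keylen (uint32_t) | key bytes (keylen) | leafentry ... ]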

void bn_data::init_zero() {
    toku_mempool_zero(&m_buffer_mempool);
}

void bn_data::initialize_empty() {
    toku_mempool_zero(&m_buffer_mempool);
    m_buffer.create();
}
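
// Serialized input format consumed below, as inferred from the parsing loop in
// initialize_from_data() (a reading aid, not an authoritative spec). Each
// entry in `buf` appears to be laid out as:
//
//   type byte (LE_CLEAN or LE_MVCC)
//   keylen (uint32_t)
//   LE_CLEAN: vallen (uint32_t), key bytes, val bytes
//   LE_MVCC:  num_cxrs (uint32_t), num_pxrs (uint8_t), key bytes, remaining xr data
//
// The loop repacks each entry into the mempool as a KLPAIR (keylen, key,
// then the leafentry) and records a pointer to it in `array`.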

void bn_data::initialize_from_data(uint32_t num_entries, unsigned char *buf, uint32_t data_size) {
    if (data_size == 0) {
        invariant_zero(num_entries);
    }
    KLPAIR *XMALLOC_N(num_entries, array); // create array of pointers to leafentries
    unsigned char *newmem = NULL;
    // add same wiggle room that toku_mempool_construct would, 25% extra
    uint32_t allocated_bytes = data_size + data_size/4;
    CAST_FROM_VOIDP(newmem, toku_xmalloc(allocated_bytes));
    unsigned char* curr_src_pos = buf;
    unsigned char* curr_dest_pos = newmem;
    for (uint32_t i = 0; i < num_entries; i++) {
        KLPAIR curr_kl = (KLPAIR)curr_dest_pos;
        array[i] = curr_kl;

        uint8_t curr_type = curr_src_pos[0];
        curr_src_pos++;
        // first thing we do is lay out the key;
        // to do so, we must extract it from the leafentry
        // and write it in
        uint32_t keylen = 0;
        void* keyp = NULL;
        keylen = *(uint32_t *)curr_src_pos;
        curr_src_pos += sizeof(uint32_t);
        uint32_t clean_vallen = 0;
        uint32_t num_cxrs = 0;
        uint8_t num_pxrs = 0;
        if (curr_type == LE_CLEAN) {
            clean_vallen = toku_dtoh32(*(uint32_t *)curr_src_pos);
            curr_src_pos += sizeof(clean_vallen); // val_len
            keyp = curr_src_pos;
            curr_src_pos += keylen;
        }
        else {
            paranoid_invariant(curr_type == LE_MVCC);
            num_cxrs = toku_dtoh32(*(uint32_t *)curr_src_pos);
            curr_src_pos += sizeof(uint32_t); // num_cxrs
            num_pxrs = curr_src_pos[0];
            curr_src_pos += sizeof(uint8_t); // num_pxrs
            keyp = curr_src_pos;
            curr_src_pos += keylen;
        }
        // now that we have the keylen and the key, we can copy it
        // into the destination
        *(uint32_t *)curr_dest_pos = keylen;
        curr_dest_pos += sizeof(keylen);
        memcpy(curr_dest_pos, keyp, keylen);
        curr_dest_pos += keylen;
        // now curr_dest_pos is pointing to where the leafentry should be packed
        curr_dest_pos[0] = curr_type;
        curr_dest_pos++;
        if (curr_type == LE_CLEAN) {
            *(uint32_t *)curr_dest_pos = toku_htod32(clean_vallen);
            curr_dest_pos += sizeof(clean_vallen);
            memcpy(curr_dest_pos, curr_src_pos, clean_vallen); // copy the val
            curr_dest_pos += clean_vallen;
            curr_src_pos += clean_vallen;
        }
        else {
            // pack num_cxrs and num_pxrs
            *(uint32_t *)curr_dest_pos = toku_htod32(num_cxrs);
            curr_dest_pos += sizeof(num_cxrs);
            *(uint8_t *)curr_dest_pos = num_pxrs;
            curr_dest_pos += sizeof(num_pxrs);
            // now we need to pack the rest of the data
            uint32_t num_rest_bytes = leafentry_rest_memsize(num_pxrs, num_cxrs, curr_src_pos);
            memcpy(curr_dest_pos, curr_src_pos, num_rest_bytes);
            curr_dest_pos += num_rest_bytes;
            curr_src_pos += num_rest_bytes;
        }
    }
    uint32_t num_bytes_read UU() = (uint32_t)(curr_src_pos - buf);
    paranoid_invariant(num_bytes_read == data_size);
    uint32_t num_bytes_written = curr_dest_pos - newmem;
    paranoid_invariant(num_bytes_written == data_size);
    toku_mempool_init(&m_buffer_mempool, newmem, (size_t)(num_bytes_written), allocated_bytes);

    // destroy old omt that was created by toku_create_empty_bn(), so we can create a new one
    m_buffer.destroy();
    m_buffer.create_steal_sorted_array(&array, num_entries, num_entries);
}

uint64_t bn_data::get_memory_size() {
    uint64_t retval = 0;
    // include fragmentation overhead but do not include space in the
    // mempool that has not yet been allocated for leaf entries
    size_t poolsize = toku_mempool_footprint(&m_buffer_mempool);
    invariant(poolsize >= get_disk_size());
    retval += poolsize;
    retval += m_buffer.memory_size();
    return retval;
}

void bn_data::delete_leafentry (
    uint32_t idx,
    uint32_t keylen,
    uint32_t old_le_size
    )
{
    m_buffer.delete_at(idx);
    toku_mempool_mfree(&m_buffer_mempool, nullptr, old_le_size + keylen + sizeof(keylen)); // Must pass nullptr, since le is no good any more.
}

/* mempool support */

struct omt_compressor_state {
    struct mempool *new_kvspace;
    KLPAIR *newvals;
};

static int move_it (const KLPAIR &klpair, const uint32_t idx, struct omt_compressor_state * const oc) {
    uint32_t size = klpair_size(klpair);
    KLPAIR CAST_FROM_VOIDP(newdata, toku_mempool_malloc(oc->new_kvspace, size, 1));
    paranoid_invariant_notnull(newdata); // we do this on a fresh mempool, so nothing bad should happen
    memcpy(newdata, klpair, size);
    oc->newvals[idx] = newdata;
    return 0;
}

// Compress things, and grow the mempool if needed.
void bn_data::omt_compress_kvspace(size_t added_size, void **maybe_free) {
    uint32_t total_size_needed = toku_mempool_get_used_space(&m_buffer_mempool) + added_size;
    if (total_size_needed + total_size_needed >= m_buffer_mempool.size) {
        m_buffer_mempool.size = total_size_needed + total_size_needed;
    }
    struct mempool new_kvspace;
    toku_mempool_construct(&new_kvspace, m_buffer_mempool.size);
    uint32_t numvals = omt_size();
    KLPAIR *XMALLOC_N(numvals, newvals);
    struct omt_compressor_state oc = { &new_kvspace, newvals };

    m_buffer.iterate_on_range< decltype(oc), move_it >(0, omt_size(), &oc);

    m_buffer.destroy();
    m_buffer.create_steal_sorted_array(&newvals, numvals, numvals);

    if (maybe_free) {
        *maybe_free = m_buffer_mempool.base;
    } else {
        toku_free(m_buffer_mempool.base);
    }
    m_buffer_mempool = new_kvspace;
}
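
// Worked example of the growth policy above (illustrative numbers only):
// with 100 bytes of used space and added_size 20, total_size_needed is 120.
// If the mempool's current size is 200 (< 240 = 2 * 120), the replacement
// mempool is constructed with size 240; otherwise the current size is kept
// and the copy simply compacts out the fragmentation.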

// Effect: Allocate a new object of size SIZE in MP. If MP runs out of space, allocate a new mempool space, and copy all the items
// from the OMT (which items refer to items in the old mempool) into the new mempool.
// If MAYBE_FREE is NULL then free the old mempool's space.
// Otherwise, store the old mempool's space in maybe_free.
KLPAIR bn_data::mempool_malloc_from_omt(size_t size, void **maybe_free) {
    void *v = toku_mempool_malloc(&m_buffer_mempool, size, 1);
    if (v == NULL) {
        omt_compress_kvspace(size, maybe_free);
        v = toku_mempool_malloc(&m_buffer_mempool, size, 1);
        paranoid_invariant_notnull(v);
    }
    return (KLPAIR)v;
}

//TODO: probably not free the "maybe_free" right away?
void bn_data::get_space_for_overwrite(
    uint32_t idx,
    const void* keyp,
    uint32_t keylen,
    uint32_t old_le_size,
    uint32_t new_size,
    LEAFENTRY* new_le_space
    )
{
    void* maybe_free = nullptr;
    uint32_t size_alloc = new_size + keylen + sizeof(keylen);
    KLPAIR new_kl = mempool_malloc_from_omt(
        size_alloc,
        &maybe_free
        );
    uint32_t size_freed = old_le_size + keylen + sizeof(keylen);
    toku_mempool_mfree(&m_buffer_mempool, nullptr, size_freed); // Must pass nullptr, since le is no good any more.
    new_kl->keylen = keylen;
    memcpy(new_kl->key_le, keyp, keylen);
    m_buffer.set_at(new_kl, idx);
    *new_le_space = get_le_from_klpair(new_kl);
    // free at end, so that the keyp and keylen
    // passed in are still valid
    if (maybe_free) {
        toku_free(maybe_free);
    }
}

//TODO: probably not free the "maybe_free" right away?
void bn_data::get_space_for_insert(
    uint32_t idx,
    const void* keyp,
    uint32_t keylen,
    size_t size,
    LEAFENTRY* new_le_space
    )
{
    void* maybe_free = nullptr;
    uint32_t size_alloc = size + keylen + sizeof(keylen);
    KLPAIR new_kl = mempool_malloc_from_omt(
        size_alloc,
        &maybe_free
        );
    new_kl->keylen = keylen;
    memcpy(new_kl->key_le, keyp, keylen);
    m_buffer.insert_at(new_kl, idx);
    *new_le_space = get_le_from_klpair(new_kl);
    // free at end, so that the keyp and keylen
    // passed in are still valid (you never know if
    // they were part of the old mempool; this is just
    // safer).
    if (maybe_free) {
        toku_free(maybe_free);
    }
}
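
// Usage sketch (hypothetical caller, for illustration only; the real callers
// live elsewhere in the ft code):
//
//   LEAFENTRY new_le;
//   bd->get_space_for_insert(idx, keyp, keylen, le_size, &new_le);
//   // ... serialize the new leafentry into *new_le (le_size bytes) ...
//
// get_space_for_overwrite() follows the same pattern but also takes the old
// leafentry's size so the space being replaced can be accounted as freed in
// the mempool.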

void bn_data::move_leafentries_to(
    BN_DATA dest_bd,
    uint32_t lbi, //lower bound inclusive
    uint32_t ube //upper bound exclusive
    )
//Effect: move leafentries in the range [lbi, ube) from this (the src omt) to the newly created dest omt
{
    paranoid_invariant(lbi < ube);
    paranoid_invariant(ube <= omt_size());
    KLPAIR *XMALLOC_N(ube-lbi, newklpointers); // create new omt

    size_t mpsize = toku_mempool_get_used_space(&m_buffer_mempool); // overkill, but safe
    struct mempool *dest_mp = &dest_bd->m_buffer_mempool;
    struct mempool *src_mp = &m_buffer_mempool;
    toku_mempool_construct(dest_mp, mpsize);

    uint32_t i = 0;
    for (i = lbi; i < ube; i++) {
        KLPAIR curr_kl;
        m_buffer.fetch(i, &curr_kl);

        size_t kl_size = klpair_size(curr_kl);
        KLPAIR new_kl = NULL;
        CAST_FROM_VOIDP(new_kl, toku_mempool_malloc(dest_mp, kl_size, 1));
        memcpy(new_kl, curr_kl, kl_size);
        newklpointers[i-lbi] = new_kl;
        toku_mempool_mfree(src_mp, curr_kl, kl_size);
    }

    dest_bd->m_buffer.create_steal_sorted_array(&newklpointers, ube-lbi, ube-lbi);
    // now remove the moved elements from the src omt, deleting from the back
    // so earlier indices stay valid; counting i down from ube to lbi+1 avoids
    // unsigned wrap-around when lbi == 0
    for (i = ube; i > lbi; i--) {
        m_buffer.delete_at(i-1);
    }
}

uint64_t bn_data::get_disk_size() {
    return toku_mempool_get_used_space(&m_buffer_mempool);
}

void bn_data::verify_mempool(void) {
    // TODO: implement something
}

uint32_t bn_data::omt_size(void) const {
    return m_buffer.size();
}

void bn_data::destroy(void) {
    // The buffer may have been freed already, in some cases.
    m_buffer.destroy();
    toku_mempool_destroy(&m_buffer_mempool);
}

//TODO: Splitting key/val requires changing this
void bn_data::replace_contents_with_clone_of_sorted_array(
    uint32_t num_les,
    const void** old_key_ptrs,
    uint32_t* old_keylens,
    LEAFENTRY* old_les,
    size_t *le_sizes,
    size_t mempool_size
    )
{
    toku_mempool_construct(&m_buffer_mempool, mempool_size);
    KLPAIR *XMALLOC_N(num_les, le_array);
    for (uint32_t idx = 0; idx < num_les; idx++) {
        KLPAIR new_kl = (KLPAIR)toku_mempool_malloc(
            &m_buffer_mempool,
            le_sizes[idx] + old_keylens[idx] + sizeof(uint32_t),
            1); // point to new location
        new_kl->keylen = old_keylens[idx];
        memcpy(new_kl->key_le, old_key_ptrs[idx], new_kl->keylen);
        memcpy(get_le_from_klpair(new_kl), old_les[idx], le_sizes[idx]);
        CAST_FROM_VOIDP(le_array[idx], new_kl);
    }
    //TODO: Splitting key/val requires changing this; keys are stored in old omt.. cannot delete it yet?
    m_buffer.destroy();
    m_buffer.create_steal_sorted_array(&le_array, num_les, num_les);
}


// get info about a single leafentry by index
int bn_data::fetch_le(uint32_t idx, LEAFENTRY *le) {
    KLPAIR klpair = NULL;
    int r = m_buffer.fetch(idx, &klpair);
    if (r == 0) {
        *le = get_le_from_klpair(klpair);
    }
    return r;
}

int bn_data::fetch_klpair(uint32_t idx, LEAFENTRY *le, uint32_t *len, void** key) {
    KLPAIR klpair = NULL;
    int r = m_buffer.fetch(idx, &klpair);
    if (r == 0) {
        *len = klpair->keylen;
        *key = klpair->key_le;
        *le = get_le_from_klpair(klpair);
    }
    return r;
}

int bn_data::fetch_klpair_disksize(uint32_t idx, size_t *size) {
    KLPAIR klpair = NULL;
    int r = m_buffer.fetch(idx, &klpair);
    if (r == 0) {
        *size = klpair_disksize(klpair);
    }
    return r;
}

int bn_data::fetch_le_key_and_len(uint32_t idx, uint32_t *len, void** key) {
    KLPAIR klpair = NULL;
    int r = m_buffer.fetch(idx, &klpair);
    if (r == 0) {
        *len = klpair->keylen;
        *key = klpair->key_le;
    }
    return r;
}
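
// Illustrative iteration over all entries using the accessors above
// (hypothetical caller, assuming an initialized bn_data *bd):
//
//   for (uint32_t i = 0; i < bd->omt_size(); i++) {
//       LEAFENTRY le; uint32_t keylen; void *key;
//       int r = bd->fetch_klpair(i, &le, &keylen, &key);
//       invariant_zero(r);
//       // ... inspect key/keylen and le ...
//   }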


struct mp_pair {
    void* orig_base;
    void* new_base;
    klpair_omt_t* omt;
};

static int fix_mp_offset(const KLPAIR &klpair, const uint32_t idx, struct mp_pair * const p) {
    char* old_value = (char *) klpair;
    char *new_value = old_value - (char *)p->orig_base + (char *)p->new_base;
    p->omt->set_at((KLPAIR)new_value, idx);
    return 0;
}
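
// Example of the pointer rebase above (illustrative addresses only): if the
// original mempool base is 0x1000, the cloned base is 0x5000, and a klpair
// lives at 0x1040 in the original, its clone is at 0x1040 - 0x1000 + 0x5000
// = 0x5040 in the new mempool.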

void bn_data::clone(bn_data* orig_bn_data) {
    toku_mempool_clone(&orig_bn_data->m_buffer_mempool, &m_buffer_mempool);
    m_buffer.clone(orig_bn_data->m_buffer);
    struct mp_pair p;
    p.orig_base = toku_mempool_get_base(&orig_bn_data->m_buffer_mempool);
    p.new_base = toku_mempool_get_base(&m_buffer_mempool);
    p.omt = &m_buffer;

    int r = m_buffer.iterate_on_range<decltype(p), fix_mp_offset>(0, omt_size(), &p);
    invariant_zero(r);
}