2008-02-05 17:00:53 +01:00
|
|
|
#ifdef USE_PRAGMA_IMPLEMENTATION
|
2013-04-17 06:01:37 +02:00
|
|
|
#pragma implementation // gcc: Class implementation
|
2008-02-05 17:00:53 +01:00
|
|
|
#endif
|
|
|
|
|
|
|
|
#define MYSQL_SERVER 1
|
|
|
|
#include "mysql_priv.h"
|
2013-04-17 06:01:46 +02:00
|
|
|
extern "C" {
|
|
|
|
#include "stdint.h"
|
|
|
|
#if defined(_WIN32)
|
|
|
|
#include "misc.h"
|
|
|
|
#endif
|
2013-04-17 06:01:46 +02:00
|
|
|
#include "dlmalloc.h"
|
2013-04-17 06:01:46 +02:00
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:38 +02:00
|
|
|
#if !defined(HA_END_SPACE_KEY) || HA_END_SPACE_KEY != 0
|
|
|
|
#error
|
|
|
|
#endif
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
// Reads the pointer stored in the given ha_data slot of a THD.
// For MYSQL_VERSION_ID <= 50123 the slot itself is the pointer; for newer
// versions the pointer is the ha_ptr member of the slot.
// Parameters:
//      [in]    thd  - connection descriptor to read from
//              slot - index into thd->ha_data
// Returns:
//      the pointer stored in that slot
//
static inline void *thd_data_get(THD *thd, int slot) {
#if MYSQL_VERSION_ID > 50123
    return thd->ha_data[slot].ha_ptr;
#else
    return thd->ha_data[slot];
#endif
}
|
|
|
|
|
|
|
|
//
// Stores a pointer in the given ha_data slot of a THD.
// For MYSQL_VERSION_ID <= 50123 the slot itself is assigned; for newer
// versions the ha_ptr member of the slot is assigned.
// Parameters:
//      [in]    thd  - connection descriptor to write to
//              slot - index into thd->ha_data
//      [in]    data - pointer to store
//
static inline void thd_data_set(THD *thd, int slot, void *data) {
#if MYSQL_VERSION_ID > 50123
    thd->ha_data[slot].ha_ptr = data;
#else
    thd->ha_data[slot] = data;
#endif
}
|
|
|
|
|
2008-02-05 17:00:53 +01:00
|
|
|
#undef PACKAGE
|
|
|
|
#undef VERSION
|
|
|
|
#undef HAVE_DTRACE
|
|
|
|
#undef _DTRACE_VERSION
|
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
//#include "tokudb_config.h"
|
2008-02-05 17:00:53 +01:00
|
|
|
|
|
|
|
/* We define DTRACE after mysql_priv.h in case it disabled dtrace in the main server */
|
|
|
|
#ifdef HAVE_DTRACE
|
|
|
|
#define _DTRACE_VERSION 1
|
|
|
|
#else
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#include "tokudb_probes.h"
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
#include "hatoku_defines.h"
|
2008-02-05 17:00:53 +01:00
|
|
|
#include "ha_tokudb.h"
|
2013-04-17 06:01:46 +02:00
|
|
|
#include "hatoku_hton.h"
|
|
|
|
#include "hatoku_cmptrace.h"
|
2008-02-05 17:00:53 +01:00
|
|
|
#include <mysql/plugin.h>
|
|
|
|
|
|
|
|
|
|
|
|
/** @brief
|
|
|
|
Simple lock controls. The "share" it creates is a structure we will
|
|
|
|
pass to each tokudb handler. Do you have to have one of these? Well, you have
|
|
|
|
pieces that are used for locking, and they are needed to function.
|
|
|
|
*/
|
2013-04-17 06:01:37 +02:00
|
|
|
static TOKUDB_SHARE *get_share(const char *table_name, TABLE * table) {
|
|
|
|
TOKUDB_SHARE *share;
|
|
|
|
uint length;
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
pthread_mutex_lock(&tokudb_mutex);
|
|
|
|
length = (uint) strlen(table_name);
|
|
|
|
|
|
|
|
if (!(share = (TOKUDB_SHARE *) hash_search(&tokudb_open_tables, (uchar *) table_name, length))) {
|
|
|
|
char *tmp_name;
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
//
|
|
|
|
// create share and fill it with all zeroes
|
|
|
|
// hence, all pointers are initialized to NULL
|
|
|
|
//
|
2013-04-17 06:01:38 +02:00
|
|
|
if (!(share = (TOKUDB_SHARE *)
|
|
|
|
my_multi_malloc(MYF(MY_WME | MY_ZEROFILL),
|
|
|
|
&share, sizeof(*share),
|
|
|
|
&tmp_name, length + 1,
|
|
|
|
NullS))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
pthread_mutex_unlock(&tokudb_mutex);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
share->use_count = 0;
|
|
|
|
share->table_name_length = length;
|
|
|
|
share->table_name = tmp_name;
|
|
|
|
strmov(share->table_name, table_name);
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
bzero((void *) share->key_file, sizeof(share->key_file));
|
2013-04-17 06:01:37 +02:00
|
|
|
|
|
|
|
if (my_hash_insert(&tokudb_open_tables, (uchar *) share))
|
|
|
|
goto error;
|
|
|
|
thr_lock_init(&share->lock);
|
|
|
|
pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
pthread_mutex_unlock(&tokudb_mutex);
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
return share;
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
error:
|
2008-02-05 17:00:53 +01:00
|
|
|
pthread_mutex_destroy(&share->mutex);
|
|
|
|
my_free((uchar *) share, MYF(0));
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
return NULL;
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
//
// Drops one reference to a share.  When the reference count reaches zero,
// every non-NULL DB in share->key_file and the status_block DB are closed,
// the share is removed from tokudb_open_tables, its lock and mutex are
// destroyed and its memory is freed.  All of this runs under tokudb_mutex.
// Parameters:
//      [in]    share - share to release
//      [in]    table - not referenced by this function
//              hidden_primary_key - not referenced by this function
//              mutex_is_locked - when true, share->mutex is unlocked here
//                  (after tokudb_mutex has been taken)
// Returns:
//      0 if nothing was closed or every close succeeded, otherwise the error
//      of the last close() that failed
//
static int free_share(TOKUDB_SHARE * share, TABLE * table, uint hidden_primary_key, bool mutex_is_locked) {
    int result = 0;

    pthread_mutex_lock(&tokudb_mutex);

    if (mutex_is_locked) {
        pthread_mutex_unlock(&share->mutex);
    }

    --share->use_count;
    if (share->use_count == 0) {
        DBUG_PRINT("info", ("share->use_count %u", share->use_count));

        //
        // The number of open DBs may exceed the number of keys because
        // add_index may have added some, so walk the whole array and close
        // every non-NULL entry.  Each entry is reset to NULL once closed.
        //
        const uint num_slots = sizeof(share->key_file)/sizeof(share->key_file[0]);
        for (uint slot = 0; slot < num_slots; slot++) {
            if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
                TOKUDB_TRACE("dbclose:%p\n", share->key_file[slot]);
            }
            if (share->key_file[slot] == NULL) {
                continue;
            }
            int close_error = share->key_file[slot]->close(share->key_file[slot], 0);
            if (close_error) {
                result = close_error;
            }
            share->key_file[slot] = NULL;
        }

        if (share->status_block) {
            int close_error = share->status_block->close(share->status_block, 0);
            if (close_error) {
                result = close_error;
            }
        }

        hash_delete(&tokudb_open_tables, (uchar *) share);
        thr_lock_delete(&share->lock);
        pthread_mutex_destroy(&share->mutex);
        my_free((uchar *) share, MYF(0));
    }

    pthread_mutex_unlock(&tokudb_mutex);

    return result;
}
|
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
static int get_name_length(const char *name) {
|
|
|
|
int n = 0;
|
|
|
|
const char *newname = name;
|
|
|
|
if (tokudb_data_dir) {
|
|
|
|
n += strlen(tokudb_data_dir) + 1;
|
|
|
|
if (strncmp("./", name, 2) == 0)
|
|
|
|
newname = name + 2;
|
|
|
|
}
|
|
|
|
n += strlen(newname);
|
|
|
|
n += strlen(ha_tokudb_ext);
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void make_name(char *newname, const char *tablename, const char *dictname) {
|
|
|
|
const char *newtablename = tablename;
|
|
|
|
char *nn = newname;
|
|
|
|
if (tokudb_data_dir) {
|
|
|
|
nn += sprintf(nn, "%s/", tokudb_data_dir);
|
|
|
|
if (strncmp("./", tablename, 2) == 0)
|
|
|
|
newtablename = tablename + 2;
|
|
|
|
}
|
|
|
|
nn += sprintf(nn, "%s%s", newtablename, ha_tokudb_ext);
|
|
|
|
if (dictname)
|
|
|
|
nn += sprintf(nn, "/%s%s", dictname, ha_tokudb_ext);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
// Bails out of the enclosing function when no cursor is open: copies
// last_cursor_error into `error` and jumps to the `cleanup` label.
// Requires `cursor`, `error`, `last_cursor_error` and a `cleanup:` label
// to be visible at the point of use.
//
#define HANDLE_INVALID_CURSOR() \
    if (cursor == NULL) { \
        error = last_cursor_error; \
        goto cleanup; \
    }
|
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
//
// Constructor: forwards hton/table_arg to the handler base class and puts
// every member touched here into its initial state.
//
ha_tokudb::ha_tokudb(handlerton * hton, TABLE_SHARE * table_arg)
    : handler(hton, table_arg)
{
    // table capability flags; the HA_* constants are defined in sql/handler.h
    int_table_flags =
        HA_REC_NOT_IN_SEQ | HA_FAST_KEY_READ | HA_NULL_IN_KEY | HA_CAN_INDEX_BLOBS |
        HA_PRIMARY_KEY_IN_READ_INDEX | HA_FILE_BASED | HA_AUTO_PART_KEY |
        HA_TABLE_SCAN_ON_INDEX | HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE;

    // pointers start out NULL
    alloc_ptr = NULL;
    rec_buff = NULL;
    transaction = NULL;
    primary_key_offsets = NULL;

    // row counters
    added_rows = 0;
    deleted_rows = 0;
    num_added_rows_in_stmt = 0;
    num_deleted_rows_in_stmt = 0;
    num_updated_rows_in_stmt = 0;

    // remaining state
    last_dup_key = UINT_MAX;
    using_ignore = 0;
    last_cursor_error = 0;
    range_lock_grabbed = false;
}
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
// NullS-terminated list of file extensions; returned by ha_tokudb::bas_ext()
static const char *ha_tokudb_exts[] = { ha_tokudb_ext, NullS };
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
/*
|
|
|
|
* returns NULL terminated file extension string
|
|
|
|
*/
|
2013-04-17 06:01:37 +02:00
|
|
|
const char **ha_tokudb::bas_ext() const {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::bas_ext");
|
|
|
|
DBUG_RETURN(ha_tokudb_exts);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Returns a bit mask of capabilities of the key or its part specified by
|
|
|
|
// the arguments. The capabilities are defined in sql/handler.h.
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
ulong ha_tokudb::index_flags(uint idx, uint part, bool all_parts) const {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_flags");
|
2013-04-17 06:01:37 +02:00
|
|
|
ulong flags = (HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER | HA_KEYREAD_ONLY | HA_READ_RANGE);
|
2013-04-17 06:01:41 +02:00
|
|
|
DBUG_RETURN(flags);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
//
// Comparison callback for hidden primary keys.  Both DBTs are decoded with
// uint5korr and compared as unsigned integers.
// Parameters:
//      [in]    file - not referenced by this function
//      [in]    new_key, saved_key - keys to compare
// Returns:
//      -1, 0 or 1 when new_key is less than, equal to or greater than saved_key
//
static int tokudb_cmp_hidden_key(DB * file, const DBT * new_key, const DBT * saved_key) {
    const ulonglong new_id = uint5korr((char *) new_key->data);
    const ulonglong saved_id = uint5korr((char *) saved_key->data);

    if (new_id < saved_id) {
        return -1;
    }
    if (new_id > saved_id) {
        return 1;
    }
    return 0;
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
// Compares two packed keys field by field using the key parts of `key`.
// The first byte of each DBT is the "infinity" byte and is not part of the
// field data; it only decides the outcome when one key runs out of bytes
// before the other.
// Parameters:
//      [in]    key - key definition supplying the key parts
//      [in]    new_key, saved_key - packed keys to compare
//              cmp_prefix - when true, 0 is returned as soon as the loop ends
//                  without finding a differing field, even if one key still
//                  has bytes left
// Returns:
//      <0, 0 or >0 when new_key sorts before, equal to or after saved_key
//
static int tokudb_compare_two_keys(KEY *key, const DBT * new_key, const DBT * saved_key, bool cmp_prefix) {
    const uchar new_inf = *(uchar *) new_key->data;
    const uchar saved_inf = *(uchar *) saved_key->data;

    // field data starts right after the infinity byte
    uchar *new_pos = (uchar *) (new_key->data) + 1;
    uchar *saved_pos = (uchar *) (saved_key->data) + 1;

    // remaining byte counts, not counting the infinity byte
    uint new_left = new_key->size - sizeof(uchar);
    uint saved_left = saved_key->size - sizeof(uchar);

    KEY_PART_INFO *part = key->key_part;
    KEY_PART_INFO *const parts_end = part + key->key_parts;

    //DBUG_DUMP("key_in_index", saved_key_ptr, saved_key->size);
    for (; part != parts_end && (int) new_left > 0 && (int) saved_left > 0; part++) {
        if (part->field->null_bit) {
            // nullable field: one leading flag byte in each key
            assert(new_pos < (uchar *) new_key->data + new_key->size);
            assert(saved_pos < (uchar *) saved_key->data + saved_key->size);
            const uchar new_flag = *new_pos;
            if (new_flag != *saved_pos) {
                return ((int) new_flag - (int) *saved_pos);
            }
            new_pos++;
            saved_pos++;
            new_left--;
            saved_left--;
            if (!new_flag) {
                // equal flags of zero: no field data follows for this part
                continue;
            }
        }

        const uint new_field_len = part->field->packed_col_length(new_pos, part->length);
        const uint saved_field_len = part->field->packed_col_length(saved_pos, part->length);
        assert(new_left >= new_field_len);
        assert(saved_left >= saved_field_len);

        const int cmp = part->field->pack_cmp(new_pos, saved_pos, part->length, 0);
        if (cmp) {
            return cmp;
        }

        new_pos += new_field_len;
        new_left -= new_field_len;
        saved_pos += saved_field_len;
        saved_left -= saved_field_len;
    }

    if (cmp_prefix || (new_left == 0 && saved_left == 0)) {
        return 0;
    }
    //
    // at this point, one SHOULD be 0
    //
    if (new_left == 0 && saved_left > 0) {
        return (new_inf == COL_POS_INF) ? 1 : -1;
    }
    if (new_left > 0 && saved_left == 0) {
        return (saved_inf == COL_POS_INF) ? -1 : 1;
    }
    //
    // this should never happen, perhaps we should assert(false)
    //
    return new_left - saved_left;
}
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// this is super super ugly, copied from compare_two_keys so that it can get done fast
|
|
|
|
//
|
|
|
|
static int tokudb_compare_two_clustered_keys(KEY *key, KEY* primary_key, const DBT * new_key, const DBT * saved_key) {
|
|
|
|
uchar new_key_inf_val = *(uchar *) new_key->data;
|
|
|
|
uchar saved_key_inf_val = *(uchar *) saved_key->data;
|
|
|
|
//
|
|
|
|
// first byte is "infinity" byte
|
|
|
|
//
|
|
|
|
uchar *new_key_ptr = (uchar *)(new_key->data) + 1;
|
|
|
|
uchar *saved_key_ptr = (uchar *)(saved_key->data) + 1;
|
|
|
|
KEY_PART_INFO *key_part = key->key_part, *end = key_part + key->key_parts;
|
|
|
|
int ret_val;
|
|
|
|
//
|
|
|
|
// do not include the inf val at the beginning
|
|
|
|
//
|
|
|
|
uint new_key_length = new_key->size - sizeof(uchar);
|
|
|
|
uint saved_key_length = saved_key->size - sizeof(uchar);
|
|
|
|
|
|
|
|
//DBUG_DUMP("key_in_index", saved_key_ptr, saved_key->size);
|
|
|
|
for (; key_part != end && (int) new_key_length > 0 && (int) saved_key_length > 0; key_part++) {
|
|
|
|
int cmp;
|
|
|
|
uint new_key_field_length;
|
|
|
|
uint saved_key_field_length;
|
|
|
|
if (key_part->field->null_bit) {
|
|
|
|
assert(new_key_ptr < (uchar *) new_key->data + new_key->size);
|
|
|
|
assert(saved_key_ptr < (uchar *) saved_key->data + saved_key->size);
|
|
|
|
if (*new_key_ptr != *saved_key_ptr) {
|
|
|
|
return ((int) *new_key_ptr - (int) *saved_key_ptr); }
|
|
|
|
saved_key_ptr++;
|
|
|
|
new_key_length--;
|
|
|
|
saved_key_length--;
|
|
|
|
if (!*new_key_ptr++) { continue; }
|
|
|
|
}
|
|
|
|
new_key_field_length = key_part->field->packed_col_length(new_key_ptr, key_part->length);
|
|
|
|
saved_key_field_length = key_part->field->packed_col_length(saved_key_ptr, key_part->length);
|
|
|
|
assert(new_key_length >= new_key_field_length);
|
|
|
|
assert(saved_key_length >= saved_key_field_length);
|
|
|
|
if ((cmp = key_part->field->pack_cmp(new_key_ptr, saved_key_ptr, key_part->length, 0)))
|
|
|
|
return cmp;
|
|
|
|
new_key_ptr += new_key_field_length;
|
|
|
|
new_key_length -= new_key_field_length;
|
|
|
|
saved_key_ptr += saved_key_field_length;
|
|
|
|
saved_key_length -= saved_key_field_length;
|
|
|
|
}
|
|
|
|
if (new_key_length == 0 && saved_key_length == 0){
|
|
|
|
ret_val = 0;
|
|
|
|
}
|
|
|
|
else if (new_key_length == 0 && saved_key_length > 0) {
|
|
|
|
ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
|
|
|
|
}
|
|
|
|
else if (new_key_length > 0 && saved_key_length == 0) {
|
|
|
|
ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
|
|
|
|
}
|
|
|
|
//
|
|
|
|
// now we compare the primary key
|
|
|
|
//
|
|
|
|
else {
|
|
|
|
if (primary_key == NULL) {
|
|
|
|
//
|
|
|
|
// primary key hidden
|
|
|
|
//
|
|
|
|
ulonglong a = uint5korr((char *) new_key_ptr);
|
|
|
|
ulonglong b = uint5korr((char *) saved_key_ptr);
|
|
|
|
ret_val = a < b ? -1 : (a > b ? 1 : 0);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
//
|
|
|
|
// primary key not hidden, I know this is bad, basically copying the code from above
|
|
|
|
//
|
|
|
|
key_part = primary_key->key_part;
|
|
|
|
end = key_part + primary_key->key_parts;
|
|
|
|
for (; key_part != end && (int) new_key_length > 0 && (int) saved_key_length > 0; key_part++) {
|
|
|
|
int cmp;
|
|
|
|
uint new_key_field_length;
|
|
|
|
uint saved_key_field_length;
|
|
|
|
if (key_part->field->null_bit) {
|
|
|
|
assert(new_key_ptr < (uchar *) new_key->data + new_key->size);
|
|
|
|
assert(saved_key_ptr < (uchar *) saved_key->data + saved_key->size);
|
|
|
|
if (*new_key_ptr != *saved_key_ptr) {
|
|
|
|
return ((int) *new_key_ptr - (int) *saved_key_ptr); }
|
|
|
|
saved_key_ptr++;
|
|
|
|
new_key_length--;
|
|
|
|
saved_key_length--;
|
|
|
|
if (!*new_key_ptr++) { continue; }
|
|
|
|
}
|
|
|
|
new_key_field_length = key_part->field->packed_col_length(new_key_ptr, key_part->length);
|
|
|
|
saved_key_field_length = key_part->field->packed_col_length(saved_key_ptr, key_part->length);
|
|
|
|
assert(new_key_length >= new_key_field_length);
|
|
|
|
assert(saved_key_length >= saved_key_field_length);
|
|
|
|
if ((cmp = key_part->field->pack_cmp(new_key_ptr, saved_key_ptr, key_part->length, 0)))
|
|
|
|
return cmp;
|
|
|
|
new_key_ptr += new_key_field_length;
|
|
|
|
new_key_length -= new_key_field_length;
|
|
|
|
saved_key_ptr += saved_key_field_length;
|
|
|
|
saved_key_length -= saved_key_field_length;
|
|
|
|
}
|
|
|
|
//
|
|
|
|
// at this point, we have compared the actual keys and the primary key, we return 0
|
|
|
|
//
|
|
|
|
ret_val = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ret_val;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
// Key comparison callback.  file->app_private holds the KEY describing the
// index and file->api_internal holds the KEY of the primary key.  Keys of an
// index flagged HA_CLUSTERING are compared with
// tokudb_compare_two_clustered_keys, all others with tokudb_compare_two_keys
// (full compare, no prefix matching).
// Returns:
//      <0, 0 or >0 when keya sorts before, equal to or after keyb
//
static int tokudb_cmp_packed_key(DB *file, const DBT *keya, const DBT *keyb) {
    assert(file->app_private != 0);
    KEY *index_key = (KEY *) file->app_private;

    return (index_key->flags & HA_CLUSTERING)
        ? tokudb_compare_two_clustered_keys(index_key, (KEY *) file->api_internal, keya, keyb)
        : tokudb_compare_two_keys(index_key, keya, keyb, false);
}
|
2013-04-17 06:01:39 +02:00
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
// Comparison callback that compares two packed primary keys.  The KEY
// describing the primary key is taken from file->api_internal (not from
// file->app_private).
// Returns:
//      <0, 0 or >0 when keya sorts before, equal to or after keyb
//
static int tokudb_cmp_primary_key(DB *file, const DBT *keya, const DBT *keyb) {
    assert(file->app_private != 0);
    // api_internal is the pointer actually dereferenced below, so it is the
    // one that has to be valid
    assert(file->api_internal != 0);
    KEY *key = (KEY *) file->api_internal;
    return tokudb_compare_two_keys(key, keya, keyb, false);
}
|
|
|
|
|
|
|
|
//TODO: QQQ Only do one direction for prefix.
|
|
|
|
static int tokudb_prefix_cmp_packed_key(DB *file, const DBT *keya, const DBT *keyb) {
|
|
|
|
assert(file->app_private != 0);
|
|
|
|
KEY *key = (KEY *) file->app_private;
|
|
|
|
return tokudb_compare_two_keys(key, keya, keyb, true);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
int primary_key_part_compare (const void* left, const void* right) {
|
|
|
|
PRIM_KEY_PART_INFO* left_part= (PRIM_KEY_PART_INFO *)left;
|
|
|
|
PRIM_KEY_PART_INFO* right_part = (PRIM_KEY_PART_INFO *)right;
|
|
|
|
return left_part->offset - right_part->offset;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// struct that will be used as a context for smart DBT callbacks
|
|
|
|
// contains parameters needed to complete the smart DBT cursor call
|
|
|
|
//
|
|
|
|
typedef struct smart_dbt_info {
|
|
|
|
ha_tokudb* ha; //instance to ha_tokudb needed for reading the row
|
|
|
|
uchar* buf; // output buffer where row will be written
|
|
|
|
uint keynr; // index into share->key_file that represents DB we are currently operating on
|
|
|
|
} *SMART_DBT_INFO;
|
|
|
|
|
|
|
|
//
|
|
|
|
// struct that will be used as a context for smart DBT callbacks
|
|
|
|
// ONLY for the function add_index
|
|
|
|
//
|
|
|
|
typedef struct smart_dbt_ai_info {
|
|
|
|
ha_tokudb* ha; //instance to ha_tokudb needed for reading the row
|
|
|
|
DBT* prim_key; // DBT to store the primary key
|
|
|
|
uchar* buf; // buffer to unpack the row
|
|
|
|
} *SMART_DBT_AI_INFO;
|
|
|
|
|
|
|
|
//
// Smart DBT callback for add_index: unpacks the row into the context's
// buffer, then copies the key bytes and size into the context's prim_key DBT.
// prim_key->data must already point at storage of at least key->size bytes;
// nothing is allocated here.
// Parameters:
//      [in]    key, row - current cursor position
//      [in]    context - a SMART_DBT_AI_INFO
//
static void smart_dbt_ai_callback (DBT const *key, DBT const *row, void *context) {
    SMART_DBT_AI_INFO ai = static_cast<SMART_DBT_AI_INFO>(context);

    ai->ha->unpack_row(ai->buf, row, key, true);

    // hand the primary key back to the caller
    DBT *dest = ai->prim_key;
    dest->size = key->size;
    memcpy(dest->data, key->data, key->size);
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// smart DBT callback function for optimize
|
|
|
|
// in optimize, we want to flatten DB by doing
|
|
|
|
// a full table scan. Therefore, we don't
|
|
|
|
// want to actually do anything with the data, hence
|
|
|
|
// callback does nothing
|
|
|
|
//
|
|
|
|
static void smart_dbt_opt_callback (DBT const *key, DBT const *row, void *context) {
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// Smart DBT callback function in case where we have a covering index
|
|
|
|
//
|
|
|
|
static void smart_dbt_callback_keyread(DBT const *key, DBT const *row, void *context) {
|
|
|
|
SMART_DBT_INFO info = (SMART_DBT_INFO)context;
|
|
|
|
info->ha->extract_hidden_primary_key(info->keynr, row, key);
|
|
|
|
info->ha->read_key_only(info->buf,info->keynr,row,key);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Smart DBT callback function in case where we do NOT have a covering index
|
|
|
|
//
|
|
|
|
static void smart_dbt_callback_rowread(DBT const *key, DBT const *row, void *context) {
|
|
|
|
SMART_DBT_INFO info = (SMART_DBT_INFO)context;
|
|
|
|
info->ha->extract_hidden_primary_key(info->keynr, row, key);
|
|
|
|
info->ha->read_primary_key(info->buf,info->keynr,row,key);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Smart DBT callback function in c_getf_heavi, in case where we have a covering index,
|
|
|
|
//
|
|
|
|
static void smart_dbt_callback_keyread_heavi(DBT const *key, DBT const *row, void *context, int r_h) {
|
2013-04-17 06:01:45 +02:00
|
|
|
SMART_DBT_INFO info = (SMART_DBT_INFO)context;
|
|
|
|
info->ha->heavi_ret_val = r_h;
|
2013-04-17 06:01:44 +02:00
|
|
|
smart_dbt_callback_keyread(key,row,context);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Smart DBT callback function in c_getf_heavi, in case where we do NOT have a covering index
|
|
|
|
//
|
|
|
|
static void smart_dbt_callback_rowread_heavi(DBT const *key, DBT const *row, void *context, int r_h) {
|
2013-04-17 06:01:45 +02:00
|
|
|
SMART_DBT_INFO info = (SMART_DBT_INFO)context;
|
|
|
|
info->ha->heavi_ret_val = r_h;
|
2013-04-17 06:01:44 +02:00
|
|
|
smart_dbt_callback_rowread(key,row,context);
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// Smart DBT callback function in records_in_range
|
|
|
|
//
|
|
|
|
static void smart_dbt_callback_ror_heavi(DBT const *key, DBT const *row, void *context, int r_h) {
|
|
|
|
DBT* copied_key = (DBT *)context;
|
|
|
|
copied_key->size = key->size;
|
|
|
|
memcpy(copied_key->data, key->data, key->size);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// macro for Smart DBT callback function,
|
|
|
|
// so we do not need to put this long line of code in multiple places
|
|
|
|
//
|
|
|
|
// Expands to smart_dbt_callback_keyread when this->key_read is true and to
// smart_dbt_callback_rowread otherwise; usable only where `this` has a
// key_read member.
#define SMART_DBT_CALLBACK ( this->key_read ? smart_dbt_callback_keyread : smart_dbt_callback_rowread )
|
2013-04-17 06:01:43 +02:00
|
|
|
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
//
|
|
|
|
// macro that modifies read flag for cursor operations depending on whether
|
|
|
|
// we have preacquired lock or not
|
|
|
|
//
|
2013-04-17 06:01:44 +02:00
|
|
|
// Yields flg with DB_PRELOCKED OR-ed in when range_lock_grabbed is set or
// current_thd->options has OPTION_TABLE_LOCK set; otherwise yields flg
// unchanged.  (`&` binds tighter than `||`, so the option test is evaluated
// as a unit.)
#define SET_READ_FLAG(flg) ((range_lock_grabbed || current_thd->options & OPTION_TABLE_LOCK) ? ((flg) | DB_PRELOCKED) : (flg))
|
2013-04-17 06:01:42 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// This method retrieves the value of the auto increment column of a record in MySQL format
|
|
|
|
// This was basically taken from MyISAM
|
|
|
|
// Parameters:
|
|
|
|
// type - the type of the auto increment column (e.g. int, float, double...)
|
|
|
|
// offset - offset into the record where the auto increment column is stored
|
|
|
|
// [in] record - MySQL row whose auto increment value we want to extract
|
|
|
|
// Returns:
|
|
|
|
// The value of the auto increment column in record
|
|
|
|
//
|
|
|
|
ulonglong retrieve_auto_increment(uint16 type, uint32 offset,const uchar *record)
|
|
|
|
{
|
2013-04-17 06:01:45 +02:00
|
|
|
const uchar *key; /* Key */
|
2013-04-17 06:01:46 +02:00
|
|
|
ulonglong unsigned_autoinc = 0; /* Unsigned auto-increment */
|
|
|
|
longlong signed_autoinc = 0; /* Signed auto-increment */
|
2013-04-17 06:01:45 +02:00
|
|
|
enum { unsigned_type, signed_type } autoinc_type;
|
|
|
|
float float_tmp; /* Temporary variable */
|
|
|
|
double double_tmp; /* Temporary variable */
|
|
|
|
|
|
|
|
key = ((uchar *) record) + offset;
|
|
|
|
|
|
|
|
/* Set default autoincrement type */
|
|
|
|
autoinc_type = unsigned_type;
|
|
|
|
|
|
|
|
switch (type) {
|
|
|
|
case HA_KEYTYPE_INT8:
|
|
|
|
signed_autoinc = (longlong) *(char*)key;
|
|
|
|
autoinc_type = signed_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_BINARY:
|
|
|
|
unsigned_autoinc = (ulonglong) *(uchar*) key;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_SHORT_INT:
|
|
|
|
signed_autoinc = (longlong) sint2korr(key);
|
|
|
|
autoinc_type = signed_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_USHORT_INT:
|
|
|
|
unsigned_autoinc = (ulonglong) uint2korr(key);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_LONG_INT:
|
|
|
|
signed_autoinc = (longlong) sint4korr(key);
|
|
|
|
autoinc_type = signed_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_ULONG_INT:
|
|
|
|
unsigned_autoinc = (ulonglong) uint4korr(key);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_INT24:
|
|
|
|
signed_autoinc = (longlong) sint3korr(key);
|
|
|
|
autoinc_type = signed_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_UINT24:
|
|
|
|
unsigned_autoinc = (ulonglong) uint3korr(key);
|
2013-04-17 06:01:44 +02:00
|
|
|
break;
|
2013-04-17 06:01:45 +02:00
|
|
|
|
|
|
|
case HA_KEYTYPE_LONGLONG:
|
|
|
|
signed_autoinc = sint8korr(key);
|
|
|
|
autoinc_type = signed_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_ULONGLONG:
|
|
|
|
unsigned_autoinc = uint8korr(key);
|
|
|
|
break;
|
|
|
|
|
|
|
|
/* The remaining two cases should not be used but are included for
|
|
|
|
compatibility */
|
|
|
|
case HA_KEYTYPE_FLOAT:
|
|
|
|
float4get(float_tmp, key); /* Note: float4get is a macro */
|
|
|
|
signed_autoinc = (longlong) float_tmp;
|
|
|
|
autoinc_type = signed_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case HA_KEYTYPE_DOUBLE:
|
|
|
|
float8get(double_tmp, key); /* Note: float8get is a macro */
|
|
|
|
signed_autoinc = (longlong) double_tmp;
|
|
|
|
autoinc_type = signed_type;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
DBUG_ASSERT(0);
|
|
|
|
unsigned_autoinc = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (signed_autoinc < 0) {
|
|
|
|
signed_autoinc = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return autoinc_type == unsigned_type ?
|
|
|
|
unsigned_autoinc : (ulonglong) signed_autoinc;
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
//
// Returns the distance in bytes from the start of table->record[0] to the
// byte field->null_ptr points at.
//
inline uint get_null_offset(TABLE* table, Field* field) {
    const uchar *row_start = (uchar*) table->record[0];
    const uchar *null_byte = (uchar*) field->null_ptr;
    return (uint) (null_byte - row_start);
}
|
|
|
|
|
|
|
|
inline bool
|
|
|
|
is_null_field( TABLE* table, Field* field, const uchar* record) {
|
2013-04-17 06:01:46 +02:00
|
|
|
uint null_offset;
|
2013-04-17 06:01:45 +02:00
|
|
|
bool ret_val;
|
2013-04-17 06:01:46 +02:00
|
|
|
if (!field->null_ptr) {
|
|
|
|
ret_val = false;
|
2013-04-17 06:01:45 +02:00
|
|
|
goto exitpt;
|
2013-04-17 06:01:46 +02:00
|
|
|
}
|
|
|
|
null_offset = get_null_offset(table,field);
|
2013-04-17 06:01:45 +02:00
|
|
|
ret_val = (record[null_offset] & field->null_bit) ? true: false;
|
|
|
|
|
|
|
|
exitpt:
|
2013-04-17 06:01:46 +02:00
|
|
|
return ret_val;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
// Returns the distance in bytes from the start of table->record[0] to
// field->ptr.
//
inline ulong field_offset(Field* field, TABLE* table) {
    return (ulong) (field->ptr - table->record[0]);
}
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
//
// Maps a MySQL transaction isolation level to the engine's isolation level:
// ISO_READ_UNCOMMITTED maps to hatoku_iso_read_uncommitted, every other
// level maps to hatoku_iso_serializable.
//
inline HA_TOKU_ISO_LEVEL tx_to_toku_iso(ulong tx_isolation) {
    return (tx_isolation == ISO_READ_UNCOMMITTED)
        ? hatoku_iso_read_uncommitted
        : hatoku_iso_serializable;
}
|
|
|
|
|
|
|
|
//
// Maps the engine's isolation level to a transaction flag:
// DB_READ_UNCOMMITTED for hatoku_iso_read_uncommitted, 0 for anything else.
//
inline u_int32_t toku_iso_to_txn_flag (HA_TOKU_ISO_LEVEL lvl) {
    if (lvl != hatoku_iso_read_uncommitted) {
        return 0;
    }
    return DB_READ_UNCOMMITTED;
}
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// Open a secondary table, the key will be a secondary index, the data will be a primary key
|
|
|
|
//
|
|
|
|
int ha_tokudb::open_secondary_table(DB** ptr, KEY* key_info, const char* name, int mode, u_int32_t* key_type) {
|
|
|
|
int error = ENOSYS;
|
|
|
|
char part[MAX_ALIAS_NAME + 10];
|
|
|
|
char name_buff[FN_REFLEN];
|
|
|
|
uint open_flags = (mode == O_RDONLY ? DB_RDONLY : 0) | DB_THREAD;
|
|
|
|
DBT cmp_byte_stream;
|
2013-04-17 06:01:45 +02:00
|
|
|
char* newname = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
newname = (char *)my_malloc(strlen(name) + NAME_CHAR_LEN, MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (newname == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
open_flags += DB_AUTO_COMMIT;
|
2013-04-17 06:01:41 +02:00
|
|
|
|
|
|
|
if ((error = db_create(ptr, db_env, 0))) {
|
|
|
|
my_errno = error;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
sprintf(part, "key-%s", key_info->name);
|
|
|
|
make_name(newname, name, part);
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
|
|
|
*key_type = key_info->flags & HA_NOSAME ? DB_NOOVERWRITE : DB_YESOVERWRITE;
|
|
|
|
(*ptr)->app_private = (void *) (key_info);
|
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_SAVE_TRACE) {
|
|
|
|
bzero((void *) &cmp_byte_stream, sizeof(cmp_byte_stream));
|
|
|
|
cmp_byte_stream.flags = DB_DBT_MALLOC;
|
|
|
|
if ((error = tokutrace_db_get_cmp_byte_stream(*ptr, &cmp_byte_stream))) {
|
|
|
|
my_errno = error;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
(*ptr)->set_bt_compare(*ptr, tokudb_cmp_packed_key);
|
|
|
|
my_free(cmp_byte_stream.data, MYF(0));
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
else {
|
2013-04-17 06:01:41 +02:00
|
|
|
(*ptr)->set_bt_compare(*ptr, tokudb_cmp_packed_key);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
DBUG_PRINT("info", ("Setting DB_DUP+DB_DUPSORT for key %s\n", key_info->name));
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// clustering keys are not DB_DUP, because their keys are unique (they have the PK embedded)
|
|
|
|
//
|
|
|
|
if (!(key_info->flags & HA_CLUSTERING)) {
|
|
|
|
(*ptr)->set_flags(*ptr, DB_DUP + DB_DUPSORT);
|
|
|
|
(*ptr)->set_dup_compare(*ptr, hidden_primary_key ? tokudb_cmp_hidden_key : tokudb_cmp_primary_key);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
(*ptr)->api_internal = share->file->app_private;
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = (*ptr)->open(*ptr, 0, name_buff, NULL, DB_BTREE, open_flags, 0))) {
|
|
|
|
my_errno = error;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
|
|
|
TOKUDB_TRACE("open:%s:file=%p\n", newname, *ptr);
|
|
|
|
}
|
|
|
|
cleanup:
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:41 +02:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Creates and opens a handle to a table which already exists in a tokudb
|
|
|
|
// database.
|
|
|
|
// Parameters:
|
|
|
|
// [in] name - table name
|
|
|
|
// mode - seems to specify if table is read only
|
|
|
|
// test_if_locked - unused
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// 1 on error
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::open(const char *name, int mode, uint test_if_locked) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::open %p %s", this, name);
|
2013-04-17 06:01:37 +02:00
|
|
|
TOKUDB_OPEN();
|
|
|
|
|
|
|
|
char name_buff[FN_REFLEN];
|
2013-04-17 06:01:38 +02:00
|
|
|
uint open_flags = (mode == O_RDONLY ? DB_RDONLY : 0) | DB_THREAD;
|
2013-04-17 06:01:37 +02:00
|
|
|
uint max_key_length;
|
|
|
|
int error;
|
2013-04-17 06:01:45 +02:00
|
|
|
char* newname = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
transaction = NULL;
|
|
|
|
cursor = NULL;
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
open_flags += DB_AUTO_COMMIT;
|
2013-04-17 06:01:40 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
newname = (char *)my_malloc(strlen(name) + NAME_CHAR_LEN,MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (newname == NULL) {
|
|
|
|
TOKUDB_DBUG_RETURN(1);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
/* Open primary key */
|
|
|
|
hidden_primary_key = 0;
|
|
|
|
if ((primary_key = table_share->primary_key) >= MAX_KEY) {
|
|
|
|
// No primary key
|
|
|
|
primary_key = table_share->keys;
|
|
|
|
key_used_on_scan = MAX_KEY;
|
|
|
|
ref_length = hidden_primary_key = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:37 +02:00
|
|
|
key_used_on_scan = primary_key;
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
/* Need some extra memory in case of packed keys */
|
2013-04-17 06:01:45 +02:00
|
|
|
// the "+ 1" is for the first byte that states +/- infinity
|
2013-04-17 06:01:45 +02:00
|
|
|
// multiply everything by 2 to account for clustered keys having a key and primary key together
|
|
|
|
max_key_length = 2*(table_share->max_key_length + MAX_REF_PARTS * 3 + sizeof(uchar));
|
2013-04-17 06:01:37 +02:00
|
|
|
if (!(alloc_ptr =
|
|
|
|
my_multi_malloc(MYF(MY_WME),
|
2013-04-17 06:01:38 +02:00
|
|
|
&key_buff, max_key_length,
|
|
|
|
&key_buff2, max_key_length,
|
2013-04-17 06:01:46 +02:00
|
|
|
&primary_key_buff, (hidden_primary_key ? 0 : max_key_length),
|
2013-04-17 06:01:45 +02:00
|
|
|
NullS))) {
|
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
if (!(rec_buff = (uchar *) my_malloc((alloced_rec_buff_length = table_share->rec_buff_length), MYF(MY_WME)))) {
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:37 +02:00
|
|
|
my_free(alloc_ptr, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
alloc_ptr = NULL;
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Init shared structure */
|
|
|
|
if (!(share = get_share(name, table))) {
|
|
|
|
my_free((char *) rec_buff, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
rec_buff = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
my_free(alloc_ptr, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
alloc_ptr = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
/* Make sorted list of primary key parts, if they exist*/
|
|
|
|
if (!hidden_primary_key) {
|
|
|
|
uint num_prim_key_parts = table_share->key_info[table_share->primary_key].key_parts;
|
|
|
|
primary_key_offsets = (PRIM_KEY_PART_INFO *)my_malloc(
|
|
|
|
num_prim_key_parts*sizeof(*primary_key_offsets),
|
|
|
|
MYF(MY_WME)
|
|
|
|
);
|
|
|
|
|
|
|
|
if (!primary_key_offsets) {
|
|
|
|
free_share(share, table, hidden_primary_key, 1);
|
|
|
|
my_free((char *) rec_buff, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
rec_buff = NULL;
|
2013-04-17 06:01:41 +02:00
|
|
|
my_free(alloc_ptr, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
alloc_ptr = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
|
|
|
}
|
|
|
|
for (uint i = 0; i < table_share->key_info[table_share->primary_key].key_parts; i++) {
|
|
|
|
primary_key_offsets[i].offset = table_share->key_info[table_share->primary_key].key_part[i].offset;
|
|
|
|
primary_key_offsets[i].part_index = i;
|
|
|
|
}
|
|
|
|
qsort(
|
|
|
|
primary_key_offsets, // start of array
|
|
|
|
num_prim_key_parts, //num elements
|
|
|
|
sizeof(*primary_key_offsets), //size of each element
|
|
|
|
primary_key_part_compare
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
thr_lock_data_init(&share->lock, &lock, NULL);
|
|
|
|
bzero((void *) ¤t_row, sizeof(current_row));
|
|
|
|
|
|
|
|
/* Fill in shared structure, if needed */
|
|
|
|
pthread_mutex_lock(&share->mutex);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("tokudbopen:%p:share=%p:file=%p:table=%p:table->s=%p:%d\n",
|
|
|
|
this, share, share->file, table, table->s, share->use_count);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
if (!share->use_count++) {
|
|
|
|
DBUG_PRINT("info", ("share->use_count %u", share->use_count));
|
2013-04-17 06:01:40 +02:00
|
|
|
DBT cmp_byte_stream;
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = db_create(&share->file, db_env, 0))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
free_share(share, table, hidden_primary_key, 1);
|
|
|
|
my_free((char *) rec_buff, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
rec_buff = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
my_free(alloc_ptr, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
alloc_ptr = NULL;
|
|
|
|
if (primary_key_offsets) {
|
|
|
|
my_free(primary_key_offsets, MYF(0));
|
|
|
|
primary_key_offsets = NULL;
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
my_errno = error;
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!hidden_primary_key) {
|
2013-04-17 06:01:41 +02:00
|
|
|
share->file->app_private = (void *) (table_share->key_info + table_share->primary_key);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
share->file->app_private = NULL;
|
|
|
|
}
|
2013-04-17 06:01:40 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_SAVE_TRACE) {
|
|
|
|
bzero((void *) &cmp_byte_stream, sizeof(cmp_byte_stream));
|
|
|
|
cmp_byte_stream.flags = DB_DBT_MALLOC;
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = tokutrace_db_get_cmp_byte_stream(share->file, &cmp_byte_stream))) {
|
2013-04-17 06:01:40 +02:00
|
|
|
free_share(share, table, hidden_primary_key, 1);
|
|
|
|
my_free((char *) rec_buff, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
rec_buff = NULL;
|
2013-04-17 06:01:40 +02:00
|
|
|
my_free(alloc_ptr, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
alloc_ptr = NULL;
|
|
|
|
if (primary_key_offsets) {
|
|
|
|
my_free(primary_key_offsets, MYF(0));
|
|
|
|
primary_key_offsets = NULL;
|
|
|
|
}
|
2013-04-17 06:01:40 +02:00
|
|
|
my_errno = error;
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
share->file->set_bt_compare(share->file, (hidden_primary_key ? tokudb_cmp_hidden_key : tokudb_cmp_packed_key));
|
2013-04-17 06:01:40 +02:00
|
|
|
my_free(cmp_byte_stream.data, MYF(0));
|
|
|
|
}
|
|
|
|
else
|
2013-04-17 06:01:41 +02:00
|
|
|
share->file->set_bt_compare(share->file, (hidden_primary_key ? tokudb_cmp_hidden_key : tokudb_cmp_packed_key));
|
2013-04-17 06:01:40 +02:00
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(newname, name, "main");
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = share->file->open(share->file, 0, name_buff, NULL, DB_BTREE, open_flags, 0))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
free_share(share, table, hidden_primary_key, 1);
|
|
|
|
my_free((char *) rec_buff, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
rec_buff = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
my_free(alloc_ptr, MYF(0));
|
2013-04-17 06:01:46 +02:00
|
|
|
alloc_ptr = NULL;
|
|
|
|
if (primary_key_offsets) {
|
|
|
|
my_free(primary_key_offsets, MYF(0));
|
|
|
|
primary_key_offsets = NULL;
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
my_errno = error;
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN)
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_TRACE("open:%s:file=%p\n", newname, share->file);
|
2013-04-17 06:01:37 +02:00
|
|
|
|
|
|
|
/* Open other keys; These are part of the share structure */
|
2013-04-17 06:01:41 +02:00
|
|
|
share->key_file[primary_key] = share->file;
|
2013-04-17 06:01:44 +02:00
|
|
|
share->key_type[primary_key] = hidden_primary_key ? DB_YESOVERWRITE : DB_NOOVERWRITE;
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
DB **ptr = share->key_file;
|
|
|
|
for (uint i = 0; i < table_share->keys; i++, ptr++) {
|
2013-04-17 06:01:37 +02:00
|
|
|
if (i != primary_key) {
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = open_secondary_table(ptr,&table_share->key_info[i],name,mode,&share->key_type[i]))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
__close(1);
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Calculate pack_length of primary key */
|
|
|
|
share->fixed_length_primary_key = 1;
|
|
|
|
if (!hidden_primary_key) {
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// I realize this is incredibly confusing, and refactoring should take
|
|
|
|
// care of this, but we need to set the ref_length to start at 1, to account for
|
|
|
|
// the "infinity byte" in keys.
|
|
|
|
//
|
|
|
|
ref_length = sizeof(uchar);
|
2013-04-17 06:01:37 +02:00
|
|
|
KEY_PART_INFO *key_part = table->key_info[primary_key].key_part;
|
|
|
|
KEY_PART_INFO *end = key_part + table->key_info[primary_key].key_parts;
|
|
|
|
for (; key_part != end; key_part++)
|
|
|
|
ref_length += key_part->field->max_packed_col_length(key_part->length);
|
2013-04-17 06:01:45 +02:00
|
|
|
share->fixed_length_primary_key = (ref_length == table->key_info[primary_key].key_length + sizeof(uchar));
|
2013-04-17 06:01:37 +02:00
|
|
|
share->status |= STATUS_PRIMARY_KEY_INIT;
|
|
|
|
}
|
|
|
|
share->ref_length = ref_length;
|
2013-04-17 06:01:43 +02:00
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
error = get_status();
|
2013-04-17 06:01:45 +02:00
|
|
|
if (error || share->version < HA_TOKU_VERSION) {
|
2013-04-17 06:01:43 +02:00
|
|
|
__close(1);
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:43 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
//////////////////////
|
|
|
|
u_int64_t num_rows = 0;
|
|
|
|
int error = estimate_num_rows(share->file,&num_rows);
|
|
|
|
//
|
|
|
|
// estimate_num_rows should not fail under normal conditions
|
|
|
|
//
|
|
|
|
if (error == 0) {
|
|
|
|
share->rows = num_rows;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
__close(1);
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:43 +02:00
|
|
|
TOKUDB_DBUG_RETURN(1);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// initialize auto increment data
|
|
|
|
//
|
|
|
|
share->has_auto_inc = has_auto_increment_flag(&share->ai_field_index);
|
|
|
|
if (share->has_auto_inc) {
|
|
|
|
init_auto_increment();
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
ref_length = share->ref_length; // If second open
|
|
|
|
pthread_mutex_unlock(&share->mutex);
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
key_read = false;
|
2013-04-17 06:01:38 +02:00
|
|
|
stats.block_size = 1<<20; // QQQ Tokudb DB block size
|
2013-04-17 06:01:37 +02:00
|
|
|
share->fixed_length_row = !(table_share->db_create_options & HA_OPTION_PACK_RECORD);
|
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
init_hidden_prim_key_info();
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(0);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
//
|
|
|
|
// estimate the number of rows in a DB
|
|
|
|
// Parameters:
|
|
|
|
// [in] db - DB whose number of rows will be estimated
|
|
|
|
// [out] num_rows - number of estimated rows in db
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
|
|
|
int ha_tokudb::estimate_num_rows(DB* db, u_int64_t* num_rows) {
|
|
|
|
DBT key;
|
|
|
|
DBT data;
|
|
|
|
int error = ENOSYS;
|
|
|
|
DBC* crsr = NULL;
|
|
|
|
u_int64_t less, equal, greater;
|
|
|
|
int is_exact;
|
|
|
|
bool do_commit = false;
|
|
|
|
|
|
|
|
bzero((void *)&key, sizeof(key));
|
|
|
|
bzero((void *)&data, sizeof(data));
|
|
|
|
|
|
|
|
if (transaction == NULL) {
|
2013-04-17 06:01:46 +02:00
|
|
|
error = db_env->txn_begin(db_env, 0, &transaction, DB_READ_UNCOMMITTED);
|
2013-04-17 06:01:43 +02:00
|
|
|
if (error) goto cleanup;
|
|
|
|
do_commit = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = db->cursor(db, transaction, &crsr, 0);
|
|
|
|
if (error) { goto cleanup; }
|
|
|
|
|
|
|
|
//
|
|
|
|
// get the first element, then estimate number of records
|
|
|
|
// by calling key_range64 on the first element
|
|
|
|
//
|
|
|
|
error = crsr->c_get(crsr, &key, &data, DB_FIRST);
|
|
|
|
if (error == DB_NOTFOUND) {
|
|
|
|
*num_rows = 0;
|
|
|
|
error = 0;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
else if (error) { goto cleanup; }
|
|
|
|
|
|
|
|
error = db->key_range64(
|
|
|
|
db,
|
|
|
|
transaction,
|
|
|
|
&key,
|
|
|
|
&less,
|
|
|
|
&equal,
|
|
|
|
&greater,
|
|
|
|
&is_exact
|
|
|
|
);
|
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
*num_rows = equal + greater;
|
|
|
|
error = 0;
|
|
|
|
cleanup:
|
2013-04-17 06:01:43 +02:00
|
|
|
if (do_commit) {
|
2013-04-17 06:01:43 +02:00
|
|
|
transaction->commit(transaction, 0);
|
|
|
|
transaction = NULL;
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
if (crsr != NULL) {
|
|
|
|
crsr->c_close(crsr);
|
|
|
|
crsr = NULL;
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// states if table has an auto increment column, if so, sets index where auto inc column is to index
|
|
|
|
// Parameters:
|
|
|
|
// [out] index - if auto inc exists, then this param is set to where it exists in table, if not, then unchanged
|
|
|
|
// Returns:
|
|
|
|
// true if auto inc column exists, false otherwise
|
|
|
|
//
|
|
|
|
bool ha_tokudb::has_auto_increment_flag(uint* index) {
|
|
|
|
//
|
|
|
|
// check to see if we have auto increment field
|
|
|
|
//
|
|
|
|
bool ai_found = false;
|
|
|
|
uint i = 0;
|
|
|
|
for (Field ** field = table->field; *field; field++,i++) {
|
|
|
|
if ((*field)->flags & AUTO_INCREMENT_FLAG) {
|
|
|
|
ai_found = true;
|
|
|
|
*index = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ai_found;
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// helper function to write a piece of metadata in to status.tokudb
|
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
int ha_tokudb::write_metadata(DB* db, HA_METADATA_KEY curr_key_data, void* data, uint size ){
|
2013-04-17 06:01:44 +02:00
|
|
|
int error;
|
|
|
|
DBT key;
|
|
|
|
DBT value;
|
|
|
|
DB_TXN* txn = NULL;
|
|
|
|
//
|
|
|
|
// transaction to be used for putting metadata into status.tokudb
|
|
|
|
//
|
|
|
|
error = db_env->txn_begin(db_env, 0, &txn, 0);
|
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
bzero(&key, sizeof(key));
|
|
|
|
bzero(&value, sizeof(value));
|
|
|
|
key.data = &curr_key_data;
|
|
|
|
key.size = sizeof(curr_key_data);
|
|
|
|
value.data = data;
|
2013-04-17 06:01:44 +02:00
|
|
|
value.size = size;
|
2013-04-17 06:01:44 +02:00
|
|
|
error = db->put(db, txn, &key, &value, 0);
|
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
cleanup:
|
|
|
|
if (txn) {
|
|
|
|
if (!error) {
|
|
|
|
txn->commit(txn, DB_TXN_NOSYNC);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
txn->abort(txn);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// Updates status.tokudb with a new max value used for the auto increment column
|
|
|
|
// Parameters:
|
|
|
|
// [in] db - this will always be status.tokudb
|
|
|
|
// val - value to store
|
|
|
|
// Returns:
|
|
|
|
// 0 on success, error otherwise
|
|
|
|
//
|
|
|
|
//
|
|
|
|
int ha_tokudb::update_max_auto_inc(DB* db, ulonglong val){
|
|
|
|
return write_metadata(db,hatoku_max_ai,&val,sizeof(val));
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Writes the initial auto increment value, as specified by create table
|
|
|
|
// so if a user does "create table t1 (a int auto_increment, primary key (a)) auto_increment=100",
|
|
|
|
// then the value 100 will be stored here in val
|
|
|
|
// Parameters:
|
|
|
|
// [in] db - this will always be status.tokudb
|
|
|
|
// val - value to store
|
|
|
|
// Returns:
|
|
|
|
// 0 on success, error otherwise
|
|
|
|
//
|
|
|
|
//
|
|
|
|
int ha_tokudb::write_auto_inc_create(DB* db, ulonglong val){
|
|
|
|
return write_metadata(db,hatoku_ai_create_value,&val,sizeof(val));
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Closes a handle to a table.
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::close(void) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::close %p", this);
|
2013-04-17 06:01:37 +02:00
|
|
|
TOKUDB_CLOSE();
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(__close(0));
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int ha_tokudb::__close(int mutex_is_locked) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::__close %p", this);
|
2013-04-17 06:01:39 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN)
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("close:%p\n", this);
|
2013-04-17 06:01:37 +02:00
|
|
|
my_free(rec_buff, MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
my_free(alloc_ptr, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:41 +02:00
|
|
|
my_free(primary_key_offsets, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:46 +02:00
|
|
|
rec_buff = NULL;
|
|
|
|
alloc_ptr = NULL;
|
|
|
|
primary_key_offsets = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
ha_tokudb::reset(); // current_row buffer
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(free_share(share, table, hidden_primary_key, mutex_is_locked));
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Reallocate record buffer (rec_buff) if needed
|
|
|
|
// If not needed, does nothing
|
|
|
|
// Parameters:
|
|
|
|
// length - size of buffer required for rec_buff
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
bool ha_tokudb::fix_rec_buff_for_blob(ulong length) {
|
|
|
|
if (!rec_buff || length > alloced_rec_buff_length) {
|
|
|
|
uchar *newptr;
|
|
|
|
if (!(newptr = (uchar *) my_realloc((void *) rec_buff, length, MYF(MY_ALLOW_ZERO_PTR))))
|
|
|
|
return 1;
|
|
|
|
rec_buff = newptr;
|
|
|
|
alloced_rec_buff_length = length;
|
|
|
|
}
|
|
|
|
return 0;
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Calculate max length needed for row */
|
2013-04-17 06:01:37 +02:00
|
|
|
ulong ha_tokudb::max_row_length(const uchar * buf) {
|
|
|
|
ulong length = table_share->reclength + table_share->fields * 2;
|
|
|
|
uint *ptr, *end;
|
|
|
|
for (ptr = table_share->blob_field, end = ptr + table_share->blob_fields; ptr != end; ptr++) {
|
|
|
|
Field_blob *blob = ((Field_blob *) table->field[*ptr]);
|
2013-04-17 06:01:46 +02:00
|
|
|
length += blob->get_length((uchar *) (buf + field_offset(blob, table))) + 2;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
return length;
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
*/
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// take the row passed in as a DBT*, and convert it into a row in MySQL format in record
|
|
|
|
// Pack a row for storage.
|
|
|
|
// If the row is of fixed length, just store the row 'as is'.
|
|
|
|
// If not, we will generate a packed row suitable for storage.
|
|
|
|
// This will only fail if we don't have enough memory to pack the row,
|
|
|
|
// which may only happen in rows with blobs, as the default row length is
|
|
|
|
// pre-allocated.
|
|
|
|
// Parameters:
|
|
|
|
// [out] row - row stored in DBT to be converted
|
|
|
|
// [in] record - row in MySQL format
|
|
|
|
//
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
int ha_tokudb::pack_row(DBT * row, const uchar * record, bool strip_pk) {
|
2013-04-17 06:01:37 +02:00
|
|
|
uchar *ptr;
|
2013-04-17 06:01:41 +02:00
|
|
|
int r = ENOSYS;
|
2013-04-17 06:01:37 +02:00
|
|
|
bzero((void *) row, sizeof(*row));
|
2013-04-17 06:01:41 +02:00
|
|
|
uint curr_skip_index;
|
|
|
|
|
|
|
|
KEY *key_info = table->key_info + primary_key;
|
|
|
|
my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
|
|
|
|
|
|
|
|
//
|
|
|
|
// two cases, fixed length row, and variable length row
|
|
|
|
// fixed length row is first below
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
if (share->fixed_length_row) {
|
2013-04-17 06:01:45 +02:00
|
|
|
if (hidden_primary_key || !strip_pk) {
|
2013-04-17 06:01:41 +02:00
|
|
|
row->data = (void *)record;
|
|
|
|
row->size = table_share->reclength;
|
|
|
|
r = 0;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
//
|
|
|
|
// if the primary key is not hidden, then it is part of the record
|
|
|
|
// because primary key information is already stored in the key
|
|
|
|
// that will be passed to the fractal tree, we do not copy
|
|
|
|
// components that belong to the primary key
|
|
|
|
//
|
|
|
|
if (fix_rec_buff_for_blob(table_share->reclength)) {
|
|
|
|
r = HA_ERR_OUT_OF_MEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
uchar* tmp_dest = rec_buff;
|
|
|
|
const uchar* tmp_src = record;
|
|
|
|
uint i = 0;
|
|
|
|
//
|
|
|
|
// say we have 100 bytes in record, and bytes 25-50 and 75-90 belong to the primary key
|
|
|
|
// this for loop will do a memcpy [0,25], [51,75] and [90,100]
|
|
|
|
//
|
|
|
|
for (i =0; i < key_info->key_parts; i++){
|
|
|
|
uint curr_index = primary_key_offsets[i].part_index;
|
|
|
|
uint bytes_to_copy = record + key_info->key_part[curr_index].offset - tmp_src;
|
|
|
|
memcpy(tmp_dest,tmp_src, bytes_to_copy);
|
|
|
|
tmp_dest += bytes_to_copy;
|
|
|
|
tmp_src = record + key_info->key_part[curr_index].offset + key_info->key_part[curr_index].length;
|
|
|
|
}
|
|
|
|
memcpy(tmp_dest,tmp_src, record + table_share->reclength - tmp_src);
|
|
|
|
tmp_dest += record + table_share->reclength - tmp_src;
|
|
|
|
|
|
|
|
row->data = rec_buff;
|
|
|
|
row->size = (size_t) (tmp_dest - rec_buff);
|
|
|
|
|
|
|
|
r = 0;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
if (table_share->blob_fields) {
|
2013-04-17 06:01:41 +02:00
|
|
|
if (fix_rec_buff_for_blob(max_row_length(record))) {
|
|
|
|
r = HA_ERR_OUT_OF_MEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2008-02-05 17:00:53 +01:00
|
|
|
/* Copy null bits */
|
2013-04-17 06:01:37 +02:00
|
|
|
memcpy(rec_buff, record, table_share->null_bytes);
|
|
|
|
ptr = rec_buff + table_share->null_bytes;
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// assert that when the hidden primary key exists, primary_key_offsets is NULL
|
|
|
|
//
|
|
|
|
assert( (hidden_primary_key != 0) == (primary_key_offsets == NULL));
|
|
|
|
curr_skip_index = 0;
|
2013-04-17 06:01:41 +02:00
|
|
|
for (Field ** field = table->field; *field; field++) {
|
2013-04-17 06:01:46 +02:00
|
|
|
uint curr_field_offset = field_offset(*field, table);
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// if the primary key is hidden, primary_key_offsets will be NULL and
|
|
|
|
// this clause will not execute
|
|
|
|
//
|
2013-04-17 06:01:46 +02:00
|
|
|
if (primary_key_offsets &&
|
|
|
|
strip_pk &&
|
|
|
|
curr_skip_index < table_share->key_info[table_share->primary_key].key_parts
|
|
|
|
) {
|
2013-04-17 06:01:41 +02:00
|
|
|
uint curr_skip_offset = primary_key_offsets[curr_skip_index].offset;
|
|
|
|
if (curr_skip_offset == curr_field_offset) {
|
|
|
|
//
|
|
|
|
// we have hit a field that is a portion of the primary key
|
|
|
|
//
|
|
|
|
uint curr_key_index = primary_key_offsets[curr_skip_index].part_index;
|
|
|
|
curr_skip_index++;
|
|
|
|
//
|
|
|
|
// only choose to continue over the key if the key's length matches the field's length
|
|
|
|
// otherwise, we may have a situation where the column is a varchar(10), the
|
|
|
|
// key is only the first 3 characters, and we end up losing the last 7 bytes of the
|
|
|
|
// column
|
|
|
|
//
|
|
|
|
if (table->key_info[primary_key].key_part[curr_key_index].length == (*field)->field_length) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
if (is_null_field(table, *field, record)) {
|
|
|
|
continue;
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
ptr = (*field)->pack(ptr, (const uchar *)
|
2013-04-17 06:01:41 +02:00
|
|
|
(record + curr_field_offset));
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
row->data = rec_buff;
|
|
|
|
row->size = (size_t) (ptr - rec_buff);
|
2013-04-17 06:01:41 +02:00
|
|
|
r = 0;
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
dbug_tmp_restore_column_map(table->write_set, old_map);
|
|
|
|
|
|
|
|
return r;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// take the row passed in as a DBT*, and convert it into a row in MySQL format in record
|
|
|
|
// Parameters:
|
|
|
|
// [out] record - row in MySQL format
|
|
|
|
// [in] row - row stored in DBT to be converted
|
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
void ha_tokudb::unpack_row(uchar * record, DBT const *row, DBT const *key, bool pk_stripped) {
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// two cases, fixed length row, and variable length row
|
|
|
|
// fixed length row is first below
|
|
|
|
//
|
|
|
|
if (share->fixed_length_row) {
|
2013-04-17 06:01:45 +02:00
|
|
|
if (hidden_primary_key || !pk_stripped) {
|
2013-04-17 06:01:41 +02:00
|
|
|
memcpy(record, (void *) row->data, table_share->reclength);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
|
|
|
|
KEY *key_info = table_share->key_info + primary_key;
|
|
|
|
|
|
|
|
uchar* tmp_dest = record;
|
|
|
|
uchar* tmp_src = (uchar *)row->data;
|
|
|
|
uint i = 0;
|
|
|
|
|
|
|
|
//
|
|
|
|
// unpack_key will fill in parts of record that are part of the primary key
|
|
|
|
//
|
|
|
|
unpack_key(record, key, primary_key);
|
|
|
|
|
|
|
|
//
|
|
|
|
// produces the opposite effect to what happened in pack_row
|
|
|
|
// first we fill in the parts of record that are not part of the key
|
|
|
|
//
|
|
|
|
for (i =0; i < key_info->key_parts; i++){
|
|
|
|
uint curr_index = primary_key_offsets[i].part_index;
|
|
|
|
uint bytes_to_copy = record + key_info->key_part[curr_index].offset - tmp_dest;
|
|
|
|
memcpy(tmp_dest,tmp_src, bytes_to_copy);
|
|
|
|
tmp_src += bytes_to_copy;
|
|
|
|
tmp_dest = record + key_info->key_part[curr_index].offset + key_info->key_part[curr_index].length;
|
|
|
|
}
|
|
|
|
memcpy(tmp_dest,tmp_src, record + table_share->reclength - tmp_dest);
|
|
|
|
dbug_tmp_restore_column_map(table->write_set, old_map);
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
else {
|
|
|
|
/* Copy null bits */
|
|
|
|
my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
|
|
|
|
const uchar *ptr = (const uchar *) row->data;
|
|
|
|
memcpy(record, ptr, table_share->null_bytes);
|
|
|
|
ptr += table_share->null_bytes;
|
2013-04-17 06:01:45 +02:00
|
|
|
if (primary_key_offsets && pk_stripped) {
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// unpack_key will fill in parts of record that are part of the primary key
|
|
|
|
//
|
|
|
|
unpack_key(record, key, primary_key);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// fill in parts of record that are not part of the key
|
|
|
|
//
|
|
|
|
uint curr_skip_index = 0;
|
|
|
|
for (Field ** field = table->field; *field; field++) {
|
2013-04-17 06:01:46 +02:00
|
|
|
uint curr_field_offset = field_offset(*field, table);
|
2013-04-17 06:01:46 +02:00
|
|
|
if (primary_key_offsets &&
|
2013-04-17 06:01:46 +02:00
|
|
|
pk_stripped &&
|
2013-04-17 06:01:46 +02:00
|
|
|
curr_skip_index < table_share->key_info[table_share->primary_key].key_parts
|
|
|
|
) {
|
2013-04-17 06:01:41 +02:00
|
|
|
uint curr_skip_offset = primary_key_offsets[curr_skip_index].offset;
|
|
|
|
if (curr_skip_offset == curr_field_offset) {
|
|
|
|
//
|
|
|
|
// we have hit a field that is a portion of the primary key
|
|
|
|
//
|
|
|
|
uint curr_key_index = primary_key_offsets[curr_skip_index].part_index;
|
|
|
|
curr_skip_index++;
|
|
|
|
//
|
|
|
|
// only choose to continue over the key if the key's length matches the field's length
|
|
|
|
// otherwise, we may have a situation where the column is a varchar(10), the
|
|
|
|
// key is only the first 3 characters, and we end up losing the last 7 bytes of the
|
|
|
|
// column
|
|
|
|
//
|
|
|
|
if (table->key_info[primary_key].key_part[curr_key_index].length == (*field)->field_length) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// null bytes MUST have been copied before doing this
|
|
|
|
//
|
|
|
|
if (is_null_field(table, *field, record)) {
|
|
|
|
continue;
|
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
ptr = (*field)->unpack(record + field_offset(*field, table), ptr);
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
dbug_tmp_restore_column_map(table->write_set, old_map);
|
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
|
|
|
|
//
// Unpacks the key parts of key number index from data into record.
// For each nullable key part one marker byte is consumed first; when it is
// NULL_COL_VAL the part's null bit is set in record and no value bytes are
// read, otherwise the null bit is cleared and the value is unpacked.
// Parameters:
//      [out]   record - row in MySQL format that receives the key columns
//      [in]    data - packed key parts
//              index - key whose parts describe the layout of data
// Returns:
//      number of bytes consumed from data
//
u_int32_t ha_tokudb::place_key_into_mysql_buff(uchar * record, uchar* data, uint index) {
    KEY *key_info = table->key_info + index;
    uchar *pos = data;

    for (uint i = 0; i < key_info->key_parts; i++) {
        KEY_PART_INFO *part = &key_info->key_part[i];

        if (part->null_bit) {
            const uchar null_marker = *pos++;
            if (null_marker == NULL_COL_VAL) { // Null value
                /*
                   We don't need to reset the record data as we will not access it
                   if the null data is set
                 */
                record[part->null_offset] |= part->null_bit;
                continue;
            }
            record[part->null_offset] &= ~part->null_bit;
        }

        /* tokutek change to make pack_key and unpack_key work for
           decimals */
        uint unpack_length = part->length;
        uchar *dest = record + field_offset(part->field, table);
#if MYSQL_VERSION_ID < 50123
        pos = (uchar *) part->field->unpack_key(dest, pos, unpack_length);
#else
        pos = (uchar *) part->field->unpack_key(dest, pos, unpack_length, table->s->db_low_byte_first);
#endif
    }
    return pos-data;
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
// Store the key and the primary key into the row
|
2013-04-17 06:01:41 +02:00
|
|
|
// Parameters:
|
2013-04-17 06:01:45 +02:00
|
|
|
// [out] record - key stored in MySQL format
|
|
|
|
// [in] key - key stored in DBT to be converted
|
|
|
|
// index -index into key_file that represents the DB
|
|
|
|
// unpacking a key of
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
void ha_tokudb::unpack_key(uchar * record, DBT const *key, uint index) {
|
|
|
|
u_int32_t bytes_read;
|
|
|
|
uchar *pos = (uchar *) key->data + 1;
|
|
|
|
bytes_read = place_key_into_mysql_buff(record,pos,index);
|
|
|
|
if((table->key_info[index].flags & HA_CLUSTERING) && !hidden_primary_key) {
|
|
|
|
//
|
|
|
|
// also unpack primary key
|
|
|
|
//
|
|
|
|
place_key_into_mysql_buff(record,pos+bytes_read,primary_key);
|
|
|
|
}
|
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
//
// Packs the columns of one key, taken from a row in MySQL format, into buff.
// Every nullable key part is preceded by a one byte NULL marker; a NULL
// column contributes only that marker.
// Parameters:
//      [in]    key_info - description of the key whose columns are packed
//      [out]   buff - buffer that receives the packed columns
//      [in]    record - row in MySQL format to take the columns from
//      [out]   has_null - set to true if at least one packed column is NULL
//              key_length - packing stops once this many bytes of key part
//                           length have been packed
// Returns:
//      the number of bytes written to buff
//
u_int32_t ha_tokudb::place_key_into_dbt_buff(KEY* key_info, uchar * buff, const uchar * record, bool* has_null, int key_length) {
    KEY_PART_INFO *part = key_info->key_part;
    KEY_PART_INFO *last_part = part + key_info->key_parts;
    uchar *out = buff;

    *has_null = false;
    while (part != last_part && key_length > 0) {
        Field *col = part->field;
        bool col_is_null = false;
        //
        // the null bit is read from the field and not from the key part,
        // because key_part->null_bit is not set in add_index (ticket 862)
        //
        if (col->null_bit) {
            uint null_offset = (uint) ((char *) col->null_ptr - (char *) table->record[0]);
            col_is_null = (record[null_offset] & col->null_bit) != 0;
            if (col_is_null) {
                *out++ = NULL_COL_VAL;
            }
            else {
                *out++ = NONNULL_COL_VAL;
            }
        }
        if (col_is_null) {
            // DB_DBT_DUPOK is deliberately not set on the key here: the
            // fractal tree does not handle that flag at the moment
            *has_null = true;
        }
        else {
            //
            // the column position comes from field_offset() and not from
            // key_part->offset, because key_part->offset is set incorrectly
            // in add_index (ticket 862)
            //
            uchar *src = (uchar *) (record + field_offset(col, table));
#if MYSQL_VERSION_ID < 50123
            out = col->pack_key(out, src, part->length);
#else
            out = col->pack_key(out, src, part->length, table->s->db_low_byte_first);
#endif
            key_length -= part->length;
        }
        part++;
    }
    return out - buff;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// Create a packed key from a row. This key will be written as such
|
|
|
|
// to the index tree. This will never fail as the key buffer is pre-allocated.
|
|
|
|
// Parameters:
|
|
|
|
// [out] key - DBT that holds the key
|
|
|
|
// [in] key_info - holds data about the key, such as it's length and offset into record
|
|
|
|
// [out] buff - buffer that will hold the data for key (unless
|
|
|
|
// we have a hidden primary key)
|
|
|
|
// [in] record - row from which to create the key
|
|
|
|
// key_length - currently set to MAX_KEY_LENGTH, is it size of buff?
|
|
|
|
// Returns:
|
|
|
|
// the parameter key
|
|
|
|
//
|
|
|
|
|
|
|
|
DBT* ha_tokudb::create_dbt_key_from_key(DBT * key, KEY* key_info, uchar * buff, const uchar * record, bool* has_null, int key_length) {
|
|
|
|
u_int32_t size = 0;
|
|
|
|
uchar* tmp_buff = buff;
|
|
|
|
my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
|
|
|
|
|
|
|
|
key->data = buff;
|
|
|
|
|
|
|
|
//
|
|
|
|
// first put the "infinity" byte at beginning. States if missing columns are implicitly
|
|
|
|
// positive infinity or negative infinity. For this, because we are creating key
|
|
|
|
// from a row, there is no way that columns can be missing, so in practice,
|
|
|
|
// this will be meaningless. Might as well put in a value
|
|
|
|
//
|
|
|
|
*tmp_buff++ = COL_NEG_INF;
|
|
|
|
size++;
|
|
|
|
size += place_key_into_dbt_buff(key_info, tmp_buff, record, has_null, key_length);
|
|
|
|
if (key_info->flags & HA_CLUSTERING) {
|
|
|
|
tmp_buff = buff + size;
|
|
|
|
if (hidden_primary_key) {
|
|
|
|
memcpy_fixed(tmp_buff, current_ident, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
|
|
|
|
size += TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
bool tmp_bool = false;
|
|
|
|
size += place_key_into_dbt_buff(
|
|
|
|
&table->key_info[primary_key],
|
|
|
|
tmp_buff,
|
|
|
|
record,
|
|
|
|
&tmp_bool,
|
|
|
|
MAX_KEY_LENGTH //this parameter does not matter
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
key->size = size;
|
2013-04-17 06:01:37 +02:00
|
|
|
DBUG_DUMP("key", (uchar *) key->data, key->size);
|
|
|
|
dbug_tmp_restore_column_map(table->write_set, old_map);
|
2013-04-17 06:01:41 +02:00
|
|
|
return key;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// Create a packed key from a row. This key will be written as such
|
|
|
|
// to the index tree. This will never fail as the key buffer is pre-allocated.
|
|
|
|
// Parameters:
|
|
|
|
// [out] key - DBT that holds the key
|
|
|
|
// keynr - index for which to create the key
|
|
|
|
// [out] buff - buffer that will hold the data for key (unless
|
|
|
|
// we have a hidden primary key)
|
|
|
|
// [in] record - row from which to create the key
|
2013-04-17 06:01:45 +02:00
|
|
|
// [out] has_null - says if the key has a NULL value for one of its columns
|
2013-04-17 06:01:41 +02:00
|
|
|
// key_length - currently set to MAX_KEY_LENGTH, is it size of buff?
|
|
|
|
// Returns:
|
|
|
|
// the parameter key
|
|
|
|
//
|
2013-04-17 06:01:44 +02:00
|
|
|
DBT *ha_tokudb::create_dbt_key_from_table(DBT * key, uint keynr, uchar * buff, const uchar * record, bool* has_null, int key_length) {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::create_dbt_key_from_table");
|
|
|
|
bzero((void *) key, sizeof(*key));
|
|
|
|
if (hidden_primary_key && keynr == primary_key) {
|
|
|
|
key->data = current_ident;
|
|
|
|
key->size = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
|
2013-04-17 06:01:44 +02:00
|
|
|
*has_null = false;
|
2013-04-17 06:01:41 +02:00
|
|
|
DBUG_RETURN(key);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
DBUG_RETURN(create_dbt_key_from_key(key, &table->key_info[keynr],buff,record, has_null, key_length));
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// Create a packed key from from a MySQL unpacked key (like the one that is
|
|
|
|
// sent from the index_read() This key is to be used to read a row
|
|
|
|
// Parameters:
|
|
|
|
// [out] key - DBT that holds the key
|
|
|
|
// keynr - index for which to pack the key
|
|
|
|
// [out] buff - buffer that will hold the data for key
|
|
|
|
// [in] key_ptr - MySQL unpacked key
|
|
|
|
// key_length - length of key_ptr
|
|
|
|
// Returns:
|
|
|
|
// the parameter key
|
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
DBT *ha_tokudb::pack_key(DBT * key, uint keynr, uchar * buff, const uchar * key_ptr, uint key_length, uchar inf_byte) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::pack_key");
|
2013-04-17 06:01:37 +02:00
|
|
|
KEY *key_info = table->key_info + keynr;
|
|
|
|
KEY_PART_INFO *key_part = key_info->key_part;
|
|
|
|
KEY_PART_INFO *end = key_part + key_info->key_parts;
|
|
|
|
my_bitmap_map *old_map = dbug_tmp_use_all_columns(table, table->write_set);
|
|
|
|
|
|
|
|
bzero((void *) key, sizeof(*key));
|
|
|
|
key->data = buff;
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// first put the "infinity" byte at beginning. States if missing columns are implicitly
|
|
|
|
// positive infinity or negative infinity
|
|
|
|
//
|
|
|
|
*buff++ = inf_byte;
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
for (; key_part != end && (int) key_length > 0; key_part++) {
|
|
|
|
uint offset = 0;
|
|
|
|
if (key_part->null_bit) {
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!(*key_ptr == 0)) {
|
|
|
|
*buff++ = NULL_COL_VAL;
|
2013-04-17 06:01:37 +02:00
|
|
|
key_length -= key_part->store_length;
|
|
|
|
key_ptr += key_part->store_length;
|
|
|
|
continue;
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
*buff++ = NONNULL_COL_VAL;
|
2013-04-17 06:01:37 +02:00
|
|
|
offset = 1; // Data is at key_ptr+1
|
|
|
|
}
|
|
|
|
buff = key_part->field->pack_key_from_key_image(buff, (uchar *) key_ptr + offset,
|
2013-04-17 06:01:39 +02:00
|
|
|
#if MYSQL_VERSION_ID < 50123
|
2013-04-17 06:01:37 +02:00
|
|
|
key_part->length);
|
2008-02-05 17:00:53 +01:00
|
|
|
#else
|
2013-04-17 06:01:37 +02:00
|
|
|
key_part->length, table->s->db_low_byte_first);
|
2008-02-05 17:00:53 +01:00
|
|
|
#endif
|
2013-04-17 06:01:37 +02:00
|
|
|
key_ptr += key_part->store_length;
|
|
|
|
key_length -= key_part->store_length;
|
|
|
|
}
|
|
|
|
key->size = (buff - (uchar *) key->data);
|
|
|
|
DBUG_DUMP("key", (uchar *) key->data, key->size);
|
|
|
|
dbug_tmp_restore_column_map(table->write_set, old_map);
|
|
|
|
DBUG_RETURN(key);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
int ha_tokudb::read_last() {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::read_last");
|
2013-04-17 06:01:39 +02:00
|
|
|
int do_commit = 0;
|
2013-04-17 06:01:44 +02:00
|
|
|
if (transaction == NULL) {
|
2013-04-17 06:01:39 +02:00
|
|
|
int r = db_env->txn_begin(db_env, 0, &transaction, 0);
|
|
|
|
assert(r == 0);
|
|
|
|
do_commit = 1;
|
|
|
|
}
|
|
|
|
int error = index_init(primary_key, 0);
|
|
|
|
if (error == 0)
|
|
|
|
error = index_last(table->record[1]);
|
|
|
|
index_end();
|
|
|
|
if (do_commit) {
|
|
|
|
int r = transaction->commit(transaction, 0);
|
|
|
|
assert(r == 0);
|
2013-04-17 06:01:40 +02:00
|
|
|
transaction = NULL;
|
2013-04-17 06:01:39 +02:00
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:39 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
//
|
|
|
|
// get max used hidden primary key value
|
|
|
|
//
|
|
|
|
void ha_tokudb::init_hidden_prim_key_info() {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::init_prim_key_info");
|
2013-04-17 06:01:43 +02:00
|
|
|
pthread_mutex_lock(&share->mutex);
|
2013-04-17 06:01:43 +02:00
|
|
|
if (!(share->status & STATUS_PRIMARY_KEY_INIT)) {
|
|
|
|
(void) extra(HA_EXTRA_KEYREAD);
|
|
|
|
int error = read_last();
|
|
|
|
(void) extra(HA_EXTRA_NO_KEYREAD);
|
|
|
|
if (error == 0) {
|
|
|
|
share->auto_ident = uint5korr(current_ident);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
share->status |= STATUS_PRIMARY_KEY_INIT;
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&share->mutex);
|
2013-04-17 06:01:43 +02:00
|
|
|
DBUG_VOID_RETURN;
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
|
|
|
|
|
|
|
|
/** @brief
|
|
|
|
Get metadata info stored in status.tokudb
|
|
|
|
*/
|
2013-04-17 06:01:43 +02:00
|
|
|
int ha_tokudb::get_status() {
|
2013-04-17 06:01:43 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::get_status");
|
2013-04-17 06:01:43 +02:00
|
|
|
DB_TXN* txn = NULL;
|
|
|
|
DBT key, value;
|
|
|
|
HA_METADATA_KEY curr_key;
|
|
|
|
int error;
|
2013-04-17 06:01:45 +02:00
|
|
|
char* newname = NULL;
|
2013-04-17 06:01:43 +02:00
|
|
|
//
|
|
|
|
// open status.tokudb
|
|
|
|
//
|
|
|
|
if (!share->status_block) {
|
|
|
|
char name_buff[FN_REFLEN];
|
2013-04-17 06:01:45 +02:00
|
|
|
newname = (char *)my_malloc(get_name_length(share->table_name) + NAME_CHAR_LEN, MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (newname == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
make_name(newname, share->table_name, "status");
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
|
|
|
uint open_mode = (((table->db_stat & HA_READ_ONLY) ? DB_RDONLY : 0)
|
|
|
|
| DB_THREAD);
|
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
|
|
|
TOKUDB_TRACE("open:%s\n", newname);
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
error = db_create(&share->status_block, db_env, 0);
|
|
|
|
if (error) { goto cleanup; }
|
|
|
|
|
|
|
|
error = share->status_block->open(share->status_block, NULL, name_buff, NULL, DB_BTREE, open_mode, 0);
|
|
|
|
if (error) { goto cleanup; }
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
//
|
|
|
|
// transaction to be used for putting metadata into status.tokudb
|
|
|
|
//
|
|
|
|
bzero(&key, sizeof(key));
|
|
|
|
bzero(&value, sizeof(value));
|
|
|
|
key.data = &curr_key;
|
|
|
|
key.size = sizeof(curr_key);
|
|
|
|
value.flags = DB_DBT_MALLOC;
|
2013-04-17 06:01:43 +02:00
|
|
|
error = db_env->txn_begin(db_env, 0, &txn, 0);
|
|
|
|
if (error) { goto cleanup; }
|
2013-04-17 06:01:43 +02:00
|
|
|
|
|
|
|
if (share->status_block) {
|
|
|
|
int error;
|
|
|
|
//
|
|
|
|
// get version
|
|
|
|
//
|
|
|
|
curr_key = hatoku_version;
|
|
|
|
error = share->status_block->get(
|
|
|
|
share->status_block,
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&value,
|
|
|
|
0
|
|
|
|
);
|
|
|
|
if (error == DB_NOTFOUND) {
|
|
|
|
share->version = 0;
|
|
|
|
}
|
|
|
|
else if (error == 0 && value.size == sizeof(share->version)) {
|
|
|
|
share->version = *(uint *)value.data;
|
2013-04-17 06:01:46 +02:00
|
|
|
dlfree(value.data);
|
2013-04-17 06:01:44 +02:00
|
|
|
value.data = NULL;
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:43 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
|
|
|
//
|
|
|
|
// get capabilities
|
|
|
|
//
|
|
|
|
curr_key = hatoku_capabilities;
|
|
|
|
error = share->status_block->get(
|
|
|
|
share->status_block,
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&value,
|
|
|
|
0
|
|
|
|
);
|
|
|
|
if (error == DB_NOTFOUND) {
|
|
|
|
share->capabilities= 0;
|
|
|
|
}
|
|
|
|
else if (error == 0 && value.size == sizeof(share->version)) {
|
|
|
|
share->capabilities= *(uint *)value.data;
|
2013-04-17 06:01:46 +02:00
|
|
|
dlfree(value.data);
|
2013-04-17 06:01:44 +02:00
|
|
|
value.data = NULL;
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:43 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
error = 0;
|
|
|
|
cleanup:
|
|
|
|
if (txn) {
|
|
|
|
txn->commit(txn,0);
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:43 +02:00
|
|
|
if (error) {
|
|
|
|
if (share->status_block) {
|
|
|
|
share->status_block->close(share->status_block, 0);
|
|
|
|
share->status_block = NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2008-02-05 17:00:53 +01:00
|
|
|
/** @brief
|
|
|
|
Return an estimated of the number of rows in the table.
|
|
|
|
Used when sorting to allocate buffers and by the optimizer.
|
2013-04-17 06:01:40 +02:00
|
|
|
This is used in filesort.cc.
|
2008-02-05 17:00:53 +01:00
|
|
|
*/
|
2013-04-17 06:01:37 +02:00
|
|
|
ha_rows ha_tokudb::estimate_rows_upper_bound() {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::estimate_rows_upper_bound");
|
2013-04-17 06:01:43 +02:00
|
|
|
DBUG_RETURN(share->rows + HA_TOKUDB_EXTRA_ROWS);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::cmp_ref(const uchar * ref1, const uchar * ref2) {
|
2013-04-17 06:01:45 +02:00
|
|
|
if (hidden_primary_key) {
|
2013-04-17 06:01:37 +02:00
|
|
|
return memcmp(ref1, ref2, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
int result;
|
|
|
|
Field *field;
|
|
|
|
KEY *key_info = table->key_info + table_share->primary_key;
|
|
|
|
KEY_PART_INFO *key_part = key_info->key_part;
|
|
|
|
KEY_PART_INFO *end = key_part + key_info->key_parts;
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// HACK until we get refactoring in. manually move up by infinity byte
|
|
|
|
//
|
|
|
|
ref1++;
|
|
|
|
ref2++;
|
2013-04-17 06:01:37 +02:00
|
|
|
for (; key_part != end; key_part++) {
|
|
|
|
field = key_part->field;
|
|
|
|
result = field->pack_cmp((const uchar *) ref1, (const uchar *) ref2, key_part->length, 0);
|
|
|
|
if (result)
|
|
|
|
return result;
|
|
|
|
ref1 += field->packed_col_length((const uchar *) ref1, key_part->length);
|
|
|
|
ref2 += field->packed_col_length((const uchar *) ref2, key_part->length);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
//
// Tells MySQL whether an ALTER TABLE leaves the stored data compatible.
// Parameters:
//      [in]    info - create info of the altered table
//              table_changes - result of comparing the old and new definition
// Returns:
//      COMPATIBLE_DATA_NO if the table has to be rebuilt
//      COMPATIBLE_DATA_YES otherwise
//
bool ha_tokudb::check_if_incompatible_data(HA_CREATE_INFO * info, uint table_changes) {
    //
    // This is a horrendous hack for now, copied from InnoDB: a new auto
    // increment value set via "alter table foo auto_increment=new_val" is
    // treated as an incompatible change, so the entire table is rebuilt.
    // This will need to be fixed.
    //
    bool auto_inc_value_set =
        (info->used_fields & HA_CREATE_USED_AUTO) && (info->auto_increment_value != 0);
    bool definition_differs = (table_changes != IS_EQUAL_YES);

    if (auto_inc_value_set || definition_differs) {
        return COMPATIBLE_DATA_NO;
    }
    return COMPATIBLE_DATA_YES;
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Stores a row in the table, called when handling an INSERT query
|
|
|
|
// Parameters:
|
|
|
|
// [in] record - a row in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::write_row(uchar * record) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::write_row");
|
2013-04-17 06:01:37 +02:00
|
|
|
DBT row, prim_key, key;
|
|
|
|
int error;
|
2013-04-17 06:01:44 +02:00
|
|
|
THD *thd = NULL;
|
|
|
|
u_int32_t put_flags;
|
2013-04-17 06:01:44 +02:00
|
|
|
bool has_null;
|
2013-04-17 06:01:44 +02:00
|
|
|
DB_TXN* sub_trans = NULL;
|
|
|
|
DB_TXN* txn = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
//
|
|
|
|
// status message to be shown in "show process list"
|
|
|
|
//
|
|
|
|
char status_msg[200]; //buffer of 200 should be a good upper bound.
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// some crap that needs to be done because MySQL does not properly abstract
|
|
|
|
// this work away from us, namely filling in auto increment and setting auto timestamp
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_write_count, &LOCK_status);
|
2013-04-17 06:01:41 +02:00
|
|
|
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) {
|
2013-04-17 06:01:37 +02:00
|
|
|
table->timestamp_field->set_time();
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
|
|
|
if (table->next_number_field && record == table->record[0]) {
|
2013-04-17 06:01:37 +02:00
|
|
|
update_auto_increment();
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// check to see if some value for the auto increment column that is bigger
|
|
|
|
// than anything else til now is being used. If so, update the metadata to reflect it
|
|
|
|
// the goal here is we never want to have a dup key error due to a bad increment
|
|
|
|
// of the auto inc field.
|
|
|
|
//
|
|
|
|
if (share->has_auto_inc && record == table->record[0]) {
|
|
|
|
pthread_mutex_lock(&share->mutex);
|
|
|
|
ulonglong curr_auto_inc = retrieve_auto_increment(
|
|
|
|
table->field[share->ai_field_index]->key_type(),
|
2013-04-17 06:01:46 +02:00
|
|
|
field_offset(table->field[share->ai_field_index], table),
|
2013-04-17 06:01:44 +02:00
|
|
|
record
|
|
|
|
);
|
|
|
|
if (curr_auto_inc > share->last_auto_increment) {
|
|
|
|
error = update_max_auto_inc(share->status_block, curr_auto_inc);
|
|
|
|
if (!error) {
|
|
|
|
share->last_auto_increment = curr_auto_inc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&share->mutex);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
if ((error = pack_row(&row, (const uchar *) record, true))){
|
2013-04-17 06:01:44 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if (hidden_primary_key) {
|
|
|
|
get_auto_primary_key(current_ident);
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
if (using_ignore) {
|
|
|
|
error = db_env->txn_begin(db_env, transaction, &sub_trans, 0);
|
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
txn = using_ignore ? sub_trans : transaction;
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// first the primary key (because it must be unique, has highest chance of failure)
|
|
|
|
//
|
|
|
|
put_flags = share->key_type[primary_key];
|
|
|
|
thd = ha_thd();
|
2013-04-17 06:01:38 +02:00
|
|
|
if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
|
2013-04-17 06:01:38 +02:00
|
|
|
put_flags = DB_YESOVERWRITE;
|
2013-04-17 06:01:38 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
error = share->file->put(
|
|
|
|
share->file,
|
2013-04-17 06:01:44 +02:00
|
|
|
txn,
|
2013-04-17 06:01:44 +02:00
|
|
|
create_dbt_key_from_table(&prim_key, primary_key, key_buff, record, &has_null),
|
|
|
|
&row,
|
|
|
|
put_flags
|
|
|
|
);
|
2013-04-17 06:01:44 +02:00
|
|
|
if (error) {
|
2013-04-17 06:01:37 +02:00
|
|
|
last_dup_key = primary_key;
|
2013-04-17 06:01:44 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// now insertion for rest of indexes
|
|
|
|
//
|
|
|
|
for (uint keynr = 0; keynr < table_share->keys; keynr++) {
|
2013-04-17 06:01:45 +02:00
|
|
|
bool cluster_row_created = false;
|
2013-04-17 06:01:44 +02:00
|
|
|
if (keynr == primary_key) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
put_flags = share->key_type[keynr];
|
2013-04-17 06:01:44 +02:00
|
|
|
create_dbt_key_from_table(&key, keynr, key_buff2, record, &has_null);
|
|
|
|
|
|
|
|
if (put_flags == DB_NOOVERWRITE && (has_null || thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS))) {
|
2013-04-17 06:01:44 +02:00
|
|
|
put_flags = DB_YESOVERWRITE;
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
if (table->key_info[keynr].flags & HA_CLUSTERING) {
|
|
|
|
if (!cluster_row_created) {
|
|
|
|
if ((error = pack_row(&row, (const uchar *) record, false))){
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
cluster_row_created = true;
|
|
|
|
}
|
|
|
|
error = share->key_file[keynr]->put(
|
|
|
|
share->key_file[keynr],
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&row,
|
|
|
|
put_flags
|
|
|
|
);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
error = share->key_file[keynr]->put(
|
|
|
|
share->key_file[keynr],
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&prim_key,
|
|
|
|
put_flags
|
|
|
|
);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// We break if we hit an error, unless it is a dup key error
|
|
|
|
// and MySQL told us to ignore duplicate key errors
|
|
|
|
//
|
|
|
|
if (error) {
|
2013-04-17 06:01:44 +02:00
|
|
|
last_dup_key = keynr;
|
|
|
|
goto cleanup;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
if (!error) {
|
|
|
|
added_rows++;
|
2013-04-17 06:01:46 +02:00
|
|
|
num_added_rows_in_stmt++;
|
|
|
|
if ((num_added_rows_in_stmt % 1000) == 0) {
|
|
|
|
sprintf(status_msg, "Inserted about %llu rows", num_added_rows_in_stmt);
|
|
|
|
thd_proc_info(thd, status_msg);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
cleanup:
|
2013-04-17 06:01:43 +02:00
|
|
|
if (error == DB_KEYEXIST) {
|
2013-04-17 06:01:37 +02:00
|
|
|
error = HA_ERR_FOUND_DUPP_KEY;
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
if (sub_trans) {
|
|
|
|
// no point in recording error value of abort.
|
|
|
|
// nothing we can do about it anyway and it is not what
|
|
|
|
// we want to return.
|
|
|
|
if (error) {
|
|
|
|
sub_trans->abort(sub_trans);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
error = sub_trans->commit(sub_trans, DB_TXN_NOSYNC);
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Compare if a key in a row has changed */
|
|
|
|
int ha_tokudb::key_cmp(uint keynr, const uchar * old_row, const uchar * new_row) {
|
|
|
|
KEY_PART_INFO *key_part = table->key_info[keynr].key_part;
|
|
|
|
KEY_PART_INFO *end = key_part + table->key_info[keynr].key_parts;
|
|
|
|
|
|
|
|
for (; key_part != end; key_part++) {
|
|
|
|
if (key_part->null_bit) {
|
|
|
|
if ((old_row[key_part->null_offset] & key_part->null_bit) != (new_row[key_part->null_offset] & key_part->null_bit))
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
if (key_part->key_part_flag & (HA_BLOB_PART | HA_VAR_LENGTH_PART)) {
|
|
|
|
|
|
|
|
if (key_part->field->cmp_binary((uchar *) (old_row + key_part->offset), (uchar *) (new_row + key_part->offset), (ulong) key_part->length))
|
|
|
|
return 1;
|
|
|
|
} else {
|
|
|
|
if (memcmp(old_row + key_part->offset, new_row + key_part->offset, key_part->length))
|
|
|
|
return 1;
|
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
return 0;
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
Update a row from one value to another.
|
|
|
|
Clobbers key_buff2
|
|
|
|
*/
|
2013-04-17 06:01:44 +02:00
|
|
|
int ha_tokudb::update_primary_key(DB_TXN * trans, bool primary_key_changed, const uchar * old_row, DBT * old_key, const uchar * new_row, DBT * new_key) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("update_primary_key");
|
2013-04-17 06:01:37 +02:00
|
|
|
DBT row;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if (primary_key_changed) {
|
|
|
|
// Primary key changed or we are updating a key that can have duplicates.
|
|
|
|
// Delete the old row and add a new one
|
|
|
|
if (!(error = remove_key(trans, primary_key, old_row, old_key))) {
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!(error = pack_row(&row, new_row, true))) {
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = share->file->put(share->file, trans, new_key, &row, share->key_type[primary_key]))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
// Probably a duplicated key; restore old key and row if needed
|
|
|
|
last_dup_key = primary_key;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:37 +02:00
|
|
|
// Primary key didn't change; just update the row data
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!(error = pack_row(&row, new_row, true))) {
|
2013-04-17 06:01:41 +02:00
|
|
|
error = share->file->put(share->file, trans, new_key, &row, 0);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Updates a row in the table, called when handling an UPDATE query
|
|
|
|
// Parameters:
|
|
|
|
// [in] old_row - row to be updated, in MySQL format
|
|
|
|
// [in] new_row - new row, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::update_row(const uchar * old_row, uchar * new_row) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("update_row");
|
2013-04-17 06:01:45 +02:00
|
|
|
DBT prim_key, key, old_prim_key, row;
|
2013-04-17 06:01:37 +02:00
|
|
|
int error;
|
|
|
|
bool primary_key_changed;
|
2013-04-17 06:01:44 +02:00
|
|
|
bool has_null;
|
|
|
|
THD* thd = ha_thd();
|
2013-04-17 06:01:44 +02:00
|
|
|
DB_TXN* sub_trans = NULL;
|
|
|
|
DB_TXN* txn = NULL;
|
2013-04-17 06:01:46 +02:00
|
|
|
//
|
|
|
|
// status message to be shown in "show process list"
|
|
|
|
//
|
|
|
|
char status_msg[200]; //buffer of 200 should be a good upper bound.
|
2013-04-17 06:01:37 +02:00
|
|
|
|
|
|
|
LINT_INIT(error);
|
|
|
|
statistic_increment(table->in_use->status_var.ha_update_count, &LOCK_status);
|
2013-04-17 06:01:44 +02:00
|
|
|
if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) {
|
2013-04-17 06:01:37 +02:00
|
|
|
table->timestamp_field->set_time();
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// check to see if some value for the auto increment column that is bigger
|
|
|
|
// than anything else til now is being used. If so, update the metadata to reflect it
|
|
|
|
// the goal here is we never want to have a dup key error due to a bad increment
|
|
|
|
// of the auto inc field.
|
|
|
|
//
|
|
|
|
if (share->has_auto_inc && new_row == table->record[0]) {
|
|
|
|
pthread_mutex_lock(&share->mutex);
|
|
|
|
ulonglong curr_auto_inc = retrieve_auto_increment(
|
|
|
|
table->field[share->ai_field_index]->key_type(),
|
2013-04-17 06:01:46 +02:00
|
|
|
field_offset(table->field[share->ai_field_index], table),
|
2013-04-17 06:01:44 +02:00
|
|
|
new_row
|
|
|
|
);
|
|
|
|
if (curr_auto_inc > share->last_auto_increment) {
|
|
|
|
error = update_max_auto_inc(share->status_block, curr_auto_inc);
|
|
|
|
if (!error) {
|
|
|
|
share->last_auto_increment = curr_auto_inc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&share->mutex);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
if (hidden_primary_key) {
|
|
|
|
primary_key_changed = 0;
|
|
|
|
bzero((void *) &prim_key, sizeof(prim_key));
|
|
|
|
prim_key.data = (void *) current_ident;
|
|
|
|
prim_key.size = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
|
|
|
|
old_prim_key = prim_key;
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:44 +02:00
|
|
|
create_dbt_key_from_table(&prim_key, primary_key, key_buff, new_row, &has_null);
|
2013-04-17 06:01:44 +02:00
|
|
|
if ((primary_key_changed = key_cmp(primary_key, old_row, new_row))) {
|
2013-04-17 06:01:44 +02:00
|
|
|
create_dbt_key_from_table(&old_prim_key, primary_key, primary_key_buff, old_row, &has_null);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:37 +02:00
|
|
|
old_prim_key = prim_key;
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
if (using_ignore) {
|
|
|
|
error = db_env->txn_begin(db_env, transaction, &sub_trans, 0);
|
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
txn = using_ignore ? sub_trans : transaction;
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
/* Start by updating the primary key */
|
2013-04-17 06:01:44 +02:00
|
|
|
error = update_primary_key(txn, primary_key_changed, old_row, &old_prim_key, new_row, &prim_key);
|
2013-04-17 06:01:44 +02:00
|
|
|
if (error) {
|
|
|
|
last_dup_key = primary_key;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
// Update all other keys
|
|
|
|
for (uint keynr = 0; keynr < table_share->keys; keynr++) {
|
2013-04-17 06:01:45 +02:00
|
|
|
bool cluster_row_created = false;
|
2013-04-17 06:01:44 +02:00
|
|
|
if (keynr == primary_key) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (key_cmp(keynr, old_row, new_row) || primary_key_changed) {
|
2013-04-17 06:01:44 +02:00
|
|
|
u_int32_t put_flags;
|
2013-04-17 06:01:44 +02:00
|
|
|
if ((error = remove_key(txn, keynr, old_row, &old_prim_key))) {
|
2013-04-17 06:01:44 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
create_dbt_key_from_table(&key, keynr, key_buff2, new_row, &has_null),
|
2013-04-17 06:01:44 +02:00
|
|
|
put_flags = share->key_type[keynr];
|
|
|
|
if (put_flags == DB_NOOVERWRITE && (has_null || thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS))) {
|
|
|
|
put_flags = DB_YESOVERWRITE;
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
if (table->key_info[keynr].flags & HA_CLUSTERING) {
|
|
|
|
if (!cluster_row_created) {
|
|
|
|
if ((error = pack_row(&row, (const uchar *) new_row, false))){
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
cluster_row_created = true;
|
|
|
|
}
|
|
|
|
error = share->key_file[keynr]->put(
|
|
|
|
share->key_file[keynr],
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&row,
|
|
|
|
put_flags
|
|
|
|
);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
error = share->key_file[keynr]->put(
|
|
|
|
share->key_file[keynr],
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&prim_key,
|
|
|
|
put_flags
|
|
|
|
);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// We break if we hit an error, unless it is a dup key error
|
|
|
|
// and MySQL told us to ignore duplicate key errors
|
|
|
|
//
|
|
|
|
if (error) {
|
2013-04-17 06:01:44 +02:00
|
|
|
last_dup_key = keynr;
|
|
|
|
goto cleanup;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
if (!error) {
|
|
|
|
num_updated_rows_in_stmt++;
|
|
|
|
if ((num_updated_rows_in_stmt % 1000) == 0) {
|
|
|
|
sprintf(status_msg, "Inserted about %llu rows", num_updated_rows_in_stmt);
|
|
|
|
thd_proc_info(thd, status_msg);
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
cleanup:
|
|
|
|
if (error == DB_KEYEXIST) {
|
2013-04-17 06:01:37 +02:00
|
|
|
error = HA_ERR_FOUND_DUPP_KEY;
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
if (sub_trans) {
|
|
|
|
// no point in recording error value of abort.
|
|
|
|
// nothing we can do about it anyway and it is not what
|
|
|
|
// we want to return.
|
|
|
|
if (error) {
|
|
|
|
sub_trans->abort(sub_trans);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
error = sub_trans->commit(sub_trans, DB_TXN_NOSYNC);
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
//
|
|
|
|
// Delete one key in key_file[keynr]
|
|
|
|
// This uses key_buff2, when keynr != primary key, so it's important that
|
|
|
|
// a function that calls this doesn't use this buffer for anything else.
|
|
|
|
// Parameters:
|
|
|
|
// [in] trans - transaction to be used for the delete
|
|
|
|
// keynr - index for which a key needs to be deleted
|
|
|
|
// [in] record - row in MySQL format. Must delete a key for this row
|
|
|
|
// [in] prim_key - key for record in primary table
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::remove_key(DB_TXN * trans, uint keynr, const uchar * record, DBT * prim_key) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::remove_key");
|
2013-04-17 06:01:37 +02:00
|
|
|
int error;
|
|
|
|
DBT key;
|
2013-04-17 06:01:44 +02:00
|
|
|
bool has_null;
|
2013-04-17 06:01:37 +02:00
|
|
|
DBUG_PRINT("enter", ("index: %d", keynr));
|
2013-04-17 06:01:40 +02:00
|
|
|
DBUG_PRINT("primary", ("index: %d", primary_key));
|
|
|
|
DBUG_DUMP("prim_key", (uchar *) prim_key->data, prim_key->size);
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
if (keynr == active_index && cursor) {
|
2013-04-17 06:01:37 +02:00
|
|
|
error = cursor->c_del(cursor, 0);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
else if (keynr == primary_key) { // Unique key
|
2013-04-17 06:01:40 +02:00
|
|
|
DBUG_PRINT("Unique key", ("index: %d", keynr));
|
2013-04-17 06:01:44 +02:00
|
|
|
error = share->key_file[keynr]->del(share->key_file[keynr], trans, prim_key , 0);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
|
|
|
else if (table->key_info[keynr].flags & HA_CLUSTERING) {
|
|
|
|
DBUG_PRINT("clustering key", ("index: %d", keynr));
|
|
|
|
create_dbt_key_from_table(&key, keynr, key_buff2, record, &has_null);
|
|
|
|
error = share->key_file[keynr]->del(share->key_file[keynr], trans, &key , 0);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
else {
|
|
|
|
create_dbt_key_from_table(&key, keynr, key_buff2, record, &has_null);
|
|
|
|
error = share->key_file[keynr]->delboth(
|
|
|
|
share->key_file[keynr],
|
|
|
|
trans,
|
|
|
|
&key,
|
|
|
|
prim_key,
|
|
|
|
DB_DELETE_ANY
|
|
|
|
);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// Delete all keys for new_record
|
|
|
|
// Parameters:
|
|
|
|
// [in] trans - transaction to be used for the delete
|
|
|
|
// [in] record - row in MySQL format. Must delete all keys for this row
|
|
|
|
// [in] prim_key - key for record in primary table
|
|
|
|
// [in] keys - array that states if a key is set, and hence needs
|
|
|
|
// removal
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:41 +02:00
|
|
|
int ha_tokudb::remove_keys(DB_TXN * trans, const uchar * record, DBT * prim_key, key_map * keys) {
|
2013-04-17 06:01:37 +02:00
|
|
|
int result = 0;
|
|
|
|
for (uint keynr = 0; keynr < table_share->keys + test(hidden_primary_key); keynr++) {
|
|
|
|
if (keys->is_set(keynr)) {
|
|
|
|
int new_error = remove_key(trans, keynr, record, prim_key);
|
|
|
|
if (new_error) {
|
|
|
|
result = new_error; // Return last error
|
|
|
|
break; // Let rollback correct things
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
2013-04-17 06:01:41 +02:00
|
|
|
// Deletes a row in the table, called when handling a DELETE query
|
2013-04-17 06:01:40 +02:00
|
|
|
// Parameters:
|
|
|
|
// [in] record - row to be deleted, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::delete_row(const uchar * record) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::delete_row");
|
2013-04-17 06:01:41 +02:00
|
|
|
int error = ENOSYS;
|
|
|
|
DBT prim_key;
|
2013-04-17 06:01:37 +02:00
|
|
|
key_map keys = table_share->keys_in_use;
|
2013-04-17 06:01:44 +02:00
|
|
|
bool has_null;
|
2013-04-17 06:01:46 +02:00
|
|
|
//
|
|
|
|
// status message to be shown in "show process list"
|
|
|
|
//
|
|
|
|
char status_msg[200]; //buffer of 200 should be a good upper bound.
|
|
|
|
THD* thd = ha_thd();
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_delete_count, &LOCK_status);
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
create_dbt_key_from_table(&prim_key, primary_key, key_buff, record, &has_null);
|
2013-04-17 06:01:44 +02:00
|
|
|
if (hidden_primary_key) {
|
2013-04-17 06:01:37 +02:00
|
|
|
keys.set_bit(primary_key);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
/* Subtransactions may be used in order to retry the delete in
|
|
|
|
case we get a DB_LOCK_DEADLOCK error. */
|
|
|
|
DB_TXN *sub_trans = transaction;
|
2013-04-17 06:01:44 +02:00
|
|
|
error = remove_keys(sub_trans, record, &prim_key, &keys);
|
|
|
|
if (error) {
|
|
|
|
DBUG_PRINT("error", ("Got error %d", error));
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
else {
|
2013-04-17 06:01:43 +02:00
|
|
|
deleted_rows++;
|
2013-04-17 06:01:46 +02:00
|
|
|
num_deleted_rows_in_stmt++;
|
|
|
|
if ((num_deleted_rows_in_stmt % 1000) == 0) {
|
|
|
|
sprintf(status_msg, "Inserted about %llu rows", num_deleted_rows_in_stmt);
|
|
|
|
thd_proc_info(thd, status_msg);
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
//
|
|
|
|
// Notification that a scan of entire secondary table is about
|
|
|
|
// to take place. Will pre acquire table read lock
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
|
|
|
int ha_tokudb::prepare_index_scan() {
|
|
|
|
int error;
|
|
|
|
DB* db = share->key_file[active_index];
|
|
|
|
error = db->pre_acquire_read_lock(
|
|
|
|
db,
|
|
|
|
transaction,
|
|
|
|
db->dbt_neg_infty(), db->dbt_neg_infty(),
|
|
|
|
db->dbt_pos_infty(), db->dbt_pos_infty()
|
|
|
|
);
|
|
|
|
if (error) { last_cursor_error = error; goto cleanup; }
|
|
|
|
|
|
|
|
range_lock_grabbed = true;
|
|
|
|
error = 0;
|
|
|
|
cleanup:
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Initializes local cursor on DB with index keynr
|
|
|
|
// Parameters:
|
|
|
|
// keynr - key (index) number
|
|
|
|
// sorted - 1 if result MUST be sorted according to index
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_init(uint keynr, bool sorted) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_init %p %d", this, keynr);
|
2013-04-17 06:01:37 +02:00
|
|
|
int error;
|
|
|
|
DBUG_PRINT("enter", ("table: '%s' key: %d", table_share->table_name.str, keynr));
|
|
|
|
|
|
|
|
/*
|
|
|
|
Under some very rare conditions (like full joins) we may already have
|
|
|
|
an active cursor at this point
|
|
|
|
*/
|
|
|
|
if (cursor) {
|
|
|
|
DBUG_PRINT("note", ("Closing active cursor"));
|
|
|
|
cursor->c_close(cursor);
|
|
|
|
}
|
|
|
|
active_index = keynr;
|
2013-04-17 06:01:42 +02:00
|
|
|
last_cursor_error = 0;
|
2013-04-17 06:01:42 +02:00
|
|
|
range_lock_grabbed = false;
|
2013-04-17 06:01:37 +02:00
|
|
|
DBUG_ASSERT(keynr <= table->s->keys);
|
2013-04-17 06:01:41 +02:00
|
|
|
DBUG_ASSERT(share->key_file[keynr]);
|
2013-04-17 06:01:44 +02:00
|
|
|
if ((error = share->key_file[keynr]->cursor(share->key_file[keynr], transaction, &cursor, 0))) {
|
2013-04-17 06:01:42 +02:00
|
|
|
last_cursor_error = error;
|
2013-04-17 06:01:40 +02:00
|
|
|
cursor = NULL; // Safety
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
bzero((void *) &last_key, sizeof(last_key));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// closes the local cursor
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_end() {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_end %p", this);
|
2013-04-17 06:01:37 +02:00
|
|
|
int error = 0;
|
2013-04-17 06:01:42 +02:00
|
|
|
range_lock_grabbed = false;
|
2013-04-17 06:01:37 +02:00
|
|
|
if (cursor) {
|
|
|
|
DBUG_PRINT("enter", ("table: '%s'", table_share->table_name.str));
|
|
|
|
error = cursor->c_close(cursor);
|
2013-04-17 06:01:40 +02:00
|
|
|
cursor = NULL;
|
2013-04-17 06:01:42 +02:00
|
|
|
last_cursor_error = 0;
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
active_index = MAX_KEY;
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
int ha_tokudb::handle_cursor_error(int error, int err_to_return, uint keynr) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::handle_cursor_error");
|
2013-04-17 06:01:37 +02:00
|
|
|
if (error) {
|
2013-04-17 06:01:42 +02:00
|
|
|
last_cursor_error = error;
|
2013-04-17 06:01:37 +02:00
|
|
|
table->status = STATUS_NOT_FOUND;
|
2013-04-17 06:01:42 +02:00
|
|
|
cursor->c_close(cursor);
|
|
|
|
cursor = NULL;
|
|
|
|
if (error == DB_NOTFOUND || error == DB_KEYEMPTY) {
|
2013-04-17 06:01:44 +02:00
|
|
|
error = err_to_return;
|
|
|
|
if ((share->key_file[keynr]->cursor(share->key_file[keynr], transaction, &cursor, 0))) {
|
2013-04-17 06:01:42 +02:00
|
|
|
cursor = NULL; // Safety
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// Helper function for read_row and smart_dbt_callback_xxx functions
|
|
|
|
// When using a hidden primary key, upon reading a row,
|
|
|
|
// we set the current_ident field to whatever the primary key we retrieved
|
|
|
|
// was
|
|
|
|
//
|
|
|
|
void ha_tokudb::extract_hidden_primary_key(uint keynr, DBT const *row, DBT const *found_key) {
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
2013-04-17 06:01:41 +02:00
|
|
|
// extract hidden primary key to current_ident
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
if (hidden_primary_key) {
|
2013-04-17 06:01:41 +02:00
|
|
|
if (keynr == primary_key) {
|
|
|
|
memcpy_fixed(current_ident, (char *) found_key->data, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// if clustering key, hidden primary key is at end of found_key
|
|
|
|
//
|
|
|
|
else if (table->key_info[keynr].flags & HA_CLUSTERING) {
|
|
|
|
memcpy_fixed(
|
|
|
|
current_ident,
|
|
|
|
(char *) found_key->data + found_key->size - TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
|
|
|
|
TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
|
|
|
|
);
|
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
else {
|
|
|
|
memcpy_fixed(current_ident, (char *) row->data, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
|
|
|
|
}
|
2013-04-17 06:01:40 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Reads the contents of row and found_key, DBT's retrieved from the DB associated to keynr, into buf
|
|
|
|
// This function assumes that we are using a covering index, as a result, if keynr is the primary key,
|
|
|
|
// we do not read row into buf
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the row, in MySQL format
|
|
|
|
// keynr - index into key_file that represents DB we are currently operating on.
|
|
|
|
// [in] row - the row that has been read from the preceding DB call
|
|
|
|
// [in] found_key - key used to retrieve the row
|
|
|
|
//
|
|
|
|
void ha_tokudb::read_key_only(uchar * buf, uint keynr, DBT const *row, DBT const *found_key) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::read_key_only");
|
|
|
|
table->status = 0;
|
|
|
|
unpack_key(buf, found_key, keynr);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!hidden_primary_key && (keynr != primary_key) && !(table->key_info[keynr].flags & HA_CLUSTERING)) {
|
2013-04-17 06:01:44 +02:00
|
|
|
unpack_key(buf, row, primary_key);
|
|
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Helper function used to try to retrieve the entire row
|
|
|
|
// If keynr is associated with the main table, reads contents of found_key and row into buf, otherwise,
|
|
|
|
// makes copy of primary key and saves it to last_key. This can later be used to retrieve the entire row
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the row, in MySQL format
|
|
|
|
// keynr - index into key_file that represents DB we are currently operating on.
|
|
|
|
// [in] row - the row that has been read from the preceding DB call
|
|
|
|
// [in] found_key - key used to retrieve the row
|
|
|
|
//
|
|
|
|
void ha_tokudb::read_primary_key(uchar * buf, uint keynr, DBT const *row, DBT const *found_key) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::read_primary_key");
|
|
|
|
table->status = 0;
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// case where we read from secondary table that is not clustered
|
|
|
|
//
|
|
|
|
if (keynr != primary_key && !(table->key_info[keynr].flags & HA_CLUSTERING)) {
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// create a DBT that has the same data as row,
|
|
|
|
//
|
|
|
|
bzero((void *) &last_key, sizeof(last_key));
|
|
|
|
last_key.data = key_buff;
|
|
|
|
last_key.size = row->size;
|
|
|
|
memcpy(key_buff, row->data, row->size);
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// case we read from clustered key, so unpack the entire row
|
|
|
|
//
|
|
|
|
else if (keynr != primary_key && (table->key_info[keynr].flags & HA_CLUSTERING)) {
|
|
|
|
unpack_row(buf, row, found_key, false);
|
|
|
|
}
|
|
|
|
//
|
|
|
|
// case we read from the primary key, so unpack the entire row
|
|
|
|
//
|
2013-04-17 06:01:44 +02:00
|
|
|
else {
|
2013-04-17 06:01:45 +02:00
|
|
|
unpack_row(buf, row, found_key, true);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
if (found_key) { DBUG_DUMP("read row key", (uchar *) found_key->data, found_key->size); }
|
|
|
|
DBUG_VOID_RETURN;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// This function reads an entire row into buf. This function also assumes that
|
|
|
|
// the key needed to retrieve the row is stored in the member variable last_key
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the row, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success, error otherwise
|
|
|
|
//
|
|
|
|
int ha_tokudb::read_full_row(uchar * buf) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::read_full_row");
|
|
|
|
int error;
|
|
|
|
//
|
|
|
|
// Read the data into current_row, assumes key is stored in this->last_key
|
|
|
|
//
|
|
|
|
current_row.flags = DB_DBT_REALLOC;
|
|
|
|
if ((error = share->file->get(share->file, transaction, &last_key, ¤t_row, 0))) {
|
|
|
|
table->status = STATUS_NOT_FOUND;
|
|
|
|
TOKUDB_DBUG_RETURN(error == DB_NOTFOUND ? HA_ERR_CRASHED : error);
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
unpack_row(buf, ¤t_row, &last_key, true);
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
TOKUDB_DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// The funtion read_row checks whether the row was obtained from the primary table or
|
|
|
|
// from an index table. If it was obtained from an index table, it further dereferences on
|
|
|
|
// the main table. In the end, the read_row function will manage to return the actual row
|
|
|
|
// of interest in the buf parameter.
|
|
|
|
//
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the row, in MySQL format
|
|
|
|
// keynr - index into key_file that represents DB we are currently operating on.
|
|
|
|
// [in] row - the row that has been read from the preceding DB call
|
|
|
|
// [in] found_key - key used to retrieve the row
|
|
|
|
//
|
|
|
|
int ha_tokudb::read_row(uchar * buf, uint keynr, DBT const *row, DBT const *found_key) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::read_row");
|
|
|
|
int error;
|
|
|
|
|
|
|
|
extract_hidden_primary_key(keynr, row, found_key);
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
table->status = 0;
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// if the index shows that the table we read the row from was indexed on the primary key,
|
|
|
|
// that means we have our row and can skip
|
|
|
|
// this entire if clause. All that is required is to unpack row.
|
|
|
|
// if the index shows that what we read was from a table that was NOT indexed on the
|
|
|
|
// primary key, then we must still retrieve the row, as the "row" value is indeed just
|
|
|
|
// a primary key, whose row we must still read
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
if (keynr != primary_key) {
|
|
|
|
if (key_read && found_key) {
|
2013-04-17 06:01:40 +02:00
|
|
|
// TOKUDB_DBUG_DUMP("key=", found_key->data, found_key->size);
|
2013-04-17 06:01:37 +02:00
|
|
|
unpack_key(buf, found_key, keynr);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!hidden_primary_key && !(table->key_info[keynr].flags & HA_CLUSTERING)) {
|
2013-04-17 06:01:40 +02:00
|
|
|
// TOKUDB_DBUG_DUMP("row=", row->data, row->size);
|
2013-04-17 06:01:37 +02:00
|
|
|
unpack_key(buf, row, primary_key);
|
2013-04-17 06:01:40 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(0);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
// in this case we have a clustered key, so no need to do pt query
|
|
|
|
//
|
|
|
|
if (table->key_info[keynr].flags & HA_CLUSTERING) {
|
|
|
|
unpack_row(buf, row, found_key, false);
|
|
|
|
TOKUDB_DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
//
|
2013-04-17 06:01:40 +02:00
|
|
|
// create a DBT that has the same data as row,
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
DBT key;
|
|
|
|
bzero((void *) &key, sizeof(key));
|
|
|
|
key.data = key_buff;
|
|
|
|
key.size = row->size;
|
|
|
|
memcpy(key_buff, row->data, row->size);
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Read the data into current_row
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
current_row.flags = DB_DBT_REALLOC;
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = share->file->get(share->file, transaction, &key, ¤t_row, 0))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
table->status = STATUS_NOT_FOUND;
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error == DB_NOTFOUND ? HA_ERR_CRASHED : error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
unpack_row(buf, ¤t_row, &key, true);
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// in this case we are dealing with the primary key
|
|
|
|
//
|
2013-04-17 06:01:43 +02:00
|
|
|
if (key_read && !hidden_primary_key) {
|
|
|
|
unpack_key(buf, found_key, keynr);
|
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:45 +02:00
|
|
|
unpack_row(buf, row, found_key, true);
|
2013-04-17 06:01:43 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:40 +02:00
|
|
|
if (found_key) { DBUG_DUMP("read row key", (uchar *) found_key->data, found_key->size); }
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(0);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// This is only used to read whole keys
|
|
|
|
// According to InnoDB handlerton: Positions an index cursor to the index
|
|
|
|
// specified in keynr. Fetches the row if any
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the returned row
|
|
|
|
// keynr - index to use
|
|
|
|
// [in] key - key value, according to InnoDB, if NULL,
|
|
|
|
// position cursor at start or end of index,
|
|
|
|
// not sure if this is done now
|
|
|
|
// key_len - length of key
|
|
|
|
// find_flag - according to InnoDB, search flags from my_base.h
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_KEY_NOT_FOUND if not found (per InnoDB),
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_read_idx(uchar * buf, uint keynr, const uchar * key, uint key_len, enum ha_rkey_function find_flag) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_read_idx");
|
2013-04-17 06:01:44 +02:00
|
|
|
int error;
|
2013-04-17 06:01:37 +02:00
|
|
|
table->in_use->status_var.ha_read_key_count++;
|
|
|
|
current_row.flags = DB_DBT_REALLOC;
|
|
|
|
active_index = MAX_KEY;
|
2013-04-17 06:01:44 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
error = share->key_file[keynr]->get(share->key_file[keynr], transaction, pack_key(&last_key, keynr, key_buff, key, key_len, COL_NEG_INF), ¤t_row, 0);
|
2013-04-17 06:01:44 +02:00
|
|
|
if (error == DB_NOTFOUND || error == DB_KEYEMPTY) {
|
|
|
|
error = HA_ERR_KEY_NOT_FOUND;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
if (!error) {
|
|
|
|
error = read_row(buf, keynr, ¤t_row, &last_key);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:44 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// context information for the heaviside functions.
|
|
|
|
// Context information includes data necessary
|
|
|
|
// to perform comparisons
|
|
|
|
//
|
|
|
|
typedef struct heavi_info {
|
|
|
|
DB *db;
|
|
|
|
const DBT *key;
|
|
|
|
} *HEAVI_INFO;
|
|
|
|
|
|
|
|
//
|
|
|
|
// effect:
|
|
|
|
// heaviside function used for HA_READ_AFTER_KEY.
|
|
|
|
// to use this heaviside function in ha_read_after_key, use direction>0
|
|
|
|
// the stored key (in heavi_info) contains a prefix of the columns in the candidate
|
|
|
|
// keys. only the columns in the stored key will be used for comparison.
|
|
|
|
//
|
|
|
|
// parameters:
|
|
|
|
// [in] key - candidate key in db that is being compared
|
|
|
|
// [in] value - candidate value, unused
|
|
|
|
// [in] extra_h - a heavi_info that contains information necessary for
|
|
|
|
// the comparison
|
|
|
|
// returns:
|
|
|
|
// >0 : candidate key > stored key
|
|
|
|
// <0 : otherwise
|
|
|
|
// examples:
|
|
|
|
// columns: (a,b,c,d)
|
|
|
|
// stored key = (3,4) (only a,b)
|
|
|
|
// candidate keys have (a,b,c,d)
|
|
|
|
// (3,2,1,1) < (3,4)
|
|
|
|
// (3,4,1,1) == (3,4)
|
|
|
|
// (3,5,1,1) > (3,4)
|
|
|
|
//
|
|
|
|
static int after_key_heavi(const DBT *key, const DBT *value, void *extra_h) {
|
|
|
|
HEAVI_INFO info = (HEAVI_INFO)extra_h;
|
|
|
|
int cmp = tokudb_prefix_cmp_packed_key(info->db, key, info->key);
|
|
|
|
return cmp>0 ? 1 : -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// effect:
|
|
|
|
// heaviside function used for HA_READ_PREFIX_LAST_OR_PREV.
|
|
|
|
// to use this heaviside function in HA_READ_PREFIX_LAST_OR_PREV, use direction<0
|
|
|
|
// the stored key (in heavi_info) contains a prefix of the columns in the candidate
|
|
|
|
// keys. only the columns in the stored key will be used for comparison.
|
|
|
|
//
|
|
|
|
// parameters:
|
|
|
|
// [in] key - candidate key in db that is being compared
|
|
|
|
// [in] value - candidate value, unused
|
|
|
|
// [in] extra_h - a heavi_info that contains information necessary for
|
|
|
|
// the comparison
|
|
|
|
// returns:
|
|
|
|
// >0 : candidate key > stored key
|
|
|
|
// 0 : candidate key == stored key
|
|
|
|
// <0 : candidate key < stored key
|
|
|
|
// examples:
|
|
|
|
// columns: (a,b,c,d)
|
|
|
|
// stored key = (3,4) (only a,b)
|
|
|
|
// candidate keys have (a,b,c,d)
|
|
|
|
// (3,2,1,1) < (3,4)
|
|
|
|
// (3,4,1,1) == (3,4)
|
|
|
|
// (3,5,1,1) > (3,4)
|
|
|
|
//
|
|
|
|
static int prefix_last_or_prev_heavi(const DBT *key, const DBT *value, void *extra_h) {
|
|
|
|
HEAVI_INFO info = (HEAVI_INFO)extra_h;
|
|
|
|
int cmp = tokudb_prefix_cmp_packed_key(info->db, key, info->key);
|
|
|
|
return cmp;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// effect:
|
|
|
|
// heaviside function used for HA_READ_BEFORE_KEY.
|
|
|
|
// to use this heaviside function in HA_READ_BEFORE_KEY, use direction<0
|
|
|
|
// the stored key (in heavi_info) contains a prefix of the columns in the candidate
|
|
|
|
// keys. only the columns in the stored key will be used for comparison.
|
|
|
|
//
|
|
|
|
// parameters:
|
|
|
|
// [in] key - candidate key in db that is being compared
|
|
|
|
// [in] value - candidate value, unused
|
|
|
|
// [in] extra_h - a heavi_info that contains information necessary for
|
|
|
|
// the comparison
|
|
|
|
// returns:
|
|
|
|
// <0 : candidate key < stored key
|
|
|
|
// >0 : otherwise
|
|
|
|
// examples:
|
|
|
|
// columns: (a,b,c,d)
|
|
|
|
// stored key = (3,4) (only a,b)
|
|
|
|
// candidate keys have (a,b,c,d)
|
|
|
|
// (3,2,1,1) < (3,4)
|
|
|
|
// (3,4,1,1) == (3,4)
|
|
|
|
// (3,5,1,1) > (3,4)
|
|
|
|
//
|
|
|
|
static int before_key_heavi(const DBT *key, const DBT *value, void *extra_h) {
|
|
|
|
HEAVI_INFO info = (HEAVI_INFO)extra_h;
|
|
|
|
int cmp = tokudb_prefix_cmp_packed_key(info->db, key, info->key);
|
|
|
|
return (cmp<0) ? -1 : 1;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// According to InnoDB handlerton: Positions an index cursor to the index
|
|
|
|
// specified in keynr. Fetches the row if any
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the returned row
|
|
|
|
// [in] key - key value, according to InnoDB, if NULL,
|
|
|
|
// position cursor at start or end of index,
|
|
|
|
// not sure if this is done now
|
|
|
|
// key_len - length of key
|
|
|
|
// find_flag - according to InnoDB, search flags from my_base.h
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_KEY_NOT_FOUND if not found (per InnoDB),
|
|
|
|
// we seem to return HA_ERR_END_OF_FILE if find_flag != HA_READ_KEY_EXACT
|
|
|
|
// TODO: investigate this for correctness
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:40 +02:00
|
|
|
int ha_tokudb::index_read(uchar * buf, const uchar * key, uint key_len, enum ha_rkey_function find_flag) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_read %p find %d", this, find_flag);
|
2013-04-17 06:01:40 +02:00
|
|
|
// TOKUDB_DBUG_DUMP("key=", key, key_len);
|
2013-04-17 06:01:40 +02:00
|
|
|
DBT row;
|
2013-04-17 06:01:46 +02:00
|
|
|
int error;
|
|
|
|
u_int32_t flags = 0;
|
2013-04-17 06:01:44 +02:00
|
|
|
struct smart_dbt_info info;
|
|
|
|
struct heavi_info heavi_info;
|
|
|
|
bool do_read_row = true;
|
2013-04-17 06:01:40 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
HANDLE_INVALID_CURSOR();
|
2013-04-17 06:01:42 +02:00
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
table->in_use->status_var.ha_read_key_count++;
|
|
|
|
bzero((void *) &row, sizeof(row));
|
2013-04-17 06:01:45 +02:00
|
|
|
pack_key(&last_key, active_index, key_buff, key, key_len, COL_NEG_INF);
|
2013-04-17 06:01:40 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
info.ha = this;
|
|
|
|
info.buf = buf;
|
|
|
|
info.keynr = active_index;
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
heavi_info.db = share->key_file[active_index];
|
|
|
|
heavi_info.key = &last_key;
|
2013-04-17 06:01:38 +02:00
|
|
|
switch (find_flag) {
|
2013-04-17 06:01:40 +02:00
|
|
|
case HA_READ_KEY_EXACT: /* Find first record else error */
|
2013-04-17 06:01:46 +02:00
|
|
|
flags = SET_READ_FLAG(DB_SET_RANGE);
|
|
|
|
error = cursor->c_get(cursor, &last_key, &row, flags);
|
2013-04-17 06:01:39 +02:00
|
|
|
if (error == 0) {
|
|
|
|
DBT orig_key;
|
2013-04-17 06:01:45 +02:00
|
|
|
pack_key(&orig_key, active_index, key_buff2, key, key_len, COL_NEG_INF);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (tokudb_prefix_cmp_packed_key(share->key_file[active_index], &orig_key, &last_key)) {
|
2013-04-17 06:01:39 +02:00
|
|
|
error = DB_NOTFOUND;
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
}
|
2013-04-17 06:01:38 +02:00
|
|
|
break;
|
2013-04-17 06:01:40 +02:00
|
|
|
case HA_READ_AFTER_KEY: /* Find next rec. after key-record */
|
2013-04-17 06:01:46 +02:00
|
|
|
flags = SET_READ_FLAG(0);
|
2013-04-17 06:01:44 +02:00
|
|
|
error = cursor->c_getf_heavi(
|
2013-04-17 06:01:46 +02:00
|
|
|
cursor, flags,
|
2013-04-17 06:01:44 +02:00
|
|
|
key_read ? smart_dbt_callback_keyread_heavi : smart_dbt_callback_rowread_heavi, &info,
|
|
|
|
after_key_heavi, &heavi_info,
|
|
|
|
1
|
|
|
|
);
|
|
|
|
do_read_row = false;
|
2013-04-17 06:01:38 +02:00
|
|
|
break;
|
2013-04-17 06:01:40 +02:00
|
|
|
case HA_READ_BEFORE_KEY: /* Find next rec. before key-record */
|
2013-04-17 06:01:46 +02:00
|
|
|
flags = SET_READ_FLAG(0);
|
2013-04-17 06:01:44 +02:00
|
|
|
error = cursor->c_getf_heavi(
|
2013-04-17 06:01:46 +02:00
|
|
|
cursor, flags,
|
2013-04-17 06:01:44 +02:00
|
|
|
key_read ? smart_dbt_callback_keyread_heavi : smart_dbt_callback_rowread_heavi, &info,
|
|
|
|
before_key_heavi, &heavi_info,
|
|
|
|
-1
|
|
|
|
);
|
|
|
|
do_read_row = false;
|
2013-04-17 06:01:39 +02:00
|
|
|
break;
|
2013-04-17 06:01:40 +02:00
|
|
|
case HA_READ_KEY_OR_NEXT: /* Record or next record */
|
2013-04-17 06:01:46 +02:00
|
|
|
flags = SET_READ_FLAG(DB_SET_RANGE);
|
|
|
|
error = cursor->c_get(cursor, &last_key, &row, flags);
|
2013-04-17 06:01:38 +02:00
|
|
|
break;
|
2013-04-17 06:01:40 +02:00
|
|
|
case HA_READ_KEY_OR_PREV: /* Record or previous */
|
2013-04-17 06:01:46 +02:00
|
|
|
flags = SET_READ_FLAG(DB_SET_RANGE);
|
|
|
|
error = cursor->c_get(cursor, &last_key, &row, flags);
|
2013-04-17 06:01:39 +02:00
|
|
|
if (error == 0) {
|
|
|
|
DBT orig_key;
|
2013-04-17 06:01:45 +02:00
|
|
|
pack_key(&orig_key, active_index, key_buff2, key, key_len, COL_NEG_INF);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (tokudb_prefix_cmp_packed_key(share->key_file[active_index], &orig_key, &last_key) != 0) {
|
2013-04-17 06:01:39 +02:00
|
|
|
error = cursor->c_get(cursor, &last_key, &row, DB_PREV);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:40 +02:00
|
|
|
}
|
|
|
|
else if (error == DB_NOTFOUND)
|
2013-04-17 06:01:39 +02:00
|
|
|
error = cursor->c_get(cursor, &last_key, &row, DB_LAST);
|
2013-04-17 06:01:39 +02:00
|
|
|
break;
|
2013-04-17 06:01:40 +02:00
|
|
|
case HA_READ_PREFIX_LAST_OR_PREV: /* Last or prev key with the same prefix */
|
2013-04-17 06:01:46 +02:00
|
|
|
flags = SET_READ_FLAG(0);
|
2013-04-17 06:01:44 +02:00
|
|
|
error = cursor->c_getf_heavi(
|
2013-04-17 06:01:46 +02:00
|
|
|
cursor, flags,
|
2013-04-17 06:01:44 +02:00
|
|
|
key_read ? smart_dbt_callback_keyread_heavi : smart_dbt_callback_rowread_heavi, &info,
|
|
|
|
prefix_last_or_prev_heavi, &heavi_info,
|
|
|
|
-1
|
|
|
|
);
|
|
|
|
do_read_row = false;
|
2013-04-17 06:01:38 +02:00
|
|
|
break;
|
2013-04-17 06:01:45 +02:00
|
|
|
case HA_READ_PREFIX_LAST:
|
2013-04-17 06:01:46 +02:00
|
|
|
flags = SET_READ_FLAG(0);
|
2013-04-17 06:01:45 +02:00
|
|
|
error = cursor->c_getf_heavi(
|
2013-04-17 06:01:46 +02:00
|
|
|
cursor, flags,
|
2013-04-17 06:01:45 +02:00
|
|
|
key_read ? smart_dbt_callback_keyread_heavi : smart_dbt_callback_rowread_heavi, &info,
|
|
|
|
prefix_last_or_prev_heavi, &heavi_info,
|
|
|
|
-1
|
|
|
|
);
|
|
|
|
if (!error && heavi_ret_val != 0) {
|
|
|
|
error = DB_NOTFOUND;
|
|
|
|
}
|
|
|
|
do_read_row = false;
|
|
|
|
break;
|
2013-04-17 06:01:38 +02:00
|
|
|
default:
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("unsupported:%d\n", find_flag);
|
2013-04-17 06:01:39 +02:00
|
|
|
error = HA_ERR_UNSUPPORTED;
|
2013-04-17 06:01:38 +02:00
|
|
|
break;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
error = handle_cursor_error(error,HA_ERR_KEY_NOT_FOUND,active_index);
|
|
|
|
if (!error && do_read_row) {
|
|
|
|
error = read_row(buf, active_index, &row, &last_key);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
else if (!error && !key_read && active_index != primary_key && !(table->key_info[active_index].flags & HA_CLUSTERING)) {
|
2013-04-17 06:01:44 +02:00
|
|
|
error = read_full_row(buf);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
if (error && (tokudb_debug & TOKUDB_DEBUG_ERROR)) {
|
|
|
|
TOKUDB_TRACE("error:%d:%d\n", error, find_flag);
|
|
|
|
}
|
|
|
|
cleanup:
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Reads the next row from the active index (cursor) into buf, and advances cursor
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the next row, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_END_OF_FILE if not found
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_next(uchar * buf) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_next");
|
2013-04-17 06:01:42 +02:00
|
|
|
int error;
|
2013-04-17 06:01:44 +02:00
|
|
|
struct smart_dbt_info info;
|
|
|
|
u_int32_t flags = SET_READ_FLAG(0);
|
|
|
|
HANDLE_INVALID_CURSOR();
|
2013-04-17 06:01:42 +02:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_read_next_count, &LOCK_status);
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
info.ha = this;
|
|
|
|
info.buf = buf;
|
|
|
|
info.keynr = active_index;
|
|
|
|
error = handle_cursor_error(cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK, &info), HA_ERR_END_OF_FILE,active_index);
|
|
|
|
//
|
|
|
|
// still need to get entire contents of the row if operation done on
|
|
|
|
// secondary DB and it was NOT a covering index
|
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!error && !key_read && (active_index != primary_key) && !(table->key_info[active_index].flags & HA_CLUSTERING) ) {
|
2013-04-17 06:01:44 +02:00
|
|
|
error = read_full_row(buf);
|
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Reads the next row matching to the key, on success, advances cursor
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the next row, in MySQL format
|
|
|
|
// [in] key - key value
|
|
|
|
// keylen - length of key
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_END_OF_FILE if not found
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_next_same(uchar * buf, const uchar * key, uint keylen) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_next_same %p", this);
|
2013-04-17 06:01:37 +02:00
|
|
|
int error;
|
2013-04-17 06:01:44 +02:00
|
|
|
struct smart_dbt_info info;
|
|
|
|
HANDLE_INVALID_CURSOR();
|
2013-04-17 06:01:42 +02:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_read_next_count, &LOCK_status);
|
2013-04-17 06:01:44 +02:00
|
|
|
info.ha = this;
|
|
|
|
info.buf = buf;
|
|
|
|
info.keynr = active_index;
|
2013-04-17 06:01:39 +02:00
|
|
|
/* QQQ NEXT_DUP on nodup returns EINVAL for tokudb */
|
|
|
|
if (keylen == table->key_info[active_index].key_length &&
|
|
|
|
!(table->key_info[active_index].flags & HA_NOSAME) &&
|
|
|
|
!(table->key_info[active_index].flags & HA_END_SPACE_KEY)) {
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
u_int32_t flags = SET_READ_FLAG(0);
|
|
|
|
error = handle_cursor_error(cursor->c_getf_next_dup(cursor, flags, SMART_DBT_CALLBACK, &info),HA_ERR_END_OF_FILE,active_index);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!error && !key_read && active_index != primary_key && !(table->key_info[active_index].flags & HA_CLUSTERING)) {
|
2013-04-17 06:01:44 +02:00
|
|
|
error = read_full_row(buf);
|
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
} else {
|
2013-04-17 06:01:44 +02:00
|
|
|
u_int32_t flags = SET_READ_FLAG(0);
|
|
|
|
error = handle_cursor_error(cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK, &info),HA_ERR_END_OF_FILE,active_index);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!error && !key_read && active_index != primary_key && !(table->key_info[active_index].flags & HA_CLUSTERING)) {
|
2013-04-17 06:01:44 +02:00
|
|
|
error = read_full_row(buf);
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
if (!error &&::key_cmp_if_same(table, key, active_index, keylen))
|
|
|
|
error = HA_ERR_END_OF_FILE;
|
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Reads the previous row from the active index (cursor) into buf, and advances cursor
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the next row, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_END_OF_FILE if not found
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_prev(uchar * buf) {
|
2013-04-17 06:01:44 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_next");
|
|
|
|
int error;
|
|
|
|
struct smart_dbt_info info;
|
|
|
|
u_int32_t flags = SET_READ_FLAG(0);
|
|
|
|
HANDLE_INVALID_CURSOR();
|
|
|
|
|
|
|
|
statistic_increment(table->in_use->status_var.ha_read_next_count, &LOCK_status);
|
|
|
|
|
|
|
|
info.ha = this;
|
|
|
|
info.buf = buf;
|
|
|
|
info.keynr = active_index;
|
|
|
|
error = handle_cursor_error(cursor->c_getf_prev(cursor, flags, SMART_DBT_CALLBACK, &info),HA_ERR_END_OF_FILE,active_index);
|
|
|
|
//
|
|
|
|
// still need to get entire contents of the row if operation done on
|
|
|
|
// secondary DB and it was NOT a covering index
|
|
|
|
//
|
2013-04-17 06:01:45 +02:00
|
|
|
if (!error && !key_read && (active_index != primary_key) && !(table->key_info[active_index].flags & HA_CLUSTERING) ) {
|
2013-04-17 06:01:44 +02:00
|
|
|
error = read_full_row(buf);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Reads the first row from the active index (cursor) into buf, and advances cursor
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the next row, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_END_OF_FILE if not found
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_first(uchar * buf) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_first");
|
2013-04-17 06:01:42 +02:00
|
|
|
int error;
|
2013-04-17 06:01:37 +02:00
|
|
|
DBT row;
|
2013-04-17 06:01:44 +02:00
|
|
|
HANDLE_INVALID_CURSOR();
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_read_first_count, &LOCK_status);
|
|
|
|
bzero((void *) &row, sizeof(row));
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
error = handle_cursor_error(cursor->c_get(cursor, &last_key, &row, DB_FIRST),HA_ERR_END_OF_FILE,active_index);
|
|
|
|
if (!error) {
|
|
|
|
error = read_row(buf, active_index, &row, &last_key);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Reads the last row from the active index (cursor) into buf, and advances cursor
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the next row, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_END_OF_FILE if not found
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::index_last(uchar * buf) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::index_last");
|
2013-04-17 06:01:42 +02:00
|
|
|
int error;
|
2013-04-17 06:01:37 +02:00
|
|
|
DBT row;
|
2013-04-17 06:01:44 +02:00
|
|
|
HANDLE_INVALID_CURSOR();
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_read_last_count, &LOCK_status);
|
|
|
|
bzero((void *) &row, sizeof(row));
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
error = handle_cursor_error(cursor->c_get(cursor, &last_key, &row, DB_LAST),HA_ERR_END_OF_FILE,active_index);
|
|
|
|
if (!error) {
|
|
|
|
error = read_row(buf, active_index, &row, &last_key);
|
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Initialize a scan of the table (which is why index_init is called on primary_key)
|
|
|
|
// Parameters:
|
|
|
|
// scan - unused
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::rnd_init(bool scan) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::rnd_init");
|
2013-04-17 06:01:42 +02:00
|
|
|
int error;
|
2013-04-17 06:01:37 +02:00
|
|
|
current_row.flags = DB_DBT_REALLOC;
|
2013-04-17 06:01:42 +02:00
|
|
|
range_lock_grabbed = false;
|
2013-04-17 06:01:42 +02:00
|
|
|
if (scan) {
|
|
|
|
DB* db = share->key_file[primary_key];
|
|
|
|
error = db->pre_acquire_read_lock(db, transaction, db->dbt_neg_infty(), NULL, db->dbt_pos_infty(), NULL);
|
2013-04-17 06:01:42 +02:00
|
|
|
if (error) { last_cursor_error = error; goto cleanup; }
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
|
|
|
error = index_init(primary_key, 0);
|
2013-04-17 06:01:42 +02:00
|
|
|
if (error) { goto cleanup;}
|
|
|
|
|
|
|
|
//
|
2013-04-17 06:01:42 +02:00
|
|
|
// only want to set range_lock_grabbed to true after index_init
|
2013-04-17 06:01:42 +02:00
|
|
|
// successfully executed for two reasons:
|
|
|
|
// 1) index_init will reset it to false anyway
|
|
|
|
// 2) if it fails, we don't want prelocking on,
|
|
|
|
//
|
2013-04-17 06:01:42 +02:00
|
|
|
if (scan) { range_lock_grabbed = true; }
|
2013-04-17 06:01:42 +02:00
|
|
|
error = 0;
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// End a scan of the table
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::rnd_end() {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::rnd_end");
|
2013-04-17 06:01:42 +02:00
|
|
|
range_lock_grabbed = false;
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_RETURN(index_end());
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Read the next row in a table scan
|
|
|
|
// Parameters:
|
|
|
|
// [out] buf - buffer for the next row, in MySQL format
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_END_OF_FILE if not found
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::rnd_next(uchar * buf) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::ha_tokudb::rnd_next");
|
2013-04-17 06:01:42 +02:00
|
|
|
int error;
|
2013-04-17 06:01:44 +02:00
|
|
|
u_int32_t flags = SET_READ_FLAG(0);
|
|
|
|
struct smart_dbt_info info;
|
2013-04-17 06:01:42 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
HANDLE_INVALID_CURSOR();
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// The reason we do not just call index_next is that index_next
|
|
|
|
// increments a different variable than we do here
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_read_rnd_next_count, &LOCK_status);
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
info.ha = this;
|
|
|
|
info.buf = buf;
|
|
|
|
info.keynr = primary_key;
|
|
|
|
|
|
|
|
error = handle_cursor_error(cursor->c_getf_next(cursor, flags, SMART_DBT_CALLBACK, &info),HA_ERR_END_OF_FILE,primary_key);
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
// Fills `to` so that it describes the primary key stored at `pos`.
// Parameters:
//      [out]   to - DBT that is zeroed and then pointed at pos
//      [in]    pos - buffer holding a saved primary key
// Returns:
//      to
//
DBT *ha_tokudb::get_pos(DBT * to, uchar * pos) {
    TOKUDB_DBUG_ENTER("ha_tokudb::get_pos");
    /* We don't need to set app_data here */
    bzero((void *) to, sizeof(*to));

    //
    // this should really be done through pack_key functions
    //
    to->data = pos;
    if (share->fixed_length_primary_key) {
        to->size = ref_length;
    }
    else {
        KEY_PART_INFO *pk_parts = table->key_info[primary_key].key_part;
        uint num_pk_parts = table->key_info[primary_key].key_parts;
        //
        // skip the leading infinity byte, then walk over each packed
        // key part to find where the key ends
        //
        uchar *walk = pos + 1;
        for (uint i = 0; i < num_pk_parts; i++) {
            walk += pk_parts[i].field->packed_col_length(walk, pk_parts[i].length);
        }
        to->size = (uint) (walk - pos);
    }
    DBUG_DUMP("key", (const uchar *) to->data, to->size);
    DBUG_RETURN(to);
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
2013-04-17 06:01:41 +02:00
|
|
|
// Retrieves a row with based on the primary key saved in pos
|
2013-04-17 06:01:40 +02:00
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// HA_ERR_KEY_NOT_FOUND if not found
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::rnd_pos(uchar * buf, uchar * pos) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::rnd_pos");
|
2013-04-17 06:01:37 +02:00
|
|
|
DBT db_pos;
|
2013-04-17 06:01:44 +02:00
|
|
|
int error;
|
2013-04-17 06:01:37 +02:00
|
|
|
statistic_increment(table->in_use->status_var.ha_read_rnd_count, &LOCK_status);
|
|
|
|
active_index = MAX_KEY;
|
2013-04-17 06:01:41 +02:00
|
|
|
DBT* key = get_pos(&db_pos, pos);
|
2013-04-17 06:01:44 +02:00
|
|
|
error = share->file->get(share->file, transaction, key, ¤t_row, 0);
|
|
|
|
if (error == DB_NOTFOUND || error == DB_KEYEMPTY) {
|
|
|
|
error = HA_ERR_KEY_NOT_FOUND;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!error) {
|
|
|
|
error = read_row(buf, primary_key, ¤t_row, key);
|
|
|
|
}
|
|
|
|
cleanup:
|
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
|
|
|
|
int ha_tokudb::read_range_first(
|
|
|
|
const key_range *start_key,
|
|
|
|
const key_range *end_key,
|
|
|
|
bool eq_range,
|
|
|
|
bool sorted)
|
|
|
|
{
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::read_range_first");
|
|
|
|
int error;
|
|
|
|
DBT start_dbt_key;
|
|
|
|
const DBT* start_dbt_data = NULL;
|
|
|
|
DBT end_dbt_key;
|
|
|
|
const DBT* end_dbt_data = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
uchar* start_key_buff = NULL;
|
|
|
|
uchar* end_key_buff = NULL;
|
|
|
|
start_key_buff = (uchar *)my_malloc(table_share->max_key_length + MAX_REF_PARTS * 3 + sizeof(uchar), MYF(MY_WME));
|
|
|
|
if (start_key_buff == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
end_key_buff = (uchar *)my_malloc(table_share->max_key_length + MAX_REF_PARTS * 3 + sizeof(uchar), MYF(MY_WME));
|
|
|
|
if (end_key_buff == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
bzero((void *) &start_dbt_key, sizeof(start_dbt_key));
|
|
|
|
bzero((void *) &end_dbt_key, sizeof(end_dbt_key));
|
2013-04-17 06:01:42 +02:00
|
|
|
range_lock_grabbed = false;
|
2013-04-17 06:01:42 +02:00
|
|
|
|
|
|
|
|
|
|
|
if (start_key) {
|
|
|
|
switch (start_key->flag) {
|
|
|
|
case HA_READ_AFTER_KEY:
|
2013-04-17 06:01:45 +02:00
|
|
|
pack_key(&start_dbt_key, active_index, start_key_buff, start_key->key, start_key->length, COL_POS_INF);
|
2013-04-17 06:01:42 +02:00
|
|
|
start_dbt_data = share->key_file[active_index]->dbt_pos_infty();
|
|
|
|
break;
|
|
|
|
default:
|
2013-04-17 06:01:45 +02:00
|
|
|
pack_key(&start_dbt_key, active_index, start_key_buff, start_key->key, start_key->length, COL_NEG_INF);
|
2013-04-17 06:01:42 +02:00
|
|
|
start_dbt_data = share->key_file[active_index]->dbt_neg_infty();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
start_dbt_data = share->key_file[active_index]->dbt_neg_infty();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (end_key) {
|
|
|
|
switch (end_key->flag) {
|
|
|
|
case HA_READ_BEFORE_KEY:
|
2013-04-17 06:01:45 +02:00
|
|
|
pack_key(&end_dbt_key, active_index, end_key_buff, end_key->key, end_key->length, COL_NEG_INF);
|
2013-04-17 06:01:42 +02:00
|
|
|
end_dbt_data = share->key_file[active_index]->dbt_neg_infty();
|
|
|
|
break;
|
|
|
|
default:
|
2013-04-17 06:01:45 +02:00
|
|
|
pack_key(&end_dbt_key, active_index, end_key_buff, end_key->key, end_key->length, COL_POS_INF);
|
2013-04-17 06:01:42 +02:00
|
|
|
end_dbt_data = share->key_file[active_index]->dbt_pos_infty();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
end_dbt_data = share->key_file[active_index]->dbt_pos_infty();
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
error = share->key_file[active_index]->pre_acquire_read_lock(
|
|
|
|
share->key_file[active_index],
|
|
|
|
transaction,
|
|
|
|
start_key ? &start_dbt_key : share->key_file[active_index]->dbt_neg_infty(),
|
|
|
|
start_dbt_data,
|
|
|
|
end_key ? &end_dbt_key : share->key_file[active_index]->dbt_pos_infty(),
|
|
|
|
end_dbt_data
|
|
|
|
);
|
2013-04-17 06:01:42 +02:00
|
|
|
if (error){
|
|
|
|
last_cursor_error = error;
|
|
|
|
//
|
|
|
|
// cursor should be initialized here, but in case it is not, we still check
|
|
|
|
//
|
|
|
|
if (cursor) {
|
|
|
|
cursor->c_close(cursor);
|
|
|
|
cursor = NULL;
|
|
|
|
}
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
range_lock_grabbed = true;
|
2013-04-17 06:01:42 +02:00
|
|
|
error = handler::read_range_first(start_key, end_key, eq_range, sorted);
|
|
|
|
|
|
|
|
cleanup:
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(start_key_buff, MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
my_free(end_key_buff, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
|
|
|
int ha_tokudb::read_range_next()
|
|
|
|
{
|
2013-04-17 06:01:42 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::read_range_next");
|
|
|
|
int error;
|
|
|
|
error = handler::read_range_next();
|
|
|
|
if (error) {
|
2013-04-17 06:01:42 +02:00
|
|
|
range_lock_grabbed = false;
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2008-02-05 17:00:53 +01:00
|
|
|
/*
|
|
|
|
Set a reference to the current record in (ref,ref_length).
|
|
|
|
|
|
|
|
SYNOPSIS
|
|
|
|
ha_tokudb::position()
|
|
|
|
record The current record buffer
|
|
|
|
|
|
|
|
DESCRIPTION
|
|
|
|
The BDB handler stores the primary key in (ref,ref_length).
|
|
|
|
There is either an explicit primary key, or an implicit (hidden)
|
|
|
|
primary key.
|
|
|
|
During open(), 'ref_length' is calculated as the maximum primary
|
|
|
|
key length. When an actual key is shorter than that, the rest of
|
|
|
|
the buffer must be cleared out. The row cannot be identified, if
|
|
|
|
garbage follows behind the end of the key. There is no length
|
|
|
|
field for the current key, so that the whole ref_length is used
|
|
|
|
for comparison.
|
|
|
|
|
|
|
|
RETURN
|
|
|
|
nothing
|
|
|
|
*/
|
2013-04-17 06:01:37 +02:00
|
|
|
void ha_tokudb::position(const uchar * record) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::position");
|
2013-04-17 06:01:37 +02:00
|
|
|
DBT key;
|
|
|
|
if (hidden_primary_key) {
|
|
|
|
DBUG_ASSERT(ref_length == TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
|
|
|
|
memcpy_fixed(ref, (char *) current_ident, TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
bool has_null;
|
|
|
|
create_dbt_key_from_table(&key, primary_key, ref, record, &has_null);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (key.size < ref_length) {
|
2013-04-17 06:01:37 +02:00
|
|
|
bzero(ref + key.size, ref_length - key.size);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Per InnoDB: Returns statistics information of the table to the MySQL interpreter,
|
|
|
|
// in various fields of the handle object.
|
|
|
|
// Return:
|
|
|
|
// 0, always success
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::info(uint flag) {
|
2013-04-17 06:01:43 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::info %p %d %lld", this, flag, share->rows);
|
2013-04-17 06:01:37 +02:00
|
|
|
if (flag & HA_STATUS_VARIABLE) {
|
|
|
|
// Just to get optimizations right
|
2013-04-17 06:01:46 +02:00
|
|
|
stats.records = share->rows + share->rows_from_locked_table;
|
2013-04-17 06:01:37 +02:00
|
|
|
stats.deleted = 0;
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
if ((flag & HA_STATUS_CONST)) {
|
2013-04-17 06:01:37 +02:00
|
|
|
for (uint i = 0; i < table_share->keys; i++) {
|
2013-04-17 06:01:43 +02:00
|
|
|
table->key_info[i].rec_per_key[table->key_info[i].key_parts - 1] = 0;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Don't return key if we got an error for the internal primary key */
|
2013-04-17 06:01:45 +02:00
|
|
|
if (flag & HA_STATUS_ERRKEY && last_dup_key < table_share->keys) {
|
2013-04-17 06:01:37 +02:00
|
|
|
errkey = last_dup_key;
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
|
2013-04-17 06:01:45 +02:00
|
|
|
THD *thd= table->in_use;
|
|
|
|
struct system_variables *variables= &thd->variables;
|
2013-04-17 06:01:46 +02:00
|
|
|
stats.auto_increment_value = share->last_auto_increment + variables->auto_increment_increment;
|
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(0);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Per InnoDB: Tells something additional to the handler about how to do things.
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::extra(enum ha_extra_function operation) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("extra %p %d", this, operation);
|
2013-04-17 06:01:37 +02:00
|
|
|
switch (operation) {
|
|
|
|
case HA_EXTRA_RESET_STATE:
|
|
|
|
reset();
|
|
|
|
break;
|
|
|
|
case HA_EXTRA_KEYREAD:
|
|
|
|
key_read = 1; // Query satisfied with key
|
|
|
|
break;
|
|
|
|
case HA_EXTRA_NO_KEYREAD:
|
|
|
|
key_read = 0;
|
|
|
|
break;
|
|
|
|
case HA_EXTRA_IGNORE_DUP_KEY:
|
|
|
|
using_ignore = 1;
|
|
|
|
break;
|
|
|
|
case HA_EXTRA_NO_IGNORE_DUP_KEY:
|
|
|
|
using_ignore = 0;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_RETURN(0);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int ha_tokudb::reset(void) {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::reset");
|
2013-04-17 06:01:37 +02:00
|
|
|
key_read = 0;
|
|
|
|
using_ignore = 0;
|
|
|
|
if (current_row.flags & (DB_DBT_MALLOC | DB_DBT_REALLOC)) {
|
|
|
|
current_row.flags = 0;
|
|
|
|
if (current_row.data) {
|
2013-04-17 06:01:46 +02:00
|
|
|
dlfree(current_row.data);
|
2013-04-17 06:01:37 +02:00
|
|
|
current_row.data = 0;
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_RETURN(0);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// helper function that iterates through all DB's
|
|
|
|
// and grabs a lock (either read or write, but not both)
|
|
|
|
// Parameters:
|
|
|
|
// [in] trans - transaction to be used to pre acquire the lock
|
|
|
|
// lt - type of lock to get, either lock_read or lock_write
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
|
|
|
int ha_tokudb::acquire_table_lock (DB_TXN* trans, TABLE_LOCK_TYPE lt) {
|
|
|
|
int error = ENOSYS;
|
|
|
|
uint curr_num_DBs = table->s->keys + test(hidden_primary_key);
|
|
|
|
if (lt == lock_read) {
|
|
|
|
for (uint i = 0; i < curr_num_DBs; i++) {
|
|
|
|
DB* db = share->key_file[i];
|
|
|
|
error = db->pre_acquire_read_lock(
|
|
|
|
db,
|
|
|
|
trans,
|
|
|
|
db->dbt_neg_infty(), db->dbt_neg_infty(),
|
|
|
|
db->dbt_pos_infty(), db->dbt_pos_infty()
|
|
|
|
);
|
|
|
|
if (error) { goto cleanup; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (lt == lock_write) {
|
|
|
|
for (uint i = 0; i < curr_num_DBs; i++) {
|
|
|
|
DB* db = share->key_file[i];
|
|
|
|
error = db->pre_acquire_table_lock(db, trans);
|
|
|
|
if (error) { goto cleanup; }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
error = ENOSYS;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
cleanup:
|
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2008-02-05 17:00:53 +01:00
|
|
|
/*
|
|
|
|
As MySQL will execute an external lock for every new table it uses
|
|
|
|
we can use this to start the transactions.
|
|
|
|
If we are in auto_commit mode we just need to start a transaction
|
|
|
|
for the statement to be able to rollback the statement.
|
|
|
|
If not, we have to start a master transaction if there doesn't exist
|
|
|
|
one from before.
|
|
|
|
*/
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Parameters:
|
|
|
|
// [in] thd - handle to the user thread
|
|
|
|
// lock_type - the type of lock
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::external_lock(THD * thd, int lock_type) {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::external_lock %d", thd_sql_command(thd));
|
2013-04-17 06:01:40 +02:00
|
|
|
// QQQ this is here to allow experiments without transactions
|
2013-04-17 06:01:42 +02:00
|
|
|
int error = 0;
|
2013-04-17 06:01:45 +02:00
|
|
|
ulong tx_isolation = thd_tx_isolation(thd);
|
|
|
|
HA_TOKU_ISO_LEVEL toku_iso_level = tx_to_toku_iso(tx_isolation);
|
2013-04-17 06:01:42 +02:00
|
|
|
tokudb_trx_data *trx = NULL;
|
2013-04-17 06:01:46 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// reset per-stmt variables
|
|
|
|
//
|
|
|
|
num_added_rows_in_stmt = 0;
|
|
|
|
num_deleted_rows_in_stmt = 0;
|
|
|
|
num_updated_rows_in_stmt = 0;
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
|
2013-04-17 06:01:37 +02:00
|
|
|
if (!trx) {
|
2013-04-17 06:01:45 +02:00
|
|
|
trx = (tokudb_trx_data *) my_malloc(sizeof(*trx), MYF(MY_ZEROFILL));
|
2013-04-17 06:01:42 +02:00
|
|
|
if (!trx) {
|
2013-04-17 06:01:42 +02:00
|
|
|
error = 1;
|
|
|
|
goto cleanup;
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
trx->iso_level = hatoku_iso_not_set;
|
2013-04-17 06:01:41 +02:00
|
|
|
thd_data_set(thd, tokudb_hton->slot, trx);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
if (trx->all == NULL) {
|
|
|
|
trx->sp_level = NULL;
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
if (lock_type != F_UNLCK) {
|
|
|
|
if (!trx->tokudb_lock_count++) {
|
|
|
|
DBUG_ASSERT(trx->stmt == 0);
|
2013-04-17 06:01:40 +02:00
|
|
|
transaction = NULL; // Safety
|
2013-04-17 06:01:37 +02:00
|
|
|
/* First table lock, start transaction */
|
2013-04-17 06:01:46 +02:00
|
|
|
if ((thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN | OPTION_TABLE_LOCK)) &&
|
|
|
|
!trx->all &&
|
2013-04-17 06:01:46 +02:00
|
|
|
(thd_sql_command(thd) != SQLCOM_CREATE_TABLE) &&
|
|
|
|
(thd_sql_command(thd) != SQLCOM_DROP_TABLE) &&
|
|
|
|
(thd_sql_command(thd) != SQLCOM_ALTER_TABLE)) {
|
2013-04-17 06:01:38 +02:00
|
|
|
/* QQQ We have to start a master transaction */
|
2013-04-17 06:01:37 +02:00
|
|
|
DBUG_PRINT("trans", ("starting transaction all: options: 0x%lx", (ulong) thd->options));
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// set the isolation level for the tranaction
|
|
|
|
//
|
|
|
|
trx->iso_level = toku_iso_level;
|
|
|
|
if ((error = db_env->txn_begin(db_env, NULL, &trx->all, toku_iso_to_txn_flag(toku_iso_level)))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
trx->tokudb_lock_count--; // We didn't get the lock
|
2013-04-17 06:01:42 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_TXN) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("master:%p\n", trx->all);
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
trx->sp_level = trx->all;
|
|
|
|
trans_register_ha(thd, TRUE, tokudb_hton);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (thd->in_lock_tables && thd_sql_command(thd) == SQLCOM_LOCK_TABLES) {
|
2013-04-17 06:01:42 +02:00
|
|
|
//
|
|
|
|
// grab table locks
|
|
|
|
// For the command "Lock tables foo read, bar read"
|
|
|
|
// This statement is grabbing the locks for the table
|
|
|
|
// foo. The locks for bar will be grabbed when
|
|
|
|
// trx->tokudb_lock_count has been initialized
|
|
|
|
//
|
2013-04-17 06:01:42 +02:00
|
|
|
if (lock.type <= TL_READ_NO_INSERT) {
|
2013-04-17 06:01:42 +02:00
|
|
|
error = acquire_table_lock(trx->all,lock_read);
|
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
else {
|
2013-04-17 06:01:42 +02:00
|
|
|
error = acquire_table_lock(trx->all,lock_write);
|
|
|
|
}
|
|
|
|
// Don't create stmt trans
|
|
|
|
if (error) {trx->tokudb_lock_count--;}
|
2013-04-17 06:01:42 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
DBUG_PRINT("trans", ("starting transaction stmt"));
|
2013-04-17 06:01:42 +02:00
|
|
|
if (trx->stmt) {
|
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_TXN) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("warning:stmt=%p\n", trx->stmt);
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
u_int32_t txn_begin_flags;
|
|
|
|
if (trx->iso_level == hatoku_iso_not_set) {
|
|
|
|
txn_begin_flags = toku_iso_to_txn_flag(toku_iso_level);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
txn_begin_flags = toku_iso_to_txn_flag(trx->iso_level);
|
|
|
|
}
|
|
|
|
if ((error = db_env->txn_begin(db_env, trx->sp_level, &trx->stmt, txn_begin_flags))) {
|
2013-04-17 06:01:37 +02:00
|
|
|
/* We leave the possible master transaction open */
|
|
|
|
trx->tokudb_lock_count--; // We didn't get the lock
|
2013-04-17 06:01:42 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_TXN) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("stmt:%p:%p\n", trx->sp_level, trx->stmt);
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
trans_register_ha(thd, FALSE, tokudb_hton);
|
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
else {
|
2013-04-17 06:01:45 +02:00
|
|
|
if (thd->in_lock_tables && thd_sql_command(thd) == SQLCOM_LOCK_TABLES) {
|
2013-04-17 06:01:42 +02:00
|
|
|
assert(trx->all != NULL);
|
|
|
|
//
|
|
|
|
// For the command "Lock tables foo read, bar read"
|
|
|
|
// This statement is grabbing the locks for the table
|
|
|
|
// bar. The locks for foo will be grabbed when
|
|
|
|
// trx->tokudb_lock_count is 0 and we are initializing
|
|
|
|
// trx->all above
|
|
|
|
//
|
2013-04-17 06:01:44 +02:00
|
|
|
if (lock.type <= TL_READ_NO_INSERT) {
|
2013-04-17 06:01:42 +02:00
|
|
|
error = acquire_table_lock(trx->all,lock_read);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
else {
|
2013-04-17 06:01:42 +02:00
|
|
|
error = acquire_table_lock(trx->all,lock_write);
|
|
|
|
}
|
|
|
|
if (error) {trx->tokudb_lock_count--; goto cleanup;}
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
transaction = trx->stmt;
|
2013-04-17 06:01:42 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:37 +02:00
|
|
|
lock.type = TL_UNLOCK; // Unlocked
|
2013-04-17 06:01:43 +02:00
|
|
|
|
|
|
|
pthread_mutex_lock(&share->mutex);
|
|
|
|
// hate dealing with comparison of signed vs unsigned, so doing this
|
|
|
|
if (deleted_rows > added_rows && share->rows < (deleted_rows - added_rows)) {
|
|
|
|
share->rows = 0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
share->rows += (added_rows - deleted_rows);
|
|
|
|
}
|
|
|
|
pthread_mutex_unlock(&share->mutex);
|
|
|
|
added_rows = 0;
|
|
|
|
deleted_rows = 0;
|
2013-04-17 06:01:46 +02:00
|
|
|
share->rows_from_locked_table = 0;
|
2013-04-17 06:01:37 +02:00
|
|
|
if (!--trx->tokudb_lock_count) {
|
|
|
|
if (trx->stmt) {
|
|
|
|
/*
|
|
|
|
F_UNLCK is done without a transaction commit / rollback.
|
|
|
|
This happens if the thread didn't update any rows
|
|
|
|
We must in this case commit the work to keep the row locks
|
|
|
|
*/
|
|
|
|
DBUG_PRINT("trans", ("commiting non-updating transaction"));
|
|
|
|
error = trx->stmt->commit(trx->stmt, 0);
|
2013-04-17 06:01:39 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_TXN)
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("commit:%p:%d\n", trx->stmt, error);
|
2013-04-17 06:01:43 +02:00
|
|
|
trx->stmt = NULL;
|
2013-04-17 06:01:39 +02:00
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:43 +02:00
|
|
|
transaction = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:42 +02:00
|
|
|
cleanup:
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
When using LOCK TABLE's external_lock is only called when the actual
|
|
|
|
TABLE LOCK is done.
|
|
|
|
Under LOCK TABLES, each used table will force a call to start_stmt.
|
|
|
|
*/
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
//
// Prepares this handler for a new statement. Resets the per-statement row
// counters and makes sure a statement-level transaction exists for the
// connection, starting one (as a child of trx->sp_level) if needed.
// Parameters:
//      [in]    thd - connection whose per-engine transaction data is used
//              lock_type - not used by this implementation
// Returns:
//      0 on success
//      error from txn_begin otherwise; in that case nothing is registered
//      with the server and this->transaction is left untouched
//
int ha_tokudb::start_stmt(THD * thd, thr_lock_type lock_type) {
    TOKUDB_DBUG_ENTER("ha_tokudb::start_stmt");
    int error = 0;

    //
    // reset per-stmt variables
    //
    num_added_rows_in_stmt = 0;
    num_deleted_rows_in_stmt = 0;
    num_updated_rows_in_stmt = 0;

    tokudb_trx_data *trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
    DBUG_ASSERT(trx);
    /*
       note that trx->stmt may have been already initialized as start_stmt()
       is called for *each table* not for each storage engine,
       and there could be many tables of this engine referenced in the query
     */
    if (!trx->stmt) {
        DBUG_PRINT("trans", ("starting transaction stmt"));
        error = db_env->txn_begin(db_env, trx->sp_level, &trx->stmt, toku_iso_to_txn_flag(trx->iso_level));
        if (error) {
            // same policy as external_lock(): do not register a statement
            // transaction with the server when none could be started
            goto cleanup;
        }
        trans_register_ha(thd, FALSE, tokudb_hton);
    }
    if (added_rows > deleted_rows) {
        share->rows_from_locked_table = added_rows - deleted_rows;
    }
    transaction = trx->stmt;
cleanup:
    TOKUDB_DBUG_RETURN(error);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
The idea with handler::store_lock() is the following:
|
|
|
|
|
|
|
|
The statement decided which locks we should need for the table
|
|
|
|
for updates/deletes/inserts we get WRITE locks, for SELECT... we get
|
|
|
|
read locks.
|
|
|
|
|
|
|
|
Before adding the lock into the table lock handler (see thr_lock.c)
|
|
|
|
mysqld calls store lock with the requested locks. Store lock can now
|
|
|
|
modify a write lock to a read lock (or some other lock), ignore the
|
|
|
|
lock (if we don't want to use MySQL table locks at all) or add locks
|
|
|
|
for many tables (like we do when we are using a MERGE handler).
|
|
|
|
|
|
|
|
Tokudb DB changes all WRITE locks to TL_WRITE_ALLOW_WRITE (which
|
|
|
|
signals that we are doing WRITES, but we are still allowing other
|
|
|
|
readers and writers).
|
|
|
|
|
|
|
|
When releasing locks, store_lock() are also called. In this case one
|
|
|
|
usually doesn't have to do anything.
|
|
|
|
|
|
|
|
In some exceptional cases MySQL may send a request for a TL_IGNORE;
|
|
|
|
This means that we are requesting the same lock as last time and this
|
|
|
|
should also be ignored. (This may happen when someone does a flush
|
|
|
|
table when we have opened a part of the tables, in which case mysqld
|
|
|
|
closes and reopens the tables and tries to get the same locks as last
|
|
|
|
time). In the future we will probably try to remove this.
|
|
|
|
*/
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
//
// Records the lock type the server wants for this table and hands the
// handler's THR_LOCK_DATA back to the caller.
// Parameters:
//      [in]    thd - connection requesting the lock
//      [out]   to - slot that receives a pointer to this handler's lock
//              lock_type - lock requested by the server
// Returns:
//      pointer to the slot following the one that was filled in
//
THR_LOCK_DATA **ha_tokudb::store_lock(THD * thd, THR_LOCK_DATA ** to, enum thr_lock_type lock_type) {
    TOKUDB_DBUG_ENTER("ha_tokudb::store_lock, lock_type=%d cmd=%d", lock_type, thd_sql_command(thd));

    // only a real request on a currently unlocked handler changes lock.type
    if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
        const bool is_write_request =
            (lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE);
        /* If we are not doing a LOCK TABLE, then allow multiple writers */
        const bool allow_concurrent_writers =
            is_write_request &&
            !thd->in_lock_tables &&
            thd_sql_command(thd) != SQLCOM_TRUNCATE;

        lock.type = allow_concurrent_writers ? TL_WRITE_ALLOW_WRITE : lock_type;
    }

    *to = &lock;
    DBUG_RETURN(to + 1);
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
static int create_sub_table(const char *table_name, int flags) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("create_sub_table");
|
2013-04-17 06:01:37 +02:00
|
|
|
int error;
|
|
|
|
DB *file;
|
2013-04-17 06:01:46 +02:00
|
|
|
DBUG_PRINT("enter", ("flags: %d", flags));
|
2013-04-17 06:01:37 +02:00
|
|
|
|
|
|
|
if (!(error = db_create(&file, db_env, 0))) {
|
|
|
|
file->set_flags(file, flags);
|
2013-04-17 06:01:46 +02:00
|
|
|
error = (file->open(file, NULL, table_name, NULL, DB_BTREE, DB_THREAD | DB_CREATE, my_umask));
|
2013-04-17 06:01:37 +02:00
|
|
|
if (error) {
|
|
|
|
DBUG_PRINT("error", ("Got error: %d when opening table '%s'", error, table_name));
|
|
|
|
(void) file->remove(file, table_name, NULL, 0);
|
|
|
|
} else
|
|
|
|
(void) file->close(file, 0);
|
|
|
|
} else {
|
|
|
|
DBUG_PRINT("error", ("Got error: %d when creating table", error));
|
|
|
|
}
|
|
|
|
if (error)
|
|
|
|
my_errno = error;
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
static int mkdirpath(char *name, mode_t mode) {
|
|
|
|
char* parent = NULL;
|
2013-04-17 06:01:46 +02:00
|
|
|
int r = toku_os_mkdir(name, mode);
|
2013-04-17 06:01:46 +02:00
|
|
|
if (r == -1 && errno == ENOENT) {
|
2013-04-17 06:01:46 +02:00
|
|
|
parent = (char *)my_malloc(strlen(name)+1,MYF(MY_WME));
|
|
|
|
if (parent == NULL) {
|
|
|
|
r = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
strcpy(parent, name);
|
|
|
|
char *cp = strrchr(parent, '/');
|
|
|
|
if (cp) {
|
|
|
|
*cp = 0;
|
2013-04-17 06:01:46 +02:00
|
|
|
r = toku_os_mkdir(parent, 0755);
|
2013-04-17 06:01:46 +02:00
|
|
|
if (r == 0)
|
2013-04-17 06:01:46 +02:00
|
|
|
r = toku_os_mkdir(name, mode);
|
2013-04-17 06:01:46 +02:00
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
cleanup:
|
|
|
|
my_free(parent, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:46 +02:00
|
|
|
return r;
|
2013-04-17 06:01:46 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
extern "C" {
|
2013-04-17 06:01:46 +02:00
|
|
|
#include <dirent.h>
|
2013-04-17 06:01:46 +02:00
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
static int rmall(const char *dname) {
|
|
|
|
int error = 0;
|
|
|
|
DIR *d = opendir(dname);
|
2013-04-17 06:01:46 +02:00
|
|
|
char* fname = NULL;
|
2013-04-17 06:01:39 +02:00
|
|
|
if (d) {
|
|
|
|
struct dirent *dirent;
|
|
|
|
while ((dirent = readdir(d)) != 0) {
|
|
|
|
if (0 == strcmp(dirent->d_name, ".") || 0 == strcmp(dirent->d_name, ".."))
|
|
|
|
continue;
|
2013-04-17 06:01:46 +02:00
|
|
|
fname = (char *)my_malloc(strlen(dname) + 1 + strlen(dirent->d_name) + 1, MYF(MY_WME));
|
2013-04-17 06:01:39 +02:00
|
|
|
sprintf(fname, "%s/%s", dname, dirent->d_name);
|
|
|
|
if (dirent->d_type == DT_DIR) {
|
|
|
|
error = rmall(fname);
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
|
|
|
TOKUDB_TRACE("removing:%s\n", fname);
|
|
|
|
}
|
|
|
|
//
|
|
|
|
// if clause checks if the file is a .tokudb file
|
|
|
|
//
|
|
|
|
if (strlen(fname) >= strlen (ha_tokudb_ext) &&
|
|
|
|
strcmp(fname + (strlen(fname) - strlen(ha_tokudb_ext)), ha_tokudb_ext) == 0)
|
|
|
|
{
|
|
|
|
//
|
|
|
|
// if this fails under low memory conditions, gracefully exit and return error
|
|
|
|
// user will be notified that something went wrong, and he will
|
|
|
|
// have to deal with it
|
|
|
|
//
|
|
|
|
DB* db = NULL;
|
|
|
|
error = db_create(&db, db_env, 0);
|
|
|
|
if (error) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
//
|
|
|
|
// it is ok to do db->remove on any .tokudb file, because any such
|
|
|
|
// file was created with db->open
|
|
|
|
//
|
|
|
|
db->remove(db, fname, NULL, 0);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
//
|
|
|
|
// in case we have some file that is not .tokudb, we just delete it
|
|
|
|
//
|
|
|
|
error = unlink(fname);
|
|
|
|
if (error != 0) {
|
|
|
|
error = errno;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
my_free(fname, MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
fname = NULL;
|
2013-04-17 06:01:39 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
closedir(d);
|
|
|
|
if (error == 0) {
|
|
|
|
error = rmdir(dname);
|
|
|
|
if (error != 0)
|
|
|
|
error = errno;
|
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
|
|
|
else {
|
2013-04-17 06:01:39 +02:00
|
|
|
error = errno;
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
return error;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Creates a new table
|
|
|
|
// Parameters:
|
|
|
|
// [in] name - table name
|
|
|
|
// [in] form - info on table, columns and indexes
|
|
|
|
// [in] create_info - more info on table, CURRENTLY UNUSED
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::create(const char *name, TABLE * form, HA_CREATE_INFO * create_info) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::create");
|
2013-04-17 06:01:37 +02:00
|
|
|
char name_buff[FN_REFLEN];
|
|
|
|
int error;
|
2013-04-17 06:01:45 +02:00
|
|
|
DB *status_block = NULL;
|
|
|
|
bool dir_path_made = false;
|
|
|
|
char* dirname = NULL;
|
|
|
|
char* newname = NULL;
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
dirname = (char *)my_malloc(get_name_length(name) + NAME_CHAR_LEN,MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (dirname == NULL){ error = ENOMEM; goto cleanup;}
|
2013-04-17 06:01:45 +02:00
|
|
|
newname = (char *)my_malloc(get_name_length(name) + NAME_CHAR_LEN,MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (newname == NULL){ error = ENOMEM; goto cleanup;}
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
uint i;
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// tracing information about what type of table we are creating
|
|
|
|
//
|
2013-04-17 06:01:40 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
|
|
|
for (i = 0; i < form->s->fields; i++) {
|
|
|
|
Field *field = form->s->field[i];
|
|
|
|
TOKUDB_TRACE("field:%d:%s:type=%d:flags=%x\n", i, field->field_name, field->type(), field->flags);
|
|
|
|
}
|
|
|
|
for (i = 0; i < form->s->keys; i++) {
|
|
|
|
KEY *key = &form->s->key_info[i];
|
|
|
|
TOKUDB_TRACE("key:%d:%s:%d\n", i, key->name, key->key_parts);
|
|
|
|
uint p;
|
|
|
|
for (p = 0; p < key->key_parts; p++) {
|
|
|
|
KEY_PART_INFO *key_part = &key->key_part[p];
|
|
|
|
Field *field = key_part->field;
|
|
|
|
TOKUDB_TRACE("key:%d:%d:length=%d:%s:type=%d:flags=%x\n",
|
|
|
|
i, p, key_part->length, field->field_name, field->type(), field->flags);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
|
2013-04-17 06:01:38 +02:00
|
|
|
// a table is a directory of dictionaries
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(dirname, name, 0);
|
|
|
|
error = mkdirpath(dirname, 0777);
|
2013-04-17 06:01:37 +02:00
|
|
|
if (error != 0) {
|
2013-04-17 06:01:45 +02:00
|
|
|
error = errno;
|
|
|
|
goto cleanup;
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
dir_path_made = true;
|
2013-04-17 06:01:39 +02:00
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(newname, name, "main");
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
2013-04-17 06:01:37 +02:00
|
|
|
|
|
|
|
/* Create the main table that will hold the real rows */
|
2013-04-17 06:01:46 +02:00
|
|
|
error = create_sub_table(name_buff, 0);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("create:%s:error=%d\n", newname, error);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
if (error) {
|
2013-04-17 06:01:45 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:39 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
|
|
|
|
primary_key = form->s->primary_key;
|
|
|
|
|
|
|
|
/* Create the keys */
|
2013-04-17 06:01:41 +02:00
|
|
|
char part[MAX_ALIAS_NAME + 10];
|
2013-04-17 06:01:37 +02:00
|
|
|
for (uint i = 0; i < form->s->keys; i++) {
|
|
|
|
if (i != primary_key) {
|
2013-04-17 06:01:45 +02:00
|
|
|
int flags = (form->s->key_info[i].flags & HA_CLUSTERING) ? 0 : DB_DUP + DB_DUPSORT;
|
2013-04-17 06:01:41 +02:00
|
|
|
sprintf(part, "key-%s", form->s->key_info[i].name);
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(newname, name, part);
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
2013-04-17 06:01:46 +02:00
|
|
|
error = create_sub_table(name_buff, flags);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_TRACE("create:%s:flags=%ld:error=%d\n", newname, form->key_info[i].flags, error);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
if (error) {
|
2013-04-17 06:01:45 +02:00
|
|
|
goto cleanup;
|
2013-04-17 06:01:39 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
}
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:43 +02:00
|
|
|
/* Create status.tokudb and save relevant metadata */
|
2013-04-17 06:01:37 +02:00
|
|
|
if (!(error = (db_create(&status_block, db_env, 0)))) {
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(newname, name, "status");
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
if (!(error = (status_block->open(status_block, NULL, name_buff, NULL, DB_BTREE, DB_CREATE, 0)))) {
|
2013-04-17 06:01:43 +02:00
|
|
|
uint version = HA_TOKU_VERSION;
|
|
|
|
uint capabilities = HA_TOKU_CAP;
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
error = write_metadata(status_block, hatoku_version,&version,sizeof(version));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (error) { goto cleanup; }
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
error = write_metadata(status_block, hatoku_capabilities,&capabilities,sizeof(capabilities));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (error) { goto cleanup; }
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
error = write_auto_inc_create(status_block, create_info->auto_increment_value);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (error) { goto cleanup; }
|
2013-04-17 06:01:44 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
cleanup:
|
|
|
|
if (status_block != NULL) {
|
|
|
|
status_block->close(status_block, 0);
|
|
|
|
}
|
|
|
|
if (error && dir_path_made) {
|
2013-04-17 06:01:39 +02:00
|
|
|
rmall(dirname);
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
my_free(dirname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// Drops table
|
|
|
|
// Parameters:
|
|
|
|
// [in] name - name of table to be deleted
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::delete_table(const char *name) {
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::delete_table");
|
2013-04-17 06:01:37 +02:00
|
|
|
int error;
|
2013-04-17 06:01:38 +02:00
|
|
|
// remove all of the dictionaries in the table directory
|
2013-04-17 06:01:45 +02:00
|
|
|
char* newname = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
newname = (char *)my_malloc((tokudb_data_dir ? strlen(tokudb_data_dir) : 0) + strlen(name) + NAME_CHAR_LEN, MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (newname == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(newname, name, 0);
|
2013-04-17 06:01:37 +02:00
|
|
|
error = rmall(newname);
|
|
|
|
my_errno = error;
|
2013-04-17 06:01:45 +02:00
|
|
|
cleanup:
|
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:39 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
|
|
|
// renames table from "from" to "to"
|
|
|
|
// Parameters:
|
|
|
|
//      [in]    from - old name of table
|
|
|
|
// [in] to - new name of table
|
|
|
|
// Returns:
|
|
|
|
// 0 on success
|
|
|
|
// error otherwise
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::rename_table(const char *from, const char *to) {
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_ENTER("%s %s %s", __FUNCTION__, from, to);
|
2013-04-17 06:01:37 +02:00
|
|
|
int error;
|
2013-04-17 06:01:45 +02:00
|
|
|
char* newfrom = NULL;
|
|
|
|
char* newto = NULL;
|
2013-04-17 06:01:37 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
int n = get_name_length(from) + NAME_CHAR_LEN;
|
2013-04-17 06:01:45 +02:00
|
|
|
newfrom = (char *)my_malloc(n,MYF(MY_WME));
|
|
|
|
if (newfrom == NULL){
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(newfrom, from, 0);
|
2013-04-17 06:01:45 +02:00
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
n = get_name_length(to) + NAME_CHAR_LEN;
|
2013-04-17 06:01:45 +02:00
|
|
|
newto = (char *)my_malloc(n,MYF(MY_WME));
|
|
|
|
if (newto == NULL){
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:39 +02:00
|
|
|
make_name(newto, to, 0);
|
2013-04-17 06:01:45 +02:00
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
error = rename(newfrom, newto);
|
2013-04-17 06:01:45 +02:00
|
|
|
if (error != 0) {
|
2013-04-17 06:01:37 +02:00
|
|
|
error = my_errno = errno;
|
2013-04-17 06:01:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
my_free(newfrom, MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
my_free(newto, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:40 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
2013-04-17 06:01:40 +02:00
|
|
|
Returns estimate on number of seeks it will take to read through the table
|
2008-02-05 17:00:53 +01:00
|
|
|
This is to be comparable to the number returned by records_in_range so
|
|
|
|
that we can decide if we should scan the table or use keys.
|
|
|
|
*/
|
2013-04-17 06:01:38 +02:00
|
|
|
/// QQQ why divide by 3
|
2013-04-17 06:01:37 +02:00
|
|
|
double ha_tokudb::scan_time() {
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::scan_time");
|
2013-04-17 06:01:45 +02:00
|
|
|
double ret_val = (double)stats.records / 3;
|
2013-04-17 06:01:41 +02:00
|
|
|
DBUG_RETURN(ret_val);
|
2013-04-17 06:01:37 +02:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:42 +02:00
|
|
|
//
|
|
|
|
// Calculate the time it takes to read a set of ranges through an index
|
|
|
|
// This enables us to optimize reads for clustered indexes.
|
|
|
|
// Implementation pulled from InnoDB
|
|
|
|
// Parameters:
|
|
|
|
// index - index to use
|
|
|
|
// ranges - number of ranges
|
|
|
|
// rows - estimated number of rows in the range
|
|
|
|
// Returns:
|
|
|
|
// estimated time measured in disk seeks
|
|
|
|
//
|
|
|
|
double ha_tokudb::read_time(
    uint    index,
    uint    ranges,
    ha_rows rows
    )
{
    //
    // in case for hidden primary key, this is called
    //
    if (index >= table_share->keys) {
        return handler::read_time(index, ranges, rows);
    }

    //
    // if it is not the primary key, and it is not a clustering key, then return handler::read_time
    //
    if (index != primary_key && !(table->key_info[index].flags & HA_CLUSTERING)) {
        return handler::read_time(index, ranges, rows);
    }

    //
    // for primary key and for clustered keys, return a fraction of scan_time()
    //
    double total_scan = scan_time();

    if (stats.records < rows) {
        return total_scan;
    }

    //
    // with no recorded rows, rows is 0 as well at this point; the
    // proportional term below would be 0/0 (NaN), so only the per-range
    // seeks are charged
    //
    if (stats.records == 0) {
        return (double) ranges;
    }

    //
    // one disk seek per range plus the proportional scan time of the rows
    //
    return (ranges + (double) rows / (double) stats.records * total_scan);
}
|
|
|
|
|
|
|
|
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
2013-04-17 06:01:41 +02:00
|
|
|
// Estimates the number of index records in a range. In case of errors, return
|
|
|
|
// HA_TOKUDB_RANGE_COUNT instead of HA_POS_ERROR. This was behavior
|
|
|
|
// when we got the handlerton from MySQL.
|
|
|
|
// Parameters:
|
|
|
|
// keynr -index to use
|
|
|
|
// [in] start_key - low end of the range
|
|
|
|
// [in] end_key - high end of the range
|
|
|
|
// Returns:
|
|
|
|
// 0 - There are no matching keys in the given range
|
|
|
|
// number > 0 - There are approximately number matching rows in the range
|
|
|
|
// HA_POS_ERROR - Something is wrong with the index tree
|
2013-04-17 06:01:40 +02:00
|
|
|
//
|
2013-04-17 06:01:41 +02:00
|
|
|
ha_rows ha_tokudb::records_in_range(uint keynr, key_range* start_key, key_range* end_key) {
    TOKUDB_DBUG_ENTER("ha_tokudb::records_in_range");
    // every early exit through cleanup reports this fallback estimate
    ha_rows ret_val = HA_TOKUDB_RANGE_COUNT;
    DB *kfile = share->key_file[keynr];
    DBC* tmp_cursor = NULL;
    DBT key, after_key;
    struct heavi_info heavi_info;
    u_int64_t less, equal, greater;
    u_int64_t after_key_less, after_key_equal, after_key_greater;
    u_int64_t start_rows, end_rows;
    int is_exact;
    int error;

    heavi_info.db = kfile;
    heavi_info.key = &key;
    after_key.data = key_buff2;

    error = kfile->cursor(kfile, transaction, &tmp_cursor, 0);
    if (error) {
        goto cleanup;
    }

    //
    // get start_rows and end_rows values so that we can estimate range
    // when calling key_range64, the only value we can trust is the value for less
    // The reason is that the key being passed in may be a prefix of keys in the DB
    // As a result, equal may be 0 and greater may actually be equal+greater
    // So, we call key_range64 on the key, and the key that is after it.
    //

    // ---- lower bound: number of rows in front of the range ----
    if (!start_key) {
        start_rows = 0;
    }
    else {
        pack_key(&key, keynr, key_buff, start_key->key, start_key->length, COL_NEG_INF);
        error = kfile->key_range64(kfile, transaction, &key, &less, &equal, &greater, &is_exact);
        if (error) {
            goto cleanup;
        }
        if (start_key->flag == HA_READ_KEY_EXACT) {
            start_rows = less;
        }
        else {
            // locate the key that follows the packed key
            error = tmp_cursor->c_getf_heavi(
                tmp_cursor,
                0,
                smart_dbt_callback_ror_heavi,
                &after_key,
                after_key_heavi,
                &heavi_info,
                1
                );
            if (error == DB_NOTFOUND) {
                // nothing follows the key: every row is in front of the range
                start_rows = stats.records;
            }
            else if (error) {
                goto cleanup;
            }
            else {
                error = kfile->key_range64(
                    kfile,
                    transaction,
                    &after_key,
                    &after_key_less,
                    &after_key_equal,
                    &after_key_greater,
                    &is_exact
                    );
                if (error) {
                    goto cleanup;
                }
                start_rows = after_key_less;
            }
        }
    }

    // ---- upper bound: number of rows in front of the end of the range ----
    if (!end_key) {
        end_rows = stats.records;
    }
    else {
        pack_key(&key, keynr, key_buff, end_key->key, end_key->length, COL_NEG_INF);
        error = kfile->key_range64(kfile, transaction, &key, &less, &equal, &greater, &is_exact);
        if (error) {
            goto cleanup;
        }
        if (end_key->flag == HA_READ_BEFORE_KEY) {
            end_rows = less;
        }
        else {
            error = tmp_cursor->c_getf_heavi(
                tmp_cursor,
                0,
                smart_dbt_callback_ror_heavi,
                &after_key,
                after_key_heavi,
                &heavi_info,
                1
                );
            if (error == DB_NOTFOUND) {
                end_rows = stats.records;
            }
            else if (error) {
                goto cleanup;
            }
            else {
                error = kfile->key_range64(
                    kfile,
                    transaction,
                    &after_key,
                    &after_key_less,
                    &after_key_equal,
                    &after_key_greater,
                    &is_exact
                    );
                if (error) {
                    goto cleanup;
                }
                end_rows = after_key_less;
            }
        }
    }

    //
    // MySQL thinks a return value of 0 means there are exactly 0 rows
    // Therefore, always return non-zero so this assumption is not made
    //
    if (end_rows > start_rows) {
        ret_val = (ha_rows) (end_rows - start_rows);
    }
    else {
        ret_val = (ha_rows) 1;
    }

cleanup:
    if (tmp_cursor) {
        tmp_cursor->c_close(tmp_cursor);
        tmp_cursor = NULL;
    }
    DBUG_RETURN(ret_val);
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// initializes the auto increment data needed
|
|
|
|
//
|
|
|
|
void ha_tokudb::init_auto_increment() {
|
|
|
|
DBT key;
|
|
|
|
DBT value;
|
|
|
|
int error;
|
|
|
|
HA_METADATA_KEY key_val = hatoku_max_ai;
|
|
|
|
bzero(&key, sizeof(key));
|
|
|
|
bzero(&value, sizeof(value));
|
|
|
|
key.data = &key_val;
|
|
|
|
key.size = sizeof(key_val);
|
|
|
|
value.flags = DB_DBT_MALLOC;
|
|
|
|
DB_TXN* txn = NULL;
|
|
|
|
|
|
|
|
error = db_env->txn_begin(db_env, 0, &txn, 0);
|
|
|
|
if (error) {
|
|
|
|
share->last_auto_increment = 0;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
//
|
|
|
|
// First retrieve hatoku_max_ai, which is max value used by auto increment
|
|
|
|
// column so far, the max value could have been auto generated (e.g. insert (NULL))
|
|
|
|
// or it could have been manually inserted by user (e.g. insert (345))
|
|
|
|
//
|
|
|
|
error = share->status_block->get(
|
|
|
|
share->status_block,
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&value,
|
|
|
|
0
|
|
|
|
);
|
|
|
|
|
|
|
|
if (error == 0 && value.size == sizeof(share->last_auto_increment)) {
|
|
|
|
share->last_auto_increment = *(uint *)value.data;
|
2013-04-17 06:01:44 +02:00
|
|
|
free(value.data);
|
|
|
|
value.data = NULL;
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
share->last_auto_increment = 0;
|
|
|
|
}
|
|
|
|
//
|
|
|
|
// Now retrieve the initial auto increment value, as specified by create table
|
|
|
|
// so if a user does "create table t1 (a int auto_increment, primary key (a)) auto_increment=100",
|
|
|
|
// then the value 100 should be stored here
|
|
|
|
//
|
|
|
|
key_val = hatoku_ai_create_value;
|
|
|
|
error = share->status_block->get(
|
|
|
|
share->status_block,
|
|
|
|
txn,
|
|
|
|
&key,
|
|
|
|
&value,
|
|
|
|
0
|
|
|
|
);
|
|
|
|
|
|
|
|
if (error == 0 && value.size == sizeof(share->auto_inc_create_value)) {
|
|
|
|
share->auto_inc_create_value = *(uint *)value.data;
|
2013-04-17 06:01:44 +02:00
|
|
|
free(value.data);
|
|
|
|
value.data = NULL;
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
share->auto_inc_create_value = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
txn->commit(txn,DB_TXN_NOSYNC);
|
|
|
|
}
|
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_AUTO_INCREMENT) {
|
|
|
|
TOKUDB_TRACE("init auto increment:%lld\n", share->last_auto_increment);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
//
// Reserves a block of auto increment values for the caller.
// Parameters:
//              offset - only traced, not used in the computation
//              increment - distance between two consecutive values
//              nb_desired_values - how many values the caller wants
//      [out]   first_value - first value of the reserved block
//      [out]   nb_reserved_values - number of values reserved, always
//              nb_desired_values
//
void ha_tokudb::get_auto_increment(ulonglong offset, ulonglong increment, ulonglong nb_desired_values, ulonglong * first_value, ulonglong * nb_reserved_values) {
    TOKUDB_DBUG_ENTER("ha_tokudb::get_auto_increment");
    ulonglong first_reserved;

    pthread_mutex_lock(&share->mutex);

    if (share->auto_inc_create_value <= share->last_auto_increment) {
        // normal case: continue after the last value handed out
        first_reserved = share->last_auto_increment + increment;
    }
    else {
        // the start value given at create time has not been reached yet
        first_reserved = share->auto_inc_create_value;
        share->last_auto_increment = share->auto_inc_create_value;
    }

    // last value of the block handed to the caller
    const ulonglong last_reserved = first_reserved + (nb_desired_values - 1)*increment;
    update_max_auto_inc(share->status_block, last_reserved);
    share->last_auto_increment = last_reserved;

    if (tokudb_debug & TOKUDB_DEBUG_AUTO_INCREMENT) {
        TOKUDB_TRACE("get_auto_increment(%lld,%lld,%lld):got:%lld:%lld\n",
                     offset, increment, nb_desired_values, first_reserved, nb_desired_values);
    }
    *first_value = first_reserved;
    *nb_reserved_values = nb_desired_values;
    pthread_mutex_unlock(&share->mutex);
    DBUG_VOID_RETURN;
}
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
//
// Always answers false.
//
bool ha_tokudb::is_auto_inc_singleton(){
    const bool singleton = false;
    return singleton;
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// Adds indexes to the table. Takes the array of KEY passed in key_info, and creates
|
|
|
|
// DB's that will go at the end of share->key_file. THE IMPLICIT ASSUMPTION HERE is
|
|
|
|
// that the table will be modified and that these added keys will be appended to the end
|
|
|
|
// of the array table->key_info
|
|
|
|
// Parameters:
|
|
|
|
// [in] table_arg - table that is being modified, seems to be identical to this->table
|
|
|
|
// [in] key_info - array of KEY's to be added
|
|
|
|
// num_of_keys - number of keys to be added, number of elements in key_info
|
|
|
|
// Returns:
|
|
|
|
// 0 on success, error otherwise
|
|
|
|
//
|
|
|
|
int ha_tokudb::add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::add_index");
|
|
|
|
char name_buff[FN_REFLEN];
|
|
|
|
int error;
|
|
|
|
uint curr_index = 0;
|
|
|
|
DBC* tmp_cursor = NULL;
|
|
|
|
int cursor_ret_val = 0;
|
|
|
|
DBT current_primary_key;
|
|
|
|
DB_TXN* txn = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
char* newname = NULL;
|
|
|
|
uchar* tmp_key_buff = NULL;
|
|
|
|
uchar* tmp_prim_key_buff = NULL;
|
|
|
|
uchar* tmp_record = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
THD* thd = ha_thd();
|
2013-04-17 06:01:45 +02:00
|
|
|
uchar* tmp_record2 = NULL;
|
2013-04-17 06:01:45 +02:00
|
|
|
//
|
|
|
|
// these variables are for error handling
|
|
|
|
//
|
|
|
|
uint num_files_created = 0;
|
|
|
|
uint num_DB_opened = 0;
|
|
|
|
//
|
|
|
|
// number of DB files we have open currently, before add_index is executed
|
|
|
|
//
|
|
|
|
uint curr_num_DBs = table_arg->s->keys + test(hidden_primary_key);
|
2013-04-17 06:01:44 +02:00
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
//
|
|
|
|
// status message to be shown in "show process list"
|
|
|
|
//
|
|
|
|
char status_msg[MAX_ALIAS_NAME + 200]; //buffer of 200 should be a good upper bound.
|
|
|
|
ulonglong num_processed = 0; //variable that stores number of elements inserted thus far
|
|
|
|
thd_proc_info(thd, "Adding indexes");
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
newname = (char *)my_malloc(share->table_name_length + NAME_CHAR_LEN, MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
tmp_key_buff = (uchar *)my_malloc(2*table_arg->s->rec_buff_length, MYF(MY_WME));
|
|
|
|
tmp_prim_key_buff = (uchar *)my_malloc(2*table_arg->s->rec_buff_length, MYF(MY_WME));
|
|
|
|
tmp_record = (uchar *)my_malloc(table_arg->s->rec_buff_length,MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
tmp_record2 = (uchar *)my_malloc(2*table_arg->s->rec_buff_length,MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (newname == NULL ||
|
|
|
|
tmp_key_buff == NULL ||
|
|
|
|
tmp_prim_key_buff == NULL ||
|
|
|
|
tmp_record == NULL ||
|
|
|
|
tmp_record2 == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// in unpack_row, MySQL passes a buffer that is this long,
|
|
|
|
// so this length should be good enough for us as well
|
|
|
|
//
|
|
|
|
bzero((void *) ¤t_primary_key, sizeof(current_primary_key));
|
2013-04-17 06:01:44 +02:00
|
|
|
current_primary_key.data = tmp_prim_key_buff;
|
2013-04-17 06:01:41 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// The files for secondary tables are derived from the name of keys
|
|
|
|
// If we try to add a key with the same name as an already existing key,
|
|
|
|
// We can crash. So here we check if any of the keys added has the same
|
|
|
|
// name of an existing key, and if so, we fail gracefully
|
|
|
|
//
|
|
|
|
for (uint i = 0; i < num_of_keys; i++) {
|
|
|
|
for (uint j = 0; j < table_arg->s->keys; j++) {
|
|
|
|
if (strcmp(key_info[i].name, table_arg->s->key_info[j].name) == 0) {
|
|
|
|
error = HA_ERR_WRONG_COMMAND;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// first create all the DB's files
|
|
|
|
//
|
|
|
|
char part[MAX_ALIAS_NAME + 10];
|
|
|
|
for (uint i = 0; i < num_of_keys; i++) {
|
2013-04-17 06:01:45 +02:00
|
|
|
int flags = (key_info[i].flags & HA_CLUSTERING) ? 0 : DB_DUP + DB_DUPSORT;
|
2013-04-17 06:01:41 +02:00
|
|
|
sprintf(part, "key-%s", key_info[i].name);
|
|
|
|
make_name(newname, share->table_name, part);
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
2013-04-17 06:01:46 +02:00
|
|
|
error = create_sub_table(name_buff, flags);
|
2013-04-17 06:01:41 +02:00
|
|
|
if (tokudb_debug & TOKUDB_DEBUG_OPEN) {
|
|
|
|
TOKUDB_TRACE("create:%s:flags=%ld:error=%d\n", newname, key_info[i].flags, error);
|
|
|
|
}
|
|
|
|
if (error) { goto cleanup; }
|
|
|
|
num_files_created++;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// open all the DB files and set the appropriate variables in share
|
|
|
|
// they go to the end of share->key_file
|
|
|
|
//
|
2013-04-17 06:01:42 +02:00
|
|
|
curr_index = curr_num_DBs;
|
2013-04-17 06:01:41 +02:00
|
|
|
for (uint i = 0; i < num_of_keys; i++, curr_index++) {
|
|
|
|
error = open_secondary_table(
|
|
|
|
&share->key_file[curr_index],
|
|
|
|
&key_info[i],
|
|
|
|
share->table_name,
|
|
|
|
0,
|
|
|
|
&share->key_type[curr_index]
|
|
|
|
);
|
|
|
|
if (error) { goto cleanup; }
|
|
|
|
num_DB_opened++;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
|
|
|
|
// scan primary table, create each secondary key, add to each DB
|
|
|
|
//
|
|
|
|
|
|
|
|
error = db_env->txn_begin(db_env, 0, &txn, 0);
|
|
|
|
assert(error == 0);
|
2013-04-17 06:01:42 +02:00
|
|
|
|
|
|
|
//
|
|
|
|
// grab some locks to make this go faster
|
|
|
|
// first a global read lock on the main DB, because
|
|
|
|
// we intend to scan the entire thing
|
|
|
|
//
|
|
|
|
error = share->file->pre_acquire_read_lock(
|
|
|
|
share->file,
|
|
|
|
txn,
|
|
|
|
share->file->dbt_neg_infty(),
|
|
|
|
NULL,
|
|
|
|
share->file->dbt_pos_infty(),
|
|
|
|
NULL
|
|
|
|
);
|
|
|
|
if (error) { txn->commit(txn, 0); goto cleanup; }
|
|
|
|
|
|
|
|
//
|
|
|
|
// now grab a table write lock for secondary tables we
|
|
|
|
// are creating
|
|
|
|
//
|
|
|
|
for (uint i = 0; i < num_of_keys; i++) {
|
2013-04-17 06:01:42 +02:00
|
|
|
uint curr_index = i + curr_num_DBs;
|
2013-04-17 06:01:42 +02:00
|
|
|
error = share->key_file[curr_index]->pre_acquire_table_lock(
|
|
|
|
share->key_file[curr_index],
|
|
|
|
txn
|
|
|
|
);
|
|
|
|
if (error) { txn->commit(txn, 0); goto cleanup; }
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
if ((error = share->file->cursor(share->file, txn, &tmp_cursor, 0))) {
|
|
|
|
tmp_cursor = NULL; // Safety
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// for each element in the primary table, insert the proper key value pair in each secondary table
|
|
|
|
// that is created
|
|
|
|
//
|
2013-04-17 06:01:44 +02:00
|
|
|
struct smart_dbt_ai_info info;
|
|
|
|
info.ha = this;
|
|
|
|
info.prim_key = ¤t_primary_key;
|
|
|
|
info.buf = tmp_record;
|
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
cursor_ret_val = tmp_cursor->c_getf_next(tmp_cursor, DB_PRELOCKED, smart_dbt_ai_callback, &info);
|
2013-04-17 06:01:41 +02:00
|
|
|
while (cursor_ret_val != DB_NOTFOUND) {
|
|
|
|
if (cursor_ret_val) {
|
|
|
|
error = cursor_ret_val;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
for (uint i = 0; i < num_of_keys; i++) {
|
2013-04-17 06:01:45 +02:00
|
|
|
bool cluster_row_created = false;
|
|
|
|
DBT secondary_key, row;
|
2013-04-17 06:01:44 +02:00
|
|
|
bool has_null = false;
|
|
|
|
create_dbt_key_from_key(&secondary_key,&key_info[i], tmp_key_buff, tmp_record, &has_null);
|
2013-04-17 06:01:42 +02:00
|
|
|
uint curr_index = i + curr_num_DBs;
|
2013-04-17 06:01:41 +02:00
|
|
|
u_int32_t put_flags = share->key_type[curr_index];
|
2013-04-17 06:01:45 +02:00
|
|
|
if (put_flags == DB_NOOVERWRITE && (has_null || thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS))) {
|
|
|
|
put_flags = DB_YESOVERWRITE;
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
|
|
|
|
if (key_info[i].flags & HA_CLUSTERING) {
|
|
|
|
if (!cluster_row_created) {
|
|
|
|
if ((error = pack_row(&row, (const uchar *) tmp_record, false))){
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
cluster_row_created = true;
|
|
|
|
}
|
|
|
|
error = share->key_file[curr_index]->put(share->key_file[curr_index], txn, &secondary_key, &row, put_flags);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
error = share->key_file[curr_index]->put(share->key_file[curr_index], txn, &secondary_key, ¤t_primary_key, put_flags);
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
if (error) {
|
|
|
|
//
|
|
|
|
// in the case of any error anywhere, we can just nuke all the files created, so we dont need
|
|
|
|
// to be tricky and try to roll back changes. That is why we commit the transaction,
|
|
|
|
// which should be fast. The DB is going to go away anyway, so no pt in trying to keep
|
|
|
|
// it in a good state.
|
|
|
|
//
|
|
|
|
txn->commit(txn, 0);
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// found a duplicate in a no_dup DB
|
|
|
|
//
|
|
|
|
if ( (error == DB_KEYEXIST) && (key_info[i].flags & HA_NOSAME)) {
|
|
|
|
error = HA_ERR_FOUND_DUPP_KEY;
|
|
|
|
last_dup_key = i;
|
|
|
|
memcpy(table_arg->record[0], tmp_record, table_arg->s->rec_buff_length);
|
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
num_processed++;
|
|
|
|
|
|
|
|
if ((num_processed % 1000) == 0) {
|
|
|
|
sprintf(status_msg, "Adding indexes: Processed %llu of about %llu rows.", num_processed, share->rows);
|
|
|
|
thd_proc_info(thd, status_msg);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
cursor_ret_val = tmp_cursor->c_getf_next(tmp_cursor, DB_PRELOCKED, smart_dbt_ai_callback, &info);
|
2013-04-17 06:01:41 +02:00
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
tmp_cursor->c_close(tmp_cursor);
|
|
|
|
tmp_cursor = NULL;
|
|
|
|
|
2013-04-17 06:01:46 +02:00
|
|
|
//
|
|
|
|
// We have an accurate row count, might as well update share->rows
|
|
|
|
//
|
|
|
|
pthread_mutex_lock(&share->mutex);
|
|
|
|
share->rows = num_processed;
|
|
|
|
pthread_mutex_unlock(&share->mutex);
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// Now flatten the new DB's created
|
|
|
|
//
|
|
|
|
for (uint i = 0; i < num_of_keys; i++) {
|
|
|
|
uint curr_index = i + curr_num_DBs;
|
|
|
|
if ((error = share->key_file[curr_index]->cursor(share->key_file[curr_index], txn, &tmp_cursor, 0))) {
|
|
|
|
tmp_cursor = NULL; // Safety
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
error = 0;
|
2013-04-17 06:01:46 +02:00
|
|
|
num_processed = 0;
|
2013-04-17 06:01:44 +02:00
|
|
|
while (error != DB_NOTFOUND) {
|
|
|
|
error = tmp_cursor->c_getf_next(tmp_cursor, DB_PRELOCKED, smart_dbt_opt_callback, NULL);
|
|
|
|
if (error && error != DB_NOTFOUND) {
|
|
|
|
tmp_cursor->c_close(tmp_cursor);
|
|
|
|
txn->commit(txn, 0);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:46 +02:00
|
|
|
num_processed++;
|
|
|
|
if ((num_processed % 1000) == 0) {
|
|
|
|
sprintf(status_msg, "Adding indexes: Applied %llu of %llu rows in key-%s.", num_processed, share->rows, key_info[i].name);
|
|
|
|
thd_proc_info(thd, status_msg);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
tmp_cursor->c_close(tmp_cursor);
|
|
|
|
tmp_cursor = NULL;
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:41 +02:00
|
|
|
error = txn->commit(txn, 0);
|
|
|
|
assert(error == 0);
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
cleanup:
|
|
|
|
if (error) {
|
|
|
|
//
|
|
|
|
// We need to delete all the files that may have been created
|
|
|
|
// The DB's must be closed and removed
|
|
|
|
//
|
2013-04-17 06:01:42 +02:00
|
|
|
for (uint i = curr_num_DBs; i < curr_num_DBs + num_DB_opened; i++) {
|
2013-04-17 06:01:41 +02:00
|
|
|
share->key_file[i]->close(share->key_file[i], 0);
|
|
|
|
share->key_file[i] = NULL;
|
|
|
|
}
|
|
|
|
for (uint i = 0; i < num_files_created; i++) {
|
|
|
|
DB* tmp;
|
|
|
|
sprintf(part, "key-%s", key_info[i].name);
|
|
|
|
make_name(newname, share->table_name, part);
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
|
|
|
if (!(db_create(&tmp, db_env, 0))) {
|
|
|
|
tmp->remove(tmp, name_buff, NULL, 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname,MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
my_free(tmp_key_buff,MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
my_free(tmp_prim_key_buff,MYF(MY_ALLOW_ZERO_PTR));
|
|
|
|
my_free(tmp_record,MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(tmp_record2,MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// Prepares to drop indexes to the table. For each value, i, in the array key_num,
|
|
|
|
// table->key_info[i] is a key that is to be dropped.
|
|
|
|
// ***********NOTE*******************
|
|
|
|
// Although prepare_drop_index is supposed to just get the DB's ready for removal,
|
|
|
|
// and not actually do the removal, we are doing it here and not in final_drop_index
|
|
|
|
// For the flags we expose in alter_table_flags, namely xxx_NO_WRITES, this is allowed
|
|
|
|
// Changes for "future-proofing" this so that it works when we have the equivalent flags
|
|
|
|
// that are not NO_WRITES are not worth it at the moments
|
|
|
|
// Parameters:
|
|
|
|
// [in] table_arg - table that is being modified, seems to be identical to this->table
|
|
|
|
// [in] key_num - array of indexes that specify which keys of the array table->key_info
|
|
|
|
// are to be dropped
|
|
|
|
// num_of_keys - size of array, key_num
|
|
|
|
// Returns:
|
|
|
|
// 0 on success, error otherwise
|
|
|
|
//
|
|
|
|
int ha_tokudb::prepare_drop_index(TABLE *table_arg, uint *key_num, uint num_of_keys) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::prepare_drop_index");
|
|
|
|
int error;
|
|
|
|
char name_buff[FN_REFLEN];
|
2013-04-17 06:01:45 +02:00
|
|
|
char* newname = NULL;
|
2013-04-17 06:01:41 +02:00
|
|
|
char part[MAX_ALIAS_NAME + 10];
|
|
|
|
DB** dbs_to_remove = NULL;
|
|
|
|
|
2013-04-17 06:01:45 +02:00
|
|
|
newname = (char *)my_malloc(share->table_name_length + NAME_CHAR_LEN, MYF(MY_WME));
|
2013-04-17 06:01:45 +02:00
|
|
|
if (newname == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
2013-04-17 06:01:41 +02:00
|
|
|
//
|
|
|
|
// we allocate an array of DB's here to get ready for removal
|
|
|
|
// We do this so that all potential memory allocation errors that may occur
|
|
|
|
// will do so BEFORE we go about dropping any indexes. This way, we
|
|
|
|
// can fail gracefully without losing integrity of data in such cases. If on
|
|
|
|
// on the other hand, we started removing DB's, and in the middle,
|
|
|
|
// one failed, it is not immedietely obvious how one would rollback
|
|
|
|
//
|
|
|
|
dbs_to_remove = (DB **)my_malloc(sizeof(*dbs_to_remove)*num_of_keys, MYF(MY_ZEROFILL));
|
|
|
|
if (dbs_to_remove == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
for (uint i = 0; i < num_of_keys; i++) {
|
|
|
|
error = db_create(&dbs_to_remove[i], db_env, 0);
|
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (uint i = 0; i < num_of_keys; i++) {
|
|
|
|
uint curr_index = key_num[i];
|
|
|
|
share->key_file[curr_index]->close(share->key_file[curr_index],0);
|
|
|
|
share->key_file[curr_index] = NULL;
|
|
|
|
|
|
|
|
sprintf(part, "key-%s", table_arg->key_info[curr_index].name);
|
|
|
|
make_name(newname, share->table_name, part);
|
|
|
|
fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME);
|
|
|
|
|
|
|
|
dbs_to_remove[i]->remove(dbs_to_remove[i], name_buff, NULL, 0);
|
|
|
|
}
|
|
|
|
cleanup:
|
|
|
|
my_free(dbs_to_remove, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:45 +02:00
|
|
|
my_free(newname, MYF(MY_ALLOW_ZERO_PTR));
|
2013-04-17 06:01:41 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// ***********NOTE*******************
|
|
|
|
// Although prepare_drop_index is supposed to just get the DB's ready for removal,
|
|
|
|
// and not actually do the removal, we are doing it here and not in final_drop_index
|
|
|
|
// For the flags we expose in alter_table_flags, namely xxx_NO_WRITES, this is allowed
|
|
|
|
// Changes for "future-proofing" this so that it works when we have the equivalent flags
|
|
|
|
// that are not NO_WRITES are not worth it at the moments, therefore, we can make
|
|
|
|
// this function just return
|
|
|
|
int ha_tokudb::final_drop_index(TABLE *table_arg) {
|
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::final_drop_index");
|
|
|
|
TOKUDB_DBUG_RETURN(0);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:37 +02:00
|
|
|
void ha_tokudb::print_error(int error, myf errflag) {
|
2013-04-17 06:01:39 +02:00
|
|
|
if (error == DB_LOCK_DEADLOCK)
|
2013-04-17 06:01:37 +02:00
|
|
|
error = HA_ERR_LOCK_DEADLOCK;
|
2013-04-17 06:01:39 +02:00
|
|
|
if (error == DB_LOCK_NOTGRANTED)
|
2013-04-17 06:01:40 +02:00
|
|
|
error = HA_ERR_LOCK_WAIT_TIMEOUT;
|
2013-04-17 06:01:37 +02:00
|
|
|
handler::print_error(error, errflag);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:38 +02:00
|
|
|
#if 0 // QQQ use default
|
2013-04-17 06:01:43 +02:00
|
|
|
//
|
|
|
|
// This function will probably need to be redone from scratch
|
|
|
|
// if we ever choose to implement it
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::analyze(THD * thd, HA_CHECK_OPT * check_opt) {
|
|
|
|
uint i;
|
|
|
|
DB_BTREE_STAT *stat = 0;
|
|
|
|
DB_TXN_STAT *txn_stat_ptr = 0;
|
|
|
|
tokudb_trx_data *trx = (tokudb_trx_data *) thd->ha_data[tokudb_hton->slot];
|
|
|
|
DBUG_ASSERT(trx);
|
|
|
|
|
|
|
|
for (i = 0; i < table_share->keys; i++) {
|
|
|
|
if (stat) {
|
|
|
|
free(stat);
|
|
|
|
stat = 0;
|
|
|
|
}
|
|
|
|
if ((key_file[i]->stat) (key_file[i], trx->all, (void *) &stat, 0))
|
|
|
|
goto err;
|
|
|
|
share->rec_per_key[i] = (stat->bt_ndata / (stat->bt_nkeys ? stat->bt_nkeys : 1));
|
|
|
|
}
|
|
|
|
/* A hidden primary key is not in key_file[] */
|
|
|
|
if (hidden_primary_key) {
|
|
|
|
if (stat) {
|
|
|
|
free(stat);
|
|
|
|
stat = 0;
|
|
|
|
}
|
|
|
|
if ((file->stat) (file, trx->all, (void *) &stat, 0))
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
pthread_mutex_lock(&share->mutex);
|
|
|
|
share->status |= STATUS_TOKUDB_ANALYZE; // Save status on close
|
|
|
|
share->version++; // Update stat in table
|
|
|
|
pthread_mutex_unlock(&share->mutex);
|
|
|
|
update_status(share, table); // Write status to file
|
2008-02-05 17:00:53 +01:00
|
|
|
if (stat)
|
2013-04-17 06:01:37 +02:00
|
|
|
free(stat);
|
|
|
|
return ((share->status & STATUS_TOKUDB_ANALYZE) ? HA_ADMIN_FAILED : HA_ADMIN_OK);
|
|
|
|
|
|
|
|
err:
|
2008-02-05 17:00:53 +01:00
|
|
|
if (stat)
|
2013-04-17 06:01:37 +02:00
|
|
|
free(stat);
|
|
|
|
return HA_ADMIN_FAILED;
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
2013-04-17 06:01:38 +02:00
|
|
|
#endif
|
2008-02-05 17:00:53 +01:00
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// flatten all DB's in this table, to do so, just do a full scan on every DB
|
|
|
|
//
|
2013-04-17 06:01:37 +02:00
|
|
|
int ha_tokudb::optimize(THD * thd, HA_CHECK_OPT * check_opt) {
|
2013-04-17 06:01:44 +02:00
|
|
|
TOKUDB_DBUG_ENTER("ha_tokudb::optimize");
|
2013-04-17 06:01:44 +02:00
|
|
|
int error;
|
|
|
|
DBC* tmp_cursor = NULL;
|
2013-04-17 06:01:44 +02:00
|
|
|
tokudb_trx_data *trx = NULL;
|
|
|
|
DB_TXN* txn = NULL;
|
|
|
|
bool do_commit = false;
|
2013-04-17 06:01:44 +02:00
|
|
|
uint curr_num_DBs = table->s->keys + test(hidden_primary_key);
|
2013-04-17 06:01:44 +02:00
|
|
|
|
|
|
|
trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
|
|
|
|
if (trx == NULL) {
|
|
|
|
error = HA_ERR_UNSUPPORTED;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// optimize may be called without a valid transaction, so we have to do this
|
|
|
|
// in order to get a valid transaction
|
|
|
|
// this is a bit hacky, but it is the best we have right now
|
|
|
|
//
|
|
|
|
txn = trx->stmt ? trx->stmt : trx->sp_level;
|
|
|
|
if (txn == NULL) {
|
|
|
|
error = db_env->txn_begin(db_env, NULL, &txn, 0);
|
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
do_commit = true;
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
//
|
|
|
|
// prelock so each scan goes faster
|
|
|
|
//
|
2013-04-17 06:01:44 +02:00
|
|
|
error = acquire_table_lock(txn,lock_read);
|
2013-04-17 06:01:44 +02:00
|
|
|
if (error) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// for each DB, scan through entire table and do nothing
|
|
|
|
//
|
|
|
|
for (uint i = 0; i < curr_num_DBs; i++) {
|
|
|
|
error = 0;
|
2013-04-17 06:01:44 +02:00
|
|
|
if ((error = share->file->cursor(share->file, txn, &tmp_cursor, 0))) {
|
2013-04-17 06:01:44 +02:00
|
|
|
tmp_cursor = NULL;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
while (error != DB_NOTFOUND) {
|
|
|
|
error = tmp_cursor->c_getf_next(tmp_cursor, DB_PRELOCKED, smart_dbt_opt_callback, NULL);
|
|
|
|
if (error && error != DB_NOTFOUND) {
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
tmp_cursor->c_close(tmp_cursor);
|
|
|
|
}
|
|
|
|
|
|
|
|
error = 0;
|
|
|
|
cleanup:
|
2013-04-17 06:01:44 +02:00
|
|
|
if (do_commit) {
|
|
|
|
error = txn->commit(txn, 0);
|
|
|
|
}
|
2013-04-17 06:01:44 +02:00
|
|
|
TOKUDB_DBUG_RETURN(error);
|
2008-02-05 17:00:53 +01:00
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:44 +02:00
|
|
|
// delete all rows from a table
|
|
|
|
//
|
|
|
|
// effects: delete all of the rows in the main dictionary and all of the
|
|
|
|
// indices. this must be atomic, so we use the statement transaction
|
|
|
|
// for all of the truncate operations.
|
|
|
|
// locks: if we have an exclusive table write lock, all of the concurrency
|
|
|
|
// issues go away.
|
|
|
|
// returns: 0 if success
|
|
|
|
|
|
|
|
int ha_tokudb::delete_all_rows() {
|
|
|
|
TOKUDB_DBUG_ENTER("delete_all_rows");
|
|
|
|
int error = 0;
|
|
|
|
|
|
|
|
// truncate all dictionaries
|
|
|
|
uint curr_num_DBs = table->s->keys + test(hidden_primary_key);
|
|
|
|
for (uint i = 0; i < curr_num_DBs; i++) {
|
|
|
|
DB *db = share->key_file[i];
|
|
|
|
u_int32_t row_count = 0;
|
2013-04-17 06:01:44 +02:00
|
|
|
error = db->truncate(db, transaction, &row_count, 0);
|
2013-04-17 06:01:44 +02:00
|
|
|
if (error)
|
|
|
|
break;
|
|
|
|
// do something with the row_count?
|
|
|
|
if (tokudb_debug)
|
|
|
|
TOKUDB_TRACE("row_count=%u\n", row_count);
|
|
|
|
}
|
|
|
|
|
|
|
|
// zap the row count
|
|
|
|
if (error == 0)
|
|
|
|
share->rows = 0;
|
|
|
|
|
|
|
|
TOKUDB_DBUG_RETURN(error);
|
|
|
|
}
|
|
|
|
|
2013-04-17 06:01:39 +02:00
|
|
|
|