2013-04-17 00:02:04 -04:00
|
|
|
#ifndef _HATOKU_CMP
|
|
|
|
#define _HATOKU_CMP
|
|
|
|
|
|
|
|
#include "toku_mysql_priv.h"
|
|
|
|
|
|
|
|
extern "C" {
|
|
|
|
#include "stdint.h"
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#include <db.h>
|
|
|
|
|
|
|
|
//
|
|
|
|
// A MySQL row is encoded in TokuDB, as follows:
|
|
|
|
// Keys:
|
|
|
|
// Keys pack the defined columns in the order that they are declared.
|
|
|
|
// The primary key contains only the columns listed
|
|
|
|
// If no primary key is defined, then an eight byte hidden primary key is autogenerated (like an auto increment) and used
|
|
|
|
// Secondary keys contain the defined key and the primary key.
|
|
|
|
// Two examples:
|
|
|
|
// 1) table foo (a int, b int, c int, d int, key(b))
|
|
|
|
// The key of the main dictionary contains an eight byte autogenerated hidden primary key
|
|
|
|
// The key of key-b is the column 'b' followed by the hidden primary key
|
|
|
|
// 2) table foo (a int, b int, c int, d int, primary key(a), key(b))
|
|
|
|
// The key of the main dictionary contains 'a'
|
|
|
|
// The key of key-b is the column 'b' followed by 'a'
|
|
|
|
// Vals:
|
|
|
|
// For secondary keys they are empty.
|
|
|
|
// For the main dictionary and clustering keys, they contain all columns that do not show up in the dictionary's key
|
|
|
|
// Two examples:
|
|
|
|
// 1) table foo (a int, b int, c int, d varchar(100), primary key(a), clustering key d(d), clustering key d2(d(20)))
|
|
|
|
// the val of the main dictionary contains (b,c,d)
|
|
|
|
// the val of d contains (b,c)
|
|
|
|
// the val of d2 contains (b,c,d). d is there because the entire column does not show up in the key (d2 only indexes a prefix of d)
|
|
|
|
// Vals are encoded as follows. They have four components:
|
|
|
|
// 1) Null bytes: contains a bit field that states what columns are NULL.
|
|
|
|
// 2) Fixed fields: all fixed fields are then packed together. If a fixed field is NULL, its data is considered junk
|
|
|
|
// 3) varchars and varbinaries: stored in two pieces, first all the offsets and then all the data. If a var field is NULL, its data is considered junk
|
|
|
|
// 4) blobs: stored in (length, data) pairs. If a blob is NULL, its data is considered junk
|
|
|
|
// An example:
|
|
|
|
// Table: (a int, b varchar(20), c blob, d bigint, e varbinary(10), f largeblob, g varchar(10)) <-- no primary key defined
|
|
|
|
// Row inserted: (1, "bbb", "cc", 100, "eeeee", "ffff", "g")
|
|
|
|
// The packed format of the val looks like:
|
|
|
|
// NULL byte <-- 1 byte to encode nothing is NULL
|
|
|
|
// 1 <-- four bytes for 'a'
|
|
|
|
// 100 <-- eight bytes for 'd' (d is a bigint)
|
|
|
|
// 3,8,9 <--offsets for location of data fields, note offsets point to where data ENDS
|
|
|
|
// "bbbeeeeeg" <-- data for variable length stuff
|
|
|
|
// 2,"cc",4,"ffff"<-- data that stores the blobs
|
|
|
|
// The structures below are used for the TokuDB encoding of a row
|
|
|
|
//
|
|
|
|
|
|
|
|
//
// Per-column packing information, used for queries.
// col_pack_val is overloaded on the kind of column it describes:
//   - fixed length column:    the column's offset
//   - variable length column: the column's pack_index
//
typedef struct st_col_pack_info {
    u_int32_t col_pack_val; // offset if fixed, pack_index if var
} COL_PACK_INFO;
|
|
|
|
|
|
|
|
//
// Describes the shape of a packed val for the main dictionary or for a
// clustering dictionary.
//
//   fixed_field_size - size of the fixed fields in the val
//                      (0 when the val has no fixed fields)
//   len_of_offsets   - size, in bytes, of the offsets of the variable size
//                      columns (0 when the val has no variable fields)
//
// Notes on locating the pieces of a val:
//   - The number of null bytes at the beginning of a row is not saved here;
//     it is derived from table_share->null_bytes.
//   - The variable data starts at
//         table_share->null_bytes + fixed_field_size + len_of_offsets
//   - To figure out where the blobs start, find the last offset listed
//     (if offsets exist).
//
typedef struct st_multi_col_pack_info {
    // where the fixed length stuff ends and the offsets for var stuff begin
    u_int32_t fixed_field_size;
    // length of the offset bytes in a packed row
    u_int32_t len_of_offsets;
} MULTI_COL_PACK_INFO;
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct st_key_and_col_info {
|
|
|
|
//
|
|
|
|
// bitmaps for each key. key_filters[i] is associated with the i'th dictionary
|
|
|
|
// States what columns are not stored in the vals of each key, because
|
|
|
|
// the column is stored in the key. So, for example, the table (a int, b int, c int, d int, primary key (b,d)) will
|
|
|
|
// have the second and fourth bit of the primary key's bitmap set for the main dictionary's bitmap,
|
|
|
|
// because 'b' and 'd' do not show up in the val
|
|
|
|
//
|
|
|
|
MY_BITMAP key_filters[MAX_KEY+1];
|
|
|
|
//
|
|
|
|
// following three arrays are used to identify the types of rows in the field
|
|
|
|
// If table->field[i] is a fixed field:
|
|
|
|
// field_lengths[i] stores the field length, which is fixed
|
|
|
|
// length_bytes[i] is 0
|
|
|
|
// 'i' does not show up in the array blob_fields
|
|
|
|
// If table->field[i] is a varchar or varbinary:
|
|
|
|
// field_lengths[i] is 0
|
|
|
|
// length_bytes[i] stores the number of bytes MySQL uses to encode the length of the field in table->record[0]
|
|
|
|
// 'i' does not show up in the array blob_fields
|
|
|
|
// If table->field[i] is a blob:
|
|
|
|
// field_lengths[i] is 0
|
|
|
|
// length_bytes[i] is 0
|
|
|
|
// 'i' shows up in blob_fields
|
|
|
|
//
|
|
|
|
u_int16_t* field_lengths; //stores the field lengths of fixed size fields (1<<16 - 1 max),
|
|
|
|
uchar* length_bytes; // stores the length of lengths of varchars and varbinaries
|
|
|
|
u_int32_t* blob_fields; // list of indexes of blob fields,
|
|
|
|
u_int32_t num_blobs; // number of blobs in the table
|
|
|
|
//
|
|
|
|
// val packing info for all dictionaries. i'th one represents info for i'th dictionary
|
|
|
|
//
|
|
|
|
MULTI_COL_PACK_INFO mcp_info[MAX_KEY+1];
|
|
|
|
COL_PACK_INFO* cp_info[MAX_KEY+1];
|
|
|
|
//
|
|
|
|
// number bytes used to represent an offset in a val. Can be 1 or 2.
|
|
|
|
// The number of var fields in a val for dictionary i can be evaluated by
|
|
|
|
// mcp_info[i].len_of_offsets/num_offset_bytes.
|
|
|
|
//
|
|
|
|
u_int32_t num_offset_bytes; //number of bytes needed to encode the offset
|
|
|
|
} KEY_AND_COL_INFO;
|
|
|
|
|
|
|
|
void get_var_field_info(
|
|
|
|
u_int32_t* field_len,
|
|
|
|
u_int32_t* start_offset,
|
|
|
|
u_int32_t var_field_index,
|
|
|
|
const uchar* var_field_offset_ptr,
|
|
|
|
u_int32_t num_offset_bytes
|
|
|
|
);
|
|
|
|
|
|
|
|
void get_blob_field_info(
|
|
|
|
u_int32_t* start_offset,
|
|
|
|
u_int32_t len_of_offsets,
|
|
|
|
const uchar* var_field_data_ptr,
|
|
|
|
u_int32_t num_offset_bytes
|
|
|
|
);
|
|
|
|
|
|
|
|
inline u_int32_t get_blob_field_len(
|
|
|
|
const uchar* from_tokudb,
|
|
|
|
u_int32_t len_bytes
|
|
|
|
)
|
|
|
|
{
|
|
|
|
u_int32_t length = 0;
|
|
|
|
switch (len_bytes) {
|
|
|
|
case (1):
|
|
|
|
length = (u_int32_t)(*from_tokudb);
|
|
|
|
break;
|
|
|
|
case (2):
|
|
|
|
length = uint2korr(from_tokudb);
|
|
|
|
break;
|
|
|
|
case (3):
|
|
|
|
length = uint3korr(from_tokudb);
|
|
|
|
break;
|
|
|
|
case (4):
|
|
|
|
length = uint4korr(from_tokudb);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
assert(false);
|
|
|
|
}
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
//
// Unpacks one blob, stored in TokuDB as a (length, data) pair, into the
// MySQL representation, or just steps over it.
// Parameters:
//      [out]   to_mysql - destination; receives the len_bytes length bytes
//                  followed by a POINTER to the blob data (the address inside
//                  from_tokudb, not a copy of the data). Untouched when skip
//                  is true.
//      [in]    from_tokudb - start of the packed blob (its length bytes)
//              len_bytes - number of bytes used to encode the blob's length
//              skip - when true nothing is written to to_mysql
// Returns:
//      Pointer to the first byte after this blob's data in from_tokudb.
//
inline const uchar* unpack_toku_field_blob(
    uchar *to_mysql,
    const uchar* from_tokudb,
    u_int32_t len_bytes,
    bool skip
    )
{
    const uchar* blob_data = from_tokudb + len_bytes;

    if (skip) {
        // only the length is needed to step over the blob
        return blob_data + get_blob_field_len(from_tokudb, len_bytes);
    }

    // length bytes are copied verbatim
    memcpy(to_mysql, from_tokudb, len_bytes);
    u_int32_t blob_len = get_blob_field_len(from_tokudb, len_bytes);
    // MySQL keeps the address of the blob data right after the length bytes
    memcpy(to_mysql + len_bytes, (uchar *)(&blob_data), sizeof(uchar *));
    return blob_data + blob_len;
}
|
|
|
|
|
|
|
|
//
// Returns the distance, in bytes, from the start of table->record[0] to the
// byte that field->null_ptr points at.
//
inline uint get_null_offset(TABLE* table, Field* field) {
    uchar* record_start = (uchar*) table->record[0];
    uchar* null_byte = (uchar*) field->null_ptr;
    return (uint) (null_byte - record_start);
}
|
|
|
|
|
|
|
|
|
|
|
|
//
// TokuDB's classification of column types.
// The numeric values are spelled out so that they cannot shift silently if
// an enumerator is ever inserted or reordered; they match the values the
// implicit numbering produced.
//
typedef enum {
    toku_type_int       = 0,
    toku_type_double    = 1,
    toku_type_float     = 2,
    toku_type_fixbinary = 3,
    toku_type_fixstring = 4,
    toku_type_varbinary = 5,
    toku_type_varstring = 6,
    toku_type_blob      = 7,
    toku_type_hpk       = 8, // for hidden primary key
    toku_type_unknown   = 9
} TOKU_TYPE;
|
|
|
|
|
|
|
|
|
|
|
|
//
// Returns the TOKU_TYPE classification for a MySQL field.
// Declaration only; the definition is not part of this header.
//
TOKU_TYPE mysql_to_toku_type(Field *field);
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      to_tokudb - NOTE(review): presumably the destination buffer -- confirm
//      from_desc - NOTE(review): presumably the source bytes -- confirm
//      key_part_length - number of bytes to use to encode the length in to_tokudb
//      field_length - length of field
// Returns:
//      NOTE(review): presumably the position in to_tokudb just past what was
//      written -- confirm against the definition.
//
uchar *pack_toku_varbinary_from_desc(
    uchar *to_tokudb,
    const uchar *from_desc,
    u_int32_t key_part_length,
    u_int32_t field_length
    );
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      to_tokudb - NOTE(review): presumably the destination buffer -- confirm
//      from_desc - NOTE(review): presumably the source bytes -- confirm
//      key_part_length - number of bytes to use to encode the length in to_tokudb
//      field_length - length of field
//      charset_num - NOTE(review): presumably identifies the field's
//                    character set -- confirm
// Returns:
//      NOTE(review): presumably the position in to_tokudb just past what was
//      written -- confirm against the definition.
//
uchar *pack_toku_varstring_from_desc(
    uchar *to_tokudb,
    const uchar *from_desc,
    u_int32_t key_part_length,
    u_int32_t field_length,
    u_int32_t charset_num
    );
|
|
|
|
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// NOTE(review): by its name and parameters this packs the key field `field`
// from its MySQL form (from_mysql) into to_tokudb -- confirm against the
// definition.
// key_part_length: the original author hoped this parameter would be
// temporary, to go away once the pack_cmp code is phased out.
//
uchar *pack_toku_key_field(
    uchar *to_tokudb,
    uchar *from_mysql,
    Field *field,
    u_int32_t key_part_length
    );
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Same parameter list as pack_toku_key_field.
// NOTE(review): how the two functions differ is not visible from this
// header -- confirm against the definitions.
// key_part_length: the original author hoped this parameter would be
// temporary, to go away once the pack_cmp code is phased out.
//
uchar *pack_key_toku_key_field(
    uchar *to_tokudb,
    uchar *from_mysql,
    Field *field,
    u_int32_t key_part_length
    );
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// NOTE(review): by its name and parameters this is the inverse of
// pack_toku_key_field: it unpacks the key field `field` from from_tokudb
// into to_mysql -- confirm against the definition.
//
uchar *unpack_toku_key_field(
    uchar *to_mysql,
    uchar *from_tokudb,
    Field *field,
    u_int32_t key_part_length
    );
|
|
|
|
|
|
|
|
|
|
|
|
//
// Marker values for storing the NULL byte in keys:
// one value for a NULL column, another for a non-NULL column.
//
#define NULL_COL_VAL 0
#define NONNULL_COL_VAL 1
|
|
|
|
|
|
|
|
//
// Marker values for storing whether the rest of a key is -infinity,
// neither, or +infinity.
// COL_NEG_INF is parenthesized so that the negative literal always expands
// as a single operand, whatever expression the macro is dropped into.
//
#define COL_NEG_INF (-1)
#define COL_ZERO 0
#define COL_POS_INF 1
|
|
|
|
|
|
|
|
//
// Hidden primary keys: length, in bytes, of the autogenerated hidden
// primary key used when a table defines no primary key.
//
#define TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH 8
|
|
|
|
|
|
|
|
//
|
|
|
|
// function to convert a hidden primary key into a byte stream that can be stored in DBT
|
|
|
|
//
|
|
|
|
inline void hpk_num_to_char(uchar* to, ulonglong num) {
|
|
|
|
int8store(to, num);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
|
|
|
|
// function that takes a byte stream of a hidden primary key and returns a ulonglong
|
|
|
|
//
|
|
|
|
inline ulonglong hpk_char_to_num(uchar* val) {
|
|
|
|
return uint8korr(val);
|
|
|
|
}
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      new_key_data, new_key_size - bytes and size of the first key
//      saved_key_data, saved_key_size - bytes and size of the second key
//      row_desc, row_desc_size - bytes and size of a row descriptor
//      cmp_prefix - NOTE(review): presumably selects a prefix comparison --
//                   confirm against the definition
// Returns:
//      NOTE(review): presumably negative / zero / positive in the usual
//      comparator convention -- confirm against the definition.
//
int tokudb_compare_two_keys(
    const void *new_key_data,
    const u_int32_t new_key_size,
    const void *saved_key_data,
    const u_int32_t saved_key_size,
    const void *row_desc,
    const u_int32_t row_desc_size,
    bool cmp_prefix
    );
|
|
|
|
|
|
|
|
|
|
|
|
//
// Compares two keys, passed as DBTs, for the dictionary `file`.
// Declaration only; the definition is not part of this header.
// NOTE(review): the signature looks like a key comparison callback for the
// DB layer -- confirm where it is registered.
//
int tokudb_cmp_dbt_key(
    DB *file,
    const DBT *keya,
    const DBT *keyb
    );
|
|
|
|
|
|
|
|
//TODO: QQQ Only do one direction for prefix.
|
|
|
|
int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb);
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      buf - NOTE(review): presumably receives the descriptor -- confirm
//      is_first_hpk, first_key - whether the first key is the hidden primary
//              key, and the first key's KEY
//      is_second_hpk, second_key - the same pair for the second key
// Returns:
//      NOTE(review): meaning of the int result is not visible here (size or
//      error code) -- confirm against the definition.
//
int create_toku_key_descriptor(
    uchar *buf,
    bool is_first_hpk,
    KEY *first_key,
    bool is_second_hpk,
    KEY *second_key
    );
|
|
|
|
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// NOTE(review): presumably writes the main dictionary's key pack descriptor
// into buf and returns the number of bytes written -- confirm against the
// definition.
//
u_int32_t create_toku_main_key_pack_descriptor(uchar *buf);
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// NOTE(review): by its name, returns an upper bound on the size of a
// clustering val pack descriptor for table_share -- confirm against the
// definition.
//
u_int32_t get_max_clustering_val_pack_desc_size(TABLE_SHARE *table_share);
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      buf - NOTE(review): presumably receives the descriptor -- confirm
//      pk_index - index of the primary key
//      table_share - the table's TABLE_SHARE
//      kc_info - key and column info of the table
//      keynr - number of the key the descriptor is for
//      is_clustering - whether that key is a clustering key
// Returns:
//      NOTE(review): presumably the descriptor's size in bytes -- confirm
//      against the definition.
//
u_int32_t create_toku_clustering_val_pack_descriptor(
    uchar *buf,
    uint pk_index,
    TABLE_SHARE *table_share,
    KEY_AND_COL_INFO *kc_info,
    u_int32_t keynr,
    bool is_clustering
    );
|
|
|
|
|
|
|
|
//
// Tells whether a key is clustering, based solely on the size of its row
// descriptor.
// Parameters:
//      row_desc - not inspected by this function
//      row_desc_size - size of the row descriptor
// Returns:
//      true iff row_desc_size is non-zero
//
inline bool is_key_clustering(
    void* row_desc,
    u_int32_t row_desc_size
    )
{
    return row_desc_size != 0;
}
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      buf - NOTE(review): presumably receives the packed clustering val --
//            confirm
//      row_desc, row_desc_size - bytes and size of the row descriptor
//      pk_val - val of the primary (main) dictionary entry
// Returns:
//      NOTE(review): presumably the number of bytes written to buf --
//      confirm against the definition.
//
u_int32_t pack_clustering_val_from_desc(
    uchar *buf,
    void *row_desc,
    u_int32_t row_desc_size,
    const DBT *pk_val
    );
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// NOTE(review): by its name, returns an upper bound on the size of a
// secondary key pack descriptor given kc_info -- confirm against the
// definition.
//
u_int32_t get_max_secondary_key_pack_desc_size(KEY_AND_COL_INFO *kc_info);
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      buf - NOTE(review): presumably receives the descriptor -- confirm
//      has_hpk - whether the table uses a hidden primary key
//      pk_index - index of the primary key
//      table_share, table - the table's TABLE_SHARE and TABLE
//      kc_info - key and column info of the table
//      key_info - the secondary key the descriptor is for
//      prim_key - the primary key
// Returns:
//      NOTE(review): presumably the descriptor's size in bytes -- confirm
//      against the definition.
//
u_int32_t create_toku_secondary_key_pack_descriptor(
    uchar *buf,
    bool has_hpk,
    uint pk_index,
    TABLE_SHARE *table_share,
    TABLE *table,
    KEY_AND_COL_INFO *kc_info,
    KEY *key_info,
    KEY *prim_key
    );
|
|
|
|
|
|
|
|
inline bool is_key_pk(
|
|
|
|
void* row_desc,
|
|
|
|
u_int32_t row_desc_size
|
|
|
|
)
|
|
|
|
{
|
|
|
|
uchar* buf = (uchar *)row_desc;
|
|
|
|
return buf[0];
|
|
|
|
}
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// NOTE(review): by its name, returns the maximum size of a key that can be
// built from the row descriptor (row_desc, row_desc_size) -- confirm
// against the definition.
//
u_int32_t max_key_size_from_desc(
    void *row_desc,
    u_int32_t row_desc_size
    );
|
|
|
|
|
|
|
|
|
|
|
|
//
// Declaration only; the definition is not part of this header.
// Parameters:
//      buf - NOTE(review): presumably receives the packed key -- confirm
//      row_desc, row_desc_size - bytes and size of the row descriptor
//      pk_key, pk_val - key and val of the primary (main) dictionary entry
// Returns:
//      NOTE(review): presumably the number of bytes written to buf --
//      confirm against the definition.
//
u_int32_t pack_key_from_desc(
    uchar *buf,
    void *row_desc,
    u_int32_t row_desc_size,
    const DBT *pk_key,
    const DBT *pk_val
    );
|
|
|
|
|
|
|
|
//
// Reports whether fields a and b have the same name.
// Declaration only; the definition is not part of this header.
// NOTE(review): case sensitivity of the comparison is not visible here --
// confirm against the definition.
//
bool fields_have_same_name(Field *a, Field *b);
|
|
|
|
|
|
|
|
//
// Reports whether fields a and b are the same.
// Declaration only; the definition is not part of this header.
// NOTE(review): which attributes are compared (type, length, charset, ...)
// is not visible here -- confirm against the definition.
//
bool are_two_fields_same(Field *a, Field *b);
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|