#ifndef _HATOKU_CMP
#define _HATOKU_CMP

#include "stdint.h"
#include  // NOTE(review): the header name is missing from this directive in this copy of the file; restore it from version control

//
// A MySQL row is encoded in TokuDB as follows:
//   Keys:
//     Keys pack the defined columns in the order that they are declared.
//     The primary key contains only the columns listed.
//     If no primary key is defined, then an eight byte hidden primary key is
//     autogenerated (like an auto increment) and used.
//     Secondary keys contain the defined key and the primary key.
//     Two examples:
//       1) table foo (a int, b int, c int, d int, key(b))
//          The key of the main dictionary contains an eight byte autogenerated hidden primary key
//          The key of key-b is the column 'b' followed by the hidden primary key
//       2) table foo (a int, b int, c int, d int, primary key(a), key(b))
//          The key of the main dictionary contains 'a'
//          The key of key-b is the column 'b' followed by 'a'
//   Vals:
//     For secondary keys they are empty.
//     For the main dictionary and clustering keys, they contain all columns that
//     do not show up in the dictionary's key.
//     An example:
//       table foo (a int, b int, c int, d varchar(100), primary key(a), clustering key d(d), clustering key d2(d(20)))
//          the val of the main dictionary contains (b,c,d)
//          the val of d contains (b,c)
//          the val of d2 contains (b,c,d). d is there because the entire column does not
//          show up in the key (only a prefix of it does)
//     Vals are encoded as follows. They have four components:
//       1) Null bytes: contains a bit field that states what columns are NULL.
//       2) Fixed fields: all fixed fields are then packed together. If a fixed field is NULL,
//          its data is considered junk
//       3) varchars and varbinaries: stored in two pieces, first all the offsets and then all
//          the data. If a var field is NULL, its data is considered junk
//       4) blobs: stored in (length, data) pairs.
If a blob is NULL, its data is considered junk // An example: // Table: (a int, b varchar(20), c blob, d bigint, e varbinary(10), f largeblob, g varchar(10)) <-- no primary key defined // Row inserted: (1, "bbb", "cc", 100, "eeeee", "ffff", "g") // The packed format of the val looks like: // NULL byte <-- 1 byte to encode nothing is NULL // 1 <-- four bytes for 'a' // 100 <-- four bytes for 'd' // 3,8,9 <--offsets for location of data fields, note offsets point to where data ENDS // "bbbeeeeeg" <-- data for variable length stuff // 2,"cc",4,"ffff"<-- data that stores the blobs // The structures below describe are used for the TokuDB encoding of a row // // used for queries typedef struct st_col_pack_info { uint32_t col_pack_val; //offset if fixed, pack_index if var } COL_PACK_INFO; // // used to define a couple of characteristics of a packed val for the main dictionary or a clustering dictionary // fixed_field_size is the size of the fixed fields in the val. // len_of_offsets is the size of the bytes that make up the offsets of variable size columns // Some notes: // If the val has no fixed fields, fixed_field_size is 0 // If the val has no variable fields, len_of_offsets is 0 // The number of null bytes at the beginning of a row is not saved, it is derived from table_share->null_bytes // The pointer to where the variable data in a val starts is table_share->null_bytes + fixed_field_size + len_of_offsets // To figure out where the blobs start, find the last offset listed (if offsets exist) // typedef struct st_multi_col_pack_info { uint32_t fixed_field_size; //where the fixed length stuff ends and the offsets for var stuff begins uint32_t len_of_offsets; //length of the offset bytes in a packed row } MULTI_COL_PACK_INFO; typedef struct st_key_and_col_info { // // bitmaps for each key. key_filters[i] is associated with the i'th dictionary // States what columns are not stored in the vals of each key, because // the column is stored in the key. 
So, for example, the table (a int, b int, c int, d int, primary key (b,d)) will // have bit 1 (for 'b') and bit 3 (for 'd') of the primary key's bitmap set for the main dictionary's bitmap, // because 'b' and 'd' do not show up in the val // MY_BITMAP key_filters[MAX_KEY+1]; // // following three arrays are used to identify the types of rows in the field // If table->field[i] is a fixed field: // field_lengths[i] stores the field length, which is fixed // length_bytes[i] is 0 // 'i' does not show up in the array blob_fields // If table->field[i] is a varchar or varbinary: // field_lengths[i] is 0 // length_bytes[i] stores the number of bytes MySQL uses to encode the length of the field in table->record[0] // 'i' does not show up in the array blob_fields // If table->field[i] is a blob: // field_lengths[i] is 0 // length_bytes[i] is 0 // 'i' shows up in blob_fields // uint16_t* field_lengths; //stores the field lengths of fixed size fields (1<<16 - 1 max), uchar* length_bytes; // stores the length of lengths of varchars and varbinaries uint32_t* blob_fields; // list of indexes of blob fields, uint32_t num_blobs; // number of blobs in the table // // val packing info for all dictionaries. i'th one represents info for i'th dictionary // MULTI_COL_PACK_INFO mcp_info[MAX_KEY+1]; COL_PACK_INFO* cp_info[MAX_KEY+1]; // // number bytes used to represent an offset in a val. Can be 1 or 2. // The number of var fields in a val for dictionary i can be evaluated by // mcp_info[i].len_of_offsets/num_offset_bytes. 
// uint32_t num_offset_bytes; //number of bytes needed to encode the offset } KEY_AND_COL_INFO; void get_var_field_info( uint32_t* field_len, uint32_t* start_offset, uint32_t var_field_index, const uchar* var_field_offset_ptr, uint32_t num_offset_bytes ); void get_blob_field_info( uint32_t* start_offset, uint32_t len_of_offsets, const uchar* var_field_data_ptr, uint32_t num_offset_bytes ); static inline uint32_t get_blob_field_len( const uchar* from_tokudb, uint32_t len_bytes ) { uint32_t length = 0; switch (len_bytes) { case (1): length = (uint32_t)(*from_tokudb); break; case (2): length = uint2korr(from_tokudb); break; case (3): length = uint3korr(from_tokudb); break; case (4): length = uint4korr(from_tokudb); break; default: assert(false); } return length; } static inline const uchar* unpack_toku_field_blob( uchar *to_mysql, const uchar* from_tokudb, uint32_t len_bytes, bool skip ) { uint32_t length = 0; const uchar* data_ptr = NULL; if (!skip) { memcpy(to_mysql, from_tokudb, len_bytes); } length = get_blob_field_len(from_tokudb,len_bytes); data_ptr = from_tokudb + len_bytes; if (!skip) { memcpy(to_mysql + len_bytes, (uchar *)(&data_ptr), sizeof(uchar *)); } return (from_tokudb + len_bytes + length); } static inline uint get_null_offset(TABLE* table, Field* field) { #if 50606 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699 return field->null_offset(table->record[0]); #else return (uint) ((uchar*) field->null_ptr - (uchar*) table->record[0]); #endif } typedef enum { toku_type_int = 0, toku_type_double, toku_type_float, toku_type_fixbinary, toku_type_fixstring, toku_type_varbinary, toku_type_varstring, toku_type_blob, toku_type_hpk, //for hidden primary key toku_type_unknown } TOKU_TYPE; TOKU_TYPE mysql_to_toku_type (Field* field); uchar* pack_toku_varbinary_from_desc( uchar* to_tokudb, const uchar* from_desc, uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb uint32_t field_length //length of field ); uchar* 
pack_toku_varstring_from_desc( uchar* to_tokudb, const uchar* from_desc, uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb uint32_t field_length, uint32_t charset_num//length of field ); uchar* pack_toku_key_field( uchar* to_tokudb, uchar* from_mysql, Field* field, uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff ); uchar* pack_key_toku_key_field( uchar* to_tokudb, uchar* from_mysql, Field* field, uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff ); uchar* unpack_toku_key_field( uchar* to_mysql, uchar* from_tokudb, Field* field, uint32_t key_part_length ); // // for storing NULL byte in keys // #define NULL_COL_VAL 0 #define NONNULL_COL_VAL 1 // // for storing if rest of key is +/- infinity // #define COL_NEG_INF -1 #define COL_ZERO 0 #define COL_POS_INF 1 // // information for hidden primary keys // #define TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH 8 // // function to convert a hidden primary key into a byte stream that can be stored in DBT // static inline void hpk_num_to_char(uchar* to, ulonglong num) { int8store(to, num); } // // function that takes a byte stream of a hidden primary key and returns a ulonglong // static inline ulonglong hpk_char_to_num(uchar* val) { return uint8korr(val); } int tokudb_compare_two_keys( const void* new_key_data, const uint32_t new_key_size, const void* saved_key_data, const uint32_t saved_key_size, const void* row_desc, const uint32_t row_desc_size, bool cmp_prefix ); int tokudb_cmp_dbt_key(DB* db, const DBT *keya, const DBT *keyb); //TODO: QQQ Only do one direction for prefix. 
int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb); static int tokudb_compare_two_key_parts( const void* new_key_data, const uint32_t new_key_size, const void* saved_key_data, const uint32_t saved_key_size, const void* row_desc, const uint32_t row_desc_size, uint max_parts ); static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts); int create_toku_key_descriptor( uchar* buf, bool is_first_hpk, KEY* first_key, bool is_second_hpk, KEY* second_key ); uint32_t create_toku_main_key_pack_descriptor ( uchar* buf ); uint32_t get_max_clustering_val_pack_desc_size( TABLE_SHARE* table_share ); uint32_t create_toku_clustering_val_pack_descriptor ( uchar* buf, uint pk_index, TABLE_SHARE* table_share, KEY_AND_COL_INFO* kc_info, uint32_t keynr, bool is_clustering ); static inline bool is_key_clustering( void* row_desc, uint32_t row_desc_size ) { return (row_desc_size > 0); } uint32_t pack_clustering_val_from_desc( uchar* buf, void* row_desc, uint32_t row_desc_size, const DBT* pk_val ); uint32_t get_max_secondary_key_pack_desc_size( KEY_AND_COL_INFO* kc_info ); uint32_t create_toku_secondary_key_pack_descriptor ( uchar* buf, bool has_hpk, uint pk_index, TABLE_SHARE* table_share, TABLE* table, KEY_AND_COL_INFO* kc_info, KEY* key_info, KEY* prim_key ); static inline bool is_key_pk( void* row_desc, uint32_t row_desc_size ) { uchar* buf = (uchar *)row_desc; return buf[0]; } uint32_t max_key_size_from_desc( void* row_desc, uint32_t row_desc_size ); uint32_t pack_key_from_desc( uchar* buf, void* row_desc, uint32_t row_desc_size, const DBT* pk_key, const DBT* pk_val ); bool fields_have_same_name( Field* a, Field* b ); bool fields_are_same_type( Field* a, Field* b ); bool are_two_fields_same( Field* a, Field* b ); #endif