From 29b0f1703e81e0dae83b87c72aa5b3bb87e106be Mon Sep 17 00:00:00 2001 From: Zardosht Kasheff Date: Fri, 3 Apr 2009 18:00:35 +0000 Subject: [PATCH] addresses #1567, #1569 fix row descriptor for clustering keys to not include infinity byte for second key, add comparison functions git-svn-id: file:///svn/mysql/tokudb-engine/src@10980 c7de825b-a66e-492c-adef-691d508d4ae1 --- storage/tokudb/ha_tokudb.cc | 19 +- storage/tokudb/hatoku_cmp.cc | 413 ++++++++++------------------------- storage/tokudb/hatoku_cmp.h | 23 +- 3 files changed, 131 insertions(+), 324 deletions(-) diff --git a/storage/tokudb/ha_tokudb.cc b/storage/tokudb/ha_tokudb.cc index 94c1f3288c9..d101dd4fe5f 100644 --- a/storage/tokudb/ha_tokudb.cc +++ b/storage/tokudb/ha_tokudb.cc @@ -517,7 +517,7 @@ int ha_tokudb::open_secondary_table(DB** ptr, KEY* key_info, const char* name, i fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME); *key_type = key_info->flags & HA_NOSAME ? DB_NOOVERWRITE : DB_YESOVERWRITE; (*ptr)->app_private = (void *) (key_info); - (*ptr)->set_bt_compare(*ptr, tokudb_cmp_packed_key); + (*ptr)->set_bt_compare(*ptr, tokudb_cmp_dbt_key); DBUG_PRINT("info", ("Setting DB_DUP+DB_DUPSORT for key %s\n", key_info->name)); // @@ -525,7 +525,7 @@ int ha_tokudb::open_secondary_table(DB** ptr, KEY* key_info, const char* name, i // if (!(key_info->flags & HA_CLUSTERING)) { (*ptr)->set_flags(*ptr, DB_DUP + DB_DUPSORT); - (*ptr)->set_dup_compare(*ptr, hidden_primary_key ? tokudb_cmp_hidden_key : tokudb_cmp_primary_key); + (*ptr)->set_dup_compare(*ptr, tokudb_cmp_dbt_data); } (*ptr)->api_internal = share->file->app_private; @@ -676,7 +676,7 @@ int ha_tokudb::open(const char *name, int mode, uint test_if_locked) { else { share->file->app_private = NULL; } - share->file->set_bt_compare(share->file, (hidden_primary_key ? 
tokudb_cmp_hidden_key : tokudb_cmp_packed_key)); + share->file->set_bt_compare(share->file, tokudb_cmp_dbt_key); make_name(newname, name, "main"); fn_format(name_buff, newname, "", 0, MY_UNPACK_FILENAME); @@ -1641,11 +1641,12 @@ int ha_tokudb::cmp_ref(const uchar * ref1, const uchar * ref2) { } key_info = &table->key_info[table_share->primary_key]; ret_val = tokudb_compare_two_keys( - key_info, ref1 + sizeof(u_int32_t), *(u_int32_t *)ref1, ref2 + sizeof(u_int32_t), *(u_int32_t *)ref2, + (uchar *)share->file->descriptor.data + 4, + *(u_int32_t *)share->file->descriptor.data, false ); exit: @@ -2517,7 +2518,7 @@ typedef struct heavi_info { // static int after_key_heavi(const DBT *key, const DBT *value, void *extra_h) { HEAVI_INFO info = (HEAVI_INFO)extra_h; - int cmp = tokudb_prefix_cmp_packed_key(info->db, key, info->key); + int cmp = tokudb_prefix_cmp_dbt_key(info->db, key, info->key); return cmp>0 ? 1 : -1; } @@ -2547,7 +2548,7 @@ static int after_key_heavi(const DBT *key, const DBT *value, void *extra_h) { // static int prefix_last_or_prev_heavi(const DBT *key, const DBT *value, void *extra_h) { HEAVI_INFO info = (HEAVI_INFO)extra_h; - int cmp = tokudb_prefix_cmp_packed_key(info->db, key, info->key); + int cmp = tokudb_prefix_cmp_dbt_key(info->db, key, info->key); return cmp; } @@ -2576,7 +2577,7 @@ static int prefix_last_or_prev_heavi(const DBT *key, const DBT *value, void *ext // static int before_key_heavi(const DBT *key, const DBT *value, void *extra_h) { HEAVI_INFO info = (HEAVI_INFO)extra_h; - int cmp = tokudb_prefix_cmp_packed_key(info->db, key, info->key); + int cmp = tokudb_prefix_cmp_dbt_key(info->db, key, info->key); return (cmp<0) ? 
-1 : 1; } @@ -2626,7 +2627,7 @@ int ha_tokudb::index_read(uchar * buf, const uchar * key, uint key_len, enum ha_ if (error == 0) { DBT orig_key; pack_key(&orig_key, active_index, key_buff2, key, key_len, COL_NEG_INF); - if (tokudb_prefix_cmp_packed_key(share->key_file[active_index], &orig_key, &last_key)) { + if (tokudb_prefix_cmp_dbt_key(share->key_file[active_index], &orig_key, &last_key)) { error = DB_NOTFOUND; } } @@ -2661,7 +2662,7 @@ int ha_tokudb::index_read(uchar * buf, const uchar * key, uint key_len, enum ha_ if (error == 0) { DBT orig_key; pack_key(&orig_key, active_index, key_buff2, key, key_len, COL_NEG_INF); - if (tokudb_prefix_cmp_packed_key(share->key_file[active_index], &orig_key, &last_key) != 0) { + if (tokudb_prefix_cmp_dbt_key(share->key_file[active_index], &orig_key, &last_key) != 0) { error = cursor->c_get(cursor, &last_key, &row, DB_PREV); } } diff --git a/storage/tokudb/hatoku_cmp.cc b/storage/tokudb/hatoku_cmp.cc index 53360eda7c7..ebe6746ef05 100755 --- a/storage/tokudb/hatoku_cmp.cc +++ b/storage/tokudb/hatoku_cmp.cc @@ -597,7 +597,6 @@ inline int compare_toku_field( u_int32_t charset_num = 0; bool is_unsigned = false; - TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0]; row_desc_pos++; @@ -681,90 +680,6 @@ inline int compare_toku_field( return ret_val; } -inline int compare_toku_field( - uchar* a_buf, - uchar* b_buf, - Field* field, - u_int32_t key_part_length, //I really hope this is temporary as I phase out the pack_cmp stuff - u_int32_t* a_bytes_read, - u_int32_t* b_bytes_read - ) { - int ret_val = 0; - TOKU_TYPE toku_type = mysql_to_toku_type(field); - u_int32_t num_bytes = 0; - switch(toku_type) { - case (toku_type_int): - ret_val = cmp_toku_int( - a_buf, - b_buf, - field->flags & UNSIGNED_FLAG, - field->pack_length() - ); - *a_bytes_read = field->pack_length(); - *b_bytes_read = field->pack_length(); - goto exit; - case (toku_type_double): - assert(field->pack_length() == sizeof(double)); - assert(key_part_length == 
sizeof(double)); - ret_val = cmp_toku_double(a_buf, b_buf); - *a_bytes_read = sizeof(double); - *b_bytes_read = sizeof(double); - goto exit; - case (toku_type_float): - assert(field->pack_length() == sizeof(float)); - assert(key_part_length == sizeof(float)); - ret_val = cmp_toku_float(a_buf, b_buf); - *a_bytes_read = sizeof(float); - *b_bytes_read = sizeof(float); - goto exit; - case (toku_type_fixbinary): - num_bytes = field->pack_length(); - set_if_smaller(num_bytes, key_part_length); - ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes); - *a_bytes_read = num_bytes; - *b_bytes_read = num_bytes; - goto exit; - case (toku_type_fixstring): - num_bytes = field->pack_length(); - set_if_smaller(num_bytes, key_part_length); - ret_val = cmp_toku_varstring( - a_buf, - b_buf, - get_length_bytes_from_max(num_bytes), - field->charset()->number, - a_bytes_read, - b_bytes_read - ); - goto exit; - case (toku_type_varbinary): - ret_val = cmp_toku_varbinary( - a_buf, - b_buf, - get_length_bytes_from_max(key_part_length), - a_bytes_read, - b_bytes_read - ); - goto exit; - case (toku_type_varstring): - case (toku_type_blob): - ret_val = cmp_toku_varstring( - a_buf, - b_buf, - get_length_bytes_from_max(key_part_length), - field->charset()->number, - a_bytes_read, - b_bytes_read - ); - goto exit; - default: - assert(false); - } - assert(false); -exit: - return ret_val; -} - - // // at the moment, this returns new position in buffer // I want to change this to be num_bytes_packed @@ -989,246 +904,148 @@ inline int tokudb_compare_two_hidden_keys( return a < b ? -1 : (a > b ? 
1 : 0); } -int tokudb_cmp_hidden_key(DB * file, const DBT * new_key, const DBT * saved_key) { - return tokudb_compare_two_hidden_keys( - new_key->data, - new_key->size, - saved_key->data, - saved_key->size - ); -} - int tokudb_compare_two_keys( - KEY *key, const void* new_key_data, const u_int32_t new_key_size, const void* saved_key_data, const u_int32_t saved_key_size, + const void* row_desc, + const u_int32_t row_desc_size, bool cmp_prefix - ) { - uchar new_key_inf_val = *(uchar *) new_key_data; - uchar saved_key_inf_val = *(uchar *) saved_key_data; - // - // first byte is "infinity" byte - // - uchar *new_key_ptr = (uchar *)(new_key_data) + 1; - uchar *saved_key_ptr = (uchar *)(saved_key_data) + 1; - KEY_PART_INFO *key_part = key->key_part, *end = key_part + key->key_parts; - int ret_val; - // - // do not include the inf val at the beginning - // - uint new_key_length = new_key_size - sizeof(uchar); - uint saved_key_length = saved_key_size - sizeof(uchar); + ) +{ + int ret_val = 0; + uchar new_key_inf_val = COL_NEG_INF; + uchar saved_key_inf_val = COL_NEG_INF; + + uchar* row_desc_ptr = (uchar *)row_desc; + uchar *new_key_ptr = (uchar *)new_key_data; + uchar *saved_key_ptr = (uchar *)saved_key_data; - //DBUG_DUMP("key_in_index", saved_key_ptr, saved_key->size); - for (; key_part != end && (int) new_key_length > 0 && (int) saved_key_length > 0; key_part++) { - int cmp; - uint new_key_field_length; - uint saved_key_field_length; - if (key_part->field->null_bit) { - assert(new_key_ptr < (uchar *) new_key_data + new_key_size); - assert(saved_key_ptr < (uchar *) saved_key_data + saved_key_size); - if (*new_key_ptr != *saved_key_ptr) { - return ((int) *new_key_ptr - (int) *saved_key_ptr); } - saved_key_ptr++; - new_key_length--; - saved_key_length--; - if (!*new_key_ptr++) { continue; } - } - cmp = compare_toku_field( - new_key_ptr, - saved_key_ptr, - key_part->field, - key_part->length, - &new_key_field_length, - &saved_key_field_length - ); - if (cmp) { - return cmp; 
- } + u_int32_t new_key_bytes_left = new_key_size; + u_int32_t saved_key_bytes_left = saved_key_size; - assert(new_key_length >= new_key_field_length); - assert(saved_key_length >= saved_key_field_length); - - new_key_ptr += new_key_field_length; - new_key_length -= new_key_field_length; - saved_key_ptr += saved_key_field_length; - saved_key_length -= saved_key_field_length; + // + // if the keys have an infinity byte, set it + // + if (row_desc_ptr[0]) { + new_key_inf_val = new_key_ptr[0]; + saved_key_inf_val = saved_key_ptr[0]; + new_key_ptr++; + saved_key_ptr++; } - if (cmp_prefix || (new_key_length == 0 && saved_key_length == 0) ) { + row_desc_ptr++; + + while ( (u_int32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size && + (u_int32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size && + (u_int32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size + ) + { + u_int32_t new_key_field_length; + u_int32_t saved_key_field_length; + u_int32_t row_desc_field_length; + // + // if there is a null byte at this point in the key + // + if (row_desc_ptr[0]) { + // + // compare null bytes. 
If different, return + // + if (new_key_ptr[0] != saved_key_ptr[0]) { + ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr); + goto exit; + } + new_key_ptr++; + saved_key_ptr++; + } + row_desc_ptr++; + + ret_val = compare_toku_field( + new_key_ptr, + saved_key_ptr, + row_desc_ptr, + &new_key_field_length, + &saved_key_field_length, + &row_desc_field_length + ); + new_key_ptr += new_key_field_length; + saved_key_ptr += saved_key_field_length; + row_desc_ptr += row_desc_field_length; + if (ret_val) { + goto exit;; + } + + assert((u_int32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size); + assert((u_int32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size); + assert((u_int32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size); + } + new_key_bytes_left = new_key_size - ((u_int32_t)(new_key_ptr - (uchar *)new_key_data)); + saved_key_bytes_left = saved_key_size - ((u_int32_t)(saved_key_ptr - (uchar *)saved_key_data)); + if (cmp_prefix || (new_key_bytes_left== 0 && saved_key_bytes_left== 0) ) { ret_val = 0; } // // at this point, one SHOULD be 0 // - else if (new_key_length == 0 && saved_key_length > 0) { + else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) { ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1; } - else if (new_key_length > 0 && saved_key_length == 0) { + else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) { ret_val = (saved_key_inf_val == COL_POS_INF ) ? 
-1 : 1; } // // this should never happen, perhaps we should assert(false) // else { - ret_val = new_key_length - saved_key_length; + assert(false); + ret_val = new_key_bytes_left - saved_key_bytes_left; } +exit: return ret_val; } - - -// -// this is super super ugly, copied from compare_two_keys so that it can get done fast -// -int tokudb_compare_two_clustered_keys(KEY *key, KEY* primary_key, const DBT * new_key, const DBT * saved_key) { - uchar new_key_inf_val = *(uchar *) new_key->data; - uchar saved_key_inf_val = *(uchar *) saved_key->data; - // - // first byte is "infinity" byte - // - uchar *new_key_ptr = (uchar *)(new_key->data) + 1; - uchar *saved_key_ptr = (uchar *)(saved_key->data) + 1; - KEY_PART_INFO *key_part = key->key_part, *end = key_part + key->key_parts; - int ret_val; - // - // do not include the inf val at the beginning - // - uint new_key_length = new_key->size - sizeof(uchar); - uint saved_key_length = saved_key->size - sizeof(uchar); - - //DBUG_DUMP("key_in_index", saved_key_ptr, saved_key->size); - for (; key_part != end && (int) new_key_length > 0 && (int) saved_key_length > 0; key_part++) { - int cmp; - uint new_key_field_length; - uint saved_key_field_length; - if (key_part->field->null_bit) { - assert(new_key_ptr < (uchar *) new_key->data + new_key->size); - assert(saved_key_ptr < (uchar *) saved_key->data + saved_key->size); - if (*new_key_ptr != *saved_key_ptr) { - return ((int) *new_key_ptr - (int) *saved_key_ptr); } - saved_key_ptr++; - new_key_length--; - saved_key_length--; - if (!*new_key_ptr++) { continue; } - } - cmp = compare_toku_field( - new_key_ptr, - saved_key_ptr, - key_part->field, - key_part->length, - &new_key_field_length, - &saved_key_field_length - ); - if (cmp) { - return cmp; - } - - assert(new_key_length >= new_key_field_length); - assert(saved_key_length >= saved_key_field_length); - - new_key_ptr += new_key_field_length; - new_key_length -= new_key_field_length; - saved_key_ptr += saved_key_field_length; - 
saved_key_length -= saved_key_field_length; - } - if (new_key_length == 0 && saved_key_length == 0){ - ret_val = 0; - } - else if (new_key_length == 0 && saved_key_length > 0) { - ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1; - } - else if (new_key_length > 0 && saved_key_length == 0) { - ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1; - } - // - // now we compare the primary key - // - else { - if (primary_key == NULL) { - // - // primary key hidden - // - ulonglong a = hpk_char_to_num((uchar *) new_key_ptr); - ulonglong b = hpk_char_to_num((uchar *) saved_key_ptr); - ret_val = a < b ? -1 : (a > b ? 1 : 0); - } - else { - // - // primary key not hidden, I know this is bad, basically copying the code from above - // - key_part = primary_key->key_part; - end = key_part + primary_key->key_parts; - for (; key_part != end && (int) new_key_length > 0 && (int) saved_key_length > 0; key_part++) { - int cmp; - uint new_key_field_length; - uint saved_key_field_length; - if (key_part->field->null_bit) { - assert(new_key_ptr < (uchar *) new_key->data + new_key->size); - assert(saved_key_ptr < (uchar *) saved_key->data + saved_key->size); - if (*new_key_ptr != *saved_key_ptr) { - return ((int) *new_key_ptr - (int) *saved_key_ptr); } - saved_key_ptr++; - new_key_length--; - saved_key_length--; - if (!*new_key_ptr++) { continue; } - } - cmp = compare_toku_field( - new_key_ptr, - saved_key_ptr, - key_part->field, - key_part->length, - &new_key_field_length, - &saved_key_field_length - ); - if (cmp) { - return cmp; - } - - assert(new_key_length >= new_key_field_length); - assert(saved_key_length >= saved_key_field_length); - - new_key_ptr += new_key_field_length; - new_key_length -= new_key_field_length; - saved_key_ptr += saved_key_field_length; - saved_key_length -= saved_key_field_length; - } - // - // at this point, we have compared the actual keys and the primary key, we return 0 - // - ret_val = 0; - } - } - return ret_val; +int tokudb_cmp_dbt_key(DB *file, 
const DBT *keya, const DBT *keyb) { + int cmp = tokudb_compare_two_keys( + keya->data, + keya->size, + keyb->data, + keyb->size, + (uchar *)file->descriptor.data + 4, + *(u_int32_t *)file->descriptor.data, + false + ); + return cmp; } - -int tokudb_cmp_packed_key(DB *file, const DBT *keya, const DBT *keyb) { - assert(file->app_private != 0); - KEY *key = (KEY *) file->app_private; - KEY *primary_key = (KEY *) file->api_internal; - if (key->flags & HA_CLUSTERING) { - return tokudb_compare_two_clustered_keys(key, primary_key, keya, keyb); - } - return tokudb_compare_two_keys(key, keya->data, keya->size, keyb->data, keyb->size, false); -} - -int tokudb_cmp_primary_key(DB *file, const DBT *keya, const DBT *keyb) { - assert(file->app_private != 0); - KEY *key = (KEY *) file->api_internal; - return tokudb_compare_two_keys(key, keya->data, keya->size, keyb->data, keyb->size, false); +int tokudb_cmp_dbt_data(DB *file, const DBT *keya, const DBT *keyb) { + int row_desc_offset = *(u_int32_t *)file->descriptor.data; /* offset of the second-key row descriptor is read from the first 4 bytes of descriptor.data; descriptor.size is a length, not a pointer */ + int cmp = tokudb_compare_two_keys( + keya->data, + keya->size, + keyb->data, + keyb->size, + (uchar *)file->descriptor.data + row_desc_offset, + file->descriptor.size - row_desc_offset, + false + ); + return cmp; } //TODO: QQQ Only do one direction for prefix. -int tokudb_prefix_cmp_packed_key(DB *file, const DBT *keya, const DBT *keyb) { - assert(file->app_private != 0); - KEY *key = (KEY *) file->app_private; - return tokudb_compare_two_keys(key, keya->data, keya->size, keyb->data, keyb->size, true); +int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) { + int cmp = tokudb_compare_two_keys( + keya->data, + keya->size, + keyb->data, + keyb->size, + (uchar *)file->descriptor.data + 4, + *(u_int32_t *)file->descriptor.data, + true + ); + return cmp; } - // // outputs a descriptor for key into buf. 
num_bytes returns number of bytes used in buf // to store the descriptor @@ -1380,22 +1197,24 @@ int create_toku_descriptor( goto exit; } + if (!is_clustering_key) { + pos[0] = (is_second_hpk) ? 0 : 1; //we place an infinity byte iff it is NOT a clustering key and NOT a hpk + pos++; + } + // // if we have a second key, and it is an hpk, we need to pack it, and // write in the offset to this position in the first four bytes // if (is_second_hpk) { - pos[0] = 0; //say there is NO infinity byte - pos[1] = 0; //field cannot be NULL, stating it - pos[2] = toku_type_hpk; - pos += 3; + pos[0] = 0; //field cannot be NULL, stating it + pos[1] = toku_type_hpk; + pos += 2; } else { // // second key is NOT a hidden primary key, so we now pack second_key // - pos[0] = 1; //say there is an infinity byte - pos++; num_bytes = create_toku_key_descriptor(second_key, pos); pos += num_bytes; } diff --git a/storage/tokudb/hatoku_cmp.h b/storage/tokudb/hatoku_cmp.h index 8830838a952..b5a7d405ddc 100755 --- a/storage/tokudb/hatoku_cmp.h +++ b/storage/tokudb/hatoku_cmp.h @@ -92,35 +92,22 @@ inline int tokudb_compare_two_hidden_keys( ); int tokudb_compare_two_keys( - KEY *key, const void* new_key_data, const u_int32_t new_key_size, const void* saved_key_data, const u_int32_t saved_key_size, + const void* row_desc, + const u_int32_t row_desc_size, bool cmp_prefix ); -int tokudb_cmp_hidden_key( - DB* file, - const DBT* new_key, - const DBT* saved_key - ); +int tokudb_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb); -int tokudb_compare_two_clustered_keys( - KEY *key, - KEY* primary_key, - const DBT * new_key, - const DBT * saved_key - ); +int tokudb_cmp_dbt_data(DB *file, const DBT *keya, const DBT *keyb); - -int tokudb_cmp_packed_key(DB *file, const DBT *keya, const DBT *keyb); - -int tokudb_cmp_primary_key(DB *file, const DBT *keya, const DBT *keyb); - //TODO: QQQ Only do one direction for prefix. 
-int tokudb_prefix_cmp_packed_key(DB *file, const DBT *keya, const DBT *keyb); +int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb); int create_toku_key_descriptor(KEY* key, uchar* buf);