From a48db602dd9bb54439cabef8c5921639837e639e Mon Sep 17 00:00:00 2001 From: "jan@hundin.mysql.fi" <> Date: Fri, 3 Sep 2004 15:26:29 +0300 Subject: [PATCH] Fixed unique prefix key bug for multibyte character sets (BUG #4521) for InnoDB. This fixes also a second part of the same problem with prefix keys on a multibyte string column for InnoDB. --- innobase/btr/btr0btr.c | 9 +++-- innobase/rem/rem0cmp.c | 34 ++---------------- innobase/row/row0ins.c | 17 ++++++--- innobase/row/row0row.c | 33 +++++++++++++---- innobase/row/row0sel.c | 15 ++++++-- innobase/row/row0upd.c | 32 +++++++++++++---- mysql-test/r/ctype_utf8.result | 66 ++++++++++++++++++++++++++++++++++ mysql-test/t/ctype_utf8.test | 61 ++++++++++++++++++++++++++++++- sql/ha_innodb.cc | 58 ++++++++++++++++++++++++++++++ sql/ha_innodb.h | 1 + 10 files changed, 271 insertions(+), 55 deletions(-) diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index 27d798f925a..e31aadbbfff 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -2400,14 +2400,17 @@ btr_index_rec_validate( dtype_t* type = dict_index_get_nth_type(index, i); rec_get_nth_field(rec, i, &len); - + + /* Note that prefix indexes are not fixed size even when + their type is CHAR. 
*/ + if ((dict_index_get_nth_field(index, i)->prefix_len == 0 && len != UNIV_SQL_NULL && dtype_is_fixed_size(type) && len != dtype_get_fixed_size(type)) || (dict_index_get_nth_field(index, i)->prefix_len > 0 - && len != UNIV_SQL_NULL && dtype_is_fixed_size(type) - && len != + && len != UNIV_SQL_NULL + && len > dict_index_get_nth_field(index, i)->prefix_len)) { btr_index_rec_validate_report(page, rec, index); diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c index f6c82102839..041fb7914e2 100644 --- a/innobase/rem/rem0cmp.c +++ b/innobase/rem/rem0cmp.c @@ -14,9 +14,6 @@ Created 7/1/1994 Heikki Tuuri #include "srv0srv.h" -#include -#include - /* ALPHABETICAL ORDER ================== @@ -455,8 +452,6 @@ cmp_dtuple_rec_with_match( ulint cur_bytes; /* number of already matched bytes in current field */ int ret = 3333; /* return value */ - - CHARSET_INFO* charset; /* charset used in the field */ ut_ad(dtuple && rec && matched_fields && matched_bytes); ut_ad(dtuple_check_typed(dtuple)); @@ -546,33 +541,8 @@ cmp_dtuple_rec_with_match( && dtype_get_charset_coll(cur_type->prtype) != data_mysql_latin1_swedish_charset_coll)) { - /* If character set is not latin1_swedish - we have to devide character length by the - maximum bytes needed for that character - set. For example if we have unique prefix - index for 1 utf8 character then we have - actually 3 bytes allocated in the index. - Therefore, we have to divide that with - maximum bytes needed for utf8 character i.e. 
- 3 byges.*/ - - if ( dtuple_f_len > 0) { - charset = get_charset( - dtype_get_charset_coll(cur_type->prtype), - MYF(MY_WME)); - - ut_ad(charset); - ut_ad(charset->mbmaxlen); - - dtuple_f_len = dtuple_f_len / charset->mbmaxlen; - - if ( dtuple_f_len == 0) - dtuple_f_len = 1; - - rec_f_len = dtuple_f_len; - } - - ret = cmp_whole_field(cur_type, + ret = cmp_whole_field( + cur_type, dfield_get_data(dtuple_field), dtuple_f_len, rec_b_ptr, rec_f_len); diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index edd3099b5f3..2429b8f2bf3 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -1999,6 +1999,7 @@ row_ins_index_entry_set_vals( dfield_t* row_field; ulint n_fields; ulint i; + dtype_t* cur_type; ut_ad(entry && row); @@ -2012,10 +2013,18 @@ row_ins_index_entry_set_vals( /* Check column prefix indexes */ if (ind_field->prefix_len > 0 - && dfield_get_len(row_field) != UNIV_SQL_NULL - && dfield_get_len(row_field) > ind_field->prefix_len) { - - field->len = ind_field->prefix_len; + && dfield_get_len(row_field) != UNIV_SQL_NULL) { + + /* For prefix keys get the storage length + for the prefix_len characters. 
*/ + + cur_type = dict_col_get_type( + dict_field_get_col(ind_field)); + + field->len = innobase_get_at_most_n_mbchars( + dtype_get_charset_coll(cur_type->prtype), + ind_field->prefix_len, + dfield_get_len(field),row_field->data); } else { field->len = row_field->len; } diff --git a/innobase/row/row0row.c b/innobase/row/row0row.c index 680539764fd..ed6462b7377 100644 --- a/innobase/row/row0row.c +++ b/innobase/row/row0row.c @@ -113,6 +113,8 @@ row_build_index_entry( dfield_t* dfield2; dict_col_t* col; ulint i; + ulint storage_len; + dtype_t* cur_type; ut_ad(row && index && heap); ut_ad(dtuple_check_typed(row)); @@ -139,10 +141,20 @@ row_build_index_entry( /* If a column prefix index, take only the prefix */ if (ind_field->prefix_len > 0 - && dfield_get_len(dfield2) != UNIV_SQL_NULL - && dfield_get_len(dfield2) > ind_field->prefix_len) { + && dfield_get_len(dfield2) != UNIV_SQL_NULL) { - dfield_set_len(dfield, ind_field->prefix_len); + /* For prefix keys get the storage length + for the prefix_len characters. */ + + cur_type = dict_col_get_type( + dict_field_get_col(ind_field)); + + storage_len = innobase_get_at_most_n_mbchars( + dtype_get_charset_coll(cur_type->prtype), + ind_field->prefix_len, + dfield_get_len(dfield2),dfield2->data); + + dfield_set_len(dfield,storage_len); } } @@ -460,6 +472,7 @@ row_build_row_ref_from_row( dict_col_t* col; ulint ref_len; ulint i; + dtype_t* cur_type; ut_ad(ref && table && row); @@ -481,10 +494,18 @@ row_build_row_ref_from_row( dfield_copy(dfield, dfield2); if (field->prefix_len > 0 - && dfield->len != UNIV_SQL_NULL - && dfield->len > field->prefix_len) { + && dfield->len != UNIV_SQL_NULL) { - dfield->len = field->prefix_len; + /* For prefix keys get the storage length + for the prefix_len characters. 
*/ + + cur_type = dict_col_get_type( + dict_field_get_col(field)); + + dfield->len = innobase_get_at_most_n_mbchars( + dtype_get_charset_coll(cur_type->prtype), + field->prefix_len, + dfield->len,dfield->data); } } diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 2c0092adc6e..d87cc857651 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -76,6 +76,7 @@ row_sel_sec_rec_is_for_clust_rec( ulint clust_len; ulint n; ulint i; + dtype_t* cur_type; UT_NOT_USED(clust_index); @@ -91,10 +92,18 @@ row_sel_sec_rec_is_for_clust_rec( sec_field = rec_get_nth_field(sec_rec, i, &sec_len); if (ifield->prefix_len > 0 - && clust_len != UNIV_SQL_NULL - && clust_len > ifield->prefix_len) { + && clust_len != UNIV_SQL_NULL) { - clust_len = ifield->prefix_len; + /* For prefix keys get the storage length + for the prefix_len characters. */ + + cur_type = dict_col_get_type( + dict_field_get_col(ifield)); + + clust_len = innobase_get_at_most_n_mbchars( + dtype_get_charset_coll(cur_type->prtype), + ifield->prefix_len, + clust_len,clust_field); } if (0 != cmp_data_data(dict_col_get_type(col), diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index d35ae0a3e38..75400e06059 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -842,6 +842,7 @@ row_upd_index_replace_new_col_vals_index_pos( dfield_t* new_val; ulint j; ulint i; + dtype_t* cur_type; ut_ad(index); @@ -871,10 +872,19 @@ row_upd_index_replace_new_col_vals_index_pos( } if (field->prefix_len > 0 - && new_val->len != UNIV_SQL_NULL - && new_val->len > field->prefix_len) { + && new_val->len != UNIV_SQL_NULL) { - dfield->len = field->prefix_len; + /* For prefix keys get the storage length + for the prefix_len characters. 
*/ + + cur_type = dict_col_get_type( + dict_field_get_col(field)); + + dfield->len = + innobase_get_at_most_n_mbchars( + dtype_get_charset_coll(cur_type->prtype), + field->prefix_len, + new_val->len,new_val->data); } } } @@ -904,6 +914,7 @@ row_upd_index_replace_new_col_vals( dfield_t* new_val; ulint j; ulint i; + dtype_t* cur_type; ut_ad(index); @@ -933,10 +944,19 @@ row_upd_index_replace_new_col_vals( } if (field->prefix_len > 0 - && new_val->len != UNIV_SQL_NULL - && new_val->len > field->prefix_len) { + && new_val->len != UNIV_SQL_NULL) { - dfield->len = field->prefix_len; + /* For prefix keys get the storage length + for the prefix_len characters. */ + + cur_type = dict_col_get_type( + dict_field_get_col(field)); + + dfield->len = + innobase_get_at_most_n_mbchars( + dtype_get_charset_coll(cur_type->prtype), + field->prefix_len, + new_val->len,new_val->data); } } } diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index f3be539251a..7fb7a508a4e 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -316,6 +316,39 @@ select c cb20 from t1 where c=repeat('b',20); cb20 bbbbbbbbbbbbbbbbbbbb drop table t1; +create table t1 (c varchar(30) character set utf8, unique(c(10))) engine=innodb; +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +insert into t1 values ('aaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaaa' for key 1 +insert into t1 values ('aaaaaaaaaaaa'); +ERROR 23000: Duplicate entry 'aaaaaaaaaaaa' for key 1 +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +c1 +1 +select c c2 from t1 where c='2'; +c2 +2 +select c c3 from t1 where c='3'; +c3 +3 +select c cx from t1 where c='x'; +cx +x +select c cy from t1 where c='y'; +cy +y +select c cz from t1 where c='z'; +cz +z +select c ca10 from t1 where c='aaaaaaaaaa'; +ca10 +aaaaaaaaaa +select c cb20 from t1 where c=repeat('b',20); +cb20 +bbbbbbbbbbbbbbbbbbbb +drop table t1; create 
table t1 (c char(3) character set utf8, unique (c(2))); insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); insert into t1 values ('a'); @@ -339,6 +372,29 @@ insert into t1 values ('ꪪꪪ'); insert into t1 values ('ꪪꪪꪪ'); ERROR 23000: Duplicate entry 'ꪪꪪ' for key 1 drop table t1; +create table t1 (c char(3) character set utf8, unique (c(2))) engine=innodb; +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +insert into t1 values ('aaa'); +ERROR 23000: Duplicate entry 'aaa' for key 1 +insert into t1 values ('b'); +insert into t1 values ('bb'); +insert into t1 values ('bbb'); +ERROR 23000: Duplicate entry 'bbb' for key 1 +insert into t1 values ('а'); +insert into t1 values ('аа'); +insert into t1 values ('ааа'); +ERROR 23000: Duplicate entry 'ааа' for key 1 +insert into t1 values ('б'); +insert into t1 values ('бб'); +insert into t1 values ('ббб'); +ERROR 23000: Duplicate entry 'ббб' for key 1 +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +insert into t1 values ('ꪪꪪꪪ'); +ERROR 23000: Duplicate entry 'ꪪꪪ' for key 1 +drop table t1; create table t1 ( c char(10) character set utf8, unique key a using hash (c(1)) @@ -611,6 +667,16 @@ str drop table t1; create table t1 ( str varchar(255) character set utf8 not null, +key str (str(2)) +) engine=innodb; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +str +str +drop table t1; +create table t1 ( +str varchar(255) character set utf8 not null, key str using btree (str(2)) ) engine=heap; INSERT INTO t1 VALUES ('str'); diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test index 2c531d4e5d2..97cdfedca99 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -217,6 +217,27 @@ select c ca10 from t1 where c='aaaaaaaaaa'; select c cb20 from t1 where c=repeat('b',20); drop table t1; +# +# Bug 4521: unique key prefix interacts poorly with utf8 
+# InnoDB: keys with prefix compression, case insensitive collation. +# +create table t1 (c varchar(30) character set utf8, unique(c(10))) engine=innodb; +insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z'); +insert into t1 values ('aaaaaaaaaa'); +--error 1062 +insert into t1 values ('aaaaaaaaaaa'); +--error 1062 +insert into t1 values ('aaaaaaaaaaaa'); +insert into t1 values (repeat('b',20)); +select c c1 from t1 where c='1'; +select c c2 from t1 where c='2'; +select c c3 from t1 where c='3'; +select c cx from t1 where c='x'; +select c cy from t1 where c='y'; +select c cz from t1 where c='z'; +select c ca10 from t1 where c='aaaaaaaaaa'; +select c cb20 from t1 where c=repeat('b',20); +drop table t1; # # Bug 4521: unique key prefix interacts poorly with utf8 # MYISAM: fixed length keys, case insensitive collation @@ -244,7 +265,33 @@ insert into t1 values ('ꪪꪪ'); --error 1062 insert into t1 values ('ꪪꪪꪪ'); drop table t1; - +# +# Bug 4521: unique key prefix interacts poorly with utf8 +# InnoDB: fixed length keys, case insensitive collation +# +create table t1 (c char(3) character set utf8, unique (c(2))) engine=innodb; +insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z'); +insert into t1 values ('a'); +insert into t1 values ('aa'); +--error 1062 +insert into t1 values ('aaa'); +insert into t1 values ('b'); +insert into t1 values ('bb'); +--error 1062 +insert into t1 values ('bbb'); +insert into t1 values ('а'); +insert into t1 values ('аа'); +--error 1062 +insert into t1 values ('ааа'); +insert into t1 values ('б'); +insert into t1 values ('бб'); +--error 1062 +insert into t1 values ('ббб'); +insert into t1 values ('ꪪ'); +insert into t1 values ('ꪪꪪ'); +--error 1062 +insert into t1 values ('ꪪꪪꪪ'); +drop table t1; # # Bug 4531: unique key prefix interacts poorly with utf8 # Check HEAP+HASH, case insensitive collation @@ -454,6 +501,18 @@ INSERT INTO t1 VALUES ('str2'); select * from t1 where str='str'; drop table t1; +# Bug#4594: column index make = 
failed for gbk, but like works +# Check InnoDB +# +create table t1 ( + str varchar(255) character set utf8 not null, + key str (str(2)) +) engine=innodb; +INSERT INTO t1 VALUES ('str'); +INSERT INTO t1 VALUES ('str2'); +select * from t1 where str='str'; +drop table t1; + # the same for HEAP+BTREE # diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 5aa7c02fcc0..00fa6bf34aa 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -41,6 +41,7 @@ have disables the InnoDB inlining in this file. */ #include #include #include +#include #define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1)) @@ -5268,4 +5269,61 @@ ulonglong ha_innobase::get_mysql_bin_log_pos() return trx_sys_mysql_bin_log_pos; } +extern "C" { +/*********************************************************************** +This function finds charset information and returns the character +length for multibyte character set. */ + +ulint innobase_get_charset_mbmaxlen( + ulint charset_id) /* in: charset id */ +{ + CHARSET_INFO* charset; /* charset used in the field */ + + charset = get_charset(charset_id,MYF(MY_WME)); + + ut_ad(charset); + ut_ad(charset->mbmaxlen); + + return charset->mbmaxlen; +} +} + +extern "C" { +/*********************************************************************** +Returns the length in bytes of the longest prefix of a string that holds +at most n characters in the given charset; never more than data_len. */ + +ulint innobase_get_at_most_n_mbchars( + ulint charset_id, /* in: character set id */ + ulint nth, /* in: prefix limit; divided by mbmaxlen to get a character count */ + ulint data_len, /* in: length of the string in bytes */ + const char *pos) /* in: character string */ +{ + ulint byte_length; /* storage length, in bytes. 
*/ + ulint char_length; /* character count estimate, then the result in bytes */ + CHARSET_INFO* charset; /* charset used in the field */ + + ut_ad(pos); + byte_length = data_len; + + charset = get_charset(charset_id,MYF(MY_WME)); + + ut_ad(charset); + ut_ad(charset->mbmaxlen); + + char_length= byte_length / charset->mbmaxlen; + nth = nth / charset->mbmaxlen; + + if (byte_length > char_length) + { + char_length= my_charpos(charset, pos, pos + byte_length, nth); + set_if_smaller(char_length, byte_length); + } + else /* single-byte charset or empty data: clamp so the result never exceeds data_len */ + char_length = (nth < byte_length) ? nth : byte_length; + + return char_length; +} +} + #endif /* HAVE_INNOBASE_DB */ diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 6556931fa1a..2aca6e3be70 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -228,3 +228,4 @@ my_bool innobase_query_caching_of_table_permitted(THD* thd, char* full_name, void innobase_release_temporary_latches(void* innobase_tid); void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset); +