Fixed unique prefix key bug for multibyte character sets (BUG #4521) for

InnoDB. This also fixes a second part of the same problem with prefix keys
on a multibyte string column for InnoDB.


innobase/btr/btr0btr.c:
  Multibyte character set prefix indexes are no longer fixed size. Therefore,
  we have to check that the length of the index field is not greater than
  the prefix length.
innobase/rem/rem0cmp.c:
  Remove unnecessary changes.
innobase/row/row0ins.c:
  Fixed unique prefix key or prefix key using multibyte character set bugs for
  InnoDB (BUG ). For prefix keys we have to get the storage length
  for the prefix length of characters in the key.
innobase/row/row0row.c:
  Fixed unique prefix key or prefix key using multibyte character set bugs for
  InnoDB (BUG ). For prefix keys we have to get the storage length
  for the prefix length of characters in the key.
innobase/row/row0sel.c:
  Fixed unique prefix key or prefix key using multibyte character set bugs for
  InnoDB (BUG ). For prefix keys we have to get the storage length
  for the prefix length of characters in the key.
innobase/row/row0upd.c:
  Fixed unique prefix key or prefix key using multibyte character set bugs for
  InnoDB (BUG ). For prefix keys we have to get the storage length
  for the prefix length of characters in the key.
mysql-test/r/ctype_utf8.result:
  Added utf8 character expected test results for InnoDB.
mysql-test/t/ctype_utf8.test:
  Added utf8 character test cases for InnoDB.
sql/ha_innodb.cc:
  Added function innobase_get_at_most_n_mbchars to return position of
  the nth character in the multibyte character string.
sql/ha_innodb.h:
  Remove unnecessary changes.
This commit is contained in:
unknown 2004-09-03 15:26:29 +03:00
parent 3e3981b558
commit bbd402dc4f
10 changed files with 271 additions and 55 deletions

View file

@ -2400,14 +2400,17 @@ btr_index_rec_validate(
dtype_t* type = dict_index_get_nth_type(index, i);
rec_get_nth_field(rec, i, &len);
/* Note that prefix indexes are not fixed size even when
their type is CHAR. */
if ((dict_index_get_nth_field(index, i)->prefix_len == 0
&& len != UNIV_SQL_NULL && dtype_is_fixed_size(type)
&& len != dtype_get_fixed_size(type))
||
(dict_index_get_nth_field(index, i)->prefix_len > 0
&& len != UNIV_SQL_NULL && dtype_is_fixed_size(type)
&& len !=
&& len != UNIV_SQL_NULL
&& len >
dict_index_get_nth_field(index, i)->prefix_len)) {
btr_index_rec_validate_report(page, rec, index);

View file

@ -14,9 +14,6 @@ Created 7/1/1994 Heikki Tuuri
#include "srv0srv.h"
#include <m_ctype.h>
#include <my_sys.h>
/* ALPHABETICAL ORDER
==================
@ -455,8 +452,6 @@ cmp_dtuple_rec_with_match(
ulint cur_bytes; /* number of already matched bytes
in current field */
int ret = 3333; /* return value */
CHARSET_INFO* charset; /* charset used in the field */
ut_ad(dtuple && rec && matched_fields && matched_bytes);
ut_ad(dtuple_check_typed(dtuple));
@ -546,33 +541,8 @@ cmp_dtuple_rec_with_match(
&& dtype_get_charset_coll(cur_type->prtype) !=
data_mysql_latin1_swedish_charset_coll)) {
/* If character set is not latin1_swedish
we have to devide character length by the
maximum bytes needed for that character
set. For example if we have unique prefix
index for 1 utf8 character then we have
actually 3 bytes allocated in the index.
Therefore, we have to divide that with
maximum bytes needed for utf8 character i.e.
3 byges.*/
if ( dtuple_f_len > 0) {
charset = get_charset(
dtype_get_charset_coll(cur_type->prtype),
MYF(MY_WME));
ut_ad(charset);
ut_ad(charset->mbmaxlen);
dtuple_f_len = dtuple_f_len / charset->mbmaxlen;
if ( dtuple_f_len == 0)
dtuple_f_len = 1;
rec_f_len = dtuple_f_len;
}
ret = cmp_whole_field(cur_type,
ret = cmp_whole_field(
cur_type,
dfield_get_data(dtuple_field), dtuple_f_len,
rec_b_ptr, rec_f_len);

View file

@ -1999,6 +1999,7 @@ row_ins_index_entry_set_vals(
dfield_t* row_field;
ulint n_fields;
ulint i;
dtype_t* cur_type;
ut_ad(entry && row);
@ -2012,10 +2013,18 @@ row_ins_index_entry_set_vals(
/* Check column prefix indexes */
if (ind_field->prefix_len > 0
&& dfield_get_len(row_field) != UNIV_SQL_NULL
&& dfield_get_len(row_field) > ind_field->prefix_len) {
field->len = ind_field->prefix_len;
&& dfield_get_len(row_field) != UNIV_SQL_NULL) {
/* For prefix keys get the storage length
for the prefix_len characters. */
cur_type = dict_col_get_type(
dict_field_get_col(ind_field));
field->len = innobase_get_at_most_n_mbchars(
dtype_get_charset_coll(cur_type->prtype),
ind_field->prefix_len,
dfield_get_len(field),row_field->data);
} else {
field->len = row_field->len;
}

View file

@ -113,6 +113,8 @@ row_build_index_entry(
dfield_t* dfield2;
dict_col_t* col;
ulint i;
ulint storage_len;
dtype_t* cur_type;
ut_ad(row && index && heap);
ut_ad(dtuple_check_typed(row));
@ -139,10 +141,20 @@ row_build_index_entry(
/* If a column prefix index, take only the prefix */
if (ind_field->prefix_len > 0
&& dfield_get_len(dfield2) != UNIV_SQL_NULL
&& dfield_get_len(dfield2) > ind_field->prefix_len) {
&& dfield_get_len(dfield2) != UNIV_SQL_NULL) {
dfield_set_len(dfield, ind_field->prefix_len);
/* For prefix keys get the storage length
for the prefix_len characters. */
cur_type = dict_col_get_type(
dict_field_get_col(ind_field));
storage_len = innobase_get_at_most_n_mbchars(
dtype_get_charset_coll(cur_type->prtype),
ind_field->prefix_len,
dfield_get_len(dfield2),dfield2->data);
dfield_set_len(dfield,storage_len);
}
}
@ -460,6 +472,7 @@ row_build_row_ref_from_row(
dict_col_t* col;
ulint ref_len;
ulint i;
dtype_t* cur_type;
ut_ad(ref && table && row);
@ -481,10 +494,18 @@ row_build_row_ref_from_row(
dfield_copy(dfield, dfield2);
if (field->prefix_len > 0
&& dfield->len != UNIV_SQL_NULL
&& dfield->len > field->prefix_len) {
&& dfield->len != UNIV_SQL_NULL) {
dfield->len = field->prefix_len;
/* For prefix keys get the storage length
for the prefix_len characters. */
cur_type = dict_col_get_type(
dict_field_get_col(field));
dfield->len = innobase_get_at_most_n_mbchars(
dtype_get_charset_coll(cur_type->prtype),
field->prefix_len,
dfield->len,dfield->data);
}
}

View file

@ -76,6 +76,7 @@ row_sel_sec_rec_is_for_clust_rec(
ulint clust_len;
ulint n;
ulint i;
dtype_t* cur_type;
UT_NOT_USED(clust_index);
@ -91,10 +92,18 @@ row_sel_sec_rec_is_for_clust_rec(
sec_field = rec_get_nth_field(sec_rec, i, &sec_len);
if (ifield->prefix_len > 0
&& clust_len != UNIV_SQL_NULL
&& clust_len > ifield->prefix_len) {
&& clust_len != UNIV_SQL_NULL) {
clust_len = ifield->prefix_len;
/* For prefix keys get the storage length
for the prefix_len characters. */
cur_type = dict_col_get_type(
dict_field_get_col(ifield));
clust_len = innobase_get_at_most_n_mbchars(
dtype_get_charset_coll(cur_type->prtype),
ifield->prefix_len,
clust_len,clust_field);
}
if (0 != cmp_data_data(dict_col_get_type(col),

View file

@ -842,6 +842,7 @@ row_upd_index_replace_new_col_vals_index_pos(
dfield_t* new_val;
ulint j;
ulint i;
dtype_t* cur_type;
ut_ad(index);
@ -871,10 +872,19 @@ row_upd_index_replace_new_col_vals_index_pos(
}
if (field->prefix_len > 0
&& new_val->len != UNIV_SQL_NULL
&& new_val->len > field->prefix_len) {
&& new_val->len != UNIV_SQL_NULL) {
dfield->len = field->prefix_len;
/* For prefix keys get the storage length
for the prefix_len characters. */
cur_type = dict_col_get_type(
dict_field_get_col(field));
dfield->len =
innobase_get_at_most_n_mbchars(
dtype_get_charset_coll(cur_type->prtype),
field->prefix_len,
new_val->len,new_val->data);
}
}
}
@ -904,6 +914,7 @@ row_upd_index_replace_new_col_vals(
dfield_t* new_val;
ulint j;
ulint i;
dtype_t* cur_type;
ut_ad(index);
@ -933,10 +944,19 @@ row_upd_index_replace_new_col_vals(
}
if (field->prefix_len > 0
&& new_val->len != UNIV_SQL_NULL
&& new_val->len > field->prefix_len) {
&& new_val->len != UNIV_SQL_NULL) {
dfield->len = field->prefix_len;
/* For prefix keys get the storage length
for the prefix_len characters. */
cur_type = dict_col_get_type(
dict_field_get_col(field));
dfield->len =
innobase_get_at_most_n_mbchars(
dtype_get_charset_coll(cur_type->prtype),
field->prefix_len,
new_val->len,new_val->data);
}
}
}

View file

@ -316,6 +316,39 @@ select c cb20 from t1 where c=repeat('b',20);
cb20
bbbbbbbbbbbbbbbbbbbb
drop table t1;
create table t1 (c varchar(30) character set utf8, unique(c(10))) engine=innodb;
insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z');
insert into t1 values ('aaaaaaaaaa');
insert into t1 values ('aaaaaaaaaaa');
ERROR 23000: Duplicate entry 'aaaaaaaaaaa' for key 1
insert into t1 values ('aaaaaaaaaaaa');
ERROR 23000: Duplicate entry 'aaaaaaaaaaaa' for key 1
insert into t1 values (repeat('b',20));
select c c1 from t1 where c='1';
c1
1
select c c2 from t1 where c='2';
c2
2
select c c3 from t1 where c='3';
c3
3
select c cx from t1 where c='x';
cx
x
select c cy from t1 where c='y';
cy
y
select c cz from t1 where c='z';
cz
z
select c ca10 from t1 where c='aaaaaaaaaa';
ca10
aaaaaaaaaa
select c cb20 from t1 where c=repeat('b',20);
cb20
bbbbbbbbbbbbbbbbbbbb
drop table t1;
create table t1 (c char(3) character set utf8, unique (c(2)));
insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z');
insert into t1 values ('a');
@ -339,6 +372,29 @@ insert into t1 values ('ꪪꪪ');
insert into t1 values ('ꪪꪪꪪ');
ERROR 23000: Duplicate entry 'ꪪꪪ' for key 1
drop table t1;
create table t1 (c char(3) character set utf8, unique (c(2))) engine=innodb;
insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z');
insert into t1 values ('a');
insert into t1 values ('aa');
insert into t1 values ('aaa');
ERROR 23000: Duplicate entry 'aaa' for key 1
insert into t1 values ('b');
insert into t1 values ('bb');
insert into t1 values ('bbb');
ERROR 23000: Duplicate entry 'bbb' for key 1
insert into t1 values ('а');
insert into t1 values ('аа');
insert into t1 values ('ааа');
ERROR 23000: Duplicate entry 'ааа' for key 1
insert into t1 values ('б');
insert into t1 values ('бб');
insert into t1 values ('ббб');
ERROR 23000: Duplicate entry 'ббб' for key 1
insert into t1 values ('ꪪ');
insert into t1 values ('ꪪꪪ');
insert into t1 values ('ꪪꪪꪪ');
ERROR 23000: Duplicate entry 'ꪪꪪ' for key 1
drop table t1;
create table t1 (
c char(10) character set utf8,
unique key a using hash (c(1))
@ -611,6 +667,16 @@ str
drop table t1;
create table t1 (
str varchar(255) character set utf8 not null,
key str (str(2))
) engine=innodb;
INSERT INTO t1 VALUES ('str');
INSERT INTO t1 VALUES ('str2');
select * from t1 where str='str';
str
str
drop table t1;
create table t1 (
str varchar(255) character set utf8 not null,
key str using btree (str(2))
) engine=heap;
INSERT INTO t1 VALUES ('str');

View file

@ -217,6 +217,27 @@ select c ca10 from t1 where c='aaaaaaaaaa';
select c cb20 from t1 where c=repeat('b',20);
drop table t1;
#
# Bug 4521: unique key prefix interacts poorly with utf8
# InnoDB: keys with prefix compression, case insensitive collation.
#
create table t1 (c varchar(30) character set utf8, unique(c(10))) engine=innodb;
insert into t1 values ('1'),('2'),('3'),('x'),('y'),('z');
insert into t1 values ('aaaaaaaaaa');
--error 1062
insert into t1 values ('aaaaaaaaaaa');
--error 1062
insert into t1 values ('aaaaaaaaaaaa');
insert into t1 values (repeat('b',20));
select c c1 from t1 where c='1';
select c c2 from t1 where c='2';
select c c3 from t1 where c='3';
select c cx from t1 where c='x';
select c cy from t1 where c='y';
select c cz from t1 where c='z';
select c ca10 from t1 where c='aaaaaaaaaa';
select c cb20 from t1 where c=repeat('b',20);
drop table t1;
#
# Bug 4521: unique key prefix interacts poorly with utf8
# MYISAM: fixed length keys, case insensitive collation
@ -244,7 +265,33 @@ insert into t1 values ('ꪪꪪ');
--error 1062
insert into t1 values ('ꪪꪪꪪ');
drop table t1;
#
# Bug 4521: unique key prefix interacts poorly with utf8
# InnoDB: fixed length keys, case insensitive collation
#
create table t1 (c char(3) character set utf8, unique (c(2))) engine=innodb;
insert into t1 values ('1'),('2'),('3'),('4'),('x'),('y'),('z');
insert into t1 values ('a');
insert into t1 values ('aa');
--error 1062
insert into t1 values ('aaa');
insert into t1 values ('b');
insert into t1 values ('bb');
--error 1062
insert into t1 values ('bbb');
insert into t1 values ('а');
insert into t1 values ('аа');
--error 1062
insert into t1 values ('ааа');
insert into t1 values ('б');
insert into t1 values ('бб');
--error 1062
insert into t1 values ('ббб');
insert into t1 values ('ꪪ');
insert into t1 values ('ꪪꪪ');
--error 1062
insert into t1 values ('ꪪꪪꪪ');
drop table t1;
#
# Bug 4531: unique key prefix interacts poorly with utf8
# Check HEAP+HASH, case insensitive collation
@ -454,6 +501,18 @@ INSERT INTO t1 VALUES ('str2');
select * from t1 where str='str';
drop table t1;
# Bug#4594: column index make = failed for gbk, but like works
# Check InnoDB
#
create table t1 (
str varchar(255) character set utf8 not null,
key str (str(2))
) engine=innodb;
INSERT INTO t1 VALUES ('str');
INSERT INTO t1 VALUES ('str2');
select * from t1 where str='str';
drop table t1;
# the same for HEAP+BTREE
#

View file

@ -41,6 +41,7 @@ have disables the InnoDB inlining in this file. */
#include <hash.h>
#include <myisampack.h>
#include <mysys_err.h>
#include <my_sys.h>
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))
@ -5268,4 +5269,61 @@ ulonglong ha_innobase::get_mysql_bin_log_pos()
return trx_sys_mysql_bin_log_pos;
}
extern "C" {
/***********************************************************************
Looks up the charset identified by charset_id and returns its mbmaxlen
field (the maximum number of bytes needed for one character of that
charset). */
ulint innobase_get_charset_mbmaxlen(
	ulint charset_id)	/* in: charset id */
{
	/* MY_WME is passed through to get_charset() exactly as before. */
	CHARSET_INFO* const	cs = get_charset(charset_id, MYF(MY_WME));

	/* Debug assertions: the charset must be known and must declare a
	non-zero per-character byte length. */
	ut_ad(cs);
	ut_ad(cs->mbmaxlen);

	return cs->mbmaxlen;
}
}
extern "C" {
/***********************************************************************
This function finds the charset information and returns the storage
length, in bytes, of at most the first n characters of a string, where
n = nth / mbmaxlen of the charset. The returned length never exceeds
data_len, so it is always safe to use as the length of the prefix stored
for the string. */
ulint innobase_get_at_most_n_mbchars(
				/* out: length in bytes of the leading
				characters, at most data_len */
	ulint charset_id,	/* in: character set id */
	ulint nth,		/* in: prefix length; the visible callers
				pass the index field's prefix_len here, and
				it is divided by mbmaxlen to obtain a
				character count */
	ulint data_len,		/* in: length of the string in bytes */
	const char *pos)	/* in: character string */
{
	ulint		char_length;	/* result: storage length, in bytes */
	ulint		n_chars;	/* number of characters in the prefix */
	CHARSET_INFO*	charset;	/* charset used in the field */

	ut_ad(pos);

	charset = get_charset(charset_id, MYF(MY_WME));

	ut_ad(charset);
	ut_ad(charset->mbmaxlen);

	/* Calculate how many characters at most the prefix contains. */
	n_chars = nth / charset->mbmaxlen;

	if (charset->mbmaxlen > 1 && data_len > 0) {
		/* Multibyte charset: characters have varying byte lengths,
		so ask the charset for the byte position that follows the
		first n_chars characters of the string. */
		char_length = my_charpos(charset, pos, pos + data_len,
								n_chars);
	} else {
		/* Single-byte charset (or empty string): one character is
		one byte, so the prefix is n_chars bytes long. */
		char_length = n_chars;
	}

	/* The string may be shorter than the prefix. Never report more
	bytes than the string actually has; previously the single-byte and
	empty-string cases returned the unclamped prefix length. */
	set_if_smaller(char_length, data_len);

	return char_length;
}
}
#endif /* HAVE_INNOBASE_DB */

View file

@ -228,3 +228,4 @@ my_bool innobase_query_caching_of_table_permitted(THD* thd, char* full_name,
void innobase_release_temporary_latches(void* innobase_tid);
void innobase_store_binlog_offset_and_flush_log(char *binlog_name,longlong offset);