Bug#20471 LIKE search fails with indexed utf8 char column

The main problem was already fixed by Igor as part of the fix for 16674. This change adds some additional minor fixes and tests. include/m_ctype.h: Adding reference to CHARSET_INFO.txt mysql-test/r/ctype_utf8.result: Adding test case mysql-test/t/ctype_utf8.test: Adding test case strings/CHARSET_INFO.txt: Adding comment about max_sort_char strings/ctype-mb.c: Restoring the behavior that non-Unicode character sets use 0xFF as the pad character for max_str. Only Unicode character sets use wc_mb. strings/ctype-utf8.c: Changed max_sort_char for UTF8 from U+00FF to U+FFFF.
2025-01-27 01:04:19 +01:00 · 2006-07-20 15:52:48 +05:00 · 2006-07-20 15:52:48 +05:00 · d2f7fe3558
commit d2f7fe3558
parent b53e47a1ec
6 changed files with 177 additions and 7 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -108,6 +108,8 @@ enum my_lex_states
 struct charset_info_st;
 /* See strings/CHARSET_INFO.txt about information on this structure  */
 typedef struct my_collation_handler_st
 {
  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
 /* See strings/CHARSET_INFO.txt about information on this structure  */
 typedef struct my_charset_handler_st
 {
  my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler;
 extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
 /* See strings/CHARSET_INFO.txt about information on this structure  */
 typedef struct charset_info_st
 {
  uint      number;
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@ -1124,6 +1124,81 @@ check table t1;
 Table	Op	Msg_type	Msg_text
 test.t1	check	status	OK
 drop table t1;
 set names utf8;
 create table t1 (s1 char(5) character set utf8);
 insert into t1 values
 ('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
 create index it1 on t1 (s1);
 select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
 before_delete_general_ci
 ペテルグル
 delete from t1 where s1 = 'Y';
 select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
 after_delete_general_ci
 ペテルグル
 drop table t1;
 set names utf8;
 create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
 insert into t1 values
 ('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
 create index it1 on t1 (s1);
 select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
 before_delete_unicode_ci
 ペテルグル
 delete from t1 where s1 = 'Y';
 select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
 after_delete_unicode_ci
 ペテルグル
 drop table t1;
 set names utf8;
 create table t1 (s1 char(5) character set utf8 collate utf8_bin);
 insert into t1 values
 ('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
 create index it1 on t1 (s1);
 select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
 before_delete_bin
 ペテルグル
 delete from t1 where s1 = 'Y';
 select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
 after_delete_bin
 ペテルグル
 drop table t1;
 set names utf8;
 create table t1 (a varchar(30) not null primary key)
 engine=innodb  default character set utf8 collate utf8_general_ci;
 insert into t1 values ('あいうえおかきくけこさしすせそ');
 insert into t1 values ('さしすせそかきくけこあいうえお');
 select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
 gci1
 さしすせそかきくけこあいうえお
 select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
 gci2
 あいうえおかきくけこさしすせそ
 drop table t1;
 set names utf8;
 create table t1 (a varchar(30) not null primary key)
 engine=innodb default character set utf8 collate utf8_unicode_ci;
 insert into t1 values ('あいうえおかきくけこさしすせそ');
 insert into t1 values ('さしすせそかきくけこあいうえお');
 select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
 uci1
 さしすせそかきくけこあいうえお
 select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
 uci2
 あいうえおかきくけこさしすせそ
 drop table t1;
 set names utf8;
 create table t1 (a varchar(30) not null primary key)
 engine=innodb default character set utf8 collate utf8_bin;
 insert into t1 values ('あいうえおかきくけこさしすせそ');
 insert into t1 values ('さしすせそかきくけこあいうえお');
 select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
 bin1
 さしすせそかきくけこあいうえお
 select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
 bin2
 あいうえおかきくけこさしすせそ
 drop table t1;
 SET NAMES utf8;
 CREATE TABLE t1 (id int PRIMARY KEY,
 a varchar(16) collate utf8_unicode_ci NOT NULL default '',
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
 check table t1;
 drop table t1;
 #
 # Bug#20471 LIKE search fails with indexed utf8 char column
 #
 set names utf8;
 create table t1 (s1 char(5) character set utf8);
 insert into t1 values
 ('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
 create index it1 on t1 (s1);
 select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
 delete from t1 where s1 = 'Y';
 select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
 drop table t1;
 set names utf8;
 create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
 insert into t1 values
 ('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
 create index it1 on t1 (s1);
 select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
 delete from t1 where s1 = 'Y';
 select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
 drop table t1;
 set names utf8;
 create table t1 (s1 char(5) character set utf8 collate utf8_bin);
 insert into t1 values
 ('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
 create index it1 on t1 (s1);
 select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
 delete from t1 where s1 = 'Y';
 select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
 drop table t1;
 # additional tests from duplicate bug#20744 MySQL return no result
 set names utf8;
 --disable_warnings
 create table t1 (a varchar(30) not null primary key)
 engine=innodb  default character set utf8 collate utf8_general_ci;
 --enable_warnings
 insert into t1 values ('あいうえおかきくけこさしすせそ');
 insert into t1 values ('さしすせそかきくけこあいうえお');
 select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
 select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
 drop table t1;
 set names utf8;
 --disable_warnings
 create table t1 (a varchar(30) not null primary key)
 engine=innodb default character set utf8 collate utf8_unicode_ci;
 --enable_warnings
 insert into t1 values ('あいうえおかきくけこさしすせそ');
 insert into t1 values ('さしすせそかきくけこあいうえお');
 select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
 select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
 drop table t1;
 set names utf8;
 --disable_warnings
 create table t1 (a varchar(30) not null primary key)
 engine=innodb default character set utf8 collate utf8_bin;
 --enable_warnings
 insert into t1 values ('あいうえおかきくけこさしすせそ');
 insert into t1 values ('さしすせそかきくけこあいうえお');
 select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
 select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
 drop table t1;
 #
 # Bug#14896: Comparison with a key in a partial index over mb character field
 #
--- a/strings/CHARSET_INFO.txt
+++ b/strings/CHARSET_INFO.txt
@ -33,7 +33,7 @@ typedef struct charset_info_st
  uint      strxfrm_multiply;
  uint      mbminlen;
  uint      mbmaxlen;
-  char      max_sort_char; /* For LIKE optimization */
+  uint16    max_sort_char; /* For LIKE optimization */
  MY_CHARSET_HANDLER *cset;
  MY_COLLATION_HANDLER *coll;
@ -134,7 +134,15 @@ Misc fields
  mbmaxlen         - maximum multibyte sequence length.
                     1 for 8bit charsets. Can be also 2 or 3.
-
+  max_sort_char    - used for the LIKE range optimization:
                     in case of 8bit character sets - native code
 		     of maximum character (max_str pad byte);
                     in case of UTF8 and UCS2 - Unicode code of the maximum
 		     possible character (usually U+FFFF). This code is
 		     converted to multibyte representation (usually 0xEFBFBF)
 		     and then used as a pad sequence for max_str;
 		     in case of other multibyte character sets -
 		     max_str pad byte (usually 0xFF).
 MY_CHARSET_HANDLER
 ==================
--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
 /* 
-  Write max key: create a buffer with multibyte
+  Write max key:
 - for non-Unicode character sets:
  just set to 255.
 - for Unicode character set (utf-8):
  create a buffer with multibyte
  representation of the max_sort_char character,
  and copy it into max_str in a loop. 
 */
 static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
 {
  char buf[10];
-  char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
+  char buflen;
-                               (uchar*) buf + sizeof(buf));
+  
  if (!(cs->state & MY_CS_UNICODE))
  {
    bfill(str, end - str, 255);
    return;
  }
  buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
                          (uchar*) buf + sizeof(buf));
  DBUG_ASSERT(buflen > 0);
  do
  {
@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
    my_strnncoll_mb_bin,
    my_strnncollsp_mb_bin,
    my_strnxfrm_mb_bin,
-    my_like_range_simple,
+    my_like_range_mb,
    my_wildcmp_mb_bin,
    my_strcasecmp_mb_bin,
    my_instr_mb,
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@ -2373,7 +2373,7 @@ CHARSET_INFO my_charset_utf8_bin=
    1,                  /* mbminlen     */
    3,                  /* mbmaxlen     */
    0,                  /* min_sort_char */
-    255,                /* max_sort_char */
+    0xFFFF,             /* max_sort_char */
    0,                  /* escape_with_backslash_is_dangerous */
    &my_charset_utf8_handler,
    &my_collation_mb_bin_handler