mirror of
https://github.com/MariaDB/server.git
synced 2025-01-27 01:04:19 +01:00
Bug#20471 LIKE search fails with indexed utf8 char column
The main problem was already fixed by Igor under terms of 16674. Adding some additional minor fixes and tests. include/m_ctype.h: Adding reference to CHARSET_INFO.txt mysql-test/r/ctype_utf8.result: Adding test case mysql-test/t/ctype_utf8.test: Adding test case strings/CHARSET_INFO.txt: Adding comment about max_sort_char strings/ctype-mb.c: Restoring the behavior that non-Unicode character sets use 0xFF as the pad character for max_str. Only Unicode character sets use wc_mb. strings/ctype-utf8.c: Changed max_sort_char for UTF8 from U+00FF to U+FFFF.
This commit is contained in:
parent
b53e47a1ec
commit
d2f7fe3558
6 changed files with 177 additions and 7 deletions
|
@ -108,6 +108,8 @@ enum my_lex_states
|
||||||
|
|
||||||
struct charset_info_st;
|
struct charset_info_st;
|
||||||
|
|
||||||
|
|
||||||
|
/* See strings/CHARSET_INFO.txt about information on this structure */
|
||||||
typedef struct my_collation_handler_st
|
typedef struct my_collation_handler_st
|
||||||
{
|
{
|
||||||
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
|
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
|
||||||
|
@ -147,6 +149,7 @@ extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler;
|
||||||
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
|
extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler;
|
||||||
|
|
||||||
|
|
||||||
|
/* See strings/CHARSET_INFO.txt about information on this structure */
|
||||||
typedef struct my_charset_handler_st
|
typedef struct my_charset_handler_st
|
||||||
{
|
{
|
||||||
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
|
my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint));
|
||||||
|
@ -204,6 +207,7 @@ extern MY_CHARSET_HANDLER my_charset_8bit_handler;
|
||||||
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
|
extern MY_CHARSET_HANDLER my_charset_ucs2_handler;
|
||||||
|
|
||||||
|
|
||||||
|
/* See strings/CHARSET_INFO.txt about information on this structure */
|
||||||
typedef struct charset_info_st
|
typedef struct charset_info_st
|
||||||
{
|
{
|
||||||
uint number;
|
uint number;
|
||||||
|
|
|
@ -1124,6 +1124,81 @@ check table t1;
|
||||||
Table Op Msg_type Msg_text
|
Table Op Msg_type Msg_text
|
||||||
test.t1 check status OK
|
test.t1 check status OK
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (s1 char(5) character set utf8);
|
||||||
|
insert into t1 values
|
||||||
|
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
|
||||||
|
create index it1 on t1 (s1);
|
||||||
|
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
|
||||||
|
before_delete_general_ci
|
||||||
|
ペテルグル
|
||||||
|
delete from t1 where s1 = 'Y';
|
||||||
|
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
|
||||||
|
after_delete_general_ci
|
||||||
|
ペテルグル
|
||||||
|
drop table t1;
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
|
||||||
|
insert into t1 values
|
||||||
|
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
|
||||||
|
create index it1 on t1 (s1);
|
||||||
|
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
|
||||||
|
before_delete_unicode_ci
|
||||||
|
ペテルグル
|
||||||
|
delete from t1 where s1 = 'Y';
|
||||||
|
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
|
||||||
|
after_delete_unicode_ci
|
||||||
|
ペテルグル
|
||||||
|
drop table t1;
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
|
||||||
|
insert into t1 values
|
||||||
|
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
|
||||||
|
create index it1 on t1 (s1);
|
||||||
|
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
|
||||||
|
before_delete_bin
|
||||||
|
ペテルグル
|
||||||
|
delete from t1 where s1 = 'Y';
|
||||||
|
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
|
||||||
|
after_delete_bin
|
||||||
|
ペテルグル
|
||||||
|
drop table t1;
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (a varchar(30) not null primary key)
|
||||||
|
engine=innodb default character set utf8 collate utf8_general_ci;
|
||||||
|
insert into t1 values ('あいうえおかきくけこさしすせそ');
|
||||||
|
insert into t1 values ('さしすせそかきくけこあいうえお');
|
||||||
|
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
|
||||||
|
gci1
|
||||||
|
さしすせそかきくけこあいうえお
|
||||||
|
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
|
||||||
|
gci2
|
||||||
|
あいうえおかきくけこさしすせそ
|
||||||
|
drop table t1;
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (a varchar(30) not null primary key)
|
||||||
|
engine=innodb default character set utf8 collate utf8_unicode_ci;
|
||||||
|
insert into t1 values ('あいうえおかきくけこさしすせそ');
|
||||||
|
insert into t1 values ('さしすせそかきくけこあいうえお');
|
||||||
|
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
|
||||||
|
uci1
|
||||||
|
さしすせそかきくけこあいうえお
|
||||||
|
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
|
||||||
|
uci2
|
||||||
|
あいうえおかきくけこさしすせそ
|
||||||
|
drop table t1;
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (a varchar(30) not null primary key)
|
||||||
|
engine=innodb default character set utf8 collate utf8_bin;
|
||||||
|
insert into t1 values ('あいうえおかきくけこさしすせそ');
|
||||||
|
insert into t1 values ('さしすせそかきくけこあいうえお');
|
||||||
|
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
|
||||||
|
bin1
|
||||||
|
さしすせそかきくけこあいうえお
|
||||||
|
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
|
||||||
|
bin2
|
||||||
|
あいうえおかきくけこさしすせそ
|
||||||
|
drop table t1;
|
||||||
SET NAMES utf8;
|
SET NAMES utf8;
|
||||||
CREATE TABLE t1 (id int PRIMARY KEY,
|
CREATE TABLE t1 (id int PRIMARY KEY,
|
||||||
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
|
a varchar(16) collate utf8_unicode_ci NOT NULL default '',
|
||||||
|
|
|
@ -926,6 +926,76 @@ INSERT INTO t1 VALUES('uUABCDEFGHIGKLMNOPRSTUVWXYZ̈bbbbbbbbbbbbbbbbbbbbbbbbbbbb
|
||||||
check table t1;
|
check table t1;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bug#20471 LIKE search fails with indexed utf8 char column
|
||||||
|
#
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (s1 char(5) character set utf8);
|
||||||
|
insert into t1 values
|
||||||
|
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
|
||||||
|
create index it1 on t1 (s1);
|
||||||
|
select s1 as before_delete_general_ci from t1 where s1 like 'ペテ%';
|
||||||
|
delete from t1 where s1 = 'Y';
|
||||||
|
select s1 as after_delete_general_ci from t1 where s1 like 'ペテ%';
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (s1 char(5) character set utf8 collate utf8_unicode_ci);
|
||||||
|
insert into t1 values
|
||||||
|
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
|
||||||
|
create index it1 on t1 (s1);
|
||||||
|
select s1 as before_delete_unicode_ci from t1 where s1 like 'ペテ%';
|
||||||
|
delete from t1 where s1 = 'Y';
|
||||||
|
select s1 as after_delete_unicode_ci from t1 where s1 like 'ペテ%';
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
set names utf8;
|
||||||
|
create table t1 (s1 char(5) character set utf8 collate utf8_bin);
|
||||||
|
insert into t1 values
|
||||||
|
('a'),('b'),(null),('ペテルグル'),('ü'),('Y');
|
||||||
|
create index it1 on t1 (s1);
|
||||||
|
select s1 as before_delete_bin from t1 where s1 like 'ペテ%';
|
||||||
|
delete from t1 where s1 = 'Y';
|
||||||
|
select s1 as after_delete_bin from t1 where s1 like 'ペテ%';
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
# additional tests from duplicate bug#20744 MySQL return no result
|
||||||
|
|
||||||
|
set names utf8;
|
||||||
|
--disable_warnings
|
||||||
|
create table t1 (a varchar(30) not null primary key)
|
||||||
|
engine=innodb default character set utf8 collate utf8_general_ci;
|
||||||
|
--enable_warnings
|
||||||
|
insert into t1 values ('あいうえおかきくけこさしすせそ');
|
||||||
|
insert into t1 values ('さしすせそかきくけこあいうえお');
|
||||||
|
select a as gci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
|
||||||
|
select a as gci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
set names utf8;
|
||||||
|
--disable_warnings
|
||||||
|
create table t1 (a varchar(30) not null primary key)
|
||||||
|
engine=innodb default character set utf8 collate utf8_unicode_ci;
|
||||||
|
--enable_warnings
|
||||||
|
insert into t1 values ('あいうえおかきくけこさしすせそ');
|
||||||
|
insert into t1 values ('さしすせそかきくけこあいうえお');
|
||||||
|
select a as uci1 from t1 where a like 'さしすせそかきくけこあいうえお%';
|
||||||
|
select a as uci2 from t1 where a like 'あいうえおかきくけこさしすせそ';
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
set names utf8;
|
||||||
|
--disable_warnings
|
||||||
|
create table t1 (a varchar(30) not null primary key)
|
||||||
|
engine=innodb default character set utf8 collate utf8_bin;
|
||||||
|
--enable_warnings
|
||||||
|
insert into t1 values ('あいうえおかきくけこさしすせそ');
|
||||||
|
insert into t1 values ('さしすせそかきくけこあいうえお');
|
||||||
|
select a as bin1 from t1 where a like 'さしすせそかきくけこあいうえお%';
|
||||||
|
select a as bin2 from t1 where a like 'あいうえおかきくけこさしすせそ';
|
||||||
|
drop table t1;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Bug#14896: Comparison with a key in a partial index over mb character field
|
# Bug#14896: Comparison with a key in a partial index over mb character field
|
||||||
#
|
#
|
||||||
|
|
|
@ -33,7 +33,7 @@ typedef struct charset_info_st
|
||||||
uint strxfrm_multiply;
|
uint strxfrm_multiply;
|
||||||
uint mbminlen;
|
uint mbminlen;
|
||||||
uint mbmaxlen;
|
uint mbmaxlen;
|
||||||
char max_sort_char; /* For LIKE optimization */
|
uint16 max_sort_char; /* For LIKE optimization */
|
||||||
|
|
||||||
MY_CHARSET_HANDLER *cset;
|
MY_CHARSET_HANDLER *cset;
|
||||||
MY_COLLATION_HANDLER *coll;
|
MY_COLLATION_HANDLER *coll;
|
||||||
|
@ -134,7 +134,15 @@ Misc fields
|
||||||
mbmaxlen - maximum multibyte sequence length.
|
mbmaxlen - maximum multibyte sequence length.
|
||||||
1 for 8bit charsets. Can be also 2 or 3.
|
1 for 8bit charsets. Can be also 2 or 3.
|
||||||
|
|
||||||
|
max_sort_char - for LIKE range
|
||||||
|
in case of 8bit character sets - native code
|
||||||
|
of maximum character (max_str pad byte);
|
||||||
|
in case of UTF8 and UCS2 - Unicode code of the maximum
|
||||||
|
possible character (usually U+FFFF). This code is
|
||||||
|
converted to multibyte representation (usually 0xEFBFBF)
|
||||||
|
and then used as a pad sequence for max_str.
|
||||||
|
in case of other multibyte character sets -
|
||||||
|
max_str pad byte (usually 0xFF).
|
||||||
|
|
||||||
MY_CHARSET_HANDLER
|
MY_CHARSET_HANDLER
|
||||||
==================
|
==================
|
||||||
|
|
|
@ -449,15 +449,28 @@ static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Write max key: create a buffer with multibyte
|
Write max key:
|
||||||
|
- for non-Unicode character sets:
|
||||||
|
just set to 255.
|
||||||
|
- for Unicode character set (utf-8):
|
||||||
|
create a buffer with multibyte
|
||||||
representation of the max_sort_char character,
|
representation of the max_sort_char character,
|
||||||
and copy it into max_str in a loop.
|
and copy it into max_str in a loop.
|
||||||
*/
|
*/
|
||||||
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
|
static void pad_max_char(CHARSET_INFO *cs, char *str, char *end)
|
||||||
{
|
{
|
||||||
char buf[10];
|
char buf[10];
|
||||||
char buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
|
char buflen;
|
||||||
(uchar*) buf + sizeof(buf));
|
|
||||||
|
if (!(cs->state & MY_CS_UNICODE))
|
||||||
|
{
|
||||||
|
bfill(str, end - str, 255);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
buflen= cs->cset->wc_mb(cs, cs->max_sort_char, (uchar*) buf,
|
||||||
|
(uchar*) buf + sizeof(buf));
|
||||||
|
|
||||||
DBUG_ASSERT(buflen > 0);
|
DBUG_ASSERT(buflen > 0);
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -894,7 +907,7 @@ MY_COLLATION_HANDLER my_collation_mb_bin_handler =
|
||||||
my_strnncoll_mb_bin,
|
my_strnncoll_mb_bin,
|
||||||
my_strnncollsp_mb_bin,
|
my_strnncollsp_mb_bin,
|
||||||
my_strnxfrm_mb_bin,
|
my_strnxfrm_mb_bin,
|
||||||
my_like_range_simple,
|
my_like_range_mb,
|
||||||
my_wildcmp_mb_bin,
|
my_wildcmp_mb_bin,
|
||||||
my_strcasecmp_mb_bin,
|
my_strcasecmp_mb_bin,
|
||||||
my_instr_mb,
|
my_instr_mb,
|
||||||
|
|
|
@ -2373,7 +2373,7 @@ CHARSET_INFO my_charset_utf8_bin=
|
||||||
1, /* mbminlen */
|
1, /* mbminlen */
|
||||||
3, /* mbmaxlen */
|
3, /* mbmaxlen */
|
||||||
0, /* min_sort_char */
|
0, /* min_sort_char */
|
||||||
255, /* max_sort_char */
|
0xFFFF, /* max_sort_char */
|
||||||
0, /* escape_with_backslash_is_dangerous */
|
0, /* escape_with_backslash_is_dangerous */
|
||||||
&my_charset_utf8_handler,
|
&my_charset_utf8_handler,
|
||||||
&my_collation_mb_bin_handler
|
&my_collation_mb_bin_handler
|
||||||
|
|
Loading…
Add table
Reference in a new issue