mirror of
https://github.com/MariaDB/server.git
synced 2025-01-17 20:42:30 +01:00
Bug #6737: REGEXP gives wrong result with case sensitive collation:
- A new flag MY_CS_CSSORT was introduced for case sensitivity. - Item_func_regexp doesn't substiture ICASE not only for binary collations but for case sensitive collations as well.
This commit is contained in:
parent
7f5661ae44
commit
e3b94d4ef5
7 changed files with 51 additions and 22 deletions
|
@ -63,7 +63,7 @@ typedef struct unicase_info_st
|
|||
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
|
||||
#define MY_CS_READY 256 /* if a charset is initialized */
|
||||
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
|
||||
|
||||
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
|
||||
#define MY_CHARSET_UNDEFINED 0
|
||||
|
||||
|
||||
|
|
|
@ -296,3 +296,12 @@ FD C3BD FD 1
|
|||
FE C3BE FE 1
|
||||
FF C3BF FF 1
|
||||
DROP TABLE t1;
|
||||
select 'a' regexp 'A' collate latin1_general_ci;
|
||||
'a' regexp 'A' collate latin1_general_ci
|
||||
1
|
||||
select 'a' regexp 'A' collate latin1_general_cs;
|
||||
'a' regexp 'A' collate latin1_general_cs
|
||||
0
|
||||
select 'a' regexp 'A' collate latin1_bin;
|
||||
'a' regexp 'A' collate latin1_bin
|
||||
0
|
||||
|
|
|
@ -53,3 +53,10 @@ SELECT
|
|||
hex(@l:=convert(@u using latin1)),
|
||||
a=@l FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
#
|
||||
# Bug #6737: REGEXP gives wrong result with case sensitive collation
|
||||
#
|
||||
select 'a' regexp 'A' collate latin1_general_ci;
|
||||
select 'a' regexp 'A' collate latin1_general_cs;
|
||||
select 'a' regexp 'A' collate latin1_bin;
|
||||
|
|
|
@ -228,6 +228,7 @@ static int add_collation(CHARSET_INFO *cs)
|
|||
}
|
||||
else
|
||||
{
|
||||
uchar *sort_order= all_charsets[cs->number]->sort_order;
|
||||
simple_cs_init_functions(all_charsets[cs->number]);
|
||||
new->mbminlen= 1;
|
||||
new->mbmaxlen= 1;
|
||||
|
@ -236,6 +237,16 @@ static int add_collation(CHARSET_INFO *cs)
|
|||
all_charsets[cs->number]->state |= MY_CS_LOADED;
|
||||
}
|
||||
all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
|
||||
|
||||
/*
|
||||
Check if case sensitive sort order: A < a < B.
|
||||
We need MY_CS_FLAG for regex library, and for
|
||||
case sensitivity flag for 5.0 client protocol,
|
||||
to support isCaseSensitive() method in JDBC driver
|
||||
*/
|
||||
if (sort_order && sort_order['A'] < sort_order['a'] &&
|
||||
sort_order['a'] < sort_order['B'])
|
||||
all_charsets[cs->number]->state|= MY_CS_CSSORT;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
|
|
@ -2364,11 +2364,12 @@ Item_func_regex::fix_fields(THD *thd, TABLE_LIST *tables, Item **ref)
|
|||
return 0;
|
||||
}
|
||||
int error;
|
||||
if ((error=regcomp(&preg,res->c_ptr(),
|
||||
(cmp_collation.collation->state & MY_CS_BINSORT) ?
|
||||
REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE,
|
||||
cmp_collation.collation)))
|
||||
if ((error= regcomp(&preg,res->c_ptr(),
|
||||
((cmp_collation.collation->state & MY_CS_BINSORT) ||
|
||||
(cmp_collation.collation->state & MY_CS_CSSORT)) ?
|
||||
REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE,
|
||||
cmp_collation.collation)))
|
||||
{
|
||||
(void) regerror(error,&preg,buff,sizeof(buff));
|
||||
my_printf_error(ER_REGEXP_ERROR,ER(ER_REGEXP_ERROR),MYF(0),buff);
|
||||
|
@ -2416,10 +2417,11 @@ longlong Item_func_regex::val_int()
|
|||
regex_compiled=0;
|
||||
}
|
||||
if (regcomp(&preg,res2->c_ptr(),
|
||||
(cmp_collation.collation->state & MY_CS_BINSORT) ?
|
||||
REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE,
|
||||
cmp_collation.collation))
|
||||
((cmp_collation.collation->state & MY_CS_BINSORT) ||
|
||||
(cmp_collation.collation->state & MY_CS_CSSORT)) ?
|
||||
REG_EXTENDED | REG_NOSUB :
|
||||
REG_EXTENDED | REG_NOSUB | REG_ICASE,
|
||||
cmp_collation.collation))
|
||||
{
|
||||
null_value=1;
|
||||
return 0;
|
||||
|
|
|
@ -589,12 +589,12 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
|
|||
|
||||
CHARSET_INFO my_charset_latin2_czech_ci =
|
||||
{
|
||||
2,0,0, /* number */
|
||||
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
|
||||
"latin2", /* cs name */
|
||||
"latin2_czech_cs", /* name */
|
||||
"", /* comment */
|
||||
NULL, /* tailoring */
|
||||
2,0,0, /* number */
|
||||
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
|
||||
"latin2", /* cs name */
|
||||
"latin2_czech_cs", /* name */
|
||||
"", /* comment */
|
||||
NULL, /* tailoring */
|
||||
ctype_czech,
|
||||
to_lower_czech,
|
||||
to_upper_czech,
|
||||
|
|
|
@ -624,12 +624,12 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
|
|||
|
||||
CHARSET_INFO my_charset_cp1250_czech_ci =
|
||||
{
|
||||
34,0,0, /* number */
|
||||
MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
|
||||
"cp1250", /* cs name */
|
||||
"cp1250_czech_cs", /* name */
|
||||
"", /* comment */
|
||||
NULL, /* tailoring */
|
||||
34,0,0, /* number */
|
||||
MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_CSSORT, /* state */
|
||||
"cp1250", /* cs name */
|
||||
"cp1250_czech_cs", /* name */
|
||||
"", /* comment */
|
||||
NULL, /* tailoring */
|
||||
ctype_win1250ch,
|
||||
to_lower_win1250ch,
|
||||
to_upper_win1250ch,
|
||||
|
|
Loading…
Reference in a new issue