MDEV-33621 Unify duplicate code in my_wildcmp_uca_impl() and my_wildcmp_unicode_impl()

This is a refactoring patch; it does not change the behaviour.
The MTR tests are being added only to cover the LIKE predicate better
(these tests should have been added earlier under terms of MDEV-9711).
This patch does not need its own specific MTR tests.

Moving the duplicate code into a new shared file ctype-wildcmp.inl
and including it from multiple places, to define the following functions:

- my_wildcmp_uca_impl(), in ctype-uca.c

  For utf8mb3, utf8mb4, ucs2, utf16, utf32, using cs->cset->mb_wc().
  For UCA based collations.

- my_wildcmp_mb2_or_mb4_general_ci_impl(), in ctype-ucs2.c:

  For ucs2, utf16, utf32, using cs->cset->mb_wc().
  For general_ci-style collations:
      - xxx_general_ci
      - xxx_general_mysql500_ci
      - xxx_general_nopad_ci

- my_wildcmp_mb2_or_mb4_bin_impl(), in ctype-ucs2.c:

  For ucs2, utf16, utf32, using cs->cset->mb_wc().
  For _bin collations:
      - xxx_bin
      - xxx_nopad_bin

- my_wildcmp_utf8mb3_general_ci_impl(), in ctype-utf8.c

  Optimized for utf8mb3, using my_mb_wc_utf8mb3_quick().

  For general_ci-style collations:
      - utf8mb3_general_ci
      - utf8mb3_general_mysql500_ci
      - utf8mb3_general_nopad_ci

- my_wildcmp_utf8mb4_general_ci_impl(), in ctype-utf8.c

  Optimized for utf8mb4, using my_mb_wc_utf8mb4_quick().

  For general_ci-style collations:
      - utf8mb4_general_ci
      - utf8mb4_general_nopad_ci
This commit is contained in:
Alexander Barkov 2024-03-07 12:00:36 +04:00
parent 9e7afa7782
commit 1e889a6e6c
14 changed files with 1285 additions and 406 deletions

View file

@ -1709,12 +1709,6 @@ size_t my_strnxfrm_unicode_full_nopad_bin(CHARSET_INFO *,
size_t my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *, size_t);
int my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr, const char *wildend,
int escape, int w_one, int w_many,
MY_CASEFOLD_INFO *weights);
extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader,
const char *buf, size_t buflen);
extern char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end,

View file

@ -5773,7 +5773,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3'
#
# MDEV-9711 NO PAD Collatons
#
SET character_set_connection=ucs2;
SET collation_connection=ucs2_general_nopad_ci;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -6051,6 +6051,127 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
ucs2_general_nopad_ci
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_general_nopad_ci DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
abc
abcd
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
SET collation_connection=ucs2_nopad_bin;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -6328,6 +6449,124 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
ucs2_nopad_bin
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_nopad_bin DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
#
# MDEV-10585 EXECUTE IMMEDIATE statement
#

View file

@ -1049,14 +1049,17 @@ SELECT CAST(CONVERT('1IJ3' USING ucs2) AS SIGNED);
--echo #
--echo # MDEV-9711 NO PAD Collatons
--echo #
SET character_set_connection=ucs2;
SET collation_connection=ucs2_general_nopad_ci;
let $coll='ucs2_general_nopad_ci';
let $coll_pad='ucs2_general_ci';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
SET collation_connection=ucs2_nopad_bin;
let $coll='ucs2_nopad_bin';
let $coll_pad='ucs2_bin';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
--echo #
--echo # MDEV-10585 EXECUTE IMMEDIATE statement

View file

@ -2258,7 +2258,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3'
#
# MDEV-9711 NO PAD Collatons
#
SET character_set_connection=utf16;
SET collation_connection=utf16_general_nopad_ci;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -2536,6 +2536,127 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
utf16_general_nopad_ci
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf16 COLLATE utf16_general_nopad_ci DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
abc
abcd
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
SET collation_connection=utf16_nopad_bin;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -2813,6 +2934,124 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
utf16_nopad_bin
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf16 COLLATE utf16_nopad_bin DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
#
# End of 10.2 tests
#

View file

@ -927,14 +927,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf16) AS SIGNED);
--echo #
--echo # MDEV-9711 NO PAD Collatons
--echo #
SET character_set_connection=utf16;
SET collation_connection=utf16_general_nopad_ci;
let $coll='utf16_general_nopad_ci';
let $coll_pad='utf16_general_ci';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
SET collation_connection=utf16_nopad_bin;
let $coll='utf16_nopad_bin';
let $coll_pad='utf16_bin';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
--echo #
--echo # End of 10.2 tests

View file

@ -2444,7 +2444,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3'
#
# MDEV-9711 NO PAD Collatons
#
SET character_set_connection=utf16le;
SET collation_connection=utf16le_general_nopad_ci;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -2722,6 +2722,127 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
utf16le_general_nopad_ci
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf16le COLLATE utf16le_general_nopad_ci DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
abc
abcd
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
SET collation_connection=utf16le_nopad_bin;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -2999,6 +3120,124 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
utf16le_nopad_bin
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf16le COLLATE utf16le_nopad_bin DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
#
# MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset
#

View file

@ -806,14 +806,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf16le) AS SIGNED);
--echo #
--echo # MDEV-9711 NO PAD Collatons
--echo #
SET character_set_connection=utf16le;
SET collation_connection=utf16le_general_nopad_ci;
let $coll='utf16le_general_nopad_ci';
let $coll_pad='utf16le_general_ci';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
SET collation_connection=utf16le_nopad_bin;
let $coll='utf16le_nopad_bin';
let $coll_pad='utf16le_bin';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
--echo #
--echo # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset

View file

@ -2314,7 +2314,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3'
#
# MDEV-9711 NO PAD Collatons
#
SET character_set_connection=utf32;
SET collation_connection=utf32_general_nopad_ci;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -2592,6 +2592,127 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
utf32_general_nopad_ci
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf32 COLLATE utf32_general_nopad_ci DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
abc
abcd
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
SET collation_connection=utf32_nopad_bin;
SET DEFAULT_STORAGE_ENGINE=MyISAM;
#
# Start of ctype_pad.inc
@ -2869,6 +2990,124 @@ DROP TABLE t1;
# End of ctype_pad.inc
#
SET DEFAULT_STORAGE_ENGINE=Default;
select @@collation_connection;
@@collation_connection
utf32_nopad_bin
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` varchar(10) CHARACTER SET utf32 COLLATE utf32_nopad_bin DEFAULT NULL,
KEY `a` (`a`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
explain select * from t1 where a like concat('abc','%');
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index
select * from t1 where a like "abc%";
a
abc
abcd
select * from t1 where a like concat("abc","%");
a
abc
abcd
select * from t1 where a like "ABC%";
a
select * from t1 where a like "test%";
a
test
select * from t1 where a like "te_t";
a
test
select * from t1 where a like "%a%";
a
a
abc
abcd
select * from t1 where a like "%abcd%";
a
abcd
select * from t1 where a like "%abc\d%";
a
abcd
drop table t1;
select 'AA' like 'AA';
'AA' like 'AA'
1
select 'AA' like 'A%A';
'AA' like 'A%A'
1
select 'AA' like 'A%%A';
'AA' like 'A%%A'
1
select 'AA' like 'AA%';
'AA' like 'AA%'
1
select 'AA' like '%AA%';
'AA' like '%AA%'
1
select 'AA' like '%A';
'AA' like '%A'
1
select 'AA' like '%AA';
'AA' like '%AA'
1
select 'AA' like 'A%A%';
'AA' like 'A%A%'
1
select 'AA' like '_%_%';
'AA' like '_%_%'
1
select 'AA' like '%A%A';
'AA' like '%A%A'
1
select 'AAA'like 'A%A%A';
'AAA'like 'A%A%A'
1
select 'AZ' like 'AZ';
'AZ' like 'AZ'
1
select 'AZ' like 'A%Z';
'AZ' like 'A%Z'
1
select 'AZ' like 'A%%Z';
'AZ' like 'A%%Z'
1
select 'AZ' like 'AZ%';
'AZ' like 'AZ%'
1
select 'AZ' like '%AZ%';
'AZ' like '%AZ%'
1
select 'AZ' like '%Z';
'AZ' like '%Z'
1
select 'AZ' like '%AZ';
'AZ' like '%AZ'
1
select 'AZ' like 'A%Z%';
'AZ' like 'A%Z%'
1
select 'AZ' like '_%_%';
'AZ' like '_%_%'
1
select 'AZ' like '%A%Z';
'AZ' like '%A%Z'
1
select 'AZ' like 'A_';
'AZ' like 'A_'
1
select 'AZ' like '_Z';
'AZ' like '_Z'
1
select 'AMZ'like 'A%M%Z';
'AMZ'like 'A%M%Z'
1
#
# MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed
# 10.2 tests

View file

@ -1040,15 +1040,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf32) AS SIGNED);
--echo #
--echo # MDEV-9711 NO PAD Collatons
--echo #
SET character_set_connection=utf32;
SET collation_connection=utf32_general_nopad_ci;
let $coll='utf32_general_nopad_ci';
let $coll_pad='utf32_general_ci';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
SET collation_connection=utf32_nopad_bin;
let $coll='utf32_nopad_bin';
let $coll_pad='utf32_bin';
--source include/ctype_pad_all_engines.inc
--source include/ctype_like.inc
--echo #
--echo # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed

View file

@ -32211,145 +32211,16 @@ static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
return 0;
}
/*
** Compare string against string with wildcard
** 0 if matched
** -1 if not matched with wildcard
** 1 if not matched
my_wildcmp_uca_impl()
A generic function for all Unicode character sets.
For UCA collations.
*/
/*
  Match the string str..str_end against the wildcard pattern
  wildstr..wildend.

  Characters on both sides are decoded with cs->cset->mb_wc() and compared
  with my_uca_charcmp().

  escape, w_one, w_many - code points acting as the escape character,
                          the single-character wildcard and the
                          any-sequence wildcard
  recurse_level         - current recursion depth, handed to
                          my_string_stack_guard() when that hook is set

  Returns 0 on a match, -1 when the string is exhausted while looking for
  the text that follows a w_many wildcard, and 1 in the remaining cases
  (mismatch, a character that could not be decoded, or the stack guard
  refusing further recursion).
*/
static
int my_wildcmp_uca_impl(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many, int recurse_level)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
int scan;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
/* The guard hook is optional; a non-zero answer is reported as "no match" */
if (my_string_stack_guard && my_string_stack_guard(recurse_level))
return 1;
while (wildstr != wildend)
{
/* Phase 1: consume pattern characters one by one until w_many is seen */
while (1)
{
my_bool escaped= 0;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
/* w_many is left unconsumed here: wildstr is advanced only below */
if (w_wc == (my_wc_t) w_many)
{
result= 1; /* Found an anchor char */
break;
}
wildstr+= scan;
/* An escape that is the last pattern character is treated as a literal */
if (w_wc == (my_wc_t) escape && wildstr < wildend)
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
wildstr+= scan;
escaped= 1;
}
/* Decode the next string character; an exhausted string also ends up here */
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0)
return 1;
str+= scan;
/* An escaped w_one must match literally, so it goes to the else branch */
if (!escaped && w_wc == (my_wc_t) w_one)
{
result= 1; /* Found an anchor char */
}
else
{
if (my_uca_charcmp(cs,s_wc,w_wc))
return 1; /* No match */
}
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
}
/* Phase 2: handle the w_many that terminated the loop above */
if (w_wc == (my_wc_t) w_many)
{ /* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; )
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t) w_many)
{
wildstr+= scan;
continue;
}
/* Every w_one in the wildcard run consumes exactly one string character */
if (w_wc == (my_wc_t) w_one)
{
wildstr+= scan;
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0)
return 1;
str+= scan;
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return 0; /* Ok if w_many is last */
if (str == str_end)
return -1;
/* Fetch the first pattern character that follows the wildcard run */
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
wildstr+= scan;
/* If it is an escape, the character after it is the one to search for */
if (w_wc == (my_wc_t) escape)
{
if (wildstr < wildend)
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
wildstr+= scan;
}
}
/* Try every position in the string where that character occurs */
while (1)
{
/* Skip until the first character from wildstr is found */
while (str != str_end)
{
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0)
return 1;
if (!my_uca_charcmp(cs,s_wc,w_wc))
break;
str+= scan;
}
if (str == str_end)
return -1;
/* Step over the matched character and match the rest recursively */
str+= scan;
result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many,
recurse_level + 1);
/* 0 (match) and -1 (string exhausted) are final; 1 means try further on */
if (result <= 0)
return result;
}
}
}
return (str != str_end ? 1 : 0);
}
/*
  Parameters for the shared template ctype-wildcmp.inl, which is included
  here to define my_wildcmp_uca_impl():
    MY_FUNCTION_NAME(x) - builds the generated name my_<x>_uca_impl
    MY_MB_WC            - decodes one character through cs->cset->mb_wc()
    MY_CHAR_EQ          - true when my_uca_charcmp() returns 0
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _uca_impl
#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e)
#define MY_CHAR_EQ(cs, wc1, wc2) (my_uca_charcmp(cs, wc1, wc2)==0)
#include "ctype-wildcmp.inl"
int my_wildcmp_uca(CHARSET_INFO *cs,

View file

@ -796,6 +796,65 @@ cnv:
return (int) (dst -db);
}
/*
  Character equality test for general_ci-style collations: the decision is
  delegated to my_casefold_char_eq_general_ci() using the case folding data
  attached to the character set.

  Must not be used with binary collations (checked in debug builds).
*/
static inline my_bool
my_char_eq_mb2_or_mb4_general_ci(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
  my_bool equal;
  DBUG_ASSERT(!(cs->state & MY_CS_BINSORT));
  equal= my_casefold_char_eq_general_ci(cs->casefold, wc1, wc2);
  return equal;
}
/*
  Character equality test for _bin collations: two characters are equal
  only when their code points are identical.

  Must only be used with binary collations (checked in debug builds).
*/
static inline my_bool
my_char_eq_mb2_or_mb4_bin(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
  DBUG_ASSERT(cs->state & MY_CS_BINSORT);
  return (my_bool) (wc1 == wc2);
}
/*
my_wildcmp_mb2_or_mb4_general_ci_impl()
A generic function for ucs2, utf16, utf32, for general_ci-style collations.

Generated by including the shared template ctype-wildcmp.inl with:
  MY_FUNCTION_NAME(x) - builds the name my_<x>_mb2_or_mb4_general_ci_impl
  MY_MB_WC            - decodes one character through cs->cset->mb_wc()
  MY_CHAR_EQ          - compares with my_char_eq_mb2_or_mb4_general_ci()
NOTE(review): the same three macros are defined again further down, so the
template presumably #undef's them at its end - confirm in ctype-wildcmp.inl.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _mb2_or_mb4_general_ci_impl
#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e)
#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_mb2_or_mb4_general_ci(cs, wc1, wc2)
#include "ctype-wildcmp.inl"
/*
  Wildcard comparison entry point for general_ci-style collations of
  ucs2, utf16 and utf32.

  Forwards all arguments unchanged to
  my_wildcmp_mb2_or_mb4_general_ci_impl() and starts the recursion
  counter at 1. The return value is whatever the implementation returns.
*/
static int
my_wildcmp_mb2_or_mb4_general_ci(CHARSET_INFO *cs,
                                 const char *str,const char *str_end,
                                 const char *wildstr,const char *wildend,
                                 int escape, int w_one, int w_many)
{
  const int first_level= 1; /* Recursion depth of the outermost call */
  return my_wildcmp_mb2_or_mb4_general_ci_impl(cs,
                                               str, str_end,
                                               wildstr, wildend,
                                               escape, w_one, w_many,
                                               first_level);
}
/*
my_wildcmp_mb2_or_mb4_bin_impl()
A generic function for ucs2, utf16, utf32, for _bin collations.

Generated by including the shared template ctype-wildcmp.inl with:
  MY_FUNCTION_NAME(x) - builds the name my_<x>_mb2_or_mb4_bin_impl
  MY_MB_WC            - decodes one character through cs->cset->mb_wc()
  MY_CHAR_EQ          - compares with my_char_eq_mb2_or_mb4_bin()
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _mb2_or_mb4_bin_impl
#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e)
#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_mb2_or_mb4_bin(cs, wc1, wc2)
#include "ctype-wildcmp.inl"
/*
  Wildcard comparison entry point for _bin collations of ucs2, utf16
  and utf32.

  Forwards all arguments unchanged to my_wildcmp_mb2_or_mb4_bin_impl()
  and starts the recursion counter at 1. The return value is whatever
  the implementation returns.
*/
static int
my_wildcmp_mb2_or_mb4_bin(CHARSET_INFO *cs,
                          const char *str,const char *str_end,
                          const char *wildstr,const char *wildend,
                          int escape, int w_one, int w_many)
{
  const int first_level= 1; /* Recursion depth of the outermost call */
  return my_wildcmp_mb2_or_mb4_bin_impl(cs,
                                        str, str_end,
                                        wildstr, wildend,
                                        escape, w_one, w_many,
                                        first_level);
}
#endif /* HAVE_CHARSET_mb2_or_mb4 */
@ -1407,29 +1466,6 @@ my_charpos_utf16(CHARSET_INFO *cs,
}
/*
  Wildcard comparison for utf16 collations that use case folding.

  Calls my_wildcmp_unicode() with the case folding data of the
  character set as its "weights" argument and returns its result.
*/
static int
my_wildcmp_utf16_ci(CHARSET_INFO *cs,
                    const char *str,const char *str_end,
                    const char *wildstr,const char *wildend,
                    int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs,
                            str, str_end,
                            wildstr, wildend,
                            escape, w_one, w_many,
                            cs->casefold);
}
/*
  Wildcard comparison for utf16 binary collations.

  Calls my_wildcmp_unicode() without case folding data (NULL "weights")
  and returns its result.
  NOTE(review): NULL presumably makes my_wildcmp_unicode() compare code
  points as they are - confirm against its implementation.
*/
static int
my_wildcmp_utf16_bin(CHARSET_INFO *cs,
                     const char *str,const char *str_end,
                     const char *wildstr,const char *wildend,
                     int escape, int w_one, int w_many)
{
  MY_CASEFOLD_INFO *no_weights= NULL;
  return my_wildcmp_unicode(cs,
                            str, str_end,
                            wildstr, wildend,
                            escape, w_one, w_many,
                            no_weights);
}
static void
my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *pos, size_t len,
@ -1465,7 +1501,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
my_strnxfrm_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16,
@ -1486,7 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_bin,
@ -1507,7 +1543,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
my_strnxfrm_nopad_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad,
@ -1528,7 +1564,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad_bin,
@ -1816,7 +1852,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
my_strnxfrm_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16,
@ -1837,7 +1873,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_bin,
@ -1858,7 +1894,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
my_strnxfrm_nopad_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf16_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad,
@ -1879,7 +1915,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf16_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf16_nopad_bin,
@ -2554,29 +2590,6 @@ void my_fill_utf32(CHARSET_INFO *cs,
}
/*
  Wildcard comparison for utf32: forwards to my_wildcmp_unicode(), passing
  the collation's casefold data (cs->casefold) as the weights argument.
*/
static int
my_wildcmp_utf32_ci(CHARSET_INFO *cs,
                    const char *str, const char *str_end,
                    const char *wildstr, const char *wildend,
                    int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs,
                            str, str_end,
                            wildstr, wildend,
                            escape, w_one, w_many,
                            cs->casefold);
}
/*
  Wildcard comparison for utf32: forwards to my_wildcmp_unicode() with a
  NULL weights argument.
*/
static int
my_wildcmp_utf32_bin(CHARSET_INFO *cs,
                     const char *str, const char *str_end,
                     const char *wildstr, const char *wildend,
                     int escape, int w_one, int w_many)
{
  MY_CASEFOLD_INFO *no_weights= NULL;
  return my_wildcmp_unicode(cs,
                            str, str_end,
                            wildstr, wildend,
                            escape, w_one, w_many,
                            no_weights);
}
static size_t
my_scan_utf32(CHARSET_INFO *cs,
const char *str, const char *end, int sequence_type)
@ -2613,7 +2626,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
my_strnxfrm_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf32_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32,
@ -2634,7 +2647,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf32_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32,
@ -2655,7 +2668,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
my_strnxfrm_nopad_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_utf32_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32_nopad,
@ -2676,7 +2689,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
my_wildcmp_utf32_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_utf32_nopad,
@ -3148,29 +3161,6 @@ my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Wildcard comparison for ucs2: forwards to my_wildcmp_unicode(), passing
  the collation's casefold data (cs->casefold) as the weights argument.
*/
static int
my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
                   const char *str, const char *str_end,
                   const char *wildstr, const char *wildend,
                   int escape, int w_one, int w_many)
{
  return my_wildcmp_unicode(cs,
                            str, str_end,
                            wildstr, wildend,
                            escape, w_one, w_many,
                            cs->casefold);
}
/*
  Wildcard comparison for ucs2: forwards to my_wildcmp_unicode() with a
  NULL weights argument.
*/
static int
my_wildcmp_ucs2_bin(CHARSET_INFO *cs,
                    const char *str, const char *str_end,
                    const char *wildstr, const char *wildend,
                    int escape, int w_one, int w_many)
{
  MY_CASEFOLD_INFO *no_weights= NULL;
  return my_wildcmp_unicode(cs,
                            str, str_end,
                            wildstr, wildend,
                            escape, w_one, w_many,
                            no_weights);
}
static void
my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,
@ -3205,7 +3195,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
my_strnxfrm_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2,
@ -3226,7 +3216,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_mysql500_ci_handler =
my_strnxfrm_ucs2_general_mysql500_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2,
@ -3247,7 +3237,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
my_strnxfrm_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2_bin,
@ -3268,7 +3258,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
my_strnxfrm_nopad_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
my_wildcmp_mb2_or_mb4_general_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2_nopad,
@ -3289,7 +3279,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
my_strnxfrm_nopad_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_bin,
my_wildcmp_mb2_or_mb4_bin,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2_nopad_bin,

View file

@ -132,6 +132,24 @@ my_toupper_unicode(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc)
}
/*
  Collation-aware equality test for two characters.
  For simple Unicode AI CI collations, e.g. utf8mb4_general_ci.

  Both characters are passed through my_tosort_unicode() with the given
  casefold data, and the resulting values are compared.

  @return TRUE if the two characters are equal
  @return FALSE otherwise
*/
static inline my_bool
my_casefold_char_eq_general_ci(MY_CASEFOLD_INFO *casefold,
                               my_wc_t wc1, my_wc_t wc2)
{
  my_wc_t lhs= wc1;
  my_wc_t rhs= wc2;
  DBUG_ASSERT(casefold->simple_weight);
  my_tosort_unicode(casefold, &lhs);
  my_tosort_unicode(casefold, &rhs);
  return lhs == rhs;
}
extern MY_CASEFOLD_INFO my_casefold_default;
extern MY_CASEFOLD_INFO my_casefold_turkish;
extern MY_CASEFOLD_INFO my_casefold_mysql500;

View file

@ -133,170 +133,11 @@ my_casefold_multiply_utf8mbx(CHARSET_INFO *cs)
}
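/*
** Compare string against string with wildcard
** This function is used in UTF8 and UCS2
**
** 0 if matched
** -1 if not matched and the end of str was reached after a w_many wildcard
** 1 if not matched (also on a character decoding failure or when the
** stack guard reports a problem)
*/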
static
int my_wildcmp_unicode_impl(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
MY_CASEFOLD_INFO *weights, int recurse_level)
static inline my_bool
my_char_eq_utf8mbx_general_ci(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
int scan;
my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc;
if (my_string_stack_guard && my_string_stack_guard(recurse_level))
return 1;
while (wildstr != wildend)
{
while (1)
{
my_bool escaped= 0;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t) w_many)
{
result= 1; /* Found an anchor char */
break;
}
wildstr+= scan;
if (w_wc == (my_wc_t) escape && wildstr < wildend)
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
wildstr+= scan;
escaped= 1;
}
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0)
return 1;
str+= scan;
if (!escaped && w_wc == (my_wc_t) w_one)
{
result= 1; /* Found an anchor char */
}
else
{
if (weights)
{
my_tosort_unicode(weights, &s_wc);
my_tosort_unicode(weights, &w_wc);
}
if (s_wc != w_wc)
return 1; /* No match */
}
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
}
if (w_wc == (my_wc_t) w_many)
{ /* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; )
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t) w_many)
{
wildstr+= scan;
continue;
}
if (w_wc == (my_wc_t) w_one)
{
wildstr+= scan;
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0)
return 1;
str+= scan;
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return 0; /* Ok if w_many is last */
if (str == str_end)
return -1;
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
wildstr+= scan;
if (w_wc == (my_wc_t) escape)
{
if (wildstr < wildend)
{
if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr,
(const uchar*)wildend)) <= 0)
return 1;
wildstr+= scan;
}
}
while (1)
{
/* Skip until the first character from wildstr is found */
while (str != str_end)
{
if ((scan= mb_wc(cs, &s_wc, (const uchar*)str,
(const uchar*)str_end)) <= 0)
return 1;
if (weights)
{
my_tosort_unicode(weights, &s_wc);
my_tosort_unicode(weights, &w_wc);
}
if (s_wc == w_wc)
break;
str+= scan;
}
if (str == str_end)
return -1;
str+= scan;
result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend,
escape, w_one, w_many,
weights, recurse_level + 1);
if (result <= 0)
return result;
}
}
}
return (str != str_end ? 1 : 0);
}
int
my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
MY_CASEFOLD_INFO *weights)
{
return my_wildcmp_unicode_impl(cs, str, str_end,
wildstr, wildend,
escape, w_one, w_many, weights, 1);
DBUG_ASSERT((cs->state & MY_CS_BINSORT) == 0);
return my_casefold_char_eq_general_ci(cs->casefold, wc1, wc2);
}
@ -774,15 +615,25 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t)
}
/*
my_wildcmp_utf8mb3_general_ci_impl()
An optimized function for utf8mb3.
For general_ci-style collations.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_ci_impl
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_utf8mbx_general_ci(cs, wc1, wc2)
#include "ctype-wildcmp.inl"
/*
LIKE comparison entry point for utf8mb3 (general_ci-style collations).
NOTE(review): the body below contains two consecutive return statements, so
as listed the second one (the call to my_wildcmp_utf8mb3_general_ci_impl())
can never execute. This looks like removed and added diff lines shown
together -- confirm which return is meant to remain.
*/
static
int my_wildcmp_utf8mb3(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
MY_CASEFOLD_INFO *uni_plane= cs->casefold;
return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend,
escape,w_one,w_many,uni_plane);
return my_wildcmp_utf8mb3_general_ci_impl(cs,str,str_end,wildstr,wildend,
escape, w_one, w_many, 1);
}
@ -3117,14 +2968,25 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t)
}
/*
my_wildcmp_utf8mb4_general_ci_impl()
An optimized function for utf8mb4.
For general_ci-style collations.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_ci_impl
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb4_quick(pwc, s, e)
#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_utf8mbx_general_ci(cs, wc1, wc2)
#include "ctype-wildcmp.inl"
/*
LIKE comparison entry point for utf8mb4 (general_ci-style collations).
NOTE(review): the body below contains two consecutive return statements, so
as listed the second one (the call to my_wildcmp_utf8mb4_general_ci_impl())
can never execute. This looks like removed and added diff lines shown
together -- confirm which return is meant to remain.
*/
static int
my_wildcmp_utf8mb4(CHARSET_INFO *cs,
const char *str, const char *strend,
const char *wildstr, const char *wildend,
int escape, int w_one, int w_many)
{
return my_wildcmp_unicode(cs, str, strend, wildstr, wildend,
escape, w_one, w_many, cs->casefold);
return my_wildcmp_utf8mb4_general_ci_impl(cs, str, strend, wildstr, wildend,
escape, w_one, w_many, 1);
}

177
strings/ctype-wildcmp.inl Normal file
View file

@ -0,0 +1,177 @@
/*
Copyright (c) 2024, MariaDB
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
*/
#ifndef MY_FUNCTION_NAME
#error MY_FUNCTION_NAME is not defined
#endif
#ifndef MY_MB_WC
#error MY_MB_WC is not defined
#endif
#ifndef MY_CHAR_EQ
#error MY_CHAR_EQ is not defined
#endif
/*
** Compare string against string with wildcard
**
** This is a template body. Before including it, the including file must
** define:
**   MY_FUNCTION_NAME(x) - builds the name of the generated function
**   MY_MB_WC(cs,pwc,s,e) - decodes one character into *pwc and returns the
**                          number of bytes consumed, or a value <= 0 on
**                          failure
**   MY_CHAR_EQ(cs,a,b)   - non-zero if the two characters are equal
**
** str..str_end is the subject, wildstr..wildend is the pattern.
** escape, w_one and w_many are the escape character, the "exactly one
** character" wildcard and the "any number of characters" wildcard.
** recurse_level is the current recursion depth, passed to the stack guard.
**
** Returns:
** 0 if matched
** -1 if not matched and the end of str was reached after a w_many wildcard
** 1 if not matched (also on a character decoding failure or when
**   my_string_stack_guard() returns non-zero)
*/
static int
MY_FUNCTION_NAME(wildcmp)(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many,
int recurse_level)
{
int result= -1; /* Not found, using wildcards */
my_wc_t s_wc, w_wc;
int scan;
/* Give up with "not matched" if the stack guard objects to this depth */
if (my_string_stack_guard && my_string_stack_guard(recurse_level))
return 1;
while (wildstr != wildend)
{
/* Compare character by character until w_many or the end of the pattern */
while (1)
{
my_bool escaped= 0;
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t) w_many)
{
/* wildstr is not advanced here: w_many is consumed by the code below */
result= 1; /* Found an anchor char */
break;
}
wildstr+= scan;
/*
An escape character followed by at least one more byte: take the next
pattern character literally. An escape at the very end of the pattern is
compared as an ordinary character.
*/
if (w_wc == (my_wc_t) escape && wildstr < wildend)
{
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
wildstr+= scan;
escaped= 1;
}
/* Decoding fails (<= 0) when str is exhausted or malformed: not matched */
if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str,
(const uchar*) str_end)) <= 0)
return 1;
str+= scan;
/* An unescaped w_one accepts any single character from str */
if (!escaped && w_wc == (my_wc_t) w_one)
{
result= 1; /* Found an anchor char */
}
else
{
if (!MY_CHAR_EQ(cs, s_wc, w_wc))
return 1; /* No match */
}
if (wildstr == wildend)
return (str != str_end); /* Match if both are at end */
}
if (w_wc == (my_wc_t) w_many)
{ /* Found w_many */
/* Remove any '%' and '_' from the wild search string */
for ( ; wildstr != wildend ; )
{
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
if (w_wc == (my_wc_t) w_many)
{
wildstr+= scan;
continue;
}
if (w_wc == (my_wc_t) w_one)
{
/* Each w_one in the wildcard run consumes one character of str */
wildstr+= scan;
if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str,
(const uchar*) str_end)) <= 0)
return 1;
str+= scan;
continue;
}
break; /* Not a wild character */
}
if (wildstr == wildend)
return 0; /* Ok if w_many is last */
if (str == str_end)
return -1;
/* Fetch the first pattern character that follows the wildcard run */
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
wildstr+= scan;
if (w_wc == (my_wc_t) escape)
{
/* Escaped character: the one after the escape is what must be found */
if (wildstr < wildend)
{
if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr,
(const uchar*) wildend)) <= 0)
return 1;
wildstr+= scan;
}
}
/* Try every position in str where that pattern character occurs */
while (1)
{
/* Skip until the first character from wildstr is found */
while (str != str_end)
{
if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str,
(const uchar*) str_end)) <= 0)
return 1;
if (MY_CHAR_EQ(cs, s_wc, w_wc))
break;
str+= scan;
}
/* str ran out without finding the character: no candidate is left */
if (str == str_end)
return -1;
str+= scan;
/* Match the rest of the pattern against the rest of str */
result= MY_FUNCTION_NAME(wildcmp)(cs,
str, str_end,
wildstr, wildend,
escape, w_one, w_many,
recurse_level + 1);
/* 0 and -1 are final answers; 1 means: try the next candidate position */
if (result <= 0)
return result;
}
}
}
/* Pattern fully consumed: matched only if str is fully consumed too */
return (str != str_end ? 1 : 0);
}
#undef MY_FUNCTION_NAME
#undef MY_MB_WC
#undef MY_CHAR_EQ