From 1e889a6e6c544d4279ae781b7e33543ba2ab12e7 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Thu, 7 Mar 2024 12:00:36 +0400 Subject: [PATCH] MDEV-33621 Unify duplicate code in my_wildcmp_uca_impl() and my_wildcmp_unicode_impl() This is a refactoring patch, it does not change the behaviour. The MTR tests are being added only to cover the LIKE predicate better. (these tests should have been added earlier under terms of MDEV 9711). This patch does not need its own specific MTR tests. Moving the duplicate code into a new shared file ctype-wildcmp.inl and including it from multiple places, to define the following functions: - my_wildcmp_uca_impl(), in ctype-uca.c For utf8mb3, utf8mb4, ucs2, utf16, utf32, using cs->cset->mb_wc(). For UCA based collations. - my_wildcmp_mb2_or_mb4_general_ci_impl(), in ctype-ucs2.c: For ucs2, utf16, utf32, using cs->cset->mb_wc(). For general_ci-style collations: - xxx_general_ci - xxx_general_mysql500_ci - xxx_general_nopad_ci - my_wildcmp_mb2_or_mb4_bin_impl(), in ctype-ucs2.c: For ucs2, utf16, utf32, using cs->cset->mb_wc(). For _bin collations: - xxx_bin - xxx_nopad_bin - my_wildcmp_utf8mb3_general_ci_impl(), in ctype-utf8.c Optimized for utf8mb3, using my_mb_wc_utf8mb3_quick(). For general_ci-style collations: - utf8mb3_general_ci - utf8mb3_general_mysql500_ci - utf8mb3_general_nopad_ci - my_wildcmp_utf8mb4_general_ci_impl(), in ctype-utf8.c Optimized for utf8mb4, using my_mb_wc_utf8mb4_quick(). 
For general_ci-style collations: - utf8mb4_general_ci - utf8mb4_general_nopad_ci --- include/m_ctype.h | 6 - mysql-test/main/ctype_ucs.result | 241 ++++++++++++++++++++++++++- mysql-test/main/ctype_ucs.test | 5 +- mysql-test/main/ctype_utf16.result | 241 ++++++++++++++++++++++++++- mysql-test/main/ctype_utf16.test | 5 +- mysql-test/main/ctype_utf16le.result | 241 ++++++++++++++++++++++++++- mysql-test/main/ctype_utf16le.test | 5 +- mysql-test/main/ctype_utf32.result | 241 ++++++++++++++++++++++++++- mysql-test/main/ctype_utf32.test | 6 +- strings/ctype-uca.c | 145 +--------------- strings/ctype-ucs2.c | 162 +++++++++--------- strings/ctype-unidata.h | 18 ++ strings/ctype-utf8.c | 198 ++++------------------ strings/ctype-wildcmp.inl | 177 ++++++++++++++++++++ 14 files changed, 1285 insertions(+), 406 deletions(-) create mode 100644 strings/ctype-wildcmp.inl diff --git a/include/m_ctype.h b/include/m_ctype.h index ab2f84656ef..8159e9ce7ee 100644 --- a/include/m_ctype.h +++ b/include/m_ctype.h @@ -1709,12 +1709,6 @@ size_t my_strnxfrm_unicode_full_nopad_bin(CHARSET_INFO *, size_t my_strnxfrmlen_unicode_full_bin(CHARSET_INFO *, size_t); -int my_wildcmp_unicode(CHARSET_INFO *cs, - const char *str, const char *str_end, - const char *wildstr, const char *wildend, - int escape, int w_one, int w_many, - MY_CASEFOLD_INFO *weights); - extern my_bool my_parse_charset_xml(MY_CHARSET_LOADER *loader, const char *buf, size_t buflen); extern char *my_strchr(CHARSET_INFO *cs, const char *str, const char *end, diff --git a/mysql-test/main/ctype_ucs.result b/mysql-test/main/ctype_ucs.result index 99d8209d32e..a6a0e628e11 100644 --- a/mysql-test/main/ctype_ucs.result +++ b/mysql-test/main/ctype_ucs.result @@ -5773,7 +5773,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=ucs2; +SET collation_connection=ucs2_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -6051,6 +6051,127 @@ DROP 
TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +ucs2_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 
'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=ucs2_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -6328,6 +6449,124 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +ucs2_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET ucs2 COLLATE ucs2_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 23 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; 
+'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # MDEV-10585 EXECUTE IMMEDIATE statement # diff --git a/mysql-test/main/ctype_ucs.test b/mysql-test/main/ctype_ucs.test index 993f9be5c54..43bd50f639a 100644 --- a/mysql-test/main/ctype_ucs.test +++ b/mysql-test/main/ctype_ucs.test @@ -1049,14 +1049,17 @@ SELECT CAST(CONVERT('1IJ3' USING ucs2) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=ucs2; +SET collation_connection=ucs2_general_nopad_ci; let $coll='ucs2_general_nopad_ci'; let $coll_pad='ucs2_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=ucs2_nopad_bin; let $coll='ucs2_nopad_bin'; let $coll_pad='ucs2_bin'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc --echo # --echo # MDEV-10585 EXECUTE IMMEDIATE statement diff --git a/mysql-test/main/ctype_utf16.result b/mysql-test/main/ctype_utf16.result index 89d5283b63c..834d05828ca 100644 --- a/mysql-test/main/ctype_utf16.result +++ 
b/mysql-test/main/ctype_utf16.result @@ -2258,7 +2258,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=utf16; +SET collation_connection=utf16_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2536,6 +2536,127 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16 COLLATE utf16_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 
'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=utf16_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2813,6 +2934,124 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16 COLLATE utf16_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 where a like "test%"; +a +test +select 
* from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # End of 10.2 tests # diff --git a/mysql-test/main/ctype_utf16.test b/mysql-test/main/ctype_utf16.test index 529b737fcfb..9f81f23a5b2 100644 --- a/mysql-test/main/ctype_utf16.test +++ b/mysql-test/main/ctype_utf16.test @@ -927,14 +927,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf16) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=utf16; +SET collation_connection=utf16_general_nopad_ci; let $coll='utf16_general_nopad_ci'; let $coll_pad='utf16_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=utf16_nopad_bin; let $coll='utf16_nopad_bin'; let $coll_pad='utf16_bin'; --source 
include/ctype_pad_all_engines.inc +--source include/ctype_like.inc --echo # --echo # End of 10.2 tests diff --git a/mysql-test/main/ctype_utf16le.result b/mysql-test/main/ctype_utf16le.result index 9bc53a1041d..99e9a2ef4d3 100644 --- a/mysql-test/main/ctype_utf16le.result +++ b/mysql-test/main/ctype_utf16le.result @@ -2444,7 +2444,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=utf16le; +SET collation_connection=utf16le_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2722,6 +2722,127 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16le_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16le COLLATE utf16le_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 
+select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=utf16le_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2999,6 +3120,124 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf16le_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf16le COLLATE utf16le_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key 
key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset # diff --git a/mysql-test/main/ctype_utf16le.test b/mysql-test/main/ctype_utf16le.test index 537a456f7db..15fbda6b2ee 100644 --- a/mysql-test/main/ctype_utf16le.test +++ b/mysql-test/main/ctype_utf16le.test @@ -806,14 +806,17 
@@ SELECT CAST(CONVERT('1IJ3' USING utf16le) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=utf16le; +SET collation_connection=utf16le_general_nopad_ci; let $coll='utf16le_general_nopad_ci'; let $coll_pad='utf16le_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=utf16le_nopad_bin; let $coll='utf16le_nopad_bin'; let $coll_pad='utf16le_bin'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc --echo # --echo # MDEV-23408 Wrong result upon query from I_S and further Assertion `!alias_arg || strlen(alias_arg->str) == alias_arg->length' failed with certain connection charset diff --git a/mysql-test/main/ctype_utf32.result b/mysql-test/main/ctype_utf32.result index 7806b90b081..47497cdd771 100644 --- a/mysql-test/main/ctype_utf32.result +++ b/mysql-test/main/ctype_utf32.result @@ -2314,7 +2314,7 @@ Warning 1292 Truncated incorrect INTEGER value: '1IJ3' # # MDEV-9711 NO PAD Collatons # -SET character_set_connection=utf32; +SET collation_connection=utf32_general_nopad_ci; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2592,6 +2592,127 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf32_general_nopad_ci +create table t1 as select repeat(' ',10) as a union select null; +alter table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf32 COLLATE utf32_general_nopad_ci DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type 
table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +abc +abcd +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 +SET collation_connection=utf32_nopad_bin; SET DEFAULT_STORAGE_ENGINE=MyISAM; # # Start of ctype_pad.inc @@ -2869,6 +2990,124 @@ DROP TABLE t1; # End of ctype_pad.inc # SET DEFAULT_STORAGE_ENGINE=Default; +select @@collation_connection; +@@collation_connection +utf32_nopad_bin +create table t1 as select repeat(' ',10) as a union select null; +alter 
table t1 add key(a); +show create table t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` varchar(10) CHARACTER SET utf32 COLLATE utf32_nopad_bin DEFAULT NULL, + KEY `a` (`a`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test"); +explain select * from t1 where a like 'abc%'; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +explain select * from t1 where a like concat('abc','%'); +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 range a a 43 NULL 2 Using where; Using index +select * from t1 where a like "abc%"; +a +abc +abcd +select * from t1 where a like concat("abc","%"); +a +abc +abcd +select * from t1 where a like "ABC%"; +a +select * from t1 where a like "test%"; +a +test +select * from t1 where a like "te_t"; +a +test +select * from t1 where a like "%a%"; +a +a +abc +abcd +select * from t1 where a like "%abcd%"; +a +abcd +select * from t1 where a like "%abc\d%"; +a +abcd +drop table t1; +select 'AA' like 'AA'; +'AA' like 'AA' +1 +select 'AA' like 'A%A'; +'AA' like 'A%A' +1 +select 'AA' like 'A%%A'; +'AA' like 'A%%A' +1 +select 'AA' like 'AA%'; +'AA' like 'AA%' +1 +select 'AA' like '%AA%'; +'AA' like '%AA%' +1 +select 'AA' like '%A'; +'AA' like '%A' +1 +select 'AA' like '%AA'; +'AA' like '%AA' +1 +select 'AA' like 'A%A%'; +'AA' like 'A%A%' +1 +select 'AA' like '_%_%'; +'AA' like '_%_%' +1 +select 'AA' like '%A%A'; +'AA' like '%A%A' +1 +select 'AAA'like 'A%A%A'; +'AAA'like 'A%A%A' +1 +select 'AZ' like 'AZ'; +'AZ' like 'AZ' +1 +select 'AZ' like 'A%Z'; +'AZ' like 'A%Z' +1 +select 'AZ' like 'A%%Z'; +'AZ' like 'A%%Z' +1 +select 'AZ' like 'AZ%'; +'AZ' like 'AZ%' +1 +select 'AZ' like '%AZ%'; +'AZ' like '%AZ%' +1 +select 'AZ' like '%Z'; +'AZ' like '%Z' +1 +select 'AZ' like '%AZ'; +'AZ' like '%AZ' +1 +select 'AZ' like 'A%Z%'; +'AZ' like 'A%Z%' +1 +select 'AZ' like '_%_%'; +'AZ' 
like '_%_%' +1 +select 'AZ' like '%A%Z'; +'AZ' like '%A%Z' +1 +select 'AZ' like 'A_'; +'AZ' like 'A_' +1 +select 'AZ' like '_Z'; +'AZ' like '_Z' +1 +select 'AMZ'like 'A%M%Z'; +'AMZ'like 'A%M%Z' +1 # # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed # 10.2 tests diff --git a/mysql-test/main/ctype_utf32.test b/mysql-test/main/ctype_utf32.test index bcbc3b14691..52071325199 100644 --- a/mysql-test/main/ctype_utf32.test +++ b/mysql-test/main/ctype_utf32.test @@ -1040,15 +1040,17 @@ SELECT CAST(CONVERT('1IJ3' USING utf32) AS SIGNED); --echo # --echo # MDEV-9711 NO PAD Collatons --echo # -SET character_set_connection=utf32; +SET collation_connection=utf32_general_nopad_ci; let $coll='utf32_general_nopad_ci'; let $coll_pad='utf32_general_ci'; --source include/ctype_pad_all_engines.inc +--source include/ctype_like.inc +SET collation_connection=utf32_nopad_bin; let $coll='utf32_nopad_bin'; let $coll_pad='utf32_bin'; --source include/ctype_pad_all_engines.inc - +--source include/ctype_like.inc --echo # --echo # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index aa4d366208f..e3ea43d5890 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -32211,145 +32211,16 @@ static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) return 0; } + /* -** Compare string against string with wildcard -** 0 if matched -** -1 if not matched with wildcard -** 1 if matched with wildcard + my_wildcmp_uca_impl() + A generic function for all Unicode character sets. + For UCA collations. 
*/ - -static -int my_wildcmp_uca_impl(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, int recurse_level) -{ - int result= -1; /* Not found, using wildcards */ - my_wc_t s_wc, w_wc; - int scan; - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - - if (my_string_stack_guard && my_string_stack_guard(recurse_level)) - return 1; - while (wildstr != wildend) - { - while (1) - { - my_bool escaped= 0; - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - result= 1; /* Found an anchor char */ - break; - } - - wildstr+= scan; - if (w_wc == (my_wc_t) escape && wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - escaped= 1; - } - - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - - if (!escaped && w_wc == (my_wc_t) w_one) - { - result= 1; /* Found an anchor char */ - } - else - { - if (my_uca_charcmp(cs,s_wc,w_wc)) - return 1; /* No match */ - } - if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ - } - - if (w_wc == (my_wc_t) w_many) - { /* Found w_many */ - /* Remove any '%' and '_' from the wild search string */ - for ( ; wildstr != wildend ; ) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - wildstr+= scan; - continue; - } - - if (w_wc == (my_wc_t) w_one) - { - wildstr+= scan; - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - continue; - } - break; /* Not a wild character */ - } - - if (wildstr == wildend) - return 0; /* Ok if w_many is last */ - - if (str == str_end) - return -1; - - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - 
wildstr+= scan; - - if (w_wc == (my_wc_t) escape) - { - if (wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - } - } - - while (1) - { - /* Skip until the first character from wildstr is found */ - while (str != str_end) - { - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - - if (!my_uca_charcmp(cs,s_wc,w_wc)) - break; - str+= scan; - } - if (str == str_end) - return -1; - - str+= scan; - result= my_wildcmp_uca_impl(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, - recurse_level + 1); - if (result <= 0) - return result; - } - } - } - return (str != str_end ? 1 : 0); -} +#define MY_FUNCTION_NAME(x) my_ ## x ## _uca_impl +#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) (my_uca_charcmp(cs, wc1, wc2)==0) +#include "ctype-wildcmp.inl" int my_wildcmp_uca(CHARSET_INFO *cs, diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index c8da32a87af..c4c4444188b 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -796,6 +796,65 @@ cnv: return (int) (dst -db); } + +static inline my_bool +my_char_eq_mb2_or_mb4_general_ci(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) +{ + DBUG_ASSERT((cs->state & MY_CS_BINSORT) == 0); + return my_casefold_char_eq_general_ci(cs->casefold, wc1, wc2); +} + + +static inline my_bool +my_char_eq_mb2_or_mb4_bin(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) +{ + DBUG_ASSERT((cs->state & MY_CS_BINSORT) != 0); + return wc1 == wc2; +} + + +/* + my_wildcmp_mb2_or_mb4_general_ci_impl() + A generic function for ucs2, utf16, utf32, for general_ci-style collations. 
+*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _mb2_or_mb4_general_ci_impl +#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_mb2_or_mb4_general_ci(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + +static int +my_wildcmp_mb2_or_mb4_general_ci(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_mb2_or_mb4_general_ci_impl(cs, str, str_end, + wildstr, wildend, + escape, w_one, w_many, 1); +} + + +/* + my_wildcmp_mb2_or_mb4_bin_impl() + A generic function for ucs2, utf16, utf32, for _bin collations. +*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _mb2_or_mb4_bin_impl +#define MY_MB_WC(cs, pwc, s, e) ((cs)->cset->mb_wc)(cs, pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_mb2_or_mb4_bin(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + +static int +my_wildcmp_mb2_or_mb4_bin(CHARSET_INFO *cs, + const char *str,const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many) +{ + return my_wildcmp_mb2_or_mb4_bin_impl(cs, str, str_end, wildstr, wildend, + escape, w_one, w_many, 1); +} + #endif /* HAVE_CHARSET_mb2_or_mb4 */ @@ -1407,29 +1466,6 @@ my_charpos_utf16(CHARSET_INFO *cs, } -static int -my_wildcmp_utf16_ci(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, uni_plane); -} - - -static int -my_wildcmp_utf16_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, NULL); -} - - static void my_hash_sort_utf16_nopad_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *pos, 
size_t len, @@ -1465,7 +1501,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler = my_strnxfrm_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16, @@ -1486,7 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler = my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_bin, @@ -1507,7 +1543,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler = my_strnxfrm_nopad_utf16_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad, @@ -1528,7 +1564,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler = my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad_bin, @@ -1816,7 +1852,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler = my_strnxfrm_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16, @@ -1837,7 +1873,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler = my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_bin, @@ -1858,7 +1894,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler = my_strnxfrm_nopad_utf16le_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf16_ci, + 
my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad, @@ -1879,7 +1915,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler = my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf16_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf16_nopad_bin, @@ -2554,29 +2590,6 @@ void my_fill_utf32(CHARSET_INFO *cs, } -static int -my_wildcmp_utf32_ci(CHARSET_INFO *cs, - const char *str, const char *str_end, - const char *wildstr, const char *wildend, - int escape, int w_one, int w_many) -{ - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, uni_plane); -} - - -static int -my_wildcmp_utf32_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_unicode(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, NULL); -} - - static size_t my_scan_utf32(CHARSET_INFO *cs, const char *str, const char *end, int sequence_type) @@ -2613,7 +2626,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler = my_strnxfrm_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf32_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32, @@ -2634,7 +2647,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler = my_strnxfrm_unicode_full_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf32_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32, @@ -2655,7 +2668,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler = my_strnxfrm_nopad_utf32_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_utf32_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, 
my_instr_mb, my_hash_sort_utf32_nopad, @@ -2676,7 +2689,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler = my_strnxfrm_unicode_full_nopad_bin, my_strnxfrmlen_unicode_full_bin, my_like_range_generic, - my_wildcmp_utf32_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_utf32_nopad, @@ -3148,29 +3161,6 @@ my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)), } -static -int my_wildcmp_ucs2_ci(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, - escape,w_one,w_many,uni_plane); -} - - -static -int my_wildcmp_ucs2_bin(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many) -{ - return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, - escape,w_one,w_many,NULL); -} - - static void my_hash_sort_ucs2_nopad_bin(CHARSET_INFO *cs __attribute__((unused)), const uchar *key, size_t len, @@ -3205,7 +3195,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler = my_strnxfrm_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2, @@ -3226,7 +3216,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_mysql500_ci_handler = my_strnxfrm_ucs2_general_mysql500_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2, @@ -3247,7 +3237,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler = my_strnxfrm_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_bin, @@ -3268,7 +3258,7 
@@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler = my_strnxfrm_nopad_ucs2_general_ci, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_ci, + my_wildcmp_mb2_or_mb4_general_ci, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_nopad, @@ -3289,7 +3279,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler = my_strnxfrm_nopad_ucs2_bin, my_strnxfrmlen_unicode, my_like_range_generic, - my_wildcmp_ucs2_bin, + my_wildcmp_mb2_or_mb4_bin, my_strcasecmp_mb2_or_mb4, my_instr_mb, my_hash_sort_ucs2_nopad_bin, diff --git a/strings/ctype-unidata.h b/strings/ctype-unidata.h index 0bcf96c09a0..bb741dc3eb5 100644 --- a/strings/ctype-unidata.h +++ b/strings/ctype-unidata.h @@ -132,6 +132,24 @@ my_toupper_unicode(MY_CASEFOLD_INFO *uni_plane, my_wc_t *wc) } +/* + Compare two characters for equality, according to the collation. + For simple Unicode AI CI collations, e.g. utf8mb4_general_ci. + + @return TRUE if the two characters are equal + @return FALSE otherwise +*/ +static inline my_bool +my_casefold_char_eq_general_ci(MY_CASEFOLD_INFO *casefold, + my_wc_t wc1, my_wc_t wc2) +{ + DBUG_ASSERT(casefold->simple_weight); + my_tosort_unicode(casefold, &wc1); + my_tosort_unicode(casefold, &wc2); + return wc1 == wc2; +} + + extern MY_CASEFOLD_INFO my_casefold_default; extern MY_CASEFOLD_INFO my_casefold_turkish; extern MY_CASEFOLD_INFO my_casefold_mysql500; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index d4a5c5be0d8..2dc120a2d0d 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -133,170 +133,11 @@ my_casefold_multiply_utf8mbx(CHARSET_INFO *cs) } -/* -** Compare string against string with wildcard -** This function is used in UTF8 and UCS2 -** -** 0 if matched -** -1 if not matched with wildcard -** 1 if matched with wildcard -*/ - -static -int my_wildcmp_unicode_impl(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, - 
MY_CASEFOLD_INFO *weights, int recurse_level) +static inline my_bool +my_char_eq_utf8mbx_general_ci(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2) { - int result= -1; /* Not found, using wildcards */ - my_wc_t s_wc, w_wc; - int scan; - my_charset_conv_mb_wc mb_wc= cs->cset->mb_wc; - - if (my_string_stack_guard && my_string_stack_guard(recurse_level)) - return 1; - while (wildstr != wildend) - { - while (1) - { - my_bool escaped= 0; - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - result= 1; /* Found an anchor char */ - break; - } - - wildstr+= scan; - if (w_wc == (my_wc_t) escape && wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - escaped= 1; - } - - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - - if (!escaped && w_wc == (my_wc_t) w_one) - { - result= 1; /* Found an anchor char */ - } - else - { - if (weights) - { - my_tosort_unicode(weights, &s_wc); - my_tosort_unicode(weights, &w_wc); - } - if (s_wc != w_wc) - return 1; /* No match */ - } - if (wildstr == wildend) - return (str != str_end); /* Match if both are at end */ - } - - if (w_wc == (my_wc_t) w_many) - { /* Found w_many */ - /* Remove any '%' and '_' from the wild search string */ - for ( ; wildstr != wildend ; ) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - - if (w_wc == (my_wc_t) w_many) - { - wildstr+= scan; - continue; - } - - if (w_wc == (my_wc_t) w_one) - { - wildstr+= scan; - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - str+= scan; - continue; - } - break; /* Not a wild character */ - } - - if (wildstr == wildend) - return 0; /* Ok if w_many is last */ - - if (str == str_end) - return -1; - - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const 
uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - - if (w_wc == (my_wc_t) escape) - { - if (wildstr < wildend) - { - if ((scan= mb_wc(cs, &w_wc, (const uchar*)wildstr, - (const uchar*)wildend)) <= 0) - return 1; - wildstr+= scan; - } - } - - while (1) - { - /* Skip until the first character from wildstr is found */ - while (str != str_end) - { - if ((scan= mb_wc(cs, &s_wc, (const uchar*)str, - (const uchar*)str_end)) <= 0) - return 1; - if (weights) - { - my_tosort_unicode(weights, &s_wc); - my_tosort_unicode(weights, &w_wc); - } - - if (s_wc == w_wc) - break; - str+= scan; - } - if (str == str_end) - return -1; - - str+= scan; - result= my_wildcmp_unicode_impl(cs, str, str_end, wildstr, wildend, - escape, w_one, w_many, - weights, recurse_level + 1); - if (result <= 0) - return result; - } - } - } - return (str != str_end ? 1 : 0); -} - - -int -my_wildcmp_unicode(CHARSET_INFO *cs, - const char *str,const char *str_end, - const char *wildstr,const char *wildend, - int escape, int w_one, int w_many, - MY_CASEFOLD_INFO *weights) -{ - return my_wildcmp_unicode_impl(cs, str, str_end, - wildstr, wildend, - escape, w_one, w_many, weights, 1); + DBUG_ASSERT((cs->state & MY_CS_BINSORT) == 0); + return my_casefold_char_eq_general_ci(cs->casefold, wc1, wc2); } @@ -774,15 +615,25 @@ int my_strcasecmp_utf8mb3(CHARSET_INFO *cs, const char *s, const char *t) } +/* + my_wildcmp_utf8mb3_general_ci_impl() + An optimized function for utf8mb3. + For general_ci-style collations.
+*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb3_general_ci_impl +#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_utf8mbx_general_ci(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + static int my_wildcmp_utf8mb3(CHARSET_INFO *cs, const char *str,const char *str_end, const char *wildstr,const char *wildend, int escape, int w_one, int w_many) { - MY_CASEFOLD_INFO *uni_plane= cs->casefold; - return my_wildcmp_unicode(cs,str,str_end,wildstr,wildend, - escape,w_one,w_many,uni_plane); + return my_wildcmp_utf8mb3_general_ci_impl(cs,str,str_end,wildstr,wildend, + escape, w_one, w_many, 1); } @@ -3117,14 +2968,25 @@ my_strcasecmp_utf8mb4(CHARSET_INFO *cs, const char *s, const char *t) } +/* + my_wildcmp_utf8mb4_general_ci_impl() + An optimized function for utf8mb4. + For general_ci-style collations. +*/ +#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4_general_ci_impl +#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb4_quick(pwc, s, e) +#define MY_CHAR_EQ(cs, wc1, wc2) my_char_eq_utf8mbx_general_ci(cs, wc1, wc2) +#include "ctype-wildcmp.inl" + + static int my_wildcmp_utf8mb4(CHARSET_INFO *cs, const char *str, const char *strend, const char *wildstr, const char *wildend, int escape, int w_one, int w_many) { - return my_wildcmp_unicode(cs, str, strend, wildstr, wildend, - escape, w_one, w_many, cs->casefold); + return my_wildcmp_utf8mb4_general_ci_impl(cs, str, strend, wildstr, wildend, + escape, w_one, w_many, 1); } diff --git a/strings/ctype-wildcmp.inl b/strings/ctype-wildcmp.inl new file mode 100644 index 00000000000..f7a5c02df78 --- /dev/null +++ b/strings/ctype-wildcmp.inl @@ -0,0 +1,177 @@ +/* + Copyright (c) 2024, MariaDB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + + +#ifndef MY_FUNCTION_NAME +#error MY_FUNCTION_NAME is not defined +#endif + +#ifndef MY_MB_WC +#error MY_MB_WC is not defined +#endif + +#ifndef MY_CHAR_EQ +#error MY_CHAR_EQ is not defined +#endif + +/* +** Compare string against string with wildcard +** +** 0 if matched +** -1 if not matched and str ended while scanning after w_many +** 1 if not matched +*/ + +static int +MY_FUNCTION_NAME(wildcmp)(CHARSET_INFO *cs, + const char *str, const char *str_end, + const char *wildstr,const char *wildend, + int escape, int w_one, int w_many, + int recurse_level) +{ + int result= -1; /* Not found, using wildcards */ + my_wc_t s_wc, w_wc; + int scan; + + if (my_string_stack_guard && my_string_stack_guard(recurse_level)) + return 1; + while (wildstr != wildend) + { + while (1) + { + my_bool escaped= 0; + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + + if (w_wc == (my_wc_t) w_many) + { + result= 1; /* Found an anchor char */ + break; + } + + wildstr+= scan; + if (w_wc == (my_wc_t) escape && wildstr < wildend) + { + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + wildstr+= scan; + escaped= 1; + } + + if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str, + (const uchar*) str_end)) <= 0) + return 1; + str+= scan; + + if (!escaped && w_wc == (my_wc_t) w_one) + { + result= 1; /* Found an anchor char */ + } + else + { + if (!MY_CHAR_EQ(cs, s_wc, w_wc)) + return 1; /* No match */ + } + if (wildstr == wildend) + return (str != str_end); /* Match
if both are at end */ + } + + if (w_wc == (my_wc_t) w_many) + { /* Found w_many */ + /* Remove any '%' and '_' from the wild search string */ + for ( ; wildstr != wildend ; ) + { + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + + if (w_wc == (my_wc_t) w_many) + { + wildstr+= scan; + continue; + } + + if (w_wc == (my_wc_t) w_one) + { + wildstr+= scan; + if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str, + (const uchar*) str_end)) <= 0) + return 1; + str+= scan; + continue; + } + break; /* Not a wild character */ + } + + if (wildstr == wildend) + return 0; /* Ok if w_many is last */ + + if (str == str_end) + return -1; + + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + wildstr+= scan; + + if (w_wc == (my_wc_t) escape) + { + if (wildstr < wildend) + { + if ((scan= MY_MB_WC(cs, &w_wc, (const uchar*) wildstr, + (const uchar*) wildend)) <= 0) + return 1; + wildstr+= scan; + } + } + + while (1) + { + /* Skip until the first character from wildstr is found */ + while (str != str_end) + { + if ((scan= MY_MB_WC(cs, &s_wc, (const uchar*) str, + (const uchar*) str_end)) <= 0) + return 1; + + if (MY_CHAR_EQ(cs, s_wc, w_wc)) + break; + str+= scan; + } + if (str == str_end) + return -1; + + str+= scan; + result= MY_FUNCTION_NAME(wildcmp)(cs, + str, str_end, + wildstr, wildend, + escape, w_one, w_many, + recurse_level + 1); + if (result <= 0) + return result; + } + } + } + return (str != str_end ? 1 : 0); +} + + +#undef MY_FUNCTION_NAME +#undef MY_MB_WC +#undef MY_CHAR_EQ