From 0845bce0d95fcb7a3b21e17864901d888d276dcb Mon Sep 17 00:00:00 2001 From: Alexander Barkov <bar@mariadb.com> Date: Fri, 3 Feb 2023 16:57:53 +0400 Subject: [PATCH] MDEV-30556 UPPER() returns an empty string for U+0251 in Unicode-5.2.0+ collations for utf8 --- mysql-test/include/ctype_casefolding.inc | 18 +++ mysql-test/main/ctype_ldml.result | 45 ++++++ mysql-test/main/ctype_ldml.test | 21 +++ mysql-test/main/ctype_uca.result | 8 +- mysql-test/main/ctype_utf8_uca.result | 174 +++++++++++++++++++++++ mysql-test/main/ctype_utf8_uca.test | 29 ++++ mysql-test/main/ctype_utf8mb4_uca.result | 174 +++++++++++++++++++++++ mysql-test/main/ctype_utf8mb4_uca.test | 29 ++++ mysys/charset.c | 2 + strings/ctype-uca.c | 37 ++--- 10 files changed, 517 insertions(+), 20 deletions(-) create mode 100644 mysql-test/include/ctype_casefolding.inc diff --git a/mysql-test/include/ctype_casefolding.inc b/mysql-test/include/ctype_casefolding.inc new file mode 100644 index 00000000000..4ee402c95ad --- /dev/null +++ b/mysql-test/include/ctype_casefolding.inc @@ -0,0 +1,18 @@ +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +# Unicode code points that have a variable length case mapping in utf8 +# (e.g. 
LOWER('2-byte-character') -> '3-byte-character') +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +DROP TABLE case_folding; diff --git a/mysql-test/main/ctype_ldml.result b/mysql-test/main/ctype_ldml.result index 3ce50331ed0..ac1d66f3c91 100644 --- a/mysql-test/main/ctype_ldml.result +++ b/mysql-test/main/ctype_ldml.result @@ -3034,3 +3034,48 @@ SELECT 'chž'< 'i'; 1 SELECT 'a' COLLATE utf8_czech_test_bad_w2; ERROR HY000: Unknown collation: 'utf8_czech_test_bad_w2' +# +# End of 10.2 tests +# +# +# Start of 10.3 tests +# +# +# MDEV-30556 UPPER() returns an empty string for U+0251 in Unicode-5.2.0+ collations for utf8 +# +SET NAMES utf8mb4 COLLATE utf8mb4_test_520_nopad_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_test_520_nopad_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +# +# End of 10.3 tests +# diff --git a/mysql-test/main/ctype_ldml.test b/mysql-test/main/ctype_ldml.test index 68891646c0f..368e26dba20 100644 --- a/mysql-test/main/ctype_ldml.test +++ b/mysql-test/main/ctype_ldml.test @@ -605,3 
+605,24 @@ SELECT 'chž'< 'i'; --error ER_UNKNOWN_COLLATION SELECT 'a' COLLATE utf8_czech_test_bad_w2; + +--echo # +--echo # End of 10.2 tests +--echo # + + +--echo # +--echo # Start of 10.3 tests +--echo # + +--echo # +--echo # MDEV-30556 UPPER() returns an empty string for U+0251 in Unicode-5.2.0+ collations for utf8 +--echo # + +SET NAMES utf8mb4 COLLATE utf8mb4_test_520_nopad_ci; +--source include/ctype_casefolding.inc + + +--echo # +--echo # End of 10.3 tests +--echo # diff --git a/mysql-test/main/ctype_uca.result b/mysql-test/main/ctype_uca.result index af2798f12f3..2298733851d 100644 --- a/mysql-test/main/ctype_uca.result +++ b/mysql-test/main/ctype_uca.result @@ -8204,7 +8204,7 @@ INSERT INTO t1 VALUES (_utf32 0x2CEE); SELECT hex(c), hex(lower(c)), hex(upper(c)), hex(weight_string(c)), c FROM t1 ORDER BY c, BINARY c; hex(c) hex(lower(c)) hex(upper(c)) hex(weight_string(c)) c -C8BA C8BA 1214 Ⱥ +C8BA E2B1A5 C8BA 1214 Ⱥ E2B1A5 E2B1A5 C8BA 1214 ⱥ C680 C680 C983 122D ƀ C983 C680 C983 122D Ƀ @@ -8229,7 +8229,7 @@ E2B1AA E2B1AA E2B1A9 1328 ⱪ C8BD C69A C8BD 133B Ƚ E2B1A0 E2B1A1 E2B1A0 133F Ⱡ E2B1A1 E2B1A1 E2B1A0 133F ⱡ -C9AB C9AB 1340 ɫ +C9AB C9AB E2B1A2 1340 ɫ E2B1A2 C9AB E2B1A2 1340 Ɫ E1B5BD E1B5BD E2B1A3 13B8 ᵽ E2B1A3 E1B5BD E2B1A3 13B8 Ᵽ @@ -8237,11 +8237,11 @@ C98A C98B C98A 13D2 Ɋ C98B C98B C98A 13D2 ɋ C98C C98D C98C 13E4 Ɍ C98D C98D C98C 13E4 ɍ -C9BD C9BD 13FC ɽ +C9BD C9BD E2B1A4 13FC ɽ E2B1A4 C9BD E2B1A4 13FC Ɽ EA9CA8 EA9CA9 EA9CA8 143314AD Ꜩ EA9CA9 EA9CA9 EA9CA8 143314AD ꜩ -C8BE C8BE 143C Ⱦ +C8BE E2B1A6 C8BE 143C Ⱦ E2B1A6 E2B1A6 C8BE 143C ⱦ C984 CA89 C984 145B Ʉ CA89 CA89 C984 145B ʉ diff --git a/mysql-test/main/ctype_utf8_uca.result b/mysql-test/main/ctype_utf8_uca.result index a6e1616997f..bf4e5ed2fd8 100644 --- a/mysql-test/main/ctype_utf8_uca.result +++ b/mysql-test/main/ctype_utf8_uca.result @@ -587,3 +587,177 @@ DROP TABLE t1; # # End of 10.2 tests # +# +# Start of 10.3 tests +# +# +# MDEV-30556 UPPER() returns an empty string for U+0251 in 
Unicode-5.2.0+ collations for utf8 +# +SET NAMES utf8mb3 COLLATE utf8mb3_unicode_ci /*Unicode-4.0 folding*/; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8 COLLATE utf8_unicode_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A C8BA C8BA Ⱥ +23E C8BE C8BE Ⱦ +23F C8BF C8BF ȿ +240 C980 C980 ɀ +250 C990 C990 ɐ +251 C991 C991 ɑ +252 C992 C992 ɒ +26B C9AB C9AB ɫ +271 C9B1 C9B1 ɱ +27D C9BD C9BD ɽ +DROP TABLE case_folding; +SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8 COLLATE utf8_unicode_520_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_nopad_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS 
c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8 COLLATE utf8_unicode_520_nopad_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +SET NAMES utf8mb3 COLLATE utf8mb3_myanmar_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8 COLLATE utf8_myanmar_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET 
utf8 COLLATE utf8_thai_520_w2 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +# +# End of 10.3 tests +# diff --git a/mysql-test/main/ctype_utf8_uca.test b/mysql-test/main/ctype_utf8_uca.test index 0879b4d2810..38bcce8f4ba 100644 --- a/mysql-test/main/ctype_utf8_uca.test +++ b/mysql-test/main/ctype_utf8_uca.test @@ -21,3 +21,32 @@ SET NAMES utf8 COLLATE utf8_unicode_nopad_ci; --echo # --echo # End of 10.2 tests --echo # + + +--echo # +--echo # Start of 10.3 tests +--echo # + +--echo # +--echo # MDEV-30556 UPPER() returns an empty string for U+0251 in Unicode-5.2.0+ collations for utf8 +--echo # + +SET NAMES utf8mb3 COLLATE utf8mb3_unicode_ci /*Unicode-4.0 folding*/; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_ci; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb3 COLLATE utf8mb3_unicode_520_nopad_ci; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb3 COLLATE utf8mb3_myanmar_ci; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb3 COLLATE utf8mb3_thai_520_w2; +--source include/ctype_casefolding.inc + + +--echo # +--echo # End of 10.3 tests +--echo # diff --git a/mysql-test/main/ctype_utf8mb4_uca.result b/mysql-test/main/ctype_utf8mb4_uca.result index 8d2e81d8d89..d0a63d0dd36 100644 --- a/mysql-test/main/ctype_utf8mb4_uca.result +++ b/mysql-test/main/ctype_utf8mb4_uca.result @@ -6605,3 +6605,177 @@ SET NAMES utf8mb4; # # End of 10.2 tests # +# +# Start of 
10.3 tests +# +# +# MDEV-30556 UPPER() returns an empty string for U+0251 in Unicode-5.2.0+ collations for utf8 +# +SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci /*Unicode-4.0 folding*/; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A C8BA C8BA Ⱥ +23E C8BE C8BE Ⱦ +23F C8BF C8BF ȿ +240 C980 C980 ɀ +250 C990 C990 ɐ +251 C991 C991 ɑ +252 C992 C992 ɒ +26B C9AB C9AB ɫ +271 C9B1 C9B1 ɱ +27D C9BD C9BD ɽ +DROP TABLE case_folding; +SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +SET NAMES utf8mb4 COLLATE 
utf8mb4_unicode_520_nopad_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_520_nopad_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +SET NAMES utf8mb4 COLLATE utf8mb4_myanmar_ci; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_myanmar_ci DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +SET NAMES utf8mb4 COLLATE utf8mb4_thai_520_w2; +CREATE OR REPLACE TABLE case_folding AS SELECT 0 AS code, SPACE(32) AS c LIMIT 0; +SHOW CREATE TABLE case_folding; +Table 
Create Table +case_folding CREATE TABLE `case_folding` ( + `code` int(1) NOT NULL, + `c` varchar(32) CHARACTER SET utf8mb4 COLLATE utf8mb4_thai_520_w2 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +INSERT INTO case_folding (code) VALUES +(0x23A), +(0x23E), +(0x23F), +(0x240), +(0x250), +(0x251), +(0x252), +(0x26B), +(0x271), +(0x27D); +UPDATE case_folding SET c=CHAR(code USING ucs2); +SELECT HEX(code), HEX(LOWER(c)), HEX(UPPER(c)), c FROM case_folding; +HEX(code) HEX(LOWER(c)) HEX(UPPER(c)) c +23A E2B1A5 C8BA Ⱥ +23E E2B1A6 C8BE Ⱦ +23F C8BF E2B1BE ȿ +240 C980 E2B1BF ɀ +250 C990 E2B1AF ɐ +251 C991 E2B1AD ɑ +252 C992 E2B1B0 ɒ +26B C9AB E2B1A2 ɫ +271 C9B1 E2B1AE ɱ +27D C9BD E2B1A4 ɽ +DROP TABLE case_folding; +# +# End of 10.3 tests +# diff --git a/mysql-test/main/ctype_utf8mb4_uca.test b/mysql-test/main/ctype_utf8mb4_uca.test index 160cb48bad6..9b532f109d8 100644 --- a/mysql-test/main/ctype_utf8mb4_uca.test +++ b/mysql-test/main/ctype_utf8mb4_uca.test @@ -108,3 +108,32 @@ SET NAMES utf8mb4; --echo # --echo # End of 10.2 tests --echo # + + +--echo # +--echo # Start of 10.3 tests +--echo # + +--echo # +--echo # MDEV-30556 UPPER() returns an empty string for U+0251 in Unicode-5.2.0+ collations for utf8 +--echo # + +SET NAMES utf8mb4 COLLATE utf8mb4_unicode_ci /*Unicode-4.0 folding*/; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_ci; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb4 COLLATE utf8mb4_unicode_520_nopad_ci; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb4 COLLATE utf8mb4_myanmar_ci; +--source include/ctype_casefolding.inc + +SET NAMES utf8mb4 COLLATE utf8mb4_thai_520_w2; +--source include/ctype_casefolding.inc + + +--echo # +--echo # End of 10.3 tests +--echo # diff --git a/mysys/charset.c b/mysys/charset.c index f44dc7606c1..c84039b0071 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -368,6 +368,8 @@ static int add_collation(struct charset_info_st *cs) 
&my_charset_utf8mb4_unicode_ci, cs); newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype; + if (init_state_maps(newcs)) + return MY_XML_ERROR; newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED; #endif } diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c index fbca4df39e7..969f642b51f 100644 --- a/strings/ctype-uca.c +++ b/strings/ctype-uca.c @@ -33859,6 +33859,11 @@ create_tailoring(struct charset_info_st *cs, { src_uca= &my_uca_v520; cs->caseinfo= &my_unicase_unicode520; + if (cs->mbminlen == 1 && cs->mbmaxlen >=3) + { + cs->caseup_multiply= 2; + cs->casedn_multiply= 2; + } } else if (rules.version == 400) /* Unicode-4.0.0 requested */ { @@ -35692,8 +35697,8 @@ struct charset_info_st my_charset_utf8_myanmar_uca_ci= NULL, /* state_map */ NULL, /* ident_map */ 8, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 9, /* min_sort_char */ @@ -35725,8 +35730,8 @@ struct charset_info_st my_charset_utf8_unicode_520_ci= NULL, /* state_map */ NULL, /* ident_map */ 8, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 9, /* min_sort_char */ @@ -35757,8 +35762,8 @@ struct charset_info_st my_charset_utf8_thai_520_w2= NULL, /* state_map */ NULL, /* ident_map */ 4, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 9, /* min_sort_char */ @@ -35855,8 +35860,8 @@ struct charset_info_st my_charset_utf8_unicode_520_nopad_ci= NULL, /* state_map */ NULL, /* ident_map */ 8, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 3, /* mbmaxlen */ 9, /* min_sort_char */ @@ -36670,8 +36675,8 @@ struct charset_info_st my_charset_utf8mb4_myanmar_uca_ci= NULL, 
/* state_map */ NULL, /* ident_map */ 8, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 9, /* min_sort_char */ @@ -36702,8 +36707,8 @@ struct charset_info_st my_charset_utf8mb4_thai_520_w2= NULL, /* state_map */ NULL, /* ident_map */ 4, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 9, /* min_sort_char */ @@ -36734,8 +36739,8 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_ci= NULL, /* state_map */ NULL, /* ident_map */ 8, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 9, /* min_sort_char */ @@ -36833,8 +36838,8 @@ struct charset_info_st my_charset_utf8mb4_unicode_520_nopad_ci= NULL, /* state_map */ NULL, /* ident_map */ 8, /* strxfrm_multiply */ - 1, /* caseup_multiply */ - 1, /* casedn_multiply */ + 2, /* caseup_multiply */ + 2, /* casedn_multiply */ 1, /* mbminlen */ 4, /* mbmaxlen */ 9, /* min_sort_char */