mirror of
https://github.com/MariaDB/server.git
synced 2025-01-15 19:42:28 +01:00
MDEV-27154 allkeys.txt based tests for Unicode-4.0.0 and 5.2.0
This commit is contained in:
parent
897d8c57b6
commit
02de93d158
7 changed files with 38031 additions and 0 deletions
23
mysql-test/include/ctype_unicode_allchars.inc
Normal file
23
mysql-test/include/ctype_unicode_allchars.inc
Normal file
|
@ -0,0 +1,23 @@
|
|||
#
|
||||
# Make a table with all Unicode characters
|
||||
# in the range U+0000 .. U+10FFFF
|
||||
#
# Result: table allchars(code, str) with one row per code value.
# The column types are derived from the CREATE TABLE ... SELECT below,
# so `str` gets the connection character set and collation that are in
# effect when this file is sourced. The visible callers run
# SET NAMES utf8mb4 COLLATE utf8mb4_bin before sourcing it.
# The helper table t1tmp is created and dropped inside this file.
#
|
||||
|
||||
CREATE TABLE allchars AS SELECT 1 AS code, ' ' AS str LIMIT 0;
|
||||
# Record the resulting column definitions in the test output.
SHOW CREATE TABLE allchars;
|
||||
|
||||
# Helper table holding the integers 0 .. 0xFFF (filled by the loop below).
CREATE TABLE t1tmp (a INT NOT NULL);
|
||||
# The loop body contains ';', so the statement delimiter is switched to $$
# for the duration of the FOR statement.
DELIMITER $$;
|
||||
FOR i IN 0..0xFFF
|
||||
DO
|
||||
INSERT INTO t1tmp VALUES (i);
|
||||
END FOR;
|
||||
$$
|
||||
DELIMITER ;$$
|
||||
# Cross join the helper table with itself: t1.a (restricted to 0 .. 0x10F)
# supplies the high part and t2.a (0 .. 0xFFF) the low 12 bits, so
# code = t1.a*0x1000 + t2.a takes every value 0 .. 0x10FFFF exactly once.
# CHAR(n USING utf32) builds the string from the numeric code value.
INSERT INTO allchars SELECT
|
||||
t1.a*0x1000+t2.a,
|
||||
CHAR(t1.a*0x1000+t2.a USING utf32)
|
||||
FROM t1tmp t1, t1tmp t2
|
||||
WHERE t1.a BETWEEN 0 AND 0x10F;
|
||||
DROP TABLE t1tmp;
|
||||
# Sanity check: 0x110 * 0x1000 = 1114112 rows are expected.
SELECT COUNT(*) FROM allchars;
|
73
mysql-test/main/ctype_utf8mb4_uca_allkeys400.result
Normal file
73
mysql-test/main/ctype_utf8mb4_uca_allkeys400.result
Normal file
|
@ -0,0 +1,73 @@
|
|||
#
|
||||
# Start of 10.8 tests
|
||||
#
|
||||
SET NAMES utf8mb4 COLLATE utf8mb4_bin;
|
||||
CREATE TABLE allchars AS SELECT 1 AS code, ' ' AS str LIMIT 0;
|
||||
SHOW CREATE TABLE allchars;
|
||||
Table Create Table
|
||||
allchars CREATE TABLE `allchars` (
|
||||
`code` int(1) NOT NULL,
|
||||
`str` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
CREATE TABLE t1tmp (a INT NOT NULL);
|
||||
FOR i IN 0..0xFFF
|
||||
DO
|
||||
INSERT INTO t1tmp VALUES (i);
|
||||
END FOR;
|
||||
$$
|
||||
INSERT INTO allchars SELECT
|
||||
t1.a*0x1000+t2.a,
|
||||
CHAR(t1.a*0x1000+t2.a USING utf32)
|
||||
FROM t1tmp t1, t1tmp t2
|
||||
WHERE t1.a BETWEEN 0 AND 0x10F;
|
||||
DROP TABLE t1tmp;
|
||||
SELECT COUNT(*) FROM allchars;
|
||||
COUNT(*)
|
||||
1114112
|
||||
CREATE TABLE allkeys_txt (a TEXT, b TEXT, c TEXT) ENGINE=MyISAM;
|
||||
LOAD DATA INFILE '../../std_data/unicode/allkeys400.txt'
|
||||
INTO TABLE allkeys_txt FIELDS TERMINATED BY ';' (@a,@b,@qq)
|
||||
SET a=TRIM(@a), b=TRIM(REGEXP_SUBSTR(@b,'^[^#]*')), c=TRIM(REGEXP_SUBSTR(@b, '#.*$'));
|
||||
CREATE TABLE allkeys AS
|
||||
SELECT
|
||||
a,
|
||||
CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_bin AS str,
|
||||
HEX(WEIGHT_STRING(CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_unicode_ci)) as ws,
|
||||
REPLACE(REPLACE(REGEXP_REPLACE(b,'[[][.*](....)[.]....[.]....[.].{4,5}]','-\\1-'),'-0000-',''),'-','') AS wd,
|
||||
c
|
||||
FROM allkeys_txt
|
||||
WHERE a RLIKE '^[0-9A-Z]';
|
||||
ALTER TABLE allkeys ADD KEY(str(3));
|
||||
SELECT COUNT(*), SUM(ws<>wd) FROM allkeys WHERE OCTET_LENGTH(str)<=3;
|
||||
COUNT(*) SUM(ws<>wd)
|
||||
12073 1
|
||||
SELECT a, ws, wd FROM allkeys WHERE ws<>wd AND OCTET_LENGTH(str)<=3;
|
||||
a ws wd
|
||||
FDFA FBC1FDFA 138713AB13C70209135013AB13AB13B70209138F13AB13C813B7020913BD138113AB13B0
|
||||
SELECT
|
||||
HEX(code),
|
||||
HEX(WEIGHT_STRING(str COLLATE utf8mb4_unicode_ci)) AS ws,
|
||||
CASE
|
||||
WHEN code >= 0x10000 THEN 'FFFD'
|
||||
WHEN code >= 0x3400 AND code <= 0x4DB5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB80 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
WHEN code >= 0x4E00 AND code <= 0x9FA5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB40 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
ELSE
|
||||
CONCAT(LPAD(HEX(0xFBC0 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
END AS wd
|
||||
FROM allchars
|
||||
LEFT OUTER JOIN allkeys USING (str)
|
||||
WHERE allkeys.str IS NULL
|
||||
HAVING ws<>wd
|
||||
ORDER BY HEX(str);
|
||||
HEX(code) ws wd
|
||||
DROP TABLE allkeys_txt;
|
||||
DROP TABLE allkeys;
|
||||
DROP TABLE allchars;
|
||||
#
|
||||
# End of 10.8 tests
|
||||
#
|
77
mysql-test/main/ctype_utf8mb4_uca_allkeys400.test
Normal file
77
mysql-test/main/ctype_utf8mb4_uca_allkeys400.test
Normal file
|
@ -0,0 +1,77 @@
|
|||
--source include/have_utf32.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
|
||||
--echo #
|
||||
--echo # Start of 10.8 tests
|
||||
--echo #
|
||||
|
||||
|
||||
# The connection character set and collation chosen here also decide the
# type of allchars.str, which the include file creates from a string literal.
SET NAMES utf8mb4 COLLATE utf8mb4_bin;
|
||||
--source include/ctype_unicode_allchars.inc
|
||||
|
||||
#
|
||||
# Load allkeys.txt from Unicode-4.0.0
|
||||
#
|
||||
# The 4.0.0 file has four weight levels and an optional extra field
|
||||
# after the character name, e.g. "; QQK"
|
||||
#00A0 ; [*0209.0020.001B.00A0] # NO-BREAK SPACE; QQK
|
||||
#
# LOAD DATA splits every line on ';' into
#   @a  - the code point list
#   @b  - the collation elements followed by the '#' comment
#   @qq - the optional extra field (read but not stored)
# and stores a = trimmed @a, b = the part of @b before '#',
# c = the part of @b starting at '#'.
#
|
||||
|
||||
CREATE TABLE allkeys_txt (a TEXT, b TEXT, c TEXT) ENGINE=MyISAM;
|
||||
LOAD DATA INFILE '../../std_data/unicode/allkeys400.txt'
|
||||
INTO TABLE allkeys_txt FIELDS TERMINATED BY ';' (@a,@b,@qq)
|
||||
SET a=TRIM(@a), b=TRIM(REGEXP_SUBSTR(@b,'^[^#]*')), c=TRIM(REGEXP_SUBSTR(@b, '#.*$'));
|
||||
#
# Build one row per loaded entry:
#   a   - the code point list as written in the file
#   str - the character(s) themselves: every 4-digit code point is
#         rewritten to '-0000XXXX-' and every 5-digit one to '-000XXXXX-',
#         then spaces and dashes are removed and the remaining hex string
#         is unhexed as utf32 and converted to utf8mb4
#   ws  - HEX(WEIGHT_STRING()) of the same string under utf8mb4_unicode_ci,
#         i.e. the weights the server actually produces
#   wd  - the weights expected from the file: the first 4-character field
#         (the primary weight) of every collation element in b, with
#         '0000' fields dropped
#   c   - the '#' comment taken from the file
# Only rows whose first field matches '^[0-9A-Z]' are kept; presumably this
# skips lines that are not collation entries - confirm against the data file.
# NOTE(review): only 4- and 5-digit code points are padded to 8 hex digits;
# a 6-digit code point would not be padded - presumably none occur in the file.
#
CREATE TABLE allkeys AS
|
||||
SELECT
|
||||
a,
|
||||
CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_bin AS str,
|
||||
HEX(WEIGHT_STRING(CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_unicode_ci)) as ws,
|
||||
REPLACE(REPLACE(REGEXP_REPLACE(b,'[[][.*](....)[.]....[.]....[.].{4,5}]','-\\1-'),'-0000-',''),'-','') AS wd,
|
||||
c
|
||||
FROM allkeys_txt
|
||||
WHERE a RLIKE '^[0-9A-Z]';
|
||||
# Prefix index on str; presumably added to speed up the join on str in the
# implicit-weight check of this test - confirm.
ALTER TABLE allkeys ADD KEY(str(3));
|
||||
|
||||
#
|
||||
# Test explicit weights
|
||||
# utf8mb4_unicode_ci supports only BMP characters.
|
||||
# Built-in default contractions are not supported.
|
||||
# The (OCTET_LENGTH(str)<=3) part of the condition filters out
|
||||
# characters outside BMP and contractions.
#
# The first query reports how many entries are checked and how many of
# them disagree (ws<>wd); the second one lists the disagreeing entries.
# The recorded result has 12073 checked entries and a single mismatch,
# FDFA, for which the server returns FBC1FDFA instead of the expansion
# listed in the file.
#
|
||||
|
||||
SELECT COUNT(*), SUM(ws<>wd) FROM allkeys WHERE OCTET_LENGTH(str)<=3;
|
||||
SELECT a, ws, wd FROM allkeys WHERE ws<>wd AND OCTET_LENGTH(str)<=3;
|
||||
|
||||
#
|
||||
# Test implicit weights
|
||||
# Non-BMP characters all have the same weight FFFD.
#
# The query looks at every character of allchars that has no entry in
# allkeys (LEFT OUTER JOIN ... WHERE allkeys.str IS NULL) and compares
# the server weight (ws) with the expected implicit weight (wd).
# For BMP characters wd consists of two 16-bit values:
#   base + (code >> 15)  and  0x8000 | (code & 0x7FFF)
# where base is 0xFB80 for 0x3400..0x4DB5, 0xFB40 for 0x4E00..0x9FA5
# and 0xFBC0 for everything else.
# HAVING ws<>wd keeps only the mismatches, so the recorded result is empty.
|
||||
#
|
||||
|
||||
SELECT
|
||||
HEX(code),
|
||||
HEX(WEIGHT_STRING(str COLLATE utf8mb4_unicode_ci)) AS ws,
|
||||
CASE
|
||||
WHEN code >= 0x10000 THEN 'FFFD'
|
||||
WHEN code >= 0x3400 AND code <= 0x4DB5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB80 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
WHEN code >= 0x4E00 AND code <= 0x9FA5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB40 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
ELSE
|
||||
CONCAT(LPAD(HEX(0xFBC0 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
END AS wd
|
||||
FROM allchars
|
||||
LEFT OUTER JOIN allkeys USING (str)
|
||||
WHERE allkeys.str IS NULL
|
||||
HAVING ws<>wd
|
||||
ORDER BY HEX(str);
|
||||
|
||||
# Cleanup: drop the tables created by this test and by the include file.
DROP TABLE allkeys_txt;
|
||||
DROP TABLE allkeys;
|
||||
DROP TABLE allchars;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.8 tests
|
||||
--echo #
|
72
mysql-test/main/ctype_utf8mb4_uca_allkeys520.result
Normal file
72
mysql-test/main/ctype_utf8mb4_uca_allkeys520.result
Normal file
|
@ -0,0 +1,72 @@
|
|||
#
|
||||
# Start of 10.8 tests
|
||||
#
|
||||
SET NAMES utf8mb4 COLLATE utf8mb4_bin;
|
||||
CREATE TABLE allchars AS SELECT 1 AS code, ' ' AS str LIMIT 0;
|
||||
SHOW CREATE TABLE allchars;
|
||||
Table Create Table
|
||||
allchars CREATE TABLE `allchars` (
|
||||
`code` int(1) NOT NULL,
|
||||
`str` varchar(1) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin NOT NULL
|
||||
) ENGINE=MyISAM DEFAULT CHARSET=latin1
|
||||
CREATE TABLE t1tmp (a INT NOT NULL);
|
||||
FOR i IN 0..0xFFF
|
||||
DO
|
||||
INSERT INTO t1tmp VALUES (i);
|
||||
END FOR;
|
||||
$$
|
||||
INSERT INTO allchars SELECT
|
||||
t1.a*0x1000+t2.a,
|
||||
CHAR(t1.a*0x1000+t2.a USING utf32)
|
||||
FROM t1tmp t1, t1tmp t2
|
||||
WHERE t1.a BETWEEN 0 AND 0x10F;
|
||||
DROP TABLE t1tmp;
|
||||
SELECT COUNT(*) FROM allchars;
|
||||
COUNT(*)
|
||||
1114112
|
||||
CREATE TABLE allkeys_txt (a TEXT, b TEXT, c TEXT) ENGINE=MyISAM;
|
||||
LOAD DATA INFILE '../../std_data/unicode/allkeys520.txt'
|
||||
INTO TABLE allkeys_txt FIELDS TERMINATED BY ';' (@a,@b,@qq)
|
||||
SET a=TRIM(@a), b=TRIM(REGEXP_SUBSTR(@b,'^[^#]*')), c=TRIM(REGEXP_SUBSTR(@b, '#.*$'));
|
||||
CREATE TABLE allkeys AS
|
||||
SELECT
|
||||
a,
|
||||
CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_bin AS str,
|
||||
HEX(WEIGHT_STRING(CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_unicode_520_ci)) as ws,
|
||||
REPLACE(REPLACE(REGEXP_REPLACE(b,'[[][.*](....)[.]....[.]....[.].{4,5}]','-\\1-'),'-0000-',''),'-','') AS wd,
|
||||
c
|
||||
FROM allkeys_txt
|
||||
WHERE a RLIKE '^[0-9A-Z]';
|
||||
ALTER TABLE allkeys ADD KEY(str(3));
|
||||
SELECT COUNT(*), SUM(ws<>wd) FROM allkeys WHERE a NOT LIKE '% %';
|
||||
COUNT(*) SUM(ws<>wd)
|
||||
21807 1
|
||||
SELECT a, ws, wd FROM allkeys WHERE ws<>wd AND a NOT LIKE '% %';
|
||||
a ws wd
|
||||
FDFA 18FC192B194F020A18AD192B192B193D 18FC192B194F020A18AD192B192B193D020A1904192B1950193D020A194318F1192B1931
|
||||
SELECT
|
||||
HEX(code),
|
||||
HEX(WEIGHT_STRING(str COLLATE utf8mb4_unicode_520_ci)) AS ws,
|
||||
CASE
|
||||
WHEN code >= 0x3400 AND code <= 0x4DB5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB80 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
WHEN code >= 0x4E00 AND code <= 0x9FA5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB40 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
ELSE
|
||||
CONCAT(LPAD(HEX(0xFBC0 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
END AS wd
|
||||
FROM allchars
|
||||
LEFT OUTER JOIN allkeys USING (str)
|
||||
WHERE allkeys.str IS NULL
|
||||
HAVING ws<>wd
|
||||
ORDER BY HEX(str);
|
||||
HEX(code) ws wd
|
||||
DROP TABLE allkeys_txt;
|
||||
DROP TABLE allkeys;
|
||||
DROP TABLE allchars;
|
||||
#
|
||||
# End of 10.8 tests
|
||||
#
|
75
mysql-test/main/ctype_utf8mb4_uca_allkeys520.test
Normal file
75
mysql-test/main/ctype_utf8mb4_uca_allkeys520.test
Normal file
|
@ -0,0 +1,75 @@
|
|||
--source include/have_utf32.inc
|
||||
--source include/have_utf8mb4.inc
|
||||
|
||||
--echo #
|
||||
--echo # Start of 10.8 tests
|
||||
--echo #
|
||||
|
||||
|
||||
# The connection character set and collation chosen here also decide the
# type of allchars.str, which the include file creates from a string literal.
SET NAMES utf8mb4 COLLATE utf8mb4_bin;
|
||||
--source include/ctype_unicode_allchars.inc
|
||||
|
||||
#
|
||||
# Load allkeys.txt from Unicode-5.2.0
|
||||
#
|
||||
# The 5.2.0 file has four weight levels and an optional extra field
|
||||
# after the character name, e.g. "; QQK"
|
||||
#00A0 ; [*020A.0020.001B.00A0] # NO-BREAK SPACE; QQK
|
||||
#
# LOAD DATA splits every line on ';' into
#   @a  - the code point list
#   @b  - the collation elements followed by the '#' comment
#   @qq - the optional extra field (read but not stored)
# and stores a = trimmed @a, b = the part of @b before '#',
# c = the part of @b starting at '#'.
#
|
||||
|
||||
CREATE TABLE allkeys_txt (a TEXT, b TEXT, c TEXT) ENGINE=MyISAM;
|
||||
LOAD DATA INFILE '../../std_data/unicode/allkeys520.txt'
|
||||
INTO TABLE allkeys_txt FIELDS TERMINATED BY ';' (@a,@b,@qq)
|
||||
SET a=TRIM(@a), b=TRIM(REGEXP_SUBSTR(@b,'^[^#]*')), c=TRIM(REGEXP_SUBSTR(@b, '#.*$'));
|
||||
#
# Build one row per loaded entry:
#   a   - the code point list as written in the file
#   str - the character(s) themselves: every 4-digit code point is
#         rewritten to '-0000XXXX-' and every 5-digit one to '-000XXXXX-',
#         then spaces and dashes are removed and the remaining hex string
#         is unhexed as utf32 and converted to utf8mb4
#   ws  - HEX(WEIGHT_STRING()) of the same string under
#         utf8mb4_unicode_520_ci, i.e. the weights the server produces
#   wd  - the weights expected from the file: the first 4-character field
#         (the primary weight) of every collation element in b, with
#         '0000' fields dropped
#   c   - the '#' comment taken from the file
# Only rows whose first field matches '^[0-9A-Z]' are kept; presumably this
# skips lines that are not collation entries - confirm against the data file.
# NOTE(review): only 4- and 5-digit code points are padded to 8 hex digits;
# a 6-digit code point would not be padded - presumably none occur in the file.
#
CREATE TABLE allkeys AS
|
||||
SELECT
|
||||
a,
|
||||
CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_bin AS str,
|
||||
HEX(WEIGHT_STRING(CONVERT(CAST(UNHEX(regexp_replace(regexp_replace(regexp_replace(a,'(\\b[0-9A-Z]{4}\\b)','-0000\\1-'),'(\\b[0-9A-Z]{5}\\b)','-000\\1-'),'[ -]','')) AS CHAR CHARACTER SET utf32) USING utf8mb4) COLLATE utf8mb4_unicode_520_ci)) as ws,
|
||||
REPLACE(REPLACE(REGEXP_REPLACE(b,'[[][.*](....)[.]....[.]....[.].{4,5}]','-\\1-'),'-0000-',''),'-','') AS wd,
|
||||
c
|
||||
FROM allkeys_txt
|
||||
WHERE a RLIKE '^[0-9A-Z]';
|
||||
# Prefix index on str; presumably added to speed up the join on str in the
# implicit-weight check of this test - confirm.
ALTER TABLE allkeys ADD KEY(str(3));
|
||||
|
||||
#
|
||||
# Test explicit weights
|
||||
# Built-in default contractions are not supported.
|
||||
# The (NOT LIKE '% %') part of the condition filters out contractions.
#
# Entries whose code point list contains a space consist of more than one
# code point, which is how the condition recognizes them.
# The first query reports how many entries are checked and how many of
# them disagree (ws<>wd); the second one lists the disagreeing entries.
# The recorded result has 21807 checked entries and a single mismatch,
# FDFA: the server weight string equals the first 8 of the 18 weights
# listed in the file.
#
|
||||
|
||||
SELECT COUNT(*), SUM(ws<>wd) FROM allkeys WHERE a NOT LIKE '% %';
|
||||
SELECT a, ws, wd FROM allkeys WHERE ws<>wd AND a NOT LIKE '% %';
|
||||
|
||||
|
||||
#
|
||||
# Test implicit weights
|
||||
# No FFFD special case is applied to non-BMP characters here: the CASE
# below expects the generic two-value implicit weight for them as well.
#
# The query looks at every character of allchars that has no entry in
# allkeys (LEFT OUTER JOIN ... WHERE allkeys.str IS NULL) and compares
# the server weight (ws) with the expected implicit weight (wd).
# wd consists of two 16-bit values:
#   base + (code >> 15)  and  0x8000 | (code & 0x7FFF)
# where base is 0xFB80 for 0x3400..0x4DB5, 0xFB40 for 0x4E00..0x9FA5
# and 0xFBC0 for everything else.
# HAVING ws<>wd keeps only the mismatches, so the recorded result is empty.
# NOTE(review): the two ideograph ranges are the same as in the
# Unicode-4.0.0 test; confirm that this is the intended expectation for
# the 5.2.0 collation.
|
||||
#
|
||||
|
||||
SELECT
|
||||
HEX(code),
|
||||
HEX(WEIGHT_STRING(str COLLATE utf8mb4_unicode_520_ci)) AS ws,
|
||||
CASE
|
||||
WHEN code >= 0x3400 AND code <= 0x4DB5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB80 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
WHEN code >= 0x4E00 AND code <= 0x9FA5 THEN
|
||||
CONCAT(LPAD(HEX(0xFB40 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
ELSE
|
||||
CONCAT(LPAD(HEX(0xFBC0 + (code >> 15)),4,'0'),
|
||||
LPAD(HEX(0x8000 | (code & 0x7FFF)),4,'0'))
|
||||
END AS wd
|
||||
FROM allchars
|
||||
LEFT OUTER JOIN allkeys USING (str)
|
||||
WHERE allkeys.str IS NULL
|
||||
HAVING ws<>wd
|
||||
ORDER BY HEX(str);
|
||||
|
||||
# Cleanup: drop the tables created by this test and by the include file.
DROP TABLE allkeys_txt;
|
||||
DROP TABLE allkeys;
|
||||
DROP TABLE allchars;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.8 tests
|
||||
--echo #
|
15169
mysql-test/std_data/unicode/allkeys400.txt
Normal file
15169
mysql-test/std_data/unicode/allkeys400.txt
Normal file
File diff suppressed because it is too large
Load diff
22542
mysql-test/std_data/unicode/allkeys520.txt
Normal file
22542
mysql-test/std_data/unicode/allkeys520.txt
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Reference in a new issue