MDEV-20912 Add support for utf8mb4_0900_* collations in MariaDB Server

This is done by mapping most of the existing MySQL unicode 0900 collations to MariaDB 1400 unicode collations. The assumption is that 1400 is a superset of 0900 for all practical purposes. I also added a new function 'compare_collations()' and changed most code to use this instead of comparing character sets directly. This enables one to seamlessly mix-and-match the corresponding 0900 and 1400 sets. Field comparison and alter table treat the character sets as identical. All MySQL 8.0 0900 collations are supported except: - utf8mb4_ja_0900_as_cs - utf8mb4_ja_0900_as_cs_ks - utf8mb4_ru_0900_as_cs - utf8mb4_zh_0900_as_cs These do not have corresponding entries in the MariaDB 1400 collations. Other things: - Added COMMENT column to information_schema.collations. For utf8mb4_0900 collations it contains the corresponding alias collation.
2025-01-15 19:42:28 +01:00 · 2024-12-15 15:57:53 +02:00 · 2024-12-15 15:57:53 +02:00 · 7fcaab7aaa
commit 7fcaab7aaa
parent 9e7762e718
21 changed files with 6284 additions and 102 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -458,9 +458,9 @@ typedef struct my_charset_loader_st
 {
  char error[128];
  void *(*once_alloc)(size_t);
-  void *(*malloc)(size_t);
+  void *(*malloc)(size_t);                      /* Not used */
-  void *(*realloc)(void *, size_t);
+  void *(*realloc)(void *, size_t);             /* Not used */
-  void (*free)(void *);
+  void (*free)(void *);                         /* Not used */
  void (*reporter)(enum loglevel, const char *format, ...);
  int  (*add_collation)(struct charset_info_st *cs);
 } MY_CHARSET_LOADER;
@ -1693,6 +1693,7 @@ my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
 uint my_ci_get_id_generic(CHARSET_INFO *cs, my_collation_id_type_t type);
 LEX_CSTRING my_ci_get_collation_name_generic(CHARSET_INFO *cs,
                                             my_collation_name_mode_t mode);
 my_bool compare_collations(CHARSET_INFO *cs1, CHARSET_INFO *cs2);
 typedef struct 
 {
--- a/include/my_sys.h
+++ b/include/my_sys.h
@ -1119,6 +1119,9 @@ static inline my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
 extern my_bool init_compiled_charsets(myf flags);
 extern void add_compiled_collation(struct charset_info_st *cs);
 extern void add_compiled_extra_collation(struct charset_info_st *cs);
 extern my_bool add_alias_for_collation(LEX_CSTRING *collation_name,
                                       LEX_CSTRING *alias,
                                       uint alias_id);
 extern size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
                                      char *to, size_t to_length,
                                      const char *from, size_t length,
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 1093c22e357d0164aa072b829693e39f14322384
+Subproject commit 52d0a38ed15f62906206d77b675079fc159cec7e
--- a/mysql-test/main/ctype_ldml.result
+++ b/mysql-test/main/ctype_ldml.result
@ -456,43 +456,43 @@ select "foo" = "foo " collate latin1_test;
 1
 The following tests check that two-byte collation IDs work
 select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
+COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT
-ascii2_general_nopad_ci	ascii2	318			1
+ascii2_general_nopad_ci	ascii2	318			1	
-ascii2_bin2	ascii2	319			1
+ascii2_bin2	ascii2	319			1	
-ascii2_general_ci	ascii2	320	Yes		1
+ascii2_general_ci	ascii2	320	Yes		1	
-ascii2_bin	ascii2	321			1
+ascii2_bin	ascii2	321			1	
-ascii2_general_inherited_ci	ascii2	322			1
+ascii2_general_inherited_ci	ascii2	322			1	
-ascii2_general_inherited2_ci	ascii2	323			1
+ascii2_general_inherited2_ci	ascii2	323			1	
-ascii2_badly_inherited_ci	ascii2	324			1
+ascii2_badly_inherited_ci	ascii2	324			1	
-ascii2_nopad_bin	ascii2	325			1
+ascii2_nopad_bin	ascii2	325			1	
-utf8mb4_test_ci	utf8mb4	326			8
+utf8mb4_test_ci	utf8mb4	326			8	
-utf16_test_ci	utf16	327			8
+utf16_test_ci	utf16	327			8	
-utf8mb4_test_400_ci	utf8mb4	328			8
+utf8mb4_test_400_ci	utf8mb4	328			8	
-utf8mb4_test_520_nopad_ci	utf8mb4	329			8
+utf8mb4_test_520_nopad_ci	utf8mb4	329			8	
-utf8mb4_uca1400_test01_as_ci	utf8mb4	330			4
+utf8mb4_uca1400_test01_as_ci	utf8mb4	330			4	
-latin1_test	latin1	331			1
+latin1_test	latin1	331			1	cp1252 West European
-latin1_test2	latin1	332			1
+latin1_test2	latin1	332			1	cp1252 West European
-latin1_test2_cs	latin1	333			1
+latin1_test2_cs	latin1	333			1	cp1252 West European
-latin1_swedish_nopad2_ci	latin1	334			1
+latin1_swedish_nopad2_ci	latin1	334			1	cp1252 West European
-utf8mb3_bengali_standard_ci	utf8mb3	336			8
+utf8mb3_bengali_standard_ci	utf8mb3	336			8	
-utf8mb3_bengali_traditional_ci	utf8mb3	337			8
+utf8mb3_bengali_traditional_ci	utf8mb3	337			8	
-utf8mb3_implicit_weights_ci	utf8mb3	338			8
+utf8mb3_implicit_weights_ci	utf8mb3	338			8	
-utf8mb3_phone_ci	utf8mb3	352			8
+utf8mb3_phone_ci	utf8mb3	352			8	
-utf8mb3_test_ci	utf8mb3	353			8
+utf8mb3_test_ci	utf8mb3	353			8	
-utf8mb3_5624_1	utf8mb3	354			8
+utf8mb3_5624_1	utf8mb3	354			8	
-utf8mb3_5624_2	utf8mb3	355			8
+utf8mb3_5624_2	utf8mb3	355			8	
-utf8mb3_5624_3	utf8mb3	356			8
+utf8mb3_5624_3	utf8mb3	356			8	
-utf8mb3_5624_4	utf8mb3	357			8
+utf8mb3_5624_4	utf8mb3	357			8	
-ucs2_test_ci	ucs2	358			8
+ucs2_test_ci	ucs2	358			8	
-ucs2_vn_ci	ucs2	359			8
+ucs2_vn_ci	ucs2	359			8	
-ucs2_5624_1	ucs2	360			8
+ucs2_5624_1	ucs2	360			8	
-utf8mb3_5624_5	utf8mb3	368			8
+utf8mb3_5624_5	utf8mb3	368			8	
-utf8mb3_5624_5_bad	utf8mb3	369			8
+utf8mb3_5624_5_bad	utf8mb3	369			8	
-utf8mb3_czech_test_w2	utf8mb3	370			4
+utf8mb3_czech_test_w2	utf8mb3	370			4	
-utf8mb3_czech_test_nopad_w2	utf8mb3	371			4
+utf8mb3_czech_test_nopad_w2	utf8mb3	371			4	
-utf8mb3_czech_test_bad_w2	utf8mb3	372			4
+utf8mb3_czech_test_bad_w2	utf8mb3	372			4	
-utf32_test_ci	utf32	391			8
+utf32_test_ci	utf32	391			8	
-utf8mb3_maxuserid_ci	utf8mb3	2047			8
+utf8mb3_maxuserid_ci	utf8mb3	2047			8	
 show collation like '%test%';
 Collation	Charset	Id	Default	Compiled	Sortlen
 latin1_test	latin1	331			1
--- a/mysql-test/main/ctype_utf8mb4_0900.result
+++ b/mysql-test/main/ctype_utf8mb4_0900.result
--- a/mysql-test/main/ctype_utf8mb4_0900.test
+++ b/mysql-test/main/ctype_utf8mb4_0900.test
@ -0,0 +1,116 @@
 -- source include/have_ucs2.inc
 -- source include/have_utf8mb4.inc
 -- source include/have_innodb.inc
 --disable_warnings
 DROP TABLE IF EXISTS t1;
 --enable_warnings
 #
 # Basic tests
 #
 select * from information_schema.COLLATION_CHARACTER_SET_APPLICABILITY
 where collation_name like "%0900%" order by collation_name;
 select * from information_schema.COLLATIONS where collation_name like "%0900%";
 SET NAMES utf8mb4;
 CREATE TABLE t1 (c1 CHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin);
 --source include/ctype_unicode_latin.inc
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_unicode_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_icelandic_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_latvian_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_romanian_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_slovenian_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_polish_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_estonian_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_spanish_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_swedish_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_turkish_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_czech_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_danish_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_lithuanian_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_slovak_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_spanish2_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_roman_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_esperanto_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_hungarian_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_croatian_mysql561_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_croatian_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_german2_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_unicode_520_ci;
 SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_vietnamese_ci;
 DROP TABLE t1;
 --echo #
 --echo # MDEV-20912 Add support for utf8mb4_0900_* collations in MariaDB Server
 --echo #
 CREATE DATABASE db1 CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
 USE db1;
 CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
 ALTER TABLE t1 CONVERT TO CHARACTER SET DEFAULT COLLATE utf8mb4_0900_ai_ci;
 SHOW CREATE TABLE t1;
 DROP TABLE t1;
 CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
 ALTER TABLE t1 CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_sv_0900_ai_ci;
 SHOW CREATE TABLE t1;
 DROP TABLE t1;
 DROP DATABASE db1;
 USE test;
 --echo #
 --echo # CREATE TABLE - table level character set and collation
 --echo #
 CREATE DATABASE db1 CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
 CREATE TABLE db1.t1 (a CHAR(1)) COLLATE utf8mb4_0900_ai_ci;
 SHOW CREATE TABLE db1.t1;
 ALTER TABLE db1.t1 modify a CHAR(1) COLLATE utf8mb4_sv_0900_ai_ci;
 SHOW CREATE TABLE db1.t1;
 DROP TABLE db1.t1;
 CREATE TABLE db1.t1 (a CHAR(1)) COLLATE utf8mb4_sv_0900_ai_ci;
 SHOW CREATE TABLE db1.t1;
 DROP TABLE db1.t1;
 CREATE TABLE db1.t1 (a CHAR(1)) CHARACTER SET DEFAULT COLLATE utf8mb4_0900_ai_ci;
 SHOW CREATE TABLE db1.t1;
 DROP TABLE db1.t1;
 DROP DATABASE db1;
 --echo #
 --echo # Ensure that we can seamlessly compare and move between
 --echo # utf8mb4_sv_0900_ai_ci and utf8mb4_uca1400_swedish_1400_nopad_ai_ci
 --echo #
 CREATE TABLE t1 (p int primary key auto_increment, a VARCHAR(10), key (a)) engine=innodb, COLLATE utf8mb4_sv_0900_ai_ci;
 show create table t1;
 CREATE TABLE t2 (p int primary key auto_increment, a VARCHAR(10), key(a)) engine=innodb, COLLATE utf8mb4_uca1400_swedish_nopad_ai_ci;
 show create table t2;
 insert into t1 (a) values ("hello"),("world");
 insert into t2 (a) values ("hello"),("world");
 explain select * from t1,t2 where t1.a=t2.a;
 --echo # Check that alter table can convert between the character sets
 alter table t1 modify a varchar(10) collate utf8mb4_uca1400_swedish_nopad_ai_ci, algorithm=nocopy;
 show create table t1;
 alter table t2 modify a varchar(10) collate utf8mb4_sv_0900_ai_ci, algorithm=nocopy;
 show create table t2;
 drop table t1,t2;
 CREATE OR REPLACE TABLE t1 (p int primary key auto_increment, a VARCHAR(10), key (a)) engine=aria, COLLATE utf8mb4_sv_0900_ai_ci;
 alter table t1 modify a varchar(10) collate utf8mb4_uca1400_swedish_nopad_ai_ci, algorithm=nocopy;
 drop table t1;
--- a/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.result
+++ b/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.result
--- a/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.test
+++ b/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.test
@ -0,0 +1,15 @@
 --echo #
 --echo # Start of 10.10 tests
 --echo #
 --echo #
 --echo # MDEV-30577 Case folding for uca1400 collations is not up to date
 --echo #
 SET NAMES utf8mb4 COLLATE utf8mb4_0900_ai_ci;
 --source include/ctype_unicode_casefold_bmp.inc
 --source include/ctype_unicode_casefold_supplementary.inc
 --echo #
 --echo # End of 11.4 tests
 --echo #
--- a/mysql-test/main/information_schema.result
+++ b/mysql-test/main/information_schema.result
@ -278,17 +278,17 @@ Charset	Description	Default collation	Maxlen
 latin1	cp1252 West European	latin1_swedish_ci	1
 select * from information_schema.COLLATIONS
 where COLLATION_NAME like 'latin1%';
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
+COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT
-latin1_german1_ci	latin1	5		#	1
+latin1_german1_ci	latin1	5		#	1	cp1252 West European
-latin1_swedish_ci	latin1	8	Yes	#	1
+latin1_swedish_ci	latin1	8	Yes	#	1	cp1252 West European
-latin1_danish_ci	latin1	15		#	1
+latin1_danish_ci	latin1	15		#	1	cp1252 West European
-latin1_german2_ci	latin1	31		#	2
+latin1_german2_ci	latin1	31		#	2	cp1252 West European
-latin1_bin	latin1	47		#	1
+latin1_bin	latin1	47		#	1	cp1252 West European
-latin1_general_ci	latin1	48		#	1
+latin1_general_ci	latin1	48		#	1	cp1252 West European
-latin1_general_cs	latin1	49		#	1
+latin1_general_cs	latin1	49		#	1	cp1252 West European
-latin1_spanish_ci	latin1	94		#	1
+latin1_spanish_ci	latin1	94		#	1	cp1252 West European
-latin1_swedish_nopad_ci	latin1	1032		#	1
+latin1_swedish_nopad_ci	latin1	1032		#	1	
-latin1_nopad_bin	latin1	1071		#	1
+latin1_nopad_bin	latin1	1071		#	1	
 SHOW COLLATION LIKE 'latin1%';
 Collation	Charset	Id	Default	Compiled	Sortlen
 latin1_german1_ci	latin1	5		#	1
@ -1653,7 +1653,7 @@ SELECT *
 FROM tables ta
 JOIN collations co ON ( co.collation_name = ta.table_catalog )
 JOIN character_sets cs ON ( cs.character_set_name = ta.table_catalog );
-TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	TABLE_TYPE	ENGINE	VERSION	ROW_FORMAT	TABLE_ROWS	AVG_ROW_LENGTH	DATA_LENGTH	MAX_DATA_LENGTH	INDEX_LENGTH	DATA_FREE	AUTO_INCREMENT	CREATE_TIME	UPDATE_TIME	CHECK_TIME	TABLE_COLLATION	CHECKSUM	CREATE_OPTIONS	TABLE_COMMENT	MAX_INDEX_LENGTH	TEMPORARY	COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
+TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	TABLE_TYPE	ENGINE	VERSION	ROW_FORMAT	TABLE_ROWS	AVG_ROW_LENGTH	DATA_LENGTH	MAX_DATA_LENGTH	INDEX_LENGTH	DATA_FREE	AUTO_INCREMENT	CREATE_TIME	UPDATE_TIME	CHECK_TIME	TABLE_COLLATION	CHECKSUM	CREATE_OPTIONS	TABLE_COMMENT	MAX_INDEX_LENGTH	TEMPORARY	COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT	CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
 DROP TABLE test.t1;
 SET max_heap_table_size = DEFAULT;
 USE test;
--- a/mysql-test/suite/funcs_1/r/charset_collation.result
+++ b/mysql-test/suite/funcs_1/r/charset_collation.result
@ -18,11 +18,11 @@ AND (collation_name LIKE CONCAT(character_set_name,'_general_ci')
 OR
 collation_name LIKE CONCAT(character_set_name,'_bin'))
 ORDER BY collation_name;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
+COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT
-latin1_bin	latin1	47		Yes	1
+latin1_bin	latin1	47		Yes	1	cp1252 West European
-latin1_general_ci	latin1	48		Yes	1
+latin1_general_ci	latin1	48		Yes	1	cp1252 West European
-utf8mb3_bin	utf8mb3	83		Yes	1
+utf8mb3_bin	utf8mb3	83		Yes	1	UTF-8 Unicode
-utf8mb3_general_ci	utf8mb3	33	Yes	Yes	1
+utf8mb3_general_ci	utf8mb3	33	Yes	Yes	1	UTF-8 Unicode
 SELECT *
 FROM information_schema.collation_character_set_applicability
--- a/mysql-test/suite/funcs_1/r/is_collations.result
+++ b/mysql-test/suite/funcs_1/r/is_collations.result
@ -34,6 +34,7 @@ ID	bigint(11)	YES		NULL
 IS_DEFAULT	varchar(3)	YES		NULL	
 IS_COMPILED	varchar(3)	NO		NULL	
 SORTLEN	bigint(3)	NO		NULL	
 COMMENT	varchar(80)	NO		NULL	
 SHOW CREATE TABLE information_schema.COLLATIONS;
 Table	Create Table
 COLLATIONS	CREATE TEMPORARY TABLE `COLLATIONS` (
@ -42,7 +43,8 @@ COLLATIONS	CREATE TEMPORARY TABLE `COLLATIONS` (
  `ID` bigint(11),
  `IS_DEFAULT` varchar(3),
  `IS_COMPILED` varchar(3) NOT NULL,
-  `SORTLEN` bigint(3) NOT NULL
+  `SORTLEN` bigint(3) NOT NULL,
  `COMMENT` varchar(80) NOT NULL
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci
 SHOW COLUMNS FROM information_schema.COLLATIONS;
 Field	Type	Null	Key	Default	Extra
@ -52,6 +54,7 @@ ID	bigint(11)	YES		NULL
 IS_DEFAULT	varchar(3)	YES		NULL	
 IS_COMPILED	varchar(3)	NO		NULL	
 SORTLEN	bigint(3)	NO		NULL	
 COMMENT	varchar(80)	NO		NULL	
 # Testcases 3.2.3.2 and 3.2.3.3 are checked in suite/funcs_1/t/charset_collation*.test
 ########################################################################
 # Testcases 3.2.1.3-3.2.1.5 + 3.2.1.8-3.2.1.12: INSERT/UPDATE/DELETE and
--- a/mysql-test/suite/funcs_1/r/is_columns_is.result
+++ b/mysql-test/suite/funcs_1/r/is_columns_is.result
@ -57,6 +57,7 @@ def	information_schema	CLIENT_STATISTICS	TOTAL_SSL_CONNECTIONS	24	NULL	NO	bigint
 def	information_schema	CLIENT_STATISTICS	UPDATE_COMMANDS	16	NULL	NO	bigint	NULL	NULL	19	0	NULL	NULL	NULL	bigint(21)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	CHARACTER_SET_NAME	2	NULL	YES	varchar	32	96	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(32)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	COLLATION_NAME	1	NULL	NO	varchar	64	192	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(64)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	COMMENT	7	NULL	NO	varchar	80	240	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(80)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	ID	3	NULL	YES	bigint	NULL	NULL	19	0	NULL	NULL	NULL	bigint(11)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	IS_COMPILED	5	NULL	NO	varchar	3	9	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(3)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	IS_DEFAULT	4	NULL	YES	varchar	3	9	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(3)			select		NEVER	NULL	NO	NO
@ -641,6 +642,7 @@ NULL	information_schema	COLLATIONS	ID	bigint	NULL	NULL	NULL	NULL	bigint(11)
 3.0000	information_schema	COLLATIONS	IS_DEFAULT	varchar	3	9	utf8mb3	utf8mb3_general_ci	varchar(3)
 3.0000	information_schema	COLLATIONS	IS_COMPILED	varchar	3	9	utf8mb3	utf8mb3_general_ci	varchar(3)
 NULL	information_schema	COLLATIONS	SORTLEN	bigint	NULL	NULL	NULL	NULL	bigint(3)
 3.0000	information_schema	COLLATIONS	COMMENT	varchar	80	240	utf8mb3	utf8mb3_general_ci	varchar(80)
 3.0000	information_schema	COLLATION_CHARACTER_SET_APPLICABILITY	COLLATION_NAME	varchar	64	192	utf8mb3	utf8mb3_general_ci	varchar(64)
 3.0000	information_schema	COLLATION_CHARACTER_SET_APPLICABILITY	CHARACTER_SET_NAME	varchar	32	96	utf8mb3	utf8mb3_general_ci	varchar(32)
 3.0000	information_schema	COLLATION_CHARACTER_SET_APPLICABILITY	FULL_COLLATION_NAME	varchar	64	192	utf8mb3	utf8mb3_general_ci	varchar(64)
--- a/mysql-test/suite/innodb/r/innodb_ctype_ldml.result
+++ b/mysql-test/suite/innodb/r/innodb_ctype_ldml.result
@ -388,8 +388,8 @@ select "foo" = "foo " collate latin1_test;
 "foo" = "foo " collate latin1_test
 1
 The following tests check that two-byte collation IDs work
-select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
+select collation_name, character_set_name, id, is_default, is_compiled, sortlen from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
+collation_name	character_set_name	id	is_default	is_compiled	sortlen
 ascii2_general_nopad_ci	ascii2	318			1
 ascii2_bin2	ascii2	319			1
 ascii2_general_ci	ascii2	320	Yes		1
--- a/mysql-test/suite/innodb/t/innodb_ctype_ldml.test
+++ b/mysql-test/suite/innodb/t/innodb_ctype_ldml.test
@ -171,7 +171,7 @@ select "foo" = "foo " collate latin1_test;
 # The file ../std-data/Index.xml has a number of collations with high IDs.
 # Test that the "ID" column in I_S and SHOW queries can handle two bytes
-select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
+select collation_name, character_set_name, id, is_default, is_compiled, sortlen from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
 show collation like '%test%';
 # Test that two-byte collation ID is correctly transfered to the client side.
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@ -543,6 +543,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
  if (my_uca1400_collation_definitions_add())
    return TRUE;
-  
+  if (mysql_utf8mb4_0900_collation_definitions_add())
    return TRUE;
  return FALSE;
 }
--- a/mysys/charset.c
+++ b/mysys/charset.c
@ -29,7 +29,7 @@
 #include <locale.h>
 #endif
-extern HASH charset_name_hash;
+static HASH charset_name_hash;
 /*
  The code below implements this functionality:
@ -640,6 +640,62 @@ void add_compiled_extra_collation(struct charset_info_st *cs)
 }
 /*
  Register an alias name for an already existing collation.
  The alias is a memcpy() copy of the original CHARSET_INFO in which only
  the collation name, the comment ("Alias for <original name>") and the
  collation number (alias_id) are replaced. The copy is stored in
  all_charsets[alias_id].
  Used to add MySQL utf8mb4_0900 collations to MariaDB as aliases for the
  corresponding utf8mb4_1400 collations.
  @param collation_name  Name of the existing collation to copy
  @param alias           Name of the alias collation
  @param alias_id        Collation number of the alias
  @return 0  ok
  @return 1  error (original collation not found, initialization or
             allocation failed)
 */
 my_bool add_alias_for_collation(LEX_CSTRING *collation_name, LEX_CSTRING *alias,
                                 uint alias_id)
 {
  MY_CHARSET_LOADER loader;
  struct charset_info_st *source, *copy;
  char *name_pos, *comment_pos;
  char text[64+15];
  size_t text_length;
  uint source_id= get_collation_number_internal(collation_name->str);
  DBUG_ASSERT(source_id);
  DBUG_ASSERT(all_charsets[source_id]);
  source= (struct charset_info_st*) all_charsets[source_id];
  if (!source)
    return 1;
  /*
    The original has to be initialized before it is copied, to ensure that
    it is not changed after the copy has been taken.
  */
  my_charset_loader_init_mysys(&loader);
  if (my_ci_init_charset(source, &loader))
    return 1;
  if (my_ci_init_collation(source, &loader))
    return 1;
  if (source->m_ctype && init_state_maps(source))
    return 1;
  source->state|= MY_CS_READY;
  text_length= strxnmov(text, sizeof(text)-1, "Alias for ",
                        collation_name->str, NullS) - text;
  /* One allocation holds the structure, the alias name and the comment */
  copy= (struct charset_info_st*)
    my_once_alloc(sizeof(CHARSET_INFO) + alias->length + text_length + 2,
                  MYF(MY_WME));
  if (!copy)
    return 1;
  name_pos= (char*) (copy + 1);
  comment_pos= name_pos + alias->length + 1;
  /* The +1 in both copies includes the terminating zero byte */
  memcpy(name_pos, alias->str, alias->length + 1);
  memcpy(comment_pos, text, text_length + 1);
  memcpy((void*) copy, source, sizeof(CHARSET_INFO));
  /* Replace the members that make the alias differ from the original */
  copy->coll_name.str= name_pos;
  copy->coll_name.length= alias->length;
  copy->comment= comment_pos;
  copy->number= alias_id;
  all_charsets[alias_id]= copy;
  return 0;
 }
 static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
 static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
@ -660,6 +716,54 @@ my_bool my_collation_is_known_id(uint id)
 }
 /*
  Check whether two collations are identical.
  They are identical if all slots are identical except the collation name
  and number. Alias collations are made by memcpy(), which means that the
  padding inside the structures is identical too, so the tail of the
  structure can be compared with memcmp().
  Note that this code assumes knowledge of the CHARSET_INFO structure,
  especially the place of number, cs_name, coll_name and tailoring.
  The other option would have been to add a new member 'alias_collation'
  to CHARSET_INFO that all identical collations would point to, but that
  would have changed the CHARSET_INFO structure, which would have required
  a lot more changes.
  @return 0  Identical
  @return 1  Different
 */
 my_bool compare_collations(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
 {
  const char *tail1, *tail2;
  size_t tail_size;
  if (cs1 == cs2)
    return 0;
  /*
    Cheap member checks first. The collation number itself is
    intentionally not compared.
  */
  if (cs1->cset != cs2->cset ||
      cs1->coll != cs2->coll ||
      cs1->uca != cs2->uca ||
      cs1->primary_number != cs2->primary_number ||
      cs1->binary_number != cs2->binary_number ||
      cs1->state != cs2->state)
    return 1;
  /* Compare everything from 'tailoring' to the end of the structure */
  tail1= (const char*) &cs1->tailoring;
  tail2= (const char*) &cs2->tailoring;
  tail_size= sizeof(CHARSET_INFO) - (tail1 - (const char*) cs1);
  return memcmp(tail1, tail2, tail_size) ? 1 : 0;
 }
 /*
  Collation use statistics functions do not lock
  counters to avoid mutex contention. This can lose
@ -688,8 +792,6 @@ const char *my_collation_get_tailoring(uint id)
 }
 HASH charset_name_hash;
 static const uchar *get_charset_key(const void *object, size_t *size,
                                    my_bool not_used __attribute__((unused)))
 {
@ -723,7 +825,7 @@ static void init_available_charsets(void)
    if (*cs)
    {
      DBUG_ASSERT(cs[0]->mbmaxlen <= MY_CS_MBMAXLEN);
-      if (cs[0]->m_ctype)
+      if (cs[0]->m_ctype && !cs[0]->state_map)
        if (init_state_maps(*cs))
          *cs= NULL;
    }
--- a/sql/field.cc
+++ b/sql/field.cc
@ -2718,9 +2718,9 @@ void Field_null::sql_type(String &res) const
 bool Field_null::is_equal(const Column_definition &new_field) const
 {
  DBUG_ASSERT(!compression_method());
-  return new_field.type_handler() == type_handler() &&
+  return (new_field.type_handler() == type_handler() &&
-         new_field.charset == field_charset() &&
+          !compare_collations(new_field.charset, field_charset()) &&
-         new_field.length == max_display_length();
+          new_field.length == max_display_length());
 }
@ -7490,10 +7490,10 @@ int Field_str::store(double nr)
 bool Field_string::is_equal(const Column_definition &new_field) const
 {
  DBUG_ASSERT(!compression_method());
-  return new_field.type_handler() == type_handler() &&
+  return (new_field.type_handler() == type_handler() &&
-         new_field.char_length == char_length() &&
+          new_field.char_length == char_length() &&
-         new_field.charset == field_charset() &&
+          !compare_collations(new_field.charset, field_charset()) &&
-         new_field.length == max_display_length();
+          new_field.length == max_display_length());
 }
@ -7514,11 +7514,11 @@ Data_type_compatibility
 Field_longstr::cmp_to_string_with_same_collation(const Item_bool_func *cond,
                                                 const Item *item) const
 {
-  return !cmp_is_done_using_type_handler_of_this(cond, item) ?
+  return (!cmp_is_done_using_type_handler_of_this(cond, item) ?
-         Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
+          Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
-         charset() != cond->compare_collation() ?
+          compare_collations(charset(), cond->compare_collation()) ?
-         Data_type_compatibility::INCOMPATIBLE_COLLATION :
+          Data_type_compatibility::INCOMPATIBLE_COLLATION :
-         Data_type_compatibility::OK;
+          Data_type_compatibility::OK);
 }
@ -7526,13 +7526,13 @@ Data_type_compatibility
 Field_longstr::cmp_to_string_with_stricter_collation(const Item_bool_func *cond,
                                                     const Item *item) const
 {
-  return !cmp_is_done_using_type_handler_of_this(cond, item) ?
+  return (!cmp_is_done_using_type_handler_of_this(cond, item) ?
-         Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
+          Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
-         (charset() != cond->compare_collation() &&
+          (compare_collations(charset(), cond->compare_collation()) &&
-          !(cond->compare_collation()->state & MY_CS_BINSORT) &&
+           !(cond->compare_collation()->state & MY_CS_BINSORT) &&
-          !Utf8_narrow::should_do_narrowing(this, cond->compare_collation())) ?
+           !Utf8_narrow::should_do_narrowing(this, cond->compare_collation())) ?
-         Data_type_compatibility::INCOMPATIBLE_COLLATION :
+          Data_type_compatibility::INCOMPATIBLE_COLLATION :
-         Data_type_compatibility::OK;
+          Data_type_compatibility::OK);
 }
@ -8443,11 +8443,11 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root, TABLE *new_table,
 bool Field_varstring::is_equal(const Column_definition &new_field) const
 {
-  return new_field.type_handler() == type_handler() &&
+  return (new_field.type_handler() == type_handler() &&
-         new_field.length == field_length &&
+          new_field.length == field_length &&
-         new_field.char_length == char_length() &&
+          new_field.char_length == char_length() &&
-         !new_field.compression_method() == !compression_method() &&
+          !new_field.compression_method() == !compression_method() &&
-         new_field.charset == field_charset();
+          !compare_collations(new_field.charset, field_charset()));
 }
@ -8714,7 +8714,7 @@ uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg) const
 */
 int Field_blob::copy_value(Field_blob *from)
 {
-  DBUG_ASSERT(field_charset() == from->charset());
+  DBUG_ASSERT(!compare_collations(field_charset(), from->charset()));
  DBUG_ASSERT(!compression_method() == !from->compression_method());
  int rc= 0;
  uint32 length= from->get_length();
@ -9245,10 +9245,10 @@ uint Field_blob::max_packed_col_length(uint max_length)
 bool Field_blob::is_equal(const Column_definition &new_field) const
 {
-  return new_field.type_handler() == type_handler() &&
+  return (new_field.type_handler() == type_handler() &&
-         !new_field.compression_method() == !compression_method() &&
+          !new_field.compression_method() == !compression_method() &&
-         new_field.pack_length == pack_length() &&
+          new_field.pack_length == pack_length() &&
-         new_field.charset == field_charset();
+          !compare_collations(new_field.charset, field_charset()));
 }
@ -9747,7 +9747,7 @@ bool Field_enum::is_equal(const Column_definition &new_field) const
    type, charset and have the same underlying length.
  */
  if (new_field.type_handler() != type_handler() ||
-      new_field.charset != field_charset() ||
+      compare_collations(new_field.charset, field_charset()) ||
      new_field.pack_length != pack_length())
    return false;
@ -9854,9 +9854,9 @@ Field_enum::can_optimize_range_or_keypart_ref(const Item_bool_func *cond,
  case REAL_RESULT:
    return Data_type_compatibility::OK;
  case STRING_RESULT:
-    return charset() == cond->compare_collation() ?
+    return (!compare_collations(charset(), cond->compare_collation()) ?
-           Data_type_compatibility::OK :
+            Data_type_compatibility::OK :
-           Data_type_compatibility::INCOMPATIBLE_COLLATION;
+            Data_type_compatibility::INCOMPATIBLE_COLLATION);
  case ROW_RESULT:
    DBUG_ASSERT(0);
    break;
--- a/sql/item.cc
+++ b/sql/item.cc
@ -2544,7 +2544,7 @@ bool DTCollation::aggregate(const DTCollation &dt, uint flags)
  }
  else
  { 
-    if (collation == dt.collation)
+    if (!compare_collations(collation, dt.collation))
    {
      /* Do nothing */
    }
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@ -6648,6 +6648,7 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
          table->field[1]->set_null(); // CHARACTER_SET_NAME
          table->field[2]->set_null(); // ID
          table->field[3]->set_null(); // IS_DEFAULT
          table->field[6]->set_null(); // Comment
        }
        else
        {
@ -6658,6 +6659,13 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
          table->field[3]->set_notnull(); // IS_DEFAULT
          table->field[3]->store(
            Show::Yes_or_empty::value(def_cl == tmp_cl), scs);
          if (tmp_cl->comment)
          {
            LEX_CSTRING comment;
            comment.str= tmp_cl->comment;
            comment.length= strlen(comment.str);
            table->field[6]->store(&comment, scs);
          }
        }
        table->field[4]->store(
          Show::Yes_or_empty::value(tmp_cl->compiled_flag()), scs);
@ -9706,7 +9714,8 @@ ST_FIELD_INFO collation_fields_info[]=
  Column("ID", SLonglong(MY_INT32_NUM_DECIMAL_DIGITS), NULLABLE, "Id"),
  Column("IS_DEFAULT",                 Yes_or_empty(), NULLABLE, "Default"),
  Column("IS_COMPILED",                Yes_or_empty(), NOT_NULL, "Compiled"),
-  Column("SORTLEN",                      SLonglong(3), NOT_NULL, "Sortlen"),
+  Column("SORTLEN",                    SLonglong(3),   NOT_NULL, "Sortlen"),
  Column("COMMENT",                    Varchar(80),    NOT_NULL),
  CEnd()
 };
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@ -34,6 +34,7 @@
 #include "strings_def.h"
 #include <m_ctype.h>
 #include <my_sys.h>
 #include "ctype-uca.h"
 #include "ctype-unidata.h"
 #include "my_bit.h"
@ -39564,4 +39565,129 @@ LEX_CSTRING my_ci_get_collation_name_uca(CHARSET_INFO *cs,
 }
 /*
  Add support for MySQL 8.0 utf8mb4_0900_.. collations
 */
 #define mysql_0900_collation_start 255
 /*
  One row of the MySQL 0900 -> MariaDB uca1400 collation name table.
  Rows are positional: the n-th row of the table describes the collation
  with id mysql_0900_collation_start + n.
 */
 struct mysql_0900_to_mariadb_1400_mapping
 {
  /* Name part placed between "utf8mb4_" and "0900_"; "" when there is none */
  const char *mysql_col_name;
  /*
    Name part placed between "utf8mb4_uca1400_" and "nopad_"; "" when there
    is none, NullS when no alias is to be registered for this row
  */
  const char *mariadb_col_name;
  /* Sensitivity suffix appended to both names, for example "ai_ci" */
  const char *case_sensitivity;
 };
 /*
  Translation table from MySQL 8.0 utf8mb4 0900 collation names to MariaDB
  utf8mb4 uca1400 collation names.

  The table is positional: mysql_utf8mb4_0900_collation_definitions_add()
  walks it from the top and gives the first row the id
  mysql_0900_collation_start, the second row the next id, and so on.
  Rows whose mariadb_col_name is NullS are skipped by that function but
  still consume an id, so they must stay in place to keep the following
  rows aligned with their ids.

  The row with mysql_col_name == NullS terminates the table.
 */
 struct mysql_0900_to_mariadb_1400_mapping mysql_0900_mapping[]=
 {
  /* 255 Accent insensitive, Case insensitive 'ai_ci' */
  {"", "", "ai_ci"},
  {"de_pb", "german2", "ai_ci"},
  {"is", "icelandic", "ai_ci"},
  {"lv", "latvian", "ai_ci"},
  {"ro", "romanian", "ai_ci"},
  {"sl", "slovenian", "ai_ci"},
  {"pl", "polish", "ai_ci"},
  {"et", "estonian", "ai_ci"},
  {"es", "spanish", "ai_ci"},
  {"sv", "swedish", "ai_ci"},
  {"tr", "turkish", "ai_ci"},
  {"cs", "czech", "ai_ci"},
  {"da", "danish", "ai_ci"},
  {"lt", "lithuanian", "ai_ci"},
  {"sk", "slovak", "ai_ci"},
  {"es_trad", "spanish2", "ai_ci"},
  {"la", "roman", "ai_ci"},
  {"fa", NullS, "ai_ci"},                             // Disabled in MySQL
  {"eo", "esperanto", "ai_ci"},
  {"hu", "hungarian", "ai_ci"},
  {"hr", "croatian", "ai_ci"},
  {"si", NullS, "ai_ci"},                             // Disabled in MySQL
  {"vi", "vietnamese", "ai_ci"},
  /* 278 Accent sensitive, Case sensitive 'as_cs' */
  {"","", "as_cs"},
  {"de_pb", "german2", "as_cs"},
  {"is", "icelandic", "as_cs"},
  {"lv", "latvian", "as_cs"},
  {"ro", "romanian", "as_cs"},
  {"sl", "slovenian", "as_cs"},
  {"pl", "polish", "as_cs"},
  {"et", "estonian", "as_cs"},
  {"es", "spanish", "as_cs"},
  {"sv", "swedish", "as_cs"},
  {"tr", "turkish", "as_cs"},
  {"cs", "czech", "as_cs"},
  {"da", "danish", "as_cs"},
  {"lt", "lithuanian", "as_cs"},
  {"sk", "slovak", "as_cs"},
  {"es_trad", "spanish2", "as_cs"},
  {"la", "roman", "as_cs"},
  {"fa", NullS, "as_cs"},                             // Disabled in MySQL
  {"eo", "esperanto", "as_cs"},
  {"hu", "hungarian", "as_cs"},
  {"hr", "croatian", "as_cs"},
  {"si", NullS, "as_cs"},                             // Disabled in MySQL
  {"vi", "vietnamese", "as_cs"},
  /* 301-304 Placeholder rows, skipped but needed to keep the ids aligned */
  {"", NullS, "as_cs"},                               // Missing
  {"", NullS, "as_cs"},                               // Missing
  {"_ja_0900_as_cs", NullS, "as_cs"},                 // Not supported
  {"_ja_0900_as_cs_ks", NullS, "as_cs"},              // Not supported
  /* 305 Accent sensitive, Case insensitive 'as_ci' */
  {"","", "as_ci"},
  /* 306-308 Skipped rows (mariadb_col_name is NullS) */
  {"ru", NullS, "ai_ci"},                             // Not supported
  {"ru", NullS, "as_cs"},                             // Not supported
  {"zh", NullS, "as_cs"},                             // Not supported
  /* End-of-table marker: mysql_col_name is NullS */
  {NullS, NullS, ""}
 };
 /*
  Register the MySQL 8.0 utf8mb4 0900 collation names as aliases of the
  corresponding MariaDB collations, using the MySQL collation name and id.

  Walks mysql_0900_mapping, handing out consecutive ids that start at
  mysql_0900_collation_start. For every row that has a MariaDB name, the
  alias "utf8mb4_[<mysql name>_]0900_<sensitivity>" is registered for
  "utf8mb4_uca1400_[<mariadb name>_]nopad_<sensitivity>" through
  add_alias_for_collation(). Rows without a MariaDB name are skipped, but
  they still consume an id. Finally "utf8mb4_0900_bin" (id 309) is
  registered as an alias of "utf8mb4_bin".

  @return 0 if all aliases were added, 1 as soon as
          add_alias_for_collation() returns non-zero
 */
 my_bool mysql_utf8mb4_0900_collation_definitions_add()
 {
  struct mysql_0900_to_mariadb_1400_mapping *entry= mysql_0900_mapping;
  uint collation_id= mysql_0900_collation_start;
  LEX_CSTRING bin_alias= { STRING_WITH_LEN("utf8mb4_0900_bin") };
  LEX_CSTRING bin_target= { STRING_WITH_LEN("utf8mb4_bin") };

  for ( ; entry->mysql_col_name; entry++, collation_id++)
  {
    char target_buf[64], alias_buf[64];
    LEX_CSTRING target, alias;
    const char *mariadb_sep, *mysql_sep;

    /* No MariaDB counterpart: nothing to register, the id is still used up */
    if (!entry->mariadb_col_name)
      continue;

    /* An empty name part must not leave a doubled '_' in the built name */
    mariadb_sep= entry->mariadb_col_name[0] ? "_" : "";
    mysql_sep=   entry->mysql_col_name[0] ? "_" : "";

    target.str=    target_buf;
    target.length= (strxnmov(target_buf, sizeof(target_buf)-1,
                             "utf8mb4_uca1400_",
                             entry->mariadb_col_name,
                             mariadb_sep,
                             "nopad_",
                             entry->case_sensitivity,
                             NullS) - target_buf);

    alias.str=    alias_buf;
    alias.length= (strxnmov(alias_buf, sizeof(alias_buf)-1,
                            "utf8mb4_", entry->mysql_col_name,
                            mysql_sep,
                            "0900_",
                            entry->case_sensitivity,
                            NullS) - alias_buf);

    if (add_alias_for_collation(&target, &alias, collation_id))
      return 1;
  }

  /* The binary collation is not part of the table; its id is fixed at 309 */
  return add_alias_for_collation(&bin_target, &bin_alias, 309) ? 1 : 0;
 }
 #endif /* HAVE_UCA_COLLATIONS */
--- a/strings/ctype-uca1400.h
+++ b/strings/ctype-uca1400.h
@ -248,5 +248,6 @@ my_uca1400_collation_definition_init(MY_CHARSET_LOADER *loader,
 #define MY_UCA1400_COLLATION_DEFINITION_COUNT 26
 my_bool mysql_utf8mb4_0900_collation_definitions_add();
 #endif /* CTYPE_UCA_1400_H */
		`@ -1 +1 @@`
			`Subproject commit 1093c22e357d0164aa072b829693e39f14322384`				`Subproject commit 52d0a38ed15f62906206d77b675079fc159cec7e`