MDEV-20912 Add support for utf8mb4_0900_* collations in MariaDB Server

This is done by mapping most of the existing MySQL unicode 0900 collations to MariaDB 1400 unicode collations. The assumption is that 1400 is a superset of 0900 for all practical purposes. I also added a new function 'compare_collations()' and changed most code to use this instead of comparing character sets directly. This enables one to seamlessly mix-and-match the corresponding 0900 and 1400 sets. Field comparison and ALTER TABLE treat the character sets as identical. All MySQL 8.0 0900 collations are supported except: - utf8mb4_ja_0900_as_cs - utf8mb4_ja_0900_as_cs_ks - utf8mb4_ru_0900_as_cs - utf8mb4_zh_0900_as_cs These do not have corresponding entries in the MariaDB 1400 collations. Other things: - Added COMMENT column to information_schema.collations. For utf8mb4_0900 collations it contains the corresponding alias collation.
2025-01-15 11:32:29 +01:00 · 2024-12-15 15:57:53 +02:00 · 2024-12-15 15:57:53 +02:00 · 7fcaab7aaa
commit 7fcaab7aaa
parent 9e7762e718
21 changed files with 6284 additions and 102 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -458,9 +458,9 @@ typedef struct my_charset_loader_st
 {
  char error[128];
  void *(*once_alloc)(size_t);
-  void *(*malloc)(size_t);
-  void *(*realloc)(void *, size_t);
-  void (*free)(void *);
+  void *(*malloc)(size_t);                      /* Not used */
+  void *(*realloc)(void *, size_t);             /* Not used */
+  void (*free)(void *);                         /* Not used */
  void (*reporter)(enum loglevel, const char *format, ...);
  int  (*add_collation)(struct charset_info_st *cs);
 } MY_CHARSET_LOADER;
@ -1693,6 +1693,7 @@ my_bool my_propagate_complex(CHARSET_INFO *cs, const uchar *str, size_t len);
 uint my_ci_get_id_generic(CHARSET_INFO *cs, my_collation_id_type_t type);
 LEX_CSTRING my_ci_get_collation_name_generic(CHARSET_INFO *cs,
                                             my_collation_name_mode_t mode);
+my_bool compare_collations(CHARSET_INFO *cs1, CHARSET_INFO *cs2);

 typedef struct 
 {
--- a/include/my_sys.h
+++ b/include/my_sys.h
@ -1119,6 +1119,9 @@ static inline my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
 extern my_bool init_compiled_charsets(myf flags);
 extern void add_compiled_collation(struct charset_info_st *cs);
 extern void add_compiled_extra_collation(struct charset_info_st *cs);
+extern my_bool add_alias_for_collation(LEX_CSTRING *collation_name,
+                                       LEX_CSTRING *alias,
+                                       uint alias_id);
 extern size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
                                      char *to, size_t to_length,
                                      const char *from, size_t length,
--- a/2
+++ b/2
@ -1 +1 @@
-Subproject commit 1093c22e357d0164aa072b829693e39f14322384
+Subproject commit 52d0a38ed15f62906206d77b675079fc159cec7e
--- a/mysql-test/main/ctype_ldml.result
+++ b/mysql-test/main/ctype_ldml.result
@ -456,43 +456,43 @@ select "foo" = "foo " collate latin1_test;
 1
 The following tests check that two-byte collation IDs work
 select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
-ascii2_general_nopad_ci	ascii2	318			1
-ascii2_bin2	ascii2	319			1
-ascii2_general_ci	ascii2	320	Yes		1
-ascii2_bin	ascii2	321			1
-ascii2_general_inherited_ci	ascii2	322			1
-ascii2_general_inherited2_ci	ascii2	323			1
-ascii2_badly_inherited_ci	ascii2	324			1
-ascii2_nopad_bin	ascii2	325			1
-utf8mb4_test_ci	utf8mb4	326			8
-utf16_test_ci	utf16	327			8
-utf8mb4_test_400_ci	utf8mb4	328			8
-utf8mb4_test_520_nopad_ci	utf8mb4	329			8
-utf8mb4_uca1400_test01_as_ci	utf8mb4	330			4
-latin1_test	latin1	331			1
-latin1_test2	latin1	332			1
-latin1_test2_cs	latin1	333			1
-latin1_swedish_nopad2_ci	latin1	334			1
-utf8mb3_bengali_standard_ci	utf8mb3	336			8
-utf8mb3_bengali_traditional_ci	utf8mb3	337			8
-utf8mb3_implicit_weights_ci	utf8mb3	338			8
-utf8mb3_phone_ci	utf8mb3	352			8
-utf8mb3_test_ci	utf8mb3	353			8
-utf8mb3_5624_1	utf8mb3	354			8
-utf8mb3_5624_2	utf8mb3	355			8
-utf8mb3_5624_3	utf8mb3	356			8
-utf8mb3_5624_4	utf8mb3	357			8
-ucs2_test_ci	ucs2	358			8
-ucs2_vn_ci	ucs2	359			8
-ucs2_5624_1	ucs2	360			8
-utf8mb3_5624_5	utf8mb3	368			8
-utf8mb3_5624_5_bad	utf8mb3	369			8
-utf8mb3_czech_test_w2	utf8mb3	370			4
-utf8mb3_czech_test_nopad_w2	utf8mb3	371			4
-utf8mb3_czech_test_bad_w2	utf8mb3	372			4
-utf32_test_ci	utf32	391			8
-utf8mb3_maxuserid_ci	utf8mb3	2047			8
+COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT
+ascii2_general_nopad_ci	ascii2	318			1	
+ascii2_bin2	ascii2	319			1	
+ascii2_general_ci	ascii2	320	Yes		1	
+ascii2_bin	ascii2	321			1	
+ascii2_general_inherited_ci	ascii2	322			1	
+ascii2_general_inherited2_ci	ascii2	323			1	
+ascii2_badly_inherited_ci	ascii2	324			1	
+ascii2_nopad_bin	ascii2	325			1	
+utf8mb4_test_ci	utf8mb4	326			8	
+utf16_test_ci	utf16	327			8	
+utf8mb4_test_400_ci	utf8mb4	328			8	
+utf8mb4_test_520_nopad_ci	utf8mb4	329			8	
+utf8mb4_uca1400_test01_as_ci	utf8mb4	330			4	
+latin1_test	latin1	331			1	cp1252 West European
+latin1_test2	latin1	332			1	cp1252 West European
+latin1_test2_cs	latin1	333			1	cp1252 West European
+latin1_swedish_nopad2_ci	latin1	334			1	cp1252 West European
+utf8mb3_bengali_standard_ci	utf8mb3	336			8	
+utf8mb3_bengali_traditional_ci	utf8mb3	337			8	
+utf8mb3_implicit_weights_ci	utf8mb3	338			8	
+utf8mb3_phone_ci	utf8mb3	352			8	
+utf8mb3_test_ci	utf8mb3	353			8	
+utf8mb3_5624_1	utf8mb3	354			8	
+utf8mb3_5624_2	utf8mb3	355			8	
+utf8mb3_5624_3	utf8mb3	356			8	
+utf8mb3_5624_4	utf8mb3	357			8	
+ucs2_test_ci	ucs2	358			8	
+ucs2_vn_ci	ucs2	359			8	
+ucs2_5624_1	ucs2	360			8	
+utf8mb3_5624_5	utf8mb3	368			8	
+utf8mb3_5624_5_bad	utf8mb3	369			8	
+utf8mb3_czech_test_w2	utf8mb3	370			4	
+utf8mb3_czech_test_nopad_w2	utf8mb3	371			4	
+utf8mb3_czech_test_bad_w2	utf8mb3	372			4	
+utf32_test_ci	utf32	391			8	
+utf8mb3_maxuserid_ci	utf8mb3	2047			8	
 show collation like '%test%';
 Collation	Charset	Id	Default	Compiled	Sortlen
 latin1_test	latin1	331			1
--- a/mysql-test/main/ctype_utf8mb4_0900.result
+++ b/mysql-test/main/ctype_utf8mb4_0900.result
--- a/mysql-test/main/ctype_utf8mb4_0900.test
+++ b/mysql-test/main/ctype_utf8mb4_0900.test
@ -0,0 +1,116 @@
+-- source include/have_ucs2.inc
+-- source include/have_utf8mb4.inc
+-- source include/have_innodb.inc
+
+--disable_warnings
+DROP TABLE IF EXISTS t1;
+--enable_warnings
+
+#
+# Basic tests
+#
+
+select * from information_schema.COLLATION_CHARACTER_SET_APPLICABILITY
+where collation_name like "%0900%" order by collation_name;
+
+select * from information_schema.COLLATIONS where collation_name like "%0900%";
+
+SET NAMES utf8mb4;
+CREATE TABLE t1 (c1 CHAR(10) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin);
+
+--source include/ctype_unicode_latin.inc
+
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_unicode_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_icelandic_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_latvian_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_romanian_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_slovenian_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_polish_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_estonian_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_spanish_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_swedish_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_turkish_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_czech_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_danish_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_lithuanian_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_slovak_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_spanish2_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_roman_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_esperanto_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_hungarian_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_croatian_mysql561_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_croatian_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_german2_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_unicode_520_ci;
+SELECT GROUP_CONCAT(c1 ORDER BY c1 SEPARATOR '') FROM t1 GROUP BY c1 COLLATE utf8mb4_vietnamese_ci;
+
+DROP TABLE t1;
+
+--echo #
+--echo # MDEV-20912 Add support for utf8mb4_0900_* collations in MariaDB Server
+--echo #
+
+CREATE DATABASE db1 CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+USE db1;
+
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
+ALTER TABLE t1 CONVERT TO CHARACTER SET DEFAULT COLLATE utf8mb4_0900_ai_ci;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET latin1);
+ALTER TABLE t1 CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_sv_0900_ai_ci;
+SHOW CREATE TABLE t1;
+DROP TABLE t1;
+
+DROP DATABASE db1;
+USE test;
+
+--echo #
+--echo # CREATE TABLE - table level character set and collation
+--echo #
+
+CREATE DATABASE db1 CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+
+CREATE TABLE db1.t1 (a CHAR(1)) COLLATE utf8mb4_0900_ai_ci;
+SHOW CREATE TABLE db1.t1;
+ALTER TABLE db1.t1 modify a CHAR(1) COLLATE utf8mb4_sv_0900_ai_ci;
+SHOW CREATE TABLE db1.t1;
+DROP TABLE db1.t1;
+
+CREATE TABLE db1.t1 (a CHAR(1)) COLLATE utf8mb4_sv_0900_ai_ci;
+SHOW CREATE TABLE db1.t1;
+DROP TABLE db1.t1;
+
+CREATE TABLE db1.t1 (a CHAR(1)) CHARACTER SET DEFAULT COLLATE utf8mb4_0900_ai_ci;
+SHOW CREATE TABLE db1.t1;
+DROP TABLE db1.t1;
+
+DROP DATABASE db1;
+
+--echo #
+--echo # Ensure that we can seamlessly compare and move between
+--echo # utf8mb4_sv_0900_ai_ci and utf8mb4_uca1400_swedish_1400_nopad_ai_ci
+--echo #
+
+CREATE TABLE t1 (p int primary key auto_increment, a VARCHAR(10), key (a)) engine=innodb, COLLATE utf8mb4_sv_0900_ai_ci;
+show create table t1;
+CREATE TABLE t2 (p int primary key auto_increment, a VARCHAR(10), key(a)) engine=innodb, COLLATE utf8mb4_uca1400_swedish_nopad_ai_ci;
+show create table t2;
+
+insert into t1 (a) values ("hello"),("world");
+insert into t2 (a) values ("hello"),("world");
+explain select * from t1,t2 where t1.a=t2.a;
+
+--echo # Check that alter table can convert between the character sets
+
+alter table t1 modify a varchar(10) collate utf8mb4_uca1400_swedish_nopad_ai_ci, algorithm=nocopy;
+show create table t1;
+
+alter table t2 modify a varchar(10) collate utf8mb4_sv_0900_ai_ci, algorithm=nocopy;
+show create table t2;
+drop table t1,t2;
+
+CREATE OR REPLACE TABLE t1 (p int primary key auto_increment, a VARCHAR(10), key (a)) engine=aria, COLLATE utf8mb4_sv_0900_ai_ci;
+alter table t1 modify a varchar(10) collate utf8mb4_uca1400_swedish_nopad_ai_ci, algorithm=nocopy;
+drop table t1;
--- a/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.result
+++ b/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.result
--- a/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.test
+++ b/mysql-test/main/ctype_utf8mb4_0900_ai_ci_casefold.test
@ -0,0 +1,15 @@
+--echo #
+--echo # Start of 10.10 tests
+--echo #
+
+--echo #
+--echo # MDEV-30577 Case folding for uca1400 collations is not up to date
+--echo #
+
+SET NAMES utf8mb4 COLLATE utf8mb4_0900_ai_ci;
+--source include/ctype_unicode_casefold_bmp.inc
+--source include/ctype_unicode_casefold_supplementary.inc
+
+--echo #
+--echo # End of 11.4 tests
+--echo #
--- a/mysql-test/main/information_schema.result
+++ b/mysql-test/main/information_schema.result
@ -278,17 +278,17 @@ Charset	Description	Default collation	Maxlen
 latin1	cp1252 West European	latin1_swedish_ci	1
 select * from information_schema.COLLATIONS
 where COLLATION_NAME like 'latin1%';
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
-latin1_german1_ci	latin1	5		#	1
-latin1_swedish_ci	latin1	8	Yes	#	1
-latin1_danish_ci	latin1	15		#	1
-latin1_german2_ci	latin1	31		#	2
-latin1_bin	latin1	47		#	1
-latin1_general_ci	latin1	48		#	1
-latin1_general_cs	latin1	49		#	1
-latin1_spanish_ci	latin1	94		#	1
-latin1_swedish_nopad_ci	latin1	1032		#	1
-latin1_nopad_bin	latin1	1071		#	1
+COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT
+latin1_german1_ci	latin1	5		#	1	cp1252 West European
+latin1_swedish_ci	latin1	8	Yes	#	1	cp1252 West European
+latin1_danish_ci	latin1	15		#	1	cp1252 West European
+latin1_german2_ci	latin1	31		#	2	cp1252 West European
+latin1_bin	latin1	47		#	1	cp1252 West European
+latin1_general_ci	latin1	48		#	1	cp1252 West European
+latin1_general_cs	latin1	49		#	1	cp1252 West European
+latin1_spanish_ci	latin1	94		#	1	cp1252 West European
+latin1_swedish_nopad_ci	latin1	1032		#	1	
+latin1_nopad_bin	latin1	1071		#	1	
 SHOW COLLATION LIKE 'latin1%';
 Collation	Charset	Id	Default	Compiled	Sortlen
 latin1_german1_ci	latin1	5		#	1
@ -1653,7 +1653,7 @@ SELECT *
 FROM tables ta
 JOIN collations co ON ( co.collation_name = ta.table_catalog )
 JOIN character_sets cs ON ( cs.character_set_name = ta.table_catalog );
-TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	TABLE_TYPE	ENGINE	VERSION	ROW_FORMAT	TABLE_ROWS	AVG_ROW_LENGTH	DATA_LENGTH	MAX_DATA_LENGTH	INDEX_LENGTH	DATA_FREE	AUTO_INCREMENT	CREATE_TIME	UPDATE_TIME	CHECK_TIME	TABLE_COLLATION	CHECKSUM	CREATE_OPTIONS	TABLE_COMMENT	MAX_INDEX_LENGTH	TEMPORARY	COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
+TABLE_CATALOG	TABLE_SCHEMA	TABLE_NAME	TABLE_TYPE	ENGINE	VERSION	ROW_FORMAT	TABLE_ROWS	AVG_ROW_LENGTH	DATA_LENGTH	MAX_DATA_LENGTH	INDEX_LENGTH	DATA_FREE	AUTO_INCREMENT	CREATE_TIME	UPDATE_TIME	CHECK_TIME	TABLE_COLLATION	CHECKSUM	CREATE_OPTIONS	TABLE_COMMENT	MAX_INDEX_LENGTH	TEMPORARY	COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT	CHARACTER_SET_NAME	DEFAULT_COLLATE_NAME	DESCRIPTION	MAXLEN
 DROP TABLE test.t1;
 SET max_heap_table_size = DEFAULT;
 USE test;
--- a/mysql-test/suite/funcs_1/r/charset_collation.result
+++ b/mysql-test/suite/funcs_1/r/charset_collation.result
@ -18,11 +18,11 @@ AND (collation_name LIKE CONCAT(character_set_name,'_general_ci')
 OR
 collation_name LIKE CONCAT(character_set_name,'_bin'))
 ORDER BY collation_name;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
-latin1_bin	latin1	47		Yes	1
-latin1_general_ci	latin1	48		Yes	1
-utf8mb3_bin	utf8mb3	83		Yes	1
-utf8mb3_general_ci	utf8mb3	33	Yes	Yes	1
+COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN	COMMENT
+latin1_bin	latin1	47		Yes	1	cp1252 West European
+latin1_general_ci	latin1	48		Yes	1	cp1252 West European
+utf8mb3_bin	utf8mb3	83		Yes	1	UTF-8 Unicode
+utf8mb3_general_ci	utf8mb3	33	Yes	Yes	1	UTF-8 Unicode

 SELECT *
 FROM information_schema.collation_character_set_applicability
--- a/mysql-test/suite/funcs_1/r/is_collations.result
+++ b/mysql-test/suite/funcs_1/r/is_collations.result
@ -34,6 +34,7 @@ ID	bigint(11)	YES		NULL
 IS_DEFAULT	varchar(3)	YES		NULL	
 IS_COMPILED	varchar(3)	NO		NULL	
 SORTLEN	bigint(3)	NO		NULL	
+COMMENT	varchar(80)	NO		NULL	
 SHOW CREATE TABLE information_schema.COLLATIONS;
 Table	Create Table
 COLLATIONS	CREATE TEMPORARY TABLE `COLLATIONS` (
@ -42,7 +43,8 @@ COLLATIONS	CREATE TEMPORARY TABLE `COLLATIONS` (
  `ID` bigint(11),
  `IS_DEFAULT` varchar(3),
  `IS_COMPILED` varchar(3) NOT NULL,
-  `SORTLEN` bigint(3) NOT NULL
+  `SORTLEN` bigint(3) NOT NULL,
+  `COMMENT` varchar(80) NOT NULL
 ) ENGINE=MEMORY DEFAULT CHARSET=utf8mb3 COLLATE=utf8mb3_general_ci
 SHOW COLUMNS FROM information_schema.COLLATIONS;
 Field	Type	Null	Key	Default	Extra
@ -52,6 +54,7 @@ ID	bigint(11)	YES		NULL
 IS_DEFAULT	varchar(3)	YES		NULL	
 IS_COMPILED	varchar(3)	NO		NULL	
 SORTLEN	bigint(3)	NO		NULL	
+COMMENT	varchar(80)	NO		NULL	
 # Testcases 3.2.3.2 and 3.2.3.3 are checked in suite/funcs_1/t/charset_collation*.test
 ########################################################################
 # Testcases 3.2.1.3-3.2.1.5 + 3.2.1.8-3.2.1.12: INSERT/UPDATE/DELETE and
--- a/mysql-test/suite/funcs_1/r/is_columns_is.result
+++ b/mysql-test/suite/funcs_1/r/is_columns_is.result
@ -57,6 +57,7 @@ def	information_schema	CLIENT_STATISTICS	TOTAL_SSL_CONNECTIONS	24	NULL	NO	bigint
 def	information_schema	CLIENT_STATISTICS	UPDATE_COMMANDS	16	NULL	NO	bigint	NULL	NULL	19	0	NULL	NULL	NULL	bigint(21)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	CHARACTER_SET_NAME	2	NULL	YES	varchar	32	96	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(32)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	COLLATION_NAME	1	NULL	NO	varchar	64	192	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(64)			select		NEVER	NULL	NO	NO
+def	information_schema	COLLATIONS	COMMENT	7	NULL	NO	varchar	80	240	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(80)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	ID	3	NULL	YES	bigint	NULL	NULL	19	0	NULL	NULL	NULL	bigint(11)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	IS_COMPILED	5	NULL	NO	varchar	3	9	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(3)			select		NEVER	NULL	NO	NO
 def	information_schema	COLLATIONS	IS_DEFAULT	4	NULL	YES	varchar	3	9	NULL	NULL	NULL	utf8mb3	utf8mb3_general_ci	varchar(3)			select		NEVER	NULL	NO	NO
@ -641,6 +642,7 @@ NULL	information_schema	COLLATIONS	ID	bigint	NULL	NULL	NULL	NULL	bigint(11)
 3.0000	information_schema	COLLATIONS	IS_DEFAULT	varchar	3	9	utf8mb3	utf8mb3_general_ci	varchar(3)
 3.0000	information_schema	COLLATIONS	IS_COMPILED	varchar	3	9	utf8mb3	utf8mb3_general_ci	varchar(3)
 NULL	information_schema	COLLATIONS	SORTLEN	bigint	NULL	NULL	NULL	NULL	bigint(3)
+3.0000	information_schema	COLLATIONS	COMMENT	varchar	80	240	utf8mb3	utf8mb3_general_ci	varchar(80)
 3.0000	information_schema	COLLATION_CHARACTER_SET_APPLICABILITY	COLLATION_NAME	varchar	64	192	utf8mb3	utf8mb3_general_ci	varchar(64)
 3.0000	information_schema	COLLATION_CHARACTER_SET_APPLICABILITY	CHARACTER_SET_NAME	varchar	32	96	utf8mb3	utf8mb3_general_ci	varchar(32)
 3.0000	information_schema	COLLATION_CHARACTER_SET_APPLICABILITY	FULL_COLLATION_NAME	varchar	64	192	utf8mb3	utf8mb3_general_ci	varchar(64)
--- a/mysql-test/suite/innodb/r/innodb_ctype_ldml.result
+++ b/mysql-test/suite/innodb/r/innodb_ctype_ldml.result
@ -388,8 +388,8 @@ select "foo" = "foo " collate latin1_test;
 "foo" = "foo " collate latin1_test
 1
 The following tests check that two-byte collation IDs work
-select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
-COLLATION_NAME	CHARACTER_SET_NAME	ID	IS_DEFAULT	IS_COMPILED	SORTLEN
+select collation_name, character_set_name, id, is_default, is_compiled, sortlen from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
+collation_name	character_set_name	id	is_default	is_compiled	sortlen
 ascii2_general_nopad_ci	ascii2	318			1
 ascii2_bin2	ascii2	319			1
 ascii2_general_ci	ascii2	320	Yes		1
--- a/mysql-test/suite/innodb/t/innodb_ctype_ldml.test
+++ b/mysql-test/suite/innodb/t/innodb_ctype_ldml.test
@ -171,7 +171,7 @@ select "foo" = "foo " collate latin1_test;
 # The file ../std-data/Index.xml has a number of collations with high IDs.

 # Test that the "ID" column in I_S and SHOW queries can handle two bytes
-select * from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
+select collation_name, character_set_name, id, is_default, is_compiled, sortlen from information_schema.collations where id>256 and is_compiled<>'Yes' order by id;
 show collation like '%test%';

 # Test that two-byte collation ID is correctly transfered to the client side.
--- a/mysys/charset-def.c
+++ b/mysys/charset-def.c
@ -543,6 +543,8 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))

  if (my_uca1400_collation_definitions_add())
    return TRUE;
-  
+  if (mysql_utf8mb4_0900_collation_definitions_add())
+    return TRUE;
+
  return FALSE;
 }
--- a/mysys/charset.c
+++ b/mysys/charset.c
@ -29,7 +29,7 @@
 #include <locale.h>
 #endif

-extern HASH charset_name_hash;
+static HASH charset_name_hash;

 /*
  The code below implements this functionality:
@ -640,6 +640,62 @@ void add_compiled_extra_collation(struct charset_info_st *cs)
 }


+/*
+  Add an alias for a collation with a unique id
+  Used to add MySQL utf8mb4_0900 collations to MariaDB as an alias for the
+  corresponding utf8mb4_1400 collation
+*/
+
+my_bool add_alias_for_collation(LEX_CSTRING *collation_name, LEX_CSTRING *alias,
+                                uint alias_id)
+{
+  char *coll_name, *comment;
+  struct charset_info_st *new_ci;
+  CHARSET_INFO *org;
+  MY_CHARSET_LOADER loader;
+  char comment_buff[64+15];
+  size_t comment_length;
+  uint org_id= get_collation_number_internal(collation_name->str);
+  DBUG_ASSERT(org_id);
+  DBUG_ASSERT(all_charsets[org_id]);
+
+  if (!(org= all_charsets[org_id]))
+    return 1;
+  /*
+    We have to init the character set to ensure it is not changed after we copy
+    it.
+  */
+  my_charset_loader_init_mysys(&loader);
+  if (my_ci_init_charset((struct charset_info_st*) org, &loader) ||
+      my_ci_init_collation((struct charset_info_st*) org, &loader) ||
+      (org->m_ctype &&
+       init_state_maps((struct charset_info_st*) org)))
+    return 1;
+  ((struct charset_info_st*) org)->state|= MY_CS_READY;
+
+  comment_length= strxnmov(comment_buff, sizeof(comment_buff)-1,
+                           "Alias for ", collation_name->str,
+                           NullS) - comment_buff;
+
+  if (!(new_ci= ((struct charset_info_st*)
+                 my_once_alloc(sizeof(CHARSET_INFO) +
+                               alias->length + comment_length + 2,
+                               MYF(MY_WME)))))
+    return 1;
+
+  coll_name= (char*) (new_ci+1);
+  comment= coll_name + alias->length +1;
+  memcpy((void*) new_ci, org, sizeof(CHARSET_INFO));
+  (new_ci->coll_name.str)= coll_name;
+  memcpy(coll_name, alias->str, alias->length+1);
+  memcpy(comment, comment_buff, comment_length+1);
+  new_ci->coll_name.length= alias->length;
+  new_ci->comment= comment;
+  new_ci->number= alias_id;
+  all_charsets[alias_id]= new_ci;
+  return 0;
+}
+

 static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
 static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
@ -660,6 +716,54 @@ my_bool my_collation_is_known_id(uint id)
 }


+/*
+  Compare if two collations are identical.
+  They are identical if all slots are identical except collation name and
+  number.  Note that alias collations are made by memcpy(), which means that
+  the padding in the structures is also identical.
+
+  Note that this code assumes knowledge of the CHARSET_INFO structure.
+  Especially the place of number, cs_name, coll_name and tailoring.
+
+  Other option would have been to add a new member 'alias_collation'
+  into CHARSET_INFO where all identical collations would point to,
+  but that would have changed the CHARSET_INFO structure which would
+  have required a lot more changes.
+
+  @return 0  Identical
+  @return 1  Different
+*/
+
+my_bool compare_collations(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
+{
+  size_t length;
+
+  if (cs1 == cs2)
+    return 0;
+
+  /* Quick check to detect different collation */
+  if (cs1->cset != cs2->cset || cs1->coll != cs2->coll ||
+      cs1->uca != cs2->uca)
+    goto diff;
+
+  /* We don't compare character set number */
+  if (cs1->primary_number != cs2->primary_number)
+    goto diff;
+  if (cs1->binary_number != cs2->binary_number)
+    goto diff;
+  if (cs1->state != cs2->state)
+    goto diff;
+
+  /* Compare everything from 'tailoring' to the end of the structure */
+  length= sizeof(CHARSET_INFO) - (((char*) &cs1->tailoring) - (char*) cs1);
+
+  if (!memcmp(&cs1->tailoring, &cs2->tailoring, length))
+   return 0;
+diff:
+  return 1;
+}
+
+
 /*
  Collation use statistics functions do not lock
  counters to avoid mutex contention. This can lose
@ -688,8 +792,6 @@ const char *my_collation_get_tailoring(uint id)
 }


-HASH charset_name_hash;
-
 static const uchar *get_charset_key(const void *object, size_t *size,
                                    my_bool not_used __attribute__((unused)))
 {
@ -723,7 +825,7 @@ static void init_available_charsets(void)
    if (*cs)
    {
      DBUG_ASSERT(cs[0]->mbmaxlen <= MY_CS_MBMAXLEN);
-      if (cs[0]->m_ctype)
+      if (cs[0]->m_ctype && !cs[0]->state_map)
        if (init_state_maps(*cs))
          *cs= NULL;
    }
--- a/sql/field.cc
+++ b/sql/field.cc
@ -2718,9 +2718,9 @@ void Field_null::sql_type(String &res) const
 bool Field_null::is_equal(const Column_definition &new_field) const
 {
  DBUG_ASSERT(!compression_method());
-  return new_field.type_handler() == type_handler() &&
-         new_field.charset == field_charset() &&
-         new_field.length == max_display_length();
+  return (new_field.type_handler() == type_handler() &&
+          !compare_collations(new_field.charset, field_charset()) &&
+          new_field.length == max_display_length());
 }


@ -7490,10 +7490,10 @@ int Field_str::store(double nr)
 bool Field_string::is_equal(const Column_definition &new_field) const
 {
  DBUG_ASSERT(!compression_method());
-  return new_field.type_handler() == type_handler() &&
-         new_field.char_length == char_length() &&
-         new_field.charset == field_charset() &&
-         new_field.length == max_display_length();
+  return (new_field.type_handler() == type_handler() &&
+          new_field.char_length == char_length() &&
+          !compare_collations(new_field.charset, field_charset()) &&
+          new_field.length == max_display_length());
 }


@ -7514,11 +7514,11 @@ Data_type_compatibility
 Field_longstr::cmp_to_string_with_same_collation(const Item_bool_func *cond,
                                                 const Item *item) const
 {
-  return !cmp_is_done_using_type_handler_of_this(cond, item) ?
-         Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
-         charset() != cond->compare_collation() ?
-         Data_type_compatibility::INCOMPATIBLE_COLLATION :
-         Data_type_compatibility::OK;
+  return (!cmp_is_done_using_type_handler_of_this(cond, item) ?
+          Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
+          compare_collations(charset(), cond->compare_collation()) ?
+          Data_type_compatibility::INCOMPATIBLE_COLLATION :
+          Data_type_compatibility::OK);
 }


@ -7526,13 +7526,13 @@ Data_type_compatibility
 Field_longstr::cmp_to_string_with_stricter_collation(const Item_bool_func *cond,
                                                     const Item *item) const
 {
-  return !cmp_is_done_using_type_handler_of_this(cond, item) ?
-         Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
-         (charset() != cond->compare_collation() &&
-          !(cond->compare_collation()->state & MY_CS_BINSORT) &&
-          !Utf8_narrow::should_do_narrowing(this, cond->compare_collation())) ?
-         Data_type_compatibility::INCOMPATIBLE_COLLATION :
-         Data_type_compatibility::OK;
+  return (!cmp_is_done_using_type_handler_of_this(cond, item) ?
+          Data_type_compatibility::INCOMPATIBLE_DATA_TYPE :
+          (compare_collations(charset(), cond->compare_collation()) &&
+           !(cond->compare_collation()->state & MY_CS_BINSORT) &&
+           !Utf8_narrow::should_do_narrowing(this, cond->compare_collation())) ?
+          Data_type_compatibility::INCOMPATIBLE_COLLATION :
+          Data_type_compatibility::OK);
 }


@ -8443,11 +8443,11 @@ Field *Field_varstring::new_key_field(MEM_ROOT *root, TABLE *new_table,

 bool Field_varstring::is_equal(const Column_definition &new_field) const
 {
-  return new_field.type_handler() == type_handler() &&
-         new_field.length == field_length &&
-         new_field.char_length == char_length() &&
-         !new_field.compression_method() == !compression_method() &&
-         new_field.charset == field_charset();
+  return (new_field.type_handler() == type_handler() &&
+          new_field.length == field_length &&
+          new_field.char_length == char_length() &&
+          !new_field.compression_method() == !compression_method() &&
+          !compare_collations(new_field.charset, field_charset()));
 }


@ -8714,7 +8714,7 @@ uint32 Field_blob::get_length(const uchar *pos, uint packlength_arg) const
 */
 int Field_blob::copy_value(Field_blob *from)
 {
-  DBUG_ASSERT(field_charset() == from->charset());
+  DBUG_ASSERT(!compare_collations(field_charset(), from->charset()));
  DBUG_ASSERT(!compression_method() == !from->compression_method());
  int rc= 0;
  uint32 length= from->get_length();
@ -9245,10 +9245,10 @@ uint Field_blob::max_packed_col_length(uint max_length)

 bool Field_blob::is_equal(const Column_definition &new_field) const
 {
-  return new_field.type_handler() == type_handler() &&
-         !new_field.compression_method() == !compression_method() &&
-         new_field.pack_length == pack_length() &&
-         new_field.charset == field_charset();
+  return (new_field.type_handler() == type_handler() &&
+          !new_field.compression_method() == !compression_method() &&
+          new_field.pack_length == pack_length() &&
+          !compare_collations(new_field.charset, field_charset()));
 }


@ -9747,7 +9747,7 @@ bool Field_enum::is_equal(const Column_definition &new_field) const
    type, charset and have the same underlying length.
  */
  if (new_field.type_handler() != type_handler() ||
-      new_field.charset != field_charset() ||
+      compare_collations(new_field.charset, field_charset()) ||
      new_field.pack_length != pack_length())
    return false;

@ -9854,9 +9854,9 @@ Field_enum::can_optimize_range_or_keypart_ref(const Item_bool_func *cond,
  case REAL_RESULT:
    return Data_type_compatibility::OK;
  case STRING_RESULT:
-    return charset() == cond->compare_collation() ?
-           Data_type_compatibility::OK :
-           Data_type_compatibility::INCOMPATIBLE_COLLATION;
+    return (!compare_collations(charset(), cond->compare_collation()) ?
+            Data_type_compatibility::OK :
+            Data_type_compatibility::INCOMPATIBLE_COLLATION);
  case ROW_RESULT:
    DBUG_ASSERT(0);
    break;
--- a/sql/item.cc
+++ b/sql/item.cc
@ -2544,7 +2544,7 @@ bool DTCollation::aggregate(const DTCollation &dt, uint flags)
  }
  else
  { 
-    if (collation == dt.collation)
+    if (!compare_collations(collation, dt.collation))
    {
      /* Do nothing */
    }
--- a/sql/sql_show.cc
+++ b/sql/sql_show.cc
@ -6648,6 +6648,7 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
          table->field[1]->set_null(); // CHARACTER_SET_NAME
          table->field[2]->set_null(); // ID
          table->field[3]->set_null(); // IS_DEFAULT
+          table->field[6]->set_null(); // Comment
        }
        else
        {
@ -6658,6 +6659,13 @@ int fill_schema_collation(THD *thd, TABLE_LIST *tables, COND *cond)
          table->field[3]->set_notnull(); // IS_DEFAULT
          table->field[3]->store(
            Show::Yes_or_empty::value(def_cl == tmp_cl), scs);
+          if (tmp_cl->comment)
+          {
+            LEX_CSTRING comment;
+            comment.str= tmp_cl->comment;
+            comment.length= strlen(comment.str);
+            table->field[6]->store(&comment, scs);
+          }
        }
        table->field[4]->store(
          Show::Yes_or_empty::value(tmp_cl->compiled_flag()), scs);
@ -9706,7 +9714,8 @@ ST_FIELD_INFO collation_fields_info[]=
  Column("ID", SLonglong(MY_INT32_NUM_DECIMAL_DIGITS), NULLABLE, "Id"),
  Column("IS_DEFAULT",                 Yes_or_empty(), NULLABLE, "Default"),
  Column("IS_COMPILED",                Yes_or_empty(), NOT_NULL, "Compiled"),
-  Column("SORTLEN",                      SLonglong(3), NOT_NULL, "Sortlen"),
+  Column("SORTLEN",                    SLonglong(3),   NOT_NULL, "Sortlen"),
+  Column("COMMENT",                    Varchar(80),    NOT_NULL),
  CEnd()
 };

--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@ -34,6 +34,7 @@

 #include "strings_def.h"
 #include <m_ctype.h>
+#include <my_sys.h>
 #include "ctype-uca.h"
 #include "ctype-unidata.h"
 #include "my_bit.h"
@ -39564,4 +39565,129 @@ LEX_CSTRING my_ci_get_collation_name_uca(CHARSET_INFO *cs,
 }


+/*
+  Add support for MySQL 8.0 utf8mb4_0900_.. collations
+
+  mysql_0900_collation_start is the collation id given to the first entry
+  of mysql_0900_mapping[]; each following entry gets the next id.
+
+  struct mysql_0900_to_mariadb_1400_mapping describes one such id:
+    mysql_col_name     Language part of the MySQL collation name, placed
+                       between "utf8mb4_" and "0900_" ("" for the default
+                       collation). NullS marks the end of the table.
+    mariadb_col_name   Language part of the MariaDB collation name, placed
+                       between "utf8mb4_uca1400_" and "nopad_". NullS
+                       means that no alias is registered for this id.
+    case_sensitivity   Suffix appended to both names, for example "ai_ci".
+*/
+
+#define mysql_0900_collation_start 255
+
+struct mysql_0900_to_mariadb_1400_mapping
+{
+  const char *mysql_col_name, *mariadb_col_name, *case_sensitivity;
+};
+
+struct mysql_0900_to_mariadb_1400_mapping mysql_0900_mapping[]=
+{
+  /*
+    Entries are listed in MySQL collation id order. The first entry has id
+    mysql_0900_collation_start (255) and every entry, including the ones
+    with a NullS MariaDB name, takes up one id.
+
+    255 Accent insensitive, Case insensitive 'ai_ci'
+  */
+  {"", "", "ai_ci"},
+  {"de_pb", "german2", "ai_ci"},
+  {"is", "icelandic", "ai_ci"},
+  {"lv", "latvian", "ai_ci"},
+  {"ro", "romanian", "ai_ci"},
+  {"sl", "slovenian", "ai_ci"},
+  {"pl", "polish", "ai_ci"},
+  {"et", "estonian", "ai_ci"},
+  {"es", "spanish", "ai_ci"},
+  {"sv", "swedish", "ai_ci"},
+  {"tr", "turkish", "ai_ci"},
+  {"cs", "czech", "ai_ci"},
+  {"da", "danish", "ai_ci"},
+  {"lt", "lithuanian", "ai_ci"},
+  {"sk", "slovak", "ai_ci"},
+  {"es_trad", "spanish2", "ai_ci"},
+  {"la", "roman", "ai_ci"},
+  {"fa", NullS, "ai_ci"},                             // Disabled in MySQL
+  {"eo", "esperanto", "ai_ci"},
+  {"hu", "hungarian", "ai_ci"},
+  {"hr", "croatian", "ai_ci"},
+  {"si", NullS, "ai_ci"},                             // Disabled in MySQL
+  {"vi", "vietnamese", "ai_ci"},
+
+  /* 278 Accent sensitive, Case sensitive 'as_cs' */
+  {"","", "as_cs"},
+  {"de_pb", "german2", "as_cs"},
+  {"is", "icelandic", "as_cs"},
+  {"lv", "latvian", "as_cs"},
+  {"ro", "romanian", "as_cs"},
+  {"sl", "slovenian", "as_cs"},
+  {"pl", "polish", "as_cs"},
+  {"et", "estonian", "as_cs"},
+  {"es", "spanish", "as_cs"},
+  {"sv", "swedish", "as_cs"},
+  {"tr", "turkish", "as_cs"},
+  {"cs", "czech", "as_cs"},
+  {"da", "danish", "as_cs"},
+  {"lt", "lithuanian", "as_cs"},
+  {"sk", "slovak", "as_cs"},
+  {"es_trad", "spanish2", "as_cs"},
+  {"la", "roman", "as_cs"},
+  {"fa", NullS, "as_cs"},                             // Disabled in MySQL
+  {"eo", "esperanto", "as_cs"},
+  {"hu", "hungarian", "as_cs"},
+  {"hr", "croatian", "as_cs"},
+  {"si", NullS, "as_cs"},                             // Disabled in MySQL
+  {"vi", "vietnamese", "as_cs"},
+
+  {"", NullS, "as_cs"},                               // Missing
+  {"", NullS, "as_cs"},                               // Missing
+  {"_ja_0900_as_cs", NullS, "as_cs"},                 // Not supported
+  {"_ja_0900_as_cs_ks", NullS, "as_cs"},              // Not supported
+
+  /*
+    305 Accent sensitive, Case insensitive 'as_ci'
+    306-308 (ru, zh) are listed only to keep the id sequence; their
+    MariaDB name is NullS, so no alias is created for them.
+  */
+  {"","", "as_ci"},
+  {"ru", NullS, "ai_ci"},                             // Not supported
+  {"ru", NullS, "as_cs"},                             // Not supported
+  {"zh", NullS, "as_cs"},                             // Not supported
+  {NullS, NullS, ""}
+};
+
+
+/*
+  Map MySQL utf8mb4_0900_* collations to MariaDB collations, using the
+  same definition but with the MySQL collation name and id.
+
+  For every entry in mysql_0900_mapping[] that has a MariaDB name, the
+  MariaDB name ("utf8mb4_uca1400_[<lang>_]nopad_<sensitivity>") and the
+  MySQL name ("utf8mb4_[<lang>_]0900_<sensitivity>") are built and passed
+  to add_alias_for_collation() together with the collation id.
+  Ids are counted from mysql_0900_collation_start and advance for every
+  table entry, also for the ones that are skipped.
+  Finally utf8mb4_0900_bin (id 309) is added as an alias for utf8mb4_bin.
+
+  Returns 1 as soon as a call to add_alias_for_collation() returns
+  non-zero, otherwise 0.
+*/
+
+my_bool mysql_utf8mb4_0900_collation_definitions_add()
+{
+  uint id= mysql_0900_collation_start;
+  struct mysql_0900_to_mariadb_1400_mapping *map;
+  LEX_CSTRING mysql_utf8_bin= { STRING_WITH_LEN("utf8mb4_0900_bin") };
+  LEX_CSTRING mariadb_utf8_bin= { STRING_WITH_LEN("utf8mb4_bin") };
+
+  for (map= mysql_0900_mapping; map->mysql_col_name ; map++, id++)
+  {
+    if (map->mariadb_col_name)               /* Supported collation; NullS entries are skipped */
+    {
+      size_t org_length, ali_length;
+      char original[64], alias[64];
+      LEX_CSTRING org_name, alias_name;
+
+      org_length= (strxnmov(original, sizeof(original)-1,
+                            "utf8mb4_uca1400_",
+                            map->mariadb_col_name,
+                            (map->mariadb_col_name[0] ? "_" : ""),
+                            "nopad_",
+                            map->case_sensitivity,
+                            NullS) - original);
+      ali_length= (strxnmov(alias, sizeof(alias)-1,
+                            "utf8mb4_", map->mysql_col_name,
+                            (map->mysql_col_name[0] ? "_" : ""),
+                            "0900_",
+                            map->case_sensitivity,
+                            NullS) - alias);
+      org_name.str=      original;
+      org_name.length=   org_length;
+      alias_name.str=    alias;
+      alias_name.length= ali_length;
+
+      if (add_alias_for_collation(&org_name, &alias_name, id))
+        return 1;
+    }
+  }
+
+  if (add_alias_for_collation(&mariadb_utf8_bin, &mysql_utf8_bin, 309))
+    return 1;
+  return 0;
+}
+
 #endif /* HAVE_UCA_COLLATIONS */
--- a/strings/ctype-uca1400.h
+++ b/strings/ctype-uca1400.h
@ -248,5 +248,6 @@ my_uca1400_collation_definition_init(MY_CHARSET_LOADER *loader,

 #define MY_UCA1400_COLLATION_DEFINITION_COUNT 26

+my_bool mysql_utf8mb4_0900_collation_definitions_add();

 #endif /* CTYPE_UCA_1400_H */