From a92cc88aa537bc1a3264c3681d2407e54a9dba74 Mon Sep 17 00:00:00 2001 From: Alexander Nozdrin Date: Wed, 24 Nov 2010 17:52:57 +0300 Subject: [PATCH] Bug#55980 Character sets: supplementary character _bin ordering is wrong Post-fix: Reverting the "utf16_bin did not sort supplementary characters between U+D700 and U+E000" part. We'll use code-point order. Committing on behalf of Alexander Barkov. --- mysql-test/r/ctype_utf16.result | 4 ++-- strings/ctype-ucs2.c | 4 ++-- strings/ctype-utf8.c | 11 ----------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/mysql-test/r/ctype_utf16.result b/mysql-test/r/ctype_utf16.result index 7cee15aecef..beafbf80e1e 100644 --- a/mysql-test/r/ctype_utf16.result +++ b/mysql-test/r/ctype_utf16.result @@ -625,16 +625,16 @@ INSERT INTO t1 VALUES (_utf8mb4 0xCE85),(_utf8mb4 0xF4808080); SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a; HEX(a) HEX(CONVERT(a USING utf8mb4)) 0385 CE85 +FF9D EFBE9D D800DF84 F0908E84 DBC0DC00 F4808080 -FF9D EFBE9D ALTER TABLE t1 ADD KEY(a); SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a; HEX(a) HEX(CONVERT(a USING utf8mb4)) 0385 CE85 +FF9D EFBE9D D800DF84 F0908E84 DBC0DC00 F4808080 -FF9D EFBE9D DROP TABLE IF EXISTS t1; select @@collation_connection; @@collation_connection diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c index 09652c5884e..85d391d0c70 100644 --- a/strings/ctype-ucs2.c +++ b/strings/ctype-ucs2.c @@ -1463,7 +1463,7 @@ my_strnncoll_utf16_bin(CHARSET_INFO *cs, } if (s_wc != t_wc) { - return my_bincmp(s, s + s_res, t, t + t_res); + return s_wc > t_wc ? 1 : -1; } s+= s_res; @@ -1503,7 +1503,7 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs, if (s_wc != t_wc) { - return my_bincmp(s, s + s_res, t, t + t_res); + return s_wc > t_wc ? 1 : -1; } s+= s_res; diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c index 76fff72290b..324f6b9aafb 100644 --- a/strings/ctype-utf8.c +++ b/strings/ctype-utf8.c @@ -1967,17 +1967,6 @@ my_strnxfrm_unicode_full_bin(CHARSET_INFO *cs, if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0) break; src+= res; - if (cs->mbminlen == 2) /* utf16_bin */ - { - /* - Reorder code points to weights as follows: - U+0000..U+D7FF -> [00][00][00]..[00][D7][FF] BMP part #1 - U+10000..U+10FFFF -> [01][00][00]..[10][FF][FF] Supplementary - U+E000..U+FFFF -> [20][E0][00]..[20][FF][FF] BMP part #2 - */ - if (wc >= 0xE000 && wc <= 0xFFFF) - wc+= 0x200000; - } *dst++= (uchar) (wc >> 16); *dst++= (uchar) ((wc >> 8) & 0xFF); *dst++= (uchar) (wc & 0xFF);