mirror of
https://github.com/MariaDB/server.git
synced 2025-01-30 10:31:54 +01:00
Bug#55980 Character sets: supplementary character _bin ordering is wrong
Post-fix: Reverting the "utf16_bin did not sort supplementary characters between U+D7FF and U+E000" part. We'll use code-point order. Committing on behalf of Alexander Barkov.
This commit is contained in:
parent
aad23e52c3
commit
a92cc88aa5
3 changed files with 4 additions and 15 deletions
|
@ -625,16 +625,16 @@ INSERT INTO t1 VALUES (_utf8mb4 0xCE85),(_utf8mb4 0xF4808080);
|
||||||
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
|
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
|
||||||
HEX(a) HEX(CONVERT(a USING utf8mb4))
|
HEX(a) HEX(CONVERT(a USING utf8mb4))
|
||||||
0385 CE85
|
0385 CE85
|
||||||
|
FF9D EFBE9D
|
||||||
D800DF84 F0908E84
|
D800DF84 F0908E84
|
||||||
DBC0DC00 F4808080
|
DBC0DC00 F4808080
|
||||||
FF9D EFBE9D
|
|
||||||
ALTER TABLE t1 ADD KEY(a);
|
ALTER TABLE t1 ADD KEY(a);
|
||||||
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
|
SELECT HEX(a), HEX(CONVERT(a USING utf8mb4)) FROM t1 ORDER BY a;
|
||||||
HEX(a) HEX(CONVERT(a USING utf8mb4))
|
HEX(a) HEX(CONVERT(a USING utf8mb4))
|
||||||
0385 CE85
|
0385 CE85
|
||||||
|
FF9D EFBE9D
|
||||||
D800DF84 F0908E84
|
D800DF84 F0908E84
|
||||||
DBC0DC00 F4808080
|
DBC0DC00 F4808080
|
||||||
FF9D EFBE9D
|
|
||||||
DROP TABLE IF EXISTS t1;
|
DROP TABLE IF EXISTS t1;
|
||||||
select @@collation_connection;
|
select @@collation_connection;
|
||||||
@@collation_connection
|
@@collation_connection
|
||||||
|
|
|
@ -1463,7 +1463,7 @@ my_strnncoll_utf16_bin(CHARSET_INFO *cs,
|
||||||
}
|
}
|
||||||
if (s_wc != t_wc)
|
if (s_wc != t_wc)
|
||||||
{
|
{
|
||||||
return my_bincmp(s, s + s_res, t, t + t_res);
|
return s_wc > t_wc ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
s+= s_res;
|
s+= s_res;
|
||||||
|
@ -1503,7 +1503,7 @@ my_strnncollsp_utf16_bin(CHARSET_INFO *cs,
|
||||||
|
|
||||||
if (s_wc != t_wc)
|
if (s_wc != t_wc)
|
||||||
{
|
{
|
||||||
return my_bincmp(s, s + s_res, t, t + t_res);
|
return s_wc > t_wc ? 1 : -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
s+= s_res;
|
s+= s_res;
|
||||||
|
|
|
@ -1967,17 +1967,6 @@ my_strnxfrm_unicode_full_bin(CHARSET_INFO *cs,
|
||||||
if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0)
|
if ((res= cs->cset->mb_wc(cs, &wc, src, se)) <= 0)
|
||||||
break;
|
break;
|
||||||
src+= res;
|
src+= res;
|
||||||
if (cs->mbminlen == 2) /* utf16_bin */
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
Reorder code points to weights as follows:
|
|
||||||
U+0000..U+D7FF -> [00][00][00]..[00][D7][FF] BMP part #1
|
|
||||||
U+10000..U+10FFFF -> [01][00][00]..[10][FF][FF] Supplementary
|
|
||||||
U+E000..U+FFFF -> [20][E0][00]..[20][FF][FF] BMP part #2
|
|
||||||
*/
|
|
||||||
if (wc >= 0xE000 && wc <= 0xFFFF)
|
|
||||||
wc+= 0x200000;
|
|
||||||
}
|
|
||||||
*dst++= (uchar) (wc >> 16);
|
*dst++= (uchar) (wc >> 16);
|
||||||
*dst++= (uchar) ((wc >> 8) & 0xFF);
|
*dst++= (uchar) ((wc >> 8) & 0xFF);
|
||||||
*dst++= (uchar) (wc & 0xFF);
|
*dst++= (uchar) (wc & 0xFF);
|
||||||
|
|
Loading…
Add table
Reference in a new issue