MDEV-30746 Regression in ucs2_general_mysql500_ci

1. Adding a separate MY_COLLATION_HANDLER
   my_collation_ucs2_general_mysql500_ci_handler
   implementing a proper order for ucs2_general_mysql500_ci.
   The problem happened because ucs2_general_mysql500_ci
   erroneously used my_collation_ucs2_general_ci_handler.

2. Cosmetic changes: Renaming:
   - plane00_mysql500 to my_unicase_mysql500_page00
   - my_unicase_pages_mysql500 to my_unicase_mysql500_pages
   to use the same naming style with:
   - my_unicase_default_page00
   - my_unicase_default_pages

3. Moving code fragments from
   - handler::check_collation_compatibility() in handler.cc
   - upgrade_collation() in table.cc
   into new methods in class Charset, to make the code easier to reuse.
This commit is contained in:
Alexander Barkov 2023-02-28 10:49:25 +04:00
parent 841e8877cc
commit 965bdf3e66
13 changed files with 251 additions and 58 deletions

View file

@ -6442,5 +6442,22 @@ IS_IPV4('10.0.0.1')
1
SET NAMES utf8;
#
# MDEV-30746 Regression in ucs2_general_mysql500_ci
#
SET NAMES utf8mb3;
CREATE TABLE t1 (a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_general_mysql500_ci);
INSERT INTO t1 VALUES ('s'),('z'),(_latin1 0xDF);
SELECT GROUP_CONCAT(a) FROM t1 GROUP BY a ORDER BY a;
GROUP_CONCAT(a)
s
z
ß
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a;
a HEX(a) HEX(WEIGHT_STRING(a))
s 0073 0053
z 007A 005A
ß 00DF 00DF
DROP TABLE t1;
#
# End of 10.4 tests
#

View file

@ -1136,6 +1136,18 @@ SELECT IS_IPV6('::');
SELECT IS_IPV4('10.0.0.1');
SET NAMES utf8;
--echo #
--echo # MDEV-30746 Regression in ucs2_general_mysql500_ci
--echo #
SET NAMES utf8mb3;
CREATE TABLE t1 (a VARCHAR(32) CHARACTER SET ucs2 COLLATE ucs2_general_mysql500_ci);
INSERT INTO t1 VALUES ('s'),('z'),(_latin1 0xDF);
SELECT GROUP_CONCAT(a) FROM t1 GROUP BY a ORDER BY a;
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a;
DROP TABLE t1;
--echo #
--echo # End of 10.4 tests
--echo #

View file

@ -399,3 +399,56 @@ DROP TABLE maria050313_utf8_croatian_ci;
DROP TABLE maria050533_xxx_croatian_ci;
DROP TABLE maria100004_xxx_croatian_ci;
DROP TABLE mysql050614_xxx_croatian_ci;
#
# Start of 10.4 tests
#
#
# MDEV-30746 Regression in ucs2_general_mysql500_ci
#
SET NAMES utf8mb3;
SHOW CREATE TABLE t1;
ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.t1` FORCE" or dump/reload to fix it!
SELECT * FROM t1;
ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.t1` FORCE" or dump/reload to fix it!
SELECT * FROM t1 IGNORE KEY(a);
ERROR HY000: Table rebuild required. Please do "ALTER TABLE `test.t1` FORCE" or dump/reload to fix it!
CHECK TABLE t1;
Table Op Msg_type Msg_text
test.t1 check error Upgrade required. Please do "REPAIR TABLE `t1`" or dump/reload to fix it!
REPAIR TABLE t1;
Table Op Msg_type Msg_text
test.t1 repair status OK
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a;
a HEX(a) HEX(WEIGHT_STRING(a))
s 0073 0053
z 007A 005A
ß 00DF 00DF
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 FORCE KEY(a) ORDER BY a;
a HEX(a) HEX(WEIGHT_STRING(a))
s 0073 0053
z 007A 005A
ß 00DF 00DF
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 IGNORE KEY(a) ORDER BY a;
a HEX(a) HEX(WEIGHT_STRING(a))
s 0073 0053
z 007A 005A
ß 00DF 00DF
SELECT GROUP_CONCAT(a) FROM t1 GROUP BY a ORDER BY a;
GROUP_CONCAT(a)
s
z
ß
SELECT GROUP_CONCAT(a) FROM t1 IGNORE KEY(a) GROUP BY a ORDER BY a;
GROUP_CONCAT(a)
s
z
ß
SELECT GROUP_CONCAT(a) FROM t1 FORCE KEY(a) GROUP BY a ORDER BY a;
GROUP_CONCAT(a)
s
z
ß
DROP TABLE t1;
#
# End of 10.4 tests
#

View file

@ -203,3 +203,38 @@ DROP TABLE maria050313_utf8_croatian_ci;
DROP TABLE maria050533_xxx_croatian_ci;
DROP TABLE maria100004_xxx_croatian_ci;
DROP TABLE mysql050614_xxx_croatian_ci;
--echo #
--echo # Start of 10.4 tests
--echo #
--echo #
--echo # MDEV-30746 Regression in ucs2_general_mysql500_ci
--echo #
SET NAMES utf8mb3;
copy_file std_data/ctype_upgrade/mariadb100428_ucs2_general_ci.frm $MYSQLD_DATADIR/test/t1.frm;
copy_file std_data/ctype_upgrade/mariadb100428_ucs2_general_ci.MYD $MYSQLD_DATADIR/test/t1.MYD;
copy_file std_data/ctype_upgrade/mariadb100428_ucs2_general_ci.MYI $MYSQLD_DATADIR/test/t1.MYI;
--error ER_TABLE_NEEDS_REBUILD
SHOW CREATE TABLE t1;
--error ER_TABLE_NEEDS_REBUILD
SELECT * FROM t1;
--error ER_TABLE_NEEDS_REBUILD
SELECT * FROM t1 IGNORE KEY(a);
CHECK TABLE t1;
REPAIR TABLE t1;
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 ORDER BY a;
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 FORCE KEY(a) ORDER BY a;
SELECT a, HEX(a), HEX(WEIGHT_STRING(a)) FROM t1 IGNORE KEY(a) ORDER BY a;
SELECT GROUP_CONCAT(a) FROM t1 GROUP BY a ORDER BY a;
SELECT GROUP_CONCAT(a) FROM t1 IGNORE KEY(a) GROUP BY a ORDER BY a;
SELECT GROUP_CONCAT(a) FROM t1 FORCE KEY(a) GROUP BY a ORDER BY a;
DROP TABLE t1;
--echo #
--echo # End of 10.4 tests
--echo #

View file

@ -4110,7 +4110,7 @@ int handler::check_collation_compatibility()
{
ulong mysql_version= table->s->mysql_version;
if (mysql_version < 50124)
if (mysql_version < Charset::latest_mariadb_version_with_collation_change())
{
KEY *key= table->key_info;
KEY *key_end= key + table->s->keys;
@ -4124,18 +4124,7 @@ int handler::check_collation_compatibility()
continue;
Field *field= table->field[key_part->fieldnr - 1];
uint cs_number= field->charset()->number;
if ((mysql_version < 50048 &&
(cs_number == 11 || /* ascii_general_ci - bug #29499, bug #27562 */
cs_number == 41 || /* latin7_general_ci - bug #29461 */
cs_number == 42 || /* latin7_general_cs - bug #29461 */
cs_number == 20 || /* latin7_estonian_cs - bug #29461 */
cs_number == 21 || /* latin2_hungarian_ci - bug #29461 */
cs_number == 22 || /* koi8u_general_ci - bug #29461 */
cs_number == 23 || /* cp1251_ukrainian_ci - bug #29461 */
cs_number == 26)) || /* cp1250_general_ci - bug #29461 */
(mysql_version < 50124 &&
(cs_number == 33 || /* utf8_general_ci - bug #27877 */
cs_number == 35))) /* ucs2_general_ci - bug #27877 */
if (Charset::collation_changed_order(mysql_version, cs_number))
return HA_ADMIN_NEEDS_UPGRADE;
}
}

View file

@ -184,6 +184,83 @@ public:
{
return m_charset != &my_charset_bin;
}
/*
  Newest MariaDB version number at which the behaviour of some collation
  was last altered (for example by a bug fix).

  handler::check_collation_compatibility() compares the table's version
  against this value before inspecting key collations. 110002 is also the
  highest upper bound tested in collation_changed_order().
*/
static ulong latest_mariadb_version_with_collation_change()
{
  const ulong last_change_version= 110002;
  return last_change_version;
}
/*
  Tell whether the collation with ID cs_number sorts differently now
  than it did in the server version mysql_version.

  @param mysql_version  version number to compare against
  @param cs_number      collation ID
  @retval true   the order of this collation changed after mysql_version
  @retval false  no known order change
*/
static bool collation_changed_order(ulong mysql_version, uint cs_number)
{
  switch (cs_number) {
  case 11:  /* ascii_general_ci    - bug #29499, bug #27562 */
  case 41:  /* latin7_general_ci   - bug #29461 */
  case 42:  /* latin7_general_cs   - bug #29461 */
  case 20:  /* latin7_estonian_cs  - bug #29461 */
  case 21:  /* latin2_hungarian_ci - bug #29461 */
  case 22:  /* koi8u_general_ci    - bug #29461 */
  case 23:  /* cp1251_ukrainian_ci - bug #29461 */
  case 26:  /* cp1250_general_ci   - bug #29461 */
    return mysql_version < 50048;

  case 33:  /* utf8_general_ci - bug #27877 */
  case 35:  /* ucs2_general_ci - bug #27877 */
    return mysql_version < 50124;

  case 159: /* ucs2_general_mysql500_ci - MDEV-30746 */
  {
    /* Half-open version intervals [from, before) matched for MDEV-30746 */
    static const ulong affected[][2]=
    {
      {100400, 100429},
      {100500, 100520},
      {100600, 100613},
      {100700, 100708},
      {100800, 100808},
      {100900, 100906},
      {101000, 101004},
      {101100, 101103},
      {110000, 110002}
    };
    for (unsigned i= 0; i < sizeof(affected) / sizeof(affected[0]); i++)
    {
      if (mysql_version >= affected[i][0] && mysql_version < affected[i][1])
        return true;
    }
    return false;
  }

  default:
    return false;
  }
}
/**
  Translate a collation ID written by an older server version into the
  ID returned for it now.

  @param mysql_version  version number to check the ID against
  @param cs_number      collation ID
  @retval the new collation ID (or cs_number, if no change)
*/
static uint upgrade_collation_id(ulong mysql_version, uint cs_number)
{
  const bool from_53= mysql_version >= 50300 && mysql_version <= 50399;
  const bool from_55_or_early_100=
    (mysql_version >= 50500 && mysql_version <= 50599) ||
    (mysql_version >= 100000 && mysql_version <= 100005);

  /* IDs remapped in all of the version ranges above */
  if (from_53 || from_55_or_early_100)
  {
    if (cs_number == 149)
      return MY_PAGE2_COLLATION_ID_UCS2;     // ucs2_croatian_ci
    if (cs_number == 213)
      return MY_PAGE2_COLLATION_ID_UTF8;     // utf8_croatian_ci
  }

  /* IDs remapped only for 5.5.x and 10.0.0 - 10.0.5 */
  if (from_55_or_early_100)
  {
    if (cs_number == 214)
      return MY_PAGE2_COLLATION_ID_UTF32;    // utf32_croatian_ci
    if (cs_number == 215)
      return MY_PAGE2_COLLATION_ID_UTF16;    // utf16_croatian_ci
    if (cs_number == 245)
      return MY_PAGE2_COLLATION_ID_UTF8MB4;  // utf8mb4_croatian_ci
  }

  return cs_number;
}
};

View file

@ -929,39 +929,6 @@ static uint enum_value_with_check(THD *thd, TABLE_SHARE *share,
}
/**
Check if a collation has changed number
@param mysql_version  server version number (callers pass share->mysql_version)
@param cs_number      current collation number
@retval new collation number (same as current collation number if no change)
*/
static uint upgrade_collation(ulong mysql_version, uint cs_number)
{
if (mysql_version >= 50300 && mysql_version <= 50399)
{
switch (cs_number) {
case 149: return MY_PAGE2_COLLATION_ID_UCS2; // ucs2_croatian_ci
case 213: return MY_PAGE2_COLLATION_ID_UTF8; // utf8_croatian_ci
}
}
if ((mysql_version >= 50500 && mysql_version <= 50599) ||
(mysql_version >= 100000 && mysql_version <= 100005))
{
switch (cs_number) {
case 149: return MY_PAGE2_COLLATION_ID_UCS2; // ucs2_croatian_ci
case 213: return MY_PAGE2_COLLATION_ID_UTF8; // utf8_croatian_ci
case 214: return MY_PAGE2_COLLATION_ID_UTF32; // utf32_croatian_ci
case 215: return MY_PAGE2_COLLATION_ID_UTF16; // utf16_croatian_ci
case 245: return MY_PAGE2_COLLATION_ID_UTF8MB4;// utf8mb4_croatian_ci
}
}
/* No remapping applies: keep the number unchanged */
return cs_number;
}
void Column_definition_attributes::frm_pack_basic(uchar *buff) const
{
int2store(buff + 3, length);
@ -989,7 +956,7 @@ bool Column_definition_attributes::frm_unpack_charset(TABLE_SHARE *share,
const uchar *buff)
{
uint cs_org= buff[14] + (((uint) buff[11]) << 8);
uint cs_new= upgrade_collation(share->mysql_version, cs_org);
uint cs_new= Charset::upgrade_collation_id(share->mysql_version, cs_org);
if (cs_org != cs_new)
share->incompatible_version|= HA_CREATE_USED_CHARSET;
if (cs_new && !(charset= get_charset(cs_new, MYF(0))))
@ -1735,7 +1702,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
if (!frm_image[32]) // New frm file in 3.23
{
uint cs_org= (((uint) frm_image[41]) << 8) + (uint) frm_image[38];
uint cs_new= upgrade_collation(share->mysql_version, cs_org);
uint cs_new= Charset::upgrade_collation_id(share->mysql_version, cs_org);
if (cs_org != cs_new)
share->incompatible_version|= HA_CREATE_USED_CHARSET;
@ -2760,6 +2727,9 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write,
goto err;
field= key_part->field= share->field[key_part->fieldnr-1];
if (Charset::collation_changed_order(share->mysql_version,
field->charset()->number))
share->incompatible_version|= HA_CREATE_USED_CHARSET;
key_part->type= field->key_type();
if (field->invisible > INVISIBLE_USER && !field->vers_sys_field())

View file

@ -2987,6 +2987,14 @@ static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
}
/*
  Sort weight of the UCS2 character made of bytes b0,b1 for
  ucs2_general_mysql500_ci: the "sort" member of its entry in
  my_unicase_mysql500_pages, or the code point itself when the
  character's page is absent from the table.
*/
static inline int my_weight_mb2_ucs2_general_mysql500_ci(uchar b0, uchar b1)
{
  const my_wc_t code= UCS2_CODE(b0, b1);
  const MY_UNICASE_CHARACTER *row= my_unicase_mysql500_pages[code >> 8];
  if (!row)
    return (int) code;
  return (int) row[code & 0xFF].sort;
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_ci
#define DEFINE_STRNXFRM_UNICODE
#define DEFINE_STRNXFRM_UNICODE_NOPAD
@ -3000,6 +3008,18 @@ static inline int my_weight_mb2_ucs2_general_ci(uchar b0, uchar b1)
#include "strcoll.inl"
/*
  Parameterize and include strcoll.inl for ucs2_general_mysql500_ci.
  MY_FUNCTION_NAME() makes the included code use names of the form
  my_<x>_ucs2_general_mysql500_ci; weights are taken from the mysql500
  unicase tables (my_unicase_mysql500_page00 / my_unicase_mysql500_pages).
  NOTE(review): presumably this is what defines the my_strnncoll*_ and
  my_strnxfrm_ functions referenced by
  my_collation_ucs2_general_mysql500_ci_handler - confirm in strcoll.inl.
*/
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_general_mysql500_ci
#define DEFINE_STRNXFRM_UNICODE
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
#define OPTIMIZE_ASCII 0
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0 my_unicase_mysql500_page00
#define UNICASE_PAGES my_unicase_mysql500_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB2(b0,b1) my_weight_mb2_ucs2_general_mysql500_ci(b0,b1)
#include "strcoll.inl"
#define MY_FUNCTION_NAME(x) my_ ## x ## _ucs2_bin
#define DEFINE_STRNXFRM_UNICODE_BIN2
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_ucs2_quick(pwc, s, e)
@ -3285,6 +3305,23 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
};
/*
  Collation handler for ucs2_general_mysql500_ci (MDEV-30746).
  The strnncoll, strnncollsp, strnncollsp_nchars and strnxfrm entries are
  the collation's own my_*_ucs2_general_mysql500_ci functions; the other
  entries are functions whose names are not specific to this collation.
*/
static MY_COLLATION_HANDLER my_collation_ucs2_general_mysql500_ci_handler =
{
NULL, /* init */
my_strnncoll_ucs2_general_mysql500_ci,
my_strnncollsp_ucs2_general_mysql500_ci,
my_strnncollsp_nchars_ucs2_general_mysql500_ci,
my_strnxfrm_ucs2_general_mysql500_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
my_wildcmp_ucs2_ci,
my_strcasecmp_mb2_or_mb4,
my_instr_mb,
my_hash_sort_ucs2,
my_propagate_simple
};
static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
{
NULL, /* init */
@ -3431,7 +3468,7 @@ struct charset_info_st my_charset_ucs2_general_mysql500_ci=
0, /* escape_with_backslash_is_dangerous */
1, /* levels_for_order */
&my_charset_ucs2_handler,
&my_collation_ucs2_general_ci_handler
&my_collation_ucs2_general_mysql500_ci_handler
};

View file

@ -21,6 +21,9 @@
extern MY_UNICASE_CHARACTER my_unicase_default_page00[256];
extern MY_UNICASE_CHARACTER *my_unicase_default_pages[256];
extern MY_UNICASE_CHARACTER my_unicase_mysql500_page00[256];
extern MY_UNICASE_CHARACTER *my_unicase_mysql500_pages[256];
size_t my_strxfrm_pad_nweights_unicode(uchar *str, uchar *strend, size_t nweights);
size_t my_strxfrm_pad_unicode(uchar *str, uchar *strend);

View file

@ -248,7 +248,7 @@ MY_UNICASE_CHARACTER my_unicase_default_page00[]={
Almost similar to my_unicase_default_page00, but maps sorting order
for U+00DF to 0x00DF instead of 0x0053.
*/
static MY_UNICASE_CHARACTER plane00_mysql500[]={
MY_UNICASE_CHARACTER my_unicase_mysql500_page00[]={
{0x0000,0x0000,0x0000}, {0x0001,0x0001,0x0001},
{0x0002,0x0002,0x0002}, {0x0003,0x0003,0x0003},
{0x0004,0x0004,0x0004}, {0x0005,0x0005,0x0005},
@ -1739,8 +1739,8 @@ MY_UNICASE_INFO my_unicase_default=
/*
Reproduce old utf8_general_ci behaviour before we fixed Bug#27877.
*/
MY_UNICASE_CHARACTER *my_unicase_pages_mysql500[256]={
plane00_mysql500,
MY_UNICASE_CHARACTER *my_unicase_mysql500_pages[256]={
my_unicase_mysql500_page00,
plane01, plane02, plane03, plane04, plane05, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
@ -1780,7 +1780,7 @@ MY_UNICASE_CHARACTER *my_unicase_pages_mysql500[256]={
MY_UNICASE_INFO my_unicase_mysql500=
{
0xFFFF,
my_unicase_pages_mysql500
my_unicase_mysql500_pages
};
@ -5266,14 +5266,14 @@ static inline int my_weight_mb3_utf8_general_ci(uchar b0, uchar b1, uchar b2)
static inline int my_weight_mb1_utf8_general_mysql500_ci(uchar b)
{
return (int) plane00_mysql500[b & 0xFF].sort;
return (int) my_unicase_mysql500_page00[b & 0xFF].sort;
}
static inline int my_weight_mb2_utf8_general_mysql500_ci(uchar b0, uchar b1)
{
my_wc_t wc= UTF8MB2_CODE(b0, b1);
MY_UNICASE_CHARACTER *page= my_unicase_pages_mysql500[wc >> 8];
MY_UNICASE_CHARACTER *page= my_unicase_mysql500_pages[wc >> 8];
return (int) (page ? page[wc & 0xFF].sort : wc);
}
@ -5282,7 +5282,7 @@ static inline int
my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
{
my_wc_t wc= UTF8MB3_CODE(b0, b1, b2);
MY_UNICASE_CHARACTER *page= my_unicase_pages_mysql500[wc >> 8];
MY_UNICASE_CHARACTER *page= my_unicase_mysql500_pages[wc >> 8];
return (int) (page ? page[wc & 0xFF].sort : wc);
}
@ -5292,8 +5292,8 @@ my_weight_mb3_utf8_general_mysql500_ci(uchar b0, uchar b1, uchar b2)
#define MY_MB_WC(cs, pwc, s, e) my_mb_wc_utf8mb3_quick(pwc, s, e)
#define OPTIMIZE_ASCII 1
#define UNICASE_MAXCHAR MY_UNICASE_INFO_DEFAULT_MAXCHAR
#define UNICASE_PAGE0 plane00_mysql500
#define UNICASE_PAGES my_unicase_pages_mysql500
#define UNICASE_PAGE0 my_unicase_mysql500_page00
#define UNICASE_PAGES my_unicase_mysql500_pages
#define WEIGHT_ILSEQ(x) (0xFF0000 + (uchar) (x))
#define WEIGHT_MB1(x) my_weight_mb1_utf8_general_mysql500_ci(x)
#define WEIGHT_MB2(x,y) my_weight_mb2_utf8_general_mysql500_ci(x,y)