2022-01-26 12:42:17 +02:00
|
|
|
-- Scenario: a composite primary key (a,id) whose first part is a CHAR(8)
-- column using the utf8_nopad_bin collation. The table starts in
-- ROW_FORMAT=REDUNDANT and is switched to DYNAMIC and back while it holds
-- rows whose CHAR key part is the empty string.
CREATE TABLE t1 (a CHAR(8), id INT, PRIMARY KEY (a,id)) COLLATE utf8_nopad_bin
|
|
|
|
ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
|
|
|
|
-- First empty-string key value, inserted while the table is REDUNDANT.
INSERT INTO t1 VALUES ('',1);
|
|
|
|
-- Change the row format to DYNAMIC with the row above already present.
ALTER TABLE t1 ROW_FORMAT=DYNAMIC;
|
|
|
|
-- Second empty-string key value, this time inserted under DYNAMIC.
INSERT INTO t1 VALUES ('',2);
|
|
|
|
-- Change back to REDUNDANT with both rows present. No error is recorded
-- for any statement in this scenario.
ALTER TABLE t1 ROW_FORMAT=REDUNDANT;
|
|
|
|
-- Clean up.
DROP TABLE t1;
|
MDEV-26743 InnoDB: CHAR+nopad does not work well
The patch for "MDEV-25440: Indexed CHAR ... broken with NO_PAD collations"
fixed these scenarios from MDEV-26743:
- Basic Latin letter vs equal accented letter
- Two letters vs equal (but space padded) expansion
However, this scenario was still broken:
- Basic Latin letter (but followed by an ignorable character)
vs equal accented letter
Fix:
When a string with trailing ignorable characters is processed for a NOPAD
collation, for example:
'<non-ignorable><ignorable><ignorable>'
the string gets virtually converted to:
'<non-ignorable><ignorable><ignorable><space><space><space>...'
After the fix the code works differently in these two cases:
1. <space> fits into the "nchars" limit
2. <space> does not fit into the "nchars" limit
Details:
1. If "nchars" is large enough (4+ in this example),
return weights as follows:
'[weight-for-non-ignorable, 1 char] [weight-for-space-character, 3 chars]'
i.e. the weight for the virtual trailing space character now indicates
that it corresponds to a total of 3 characters:
- two ignorable characters
- one virtual trailing space character
2. If "nchars" is small (3), then the virtual trailing space character
does not fit into the "nchars" limit, so return 0x00 as weight, e.g.:
'[weight-for-non-ignorable, 1 char] [0x00, 2 chars]'
Adding corresponding MTR tests and unit tests.
2021-10-01 17:12:00 +04:00
|
|
|
#
|
|
|
|
# MDEV-26743 InnoDB: CHAR+nopad does not work well
|
|
|
|
#
|
|
|
|
#
|
|
|
|
# Basic Latin letter vs equal accented letter
|
|
|
|
#
|
|
|
|
-- Use utf8mb3 as the connection character set for the string literals below.
SET NAMES utf8mb3;
|
|
|
|
-- Primary key on a CHAR(2) column with the utf8_unicode_nopad_ci collation.
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
|
|
|
-- 'a' and 'ä' are inserted by a single statement. The recorded outcome on
-- the next line is a duplicate-key error reported for 'ä', i.e. the two
-- values are treated as the same primary key value.
INSERT INTO t1 VALUES ('a'),('ä');
|
|
|
|
ERROR 23000: Duplicate entry 'ä' for key 'PRIMARY'
|
|
|
|
-- Clean up.
DROP TABLE t1;
|
|
|
|
#
|
|
|
|
# Two letters vs equal (but space padded) expansion
|
|
|
|
#
|
|
|
|
-- Primary key on a CHAR(2) column with the utf8_unicode_nopad_ci collation.
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
|
|
|
-- 'ss' uses both characters of the column, 'ß' uses only one.
-- No error is recorded for this INSERT: both values are accepted as
-- distinct keys, and both appear in the SELECT output below.
INSERT INTO t1 VALUES ('ss'),('ß');
|
|
|
|
-- With PAD_CHAR_TO_FULL_LENGTH, CHAR values are returned without trimming
-- their trailing pad spaces, so the padding is visible in HEX() below.
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
|
|
|
-- Expected rows:
--   7373   = 's','s'            (column full, no pad space)
--   C39F20 = 'ß' (C39F) + 0x20  (one pad space)
SELECT HEX(a) FROM t1;
|
|
|
|
HEX(a)
|
|
|
|
7373
|
|
|
|
C39F20
|
|
|
|
-- Restore the default sql_mode for whatever follows.
SET sql_mode=DEFAULT;
|
|
|
|
-- Clean up.
DROP TABLE t1;
|
|
|
|
#
|
|
|
|
# Basic Latin letter (but followed by an ignorable character) vs equal accented letter
|
|
|
|
#
|
|
|
|
-- Use utf8mb3 as the connection character set for the string literals below.
SET NAMES utf8mb3;
|
|
|
|
-- Primary key on a CHAR(3) column; character set and collation are spelled
-- out explicitly as utf8mb3 / utf8mb3_unicode_nopad_ci.
CREATE TABLE t1 (a CHAR(3), PRIMARY KEY(a)) CHARACTER SET utf8mb3 COLLATE utf8mb3_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
|
|
|
-- First value: 'a' followed by the character 0x01, which serves as the
-- ignorable character in this scenario. Second value: 'ä'.
-- No error is recorded for this INSERT; both rows appear in the output below.
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
|
|
|
|
-- With PAD_CHAR_TO_FULL_LENGTH, CHAR values are returned without trimming
-- their trailing pad spaces, so the padding is visible in HEX() below.
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
|
|
|
-- ORDER BY HEX(a) fixes the row order of the recorded output.
-- Expected rows (3 characters each):
--   610120   = 'a', 0x01, one pad space
--   C3A42020 = 'ä' (C3A4), two pad spaces
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
|
|
|
|
HEX(a)
|
|
|
|
610120
|
|
|
|
C3A42020
|
|
|
|
-- Restore the default sql_mode for whatever follows.
SET sql_mode=DEFAULT;
|
|
|
|
-- Clean up.
DROP TABLE t1;
|
|
|
|
-- Use utf8mb3 as the connection character set for the string literals below.
SET NAMES utf8mb3;
|
|
|
|
-- Variant with a CHAR(2) primary key column (utf8_unicode_nopad_ci): the
-- value 'a' + 0x01 inserted below occupies both characters of the column.
CREATE TABLE t1 (a CHAR(2), PRIMARY KEY(a)) COLLATE utf8_unicode_nopad_ci ENGINE=InnoDB ROW_FORMAT=COMPACT;
|
|
|
|
-- First value: 'a' followed by the character 0x01, which serves as the
-- ignorable character in this scenario. Second value: 'ä'.
-- No error is recorded for this INSERT; both rows appear in the output below.
INSERT INTO t1 VALUES (CONCAT('a',_utf8mb3 0x01)),('ä');
|
|
|
|
-- With PAD_CHAR_TO_FULL_LENGTH, CHAR values are returned without trimming
-- their trailing pad spaces, so the padding is visible in HEX() below.
SET sql_mode=PAD_CHAR_TO_FULL_LENGTH;
|
|
|
|
-- ORDER BY HEX(a) fixes the row order of the recorded output.
-- Expected rows (2 characters each):
--   6101   = 'a', 0x01          (column full, no pad space)
--   C3A420 = 'ä' (C3A4) + 0x20  (one pad space)
SELECT HEX(a) FROM t1 ORDER BY HEX(a);
|
|
|
|
HEX(a)
|
|
|
|
6101
|
|
|
|
C3A420
|
|
|
|
-- Restore the default sql_mode for whatever follows.
SET sql_mode=DEFAULT;
|
|
|
|
-- Clean up.
DROP TABLE t1;
|