mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
This commit is contained in:
parent
702fba1511
commit
197afb413f
42 changed files with 1900 additions and 253 deletions
|
@ -444,22 +444,64 @@ struct my_charset_handler_st
|
|||
size_t (*scan)(CHARSET_INFO *, const char *b, const char *e,
|
||||
int sq);
|
||||
|
||||
/* Copying routines */
|
||||
/* String copying routines and helpers for them */
|
||||
/*
|
||||
copy_abort() - copy a string, abort if a bad byte sequence was found.
|
||||
charlen() - calculate length of the left-most character in bytes.
|
||||
@param cs Character set
|
||||
@param str The beginning of the string
|
||||
@param end The end of the string
|
||||
|
||||
@return MY_CS_ILSEQ if a bad byte sequence was found.
|
||||
@return MY_CS_TOOSMALLN(x) if the string ended unexpectedly.
|
||||
@return a positive number in the range 1..mbmaxlen,
|
||||
if a valid character was found.
|
||||
*/
|
||||
int (*charlen)(CHARSET_INFO *cs, const uchar *str, const uchar *end);
|
||||
/*
|
||||
well_formed_char_length() - returns character length of a string.
|
||||
|
||||
@param cs Character set
|
||||
@param str The beginning of the string
|
||||
@param end The end of the string
|
||||
@param nchars Not more than "nchars" left-most characters are checked.
|
||||
@param status[OUT] Additional statistics are returned here.
|
||||
"status" can be uninitialized before the call,
|
||||
and it is fully initialized after the call.
|
||||
|
||||
status->m_source_end_pos is set to the position where reading stopped.
|
||||
|
||||
If a bad byte sequence is found, the function returns immediately and
|
||||
status->m_well_formed_error_pos is set to the position where a bad byte
|
||||
sequence was found.
|
||||
|
||||
status->m_well_formed_error_pos is set to NULL if no bad bytes were found.
|
||||
If status->m_well_formed_error_pos is NULL after the call, that means:
|
||||
- either the function reached the end of the string,
|
||||
- or all "nchars" characters were read.
|
||||
The caller can check status->m_source_end_pos to detect which of these two
|
||||
happened.
|
||||
*/
|
||||
size_t (*well_formed_char_length)(CHARSET_INFO *cs,
|
||||
const char *str, const char *end,
|
||||
size_t nchars,
|
||||
MY_STRCOPY_STATUS *status);
|
||||
|
||||
/*
|
||||
copy_fix() - copy a string, replace bad bytes with '?'.
|
||||
Not more than "nchars" characters are copied.
|
||||
|
||||
status->m_source_end_pos is set to a position in the range
|
||||
between "src" and "src + src_length".
|
||||
between "src" and "src + src_length", where reading stopped.
|
||||
|
||||
status->m_well_formed_error_pos is set to NULL if the string
|
||||
in the range "src" and "status->m_source_end_pos" was well formed,
|
||||
or is set to "src + src_length" otherwise.
|
||||
or is set to a position between "src" and "src + src_length" where
|
||||
the leftmost bad byte sequence was found.
|
||||
*/
|
||||
size_t (*copy_abort)(CHARSET_INFO *,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status);
|
||||
size_t (*copy_fix)(CHARSET_INFO *,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status);
|
||||
};
|
||||
|
||||
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
|
||||
|
@ -596,10 +638,10 @@ size_t my_copy_8bit(CHARSET_INFO *,
|
|||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *);
|
||||
size_t my_copy_abort_mb(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *);
|
||||
size_t my_copy_fix_mb(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *);
|
||||
|
||||
/* Functions for 8bit */
|
||||
extern size_t my_caseup_str_8bit(CHARSET_INFO *, char *);
|
||||
|
@ -691,6 +733,11 @@ size_t my_numcells_8bit(CHARSET_INFO *, const char *b, const char *e);
|
|||
size_t my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, size_t pos);
|
||||
size_t my_well_formed_len_8bit(CHARSET_INFO *, const char *b, const char *e,
|
||||
size_t pos, int *error);
|
||||
size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
|
||||
const char *b, const char *e,
|
||||
size_t nchars,
|
||||
MY_STRCOPY_STATUS *status);
|
||||
int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end);
|
||||
uint my_mbcharlen_8bit(CHARSET_INFO *, uint c);
|
||||
|
||||
|
||||
|
|
|
@ -597,7 +597,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
|
|||
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
|
||||
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
|
||||
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
COUNT(*)
|
||||
13973
|
||||
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
|
||||
|
|
|
@ -165,7 +165,7 @@ Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
|
|||
SELECT COUNT(*) FROM t1;
|
||||
COUNT(*)
|
||||
14623
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
|
||||
COUNT(*)
|
||||
63
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
|
|
|
@ -10101,6 +10101,9 @@ COUNT(*)
|
|||
56959
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
COUNT(*)
|
||||
56959
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND a<>'?';
|
||||
COUNT(*)
|
||||
17735
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
COUNT(*)
|
||||
|
@ -33632,7 +33635,7 @@ CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms);
|
|||
INSERT INTO t1 VALUES (0x8EA0);
|
||||
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
|
||||
HEX(a) CHAR_LENGTH(a)
|
||||
0
|
||||
3F3F 2
|
||||
DROP TABLE t1;
|
||||
SELECT _eucjpms 0x8EA0;
|
||||
ERROR HY000: Invalid eucjpms character string: '8EA0'
|
||||
|
|
|
@ -407,12 +407,12 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xA1\xFF' for column 's1' at row 1
|
||||
select hex(s1), hex(convert(s1 using utf8)) from t1 order by binary s1;
|
||||
hex(s1) hex(convert(s1 using utf8))
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
3F3F 3F3F
|
||||
3F3F 3F3F
|
||||
3F40 3F40
|
||||
3F5B 3F5B
|
||||
3F60 3F60
|
||||
3F7B 3F7B
|
||||
A141 ECA2A5
|
||||
A15A ECA381
|
||||
A161 ECA382
|
||||
|
@ -445,7 +445,7 @@ FROM t1 t11, t1 t12
|
|||
WHERE t11.a >= 0x81 AND t11.a <= 0xFE
|
||||
AND t12.a >= 0x41 AND t12.a <= 0xFE
|
||||
ORDER BY t11.a, t12.a;
|
||||
SELECT s as bad_code FROM t2 WHERE a='' ORDER BY s;
|
||||
SELECT s as bad_code FROM t2 WHERE a='?' ORDER BY s;
|
||||
bad_code
|
||||
815B
|
||||
815C
|
||||
|
@ -1959,7 +1959,7 @@ FE7D
|
|||
FE7E
|
||||
FE7F
|
||||
FE80
|
||||
DELETE FROM t2 WHERE a='';
|
||||
DELETE FROM t2 WHERE a='?';
|
||||
ALTER TABLE t2 ADD u VARCHAR(1) CHARACTER SET utf8, ADD a2 VARCHAR(1) CHARACTER SET euckr;
|
||||
UPDATE t2 SET u=a, a2=u;
|
||||
SELECT s as unassigned_code FROM t2 WHERE u='?';
|
||||
|
@ -24492,7 +24492,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
|
|||
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
|
||||
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
|
||||
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
COUNT(*)
|
||||
22428
|
||||
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
|
||||
|
|
|
@ -553,7 +553,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
|
|||
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
|
||||
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
|
||||
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
COUNT(*)
|
||||
8178
|
||||
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
|
||||
|
|
|
@ -573,7 +573,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
|
|||
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
|
||||
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
|
||||
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
COUNT(*)
|
||||
23940
|
||||
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
|
||||
|
@ -4946,3 +4946,814 @@ DROP TABLE t1;
|
|||
#
|
||||
# End of 10.0 tests
|
||||
#
|
||||
#
|
||||
# Start of 10.1 tests
|
||||
#
|
||||
#
|
||||
# MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
|
||||
#
|
||||
CREATE TABLE t1 (
|
||||
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||
b VARBINARY(16),
|
||||
type SET('ascii','bad','head','tail','mb2','unassigned')
|
||||
);
|
||||
INSERT INTO t1 (b, type) VALUES (0x40, 'ascii,tail');
|
||||
INSERT INTO t1 (b, type) VALUES (0x80, 'tail');
|
||||
INSERT INTO t1 (b, type) VALUES (0x81, 'head,tail');
|
||||
INSERT INTO t1 (b, type) VALUES (0xFF, 'bad');
|
||||
INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
|
||||
INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
|
||||
INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
|
||||
CREATE TABLE t2 AS SELECT
|
||||
CONCAT(t1.b,t2.b) AS b,
|
||||
t1.type AS type1,
|
||||
t2.type AS type2,
|
||||
CONCAT('[',t1.type,'][',t2.type,']') AS comment
|
||||
FROM t1, t1 t2;
|
||||
CREATE TABLE t3
|
||||
(
|
||||
b VARBINARY(16),
|
||||
c VARCHAR(16) CHARACTER SET gbk,
|
||||
comment VARCHAR(128)
|
||||
);
|
||||
#
|
||||
# A combination of two valid characters, should give no warnings
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE
|
||||
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
|
||||
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
16
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
4040 [ascii,tail][ascii,tail]
|
||||
40A140 [ascii,tail][mb2,unassigned]
|
||||
40A1A3 [ascii,tail][mb2]
|
||||
40FE40 [ascii,tail][mb2]
|
||||
A14040 [mb2,unassigned][ascii,tail]
|
||||
A140A140 [mb2,unassigned][mb2,unassigned]
|
||||
A140A1A3 [mb2,unassigned][mb2]
|
||||
A140FE40 [mb2,unassigned][mb2]
|
||||
A1A340 [mb2][ascii,tail]
|
||||
A1A3A140 [mb2][mb2,unassigned]
|
||||
A1A3A1A3 [mb2][mb2]
|
||||
A1A3FE40 [mb2][mb2]
|
||||
FE4040 [mb2][ascii,tail]
|
||||
FE40A140 [mb2][mb2,unassigned]
|
||||
FE40A1A3 [mb2][mb2]
|
||||
FE40FE40 [mb2][mb2]
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that start with a tail or a bad byte,
|
||||
# or end with a bad byte, all should be fixed.
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE type1='tail' OR type1='bad' OR type2='bad'
|
||||
ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 1
|
||||
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 2
|
||||
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 3
|
||||
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 4
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 5
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 6
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 7
|
||||
Warning 1366 Incorrect string value: '\x80\xFF' for column 'c' at row 8
|
||||
Warning 1366 Incorrect string value: '\x81\xFF' for column 'c' at row 9
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 10
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 11
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 12
|
||||
Warning 1366 Incorrect string value: '\xFF@' for column 'c' at row 13
|
||||
Warning 1366 Incorrect string value: '\xFF\x80' for column 'c' at row 14
|
||||
Warning 1366 Incorrect string value: '\xFF\x81' for column 'c' at row 15
|
||||
Warning 1366 Incorrect string value: '\xFF\xA1@' for column 'c' at row 16
|
||||
Warning 1366 Incorrect string value: '\xFF\xA1\xA3' for column 'c' at row 17
|
||||
Warning 1366 Incorrect string value: '\xFF\xFE@' for column 'c' at row 18
|
||||
Warning 1366 Incorrect string value: '\xFF\xFF' for column 'c' at row 19
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
19
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
403F 40FF [ascii,tail][bad]
|
||||
3F40 8040 [tail][ascii,tail]
|
||||
3F3F 8080 [tail][tail]
|
||||
3F3F 8081 [tail][head,tail]
|
||||
3FA140 80A140 [tail][mb2,unassigned]
|
||||
3FA1A3 80A1A3 [tail][mb2]
|
||||
3FFE40 80FE40 [tail][mb2]
|
||||
3F3F 80FF [tail][bad]
|
||||
3F3F 81FF [head,tail][bad]
|
||||
A1403F A140FF [mb2,unassigned][bad]
|
||||
A1A33F A1A3FF [mb2][bad]
|
||||
FE403F FE40FF [mb2][bad]
|
||||
3F40 FF40 [bad][ascii,tail]
|
||||
3F3F FF80 [bad][tail]
|
||||
3F3F FF81 [bad][head,tail]
|
||||
3FA140 FFA140 [bad][mb2,unassigned]
|
||||
3FA1A3 FFA1A3 [bad][mb2]
|
||||
3FFE40 FFFE40 [bad][mb2]
|
||||
3F3F FFFF [bad][bad]
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that start with an ASCII or an MB2 character,
|
||||
# followed by a non-ASCII tail, all should be fixed.
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
|
||||
AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
|
||||
ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 1
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 2
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 3
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 4
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 5
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 6
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 7
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 8
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
8
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
403F 4080 [ascii,tail][tail]
|
||||
403F 4081 [ascii,tail][head,tail]
|
||||
A1403F A14080 [mb2,unassigned][tail]
|
||||
A1403F A14081 [mb2,unassigned][head,tail]
|
||||
A1A33F A1A380 [mb2][tail]
|
||||
A1A33F A1A381 [mb2][head,tail]
|
||||
FE403F FE4080 [mb2][tail]
|
||||
FE403F FE4081 [mb2][head,tail]
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Other sequences
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 5
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
6
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
8140 [head,tail][ascii,tail]
|
||||
8180 [head,tail][tail]
|
||||
8181 [head,tail][head,tail]
|
||||
81A140 [head,tail][mb2,unassigned]
|
||||
81FE40 [head,tail][mb2]
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
81A13F 81A1A3 [head,tail][mb2]
|
||||
DELETE FROM t3;
|
||||
DROP TABLE t3;
|
||||
DROP TABLE t2;
|
||||
CREATE TABLE t2 AS SELECT
|
||||
CONCAT(t1.b,t2.b,t3.b) AS b,
|
||||
t1.type AS type1,
|
||||
t2.type AS type2,
|
||||
t3.type AS type3,
|
||||
CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
|
||||
FROM t1, t1 t2,t1 t3;
|
||||
SELECT COUNT(*) FROM t2;
|
||||
COUNT(*)
|
||||
343
|
||||
CREATE TABLE t3
|
||||
(
|
||||
b VARBINARY(16),
|
||||
c VARCHAR(16) CHARACTER SET gbk,
|
||||
comment VARCHAR(128)
|
||||
);
|
||||
#
|
||||
# A combination of three valid characters, should give no warnings
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE
|
||||
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
|
||||
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
|
||||
(FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3))
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
64
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
404040 [ascii,tail][ascii,tail][ascii,tail]
|
||||
4040A140 [ascii,tail][ascii,tail][mb2,unassigned]
|
||||
4040A1A3 [ascii,tail][ascii,tail][mb2]
|
||||
4040FE40 [ascii,tail][ascii,tail][mb2]
|
||||
40A14040 [ascii,tail][mb2,unassigned][ascii,tail]
|
||||
40A140A140 [ascii,tail][mb2,unassigned][mb2,unassigned]
|
||||
40A140A1A3 [ascii,tail][mb2,unassigned][mb2]
|
||||
40A140FE40 [ascii,tail][mb2,unassigned][mb2]
|
||||
40A1A340 [ascii,tail][mb2][ascii,tail]
|
||||
40A1A3A140 [ascii,tail][mb2][mb2,unassigned]
|
||||
40A1A3A1A3 [ascii,tail][mb2][mb2]
|
||||
40A1A3FE40 [ascii,tail][mb2][mb2]
|
||||
40FE4040 [ascii,tail][mb2][ascii,tail]
|
||||
40FE40A140 [ascii,tail][mb2][mb2,unassigned]
|
||||
40FE40A1A3 [ascii,tail][mb2][mb2]
|
||||
40FE40FE40 [ascii,tail][mb2][mb2]
|
||||
A1404040 [mb2,unassigned][ascii,tail][ascii,tail]
|
||||
A14040A140 [mb2,unassigned][ascii,tail][mb2,unassigned]
|
||||
A14040A1A3 [mb2,unassigned][ascii,tail][mb2]
|
||||
A14040FE40 [mb2,unassigned][ascii,tail][mb2]
|
||||
A140A14040 [mb2,unassigned][mb2,unassigned][ascii,tail]
|
||||
A140A140A140 [mb2,unassigned][mb2,unassigned][mb2,unassigned]
|
||||
A140A140A1A3 [mb2,unassigned][mb2,unassigned][mb2]
|
||||
A140A140FE40 [mb2,unassigned][mb2,unassigned][mb2]
|
||||
A140A1A340 [mb2,unassigned][mb2][ascii,tail]
|
||||
A140A1A3A140 [mb2,unassigned][mb2][mb2,unassigned]
|
||||
A140A1A3A1A3 [mb2,unassigned][mb2][mb2]
|
||||
A140A1A3FE40 [mb2,unassigned][mb2][mb2]
|
||||
A140FE4040 [mb2,unassigned][mb2][ascii,tail]
|
||||
A140FE40A140 [mb2,unassigned][mb2][mb2,unassigned]
|
||||
A140FE40A1A3 [mb2,unassigned][mb2][mb2]
|
||||
A140FE40FE40 [mb2,unassigned][mb2][mb2]
|
||||
A1A34040 [mb2][ascii,tail][ascii,tail]
|
||||
A1A340A140 [mb2][ascii,tail][mb2,unassigned]
|
||||
A1A340A1A3 [mb2][ascii,tail][mb2]
|
||||
A1A340FE40 [mb2][ascii,tail][mb2]
|
||||
A1A3A14040 [mb2][mb2,unassigned][ascii,tail]
|
||||
A1A3A140A140 [mb2][mb2,unassigned][mb2,unassigned]
|
||||
A1A3A140A1A3 [mb2][mb2,unassigned][mb2]
|
||||
A1A3A140FE40 [mb2][mb2,unassigned][mb2]
|
||||
A1A3A1A340 [mb2][mb2][ascii,tail]
|
||||
A1A3A1A3A140 [mb2][mb2][mb2,unassigned]
|
||||
A1A3A1A3A1A3 [mb2][mb2][mb2]
|
||||
A1A3A1A3FE40 [mb2][mb2][mb2]
|
||||
A1A3FE4040 [mb2][mb2][ascii,tail]
|
||||
A1A3FE40A140 [mb2][mb2][mb2,unassigned]
|
||||
A1A3FE40A1A3 [mb2][mb2][mb2]
|
||||
A1A3FE40FE40 [mb2][mb2][mb2]
|
||||
FE404040 [mb2][ascii,tail][ascii,tail]
|
||||
FE4040A140 [mb2][ascii,tail][mb2,unassigned]
|
||||
FE4040A1A3 [mb2][ascii,tail][mb2]
|
||||
FE4040FE40 [mb2][ascii,tail][mb2]
|
||||
FE40A14040 [mb2][mb2,unassigned][ascii,tail]
|
||||
FE40A140A140 [mb2][mb2,unassigned][mb2,unassigned]
|
||||
FE40A140A1A3 [mb2][mb2,unassigned][mb2]
|
||||
FE40A140FE40 [mb2][mb2,unassigned][mb2]
|
||||
FE40A1A340 [mb2][mb2][ascii,tail]
|
||||
FE40A1A3A140 [mb2][mb2][mb2,unassigned]
|
||||
FE40A1A3A1A3 [mb2][mb2][mb2]
|
||||
FE40A1A3FE40 [mb2][mb2][mb2]
|
||||
FE40FE4040 [mb2][mb2][ascii,tail]
|
||||
FE40FE40A140 [mb2][mb2][mb2,unassigned]
|
||||
FE40FE40A1A3 [mb2][mb2][mb2]
|
||||
FE40FE40FE40 [mb2][mb2][mb2]
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that start with a tail or a bad byte,
|
||||
# or have a bad byte, all should be fixed.
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
|
||||
ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 1
|
||||
Warning 1366 Incorrect string value: '\x80\xFF' for column 'c' at row 2
|
||||
Warning 1366 Incorrect string value: '\x81\xFF' for column 'c' at row 3
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 4
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 5
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 6
|
||||
Warning 1366 Incorrect string value: '\xFF@' for column 'c' at row 7
|
||||
Warning 1366 Incorrect string value: '\xFF\x80' for column 'c' at row 8
|
||||
Warning 1366 Incorrect string value: '\xFF\x81' for column 'c' at row 9
|
||||
Warning 1366 Incorrect string value: '\xFF\xA1@' for column 'c' at row 10
|
||||
Warning 1366 Incorrect string value: '\xFF\xA1\xA3' for column 'c' at row 11
|
||||
Warning 1366 Incorrect string value: '\xFF\xFE@' for column 'c' at row 12
|
||||
Warning 1366 Incorrect string value: '\xFF\xFF' for column 'c' at row 13
|
||||
Warning 1366 Incorrect string value: '\x80@@' for column 'c' at row 14
|
||||
Warning 1366 Incorrect string value: '\x80@\x80' for column 'c' at row 15
|
||||
Warning 1366 Incorrect string value: '\x80@\x81' for column 'c' at row 16
|
||||
Warning 1366 Incorrect string value: '\x80@\xA1@' for column 'c' at row 17
|
||||
Warning 1366 Incorrect string value: '\x80@\xA1\xA3' for column 'c' at row 18
|
||||
Warning 1366 Incorrect string value: '\x80@\xFE@' for column 'c' at row 19
|
||||
Warning 1366 Incorrect string value: '\x80@\xFF' for column 'c' at row 20
|
||||
Warning 1366 Incorrect string value: '\x80\x80@' for column 'c' at row 21
|
||||
Warning 1366 Incorrect string value: '\x80\x80\x80' for column 'c' at row 22
|
||||
Warning 1366 Incorrect string value: '\x80\x80\x81' for column 'c' at row 23
|
||||
Warning 1366 Incorrect string value: '\x80\x80\xA1@' for column 'c' at row 24
|
||||
Warning 1366 Incorrect string value: '\x80\x80\xA1\xA3' for column 'c' at row 25
|
||||
Warning 1366 Incorrect string value: '\x80\x80\xFE@' for column 'c' at row 26
|
||||
Warning 1366 Incorrect string value: '\x80\x80\xFF' for column 'c' at row 27
|
||||
Warning 1366 Incorrect string value: '\x80\x81@' for column 'c' at row 28
|
||||
Warning 1366 Incorrect string value: '\x80\x81\x80' for column 'c' at row 29
|
||||
Warning 1366 Incorrect string value: '\x80\x81\x81' for column 'c' at row 30
|
||||
Warning 1366 Incorrect string value: '\x80\x81\xA1@' for column 'c' at row 31
|
||||
Warning 1366 Incorrect string value: '\x80\x81\xA1\xA3' for column 'c' at row 32
|
||||
Warning 1366 Incorrect string value: '\x80\x81\xFE@' for column 'c' at row 33
|
||||
Warning 1366 Incorrect string value: '\x80\x81\xFF' for column 'c' at row 34
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@@' for column 'c' at row 35
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@\x80' for column 'c' at row 36
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@\x81' for column 'c' at row 37
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@\xA1@' for column 'c' at row 38
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@\xA1\xA3' for column 'c' at row 39
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@\xFE@' for column 'c' at row 40
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@\xFF' for column 'c' at row 41
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3@' for column 'c' at row 42
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3\x80' for column 'c' at row 43
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3\x81' for column 'c' at row 44
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xA1@' for column 'c' at row 45
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xA1\xA3' for column 'c' at row 46
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xFE@' for column 'c' at row 47
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xFF' for column 'c' at row 48
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@@' for column 'c' at row 49
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@\x80' for column 'c' at row 50
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@\x81' for column 'c' at row 51
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@\xA1@' for column 'c' at row 52
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@\xA1\xA3' for column 'c' at row 53
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@\xFE@' for column 'c' at row 54
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@\xFF' for column 'c' at row 55
|
||||
Warning 1366 Incorrect string value: '\x80\xFF@' for column 'c' at row 56
|
||||
Warning 1366 Incorrect string value: '\x80\xFF\x80' for column 'c' at row 57
|
||||
Warning 1366 Incorrect string value: '\x80\xFF\x81' for column 'c' at row 58
|
||||
Warning 1366 Incorrect string value: '\x80\xFF\xA1@' for column 'c' at row 59
|
||||
Warning 1366 Incorrect string value: '\x80\xFF\xA1\xA3' for column 'c' at row 60
|
||||
Warning 1366 Incorrect string value: '\x80\xFF\xFE@' for column 'c' at row 61
|
||||
Warning 1366 Incorrect string value: '\x80\xFF\xFF' for column 'c' at row 62
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 63
|
||||
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 64
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
163
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
40403F 4040FF [ascii,tail][ascii,tail][bad]
|
||||
403F3F 4080FF [ascii,tail][tail][bad]
|
||||
403F3F 4081FF [ascii,tail][head,tail][bad]
|
||||
40A1403F 40A140FF [ascii,tail][mb2,unassigned][bad]
|
||||
40A1A33F 40A1A3FF [ascii,tail][mb2][bad]
|
||||
40FE403F 40FE40FF [ascii,tail][mb2][bad]
|
||||
403F40 40FF40 [ascii,tail][bad][ascii,tail]
|
||||
403F3F 40FF80 [ascii,tail][bad][tail]
|
||||
403F3F 40FF81 [ascii,tail][bad][head,tail]
|
||||
403FA140 40FFA140 [ascii,tail][bad][mb2,unassigned]
|
||||
403FA1A3 40FFA1A3 [ascii,tail][bad][mb2]
|
||||
403FFE40 40FFFE40 [ascii,tail][bad][mb2]
|
||||
403F3F 40FFFF [ascii,tail][bad][bad]
|
||||
3F4040 804040 [tail][ascii,tail][ascii,tail]
|
||||
3F403F 804080 [tail][ascii,tail][tail]
|
||||
3F403F 804081 [tail][ascii,tail][head,tail]
|
||||
3F40A140 8040A140 [tail][ascii,tail][mb2,unassigned]
|
||||
3F40A1A3 8040A1A3 [tail][ascii,tail][mb2]
|
||||
3F40FE40 8040FE40 [tail][ascii,tail][mb2]
|
||||
3F403F 8040FF [tail][ascii,tail][bad]
|
||||
3F3F40 808040 [tail][tail][ascii,tail]
|
||||
3F3F3F 808080 [tail][tail][tail]
|
||||
3F3F3F 808081 [tail][tail][head,tail]
|
||||
3F3FA140 8080A140 [tail][tail][mb2,unassigned]
|
||||
3F3FA1A3 8080A1A3 [tail][tail][mb2]
|
||||
3F3FFE40 8080FE40 [tail][tail][mb2]
|
||||
3F3F3F 8080FF [tail][tail][bad]
|
||||
3F8140 808140 [tail][head,tail][ascii,tail]
|
||||
3F8180 808180 [tail][head,tail][tail]
|
||||
3F8181 808181 [tail][head,tail][head,tail]
|
||||
3F81A140 8081A140 [tail][head,tail][mb2,unassigned]
|
||||
3F81A13F 8081A1A3 [tail][head,tail][mb2]
|
||||
3F81FE40 8081FE40 [tail][head,tail][mb2]
|
||||
3F3F3F 8081FF [tail][head,tail][bad]
|
||||
3FA14040 80A14040 [tail][mb2,unassigned][ascii,tail]
|
||||
3FA1403F 80A14080 [tail][mb2,unassigned][tail]
|
||||
3FA1403F 80A14081 [tail][mb2,unassigned][head,tail]
|
||||
3FA140A140 80A140A140 [tail][mb2,unassigned][mb2,unassigned]
|
||||
3FA140A1A3 80A140A1A3 [tail][mb2,unassigned][mb2]
|
||||
3FA140FE40 80A140FE40 [tail][mb2,unassigned][mb2]
|
||||
3FA1403F 80A140FF [tail][mb2,unassigned][bad]
|
||||
3FA1A340 80A1A340 [tail][mb2][ascii,tail]
|
||||
3FA1A33F 80A1A380 [tail][mb2][tail]
|
||||
3FA1A33F 80A1A381 [tail][mb2][head,tail]
|
||||
3FA1A3A140 80A1A3A140 [tail][mb2][mb2,unassigned]
|
||||
3FA1A3A1A3 80A1A3A1A3 [tail][mb2][mb2]
|
||||
3FA1A3FE40 80A1A3FE40 [tail][mb2][mb2]
|
||||
3FA1A33F 80A1A3FF [tail][mb2][bad]
|
||||
3FFE4040 80FE4040 [tail][mb2][ascii,tail]
|
||||
3FFE403F 80FE4080 [tail][mb2][tail]
|
||||
3FFE403F 80FE4081 [tail][mb2][head,tail]
|
||||
3FFE40A140 80FE40A140 [tail][mb2][mb2,unassigned]
|
||||
3FFE40A1A3 80FE40A1A3 [tail][mb2][mb2]
|
||||
3FFE40FE40 80FE40FE40 [tail][mb2][mb2]
|
||||
3FFE403F 80FE40FF [tail][mb2][bad]
|
||||
3F3F40 80FF40 [tail][bad][ascii,tail]
|
||||
3F3F3F 80FF80 [tail][bad][tail]
|
||||
3F3F3F 80FF81 [tail][bad][head,tail]
|
||||
3F3FA140 80FFA140 [tail][bad][mb2,unassigned]
|
||||
3F3FA1A3 80FFA1A3 [tail][bad][mb2]
|
||||
3F3FFE40 80FFFE40 [tail][bad][mb2]
|
||||
3F3F3F 80FFFF [tail][bad][bad]
|
||||
81403F 8140FF [head,tail][ascii,tail][bad]
|
||||
81803F 8180FF [head,tail][tail][bad]
|
||||
81813F 8181FF [head,tail][head,tail][bad]
|
||||
81A1403F 81A140FF [head,tail][mb2,unassigned][bad]
|
||||
81A13F3F 81A1A3FF [head,tail][mb2][bad]
|
||||
81FE403F 81FE40FF [head,tail][mb2][bad]
|
||||
3F3F40 81FF40 [head,tail][bad][ascii,tail]
|
||||
3F3F3F 81FF80 [head,tail][bad][tail]
|
||||
3F3F3F 81FF81 [head,tail][bad][head,tail]
|
||||
3F3FA140 81FFA140 [head,tail][bad][mb2,unassigned]
|
||||
3F3FA1A3 81FFA1A3 [head,tail][bad][mb2]
|
||||
3F3FFE40 81FFFE40 [head,tail][bad][mb2]
|
||||
3F3F3F 81FFFF [head,tail][bad][bad]
|
||||
A140403F A14040FF [mb2,unassigned][ascii,tail][bad]
|
||||
A1403F3F A14080FF [mb2,unassigned][tail][bad]
|
||||
A1403F3F A14081FF [mb2,unassigned][head,tail][bad]
|
||||
A140A1403F A140A140FF [mb2,unassigned][mb2,unassigned][bad]
|
||||
A140A1A33F A140A1A3FF [mb2,unassigned][mb2][bad]
|
||||
A140FE403F A140FE40FF [mb2,unassigned][mb2][bad]
|
||||
A1403F40 A140FF40 [mb2,unassigned][bad][ascii,tail]
|
||||
A1403F3F A140FF80 [mb2,unassigned][bad][tail]
|
||||
A1403F3F A140FF81 [mb2,unassigned][bad][head,tail]
|
||||
A1403FA140 A140FFA140 [mb2,unassigned][bad][mb2,unassigned]
|
||||
A1403FA1A3 A140FFA1A3 [mb2,unassigned][bad][mb2]
|
||||
A1403FFE40 A140FFFE40 [mb2,unassigned][bad][mb2]
|
||||
A1403F3F A140FFFF [mb2,unassigned][bad][bad]
|
||||
A1A3403F A1A340FF [mb2][ascii,tail][bad]
|
||||
A1A33F3F A1A380FF [mb2][tail][bad]
|
||||
A1A33F3F A1A381FF [mb2][head,tail][bad]
|
||||
A1A3A1403F A1A3A140FF [mb2][mb2,unassigned][bad]
|
||||
A1A3A1A33F A1A3A1A3FF [mb2][mb2][bad]
|
||||
A1A3FE403F A1A3FE40FF [mb2][mb2][bad]
|
||||
A1A33F40 A1A3FF40 [mb2][bad][ascii,tail]
|
||||
A1A33F3F A1A3FF80 [mb2][bad][tail]
|
||||
A1A33F3F A1A3FF81 [mb2][bad][head,tail]
|
||||
A1A33FA140 A1A3FFA140 [mb2][bad][mb2,unassigned]
|
||||
A1A33FA1A3 A1A3FFA1A3 [mb2][bad][mb2]
|
||||
A1A33FFE40 A1A3FFFE40 [mb2][bad][mb2]
|
||||
A1A33F3F A1A3FFFF [mb2][bad][bad]
|
||||
FE40403F FE4040FF [mb2][ascii,tail][bad]
|
||||
FE403F3F FE4080FF [mb2][tail][bad]
|
||||
FE403F3F FE4081FF [mb2][head,tail][bad]
|
||||
FE40A1403F FE40A140FF [mb2][mb2,unassigned][bad]
|
||||
FE40A1A33F FE40A1A3FF [mb2][mb2][bad]
|
||||
FE40FE403F FE40FE40FF [mb2][mb2][bad]
|
||||
FE403F40 FE40FF40 [mb2][bad][ascii,tail]
|
||||
FE403F3F FE40FF80 [mb2][bad][tail]
|
||||
FE403F3F FE40FF81 [mb2][bad][head,tail]
|
||||
FE403FA140 FE40FFA140 [mb2][bad][mb2,unassigned]
|
||||
FE403FA1A3 FE40FFA1A3 [mb2][bad][mb2]
|
||||
FE403FFE40 FE40FFFE40 [mb2][bad][mb2]
|
||||
FE403F3F FE40FFFF [mb2][bad][bad]
|
||||
3F4040 FF4040 [bad][ascii,tail][ascii,tail]
|
||||
3F403F FF4080 [bad][ascii,tail][tail]
|
||||
3F403F FF4081 [bad][ascii,tail][head,tail]
|
||||
3F40A140 FF40A140 [bad][ascii,tail][mb2,unassigned]
|
||||
3F40A1A3 FF40A1A3 [bad][ascii,tail][mb2]
|
||||
3F40FE40 FF40FE40 [bad][ascii,tail][mb2]
|
||||
3F403F FF40FF [bad][ascii,tail][bad]
|
||||
3F3F40 FF8040 [bad][tail][ascii,tail]
|
||||
3F3F3F FF8080 [bad][tail][tail]
|
||||
3F3F3F FF8081 [bad][tail][head,tail]
|
||||
3F3FA140 FF80A140 [bad][tail][mb2,unassigned]
|
||||
3F3FA1A3 FF80A1A3 [bad][tail][mb2]
|
||||
3F3FFE40 FF80FE40 [bad][tail][mb2]
|
||||
3F3F3F FF80FF [bad][tail][bad]
|
||||
3F8140 FF8140 [bad][head,tail][ascii,tail]
|
||||
3F8180 FF8180 [bad][head,tail][tail]
|
||||
3F8181 FF8181 [bad][head,tail][head,tail]
|
||||
3F81A140 FF81A140 [bad][head,tail][mb2,unassigned]
|
||||
3F81A13F FF81A1A3 [bad][head,tail][mb2]
|
||||
3F81FE40 FF81FE40 [bad][head,tail][mb2]
|
||||
3F3F3F FF81FF [bad][head,tail][bad]
|
||||
3FA14040 FFA14040 [bad][mb2,unassigned][ascii,tail]
|
||||
3FA1403F FFA14080 [bad][mb2,unassigned][tail]
|
||||
3FA1403F FFA14081 [bad][mb2,unassigned][head,tail]
|
||||
3FA140A140 FFA140A140 [bad][mb2,unassigned][mb2,unassigned]
|
||||
3FA140A1A3 FFA140A1A3 [bad][mb2,unassigned][mb2]
|
||||
3FA140FE40 FFA140FE40 [bad][mb2,unassigned][mb2]
|
||||
3FA1403F FFA140FF [bad][mb2,unassigned][bad]
|
||||
3FA1A340 FFA1A340 [bad][mb2][ascii,tail]
|
||||
3FA1A33F FFA1A380 [bad][mb2][tail]
|
||||
3FA1A33F FFA1A381 [bad][mb2][head,tail]
|
||||
3FA1A3A140 FFA1A3A140 [bad][mb2][mb2,unassigned]
|
||||
3FA1A3A1A3 FFA1A3A1A3 [bad][mb2][mb2]
|
||||
3FA1A3FE40 FFA1A3FE40 [bad][mb2][mb2]
|
||||
3FA1A33F FFA1A3FF [bad][mb2][bad]
|
||||
3FFE4040 FFFE4040 [bad][mb2][ascii,tail]
|
||||
3FFE403F FFFE4080 [bad][mb2][tail]
|
||||
3FFE403F FFFE4081 [bad][mb2][head,tail]
|
||||
3FFE40A140 FFFE40A140 [bad][mb2][mb2,unassigned]
|
||||
3FFE40A1A3 FFFE40A1A3 [bad][mb2][mb2]
|
||||
3FFE40FE40 FFFE40FE40 [bad][mb2][mb2]
|
||||
3FFE403F FFFE40FF [bad][mb2][bad]
|
||||
3F3F40 FFFF40 [bad][bad][ascii,tail]
|
||||
3F3F3F FFFF80 [bad][bad][tail]
|
||||
3F3F3F FFFF81 [bad][bad][head,tail]
|
||||
3F3FA140 FFFFA140 [bad][bad][mb2,unassigned]
|
||||
3F3FA1A3 FFFFA1A3 [bad][bad][mb2]
|
||||
3F3FFE40 FFFFFE40 [bad][bad][mb2]
|
||||
3F3F3F FFFFFF [bad][bad][bad]
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that start with an ASCII or an MB2 character,
|
||||
# followed by a pure non-ASCII tail, all should be fixed.
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
|
||||
AND type2='tail'
|
||||
ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 1
|
||||
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 2
|
||||
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 3
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 4
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 5
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 6
|
||||
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 7
|
||||
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 8
|
||||
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 9
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 10
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 11
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 12
|
||||
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 13
|
||||
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 14
|
||||
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 15
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 16
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 17
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 18
|
||||
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 19
|
||||
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 20
|
||||
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 21
|
||||
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 22
|
||||
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 23
|
||||
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 24
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
24
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
403F40 408040 [ascii,tail][tail][ascii,tail]
|
||||
403F3F 408080 [ascii,tail][tail][tail]
|
||||
403F3F 408081 [ascii,tail][tail][head,tail]
|
||||
403FA140 4080A140 [ascii,tail][tail][mb2,unassigned]
|
||||
403FA1A3 4080A1A3 [ascii,tail][tail][mb2]
|
||||
403FFE40 4080FE40 [ascii,tail][tail][mb2]
|
||||
A1403F40 A1408040 [mb2,unassigned][tail][ascii,tail]
|
||||
A1403F3F A1408080 [mb2,unassigned][tail][tail]
|
||||
A1403F3F A1408081 [mb2,unassigned][tail][head,tail]
|
||||
A1403FA140 A14080A140 [mb2,unassigned][tail][mb2,unassigned]
|
||||
A1403FA1A3 A14080A1A3 [mb2,unassigned][tail][mb2]
|
||||
A1403FFE40 A14080FE40 [mb2,unassigned][tail][mb2]
|
||||
A1A33F40 A1A38040 [mb2][tail][ascii,tail]
|
||||
A1A33F3F A1A38080 [mb2][tail][tail]
|
||||
A1A33F3F A1A38081 [mb2][tail][head,tail]
|
||||
A1A33FA140 A1A380A140 [mb2][tail][mb2,unassigned]
|
||||
A1A33FA1A3 A1A380A1A3 [mb2][tail][mb2]
|
||||
A1A33FFE40 A1A380FE40 [mb2][tail][mb2]
|
||||
FE403F40 FE408040 [mb2][tail][ascii,tail]
|
||||
FE403F3F FE408080 [mb2][tail][tail]
|
||||
FE403F3F FE408081 [mb2][tail][head,tail]
|
||||
FE403FA140 FE4080A140 [mb2][tail][mb2,unassigned]
|
||||
FE403FA1A3 FE4080A1A3 [mb2][tail][mb2]
|
||||
FE403FFE40 FE4080FE40 [mb2][tail][mb2]
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that consist of two ASCII or MB2 characters,
|
||||
# followed by a pure non-ASCII tail, all should be fixed.
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
|
||||
(FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
|
||||
type3='tail'
|
||||
ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 1
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 2
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 3
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 4
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 5
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 6
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 7
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 8
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 9
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 10
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 11
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 12
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 13
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 14
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 15
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 16
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
16
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
40403F 404080 [ascii,tail][ascii,tail][tail]
|
||||
40A1403F 40A14080 [ascii,tail][mb2,unassigned][tail]
|
||||
40A1A33F 40A1A380 [ascii,tail][mb2][tail]
|
||||
40FE403F 40FE4080 [ascii,tail][mb2][tail]
|
||||
A140403F A1404080 [mb2,unassigned][ascii,tail][tail]
|
||||
A140A1403F A140A14080 [mb2,unassigned][mb2,unassigned][tail]
|
||||
A140A1A33F A140A1A380 [mb2,unassigned][mb2][tail]
|
||||
A140FE403F A140FE4080 [mb2,unassigned][mb2][tail]
|
||||
A1A3403F A1A34080 [mb2][ascii,tail][tail]
|
||||
A1A3A1403F A1A3A14080 [mb2][mb2,unassigned][tail]
|
||||
A1A3A1A33F A1A3A1A380 [mb2][mb2][tail]
|
||||
A1A3FE403F A1A3FE4080 [mb2][mb2][tail]
|
||||
FE40403F FE404080 [mb2][ascii,tail][tail]
|
||||
FE40A1403F FE40A14080 [mb2][mb2,unassigned][tail]
|
||||
FE40A1A33F FE40A1A380 [mb2][mb2][tail]
|
||||
FE40FE403F FE40FE4080 [mb2][mb2][tail]
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that consist of two MB2 characters,
|
||||
# followed by a non-ASCII head or tail, all should be fixed.
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
|
||||
AND NOT FIND_IN_SET('ascii',type3)
|
||||
AND NOT FIND_IN_SET('mb2',type3)
|
||||
ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 1
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 2
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 3
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 4
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 5
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 6
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 7
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 8
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 9
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
9
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
A140A1403F A140A14081 [mb2,unassigned][mb2,unassigned][head,tail]
|
||||
A140A1A33F A140A1A381 [mb2,unassigned][mb2][head,tail]
|
||||
A140FE403F A140FE4081 [mb2,unassigned][mb2][head,tail]
|
||||
A1A3A1403F A1A3A14081 [mb2][mb2,unassigned][head,tail]
|
||||
A1A3A1A33F A1A3A1A381 [mb2][mb2][head,tail]
|
||||
A1A3FE403F A1A3FE4081 [mb2][mb2][head,tail]
|
||||
FE40A1403F FE40A14081 [mb2][mb2,unassigned][head,tail]
|
||||
FE40A1A33F FE40A1A381 [mb2][mb2][head,tail]
|
||||
FE40FE403F FE40FE4081 [mb2][mb2][head,tail]
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that consist of head + tail + MB2 should go without warnings
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE FIND_IN_SET('head',type1)
|
||||
AND FIND_IN_SET('tail',type2)
|
||||
AND FIND_IN_SET('mb2',type3)
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
9
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
8140A140 [head,tail][ascii,tail][mb2,unassigned]
|
||||
8140A1A3 [head,tail][ascii,tail][mb2]
|
||||
8140FE40 [head,tail][ascii,tail][mb2]
|
||||
8180A140 [head,tail][tail][mb2,unassigned]
|
||||
8180A1A3 [head,tail][tail][mb2]
|
||||
8180FE40 [head,tail][tail][mb2]
|
||||
8181A140 [head,tail][head,tail][mb2,unassigned]
|
||||
8181A1A3 [head,tail][head,tail][mb2]
|
||||
8181FE40 [head,tail][head,tail][mb2]
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
#
|
||||
# Sequences that consist of (ascii or mb2) + head + tail should go without warnings
|
||||
#
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
|
||||
AND FIND_IN_SET('head',type2)
|
||||
AND FIND_IN_SET('tail',type3)
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
12
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
408140 [ascii,tail][head,tail][ascii,tail]
|
||||
408180 [ascii,tail][head,tail][tail]
|
||||
408181 [ascii,tail][head,tail][head,tail]
|
||||
A1408140 [mb2,unassigned][head,tail][ascii,tail]
|
||||
A1408180 [mb2,unassigned][head,tail][tail]
|
||||
A1408181 [mb2,unassigned][head,tail][head,tail]
|
||||
A1A38140 [mb2][head,tail][ascii,tail]
|
||||
A1A38180 [mb2][head,tail][tail]
|
||||
A1A38181 [mb2][head,tail][head,tail]
|
||||
FE408140 [mb2][head,tail][ascii,tail]
|
||||
FE408180 [mb2][head,tail][tail]
|
||||
FE408181 [mb2][head,tail][head,tail]
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 1
|
||||
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 3
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 5
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 6
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 7
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 9
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 10
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 12
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 13
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 15
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 16
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 18
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 19
|
||||
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 27
|
||||
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 30
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 31
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 35
|
||||
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 37
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 39
|
||||
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 41
|
||||
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 43
|
||||
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 45
|
||||
SELECT COUNT(*) FROM t3;
|
||||
COUNT(*)
|
||||
46
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
HEX(c) comment
|
||||
4081A140 [ascii,tail][head,tail][mb2,unassigned]
|
||||
4081FE40 [ascii,tail][head,tail][mb2]
|
||||
814040 [head,tail][ascii,tail][ascii,tail]
|
||||
818040 [head,tail][tail][ascii,tail]
|
||||
818140 [head,tail][head,tail][ascii,tail]
|
||||
81A14040 [head,tail][mb2,unassigned][ascii,tail]
|
||||
81A140A140 [head,tail][mb2,unassigned][mb2,unassigned]
|
||||
81A140A1A3 [head,tail][mb2,unassigned][mb2]
|
||||
81A140FE40 [head,tail][mb2,unassigned][mb2]
|
||||
81A1A340 [head,tail][mb2][ascii,tail]
|
||||
81A1A380 [head,tail][mb2][tail]
|
||||
81A1A381 [head,tail][mb2][head,tail]
|
||||
81A1A3A140 [head,tail][mb2][mb2,unassigned]
|
||||
81A1A3FE40 [head,tail][mb2][mb2]
|
||||
81FE4040 [head,tail][mb2][ascii,tail]
|
||||
81FE40A140 [head,tail][mb2][mb2,unassigned]
|
||||
81FE40A1A3 [head,tail][mb2][mb2]
|
||||
81FE40FE40 [head,tail][mb2][mb2]
|
||||
A14081A140 [mb2,unassigned][head,tail][mb2,unassigned]
|
||||
A14081FE40 [mb2,unassigned][head,tail][mb2]
|
||||
A1A381A140 [mb2][head,tail][mb2,unassigned]
|
||||
A1A381FE40 [mb2][head,tail][mb2]
|
||||
FE4081A140 [mb2][head,tail][mb2,unassigned]
|
||||
FE4081FE40 [mb2][head,tail][mb2]
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
HEX(c) HEX(b) comment
|
||||
40403F 404081 [ascii,tail][ascii,tail][head,tail]
|
||||
4081A13F 4081A1A3 [ascii,tail][head,tail][mb2]
|
||||
40A1403F 40A14081 [ascii,tail][mb2,unassigned][head,tail]
|
||||
40A1A33F 40A1A381 [ascii,tail][mb2][head,tail]
|
||||
40FE403F 40FE4081 [ascii,tail][mb2][head,tail]
|
||||
81403F 814080 [head,tail][ascii,tail][tail]
|
||||
81403F 814081 [head,tail][ascii,tail][head,tail]
|
||||
81803F 818080 [head,tail][tail][tail]
|
||||
81803F 818081 [head,tail][tail][head,tail]
|
||||
81813F 818180 [head,tail][head,tail][tail]
|
||||
81813F 818181 [head,tail][head,tail][head,tail]
|
||||
81A1403F 81A14080 [head,tail][mb2,unassigned][tail]
|
||||
81A1403F 81A14081 [head,tail][mb2,unassigned][head,tail]
|
||||
81A1A3A13F 81A1A3A1A3 [head,tail][mb2][mb2]
|
||||
81FE403F 81FE4080 [head,tail][mb2][tail]
|
||||
81FE403F 81FE4081 [head,tail][mb2][head,tail]
|
||||
A140403F A1404081 [mb2,unassigned][ascii,tail][head,tail]
|
||||
A14081A13F A14081A1A3 [mb2,unassigned][head,tail][mb2]
|
||||
A1A3403F A1A34081 [mb2][ascii,tail][head,tail]
|
||||
A1A381A13F A1A381A1A3 [mb2][head,tail][mb2]
|
||||
FE40403F FE404081 [mb2][ascii,tail][head,tail]
|
||||
FE4081A13F FE4081A1A3 [mb2][head,tail][mb2]
|
||||
DROP TABLE t3;
|
||||
DROP TABLE t2;
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
|
||||
#
|
||||
#
|
||||
# End of 10.1 tests
|
||||
#
|
||||
|
|
|
@ -477,7 +477,7 @@ Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
|
|||
SELECT COUNT(*) FROM t1;
|
||||
COUNT(*)
|
||||
14623
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
|
||||
COUNT(*)
|
||||
63
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
|
|
|
@ -2626,7 +2626,7 @@ Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
|
|||
SELECT COUNT(*) FROM t1;
|
||||
COUNT(*)
|
||||
44671
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
COUNT(*)
|
||||
17735
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
|
@ -25938,7 +25938,7 @@ CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis);
|
|||
INSERT INTO t1 VALUES (0x8EA0);
|
||||
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
|
||||
HEX(a) CHAR_LENGTH(a)
|
||||
0
|
||||
3F3F 2
|
||||
DROP TABLE t1;
|
||||
SELECT _ujis 0x8EA0;
|
||||
ERROR HY000: Invalid ujis character string: '8EA0'
|
||||
|
|
|
@ -225,7 +225,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 varchar(10) character set utf8);
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -233,7 +233,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 text character set utf8);
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -241,7 +241,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (a text character set utf8, primary key(a(371)));
|
||||
ERROR 42000: Specified key was too long; max key length is 1000 bytes
|
||||
|
|
|
@ -225,7 +225,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 varchar(10) character set utf8mb4);
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -233,7 +233,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 text character set utf8mb4);
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -241,7 +241,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (a text character set utf8mb4, primary key(a(371)));
|
||||
ERROR 42000: Specified key was too long; max key length is 1000 bytes
|
||||
|
@ -2327,7 +2327,7 @@ select hex(utf8mb4) from t1;
|
|||
hex(utf8mb4)
|
||||
F0908080
|
||||
F0BFBFBF
|
||||
|
||||
3F
|
||||
delete from t1;
|
||||
Testing [F2..F3][80..BF][80..BF][80..BF]
|
||||
insert into t1 values (0xF2808080);
|
||||
|
@ -2347,7 +2347,7 @@ select hex(utf8mb4) from t1;
|
|||
hex(utf8mb4)
|
||||
F4808080
|
||||
F48F8080
|
||||
|
||||
3F
|
||||
drop table t1;
|
||||
#
|
||||
# Check strnxfrm() with odd length
|
||||
|
@ -2472,45 +2472,45 @@ F3A087AFEA9DA8
|
|||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
EA9DA8
|
||||
3F3F3F3FEA9DA8
|
||||
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
|
||||
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
|
||||
F09D8480EA9DA8
|
||||
F09D8480EFB9AB
|
||||
F09D8480
|
||||
F09D84803F3F3F3F
|
||||
F09D849EEA9DA8
|
||||
F09D849EEFB9AB
|
||||
F09D849E
|
||||
F09D849E3F3F3F3F
|
||||
F09D859EEA9DA8
|
||||
F09D859EEFB9AB
|
||||
F09D859E
|
||||
F09D859E3F3F3F3F
|
||||
F09D878FEA9DA8
|
||||
F09D878FEFB9AB
|
||||
F09D878F
|
||||
F09D878F3F3F3F3F
|
||||
F09D9C9FEA9DA8
|
||||
F09D9C9FEFB9AB
|
||||
F09D9C9F
|
||||
F09D9C9F3F3F3F3F
|
||||
F09D9E9FEA9DA8
|
||||
F09D9E9FEFB9AB
|
||||
F09D9E9F
|
||||
F09D9E9F3F3F3F3F
|
||||
F48FBFBFEA9DA8
|
||||
F48FBFBFEFB9AB
|
||||
F48FBFBF
|
||||
F48FBFBF3F3F3F3F
|
||||
F3A087AFEA9DA8
|
||||
F3A087AFEFB9AB
|
||||
F3A087AF
|
||||
F3A087AF3F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
EA9DA8
|
||||
EFB9AB
|
||||
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
3F3F3F3FEA9DA8
|
||||
3F3F3F3FEFB9AB
|
||||
3F3F3F3F3F3F3F3F
|
||||
SELECT count(*) FROM t1, t2
|
||||
WHERE t1.utf8mb4_encoding > t2.utf8mb3_encoding;
|
||||
count(*)
|
||||
|
@ -2547,7 +2547,7 @@ u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
|
|||
119070 3F3F3F3F3F3F3F3F3F3F ??????????
|
||||
65131 EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB ﹫?????﹫﹫?﹫
|
||||
119070 3F3F3F3F3F3F3F3F3F3F ??????????
|
||||
1114111
|
||||
1114111 3F3F3F3F ????
|
||||
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb4;
|
||||
SHOW CREATE TABLE t2;
|
||||
Table Create Table
|
||||
|
@ -2559,7 +2559,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
|||
u_decimal hex(utf8mb3_encoding)
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
|
||||
SHOW CREATE TABLE t2;
|
||||
Table Create Table
|
||||
|
@ -2571,7 +2571,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
|||
u_decimal hex(utf8mb3_encoding)
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
|
||||
SHOW CREATE TABLE t1;
|
||||
Table Create Table
|
||||
|
@ -2592,7 +2592,7 @@ u_decimal hex(utf8mb4_encoding)
|
|||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
65131 EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb4;
|
||||
SHOW CREATE TABLE t1;
|
||||
Table Create Table
|
||||
|
@ -2613,7 +2613,7 @@ u_decimal hex(utf8mb4_encoding)
|
|||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
65131 EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
ALTER TABLE t2 MODIFY utf8mb3_encoding VARCHAR(10) CHARACTER SET utf8mb4;
|
||||
SHOW CREATE TABLE t2;
|
||||
Table Create Table
|
||||
|
@ -2625,7 +2625,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
|||
u_decimal hex(utf8mb3_encoding)
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
DROP TABLE IF EXISTS t3;
|
||||
CREATE TABLE t3 (
|
||||
u_decimal int NOT NULL,
|
||||
|
@ -3306,5 +3306,53 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
|
|||
# End of 5.6 tests
|
||||
#
|
||||
#
|
||||
# Start of 10.0 tests
|
||||
#
|
||||
#
|
||||
# MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
|
||||
#
|
||||
#
|
||||
# This test sets session character set to 3-byte utf8,
|
||||
# but then sends a 4-byte sequence (which is wrong for 3-byte utf8).
|
||||
# It should be replaced to four question marks: '????' in both columns
|
||||
# (i.e. four unknown bytes are replaced to four question marks),
|
||||
# then the rest of the string should be stored, so we get 'a ???? b'.
|
||||
#
|
||||
SET NAMES utf8;
|
||||
CREATE TABLE t1 (
|
||||
a VARCHAR(32) CHARACTER SET utf8mb4,
|
||||
b VARCHAR(32) CHARACTER SET utf8
|
||||
);
|
||||
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'a' at row 1
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'b' at row 1
|
||||
SELECT * FROM t1;
|
||||
a b
|
||||
a ???? b a ???? b
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# This test sets session character set to 4-byte utf8,
|
||||
# then normally sends a 4-byte sequence.
|
||||
# It should be stored AS IS into the utf8mb4 column (a),
|
||||
# and should be replaced to a single question mark in the utf8 column (b)
|
||||
# (i.e. one character that cannot be converted is replaced to one question mark).
|
||||
#
|
||||
SET NAMES utf8mb4;
|
||||
CREATE TABLE t1 (
|
||||
a VARCHAR(32) CHARACTER SET utf8mb4,
|
||||
b VARCHAR(32) CHARACTER SET utf8
|
||||
);
|
||||
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
|
||||
Warnings:
|
||||
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'b' at row 1
|
||||
SELECT * FROM t1;
|
||||
a b
|
||||
a 😁 b a ? b
|
||||
DROP TABLE t1;
|
||||
#
|
||||
# End of 10.0 tests
|
||||
#
|
||||
#
|
||||
# End of tests
|
||||
#
|
||||
|
|
|
@ -225,7 +225,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 varchar(10) character set utf8mb4) engine heap;
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -233,7 +233,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
CREATE TABLE t1 ( a varchar(10) ) CHARACTER SET utf8mb4 ENGINE heap;
|
||||
INSERT INTO t1 VALUES ( 'test' );
|
||||
|
@ -2157,7 +2157,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
|
||||
select hex(utf8mb4) from t1;
|
||||
hex(utf8mb4)
|
||||
|
||||
3F
|
||||
F0908080
|
||||
F0BFBFBF
|
||||
delete from t1;
|
||||
|
@ -2177,7 +2177,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
|
||||
select hex(utf8mb4) from t1;
|
||||
hex(utf8mb4)
|
||||
|
||||
3F
|
||||
F4808080
|
||||
F48F8080
|
||||
drop table t1;
|
||||
|
@ -2274,7 +2274,7 @@ Warning 1366 Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
|
|||
UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
|
||||
SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
|
||||
HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
|
||||
EA9DA8
|
||||
3F3F3F3FEA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
F09D8480EA9DA8
|
||||
F09D849EEA9DA8
|
||||
|
@ -2288,40 +2288,40 @@ F3A087AFEA9DA8
|
|||
F48FBFBFEA9DA8
|
||||
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
|
||||
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
|
||||
|
||||
EA9DA8
|
||||
EFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
|
||||
3F3F3F3F3F3F3F3F
|
||||
3F3F3F3FEA9DA8
|
||||
3F3F3F3FEFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
|
||||
F09D8480
|
||||
F09D84803F3F3F3F
|
||||
F09D8480EA9DA8
|
||||
F09D8480EFB9AB
|
||||
F09D849E
|
||||
F09D849E3F3F3F3F
|
||||
F09D849EEA9DA8
|
||||
F09D849EEFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D859E
|
||||
F09D859E3F3F3F3F
|
||||
F09D859EEA9DA8
|
||||
F09D859EEFB9AB
|
||||
F09D878F
|
||||
F09D878F3F3F3F3F
|
||||
F09D878FEA9DA8
|
||||
F09D878FEFB9AB
|
||||
F09D9C9F
|
||||
F09D9C9F3F3F3F3F
|
||||
F09D9C9FEA9DA8
|
||||
F09D9C9FEFB9AB
|
||||
F09D9E9F
|
||||
F09D9E9F3F3F3F3F
|
||||
F09D9E9FEA9DA8
|
||||
F09D9E9FEFB9AB
|
||||
F3A087AF
|
||||
F3A087AF3F3F3F3F
|
||||
F3A087AFEA9DA8
|
||||
F3A087AFEFB9AB
|
||||
F48FBFBF
|
||||
F48FBFBF3F3F3F3F
|
||||
F48FBFBFEA9DA8
|
||||
F48FBFBFEFB9AB
|
||||
SELECT count(*) FROM t1, t2
|
||||
|
@ -2337,8 +2337,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=MEMORY DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
|
||||
1114111
|
||||
1114111 3F ?
|
||||
1114111 3F3F3F3F ????
|
||||
119040 3F ?
|
||||
119070 3F ?
|
||||
119070 3F3F3F3F3F3F3F3F3F3F ??????????
|
||||
|
@ -2358,7 +2358,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=MEMORY DEFAULT CHARSET=utf8mb4
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
|
||||
|
@ -2370,7 +2370,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=MEMORY DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
|
||||
|
@ -2382,8 +2382,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=MEMORY DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding)
|
||||
1114111
|
||||
1114111 3F
|
||||
1114111 3F3F3F3F
|
||||
119040 3F
|
||||
119070 3F
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
|
@ -2403,8 +2403,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=MEMORY DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding)
|
||||
1114111
|
||||
1114111 3F
|
||||
1114111 3F3F3F3F
|
||||
119040 3F
|
||||
119070 3F
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
|
@ -2424,7 +2424,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=MEMORY DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
DROP TABLE IF EXISTS t3;
|
||||
|
|
|
@ -225,7 +225,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 varchar(10) character set utf8mb4) engine InnoDB;
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -233,7 +233,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 text character set utf8mb4) engine InnoDB;
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -241,7 +241,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (a text character set utf8mb4, primary key(a(371))) engine InnoDB;
|
||||
ERROR 42000: Specified key was too long; max key length is 767 bytes
|
||||
|
@ -2285,7 +2285,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
|
||||
select hex(utf8mb4) from t1;
|
||||
hex(utf8mb4)
|
||||
|
||||
3F
|
||||
F0908080
|
||||
F0BFBFBF
|
||||
delete from t1;
|
||||
|
@ -2305,7 +2305,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
|
||||
select hex(utf8mb4) from t1;
|
||||
hex(utf8mb4)
|
||||
|
||||
3F
|
||||
F4808080
|
||||
F48F8080
|
||||
drop table t1;
|
||||
|
@ -2421,7 +2421,7 @@ Warning 1366 Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
|
|||
UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
|
||||
SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
|
||||
HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
|
||||
EA9DA8
|
||||
3F3F3F3FEA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
F09D8480EA9DA8
|
||||
F09D849EEA9DA8
|
||||
|
@ -2435,40 +2435,40 @@ F3A087AFEA9DA8
|
|||
F48FBFBFEA9DA8
|
||||
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
|
||||
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
|
||||
|
||||
EA9DA8
|
||||
EFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
|
||||
3F3F3F3F3F3F3F3F
|
||||
3F3F3F3FEA9DA8
|
||||
3F3F3F3FEFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
|
||||
F09D8480
|
||||
F09D84803F3F3F3F
|
||||
F09D8480EA9DA8
|
||||
F09D8480EFB9AB
|
||||
F09D849E
|
||||
F09D849E3F3F3F3F
|
||||
F09D849EEA9DA8
|
||||
F09D849EEFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D859E
|
||||
F09D859E3F3F3F3F
|
||||
F09D859EEA9DA8
|
||||
F09D859EEFB9AB
|
||||
F09D878F
|
||||
F09D878F3F3F3F3F
|
||||
F09D878FEA9DA8
|
||||
F09D878FEFB9AB
|
||||
F09D9C9F
|
||||
F09D9C9F3F3F3F3F
|
||||
F09D9C9FEA9DA8
|
||||
F09D9C9FEFB9AB
|
||||
F09D9E9F
|
||||
F09D9E9F3F3F3F3F
|
||||
F09D9E9FEA9DA8
|
||||
F09D9E9FEFB9AB
|
||||
F3A087AF
|
||||
F3A087AF3F3F3F3F
|
||||
F3A087AFEA9DA8
|
||||
F3A087AFEFB9AB
|
||||
F48FBFBF
|
||||
F48FBFBF3F3F3F3F
|
||||
F48FBFBFEA9DA8
|
||||
F48FBFBFEFB9AB
|
||||
SELECT count(*) FROM t1, t2
|
||||
|
@ -2484,8 +2484,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
|
||||
1114111
|
||||
1114111 3F ?
|
||||
1114111 3F3F3F3F ????
|
||||
119040 3F ?
|
||||
119070 3F ?
|
||||
119070 3F3F3F3F3F3F3F3F3F3F ??????????
|
||||
|
@ -2505,7 +2505,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
|
||||
|
@ -2517,7 +2517,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
|
||||
|
@ -2529,8 +2529,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding)
|
||||
1114111
|
||||
1114111 3F
|
||||
1114111 3F3F3F3F
|
||||
119040 3F
|
||||
119070 3F
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
|
@ -2550,8 +2550,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding)
|
||||
1114111
|
||||
1114111 3F
|
||||
1114111 3F3F3F3F
|
||||
119040 3F
|
||||
119070 3F
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
|
@ -2571,7 +2571,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
DROP TABLE IF EXISTS t3;
|
||||
|
|
|
@ -225,7 +225,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 varchar(10) character set utf8mb4) engine MyISAM;
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -233,7 +233,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (s1 text character set utf8mb4) engine MyISAM;
|
||||
insert into t1 values (0x41FF);
|
||||
|
@ -241,7 +241,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
|
||||
select hex(s1) from t1;
|
||||
hex(s1)
|
||||
41
|
||||
413F
|
||||
drop table t1;
|
||||
create table t1 (a text character set utf8mb4, primary key(a(371))) engine MyISAM;
|
||||
ERROR 42000: Specified key was too long; max key length is 1000 bytes
|
||||
|
@ -2285,7 +2285,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
|
||||
select hex(utf8mb4) from t1;
|
||||
hex(utf8mb4)
|
||||
|
||||
3F
|
||||
F0908080
|
||||
F0BFBFBF
|
||||
delete from t1;
|
||||
|
@ -2305,7 +2305,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
|
||||
select hex(utf8mb4) from t1;
|
||||
hex(utf8mb4)
|
||||
|
||||
3F
|
||||
F4808080
|
||||
F48F8080
|
||||
drop table t1;
|
||||
|
@ -2421,7 +2421,7 @@ Warning 1366 Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
|
|||
UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
|
||||
SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
|
||||
HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
|
||||
EA9DA8
|
||||
3F3F3F3FEA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
F09D8480EA9DA8
|
||||
F09D849EEA9DA8
|
||||
|
@ -2435,40 +2435,40 @@ F3A087AFEA9DA8
|
|||
F48FBFBFEA9DA8
|
||||
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
|
||||
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
|
||||
|
||||
EA9DA8
|
||||
EFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
|
||||
3F3F3F3F3F3F3F3F
|
||||
3F3F3F3FEA9DA8
|
||||
3F3F3F3FEFB9AB
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
|
||||
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
|
||||
F09D8480
|
||||
F09D84803F3F3F3F
|
||||
F09D8480EA9DA8
|
||||
F09D8480EFB9AB
|
||||
F09D849E
|
||||
F09D849E3F3F3F3F
|
||||
F09D849EEA9DA8
|
||||
F09D849EEFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
|
||||
F09D859E
|
||||
F09D859E3F3F3F3F
|
||||
F09D859EEA9DA8
|
||||
F09D859EEFB9AB
|
||||
F09D878F
|
||||
F09D878F3F3F3F3F
|
||||
F09D878FEA9DA8
|
||||
F09D878FEFB9AB
|
||||
F09D9C9F
|
||||
F09D9C9F3F3F3F3F
|
||||
F09D9C9FEA9DA8
|
||||
F09D9C9FEFB9AB
|
||||
F09D9E9F
|
||||
F09D9E9F3F3F3F3F
|
||||
F09D9E9FEA9DA8
|
||||
F09D9E9FEFB9AB
|
||||
F3A087AF
|
||||
F3A087AF3F3F3F3F
|
||||
F3A087AFEA9DA8
|
||||
F3A087AFEFB9AB
|
||||
F48FBFBF
|
||||
F48FBFBF3F3F3F3F
|
||||
F48FBFBFEA9DA8
|
||||
F48FBFBFEFB9AB
|
||||
SELECT count(*) FROM t1, t2
|
||||
|
@ -2484,8 +2484,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=MyISAM DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
|
||||
1114111
|
||||
1114111 3F ?
|
||||
1114111 3F3F3F3F ????
|
||||
119040 3F ?
|
||||
119070 3F ?
|
||||
119070 3F3F3F3F3F3F3F3F3F3F ??????????
|
||||
|
@ -2505,7 +2505,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
|
||||
|
@ -2517,7 +2517,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=MyISAM DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
|
||||
|
@ -2529,8 +2529,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=MyISAM DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding)
|
||||
1114111
|
||||
1114111 3F
|
||||
1114111 3F3F3F3F
|
||||
119040 3F
|
||||
119070 3F
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
|
@ -2550,8 +2550,8 @@ t1 CREATE TABLE `t1` (
|
|||
) ENGINE=MyISAM DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
|
||||
u_decimal hex(utf8mb4_encoding)
|
||||
1114111
|
||||
1114111 3F
|
||||
1114111 3F3F3F3F
|
||||
119040 3F
|
||||
119070 3F
|
||||
119070 3F3F3F3F3F3F3F3F3F3F
|
||||
|
@ -2571,7 +2571,7 @@ t2 CREATE TABLE `t2` (
|
|||
) ENGINE=MyISAM DEFAULT CHARSET=utf8
|
||||
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
|
||||
u_decimal hex(utf8mb3_encoding)
|
||||
1114111
|
||||
1114111 3F3F3F3F
|
||||
42856 EA9DA8
|
||||
65131 EFB9AB
|
||||
DROP TABLE IF EXISTS t3;
|
||||
|
|
|
@ -22,13 +22,15 @@ SHOW TABLE STATUS LIKE 't1';
|
|||
|
||||
--disable_warnings
|
||||
--disable_query_log
|
||||
ALTER TABLE test.t1 ADD code VARCHAR(16) NOT NULL;
|
||||
let $1= 221;
|
||||
while ($1)
|
||||
{
|
||||
eval INSERT INTO test.t1 VALUES(CHAR(254-$1));
|
||||
eval INSERT INTO test.t1 VALUES(CHAR(254-$1), HEX(254-$1));
|
||||
dec $1;
|
||||
}
|
||||
DELETE FROM test.t1 WHERE CHAR_LENGTH(a) <> 1;
|
||||
DELETE FROM test.t1 WHERE a='?' AND code<>'3F';
|
||||
--enable_query_log
|
||||
--enable_warnings
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\xA3' for column 'f1' at row 1
|
||||
select f1 from t1;
|
||||
f1
|
||||
|
||||
?
|
||||
update t1 set f1=0x6a;
|
||||
update t1 set f3=repeat(0xb1,8103);
|
||||
update t1 set f1=0x4a;
|
||||
|
@ -39,5 +39,5 @@ Warnings:
|
|||
Warning 1366 Incorrect string value: '\x82' for column 'f1' at row 1
|
||||
select f1 from t1;
|
||||
f1
|
||||
|
||||
?
|
||||
drop table t1;
|
||||
|
|
|
@ -121,7 +121,7 @@ DROP TEMPORARY TABLE head, tail;
|
|||
SHOW CREATE TABLE t1;
|
||||
SELECT COUNT(*) FROM t1;
|
||||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
#
|
||||
# Display all characters that have upper or lower case mapping.
|
||||
#
|
||||
|
|
|
@ -99,7 +99,7 @@ DROP TEMPORARY TABLE head, tail;
|
|||
SHOW CREATE TABLE t1;
|
||||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
#
|
||||
# Display all characters that have upper or lower case mapping.
|
||||
|
|
|
@ -446,6 +446,7 @@ SHOW CREATE TABLE t1;
|
|||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND a<>'?';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
SELECT * FROM t1 WHERE CHAR_LENGTH(a)=2;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;
|
||||
|
|
|
@ -95,8 +95,8 @@ WHERE t11.a >= 0x81 AND t11.a <= 0xFE
|
|||
AND t12.a >= 0x41 AND t12.a <= 0xFE
|
||||
ORDER BY t11.a, t12.a;
|
||||
--enable_warnings
|
||||
SELECT s as bad_code FROM t2 WHERE a='' ORDER BY s;
|
||||
DELETE FROM t2 WHERE a='';
|
||||
SELECT s as bad_code FROM t2 WHERE a='?' ORDER BY s;
|
||||
DELETE FROM t2 WHERE a='?';
|
||||
ALTER TABLE t2 ADD u VARCHAR(1) CHARACTER SET utf8, ADD a2 VARCHAR(1) CHARACTER SET euckr;
|
||||
--disable_warnings
|
||||
UPDATE t2 SET u=a, a2=u;
|
||||
|
@ -145,7 +145,7 @@ ORDER BY head, tail;
|
|||
DROP TEMPORARY TABLE head, tail;
|
||||
SHOW CREATE TABLE t1;
|
||||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
#
|
||||
# Display all characters that have upper or lower case mapping.
|
||||
#
|
||||
|
|
|
@ -69,7 +69,7 @@ ORDER BY head, tail;
|
|||
DROP TEMPORARY TABLE head, tail;
|
||||
SHOW CREATE TABLE t1;
|
||||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
#
|
||||
# Display all characters that have upper or lower case mapping.
|
||||
#
|
||||
|
|
|
@ -104,7 +104,7 @@ ORDER BY head, tail;
|
|||
DROP TEMPORARY TABLE head, tail;
|
||||
SHOW CREATE TABLE t1;
|
||||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
#
|
||||
# Display all characters that have upper or lower case mapping.
|
||||
#
|
||||
|
@ -203,3 +203,228 @@ SET NAMES gbk;
|
|||
--echo #
|
||||
--echo # End of 10.0 tests
|
||||
--echo #
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # Start of 10.1 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1 (
|
||||
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
|
||||
b VARBINARY(16),
|
||||
type SET('ascii','bad','head','tail','mb2','unassigned')
|
||||
);
|
||||
INSERT INTO t1 (b, type) VALUES (0x40, 'ascii,tail');
|
||||
INSERT INTO t1 (b, type) VALUES (0x80, 'tail');
|
||||
INSERT INTO t1 (b, type) VALUES (0x81, 'head,tail');
|
||||
INSERT INTO t1 (b, type) VALUES (0xFF, 'bad');
|
||||
INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
|
||||
INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
|
||||
INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
|
||||
CREATE TABLE t2 AS SELECT
|
||||
CONCAT(t1.b,t2.b) AS b,
|
||||
t1.type AS type1,
|
||||
t2.type AS type2,
|
||||
CONCAT('[',t1.type,'][',t2.type,']') AS comment
|
||||
FROM t1, t1 t2;
|
||||
|
||||
CREATE TABLE t3
|
||||
(
|
||||
b VARBINARY(16),
|
||||
c VARCHAR(16) CHARACTER SET gbk,
|
||||
comment VARCHAR(128)
|
||||
);
|
||||
--echo #
|
||||
--echo # A combination of two valid characters, should give no warnings
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE
|
||||
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
|
||||
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that start with a tail or a bad byte,
|
||||
--echo # or end with a bad byte, all should be fixed.
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE type1='tail' OR type1='bad' OR type2='bad'
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that start with an ASCII or an MB2 character,
|
||||
--echo # followed by a non-ASCII tail, all should be fixed.
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
|
||||
AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
--echo #
|
||||
--echo # Other sequences
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t3;
|
||||
DROP TABLE t3;
|
||||
DROP TABLE t2;
|
||||
|
||||
CREATE TABLE t2 AS SELECT
|
||||
CONCAT(t1.b,t2.b,t3.b) AS b,
|
||||
t1.type AS type1,
|
||||
t2.type AS type2,
|
||||
t3.type AS type3,
|
||||
CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
|
||||
FROM t1, t1 t2,t1 t3;
|
||||
SELECT COUNT(*) FROM t2;
|
||||
|
||||
CREATE TABLE t3
|
||||
(
|
||||
b VARBINARY(16),
|
||||
c VARCHAR(16) CHARACTER SET gbk,
|
||||
comment VARCHAR(128)
|
||||
);
|
||||
|
||||
--echo #
|
||||
--echo # A combination of three valid characters, should give no warnings
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE
|
||||
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
|
||||
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
|
||||
(FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3))
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that start with a tail or a bad byte,
|
||||
--echo # or have a bad byte, all should be fixed.
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that start with an ASCII or an MB2 character,
|
||||
--echo # followed by a pure non-ASCII tail, all should be fixed.
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
|
||||
AND type2='tail'
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that consist of two ASCII or MB2 characters,
|
||||
--echo # followed by a pure non-ASCII tail, all should be fixed.
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
|
||||
(FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
|
||||
type3='tail'
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that consist of two MB2 characters,
|
||||
--echo # followed by a non-ASCII head or tail, all should be fixed.
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
|
||||
AND NOT FIND_IN_SET('ascii',type3)
|
||||
AND NOT FIND_IN_SET('mb2',type3)
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that consist of head + tail + MB2 should go without warnings
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE FIND_IN_SET('head',type1)
|
||||
AND FIND_IN_SET('tail',type2)
|
||||
AND FIND_IN_SET('mb2',type3)
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
--echo #
|
||||
--echo # Sequences that consist of (ascii or mb2) + head + tail should go without warnings
|
||||
--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
|
||||
WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
|
||||
AND FIND_IN_SET('head',type2)
|
||||
AND FIND_IN_SET('tail',type3)
|
||||
ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
|
||||
DELETE FROM t3;
|
||||
|
||||
|
||||
#--echo #
|
||||
#--echo # Other sequences
|
||||
#--echo #
|
||||
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
|
||||
SELECT COUNT(*) FROM t3;
|
||||
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
|
||||
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
|
||||
|
||||
DROP TABLE t3;
|
||||
DROP TABLE t2;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.1 tests
|
||||
--echo #
|
||||
|
|
|
@ -145,7 +145,7 @@ DROP TEMPORARY TABLE head, tail;
|
|||
SHOW CREATE TABLE t1;
|
||||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
#
|
||||
# Display all characters that have upper or lower case mapping.
|
||||
|
|
|
@ -1276,7 +1276,7 @@ SHOW CREATE TABLE t1;
|
|||
|
||||
UPDATE t1 SET a=unhex(code) ORDER BY code;
|
||||
SELECT COUNT(*) FROM t1;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'?';
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
|
||||
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;
|
||||
#
|
||||
|
|
|
@ -1832,6 +1832,50 @@ set @@collation_connection=utf8mb4_bin;
|
|||
--echo # End of 5.6 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # Start of 10.0 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # This test sets session character set to 3-byte utf8,
|
||||
--echo # but then sends a 4-byte sequence (which is wrong for 3-byte utf8).
|
||||
--echo # It should be replaced to four question marks: '????' in both columns
|
||||
--echo # (i.e. four unknown bytes are replaced to four question marks),
|
||||
--echo # then the rest of the string should be stored, so we get 'a ???? b'.
|
||||
--echo #
|
||||
SET NAMES utf8;
|
||||
CREATE TABLE t1 (
|
||||
a VARCHAR(32) CHARACTER SET utf8mb4,
|
||||
b VARCHAR(32) CHARACTER SET utf8
|
||||
);
|
||||
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
|
||||
SELECT * FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # This test sets session character set to 4-byte utf8,
|
||||
--echo # then normally sends a 4-byte sequence.
|
||||
--echo # It should be stored AS IS into the utf8mb4 column (a),
|
||||
--echo # and should be replaced to a single question mark in the utf8 column (b)
|
||||
--echo # (i.e. one character that cannot be converted is replaced to one question mark).
|
||||
--echo #
|
||||
|
||||
SET NAMES utf8mb4;
|
||||
CREATE TABLE t1 (
|
||||
a VARCHAR(32) CHARACTER SET utf8mb4,
|
||||
b VARCHAR(32) CHARACTER SET utf8
|
||||
);
|
||||
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
|
||||
SELECT * FROM t1;
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo #
|
||||
--echo # End of 10.0 tests
|
||||
--echo #
|
||||
|
||||
--echo #
|
||||
--echo # End of tests
|
||||
|
|
|
@ -922,8 +922,8 @@ String_copier::well_formed_copy(CHARSET_INFO *to_cs,
|
|||
my_charset_same(from_cs, to_cs))
|
||||
{
|
||||
m_cannot_convert_error_pos= NULL;
|
||||
return to_cs->cset->copy_abort(to_cs, to, to_length, from, from_length,
|
||||
nchars, this);
|
||||
return to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length,
|
||||
nchars, this);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
|
||||
#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -6843,6 +6843,9 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
|
|||
if (s+2>e)
|
||||
return MY_CS_TOOSMALL2;
|
||||
|
||||
if (!IS_MB2_CHAR(hi, s[1]))
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
|
||||
return -2;
|
||||
|
||||
|
@ -6894,7 +6897,9 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_big5,
|
||||
my_well_formed_char_length_big5,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
struct charset_info_st my_charset_big5_chinese_ci=
|
||||
|
|
|
@ -549,6 +549,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_charlen_8bit,
|
||||
my_well_formed_char_length_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
|
|
|
@ -186,7 +186,7 @@ static const uchar sort_order_cp932[]=
|
|||
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
|
||||
#define IS_8BIT_CHAR(x) iscp932kata(x)
|
||||
#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -34765,7 +34765,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_cp932,
|
||||
my_well_formed_char_length_cp932,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -204,7 +204,7 @@ static const uchar sort_order_euc_kr[]=
|
|||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
|
||||
#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -9928,6 +9928,9 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
|
|||
if (s+2>e)
|
||||
return MY_CS_TOOSMALL2;
|
||||
|
||||
if (!IS_MB2_CHAR(hi, s[1]))
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
|
||||
return -2;
|
||||
|
||||
|
@ -9979,7 +9982,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_euckr,
|
||||
my_well_formed_char_length_euckr,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -198,7 +198,7 @@ static const uchar sort_order_eucjpms[]=
|
|||
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
|
||||
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -67511,7 +67511,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_eucjpms,
|
||||
my_well_formed_char_length_eucjpms,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -167,7 +167,7 @@ static const uchar sort_order_gb2312[]=
|
|||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
|
||||
#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -6330,7 +6330,10 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
|
|||
|
||||
if (s+2>e)
|
||||
return MY_CS_TOOSMALL2;
|
||||
|
||||
|
||||
if (!IS_MB2_CHAR(hi, s[1]))
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
|
||||
return -2;
|
||||
|
||||
|
@ -6382,7 +6385,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_gb2312,
|
||||
my_well_formed_char_length_gb2312,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
|
||||
#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -10724,6 +10724,9 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
|
|||
if (s+2>e)
|
||||
return MY_CS_TOOSMALL2;
|
||||
|
||||
if (!IS_MB2_CHAR(hi, s[1]))
|
||||
return MY_CS_ILSEQ;
|
||||
|
||||
if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
|
||||
return -2;
|
||||
|
||||
|
@ -10776,7 +10779,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_gbk,
|
||||
my_well_formed_char_length_gbk,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -422,6 +422,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_charlen_8bit,
|
||||
my_well_formed_char_length_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
|
|
|
@ -424,25 +424,95 @@ size_t my_well_formed_len_mb(CHARSET_INFO *cs, const char *b, const char *e,
|
|||
|
||||
|
||||
/*
|
||||
Copy a multi-byte string. Abort if a bad byte sequence was found.
|
||||
Not more than "nchars" characters are copied.
|
||||
Append a badly formed piece of string.
|
||||
Bad bytes are fixed to '?'.
|
||||
|
||||
@param to The destination string
|
||||
@param to_end The end of the destination string
|
||||
@param from The source string
|
||||
@param from_end The end of the source string
|
||||
@param nchars Write not more than "nchars" characters.
|
||||
@param status Copying status, must be previously initialized,
|
||||
e.g. using well_formed_char_length() on the original
|
||||
full source string.
|
||||
*/
|
||||
size_t
|
||||
my_copy_abort_mb(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
static size_t
|
||||
my_append_fix_badly_formed_tail(CHARSET_INFO *cs,
|
||||
char *to, char *to_end,
|
||||
const char *from, const char *from_end,
|
||||
size_t nchars,
|
||||
MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
int well_formed_error;
|
||||
size_t res;
|
||||
char *to0= to;
|
||||
|
||||
for ( ; nchars; nchars--)
|
||||
{
|
||||
int chlen;
|
||||
if ((chlen= cs->cset->charlen(cs, (const uchar*) from,
|
||||
(const uchar *) from_end)) > 0)
|
||||
{
|
||||
/* Found a valid character */ /* chlen == 1..MBMAXLEN */
|
||||
DBUG_ASSERT(chlen <= (int) cs->mbmaxlen);
|
||||
if (to + chlen > to_end)
|
||||
goto end; /* Does not fit to "to" */
|
||||
memcpy(to, from, (size_t) chlen);
|
||||
from+= chlen;
|
||||
to+= chlen;
|
||||
continue;
|
||||
}
|
||||
if (chlen == MY_CS_ILSEQ) /* chlen == 0 */
|
||||
{
|
||||
DBUG_ASSERT(from < from_end); /* Shouldn't get MY_CS_ILSEQ if empty */
|
||||
goto bad;
|
||||
}
|
||||
/* Got an incomplete character */ /* chlen == MY_CS_TOOSMALLXXX */
|
||||
DBUG_ASSERT(chlen >= MY_CS_TOOSMALL6);
|
||||
DBUG_ASSERT(chlen <= MY_CS_TOOSMALL);
|
||||
if (from >= from_end)
|
||||
break; /* End of the source string */
|
||||
bad:
|
||||
/* Bad byte sequence, or incomplete character found */
|
||||
if (!status->m_well_formed_error_pos)
|
||||
status->m_well_formed_error_pos= from;
|
||||
|
||||
if ((chlen= cs->cset->wc_mb(cs, '?', (uchar*) to, (uchar *) to_end)) <= 0)
|
||||
break; /* Question mark does not fit into the destination */
|
||||
to+= chlen;
|
||||
from++;
|
||||
}
|
||||
end:
|
||||
status->m_source_end_pos= from;
|
||||
return to - to0;
|
||||
}
|
||||
|
||||
|
||||
size_t
|
||||
my_copy_fix_mb(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
size_t well_formed_nchars;
|
||||
size_t well_formed_length;
|
||||
size_t fixed_length;
|
||||
|
||||
set_if_smaller(src_length, dst_length);
|
||||
res= cs->cset->well_formed_len(cs, src, src + src_length,
|
||||
nchars, &well_formed_error);
|
||||
memmove(dst, src, res);
|
||||
status->m_source_end_pos= src + res;
|
||||
status->m_well_formed_error_pos= well_formed_error ? src + res : NULL;
|
||||
return res;
|
||||
well_formed_nchars= cs->cset->well_formed_char_length(cs,
|
||||
src, src + src_length,
|
||||
nchars, status);
|
||||
DBUG_ASSERT(well_formed_nchars <= nchars);
|
||||
memmove(dst, src, (well_formed_length= status->m_source_end_pos - src));
|
||||
if (!status->m_well_formed_error_pos)
|
||||
return well_formed_length;
|
||||
|
||||
fixed_length= my_append_fix_badly_formed_tail(cs,
|
||||
dst + well_formed_length,
|
||||
dst + dst_length,
|
||||
src + well_formed_length,
|
||||
src + src_length,
|
||||
nchars - well_formed_nchars,
|
||||
status);
|
||||
return well_formed_length + fixed_length;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -29,7 +29,70 @@
|
|||
#endif
|
||||
|
||||
|
||||
#ifdef WELL_FORMED_LEN
|
||||
#ifdef DEFINE_ASIAN_ROUTINES
|
||||
#define DEFINE_WELL_FORMED_LEN
|
||||
#define DEFINE_WELL_FORMED_CHAR_LENGTH
|
||||
#define DEFINE_CHARLEN
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef DEFINE_CHARLEN
|
||||
/**
|
||||
Returns length of the left-most character of a string.
|
||||
@param cs - charset with mbminlen==1 and mbmaxlen<=4
|
||||
@param b - the beginning of the string
|
||||
@param e - the end of the string
|
||||
|
||||
@return MY_CS_ILSEQ if a bad byte sequence was found
|
||||
@return MY_CS_TOOSMALL(N) if the string ended unexpectedly
|
||||
@return >0 if a valid character was found
|
||||
*/
|
||||
static int
|
||||
MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *b, const uchar *e)
|
||||
{
|
||||
DBUG_ASSERT(cs->mbminlen == 1);
|
||||
DBUG_ASSERT(cs->mbmaxlen <= 4);
|
||||
|
||||
if (b >= e)
|
||||
return MY_CS_TOOSMALL;
|
||||
if ((uchar) b[0] < 128)
|
||||
return 1; /* Single byte ASCII character */
|
||||
|
||||
#ifdef IS_8BIT_CHAR
|
||||
if (IS_8BIT_CHAR(b[0]))
|
||||
{
|
||||
/* Single byte non-ASCII character, e.g. half width kana in sjis */
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (b + 2 > e)
|
||||
return MY_CS_TOOSMALLN(2);
|
||||
if (IS_MB2_CHAR(b[0], b[1]))
|
||||
return 2; /* Double byte character */
|
||||
|
||||
#ifdef IS_MB3_CHAR
|
||||
if (b + 3 > e)
|
||||
return MY_CS_TOOSMALLN(3);
|
||||
if (IS_MB3_CHAR(b[0], b[1], b[2]))
|
||||
return 3; /* Three-byte character */
|
||||
#endif
|
||||
|
||||
#ifdef IS_MB4_CHAR
|
||||
if (b + 4 > e)
|
||||
return MY_CS_TOOSMALLN(4);
|
||||
if (IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
|
||||
return 4; /* Four-byte character */
|
||||
#endif
|
||||
|
||||
/* Wrong byte sequence */
|
||||
return MY_CS_ILSEQ;
|
||||
}
|
||||
#endif /* DEFINE_CHARLEN */
|
||||
|
||||
|
||||
#ifdef DEFINE_WELL_FORMED_LEN
|
||||
/**
|
||||
Returns well formed length of a character string with
|
||||
variable character length for character sets with:
|
||||
|
@ -91,4 +154,105 @@ MY_FUNCTION_NAME(well_formed_len)(CHARSET_INFO *cs __attribute__((unused)),
|
|||
return b - b0;
|
||||
}
|
||||
|
||||
#endif /* WELL_FORMED_LEN */
|
||||
#endif /* DEFINE_WELL_FORMED_LEN */
|
||||
|
||||
|
||||
|
||||
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH
|
||||
/**
|
||||
Returns well formed length of a string
|
||||
measured in characters (rather than in bytes).
|
||||
Version for character sets that define IS_MB?_CHAR(), e.g. big5.
|
||||
*/
|
||||
static size_t
|
||||
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t nchars,
|
||||
MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
size_t nchars0= nchars;
|
||||
for ( ; b < e && nchars ; nchars--)
|
||||
{
|
||||
if ((uchar) b[0] < 128)
|
||||
{
|
||||
b++; /* Single byte ASCII character */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
|
||||
{
|
||||
b+= 2; /* Double byte character */
|
||||
continue;
|
||||
}
|
||||
|
||||
#ifdef IS_MB3_CHAR
|
||||
if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
|
||||
{
|
||||
b+= 3; /* Three-byte character */
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_MB4_CHAR
|
||||
if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
|
||||
{
|
||||
b+= 4; /* Four-byte character */
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef IS_8BIT_CHAR
|
||||
if (IS_8BIT_CHAR(b[0]))
|
||||
{
|
||||
b++; /* Single byte non-ASCII character, e.g. half width kana in sjis */
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Wrong byte sequence */
|
||||
status->m_source_end_pos= status->m_well_formed_error_pos= b;
|
||||
return nchars0 - nchars;
|
||||
}
|
||||
status->m_source_end_pos= b;
|
||||
status->m_well_formed_error_pos= NULL;
|
||||
return nchars0 - nchars;
|
||||
}
|
||||
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */
|
||||
|
||||
|
||||
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
#ifndef CHARLEN
|
||||
#error CHARLEN is not defined
|
||||
#endif
|
||||
/**
|
||||
Returns well formed length of a string
|
||||
measured in characters (rather than in bytes).
|
||||
Version for character sets that define CHARLEN(), e.g. utf8.
|
||||
CHARLEN(cs,b,e) must use the same return code convension that mb_wc() does:
|
||||
- a positive number in the range [1-mbmaxlen] if a valid
|
||||
single-byte or multi-byte character was found
|
||||
- MY_CS_ILSEQ (0) on a bad byte sequence
|
||||
- MY_CS_TOOSMALLxx if the incoming sequence is incomplete
|
||||
*/
|
||||
static size_t
|
||||
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t nchars,
|
||||
MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
size_t nchars0= nchars;
|
||||
int chlen;
|
||||
for ( ; nchars ; nchars--, b+= chlen)
|
||||
{
|
||||
if ((chlen= CHARLEN(cs, (uchar*) b, (uchar*) e)) <= 0)
|
||||
{
|
||||
status->m_well_formed_error_pos= b < e ? b : NULL;
|
||||
status->m_source_end_pos= b;
|
||||
return nchars0 - nchars;
|
||||
}
|
||||
}
|
||||
status->m_well_formed_error_pos= NULL;
|
||||
status->m_source_end_pos= b;
|
||||
return nchars0 - nchars;
|
||||
}
|
||||
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */
|
||||
|
|
|
@ -248,6 +248,13 @@ int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
|
|||
}
|
||||
|
||||
|
||||
/*
  Length of the left-most character of an 8-bit string.
  Returns 1 when at least one byte is available,
  MY_CS_TOOSMALL when the string is empty.
*/
int my_charlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
                    const uchar *str, const uchar *end)
{
  if (str < end)
    return 1;
  return MY_CS_TOOSMALL;
}
|
||||
|
||||
|
||||
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
|
||||
const uchar *str,
|
||||
const uchar *end __attribute__((unused)))
|
||||
|
@ -1108,6 +1115,19 @@ size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
size_t
|
||||
my_well_formed_char_length_8bit(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *start, const char *end,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
size_t nbytes= (size_t) (end - start);
|
||||
size_t res= MY_MIN(nbytes, nchars);
|
||||
status->m_well_formed_error_pos= NULL;
|
||||
status->m_source_end_pos= start + res;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Copy an 8-bit string. Not more than "nchars" characters are copied.
|
||||
*/
|
||||
|
@ -1906,6 +1926,8 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_charlen_8bit,
|
||||
my_well_formed_char_length_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
|
|
|
@ -187,7 +187,7 @@ static const uchar sort_order_sjis[]=
|
|||
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
|
||||
#define IS_8BIT_CHAR(x) issjiskata(x)
|
||||
#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -34144,7 +34144,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_sjis,
|
||||
my_well_formed_char_length_sjis,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -886,6 +886,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_charlen_8bit,
|
||||
my_well_formed_char_length_8bit,
|
||||
my_copy_8bit,
|
||||
};
|
||||
|
||||
|
|
|
@ -92,62 +92,107 @@ my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
Copy an UCS2/UTF16/UTF32 string.
|
||||
Not more than "nchars" characters are copied.
|
||||
/* Result codes returned by my_copy_incomplete_char() */
typedef enum
|
||||
{
|
||||
MY_CHAR_COPY_OK= 0, /* The character was OK */
|
||||
MY_CHAR_COPY_ERROR= 1, /* The character was not Ok, and could not fix */
|
||||
MY_CHAR_COPY_FIXED= 2 /* The character was not Ok, was fixed to '?' */
|
||||
} my_char_copy_status_t;
|
||||
|
||||
UCS2/UTF16/UTF32 may need to prepend some zero bytes,
|
||||
e.g. when copying from a BINARY source:
|
||||
INSERT INTO t1 (ucs2_column) VALUES (0x01);
|
||||
0x01 -> 0x0001
|
||||
|
||||
/*
|
||||
Copies an incomplete character, lef-padding it with 0x00 bytes.
|
||||
|
||||
@param cs Character set
|
||||
@param dst The destination string
|
||||
@param dst_length Space available in dst
|
||||
@param src The source string
|
||||
@param src_length Length of src
|
||||
@param nchars Copy not more than nchars characters.
|
||||
The "nchars" parameter of the caller.
|
||||
Only 0 and non-0 are important here.
|
||||
@param fix What to do if after zero-padding didn't get a valid
|
||||
character:
|
||||
- FALSE - exit with error.
|
||||
- TRUE - try to put '?' instead.
|
||||
|
||||
@return MY_CHAR_COPY_OK if after zero-padding got a valid character.
|
||||
cs->mbmaxlen bytes were written to "dst".
|
||||
@return MY_CHAR_COPY_FIXED if after zero-padding did not get a valid
|
||||
character, but wrote '?' to the destination
|
||||
string instead.
|
||||
cs->mbminlen bytes were written to "dst".
|
||||
@return MY_CHAR_COPY_ERROR If failed and nothing was written to "dst".
|
||||
Possible reasons:
|
||||
- dst_length was too short
|
||||
- nchars was 0
|
||||
- the character after padding appeared not
|
||||
to be valid, and could not fix it to '?'.
|
||||
*/
|
||||
static my_char_copy_status_t
|
||||
my_copy_incomplete_char(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, my_bool fix)
|
||||
{
|
||||
size_t pad_length;
|
||||
size_t src_offset= src_length % cs->mbminlen;
|
||||
if (dst_length < cs->mbminlen || !nchars)
|
||||
return MY_CHAR_COPY_ERROR;
|
||||
|
||||
pad_length= cs->mbminlen - src_offset;
|
||||
bzero(dst, pad_length);
|
||||
memmove(dst + pad_length, src, src_offset);
|
||||
/*
|
||||
In some cases left zero-padding can create an incorrect character.
|
||||
For example:
|
||||
INSERT INTO t1 (utf32_column) VALUES (0x110000);
|
||||
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
|
||||
The valid characters range is limited to 0x00000000..0x0010FFFF.
|
||||
|
||||
Make sure we didn't pad to an incorrect character.
|
||||
*/
|
||||
if (cs->cset->charlen(cs, (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
|
||||
(int) cs->mbminlen)
|
||||
return MY_CHAR_COPY_OK;
|
||||
|
||||
if (fix &&
|
||||
cs->cset->wc_mb(cs, '?', (uchar *) dst, (uchar *) dst + cs->mbminlen) ==
|
||||
(int) cs->mbminlen)
|
||||
return MY_CHAR_COPY_FIXED;
|
||||
|
||||
return MY_CHAR_COPY_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Copy an UCS2/UTF16/UTF32 string, fix bad characters.
|
||||
*/
|
||||
static size_t
|
||||
my_copy_abort_mb2_or_mb4(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs,
|
||||
char *dst, size_t dst_length,
|
||||
const char *src, size_t src_length,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
size_t src_offset;
|
||||
|
||||
if ((src_offset= (src_length % cs->mbminlen)))
|
||||
size_t length2, src_offset= src_length % cs->mbminlen;
|
||||
my_char_copy_status_t padstatus;
|
||||
|
||||
if (!src_offset)
|
||||
return my_copy_fix_mb(cs, dst, dst_length,
|
||||
src, src_length, nchars, status);
|
||||
if ((padstatus= my_copy_incomplete_char(cs, dst, dst_length,
|
||||
src, src_length, nchars, TRUE)) ==
|
||||
MY_CHAR_COPY_ERROR)
|
||||
{
|
||||
int well_formed_error;
|
||||
size_t pad_length;
|
||||
if (dst_length < cs->mbminlen || !nchars)
|
||||
{
|
||||
status->m_source_end_pos= status->m_well_formed_error_pos= src;
|
||||
return 0;
|
||||
}
|
||||
|
||||
pad_length= cs->mbminlen - src_offset;
|
||||
bzero(dst, pad_length);
|
||||
memmove(dst + pad_length, src, src_offset);
|
||||
/*
|
||||
In some cases left zero-padding can create an incorrect character.
|
||||
For example:
|
||||
INSERT INTO t1 (utf32_column) VALUES (0x110000);
|
||||
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
|
||||
The valid characters range is limited to 0x00000000..0x0010FFFF.
|
||||
|
||||
Make sure we didn't pad to an incorrect character.
|
||||
*/
|
||||
if (cs->cset->well_formed_len(cs,
|
||||
dst, dst + cs->mbminlen, 1,
|
||||
&well_formed_error) != cs->mbminlen)
|
||||
{
|
||||
status->m_source_end_pos= status->m_well_formed_error_pos= src;
|
||||
return 0;
|
||||
}
|
||||
nchars--;
|
||||
src+= src_offset;
|
||||
src_length-= src_offset;
|
||||
dst+= cs->mbminlen;
|
||||
dst_length-= cs->mbminlen;
|
||||
return
|
||||
cs->mbminlen /* The left-padded character */ +
|
||||
my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
|
||||
status->m_source_end_pos= status->m_well_formed_error_pos= src;
|
||||
return 0;
|
||||
}
|
||||
return my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
|
||||
length2= my_copy_fix_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen,
|
||||
src + src_offset, src_length - src_offset,
|
||||
nchars - 1, status);
|
||||
if (padstatus == MY_CHAR_COPY_FIXED)
|
||||
status->m_well_formed_error_pos= src;
|
||||
return cs->mbminlen /* The left-padded character */ + length2;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1475,6 +1520,24 @@ my_ismbchar_utf16(CHARSET_INFO *cs, const char *b, const char *e)
|
|||
}
|
||||
|
||||
|
||||
static int
|
||||
my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
|
||||
{
|
||||
my_wc_t wc;
|
||||
return cs->cset->mb_wc(cs, &wc, str, end);
|
||||
}
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16
|
||||
#define CHARLEN(cs,str,end) my_charlen_utf16(cs,str,end)
|
||||
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
#include "ctype-mb.ic"
|
||||
#undef MY_FUNCTION_NAME
|
||||
#undef CHARLEN
|
||||
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
/* Defines my_well_formed_char_length_utf16 */
|
||||
|
||||
|
||||
static uint
|
||||
my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)),
|
||||
uint c __attribute__((unused)))
|
||||
|
@ -1742,7 +1805,9 @@ MY_CHARSET_HANDLER my_charset_utf16_handler=
|
|||
my_strtoll10_mb2,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_mb2,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
my_charlen_utf16,
|
||||
my_well_formed_char_length_utf16,
|
||||
my_copy_fix_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
|
@ -1912,7 +1977,9 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler=
|
|||
my_strtoll10_mb2,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_mb2,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
my_charlen_utf16,
|
||||
my_well_formed_char_length_utf16,
|
||||
my_copy_fix_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
|
@ -1987,6 +2054,13 @@ struct charset_info_st my_charset_utf16le_bin=
|
|||
|
||||
#ifdef HAVE_CHARSET_utf32
|
||||
|
||||
/*
|
||||
Check if b0 and b1 start a valid UTF32 four-byte sequence.
|
||||
Don't accept characters greater than U+10FFFF.
|
||||
*/
|
||||
#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10))
|
||||
|
||||
|
||||
static int
|
||||
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
|
||||
my_wc_t *pwc, const uchar *s, const uchar *e)
|
||||
|
@ -1994,7 +2068,7 @@ my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
|
|||
if (s + 4 > e)
|
||||
return MY_CS_TOOSMALL4;
|
||||
*pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]);
|
||||
return 4;
|
||||
return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
|
||||
}
|
||||
|
||||
|
||||
|
@ -2004,7 +2078,10 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|||
{
|
||||
if (s + 4 > e)
|
||||
return MY_CS_TOOSMALL4;
|
||||
|
||||
|
||||
if (wc > 0x10FFFF)
|
||||
return MY_CS_ILUNI;
|
||||
|
||||
s[0]= (uchar) (wc >> 24);
|
||||
s[1]= (uchar) (wc >> 16) & 0xFF;
|
||||
s[2]= (uchar) (wc >> 8) & 0xFF;
|
||||
|
@ -2263,10 +2340,29 @@ my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|||
const char *b,
|
||||
const char *e)
|
||||
{
|
||||
return b + 4 > e ? 0 : 4;
|
||||
return b + 4 > e || !IS_UTF32_MBHEAD4(b[0], b[1]) ? 0 : 4;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *b, const uchar *e)
|
||||
{
|
||||
return b + 4 > e ? MY_CS_TOOSMALL4 :
|
||||
IS_UTF32_MBHEAD4(b[0], b[1]) ? 4 : MY_CS_ILSEQ;
|
||||
}
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32
|
||||
#define CHARLEN(cs,str,end) my_charlen_utf32(cs,str,end)
|
||||
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
#include "ctype-mb.ic"
|
||||
#undef MY_FUNCTION_NAME
|
||||
#undef CHARLEN
|
||||
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
/* Defines my_well_formed_char_length_utf32 */
|
||||
|
||||
|
||||
static uint
|
||||
my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) ,
|
||||
uint c __attribute__((unused)))
|
||||
|
@ -2579,8 +2675,7 @@ my_well_formed_len_utf32(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
for (; b < e; b+= 4)
|
||||
{
|
||||
/* Don't accept characters greater than U+10FFFF */
|
||||
if (b[0] || (uchar) b[1] > 0x10)
|
||||
if (!IS_UTF32_MBHEAD4(b[0], b[1]))
|
||||
{
|
||||
*error= 1;
|
||||
return b - b0;
|
||||
|
@ -2827,7 +2922,9 @@ MY_CHARSET_HANDLER my_charset_utf32_handler=
|
|||
my_strtoll10_utf32,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_utf32,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
my_charlen_utf32,
|
||||
my_well_formed_char_length_utf32,
|
||||
my_copy_fix_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
|
@ -2961,6 +3058,14 @@ static const uchar to_upper_ucs2[] = {
|
|||
};
|
||||
|
||||
|
||||
static int
|
||||
my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *s, const uchar *e)
|
||||
{
|
||||
return s + 2 > e ? MY_CS_TOOSMALLN(2) : 2;
|
||||
}
|
||||
|
||||
|
||||
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
|
||||
my_wc_t * pwc, const uchar *s, const uchar *e)
|
||||
{
|
||||
|
@ -3264,6 +3369,31 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static size_t
|
||||
my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const char *b, const char *e,
|
||||
size_t nchars, MY_STRCOPY_STATUS *status)
|
||||
{
|
||||
size_t length= e - b;
|
||||
if (nchars * 2 <= length)
|
||||
{
|
||||
status->m_well_formed_error_pos= NULL;
|
||||
status->m_source_end_pos= b + (nchars * 2);
|
||||
return nchars;
|
||||
}
|
||||
if (length % 2)
|
||||
{
|
||||
status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
status->m_well_formed_error_pos= NULL;
|
||||
status->m_source_end_pos= e;
|
||||
}
|
||||
return length / 2;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
|
||||
const char *str,const char *str_end,
|
||||
|
@ -3446,7 +3576,9 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
|
|||
my_strtoll10_mb2,
|
||||
my_strntoull10rnd_mb2_or_mb4,
|
||||
my_scan_mb2,
|
||||
my_copy_abort_mb2_or_mb4,
|
||||
my_charlen_ucs2,
|
||||
my_well_formed_char_length_ucs2,
|
||||
my_copy_fix_mb2_or_mb4,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -197,7 +197,7 @@ static const uchar sort_order_ujis[]=
|
|||
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
|
||||
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
|
||||
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
|
||||
#define WELL_FORMED_LEN
|
||||
#define DEFINE_ASIAN_ROUTINES
|
||||
#include "ctype-mb.ic"
|
||||
|
||||
|
||||
|
@ -67255,7 +67255,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_ujis,
|
||||
my_well_formed_char_length_ujis,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -5446,8 +5446,8 @@ int my_wildcmp_utf8(CHARSET_INFO *cs,
|
|||
|
||||
|
||||
static
|
||||
int my_valid_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *s, const uchar *e)
|
||||
int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *s, const uchar *e)
|
||||
{
|
||||
uchar c;
|
||||
|
||||
|
@ -5515,7 +5515,7 @@ my_well_formed_len_utf8(CHARSET_INFO *cs, const char *b, const char *e,
|
|||
{
|
||||
int mb_len;
|
||||
|
||||
if ((mb_len= my_valid_mbcharlen_utf8(cs, (uchar*) b, (uchar*) e)) <= 0)
|
||||
if ((mb_len= my_charlen_utf8(cs, (uchar*) b, (uchar*) e)) <= 0)
|
||||
{
|
||||
*error= b < e ? 1 : 0;
|
||||
break;
|
||||
|
@ -5526,9 +5526,20 @@ my_well_formed_len_utf8(CHARSET_INFO *cs, const char *b, const char *e,
|
|||
return (size_t) (b - b_start);
|
||||
}
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8
|
||||
#define CHARLEN(cs,str,end) my_charlen_utf8(cs,str,end)
|
||||
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
#include "ctype-mb.ic"
|
||||
#undef MY_FUNCTION_NAME
|
||||
#undef CHARLEN
|
||||
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
/* my_well_formed_char_length_utf8 */
|
||||
|
||||
|
||||
static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e)
|
||||
{
|
||||
int res= my_valid_mbcharlen_utf8(cs, (const uchar*)b, (const uchar*)e);
|
||||
int res= my_charlen_utf8(cs, (const uchar*) b, (const uchar*) e);
|
||||
return (res>1) ? res : 0;
|
||||
}
|
||||
|
||||
|
@ -5615,7 +5626,9 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_utf8,
|
||||
my_well_formed_char_length_utf8,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -7125,6 +7138,24 @@ my_wc_mb_filename(CHARSET_INFO *cs __attribute__((unused)),
|
|||
}
|
||||
|
||||
|
||||
static int
|
||||
my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
|
||||
{
|
||||
my_wc_t wc;
|
||||
return cs->cset->mb_wc(cs, &wc, str, end);
|
||||
}
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _filename
|
||||
#define CHARLEN(cs,str,end) my_charlen_filename(cs,str,end)
|
||||
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
#include "ctype-mb.ic"
|
||||
#undef MY_FUNCTION_NAME
|
||||
#undef CHARLEN
|
||||
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
/* my_well_formed_char_length_filename */
|
||||
|
||||
|
||||
static MY_COLLATION_HANDLER my_collation_filename_handler =
|
||||
{
|
||||
NULL, /* init */
|
||||
|
@ -7169,7 +7200,9 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_filename,
|
||||
my_well_formed_char_length_filename,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
@ -7954,8 +7987,8 @@ my_wildcmp_utf8mb4(CHARSET_INFO *cs,
|
|||
|
||||
|
||||
static int
|
||||
my_valid_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *s, const uchar *e)
|
||||
my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
|
||||
const uchar *s, const uchar *e)
|
||||
{
|
||||
uchar c;
|
||||
|
||||
|
@ -8015,7 +8048,7 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
|
|||
{
|
||||
int mb_len;
|
||||
|
||||
if ((mb_len= my_valid_mbcharlen_utf8mb4(cs, (uchar*) b, (uchar*) e)) <= 0)
|
||||
if ((mb_len= my_charlen_utf8mb4(cs, (uchar*) b, (uchar*) e)) <= 0)
|
||||
{
|
||||
*error= b < e ? 1 : 0;
|
||||
break;
|
||||
|
@ -8027,10 +8060,19 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
|
|||
}
|
||||
|
||||
|
||||
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4
|
||||
#define CHARLEN(cs,str,end) my_charlen_utf8mb4(cs,str,end)
|
||||
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
#include "ctype-mb.ic"
|
||||
#undef MY_FUNCTION_NAME
|
||||
#undef CHARLEN
|
||||
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
|
||||
/* my_well_formed_char_length_utf8mb4 */
|
||||
|
||||
static uint
|
||||
my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e)
|
||||
{
|
||||
int res= my_valid_mbcharlen_utf8mb4(cs, (const uchar*)b, (const uchar*)e);
|
||||
int res= my_charlen_utf8mb4(cs, (const uchar*) b, (const uchar*) e);
|
||||
return (res > 1) ? res : 0;
|
||||
}
|
||||
|
||||
|
@ -8113,7 +8155,9 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
|
|||
my_strtoll10_8bit,
|
||||
my_strntoull10rnd_8bit,
|
||||
my_scan_8bit,
|
||||
my_copy_abort_mb,
|
||||
my_charlen_utf8mb4,
|
||||
my_well_formed_char_length_utf8mb4,
|
||||
my_copy_fix_mb,
|
||||
};
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue