MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion

This commit is contained in:
Alexander Barkov 2015-03-13 16:51:36 +04:00
parent 702fba1511
commit 197afb413f
42 changed files with 1900 additions and 253 deletions

View file

@ -444,22 +444,64 @@ struct my_charset_handler_st
size_t (*scan)(CHARSET_INFO *, const char *b, const char *e,
int sq);
/* Copying routines */
/* String copying routines and helpers for them */
/*
copy_abort() - copy a string, abort if a bad byte sequence was found.
charlen() - calculate length of the left-most character in bytes.
@param cs Character set
@param str The beginning of the string
@param end The end of the string
@return MY_CS_ILSEQ if a bad byte sequence was found.
@return MY_CS_TOOSMALLN(x) if the string ended unexpectedly.
@return a positive number in the range 1..mbmaxlen,
if a valid character was found.
*/
int (*charlen)(CHARSET_INFO *cs, const uchar *str, const uchar *end);
/*
well_formed_char_length() - returns character length of a string.
@param cs Character set
@param str The beginning of the string
@param end The end of the string
@param nchars Not more than "nchars" left-most characters are checked.
@param status[OUT] Additional statistics are returned here.
"status" can be uninitialized before the call,
and it is fully initialized after the call.
status->m_source_end_pos is set to the position where reading stopped.
If a bad byte sequence is found, the function returns immediately and
status->m_well_formed_error_pos is set to the position where a bad byte
sequence was found.
status->m_well_formed_error_pos is set to NULL if no bad bytes were found.
If status->m_well_formed_error_pos is NULL after the call, that means:
- either the function reached the end of the string,
- or all "nchars" characters were read.
The caller can check status->m_source_end_pos to detect which of these two
happened.
*/
size_t (*well_formed_char_length)(CHARSET_INFO *cs,
const char *str, const char *end,
size_t nchars,
MY_STRCOPY_STATUS *status);
/*
copy_fix() - copy a string, replace bad bytes with '?'.
Not more than "nchars" characters are copied.
status->m_source_end_pos is set to a position in the range
between "src" and "src + src_length".
between "src" and "src + src_length", where reading stopped.
status->m_well_formed_error_pos is set to NULL if the string
in the range "src" and "status->m_source_end_pos" was well formed,
or is set to "src + src_length" otherwise.
or is set to a position between "src" and "src + src_length" where
the leftmost bad byte sequence was found.
*/
size_t (*copy_abort)(CHARSET_INFO *,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *status);
size_t (*copy_fix)(CHARSET_INFO *,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *status);
};
extern MY_CHARSET_HANDLER my_charset_8bit_handler;
@ -596,10 +638,10 @@ size_t my_copy_8bit(CHARSET_INFO *,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *);
size_t my_copy_abort_mb(CHARSET_INFO *cs,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *);
size_t my_copy_fix_mb(CHARSET_INFO *cs,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *);
/* Functions for 8bit */
extern size_t my_caseup_str_8bit(CHARSET_INFO *, char *);
@ -691,6 +733,11 @@ size_t my_numcells_8bit(CHARSET_INFO *, const char *b, const char *e);
size_t my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, size_t pos);
size_t my_well_formed_len_8bit(CHARSET_INFO *, const char *b, const char *e,
size_t pos, int *error);
size_t my_well_formed_char_length_8bit(CHARSET_INFO *cs,
const char *b, const char *e,
size_t nchars,
MY_STRCOPY_STATUS *status);
int my_charlen_8bit(CHARSET_INFO *, const uchar *str, const uchar *end);
uint my_mbcharlen_8bit(CHARSET_INFO *, uint c);

View file

@ -597,7 +597,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
COUNT(*)
13973
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));

View file

@ -165,7 +165,7 @@ Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1;
COUNT(*)
14623
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
COUNT(*)
63
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;

View file

@ -10101,6 +10101,9 @@ COUNT(*)
56959
SELECT COUNT(*) FROM t1 WHERE a<>'';
COUNT(*)
56959
SELECT COUNT(*) FROM t1 WHERE a<>'' AND a<>'?';
COUNT(*)
17735
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
COUNT(*)
@ -33632,7 +33635,7 @@ CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET eucjpms);
INSERT INTO t1 VALUES (0x8EA0);
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
HEX(a) CHAR_LENGTH(a)
0
3F3F 2
DROP TABLE t1;
SELECT _eucjpms 0x8EA0;
ERROR HY000: Invalid eucjpms character string: '8EA0'

View file

@ -407,12 +407,12 @@ Warnings:
Warning 1366 Incorrect string value: '\xA1\xFF' for column 's1' at row 1
select hex(s1), hex(convert(s1 using utf8)) from t1 order by binary s1;
hex(s1) hex(convert(s1 using utf8))
3F3F 3F3F
3F3F 3F3F
3F40 3F40
3F5B 3F5B
3F60 3F60
3F7B 3F7B
A141 ECA2A5
A15A ECA381
A161 ECA382
@ -445,7 +445,7 @@ FROM t1 t11, t1 t12
WHERE t11.a >= 0x81 AND t11.a <= 0xFE
AND t12.a >= 0x41 AND t12.a <= 0xFE
ORDER BY t11.a, t12.a;
SELECT s as bad_code FROM t2 WHERE a='' ORDER BY s;
SELECT s as bad_code FROM t2 WHERE a='?' ORDER BY s;
bad_code
815B
815C
@ -1959,7 +1959,7 @@ FE7D
FE7E
FE7F
FE80
DELETE FROM t2 WHERE a='';
DELETE FROM t2 WHERE a='?';
ALTER TABLE t2 ADD u VARCHAR(1) CHARACTER SET utf8, ADD a2 VARCHAR(1) CHARACTER SET euckr;
UPDATE t2 SET u=a, a2=u;
SELECT s as unassigned_code FROM t2 WHERE u='?';
@ -24492,7 +24492,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
COUNT(*)
22428
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));

View file

@ -553,7 +553,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
COUNT(*)
8178
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));

View file

@ -573,7 +573,7 @@ Warning 1366 Incorrect string value: '\x80\' for column 'a' at row 61
Warning 1366 Incorrect string value: '\x80]' for column 'a' at row 62
Warning 1366 Incorrect string value: '\x80^' for column 'a' at row 63
Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
COUNT(*)
23940
SELECT code, hex(upper(a)), hex(lower(a)),a, upper(a), lower(a) FROM t1 WHERE hex(a)<>hex(upper(a)) OR hex(a)<>hex(lower(a));
@ -4946,3 +4946,814 @@ DROP TABLE t1;
#
# End of 10.0 tests
#
#
# Start of 10.1 tests
#
#
# MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
#
CREATE TABLE t1 (
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
b VARBINARY(16),
type SET('ascii','bad','head','tail','mb2','unassigned')
);
INSERT INTO t1 (b, type) VALUES (0x40, 'ascii,tail');
INSERT INTO t1 (b, type) VALUES (0x80, 'tail');
INSERT INTO t1 (b, type) VALUES (0x81, 'head,tail');
INSERT INTO t1 (b, type) VALUES (0xFF, 'bad');
INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
CREATE TABLE t2 AS SELECT
CONCAT(t1.b,t2.b) AS b,
t1.type AS type1,
t2.type AS type2,
CONCAT('[',t1.type,'][',t2.type,']') AS comment
FROM t1, t1 t2;
CREATE TABLE t3
(
b VARBINARY(16),
c VARCHAR(16) CHARACTER SET gbk,
comment VARCHAR(128)
);
#
# A combination of two valid characters, should give no warnings
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
ORDER BY b;
SELECT COUNT(*) FROM t3;
COUNT(*)
16
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
4040 [ascii,tail][ascii,tail]
40A140 [ascii,tail][mb2,unassigned]
40A1A3 [ascii,tail][mb2]
40FE40 [ascii,tail][mb2]
A14040 [mb2,unassigned][ascii,tail]
A140A140 [mb2,unassigned][mb2,unassigned]
A140A1A3 [mb2,unassigned][mb2]
A140FE40 [mb2,unassigned][mb2]
A1A340 [mb2][ascii,tail]
A1A3A140 [mb2][mb2,unassigned]
A1A3A1A3 [mb2][mb2]
A1A3FE40 [mb2][mb2]
FE4040 [mb2][ascii,tail]
FE40A140 [mb2][mb2,unassigned]
FE40A1A3 [mb2][mb2]
FE40FE40 [mb2][mb2]
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that start with a tail or a bad byte,
# or end with a bad byte, all should be fixed.
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE type1='tail' OR type1='bad' OR type2='bad'
ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 1
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 2
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 3
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 4
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 5
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 6
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 7
Warning 1366 Incorrect string value: '\x80\xFF' for column 'c' at row 8
Warning 1366 Incorrect string value: '\x81\xFF' for column 'c' at row 9
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 10
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 11
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 12
Warning 1366 Incorrect string value: '\xFF@' for column 'c' at row 13
Warning 1366 Incorrect string value: '\xFF\x80' for column 'c' at row 14
Warning 1366 Incorrect string value: '\xFF\x81' for column 'c' at row 15
Warning 1366 Incorrect string value: '\xFF\xA1@' for column 'c' at row 16
Warning 1366 Incorrect string value: '\xFF\xA1\xA3' for column 'c' at row 17
Warning 1366 Incorrect string value: '\xFF\xFE@' for column 'c' at row 18
Warning 1366 Incorrect string value: '\xFF\xFF' for column 'c' at row 19
SELECT COUNT(*) FROM t3;
COUNT(*)
19
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
403F 40FF [ascii,tail][bad]
3F40 8040 [tail][ascii,tail]
3F3F 8080 [tail][tail]
3F3F 8081 [tail][head,tail]
3FA140 80A140 [tail][mb2,unassigned]
3FA1A3 80A1A3 [tail][mb2]
3FFE40 80FE40 [tail][mb2]
3F3F 80FF [tail][bad]
3F3F 81FF [head,tail][bad]
A1403F A140FF [mb2,unassigned][bad]
A1A33F A1A3FF [mb2][bad]
FE403F FE40FF [mb2][bad]
3F40 FF40 [bad][ascii,tail]
3F3F FF80 [bad][tail]
3F3F FF81 [bad][head,tail]
3FA140 FFA140 [bad][mb2,unassigned]
3FA1A3 FFA1A3 [bad][mb2]
3FFE40 FFFE40 [bad][mb2]
3F3F FFFF [bad][bad]
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that start with an ASCII or an MB2 character,
# followed by a non-ASCII tail, all should be fixed.
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 1
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 2
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 3
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 4
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 5
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 6
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 7
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 8
SELECT COUNT(*) FROM t3;
COUNT(*)
8
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
403F 4080 [ascii,tail][tail]
403F 4081 [ascii,tail][head,tail]
A1403F A14080 [mb2,unassigned][tail]
A1403F A14081 [mb2,unassigned][head,tail]
A1A33F A1A380 [mb2][tail]
A1A33F A1A381 [mb2][head,tail]
FE403F FE4080 [mb2][tail]
FE403F FE4081 [mb2][head,tail]
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Other sequences
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 5
SELECT COUNT(*) FROM t3;
COUNT(*)
6
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
8140 [head,tail][ascii,tail]
8180 [head,tail][tail]
8181 [head,tail][head,tail]
81A140 [head,tail][mb2,unassigned]
81FE40 [head,tail][mb2]
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
81A13F 81A1A3 [head,tail][mb2]
DELETE FROM t3;
DROP TABLE t3;
DROP TABLE t2;
CREATE TABLE t2 AS SELECT
CONCAT(t1.b,t2.b,t3.b) AS b,
t1.type AS type1,
t2.type AS type2,
t3.type AS type3,
CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
FROM t1, t1 t2,t1 t3;
SELECT COUNT(*) FROM t2;
COUNT(*)
343
CREATE TABLE t3
(
b VARBINARY(16),
c VARCHAR(16) CHARACTER SET gbk,
comment VARCHAR(128)
);
#
# A combination of three valid characters, should give no warnings
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
(FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3))
ORDER BY b;
SELECT COUNT(*) FROM t3;
COUNT(*)
64
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
404040 [ascii,tail][ascii,tail][ascii,tail]
4040A140 [ascii,tail][ascii,tail][mb2,unassigned]
4040A1A3 [ascii,tail][ascii,tail][mb2]
4040FE40 [ascii,tail][ascii,tail][mb2]
40A14040 [ascii,tail][mb2,unassigned][ascii,tail]
40A140A140 [ascii,tail][mb2,unassigned][mb2,unassigned]
40A140A1A3 [ascii,tail][mb2,unassigned][mb2]
40A140FE40 [ascii,tail][mb2,unassigned][mb2]
40A1A340 [ascii,tail][mb2][ascii,tail]
40A1A3A140 [ascii,tail][mb2][mb2,unassigned]
40A1A3A1A3 [ascii,tail][mb2][mb2]
40A1A3FE40 [ascii,tail][mb2][mb2]
40FE4040 [ascii,tail][mb2][ascii,tail]
40FE40A140 [ascii,tail][mb2][mb2,unassigned]
40FE40A1A3 [ascii,tail][mb2][mb2]
40FE40FE40 [ascii,tail][mb2][mb2]
A1404040 [mb2,unassigned][ascii,tail][ascii,tail]
A14040A140 [mb2,unassigned][ascii,tail][mb2,unassigned]
A14040A1A3 [mb2,unassigned][ascii,tail][mb2]
A14040FE40 [mb2,unassigned][ascii,tail][mb2]
A140A14040 [mb2,unassigned][mb2,unassigned][ascii,tail]
A140A140A140 [mb2,unassigned][mb2,unassigned][mb2,unassigned]
A140A140A1A3 [mb2,unassigned][mb2,unassigned][mb2]
A140A140FE40 [mb2,unassigned][mb2,unassigned][mb2]
A140A1A340 [mb2,unassigned][mb2][ascii,tail]
A140A1A3A140 [mb2,unassigned][mb2][mb2,unassigned]
A140A1A3A1A3 [mb2,unassigned][mb2][mb2]
A140A1A3FE40 [mb2,unassigned][mb2][mb2]
A140FE4040 [mb2,unassigned][mb2][ascii,tail]
A140FE40A140 [mb2,unassigned][mb2][mb2,unassigned]
A140FE40A1A3 [mb2,unassigned][mb2][mb2]
A140FE40FE40 [mb2,unassigned][mb2][mb2]
A1A34040 [mb2][ascii,tail][ascii,tail]
A1A340A140 [mb2][ascii,tail][mb2,unassigned]
A1A340A1A3 [mb2][ascii,tail][mb2]
A1A340FE40 [mb2][ascii,tail][mb2]
A1A3A14040 [mb2][mb2,unassigned][ascii,tail]
A1A3A140A140 [mb2][mb2,unassigned][mb2,unassigned]
A1A3A140A1A3 [mb2][mb2,unassigned][mb2]
A1A3A140FE40 [mb2][mb2,unassigned][mb2]
A1A3A1A340 [mb2][mb2][ascii,tail]
A1A3A1A3A140 [mb2][mb2][mb2,unassigned]
A1A3A1A3A1A3 [mb2][mb2][mb2]
A1A3A1A3FE40 [mb2][mb2][mb2]
A1A3FE4040 [mb2][mb2][ascii,tail]
A1A3FE40A140 [mb2][mb2][mb2,unassigned]
A1A3FE40A1A3 [mb2][mb2][mb2]
A1A3FE40FE40 [mb2][mb2][mb2]
FE404040 [mb2][ascii,tail][ascii,tail]
FE4040A140 [mb2][ascii,tail][mb2,unassigned]
FE4040A1A3 [mb2][ascii,tail][mb2]
FE4040FE40 [mb2][ascii,tail][mb2]
FE40A14040 [mb2][mb2,unassigned][ascii,tail]
FE40A140A140 [mb2][mb2,unassigned][mb2,unassigned]
FE40A140A1A3 [mb2][mb2,unassigned][mb2]
FE40A140FE40 [mb2][mb2,unassigned][mb2]
FE40A1A340 [mb2][mb2][ascii,tail]
FE40A1A3A140 [mb2][mb2][mb2,unassigned]
FE40A1A3A1A3 [mb2][mb2][mb2]
FE40A1A3FE40 [mb2][mb2][mb2]
FE40FE4040 [mb2][mb2][ascii,tail]
FE40FE40A140 [mb2][mb2][mb2,unassigned]
FE40FE40A1A3 [mb2][mb2][mb2]
FE40FE40FE40 [mb2][mb2][mb2]
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that start with a tail or a bad byte,
# or have a bad byte, all should be fixed.
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 1
Warning 1366 Incorrect string value: '\x80\xFF' for column 'c' at row 2
Warning 1366 Incorrect string value: '\x81\xFF' for column 'c' at row 3
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 4
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 5
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 6
Warning 1366 Incorrect string value: '\xFF@' for column 'c' at row 7
Warning 1366 Incorrect string value: '\xFF\x80' for column 'c' at row 8
Warning 1366 Incorrect string value: '\xFF\x81' for column 'c' at row 9
Warning 1366 Incorrect string value: '\xFF\xA1@' for column 'c' at row 10
Warning 1366 Incorrect string value: '\xFF\xA1\xA3' for column 'c' at row 11
Warning 1366 Incorrect string value: '\xFF\xFE@' for column 'c' at row 12
Warning 1366 Incorrect string value: '\xFF\xFF' for column 'c' at row 13
Warning 1366 Incorrect string value: '\x80@@' for column 'c' at row 14
Warning 1366 Incorrect string value: '\x80@\x80' for column 'c' at row 15
Warning 1366 Incorrect string value: '\x80@\x81' for column 'c' at row 16
Warning 1366 Incorrect string value: '\x80@\xA1@' for column 'c' at row 17
Warning 1366 Incorrect string value: '\x80@\xA1\xA3' for column 'c' at row 18
Warning 1366 Incorrect string value: '\x80@\xFE@' for column 'c' at row 19
Warning 1366 Incorrect string value: '\x80@\xFF' for column 'c' at row 20
Warning 1366 Incorrect string value: '\x80\x80@' for column 'c' at row 21
Warning 1366 Incorrect string value: '\x80\x80\x80' for column 'c' at row 22
Warning 1366 Incorrect string value: '\x80\x80\x81' for column 'c' at row 23
Warning 1366 Incorrect string value: '\x80\x80\xA1@' for column 'c' at row 24
Warning 1366 Incorrect string value: '\x80\x80\xA1\xA3' for column 'c' at row 25
Warning 1366 Incorrect string value: '\x80\x80\xFE@' for column 'c' at row 26
Warning 1366 Incorrect string value: '\x80\x80\xFF' for column 'c' at row 27
Warning 1366 Incorrect string value: '\x80\x81@' for column 'c' at row 28
Warning 1366 Incorrect string value: '\x80\x81\x80' for column 'c' at row 29
Warning 1366 Incorrect string value: '\x80\x81\x81' for column 'c' at row 30
Warning 1366 Incorrect string value: '\x80\x81\xA1@' for column 'c' at row 31
Warning 1366 Incorrect string value: '\x80\x81\xA1\xA3' for column 'c' at row 32
Warning 1366 Incorrect string value: '\x80\x81\xFE@' for column 'c' at row 33
Warning 1366 Incorrect string value: '\x80\x81\xFF' for column 'c' at row 34
Warning 1366 Incorrect string value: '\x80\xA1@@' for column 'c' at row 35
Warning 1366 Incorrect string value: '\x80\xA1@\x80' for column 'c' at row 36
Warning 1366 Incorrect string value: '\x80\xA1@\x81' for column 'c' at row 37
Warning 1366 Incorrect string value: '\x80\xA1@\xA1@' for column 'c' at row 38
Warning 1366 Incorrect string value: '\x80\xA1@\xA1\xA3' for column 'c' at row 39
Warning 1366 Incorrect string value: '\x80\xA1@\xFE@' for column 'c' at row 40
Warning 1366 Incorrect string value: '\x80\xA1@\xFF' for column 'c' at row 41
Warning 1366 Incorrect string value: '\x80\xA1\xA3@' for column 'c' at row 42
Warning 1366 Incorrect string value: '\x80\xA1\xA3\x80' for column 'c' at row 43
Warning 1366 Incorrect string value: '\x80\xA1\xA3\x81' for column 'c' at row 44
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xA1@' for column 'c' at row 45
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xA1\xA3' for column 'c' at row 46
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xFE@' for column 'c' at row 47
Warning 1366 Incorrect string value: '\x80\xA1\xA3\xFF' for column 'c' at row 48
Warning 1366 Incorrect string value: '\x80\xFE@@' for column 'c' at row 49
Warning 1366 Incorrect string value: '\x80\xFE@\x80' for column 'c' at row 50
Warning 1366 Incorrect string value: '\x80\xFE@\x81' for column 'c' at row 51
Warning 1366 Incorrect string value: '\x80\xFE@\xA1@' for column 'c' at row 52
Warning 1366 Incorrect string value: '\x80\xFE@\xA1\xA3' for column 'c' at row 53
Warning 1366 Incorrect string value: '\x80\xFE@\xFE@' for column 'c' at row 54
Warning 1366 Incorrect string value: '\x80\xFE@\xFF' for column 'c' at row 55
Warning 1366 Incorrect string value: '\x80\xFF@' for column 'c' at row 56
Warning 1366 Incorrect string value: '\x80\xFF\x80' for column 'c' at row 57
Warning 1366 Incorrect string value: '\x80\xFF\x81' for column 'c' at row 58
Warning 1366 Incorrect string value: '\x80\xFF\xA1@' for column 'c' at row 59
Warning 1366 Incorrect string value: '\x80\xFF\xA1\xA3' for column 'c' at row 60
Warning 1366 Incorrect string value: '\x80\xFF\xFE@' for column 'c' at row 61
Warning 1366 Incorrect string value: '\x80\xFF\xFF' for column 'c' at row 62
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 63
Warning 1366 Incorrect string value: '\xFF' for column 'c' at row 64
SELECT COUNT(*) FROM t3;
COUNT(*)
163
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
40403F 4040FF [ascii,tail][ascii,tail][bad]
403F3F 4080FF [ascii,tail][tail][bad]
403F3F 4081FF [ascii,tail][head,tail][bad]
40A1403F 40A140FF [ascii,tail][mb2,unassigned][bad]
40A1A33F 40A1A3FF [ascii,tail][mb2][bad]
40FE403F 40FE40FF [ascii,tail][mb2][bad]
403F40 40FF40 [ascii,tail][bad][ascii,tail]
403F3F 40FF80 [ascii,tail][bad][tail]
403F3F 40FF81 [ascii,tail][bad][head,tail]
403FA140 40FFA140 [ascii,tail][bad][mb2,unassigned]
403FA1A3 40FFA1A3 [ascii,tail][bad][mb2]
403FFE40 40FFFE40 [ascii,tail][bad][mb2]
403F3F 40FFFF [ascii,tail][bad][bad]
3F4040 804040 [tail][ascii,tail][ascii,tail]
3F403F 804080 [tail][ascii,tail][tail]
3F403F 804081 [tail][ascii,tail][head,tail]
3F40A140 8040A140 [tail][ascii,tail][mb2,unassigned]
3F40A1A3 8040A1A3 [tail][ascii,tail][mb2]
3F40FE40 8040FE40 [tail][ascii,tail][mb2]
3F403F 8040FF [tail][ascii,tail][bad]
3F3F40 808040 [tail][tail][ascii,tail]
3F3F3F 808080 [tail][tail][tail]
3F3F3F 808081 [tail][tail][head,tail]
3F3FA140 8080A140 [tail][tail][mb2,unassigned]
3F3FA1A3 8080A1A3 [tail][tail][mb2]
3F3FFE40 8080FE40 [tail][tail][mb2]
3F3F3F 8080FF [tail][tail][bad]
3F8140 808140 [tail][head,tail][ascii,tail]
3F8180 808180 [tail][head,tail][tail]
3F8181 808181 [tail][head,tail][head,tail]
3F81A140 8081A140 [tail][head,tail][mb2,unassigned]
3F81A13F 8081A1A3 [tail][head,tail][mb2]
3F81FE40 8081FE40 [tail][head,tail][mb2]
3F3F3F 8081FF [tail][head,tail][bad]
3FA14040 80A14040 [tail][mb2,unassigned][ascii,tail]
3FA1403F 80A14080 [tail][mb2,unassigned][tail]
3FA1403F 80A14081 [tail][mb2,unassigned][head,tail]
3FA140A140 80A140A140 [tail][mb2,unassigned][mb2,unassigned]
3FA140A1A3 80A140A1A3 [tail][mb2,unassigned][mb2]
3FA140FE40 80A140FE40 [tail][mb2,unassigned][mb2]
3FA1403F 80A140FF [tail][mb2,unassigned][bad]
3FA1A340 80A1A340 [tail][mb2][ascii,tail]
3FA1A33F 80A1A380 [tail][mb2][tail]
3FA1A33F 80A1A381 [tail][mb2][head,tail]
3FA1A3A140 80A1A3A140 [tail][mb2][mb2,unassigned]
3FA1A3A1A3 80A1A3A1A3 [tail][mb2][mb2]
3FA1A3FE40 80A1A3FE40 [tail][mb2][mb2]
3FA1A33F 80A1A3FF [tail][mb2][bad]
3FFE4040 80FE4040 [tail][mb2][ascii,tail]
3FFE403F 80FE4080 [tail][mb2][tail]
3FFE403F 80FE4081 [tail][mb2][head,tail]
3FFE40A140 80FE40A140 [tail][mb2][mb2,unassigned]
3FFE40A1A3 80FE40A1A3 [tail][mb2][mb2]
3FFE40FE40 80FE40FE40 [tail][mb2][mb2]
3FFE403F 80FE40FF [tail][mb2][bad]
3F3F40 80FF40 [tail][bad][ascii,tail]
3F3F3F 80FF80 [tail][bad][tail]
3F3F3F 80FF81 [tail][bad][head,tail]
3F3FA140 80FFA140 [tail][bad][mb2,unassigned]
3F3FA1A3 80FFA1A3 [tail][bad][mb2]
3F3FFE40 80FFFE40 [tail][bad][mb2]
3F3F3F 80FFFF [tail][bad][bad]
81403F 8140FF [head,tail][ascii,tail][bad]
81803F 8180FF [head,tail][tail][bad]
81813F 8181FF [head,tail][head,tail][bad]
81A1403F 81A140FF [head,tail][mb2,unassigned][bad]
81A13F3F 81A1A3FF [head,tail][mb2][bad]
81FE403F 81FE40FF [head,tail][mb2][bad]
3F3F40 81FF40 [head,tail][bad][ascii,tail]
3F3F3F 81FF80 [head,tail][bad][tail]
3F3F3F 81FF81 [head,tail][bad][head,tail]
3F3FA140 81FFA140 [head,tail][bad][mb2,unassigned]
3F3FA1A3 81FFA1A3 [head,tail][bad][mb2]
3F3FFE40 81FFFE40 [head,tail][bad][mb2]
3F3F3F 81FFFF [head,tail][bad][bad]
A140403F A14040FF [mb2,unassigned][ascii,tail][bad]
A1403F3F A14080FF [mb2,unassigned][tail][bad]
A1403F3F A14081FF [mb2,unassigned][head,tail][bad]
A140A1403F A140A140FF [mb2,unassigned][mb2,unassigned][bad]
A140A1A33F A140A1A3FF [mb2,unassigned][mb2][bad]
A140FE403F A140FE40FF [mb2,unassigned][mb2][bad]
A1403F40 A140FF40 [mb2,unassigned][bad][ascii,tail]
A1403F3F A140FF80 [mb2,unassigned][bad][tail]
A1403F3F A140FF81 [mb2,unassigned][bad][head,tail]
A1403FA140 A140FFA140 [mb2,unassigned][bad][mb2,unassigned]
A1403FA1A3 A140FFA1A3 [mb2,unassigned][bad][mb2]
A1403FFE40 A140FFFE40 [mb2,unassigned][bad][mb2]
A1403F3F A140FFFF [mb2,unassigned][bad][bad]
A1A3403F A1A340FF [mb2][ascii,tail][bad]
A1A33F3F A1A380FF [mb2][tail][bad]
A1A33F3F A1A381FF [mb2][head,tail][bad]
A1A3A1403F A1A3A140FF [mb2][mb2,unassigned][bad]
A1A3A1A33F A1A3A1A3FF [mb2][mb2][bad]
A1A3FE403F A1A3FE40FF [mb2][mb2][bad]
A1A33F40 A1A3FF40 [mb2][bad][ascii,tail]
A1A33F3F A1A3FF80 [mb2][bad][tail]
A1A33F3F A1A3FF81 [mb2][bad][head,tail]
A1A33FA140 A1A3FFA140 [mb2][bad][mb2,unassigned]
A1A33FA1A3 A1A3FFA1A3 [mb2][bad][mb2]
A1A33FFE40 A1A3FFFE40 [mb2][bad][mb2]
A1A33F3F A1A3FFFF [mb2][bad][bad]
FE40403F FE4040FF [mb2][ascii,tail][bad]
FE403F3F FE4080FF [mb2][tail][bad]
FE403F3F FE4081FF [mb2][head,tail][bad]
FE40A1403F FE40A140FF [mb2][mb2,unassigned][bad]
FE40A1A33F FE40A1A3FF [mb2][mb2][bad]
FE40FE403F FE40FE40FF [mb2][mb2][bad]
FE403F40 FE40FF40 [mb2][bad][ascii,tail]
FE403F3F FE40FF80 [mb2][bad][tail]
FE403F3F FE40FF81 [mb2][bad][head,tail]
FE403FA140 FE40FFA140 [mb2][bad][mb2,unassigned]
FE403FA1A3 FE40FFA1A3 [mb2][bad][mb2]
FE403FFE40 FE40FFFE40 [mb2][bad][mb2]
FE403F3F FE40FFFF [mb2][bad][bad]
3F4040 FF4040 [bad][ascii,tail][ascii,tail]
3F403F FF4080 [bad][ascii,tail][tail]
3F403F FF4081 [bad][ascii,tail][head,tail]
3F40A140 FF40A140 [bad][ascii,tail][mb2,unassigned]
3F40A1A3 FF40A1A3 [bad][ascii,tail][mb2]
3F40FE40 FF40FE40 [bad][ascii,tail][mb2]
3F403F FF40FF [bad][ascii,tail][bad]
3F3F40 FF8040 [bad][tail][ascii,tail]
3F3F3F FF8080 [bad][tail][tail]
3F3F3F FF8081 [bad][tail][head,tail]
3F3FA140 FF80A140 [bad][tail][mb2,unassigned]
3F3FA1A3 FF80A1A3 [bad][tail][mb2]
3F3FFE40 FF80FE40 [bad][tail][mb2]
3F3F3F FF80FF [bad][tail][bad]
3F8140 FF8140 [bad][head,tail][ascii,tail]
3F8180 FF8180 [bad][head,tail][tail]
3F8181 FF8181 [bad][head,tail][head,tail]
3F81A140 FF81A140 [bad][head,tail][mb2,unassigned]
3F81A13F FF81A1A3 [bad][head,tail][mb2]
3F81FE40 FF81FE40 [bad][head,tail][mb2]
3F3F3F FF81FF [bad][head,tail][bad]
3FA14040 FFA14040 [bad][mb2,unassigned][ascii,tail]
3FA1403F FFA14080 [bad][mb2,unassigned][tail]
3FA1403F FFA14081 [bad][mb2,unassigned][head,tail]
3FA140A140 FFA140A140 [bad][mb2,unassigned][mb2,unassigned]
3FA140A1A3 FFA140A1A3 [bad][mb2,unassigned][mb2]
3FA140FE40 FFA140FE40 [bad][mb2,unassigned][mb2]
3FA1403F FFA140FF [bad][mb2,unassigned][bad]
3FA1A340 FFA1A340 [bad][mb2][ascii,tail]
3FA1A33F FFA1A380 [bad][mb2][tail]
3FA1A33F FFA1A381 [bad][mb2][head,tail]
3FA1A3A140 FFA1A3A140 [bad][mb2][mb2,unassigned]
3FA1A3A1A3 FFA1A3A1A3 [bad][mb2][mb2]
3FA1A3FE40 FFA1A3FE40 [bad][mb2][mb2]
3FA1A33F FFA1A3FF [bad][mb2][bad]
3FFE4040 FFFE4040 [bad][mb2][ascii,tail]
3FFE403F FFFE4080 [bad][mb2][tail]
3FFE403F FFFE4081 [bad][mb2][head,tail]
3FFE40A140 FFFE40A140 [bad][mb2][mb2,unassigned]
3FFE40A1A3 FFFE40A1A3 [bad][mb2][mb2]
3FFE40FE40 FFFE40FE40 [bad][mb2][mb2]
3FFE403F FFFE40FF [bad][mb2][bad]
3F3F40 FFFF40 [bad][bad][ascii,tail]
3F3F3F FFFF80 [bad][bad][tail]
3F3F3F FFFF81 [bad][bad][head,tail]
3F3FA140 FFFFA140 [bad][bad][mb2,unassigned]
3F3FA1A3 FFFFA1A3 [bad][bad][mb2]
3F3FFE40 FFFFFE40 [bad][bad][mb2]
3F3F3F FFFFFF [bad][bad][bad]
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that start with an ASCII or an MB2 character,
# followed by a pure non-ASCII tail, all should be fixed.
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
AND type2='tail'
ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 1
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 2
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 3
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 4
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 5
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 6
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 7
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 8
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 9
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 10
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 11
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 12
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 13
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 14
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 15
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 16
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 17
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 18
Warning 1366 Incorrect string value: '\x80@' for column 'c' at row 19
Warning 1366 Incorrect string value: '\x80\x80' for column 'c' at row 20
Warning 1366 Incorrect string value: '\x80\x81' for column 'c' at row 21
Warning 1366 Incorrect string value: '\x80\xA1@' for column 'c' at row 22
Warning 1366 Incorrect string value: '\x80\xA1\xA3' for column 'c' at row 23
Warning 1366 Incorrect string value: '\x80\xFE@' for column 'c' at row 24
SELECT COUNT(*) FROM t3;
COUNT(*)
24
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
403F40 408040 [ascii,tail][tail][ascii,tail]
403F3F 408080 [ascii,tail][tail][tail]
403F3F 408081 [ascii,tail][tail][head,tail]
403FA140 4080A140 [ascii,tail][tail][mb2,unassigned]
403FA1A3 4080A1A3 [ascii,tail][tail][mb2]
403FFE40 4080FE40 [ascii,tail][tail][mb2]
A1403F40 A1408040 [mb2,unassigned][tail][ascii,tail]
A1403F3F A1408080 [mb2,unassigned][tail][tail]
A1403F3F A1408081 [mb2,unassigned][tail][head,tail]
A1403FA140 A14080A140 [mb2,unassigned][tail][mb2,unassigned]
A1403FA1A3 A14080A1A3 [mb2,unassigned][tail][mb2]
A1403FFE40 A14080FE40 [mb2,unassigned][tail][mb2]
A1A33F40 A1A38040 [mb2][tail][ascii,tail]
A1A33F3F A1A38080 [mb2][tail][tail]
A1A33F3F A1A38081 [mb2][tail][head,tail]
A1A33FA140 A1A380A140 [mb2][tail][mb2,unassigned]
A1A33FA1A3 A1A380A1A3 [mb2][tail][mb2]
A1A33FFE40 A1A380FE40 [mb2][tail][mb2]
FE403F40 FE408040 [mb2][tail][ascii,tail]
FE403F3F FE408080 [mb2][tail][tail]
FE403F3F FE408081 [mb2][tail][head,tail]
FE403FA140 FE4080A140 [mb2][tail][mb2,unassigned]
FE403FA1A3 FE4080A1A3 [mb2][tail][mb2]
FE403FFE40 FE4080FE40 [mb2][tail][mb2]
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that consist of two ASCII or MB2 characters,
# followed by a pure non-ASCII tail, all should be fixed.
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
(FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
type3='tail'
ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 1
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 2
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 3
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 4
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 5
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 6
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 7
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 8
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 9
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 10
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 11
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 12
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 13
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 14
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 15
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 16
SELECT COUNT(*) FROM t3;
COUNT(*)
16
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
40403F 404080 [ascii,tail][ascii,tail][tail]
40A1403F 40A14080 [ascii,tail][mb2,unassigned][tail]
40A1A33F 40A1A380 [ascii,tail][mb2][tail]
40FE403F 40FE4080 [ascii,tail][mb2][tail]
A140403F A1404080 [mb2,unassigned][ascii,tail][tail]
A140A1403F A140A14080 [mb2,unassigned][mb2,unassigned][tail]
A140A1A33F A140A1A380 [mb2,unassigned][mb2][tail]
A140FE403F A140FE4080 [mb2,unassigned][mb2][tail]
A1A3403F A1A34080 [mb2][ascii,tail][tail]
A1A3A1403F A1A3A14080 [mb2][mb2,unassigned][tail]
A1A3A1A33F A1A3A1A380 [mb2][mb2][tail]
A1A3FE403F A1A3FE4080 [mb2][mb2][tail]
FE40403F FE404080 [mb2][ascii,tail][tail]
FE40A1403F FE40A14080 [mb2][mb2,unassigned][tail]
FE40A1A33F FE40A1A380 [mb2][mb2][tail]
FE40FE403F FE40FE4080 [mb2][mb2][tail]
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that consist of two MB2 characters,
# followed by a non-ASCII head or tail, all should be fixed.
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
AND NOT FIND_IN_SET('ascii',type3)
AND NOT FIND_IN_SET('mb2',type3)
ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 1
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 2
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 3
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 4
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 5
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 6
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 7
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 8
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 9
SELECT COUNT(*) FROM t3;
COUNT(*)
9
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
A140A1403F A140A14081 [mb2,unassigned][mb2,unassigned][head,tail]
A140A1A33F A140A1A381 [mb2,unassigned][mb2][head,tail]
A140FE403F A140FE4081 [mb2,unassigned][mb2][head,tail]
A1A3A1403F A1A3A14081 [mb2][mb2,unassigned][head,tail]
A1A3A1A33F A1A3A1A381 [mb2][mb2][head,tail]
A1A3FE403F A1A3FE4081 [mb2][mb2][head,tail]
FE40A1403F FE40A14081 [mb2][mb2,unassigned][head,tail]
FE40A1A33F FE40A1A381 [mb2][mb2][head,tail]
FE40FE403F FE40FE4081 [mb2][mb2][head,tail]
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that consist of head + tail + MB2 should go without warnings
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE FIND_IN_SET('head',type1)
AND FIND_IN_SET('tail',type2)
AND FIND_IN_SET('mb2',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
COUNT(*)
9
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
8140A140 [head,tail][ascii,tail][mb2,unassigned]
8140A1A3 [head,tail][ascii,tail][mb2]
8140FE40 [head,tail][ascii,tail][mb2]
8180A140 [head,tail][tail][mb2,unassigned]
8180A1A3 [head,tail][tail][mb2]
8180FE40 [head,tail][tail][mb2]
8181A140 [head,tail][head,tail][mb2,unassigned]
8181A1A3 [head,tail][head,tail][mb2]
8181FE40 [head,tail][head,tail][mb2]
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#
# Sequences that consist of (ascii or mb2) + head + tail should go without warnings
#
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
AND FIND_IN_SET('head',type2)
AND FIND_IN_SET('tail',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
COUNT(*)
12
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
408140 [ascii,tail][head,tail][ascii,tail]
408180 [ascii,tail][head,tail][tail]
408181 [ascii,tail][head,tail][head,tail]
A1408140 [mb2,unassigned][head,tail][ascii,tail]
A1408180 [mb2,unassigned][head,tail][tail]
A1408181 [mb2,unassigned][head,tail][head,tail]
A1A38140 [mb2][head,tail][ascii,tail]
A1A38180 [mb2][head,tail][tail]
A1A38181 [mb2][head,tail][head,tail]
FE408140 [mb2][head,tail][ascii,tail]
FE408180 [mb2][head,tail][tail]
FE408181 [mb2][head,tail][head,tail]
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
Warnings:
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 1
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 3
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 5
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 6
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 7
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 9
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 10
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 12
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 13
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 15
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 16
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 18
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 19
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 27
Warning 1366 Incorrect string value: '\x80' for column 'c' at row 30
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 31
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 35
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 37
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 39
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 41
Warning 1366 Incorrect string value: '\x81' for column 'c' at row 43
Warning 1366 Incorrect string value: '\xA3' for column 'c' at row 45
SELECT COUNT(*) FROM t3;
COUNT(*)
46
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
HEX(c) comment
4081A140 [ascii,tail][head,tail][mb2,unassigned]
4081FE40 [ascii,tail][head,tail][mb2]
814040 [head,tail][ascii,tail][ascii,tail]
818040 [head,tail][tail][ascii,tail]
818140 [head,tail][head,tail][ascii,tail]
81A14040 [head,tail][mb2,unassigned][ascii,tail]
81A140A140 [head,tail][mb2,unassigned][mb2,unassigned]
81A140A1A3 [head,tail][mb2,unassigned][mb2]
81A140FE40 [head,tail][mb2,unassigned][mb2]
81A1A340 [head,tail][mb2][ascii,tail]
81A1A380 [head,tail][mb2][tail]
81A1A381 [head,tail][mb2][head,tail]
81A1A3A140 [head,tail][mb2][mb2,unassigned]
81A1A3FE40 [head,tail][mb2][mb2]
81FE4040 [head,tail][mb2][ascii,tail]
81FE40A140 [head,tail][mb2][mb2,unassigned]
81FE40A1A3 [head,tail][mb2][mb2]
81FE40FE40 [head,tail][mb2][mb2]
A14081A140 [mb2,unassigned][head,tail][mb2,unassigned]
A14081FE40 [mb2,unassigned][head,tail][mb2]
A1A381A140 [mb2][head,tail][mb2,unassigned]
A1A381FE40 [mb2][head,tail][mb2]
FE4081A140 [mb2][head,tail][mb2,unassigned]
FE4081FE40 [mb2][head,tail][mb2]
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
HEX(c) HEX(b) comment
40403F 404081 [ascii,tail][ascii,tail][head,tail]
4081A13F 4081A1A3 [ascii,tail][head,tail][mb2]
40A1403F 40A14081 [ascii,tail][mb2,unassigned][head,tail]
40A1A33F 40A1A381 [ascii,tail][mb2][head,tail]
40FE403F 40FE4081 [ascii,tail][mb2][head,tail]
81403F 814080 [head,tail][ascii,tail][tail]
81403F 814081 [head,tail][ascii,tail][head,tail]
81803F 818080 [head,tail][tail][tail]
81803F 818081 [head,tail][tail][head,tail]
81813F 818180 [head,tail][head,tail][tail]
81813F 818181 [head,tail][head,tail][head,tail]
81A1403F 81A14080 [head,tail][mb2,unassigned][tail]
81A1403F 81A14081 [head,tail][mb2,unassigned][head,tail]
81A1A3A13F 81A1A3A1A3 [head,tail][mb2][mb2]
81FE403F 81FE4080 [head,tail][mb2][tail]
81FE403F 81FE4081 [head,tail][mb2][head,tail]
A140403F A1404081 [mb2,unassigned][ascii,tail][head,tail]
A14081A13F A14081A1A3 [mb2,unassigned][head,tail][mb2]
A1A3403F A1A34081 [mb2][ascii,tail][head,tail]
A1A381A13F A1A381A1A3 [mb2][head,tail][mb2]
FE40403F FE404081 [mb2][ascii,tail][head,tail]
FE4081A13F FE4081A1A3 [mb2][head,tail][mb2]
DROP TABLE t3;
DROP TABLE t2;
DROP TABLE t1;
#
# END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
#
#
# End of 10.1 tests
#

View file

@ -477,7 +477,7 @@ Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1;
COUNT(*)
14623
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
COUNT(*)
63
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;

View file

@ -2626,7 +2626,7 @@ Warning 1366 Incorrect string value: '\x80_' for column 'a' at row 64
SELECT COUNT(*) FROM t1;
COUNT(*)
44671
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
COUNT(*)
17735
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
@ -25938,7 +25938,7 @@ CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET ujis);
INSERT INTO t1 VALUES (0x8EA0);
SELECT HEX(a), CHAR_LENGTH(a) FROM t1;
HEX(a) CHAR_LENGTH(a)
0
3F3F 2
DROP TABLE t1;
SELECT _ujis 0x8EA0;
ERROR HY000: Invalid ujis character string: '8EA0'

View file

@ -225,7 +225,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 varchar(10) character set utf8);
insert into t1 values (0x41FF);
@ -233,7 +233,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 text character set utf8);
insert into t1 values (0x41FF);
@ -241,7 +241,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (a text character set utf8, primary key(a(371)));
ERROR 42000: Specified key was too long; max key length is 1000 bytes

View file

@ -225,7 +225,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 varchar(10) character set utf8mb4);
insert into t1 values (0x41FF);
@ -233,7 +233,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 text character set utf8mb4);
insert into t1 values (0x41FF);
@ -241,7 +241,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (a text character set utf8mb4, primary key(a(371)));
ERROR 42000: Specified key was too long; max key length is 1000 bytes
@ -2327,7 +2327,7 @@ select hex(utf8mb4) from t1;
hex(utf8mb4)
F0908080
F0BFBFBF
3F
delete from t1;
Testing [F2..F3][80..BF][80..BF][80..BF]
insert into t1 values (0xF2808080);
@ -2347,7 +2347,7 @@ select hex(utf8mb4) from t1;
hex(utf8mb4)
F4808080
F48F8080
3F
drop table t1;
#
# Check strnxfrm() with odd length
@ -2472,45 +2472,45 @@ F3A087AFEA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
EA9DA8
3F3F3F3FEA9DA8
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
F09D8480EA9DA8
F09D8480EFB9AB
F09D8480
F09D84803F3F3F3F
F09D849EEA9DA8
F09D849EEFB9AB
F09D849E
F09D849E3F3F3F3F
F09D859EEA9DA8
F09D859EEFB9AB
F09D859E
F09D859E3F3F3F3F
F09D878FEA9DA8
F09D878FEFB9AB
F09D878F
F09D878F3F3F3F3F
F09D9C9FEA9DA8
F09D9C9FEFB9AB
F09D9C9F
F09D9C9F3F3F3F3F
F09D9E9FEA9DA8
F09D9E9FEFB9AB
F09D9E9F
F09D9E9F3F3F3F3F
F48FBFBFEA9DA8
F48FBFBFEFB9AB
F48FBFBF
F48FBFBF3F3F3F3F
F3A087AFEA9DA8
F3A087AFEFB9AB
F3A087AF
F3A087AF3F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
EA9DA8
EFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
3F3F3F3FEA9DA8
3F3F3F3FEFB9AB
3F3F3F3F3F3F3F3F
SELECT count(*) FROM t1, t2
WHERE t1.utf8mb4_encoding > t2.utf8mb3_encoding;
count(*)
@ -2547,7 +2547,7 @@ u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
119070 3F3F3F3F3F3F3F3F3F3F ??????????
65131 EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB ﹫?????﹫﹫?﹫
119070 3F3F3F3F3F3F3F3F3F3F ??????????
1114111
1114111 3F3F3F3F ????
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb4;
SHOW CREATE TABLE t2;
Table Create Table
@ -2559,7 +2559,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
42856 EA9DA8
65131 EFB9AB
1114111
1114111 3F3F3F3F
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
SHOW CREATE TABLE t2;
Table Create Table
@ -2571,7 +2571,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
42856 EA9DA8
65131 EFB9AB
1114111
1114111 3F3F3F3F
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
SHOW CREATE TABLE t1;
Table Create Table
@ -2592,7 +2592,7 @@ u_decimal hex(utf8mb4_encoding)
119070 3F3F3F3F3F3F3F3F3F3F
65131 EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB
119070 3F3F3F3F3F3F3F3F3F3F
1114111
1114111 3F3F3F3F
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb4;
SHOW CREATE TABLE t1;
Table Create Table
@ -2613,7 +2613,7 @@ u_decimal hex(utf8mb4_encoding)
119070 3F3F3F3F3F3F3F3F3F3F
65131 EFB9AB3F3F3F3F3FEFB9ABEFB9AB3FEFB9AB
119070 3F3F3F3F3F3F3F3F3F3F
1114111
1114111 3F3F3F3F
ALTER TABLE t2 MODIFY utf8mb3_encoding VARCHAR(10) CHARACTER SET utf8mb4;
SHOW CREATE TABLE t2;
Table Create Table
@ -2625,7 +2625,7 @@ SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
42856 EA9DA8
65131 EFB9AB
1114111
1114111 3F3F3F3F
DROP TABLE IF EXISTS t3;
CREATE TABLE t3 (
u_decimal int NOT NULL,
@ -3306,5 +3306,53 @@ DFFFFFDFFFFF9CFFFF9DFFFF9EFFFF
# End of 5.6 tests
#
#
# Start of 10.0 tests
#
#
# MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
#
#
# This test sets session character set to 3-byte utf8,
# but then sends a 4-byte sequence (which is wrong for 3-byte utf8).
# It should be replaced to four question marks: '????' in both columns
# (i.e. four unknown bytes are replaced to four question marks),
# then the rest of the string should be stored, so we get 'a ???? b'.
#
SET NAMES utf8;
CREATE TABLE t1 (
a VARCHAR(32) CHARACTER SET utf8mb4,
b VARCHAR(32) CHARACTER SET utf8
);
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'a' at row 1
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'b' at row 1
SELECT * FROM t1;
a b
a ???? b a ???? b
DROP TABLE t1;
#
# This test sets session character set to 4-byte utf8,
# then normally sends a 4-byte sequence.
# It should be stored AS IS into the utf8mb4 column (a),
# and should be replaced to a single question mark in the utf8 column (b)
# (i.e. one character that cannot be converted is replaced to one question mark).
#
SET NAMES utf8mb4;
CREATE TABLE t1 (
a VARCHAR(32) CHARACTER SET utf8mb4,
b VARCHAR(32) CHARACTER SET utf8
);
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
Warnings:
Warning 1366 Incorrect string value: '\xF0\x9F\x98\x81 b' for column 'b' at row 1
SELECT * FROM t1;
a b
a 😁 b a ? b
DROP TABLE t1;
#
# End of 10.0 tests
#
#
# End of tests
#

View file

@ -225,7 +225,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 varchar(10) character set utf8mb4) engine heap;
insert into t1 values (0x41FF);
@ -233,7 +233,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
CREATE TABLE t1 ( a varchar(10) ) CHARACTER SET utf8mb4 ENGINE heap;
INSERT INTO t1 VALUES ( 'test' );
@ -2157,7 +2157,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
select hex(utf8mb4) from t1;
hex(utf8mb4)
3F
F0908080
F0BFBFBF
delete from t1;
@ -2177,7 +2177,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
select hex(utf8mb4) from t1;
hex(utf8mb4)
3F
F4808080
F48F8080
drop table t1;
@ -2274,7 +2274,7 @@ Warning 1366 Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
EA9DA8
3F3F3F3FEA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
F09D8480EA9DA8
F09D849EEA9DA8
@ -2288,40 +2288,40 @@ F3A087AFEA9DA8
F48FBFBFEA9DA8
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
EA9DA8
EFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
3F3F3F3F3F3F3F3F
3F3F3F3FEA9DA8
3F3F3F3FEFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
F09D8480
F09D84803F3F3F3F
F09D8480EA9DA8
F09D8480EFB9AB
F09D849E
F09D849E3F3F3F3F
F09D849EEA9DA8
F09D849EEFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D859E
F09D859E3F3F3F3F
F09D859EEA9DA8
F09D859EEFB9AB
F09D878F
F09D878F3F3F3F3F
F09D878FEA9DA8
F09D878FEFB9AB
F09D9C9F
F09D9C9F3F3F3F3F
F09D9C9FEA9DA8
F09D9C9FEFB9AB
F09D9E9F
F09D9E9F3F3F3F3F
F09D9E9FEA9DA8
F09D9E9FEFB9AB
F3A087AF
F3A087AF3F3F3F3F
F3A087AFEA9DA8
F3A087AFEFB9AB
F48FBFBF
F48FBFBF3F3F3F3F
F48FBFBFEA9DA8
F48FBFBFEFB9AB
SELECT count(*) FROM t1, t2
@ -2337,8 +2337,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=MEMORY DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
1114111
1114111 3F ?
1114111 3F3F3F3F ????
119040 3F ?
119070 3F ?
119070 3F3F3F3F3F3F3F3F3F3F ??????????
@ -2358,7 +2358,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=MEMORY DEFAULT CHARSET=utf8mb4
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
@ -2370,7 +2370,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=MEMORY DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
@ -2382,8 +2382,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=MEMORY DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
u_decimal hex(utf8mb4_encoding)
1114111
1114111 3F
1114111 3F3F3F3F
119040 3F
119070 3F
119070 3F3F3F3F3F3F3F3F3F3F
@ -2403,8 +2403,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=MEMORY DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
u_decimal hex(utf8mb4_encoding)
1114111
1114111 3F
1114111 3F3F3F3F
119040 3F
119070 3F
119070 3F3F3F3F3F3F3F3F3F3F
@ -2424,7 +2424,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=MEMORY DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
DROP TABLE IF EXISTS t3;

View file

@ -225,7 +225,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 varchar(10) character set utf8mb4) engine InnoDB;
insert into t1 values (0x41FF);
@ -233,7 +233,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 text character set utf8mb4) engine InnoDB;
insert into t1 values (0x41FF);
@ -241,7 +241,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (a text character set utf8mb4, primary key(a(371))) engine InnoDB;
ERROR 42000: Specified key was too long; max key length is 767 bytes
@ -2285,7 +2285,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
select hex(utf8mb4) from t1;
hex(utf8mb4)
3F
F0908080
F0BFBFBF
delete from t1;
@ -2305,7 +2305,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
select hex(utf8mb4) from t1;
hex(utf8mb4)
3F
F4808080
F48F8080
drop table t1;
@ -2421,7 +2421,7 @@ Warning 1366 Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
EA9DA8
3F3F3F3FEA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
F09D8480EA9DA8
F09D849EEA9DA8
@ -2435,40 +2435,40 @@ F3A087AFEA9DA8
F48FBFBFEA9DA8
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
EA9DA8
EFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
3F3F3F3F3F3F3F3F
3F3F3F3FEA9DA8
3F3F3F3FEFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
F09D8480
F09D84803F3F3F3F
F09D8480EA9DA8
F09D8480EFB9AB
F09D849E
F09D849E3F3F3F3F
F09D849EEA9DA8
F09D849EEFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D859E
F09D859E3F3F3F3F
F09D859EEA9DA8
F09D859EEFB9AB
F09D878F
F09D878F3F3F3F3F
F09D878FEA9DA8
F09D878FEFB9AB
F09D9C9F
F09D9C9F3F3F3F3F
F09D9C9FEA9DA8
F09D9C9FEFB9AB
F09D9E9F
F09D9E9F3F3F3F3F
F09D9E9FEA9DA8
F09D9E9FEFB9AB
F3A087AF
F3A087AF3F3F3F3F
F3A087AFEA9DA8
F3A087AFEFB9AB
F48FBFBF
F48FBFBF3F3F3F3F
F48FBFBFEA9DA8
F48FBFBFEFB9AB
SELECT count(*) FROM t1, t2
@ -2484,8 +2484,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
1114111
1114111 3F ?
1114111 3F3F3F3F ????
119040 3F ?
119070 3F ?
119070 3F3F3F3F3F3F3F3F3F3F ??????????
@ -2505,7 +2505,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
@ -2517,7 +2517,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
@ -2529,8 +2529,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
u_decimal hex(utf8mb4_encoding)
1114111
1114111 3F
1114111 3F3F3F3F
119040 3F
119070 3F
119070 3F3F3F3F3F3F3F3F3F3F
@ -2550,8 +2550,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
u_decimal hex(utf8mb4_encoding)
1114111
1114111 3F
1114111 3F3F3F3F
119040 3F
119070 3F
119070 3F3F3F3F3F3F3F3F3F3F
@ -2571,7 +2571,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
DROP TABLE IF EXISTS t3;

View file

@ -225,7 +225,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 varchar(10) character set utf8mb4) engine MyISAM;
insert into t1 values (0x41FF);
@ -233,7 +233,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (s1 text character set utf8mb4) engine MyISAM;
insert into t1 values (0x41FF);
@ -241,7 +241,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xFF' for column 's1' at row 1
select hex(s1) from t1;
hex(s1)
41
413F
drop table t1;
create table t1 (a text character set utf8mb4, primary key(a(371))) engine MyISAM;
ERROR 42000: Specified key was too long; max key length is 1000 bytes
@ -2285,7 +2285,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xF0\x8F\x80\x80' for column 'utf8mb4' at row 1
select hex(utf8mb4) from t1;
hex(utf8mb4)
3F
F0908080
F0BFBFBF
delete from t1;
@ -2305,7 +2305,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xF4\x90\x80\x80' for column 'utf8mb4' at row 1
select hex(utf8mb4) from t1;
hex(utf8mb4)
3F
F4808080
F48F8080
drop table t1;
@ -2421,7 +2421,7 @@ Warning 1366 Incorrect string value: '\xF4\x8F\xBF\xBD' for column 'utf8mb3_enco
UPDATE t2 SET utf8mb3_encoding= _utf8mb4 x'ea9da8' where u_decimal= 42856;
SELECT HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8')) FROM t1;
HEX(CONCAT(utf8mb4_encoding, _utf8 x'ea9da8'))
EA9DA8
3F3F3F3FEA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
F09D8480EA9DA8
F09D849EEA9DA8
@ -2435,40 +2435,40 @@ F3A087AFEA9DA8
F48FBFBFEA9DA8
SELECT HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding)) FROM t1,t2;
HEX(CONCAT(utf8mb4_encoding, utf8mb3_encoding))
EA9DA8
EFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB
3F3F3F3F3F3F3F3F
3F3F3F3FEA9DA8
3F3F3F3FEFB9AB
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9AB3F3F3F3F
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEA9DA8
EFB9ABF09D849EF09D859EF09D859EF09D8480F09D859FEFB9ABEFB9ABF09D85A0EFB9ABEFB9AB
F09D8480
F09D84803F3F3F3F
F09D8480EA9DA8
F09D8480EFB9AB
F09D849E
F09D849E3F3F3F3F
F09D849EEA9DA8
F09D849EEFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D84803F3F3F3F
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EA9DA8
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D849EF09D859EF09D859EF09D8480F09D859FF09D859FF09D859FF09D85A0F09D85A0F09D8480EFB9AB
F09D859E
F09D859E3F3F3F3F
F09D859EEA9DA8
F09D859EEFB9AB
F09D878F
F09D878F3F3F3F3F
F09D878FEA9DA8
F09D878FEFB9AB
F09D9C9F
F09D9C9F3F3F3F3F
F09D9C9FEA9DA8
F09D9C9FEFB9AB
F09D9E9F
F09D9E9F3F3F3F3F
F09D9E9FEA9DA8
F09D9E9FEFB9AB
F3A087AF
F3A087AF3F3F3F3F
F3A087AFEA9DA8
F3A087AFEFB9AB
F48FBFBF
F48FBFBF3F3F3F3F
F48FBFBFEA9DA8
F48FBFBFEFB9AB
SELECT count(*) FROM t1, t2
@ -2484,8 +2484,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=MyISAM DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding),utf8mb4_encoding FROM t1;
u_decimal hex(utf8mb4_encoding) utf8mb4_encoding
1114111
1114111 3F ?
1114111 3F3F3F3F ????
119040 3F ?
119070 3F ?
119070 3F3F3F3F3F3F3F3F3F3F ??????????
@ -2505,7 +2505,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
ALTER TABLE t2 CONVERT TO CHARACTER SET utf8mb3;
@ -2517,7 +2517,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=MyISAM DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
ALTER TABLE t1 MODIFY utf8mb4_encoding VARCHAR(10) CHARACTER SET utf8mb3;
@ -2529,8 +2529,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=MyISAM DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
u_decimal hex(utf8mb4_encoding)
1114111
1114111 3F
1114111 3F3F3F3F
119040 3F
119070 3F
119070 3F3F3F3F3F3F3F3F3F3F
@ -2550,8 +2550,8 @@ t1 CREATE TABLE `t1` (
) ENGINE=MyISAM DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb4_encoding) FROM t1;
u_decimal hex(utf8mb4_encoding)
1114111
1114111 3F
1114111 3F3F3F3F
119040 3F
119070 3F
119070 3F3F3F3F3F3F3F3F3F3F
@ -2571,7 +2571,7 @@ t2 CREATE TABLE `t2` (
) ENGINE=MyISAM DEFAULT CHARSET=utf8
SELECT u_decimal,hex(utf8mb3_encoding) FROM t2;
u_decimal hex(utf8mb3_encoding)
1114111
1114111 3F3F3F3F
42856 EA9DA8
65131 EFB9AB
DROP TABLE IF EXISTS t3;

View file

@ -22,13 +22,15 @@ SHOW TABLE STATUS LIKE 't1';
--disable_warnings
--disable_query_log
ALTER TABLE test.t1 ADD code VARCHAR(16) NOT NULL;
let $1= 221;
while ($1)
{
eval INSERT INTO test.t1 VALUES(CHAR(254-$1));
eval INSERT INTO test.t1 VALUES(CHAR(254-$1), HEX(254-$1));
dec $1;
}
DELETE FROM test.t1 WHERE CHAR_LENGTH(a) <> 1;
DELETE FROM test.t1 WHERE a='?' AND code<>'3F';
--enable_query_log
--enable_warnings

View file

@ -30,7 +30,7 @@ Warnings:
Warning 1366 Incorrect string value: '\xA3' for column 'f1' at row 1
select f1 from t1;
f1
?
update t1 set f1=0x6a;
update t1 set f3=repeat(0xb1,8103);
update t1 set f1=0x4a;
@ -39,5 +39,5 @@ Warnings:
Warning 1366 Incorrect string value: '\x82' for column 'f1' at row 1
select f1 from t1;
f1
?
drop table t1;

View file

@ -121,7 +121,7 @@ DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
SELECT COUNT(*) FROM t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
#
# Display all characters that have upper or lower case mapping.
#

View file

@ -99,7 +99,7 @@ DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
#
# Display all characters that have upper or lower case mapping.

View file

@ -446,6 +446,7 @@ SHOW CREATE TABLE t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'' AND a<>'?';
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
SELECT * FROM t1 WHERE CHAR_LENGTH(a)=2;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;

View file

@ -95,8 +95,8 @@ WHERE t11.a >= 0x81 AND t11.a <= 0xFE
AND t12.a >= 0x41 AND t12.a <= 0xFE
ORDER BY t11.a, t12.a;
--enable_warnings
SELECT s as bad_code FROM t2 WHERE a='' ORDER BY s;
DELETE FROM t2 WHERE a='';
SELECT s as bad_code FROM t2 WHERE a='?' ORDER BY s;
DELETE FROM t2 WHERE a='?';
ALTER TABLE t2 ADD u VARCHAR(1) CHARACTER SET utf8, ADD a2 VARCHAR(1) CHARACTER SET euckr;
--disable_warnings
UPDATE t2 SET u=a, a2=u;
@ -145,7 +145,7 @@ ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
#
# Display all characters that have upper or lower case mapping.
#

View file

@ -69,7 +69,7 @@ ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
#
# Display all characters that have upper or lower case mapping.
#

View file

@ -104,7 +104,7 @@ ORDER BY head, tail;
DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
#
# Display all characters that have upper or lower case mapping.
#
@ -203,3 +203,228 @@ SET NAMES gbk;
--echo #
--echo # End of 10.0 tests
--echo #
--echo #
--echo # Start of 10.1 tests
--echo #
--echo #
--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
--echo #
CREATE TABLE t1 (
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
b VARBINARY(16),
type SET('ascii','bad','head','tail','mb2','unassigned')
);
INSERT INTO t1 (b, type) VALUES (0x40, 'ascii,tail');
INSERT INTO t1 (b, type) VALUES (0x80, 'tail');
INSERT INTO t1 (b, type) VALUES (0x81, 'head,tail');
INSERT INTO t1 (b, type) VALUES (0xFF, 'bad');
INSERT INTO t1 (b, type) VALUES (0xA140, 'mb2,unassigned');
INSERT INTO t1 (b, type) VALUES (0xA1A3, 'mb2');
INSERT INTO t1 (b, type) VALUES (0xFE40, 'mb2');
CREATE TABLE t2 AS SELECT
CONCAT(t1.b,t2.b) AS b,
t1.type AS type1,
t2.type AS type2,
CONCAT('[',t1.type,'][',t2.type,']') AS comment
FROM t1, t1 t2;
CREATE TABLE t3
(
b VARBINARY(16),
c VARCHAR(16) CHARACTER SET gbk,
comment VARCHAR(128)
);
--echo #
--echo # A combination of two valid characters, should give no warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2))
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that start with a tail or a bad byte,
--echo # or end with a bad byte, all should be fixed.
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE type1='tail' OR type1='bad' OR type2='bad'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that start with an ASCII or an MB2 character,
--echo # followed by a non-ASCII tail, all should be fixed.
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
AND (FIND_IN_SET('tail',type2) AND NOT FIND_IN_SET('ascii',type2))
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Other sequences
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t3;
DROP TABLE t3;
DROP TABLE t2;
CREATE TABLE t2 AS SELECT
CONCAT(t1.b,t2.b,t3.b) AS b,
t1.type AS type1,
t2.type AS type2,
t3.type AS type3,
CONCAT('[',t1.type,'][',t2.type,'][',t3.type,']') AS comment
FROM t1, t1 t2,t1 t3;
SELECT COUNT(*) FROM t2;
CREATE TABLE t3
(
b VARBINARY(16),
c VARCHAR(16) CHARACTER SET gbk,
comment VARCHAR(128)
);
--echo #
--echo # A combination of three valid characters, should give no warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE
(FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1)) AND
(FIND_IN_SET('ascii',type2) OR FIND_IN_SET('mb2',type2)) AND
(FIND_IN_SET('ascii',type3) OR FIND_IN_SET('mb2',type3))
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that start with a tail or a bad byte,
--echo # or have a bad byte, all should be fixed.
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE type1='tail' OR type1='bad' OR type2='bad' OR type3='bad'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that start with an ASCII or an MB2 character,
--echo # followed by a pure non-ASCII tail, all should be fixed.
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1))
AND type2='tail'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that consist of two ASCII or MB2 characters,
--echo # followed by a pure non-ASCII tail, all should be fixed.
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('mb2',type1) OR FIND_IN_SET('ascii',type1)) AND
(FIND_IN_SET('mb2',type2) OR FIND_IN_SET('ascii',type2)) AND
type3='tail'
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that consist of two MB2 characters,
--echo # followed by a non-ASCII head or tail, all should be fixed.
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE FIND_IN_SET('mb2',type1) AND FIND_IN_SET('mb2',type2)
AND NOT FIND_IN_SET('ascii',type3)
AND NOT FIND_IN_SET('mb2',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that consist of head + tail + MB2 should go without warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE FIND_IN_SET('head',type1)
AND FIND_IN_SET('tail',type2)
AND FIND_IN_SET('mb2',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
--echo #
--echo # Sequences that consist of (ascii or mb2) + head + tail should go without warnings
--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2
WHERE (FIND_IN_SET('ascii',type1) OR FIND_IN_SET('mb2',type1))
AND FIND_IN_SET('head',type2)
AND FIND_IN_SET('tail',type3)
ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DELETE FROM t2 WHERE b IN (SELECT b FROM t3);
DELETE FROM t3;
#--echo #
#--echo # Other sequences
#--echo #
INSERT INTO t3 (b,c,comment) SELECT b,b,comment FROM t2 ORDER BY b;
SELECT COUNT(*) FROM t3;
SELECT HEX(c),comment FROM t3 WHERE b=c ORDER BY b;
SELECT HEX(c),HEX(b),comment FROM t3 WHERE b<>c ORDER BY b;
DROP TABLE t3;
DROP TABLE t2;
DROP TABLE t1;
--echo #
--echo # END OF MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
--echo #
--echo #
--echo # End of 10.1 tests
--echo #

View file

@ -145,7 +145,7 @@ DROP TEMPORARY TABLE head, tail;
SHOW CREATE TABLE t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'?' AND OCTET_LENGTH(a)=1;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
#
# Display all characters that have upper or lower case mapping.

View file

@ -1276,7 +1276,7 @@ SHOW CREATE TABLE t1;
UPDATE t1 SET a=unhex(code) ORDER BY code;
SELECT COUNT(*) FROM t1;
SELECT COUNT(*) FROM t1 WHERE a<>'';
SELECT COUNT(*) FROM t1 WHERE a<>'?';
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=2;
SELECT COUNT(*) FROM t1 WHERE a<>'' AND OCTET_LENGTH(a)=3;
#

View file

@ -1832,6 +1832,50 @@ set @@collation_connection=utf8mb4_bin;
--echo # End of 5.6 tests
--echo #
--echo #
--echo # Start of 10.0 tests
--echo #
--echo #
--echo # MDEV-6566 Different INSERT behaviour on bad bytes with and without character set conversion
--echo #
--echo #
--echo # This test sets session character set to 3-byte utf8,
--echo # but then sends a 4-byte sequence (which is wrong for 3-byte utf8).
--echo # It should be replaced to four question marks: '????' in both columns
--echo # (i.e. four unknown bytes are replaced to four question marks),
--echo # then the rest of the string should be stored, so we get 'a ???? b'.
--echo #
SET NAMES utf8;
CREATE TABLE t1 (
a VARCHAR(32) CHARACTER SET utf8mb4,
b VARCHAR(32) CHARACTER SET utf8
);
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # This test sets session character set to 4-byte utf8,
--echo # then normally sends a 4-byte sequence.
--echo # It should be stored AS IS into the utf8mb4 column (a),
--echo # and should be replaced to a single question mark in the utf8 column (b)
--echo # (i.e. one character that cannot be converted is replaced to one question mark).
--echo #
SET NAMES utf8mb4;
CREATE TABLE t1 (
a VARCHAR(32) CHARACTER SET utf8mb4,
b VARCHAR(32) CHARACTER SET utf8
);
INSERT INTO t1 SELECT 'a 😁 b', 'a 😁 b';
SELECT * FROM t1;
DROP TABLE t1;
--echo #
--echo # End of 10.0 tests
--echo #
--echo #
--echo # End of tests

View file

@ -922,8 +922,8 @@ String_copier::well_formed_copy(CHARSET_INFO *to_cs,
my_charset_same(from_cs, to_cs))
{
m_cannot_convert_error_pos= NULL;
return to_cs->cset->copy_abort(to_cs, to, to_length, from, from_length,
nchars, this);
return to_cs->cset->copy_fix(to_cs, to, to_length, from, from_length,
nchars, this);
}
else
{

View file

@ -50,7 +50,7 @@
#define MY_FUNCTION_NAME(x) my_ ## x ## _big5
#define IS_MB2_CHAR(x,y) (isbig5head(x) && isbig5tail(y))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -6843,6 +6843,9 @@ my_mb_wc_big5(CHARSET_INFO *cs __attribute__((unused)),
if (s+2>e)
return MY_CS_TOOSMALL2;
if (!IS_MB2_CHAR(hi, s[1]))
return MY_CS_ILSEQ;
if (!(pwc[0]=func_big5_uni_onechar((hi<<8)+s[1])))
return -2;
@ -6894,7 +6897,9 @@ static MY_CHARSET_HANDLER my_charset_big5_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_big5,
my_well_formed_char_length_big5,
my_copy_fix_mb,
};
struct charset_info_st my_charset_big5_chinese_ci=

View file

@ -549,6 +549,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
};

View file

@ -186,7 +186,7 @@ static const uchar sort_order_cp932[]=
#define MY_FUNCTION_NAME(x) my_ ## x ## _cp932
#define IS_8BIT_CHAR(x) iscp932kata(x)
#define IS_MB2_CHAR(x,y) (iscp932head(x) && iscp932tail(y))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -34765,7 +34765,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_cp932,
my_well_formed_char_length_cp932,
my_copy_fix_mb,
};

View file

@ -204,7 +204,7 @@ static const uchar sort_order_euc_kr[]=
#define MY_FUNCTION_NAME(x) my_ ## x ## _euckr
#define IS_MB2_CHAR(x,y) (iseuc_kr_head(x) && iseuc_kr_tail(y))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -9928,6 +9928,9 @@ my_mb_wc_euc_kr(CHARSET_INFO *cs __attribute__((unused)),
if (s+2>e)
return MY_CS_TOOSMALL2;
if (!IS_MB2_CHAR(hi, s[1]))
return MY_CS_ILSEQ;
if (!(pwc[0]=func_ksc5601_uni_onechar((hi<<8)+s[1])))
return -2;
@ -9979,7 +9982,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_euckr,
my_well_formed_char_length_euckr,
my_copy_fix_mb,
};

View file

@ -198,7 +198,7 @@ static const uchar sort_order_eucjpms[]=
#define IS_MB2_KATA(x,y) (iseucjpms_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x,y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x,y,z) (iseucjpms_ss3(x) && IS_MB2_JIS(y,z))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -67511,7 +67511,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_eucjpms,
my_well_formed_char_length_eucjpms,
my_copy_fix_mb,
};

View file

@ -167,7 +167,7 @@ static const uchar sort_order_gb2312[]=
#define MY_FUNCTION_NAME(x) my_ ## x ## _gb2312
#define IS_MB2_CHAR(x,y) (isgb2312head(x) && isgb2312tail(y))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -6330,7 +6330,10 @@ my_mb_wc_gb2312(CHARSET_INFO *cs __attribute__((unused)),
if (s+2>e)
return MY_CS_TOOSMALL2;
if (!IS_MB2_CHAR(hi, s[1]))
return MY_CS_ILSEQ;
if (!(pwc[0]=func_gb2312_uni_onechar(((hi<<8)+s[1])&0x7F7F)))
return -2;
@ -6382,7 +6385,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_gb2312,
my_well_formed_char_length_gb2312,
my_copy_fix_mb,
};

View file

@ -45,7 +45,7 @@
#define MY_FUNCTION_NAME(x) my_ ## x ## _gbk
#define IS_MB2_CHAR(x,y) (isgbkhead(x) && isgbktail(y))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -10724,6 +10724,9 @@ my_mb_wc_gbk(CHARSET_INFO *cs __attribute__((unused)),
if (s+2>e)
return MY_CS_TOOSMALL2;
if (!IS_MB2_CHAR(hi, s[1]))
return MY_CS_ILSEQ;
if (!(pwc[0]=func_gbk_uni_onechar( (hi<<8) + s[1])))
return -2;
@ -10776,7 +10779,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_gbk,
my_well_formed_char_length_gbk,
my_copy_fix_mb,
};

View file

@ -422,6 +422,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
};

View file

@ -424,25 +424,95 @@ size_t my_well_formed_len_mb(CHARSET_INFO *cs, const char *b, const char *e,
/*
Copy a multi-byte string. Abort if a bad byte sequence was found.
Not more than "nchars" characters are copied.
Append a badly formed piece of string.
Bad bytes are fixed to '?'.
@param to The destination string
@param to_end The end of the destination string
@param from The source string
@param from_end The end of the source string
@param nchars Write not more than "nchars" characters.
@param status Copying status, must be previously initialized,
e.g. using well_formed_char_length() on the original
full source string.
*/
size_t
my_copy_abort_mb(CHARSET_INFO *cs,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *status)
static size_t
my_append_fix_badly_formed_tail(CHARSET_INFO *cs,
char *to, char *to_end,
const char *from, const char *from_end,
size_t nchars,
MY_STRCOPY_STATUS *status)
{
int well_formed_error;
size_t res;
char *to0= to;
for ( ; nchars; nchars--)
{
int chlen;
if ((chlen= cs->cset->charlen(cs, (const uchar*) from,
(const uchar *) from_end)) > 0)
{
/* Found a valid character */ /* chlen == 1..MBMAXLEN */
DBUG_ASSERT(chlen <= (int) cs->mbmaxlen);
if (to + chlen > to_end)
goto end; /* Does not fit to "to" */
memcpy(to, from, (size_t) chlen);
from+= chlen;
to+= chlen;
continue;
}
if (chlen == MY_CS_ILSEQ) /* chlen == 0 */
{
DBUG_ASSERT(from < from_end); /* Shouldn't get MY_CS_ILSEQ if empty */
goto bad;
}
/* Got an incomplete character */ /* chlen == MY_CS_TOOSMALLXXX */
DBUG_ASSERT(chlen >= MY_CS_TOOSMALL6);
DBUG_ASSERT(chlen <= MY_CS_TOOSMALL);
if (from >= from_end)
break; /* End of the source string */
bad:
/* Bad byte sequence, or incomplete character found */
if (!status->m_well_formed_error_pos)
status->m_well_formed_error_pos= from;
if ((chlen= cs->cset->wc_mb(cs, '?', (uchar*) to, (uchar *) to_end)) <= 0)
break; /* Question mark does not fit into the destination */
to+= chlen;
from++;
}
end:
status->m_source_end_pos= from;
return to - to0;
}
size_t
my_copy_fix_mb(CHARSET_INFO *cs,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *status)
{
size_t well_formed_nchars;
size_t well_formed_length;
size_t fixed_length;
set_if_smaller(src_length, dst_length);
res= cs->cset->well_formed_len(cs, src, src + src_length,
nchars, &well_formed_error);
memmove(dst, src, res);
status->m_source_end_pos= src + res;
status->m_well_formed_error_pos= well_formed_error ? src + res : NULL;
return res;
well_formed_nchars= cs->cset->well_formed_char_length(cs,
src, src + src_length,
nchars, status);
DBUG_ASSERT(well_formed_nchars <= nchars);
memmove(dst, src, (well_formed_length= status->m_source_end_pos - src));
if (!status->m_well_formed_error_pos)
return well_formed_length;
fixed_length= my_append_fix_badly_formed_tail(cs,
dst + well_formed_length,
dst + dst_length,
src + well_formed_length,
src + src_length,
nchars - well_formed_nchars,
status);
return well_formed_length + fixed_length;
}

View file

@ -29,7 +29,70 @@
#endif
#ifdef WELL_FORMED_LEN
#ifdef DEFINE_ASIAN_ROUTINES
#define DEFINE_WELL_FORMED_LEN
#define DEFINE_WELL_FORMED_CHAR_LENGTH
#define DEFINE_CHARLEN
#endif
#ifdef DEFINE_CHARLEN
/**
Returns length of the left-most character of a string.

This is a template: the including file supplies MY_FUNCTION_NAME() and
IS_MB2_CHAR(), and optionally IS_8BIT_CHAR(), IS_MB3_CHAR(), IS_MB4_CHAR().
Only the checks whose macro is defined are compiled in.

@param cs - charset with mbminlen==1 and mbmaxlen<=4
            (used only by the debug assertions)
@param b - the beginning of the string
@param e - the end of the string
@return MY_CS_ILSEQ if a bad byte sequence was found
@return MY_CS_TOOSMALL if the string is empty (b >= e)
@return MY_CS_TOOSMALLN(N) if fewer than N bytes are available and no
        shorter character form matched
@return >0 if a valid character was found
*/
static int
MY_FUNCTION_NAME(charlen)(CHARSET_INFO *cs __attribute__((unused)),
const uchar *b, const uchar *e)
{
DBUG_ASSERT(cs->mbminlen == 1);
DBUG_ASSERT(cs->mbmaxlen <= 4);
/* Empty input: there is no character to measure */
if (b >= e)
return MY_CS_TOOSMALL;
if ((uchar) b[0] < 128)
return 1; /* Single byte ASCII character */
#ifdef IS_8BIT_CHAR
if (IS_8BIT_CHAR(b[0]))
{
/* Single byte non-ASCII character, e.g. half width kana in sjis */
return 1;
}
#endif
/*
A lone non-ASCII byte at the very end is reported as "too small",
without checking whether it could be a valid head byte.
*/
if (b + 2 > e)
return MY_CS_TOOSMALLN(2);
if (IS_MB2_CHAR(b[0], b[1]))
return 2; /* Double byte character */
#ifdef IS_MB3_CHAR
if (b + 3 > e)
return MY_CS_TOOSMALLN(3);
if (IS_MB3_CHAR(b[0], b[1], b[2]))
return 3; /* Three-byte character */
#endif
#ifdef IS_MB4_CHAR
if (b + 4 > e)
return MY_CS_TOOSMALLN(4);
if (IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
return 4; /* Four-byte character */
#endif
/* Wrong byte sequence */
return MY_CS_ILSEQ;
}
#endif /* DEFINE_CHARLEN */
#ifdef DEFINE_WELL_FORMED_LEN
/**
Returns well formed length of a character string with
variable character length for character sets with:
@ -91,4 +154,105 @@ MY_FUNCTION_NAME(well_formed_len)(CHARSET_INFO *cs __attribute__((unused)),
return b - b0;
}
#endif /* WELL_FORMED_LEN */
#endif /* DEFINE_WELL_FORMED_LEN */
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH
/**
  Count the well formed characters at the beginning of a string.
  The result is measured in characters (rather than in bytes).

  Version for character sets that define IS_MB?_CHAR(), e.g. big5.
  IS_MB3_CHAR(), IS_MB4_CHAR() and IS_8BIT_CHAR() are optional:
  a check is compiled in only if its macro is defined.

  @param cs      Character set (not used)
  @param b       The beginning of the string
  @param e       The end of the string
  @param nchars  Check not more than "nchars" left-most characters
  @param status  [OUT] m_source_end_pos is set to where scanning stopped.
                 m_well_formed_error_pos is set to the same position if
                 scanning stopped on a byte sequence that matched none
                 of the character forms, or to NULL otherwise.
  @return        The number of well formed characters scanned
*/
static size_t
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
                                          const char *b, const char *e,
                                          size_t nchars,
                                          MY_STRCOPY_STATUS *status)
{
  size_t remaining= nchars;

  while (b < e && remaining)
  {
    size_t chlen;

    if ((uchar) b[0] < 128)
      chlen= 1;                   /* Single byte ASCII character */
    else if (b + 2 <= e && IS_MB2_CHAR(b[0], b[1]))
      chlen= 2;                   /* Double byte character */
#ifdef IS_MB3_CHAR
    else if (b + 3 <= e && IS_MB3_CHAR(b[0], b[1], b[2]))
      chlen= 3;                   /* Three-byte character */
#endif
#ifdef IS_MB4_CHAR
    else if (b + 4 <= e && IS_MB4_CHAR(b[0], b[1], b[2], b[3]))
      chlen= 4;                   /* Four-byte character */
#endif
#ifdef IS_8BIT_CHAR
    else if (IS_8BIT_CHAR(b[0]))
      chlen= 1;                   /* Single byte non-ASCII character */
#endif
    else
    {
      /* Wrong byte sequence: stop here and report its position */
      status->m_source_end_pos= status->m_well_formed_error_pos= b;
      return nchars - remaining;
    }

    b+= chlen;
    remaining--;
  }

  /* Reached the end of the string, or scanned all "nchars" characters */
  status->m_source_end_pos= b;
  status->m_well_formed_error_pos= NULL;
  return nchars - remaining;
}
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH */
#ifdef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#ifndef CHARLEN
#error CHARLEN is not defined
#endif
/**
  Count the well formed characters at the beginning of a string.
  The result is measured in characters (rather than in bytes).

  Version for character sets that define CHARLEN(), e.g. utf8.
  CHARLEN(cs,b,e) must use the same return code convention as mb_wc():
  - a positive number in the range [1-mbmaxlen] if a valid
    single-byte or multi-byte character was found
  - MY_CS_ILSEQ (0) on a bad byte sequence
  - MY_CS_TOOSMALLxx if the incoming sequence is incomplete

  @param cs      Character set, passed through to CHARLEN()
  @param b       The beginning of the string
  @param e       The end of the string
  @param nchars  Check not more than "nchars" left-most characters
  @param status  [OUT] m_source_end_pos is set to where scanning stopped.
                 m_well_formed_error_pos is set to the same position if
                 CHARLEN() returned a non-positive value while bytes
                 were still left (b < e), or to NULL otherwise.
  @return        The number of well formed characters scanned
*/
static size_t
MY_FUNCTION_NAME(well_formed_char_length)(CHARSET_INFO *cs __attribute__((unused)),
                                          const char *b, const char *e,
                                          size_t nchars,
                                          MY_STRCOPY_STATUS *status)
{
  size_t remaining= nchars;

  while (remaining)
  {
    int chlen= CHARLEN(cs, (uchar*) b, (uchar*) e);
    if (chlen <= 0)
    {
      /*
        Either a bad/incomplete sequence (bytes are left),
        or simply the end of the string (no bytes are left).
      */
      status->m_source_end_pos= b;
      status->m_well_formed_error_pos= b < e ? b : NULL;
      return nchars - remaining;
    }
    b+= chlen;
    remaining--;
  }

  /* All "nchars" characters were scanned successfully */
  status->m_source_end_pos= b;
  status->m_well_formed_error_pos= NULL;
  return nchars - remaining;
}
#endif /* DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN */

View file

@ -248,6 +248,13 @@ int my_strcasecmp_8bit(CHARSET_INFO * cs,const char *s, const char *t)
}
/*
  Length of the left-most character of an 8-bit string.

  @param cs   Character set (not used)
  @param str  The beginning of the string
  @param end  The end of the string
  @return     1 if at least one byte is available,
              MY_CS_TOOSMALL if the string is empty.
*/
int my_charlen_8bit(CHARSET_INFO *cs __attribute__((unused)),
                    const uchar *str, const uchar *end)
{
  if (str < end)
    return 1;
  return MY_CS_TOOSMALL;
}
int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc,
const uchar *str,
const uchar *end __attribute__((unused)))
@ -1108,6 +1115,19 @@ size_t my_well_formed_len_8bit(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  Character length of an 8-bit string: every byte counts as one
  character, so no bad byte sequence is ever reported.

  @param cs      Character set (not used)
  @param start   The beginning of the string
  @param end     The end of the string
  @param nchars  Count not more than "nchars" characters
  @param status  [OUT] m_source_end_pos is set to the position right after
                 the last counted byte; m_well_formed_error_pos is always
                 set to NULL.
  @return        The smaller of the string length in bytes and "nchars"
*/
size_t
my_well_formed_char_length_8bit(CHARSET_INFO *cs __attribute__((unused)),
                                const char *start, const char *end,
                                size_t nchars, MY_STRCOPY_STATUS *status)
{
  size_t available= (size_t) (end - start);
  size_t counted= available < nchars ? available : nchars;

  status->m_source_end_pos= start + counted;
  status->m_well_formed_error_pos= NULL;
  return counted;
}
/*
Copy a 8-bit string. Not more than "nchars" character are copied.
*/
@ -1906,6 +1926,8 @@ MY_CHARSET_HANDLER my_charset_8bit_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
};

View file

@ -187,7 +187,7 @@ static const uchar sort_order_sjis[]=
#define MY_FUNCTION_NAME(x) my_ ## x ## _sjis
#define IS_8BIT_CHAR(x) issjiskata(x)
#define IS_MB2_CHAR(x,y) (issjishead(x) && issjistail(y))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -34144,7 +34144,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_sjis,
my_well_formed_char_length_sjis,
my_copy_fix_mb,
};

View file

@ -886,6 +886,8 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_charlen_8bit,
my_well_formed_char_length_8bit,
my_copy_8bit,
};

View file

@ -92,62 +92,107 @@ my_strcasecmp_mb2_or_mb4(CHARSET_INFO *cs __attribute__((unused)),
}
/*
Copy an UCS2/UTF16/UTF32 string.
Not more that "nchars" characters are copied.
/* Result of copying one zero-padded character, see my_copy_incomplete_char() */
typedef enum
{
MY_CHAR_COPY_OK= 0, /* The character was OK */
MY_CHAR_COPY_ERROR= 1, /* The character was not OK, and could not be fixed */
MY_CHAR_COPY_FIXED= 2 /* The character was not OK, was fixed to '?' */
} my_char_copy_status_t;
UCS2/UTF16/UTF32 may need to prepend zero some bytes,
e.g. when copying from a BINARY source:
INSERT INTO t1 (ucs2_column) VALUES (0x01);
0x01 -> 0x0001
/*
  Copies an incomplete character, left-padding it with 0x00 bytes
  up to cs->mbminlen bytes.

  The incomplete character is the first (src_length % cs->mbminlen)
  bytes of "src".

  @param cs          Character set
  @param dst         The destination string
  @param dst_length  Space available in dst
  @param src         The source string
  @param src_length  Length of src
  @param nchars      Copy not more than nchars characters.
                     The "nchars" parameter of the caller.
                     Only 0 and non-0 are important here.
  @param fix         What to do if zero-padding did not produce a valid
                     character:
                     - FALSE - exit with error.
                     - TRUE  - try to put '?' instead.

  @return  MY_CHAR_COPY_OK     if zero-padding produced a valid character.
                               cs->mbminlen bytes were written to "dst".
  @return  MY_CHAR_COPY_FIXED  if zero-padding did not produce a valid
                               character, but '?' was written to "dst"
                               instead (cs->mbminlen bytes).
  @return  MY_CHAR_COPY_ERROR  if failed; the caller must not use "dst"
                               (the padded bytes may already be there).
                               Possible reasons:
                               - dst_length was too short
                               - nchars was 0
                               - the character after padding appeared not
                                 to be valid, and could not fix it to '?'.
*/
static my_char_copy_status_t
my_copy_incomplete_char(CHARSET_INFO *cs,
                        char *dst, size_t dst_length,
                        const char *src, size_t src_length,
                        size_t nchars, my_bool fix)
{
  size_t tail_length= src_length % cs->mbminlen;
  size_t zero_length;
  uchar *ch;
  uchar *ch_end;

  if (!nchars || dst_length < cs->mbminlen)
    return MY_CHAR_COPY_ERROR;

  /* Build the character: leading zero bytes, then the source bytes */
  zero_length= cs->mbminlen - tail_length;
  bzero(dst, zero_length);
  memmove(dst + zero_length, src, tail_length);

  /*
    In some cases left zero-padding can create an incorrect character.
    For example:
      INSERT INTO t1 (utf32_column) VALUES (0x110000);
    We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
    The valid characters range is limited to 0x00000000..0x0010FFFF.
    Make sure we didn't pad to an incorrect character.
  */
  ch= (uchar *) dst;
  ch_end= ch + cs->mbminlen;
  if (cs->cset->charlen(cs, ch, ch_end) != (int) cs->mbminlen)
  {
    /* Not a valid character: replace it with '?' if allowed and possible */
    if (!fix ||
        cs->cset->wc_mb(cs, '?', ch, ch_end) != (int) cs->mbminlen)
      return MY_CHAR_COPY_ERROR;
    return MY_CHAR_COPY_FIXED;
  }
  return MY_CHAR_COPY_OK;
}
/*
Copy an UCS2/UTF16/UTF32 string, fix bad characters.
*/
static size_t
my_copy_abort_mb2_or_mb4(CHARSET_INFO *cs,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *status)
my_copy_fix_mb2_or_mb4(CHARSET_INFO *cs,
char *dst, size_t dst_length,
const char *src, size_t src_length,
size_t nchars, MY_STRCOPY_STATUS *status)
{
size_t src_offset;
if ((src_offset= (src_length % cs->mbminlen)))
size_t length2, src_offset= src_length % cs->mbminlen;
my_char_copy_status_t padstatus;
if (!src_offset)
return my_copy_fix_mb(cs, dst, dst_length,
src, src_length, nchars, status);
if ((padstatus= my_copy_incomplete_char(cs, dst, dst_length,
src, src_length, nchars, TRUE)) ==
MY_CHAR_COPY_ERROR)
{
int well_formed_error;
size_t pad_length;
if (dst_length < cs->mbminlen || !nchars)
{
status->m_source_end_pos= status->m_well_formed_error_pos= src;
return 0;
}
pad_length= cs->mbminlen - src_offset;
bzero(dst, pad_length);
memmove(dst + pad_length, src, src_offset);
/*
In some cases left zero-padding can create an incorrect character.
For example:
INSERT INTO t1 (utf32_column) VALUES (0x110000);
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
The valid characters range is limited to 0x00000000..0x0010FFFF.
Make sure we didn't pad to an incorrect character.
*/
if (cs->cset->well_formed_len(cs,
dst, dst + cs->mbminlen, 1,
&well_formed_error) != cs->mbminlen)
{
status->m_source_end_pos= status->m_well_formed_error_pos= src;
return 0;
}
nchars--;
src+= src_offset;
src_length-= src_offset;
dst+= cs->mbminlen;
dst_length-= cs->mbminlen;
return
cs->mbminlen /* The left-padded character */ +
my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
status->m_source_end_pos= status->m_well_formed_error_pos= src;
return 0;
}
return my_copy_abort_mb(cs, dst, dst_length, src, src_length, nchars, status);
length2= my_copy_fix_mb(cs, dst + cs->mbminlen, dst_length - cs->mbminlen,
src + src_offset, src_length - src_offset,
nchars - 1, status);
if (padstatus == MY_CHAR_COPY_FIXED)
status->m_well_formed_error_pos= src;
return cs->mbminlen /* The left-padded character */ + length2;
}
@ -1475,6 +1520,24 @@ my_ismbchar_utf16(CHARSET_INFO *cs, const char *b, const char *e)
}
/*
  my_charlen_utf16() - length in bytes of the left-most UTF16 character.

  @param cs   Character set; its mb_wc() handler does the actual decoding
  @param str  The beginning of the string
  @param end  The end of the string

  @return Whatever cs->cset->mb_wc() returns for the bytes in [str, end).
          The decoded code point itself is discarded.
*/
static int
my_charlen_utf16(CHARSET_INFO *cs, const uchar *str, const uchar *end)
{
  my_wc_t ignored_wc;  /* mb_wc() needs somewhere to store the code point */
  int rc= cs->cset->mb_wc(cs, &ignored_wc, str, end);
  return rc;
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf16
#define CHARLEN(cs,str,end) my_charlen_utf16(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.ic"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* Defines my_well_formed_char_length_utf16 */
static uint
my_mbcharlen_utf16(CHARSET_INFO *cs __attribute__((unused)),
uint c __attribute__((unused)))
@ -1742,7 +1805,9 @@ MY_CHARSET_HANDLER my_charset_utf16_handler=
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
my_scan_mb2,
my_copy_abort_mb2_or_mb4,
my_charlen_utf16,
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
};
@ -1912,7 +1977,9 @@ static MY_CHARSET_HANDLER my_charset_utf16le_handler=
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
my_scan_mb2,
my_copy_abort_mb2_or_mb4,
my_charlen_utf16,
my_well_formed_char_length_utf16,
my_copy_fix_mb2_or_mb4,
};
@ -1987,6 +2054,13 @@ struct charset_info_st my_charset_utf16le_bin=
#ifdef HAVE_CHARSET_utf32
/*
Check if b0 and b1 start a valid UTF32 four-byte sequence.
Don't accept characters greater than U+10FFFF.
*/
#define IS_UTF32_MBHEAD4(b0,b1) (!(b0) && ((uchar) (b1) <= 0x10))
static int
my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t *pwc, const uchar *s, const uchar *e)
@ -1994,7 +2068,7 @@ my_utf32_uni(CHARSET_INFO *cs __attribute__((unused)),
if (s + 4 > e)
return MY_CS_TOOSMALL4;
*pwc= (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + (s[3]);
return 4;
return *pwc > 0x10FFFF ? MY_CS_ILSEQ : 4;
}
@ -2004,7 +2078,10 @@ my_uni_utf32(CHARSET_INFO *cs __attribute__((unused)),
{
if (s + 4 > e)
return MY_CS_TOOSMALL4;
if (wc > 0x10FFFF)
return MY_CS_ILUNI;
s[0]= (uchar) (wc >> 24);
s[1]= (uchar) (wc >> 16) & 0xFF;
s[2]= (uchar) (wc >> 8) & 0xFF;
@ -2263,10 +2340,29 @@ my_ismbchar_utf32(CHARSET_INFO *cs __attribute__((unused)),
const char *b,
const char *e)
{
return b + 4 > e ? 0 : 4;
return b + 4 > e || !IS_UTF32_MBHEAD4(b[0], b[1]) ? 0 : 4;
}
/*
  my_charlen_utf32() - length in bytes of the left-most UTF32 character.

  @param cs  Character set (unused)
  @param b   The beginning of the string
  @param e   The end of the string

  @return MY_CS_TOOSMALL4 if fewer than four bytes are available in [b, e).
  @return MY_CS_ILSEQ     if the first two bytes fail IS_UTF32_MBHEAD4().
  @return 4               otherwise.
*/
static int
my_charlen_utf32(CHARSET_INFO *cs __attribute__((unused)),
                 const uchar *b, const uchar *e)
{
  if (b + 4 > e)
    return MY_CS_TOOSMALL4;
  if (!IS_UTF32_MBHEAD4(b[0], b[1]))
    return MY_CS_ILSEQ;
  return 4;
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf32
#define CHARLEN(cs,str,end) my_charlen_utf32(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.ic"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* Defines my_well_formed_char_length_utf32 */
static uint
my_mbcharlen_utf32(CHARSET_INFO *cs __attribute__((unused)) ,
uint c __attribute__((unused)))
@ -2579,8 +2675,7 @@ my_well_formed_len_utf32(CHARSET_INFO *cs __attribute__((unused)),
}
for (; b < e; b+= 4)
{
/* Don't accept characters greater than U+10FFFF */
if (b[0] || (uchar) b[1] > 0x10)
if (!IS_UTF32_MBHEAD4(b[0], b[1]))
{
*error= 1;
return b - b0;
@ -2827,7 +2922,9 @@ MY_CHARSET_HANDLER my_charset_utf32_handler=
my_strtoll10_utf32,
my_strntoull10rnd_mb2_or_mb4,
my_scan_utf32,
my_copy_abort_mb2_or_mb4,
my_charlen_utf32,
my_well_formed_char_length_utf32,
my_copy_fix_mb2_or_mb4,
};
@ -2961,6 +3058,14 @@ static const uchar to_upper_ucs2[] = {
};
/*
  my_charlen_ucs2() - length in bytes of the left-most UCS2 character.

  @param cs  Character set (unused)
  @param s   The beginning of the string
  @param e   The end of the string

  @return MY_CS_TOOSMALLN(2) if fewer than two bytes are available in [s, e).
  @return 2                  otherwise; the byte values are not inspected.
*/
static int
my_charlen_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                const uchar *s, const uchar *e)
{
  if (s + 2 > e)
    return MY_CS_TOOSMALLN(2);
  return 2;
}
static int my_ucs2_uni(CHARSET_INFO *cs __attribute__((unused)),
my_wc_t * pwc, const uchar *s, const uchar *e)
{
@ -3264,6 +3369,31 @@ size_t my_well_formed_len_ucs2(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  my_well_formed_char_length_ucs2() - character length of a UCS2 string.

  Every two-byte unit is counted as one character; byte values are not
  inspected. The only condition reported as an error is a trailing odd byte.

  @param cs           Character set (unused)
  @param b            The beginning of the string
  @param e            The end of the string
  @param nchars       Not more than "nchars" left-most characters are counted
  @param status[OUT]  Fully initialized by this function:
                      - m_source_end_pos is the position where counting
                        stopped;
                      - m_well_formed_error_pos is NULL, or points to the
                        dangling last byte when the string was consumed up
                        to an odd trailing byte.

  @return The number of characters counted (at most "nchars").
*/
static size_t
my_well_formed_char_length_ucs2(CHARSET_INFO *cs __attribute__((unused)),
                                const char *b, const char *e,
                                size_t nchars, MY_STRCOPY_STATUS *status)
{
  size_t length= e - b;
  /*
    Compare in characters, not in bytes: "nchars * 2" wraps around for
    nchars > SIZE_MAX / 2, which would make a huge limit look like it fits
    into the string. "length / 2" cannot overflow and gives the same
    result for every non-wrapping input.
  */
  if (nchars <= length / 2)
  {
    /* All requested characters are available; stop right after them. */
    status->m_well_formed_error_pos= NULL;
    status->m_source_end_pos= b + (nchars * 2);
    return nchars;
  }
  if (length % 2)
  {
    /* The string ends with half of a character: report the odd byte. */
    status->m_well_formed_error_pos= status->m_source_end_pos= e - 1;
  }
  else
  {
    /* The whole string was consumed without problems. */
    status->m_well_formed_error_pos= NULL;
    status->m_source_end_pos= e;
  }
  return length / 2;
}
static
int my_wildcmp_ucs2_ci(CHARSET_INFO *cs,
const char *str,const char *str_end,
@ -3446,7 +3576,9 @@ MY_CHARSET_HANDLER my_charset_ucs2_handler=
my_strtoll10_mb2,
my_strntoull10rnd_mb2_or_mb4,
my_scan_mb2,
my_copy_abort_mb2_or_mb4,
my_charlen_ucs2,
my_well_formed_char_length_ucs2,
my_copy_fix_mb2_or_mb4,
};

View file

@ -197,7 +197,7 @@ static const uchar sort_order_ujis[]=
#define IS_MB2_KATA(x,y) (isujis_ss2(x) && iskata(y))
#define IS_MB2_CHAR(x, y) (IS_MB2_KATA(x,y) || IS_MB2_JIS(x,y))
#define IS_MB3_CHAR(x, y, z) (isujis_ss3(x) && IS_MB2_JIS(y,z))
#define WELL_FORMED_LEN
#define DEFINE_ASIAN_ROUTINES
#include "ctype-mb.ic"
@ -67255,7 +67255,9 @@ static MY_CHARSET_HANDLER my_charset_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_ujis,
my_well_formed_char_length_ujis,
my_copy_fix_mb,
};

View file

@ -5446,8 +5446,8 @@ int my_wildcmp_utf8(CHARSET_INFO *cs,
static
int my_valid_mbcharlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
const uchar *s, const uchar *e)
int my_charlen_utf8(CHARSET_INFO *cs __attribute__((unused)),
const uchar *s, const uchar *e)
{
uchar c;
@ -5515,7 +5515,7 @@ my_well_formed_len_utf8(CHARSET_INFO *cs, const char *b, const char *e,
{
int mb_len;
if ((mb_len= my_valid_mbcharlen_utf8(cs, (uchar*) b, (uchar*) e)) <= 0)
if ((mb_len= my_charlen_utf8(cs, (uchar*) b, (uchar*) e)) <= 0)
{
*error= b < e ? 1 : 0;
break;
@ -5526,9 +5526,20 @@ my_well_formed_len_utf8(CHARSET_INFO *cs, const char *b, const char *e,
return (size_t) (b - b_start);
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8
#define CHARLEN(cs,str,end) my_charlen_utf8(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.ic"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* Defines my_well_formed_char_length_utf8 */
static uint my_ismbchar_utf8(CHARSET_INFO *cs,const char *b, const char *e)
{
int res= my_valid_mbcharlen_utf8(cs, (const uchar*)b, (const uchar*)e);
int res= my_charlen_utf8(cs, (const uchar*) b, (const uchar*) e);
return (res>1) ? res : 0;
}
@ -5615,7 +5626,9 @@ MY_CHARSET_HANDLER my_charset_utf8_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_utf8,
my_well_formed_char_length_utf8,
my_copy_fix_mb,
};
@ -7125,6 +7138,24 @@ my_wc_mb_filename(CHARSET_INFO *cs __attribute__((unused)),
}
/*
  my_charlen_filename() - length in bytes of the left-most character
  in the "filename" character set.

  @param cs   Character set; its mb_wc() handler does the actual decoding
  @param str  The beginning of the string
  @param end  The end of the string

  @return Whatever cs->cset->mb_wc() returns for the bytes in [str, end).
          The decoded code point itself is discarded.
*/
static int
my_charlen_filename(CHARSET_INFO *cs, const uchar *str, const uchar *end)
{
  my_wc_t ignored_wc;  /* mb_wc() needs somewhere to store the code point */
  int rc= cs->cset->mb_wc(cs, &ignored_wc, str, end);
  return rc;
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _filename
#define CHARLEN(cs,str,end) my_charlen_filename(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.ic"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* Defines my_well_formed_char_length_filename */
static MY_COLLATION_HANDLER my_collation_filename_handler =
{
NULL, /* init */
@ -7169,7 +7200,9 @@ static MY_CHARSET_HANDLER my_charset_filename_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_filename,
my_well_formed_char_length_filename,
my_copy_fix_mb,
};
@ -7954,8 +7987,8 @@ my_wildcmp_utf8mb4(CHARSET_INFO *cs,
static int
my_valid_mbcharlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
const uchar *s, const uchar *e)
my_charlen_utf8mb4(CHARSET_INFO *cs __attribute__((unused)),
const uchar *s, const uchar *e)
{
uchar c;
@ -8015,7 +8048,7 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
{
int mb_len;
if ((mb_len= my_valid_mbcharlen_utf8mb4(cs, (uchar*) b, (uchar*) e)) <= 0)
if ((mb_len= my_charlen_utf8mb4(cs, (uchar*) b, (uchar*) e)) <= 0)
{
*error= b < e ? 1 : 0;
break;
@ -8027,10 +8060,19 @@ size_t my_well_formed_len_utf8mb4(CHARSET_INFO *cs,
}
#define MY_FUNCTION_NAME(x) my_ ## x ## _utf8mb4
#define CHARLEN(cs,str,end) my_charlen_utf8mb4(cs,str,end)
#define DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
#include "ctype-mb.ic"
#undef MY_FUNCTION_NAME
#undef CHARLEN
#undef DEFINE_WELL_FORMED_CHAR_LENGTH_USING_CHARLEN
/* Defines my_well_formed_char_length_utf8mb4 */
static uint
my_ismbchar_utf8mb4(CHARSET_INFO *cs, const char *b, const char *e)
{
int res= my_valid_mbcharlen_utf8mb4(cs, (const uchar*)b, (const uchar*)e);
int res= my_charlen_utf8mb4(cs, (const uchar*) b, (const uchar*) e);
return (res > 1) ? res : 0;
}
@ -8113,7 +8155,9 @@ MY_CHARSET_HANDLER my_charset_utf8mb4_handler=
my_strtoll10_8bit,
my_strntoull10rnd_8bit,
my_scan_8bit,
my_copy_abort_mb,
my_charlen_utf8mb4,
my_well_formed_char_length_utf8mb4,
my_copy_fix_mb,
};