Bug#58321 No warning when characters outside BMP0 is converted to UCS2

Problem: when inserting a supplementary character into a UCS2 column,
the character was silently truncated to a 16-bit value.

Fix: produce a warning on an attempt to insert a supplementary character,
and convert it to a question mark.

  @ mysql-test/r/ctype_many.result
  @ mysql-test/t/ctype_many.test
  Adding tests

  @ strings/ctype-ucs2.c
  Check whether wc is greater than the highest supported value (0xFFFF)
  and return MY_CS_ILUNI if so.
This commit is contained in:
Alexander Barkov 2010-12-15 12:58:37 +03:00
parent 935ca4b3c0
commit ac665ecf06
3 changed files with 46 additions and 1 deletions

View file

@ -1684,6 +1684,9 @@ ARMENIAN CAPIT ECH 2
ARMENIAN CAPIT ZA 2 ARMENIAN CAPIT ZA 2
DROP TABLE t1; DROP TABLE t1;
# #
# Start of 5.5 tests
#
#
# WL#1213 Implement 4-byte UTF8, UTF16 and UTF32 # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
# Testing that only utf8mb4 is superset for utf8 # Testing that only utf8mb4 is superset for utf8
# No other Unicode character set pairs have superset/subset relations # No other Unicode character set pairs have superset/subset relations
@ -1739,3 +1742,22 @@ ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8mb4_
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1; SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat' ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
DROP TABLE t1; DROP TABLE t1;
#
# Bug#58321 No warning when characters outside BMP0 is converted to UCS2
#
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
CREATE TABLE t2 (a VARCHAR(10) CHARACTER SET ucs2);
INSERT INTO t1 VALUES (0x10082), (0x12345);
INSERT INTO t2 SELECT * FROM t1;
Warnings:
Warning 1366 Incorrect string value: '\x00\x01\x00\x82' for column 'a' at row 1
Warning 1366 Incorrect string value: '\x00\x01\x23\x45' for column 'a' at row 2
SELECT HEX(a) FROM t2;
HEX(a)
003F
003F
DROP TABLE t1;
DROP TABLE t2;
#
# End of 5.5 tests
#

View file

@ -216,6 +216,10 @@ DROP TABLE t1;
# End of 4.1 tests # End of 4.1 tests
--echo #
--echo # Start of 5.5 tests
--echo #
--echo # --echo #
--echo # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32 --echo # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
--echo # Testing that only utf8mb4 is superset for utf8 --echo # Testing that only utf8mb4 is superset for utf8
@ -284,3 +288,19 @@ SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1; SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
DROP TABLE t1; DROP TABLE t1;
--echo #
--echo # Bug#58321 No warning when characters outside BMP0 is converted to UCS2
--echo #
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
CREATE TABLE t2 (a VARCHAR(10) CHARACTER SET ucs2);
INSERT INTO t1 VALUES (0x10082), (0x12345);
INSERT INTO t2 SELECT * FROM t1;
SELECT HEX(a) FROM t2;
DROP TABLE t1;
DROP TABLE t2;
--echo #
--echo # End of 5.5 tests
--echo #

View file

@ -2694,6 +2694,9 @@ static int my_uni_ucs2(CHARSET_INFO *cs __attribute__((unused)) ,
if ( r+2 > e ) if ( r+2 > e )
return MY_CS_TOOSMALL2; return MY_CS_TOOSMALL2;
if (wc > 0xFFFF) /* UCS2 does not support characters outside BMP */
return MY_CS_ILUNI;
r[0]= (uchar) (wc >> 8); r[0]= (uchar) (wc >> 8);
r[1]= (uchar) (wc & 0xFF); r[1]= (uchar) (wc & 0xFF);
return 2; return 2;