From 56c3b98065b78b08b525e97c32589dfd5020165d Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 2 Jun 2010 16:23:50 +0400 Subject: [PATCH] Bug#52520 Difference in tinytext utf column metadata Problems: - regression (compared to version 5.1) in metadata for BLOB types - inconsistency between length metadata in server and embedded for BLOB types - wrong max_length calculation in items derived from BLOB columns @ libmysqld/lib_sql.cc Calculating length metadata in embedded similarly to server version, using new function char_to_byte_length_safe(). @ mysql-test/r/ctype_utf16.result Adding tests @ mysql-test/r/ctype_utf32.result Adding tests @ mysql-test/r/ctype_utf8.result Adding tests @ mysql-test/r/ctype_utf8mb4.result Adding tests @ mysql-test/t/ctype_utf16.test Adding tests @ mysql-test/t/ctype_utf32.test Adding tests @ mysql-test/t/ctype_utf8.test Adding tests @ mysql-test/t/ctype_utf8mb4.test Adding tests @ sql/field.cc Overriding char_length() for Field_blob: unlike in generic Item::char_length() we don't divide by mbmaxlen for BLOBs. @ sql/field.h - Making Field::char_length() virtual - Adding prototype for Field_blob::char_length() @ sql/item.h - Adding new helper function char_to_byte_length_safe() - Using new function @ sql/protocol.cc Using new function char_to_byte_length_safe(). 
modified: libmysqld/lib_sql.cc mysql-test/r/ctype_utf16.result mysql-test/r/ctype_utf32.result mysql-test/r/ctype_utf8.result mysql-test/r/ctype_utf8mb4.result mysql-test/t/ctype_utf16.test mysql-test/t/ctype_utf32.test mysql-test/t/ctype_utf8.test mysql-test/t/ctype_utf8mb4.test sql/field.cc sql/field.h sql/item.h sql/protocol.cc --- libmysqld/lib_sql.cc | 3 +- mysql-test/r/ctype_utf16.result | 43 +++++++++++++++++++++++++++ mysql-test/r/ctype_utf32.result | 43 +++++++++++++++++++++++++++ mysql-test/r/ctype_utf8.result | 49 +++++++++++++++++++++++++++++++ mysql-test/r/ctype_utf8mb4.result | 43 +++++++++++++++++++++++++++ mysql-test/t/ctype_utf16.test | 21 +++++++++++++ mysql-test/t/ctype_utf32.test | 21 +++++++++++++ mysql-test/t/ctype_utf8.test | 28 ++++++++++++++++++ mysql-test/t/ctype_utf8mb4.test | 21 +++++++++++++ sql/field.cc | 33 +++++++++++++++++++++ sql/field.h | 3 +- sql/item.h | 16 ++++++++-- sql/protocol.cc | 8 ++--- 13 files changed, 323 insertions(+), 9 deletions(-) diff --git a/libmysqld/lib_sql.cc b/libmysqld/lib_sql.cc index 72379fbc089..e727122293c 100644 --- a/libmysqld/lib_sql.cc +++ b/libmysqld/lib_sql.cc @@ -953,7 +953,8 @@ bool Protocol::send_result_set_metadata(List *list, uint flags) server_field.type <= (int) MYSQL_TYPE_BLOB) ? 
server_field.length / item->collation.collation->mbminlen : server_field.length / item->collation.collation->mbmaxlen; - client_field->length= max_char_len * thd_cs->mbmaxlen; + client_field->length= char_to_byte_length_safe(max_char_len, + thd_cs->mbmaxlen); } client_field->type= server_field.type; client_field->flags= server_field.flags; diff --git a/mysql-test/r/ctype_utf16.result b/mysql-test/r/ctype_utf16.result index 3c2fe316d71..c5fd7ef1439 100644 --- a/mysql-test/r/ctype_utf16.result +++ b/mysql-test/r/ctype_utf16.result @@ -1034,5 +1034,48 @@ DROP TABLE t1; SET max_sort_length=DEFAULT; SET NAMES latin1; # +# Bug#52520 Difference in tinytext utf column metadata +# +CREATE TABLE t1 ( +s1 TINYTEXT CHARACTER SET utf16, +s2 TEXT CHARACTER SET utf16, +s3 MEDIUMTEXT CHARACTER SET utf16, +s4 LONGTEXT CHARACTER SET utf16 +); +SET NAMES utf8, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 255 0 Y 16 0 54 +def test t1 t1 s2 s2 252 65535 0 Y 16 0 54 +def test t1 t1 s3 s3 252 16777215 0 Y 16 0 54 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 54 +def HEX(s1) 253 6120 0 Y 0 0 33 +s1 s2 s3 s4 HEX(s1) +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 127 0 Y 16 0 8 +def test t1 t1 s2 s2 252 32767 0 Y 16 0 8 +def test t1 t1 s3 s3 252 8388607 0 Y 16 0 8 +def test t1 t1 s4 s4 252 2147483647 0 Y 16 0 8 +def HEX(s1) 253 2040 0 Y 0 0 8 +s1 s2 s3 s4 HEX(s1) +SET NAMES utf8; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 381 0 Y 16 0 33 +def test t1 t1 s2 s2 252 98301 0 Y 16 0 33 +def test t1 t1 s3 s3 252 25165821 0 Y 16 0 33 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 33 +def 
HEX(s1) 253 6120 0 Y 0 0 33 +s1 s2 s3 s4 HEX(s1) +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `CONCAT(s1)` varchar(255) CHARACTER SET utf16 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1, t2; +# # End of 5.5 tests # diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result index f0f10be9743..9f395f87be7 100644 --- a/mysql-test/r/ctype_utf32.result +++ b/mysql-test/r/ctype_utf32.result @@ -1048,5 +1048,48 @@ DROP TABLE t1; SET max_sort_length=DEFAULT; SET NAMES latin1; # +# Bug#52520 Difference in tinytext utf column metadata +# +CREATE TABLE t1 ( +s1 TINYTEXT CHARACTER SET utf32, +s2 TEXT CHARACTER SET utf32, +s3 MEDIUMTEXT CHARACTER SET utf32, +s4 LONGTEXT CHARACTER SET utf32 +); +SET NAMES utf8mb4, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 255 0 Y 16 0 60 +def test t1 t1 s2 s2 252 65535 0 Y 16 0 60 +def test t1 t1 s3 s3 252 16777215 0 Y 16 0 60 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 60 +def HEX(s1) 253 8160 0 Y 0 0 45 +s1 s2 s3 s4 HEX(s1) +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 63 0 Y 16 0 8 +def test t1 t1 s2 s2 252 16383 0 Y 16 0 8 +def test t1 t1 s3 s3 252 4194303 0 Y 16 0 8 +def test t1 t1 s4 s4 252 1073741823 0 Y 16 0 8 +def HEX(s1) 253 2040 0 Y 0 0 8 +s1 s2 s3 s4 HEX(s1) +SET NAMES utf8mb4; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 252 0 Y 16 0 45 +def test t1 t1 s2 s2 252 65532 0 Y 16 0 45 +def test t1 t1 s3 s3 252 16777212 0 Y 16 0 45 +def test t1 t1 s4 s4 252 4294967292 0 Y 16 0 45 +def HEX(s1) 253 8160 0 Y 0 0 45 +s1 s2 
s3 s4 HEX(s1) +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `CONCAT(s1)` varchar(255) CHARACTER SET utf32 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1, t2; +# # End of 5.5 tests # diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result index 03040d1676c..a4e7c4ef53a 100644 --- a/mysql-test/r/ctype_utf8.result +++ b/mysql-test/r/ctype_utf8.result @@ -2041,3 +2041,52 @@ predicted_order hex(utf8_encoding) 101 E0B78AE2808DE0B6BB DROP TABLE t1; End of 5.4 tests +# +# Start of 5.5 tests +# +# +# Bug#52520 Difference in tinytext utf column metadata +# +CREATE TABLE t1 ( +s1 TINYTEXT CHARACTER SET utf8, +s2 TEXT CHARACTER SET utf8, +s3 MEDIUMTEXT CHARACTER SET utf8, +s4 LONGTEXT CHARACTER SET utf8 +); +SET NAMES utf8, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 255 0 Y 16 0 33 +def test t1 t1 s2 s2 252 65535 0 Y 16 0 33 +def test t1 t1 s3 s3 252 16777215 0 Y 16 0 33 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 33 +def HEX(s1) 253 4590 0 Y 0 0 33 +s1 s2 s3 s4 HEX(s1) +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 255 0 Y 16 0 8 +def test t1 t1 s2 s2 252 65535 0 Y 16 0 8 +def test t1 t1 s3 s3 252 16777215 0 Y 16 0 8 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 8 +def HEX(s1) 253 1530 0 Y 0 0 8 +s1 s2 s3 s4 HEX(s1) +SET NAMES utf8; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 765 0 Y 16 0 33 +def test t1 t1 s2 s2 252 196605 0 Y 16 0 33 +def test t1 t1 s3 s3 252 50331645 0 Y 16 0 33 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 33 +def HEX(s1) 253 4590 0 Y 0 0 
33 +s1 s2 s3 s4 HEX(s1) +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `CONCAT(s1)` varchar(255) CHARACTER SET utf8 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1, t2; +# +# End of 5.5 tests +# diff --git a/mysql-test/r/ctype_utf8mb4.result b/mysql-test/r/ctype_utf8mb4.result index 4de7a192546..454c9d4bfbb 100644 --- a/mysql-test/r/ctype_utf8mb4.result +++ b/mysql-test/r/ctype_utf8mb4.result @@ -2471,6 +2471,49 @@ abc𐐀def 𐐀 DROP TABLE t1; # +# Bug#52520 Difference in tinytext utf column metadata +# +CREATE TABLE t1 ( +s1 TINYTEXT CHARACTER SET utf8mb4, +s2 TEXT CHARACTER SET utf8mb4, +s3 MEDIUMTEXT CHARACTER SET utf8mb4, +s4 LONGTEXT CHARACTER SET utf8mb4 +); +SET NAMES utf8mb4, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 255 0 Y 16 0 45 +def test t1 t1 s2 s2 252 65535 0 Y 16 0 45 +def test t1 t1 s3 s3 252 16777215 0 Y 16 0 45 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 45 +def HEX(s1) 253 8160 0 Y 0 0 45 +s1 s2 s3 s4 HEX(s1) +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 255 0 Y 16 0 8 +def test t1 t1 s2 s2 252 65535 0 Y 16 0 8 +def test t1 t1 s3 s3 252 16777215 0 Y 16 0 8 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 8 +def HEX(s1) 253 2040 0 Y 0 0 8 +s1 s2 s3 s4 HEX(s1) +SET NAMES utf8mb4; +SELECT *, HEX(s1) FROM t1; +Catalog Database Table Table_alias Column Column_alias Type Length Max length Is_null Flags Decimals Charsetnr +def test t1 t1 s1 s1 252 1020 0 Y 16 0 45 +def test t1 t1 s2 s2 252 262140 0 Y 16 0 45 +def test t1 t1 s3 s3 252 67108860 0 Y 16 0 45 +def test t1 t1 s4 s4 252 4294967295 0 Y 16 0 45 +def HEX(s1) 253 8160 0 Y 0 0 45 +s1 s2 s3 s4 HEX(s1) +CREATE TABLE t2 AS SELECT 
CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +Table Create Table +t2 CREATE TABLE `t2` ( + `CONCAT(s1)` varchar(255) CHARACTER SET utf8mb4 DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1, t2; +# # End of 5.5 tests # # diff --git a/mysql-test/t/ctype_utf16.test b/mysql-test/t/ctype_utf16.test index b997bde6e7c..e9c7e569250 100644 --- a/mysql-test/t/ctype_utf16.test +++ b/mysql-test/t/ctype_utf16.test @@ -723,6 +723,27 @@ DROP TABLE t1; SET max_sort_length=DEFAULT; SET NAMES latin1; +--echo # +--echo # Bug#52520 Difference in tinytext utf column metadata +--echo # +CREATE TABLE t1 ( + s1 TINYTEXT CHARACTER SET utf16, + s2 TEXT CHARACTER SET utf16, + s3 MEDIUMTEXT CHARACTER SET utf16, + s4 LONGTEXT CHARACTER SET utf16 +); +--enable_metadata +SET NAMES utf8, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +SET NAMES utf8; +SELECT *, HEX(s1) FROM t1; +--disable_metadata +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +DROP TABLE t1, t2; + # ## TODO: add tests for all engines diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index f1e17532b88..96f1a341d38 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -779,6 +779,27 @@ DROP TABLE t1; SET max_sort_length=DEFAULT; SET NAMES latin1; +--echo # +--echo # Bug#52520 Difference in tinytext utf column metadata +--echo # +CREATE TABLE t1 ( + s1 TINYTEXT CHARACTER SET utf32, + s2 TEXT CHARACTER SET utf32, + s3 MEDIUMTEXT CHARACTER SET utf32, + s4 LONGTEXT CHARACTER SET utf32 +); +--enable_metadata +SET NAMES utf8mb4, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +SET NAMES utf8mb4; +SELECT *, HEX(s1) FROM t1; +--disable_metadata +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +DROP TABLE t1, t2; + --echo # --echo # End of 5.5 tests --echo # diff --git a/mysql-test/t/ctype_utf8.test 
b/mysql-test/t/ctype_utf8.test index 201e96b0b09..f2287488a4f 100644 --- a/mysql-test/t/ctype_utf8.test +++ b/mysql-test/t/ctype_utf8.test @@ -1480,3 +1480,31 @@ DROP TABLE t1; --echo End of 5.4 tests +--echo # +--echo # Start of 5.5 tests +--echo # + +--echo # +--echo # Bug#52520 Difference in tinytext utf column metadata +--echo # +CREATE TABLE t1 ( + s1 TINYTEXT CHARACTER SET utf8, + s2 TEXT CHARACTER SET utf8, + s3 MEDIUMTEXT CHARACTER SET utf8, + s4 LONGTEXT CHARACTER SET utf8 +); +--enable_metadata +SET NAMES utf8, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +SET NAMES utf8; +SELECT *, HEX(s1) FROM t1; +--disable_metadata +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +DROP TABLE t1, t2; + +--echo # +--echo # End of 5.5 tests +--echo # diff --git a/mysql-test/t/ctype_utf8mb4.test b/mysql-test/t/ctype_utf8mb4.test index f396d36e5b0..8fcba92ff47 100644 --- a/mysql-test/t/ctype_utf8mb4.test +++ b/mysql-test/t/ctype_utf8mb4.test @@ -1789,6 +1789,27 @@ SELECT hex(subject), length(subject), char_length(subject), octet_length(subject SELECT subject FROM t1 ORDER BY 1; DROP TABLE t1; +--echo # +--echo # Bug#52520 Difference in tinytext utf column metadata +--echo # +CREATE TABLE t1 ( + s1 TINYTEXT CHARACTER SET utf8mb4, + s2 TEXT CHARACTER SET utf8mb4, + s3 MEDIUMTEXT CHARACTER SET utf8mb4, + s4 LONGTEXT CHARACTER SET utf8mb4 +); +--enable_metadata +SET NAMES utf8mb4, @@character_set_results=NULL; +SELECT *, HEX(s1) FROM t1; +SET NAMES latin1; +SELECT *, HEX(s1) FROM t1; +SET NAMES utf8mb4; +SELECT *, HEX(s1) FROM t1; +--disable_metadata +CREATE TABLE t2 AS SELECT CONCAT(s1) FROM t1; +SHOW CREATE TABLE t2; +DROP TABLE t1, t2; + --echo # --echo # End of 5.5 tests --echo # diff --git a/sql/field.cc b/sql/field.cc index ee7d91c1fb6..ac40ae53d7c 100644 --- a/sql/field.cc +++ b/sql/field.cc @@ -9956,6 +9956,39 @@ Create_field::Create_field(Field *old_field,Field *orig_field) } +/** + 
maximum possible character length for blob. + + This method is used in Item_field::set_field to calculate + max_length for Item. + + For example: + CREATE TABLE t2 SELECT CONCAT(tinyblob_utf8_column) FROM t1; + must create a "VARCHAR(255) CHARACTER SET utf8" column. + + @return + length +*/ + +uint32 Field_blob::char_length() +{ + switch (packlength) + { + case 1: + return 255; + case 2: + return 65535; + case 3: + return 16777215; + case 4: + return (uint32) 4294967295U; + default: + DBUG_ASSERT(0); // we should never go here + return 0; + } +} + + /** maximum possible display length for blob. diff --git a/sql/field.h b/sql/field.h index 66b13d02b89..46d8a2aa6d9 100644 --- a/sql/field.h +++ b/sql/field.h @@ -499,7 +499,7 @@ public: longlong convert_decimal2longlong(const my_decimal *val, bool unsigned_flag, int *err); /* The max. number of characters */ - inline uint32 char_length() const + virtual uint32 char_length() { return field_length / charset()->mbmaxlen; } @@ -1813,6 +1813,7 @@ public: bool has_charset(void) const { return charset() == &my_charset_bin ? FALSE : TRUE; } uint32 max_display_length(); + uint32 char_length(); uint is_equal(Create_field *new_field); inline bool in_read_set() { return bitmap_is_set(table->read_set, field_index); } inline bool in_write_set() { return bitmap_is_set(table->write_set, field_index); } diff --git a/sql/item.h b/sql/item.h index e441a6ff261..e18fa43037a 100644 --- a/sql/item.h +++ b/sql/item.h @@ -34,6 +34,15 @@ void item_init(void); /* Init item functions */ class Item_field; class user_var_entry; + +static inline uint32 +char_to_byte_length_safe(uint32 char_length_arg, uint32 mbmaxlen_arg) +{ + ulonglong tmp= ((ulonglong) char_length_arg) * mbmaxlen_arg; + return (tmp > UINT_MAX32) ? (uint32) UINT_MAX32 : (uint32) tmp; +} + + /* "Declared Type Collation" A combination of collation and its derivation. 
@@ -1171,11 +1180,14 @@ public: { return max_length / collation.collation->mbmaxlen; } void fix_length_and_charset(uint32 max_char_length_arg, CHARSET_INFO *cs) { - max_length= max_char_length_arg * cs->mbmaxlen; + max_length= char_to_byte_length_safe(max_char_length_arg, cs->mbmaxlen); collation.collation= cs; } void fix_char_length(uint32 max_char_length_arg) - { max_length= max_char_length_arg * collation.collation->mbmaxlen; } + { + max_length= char_to_byte_length_safe(max_char_length_arg, + collation.collation->mbmaxlen); + } void fix_length_and_charset_datetime(uint32 max_char_length_arg) { collation.set(&my_charset_numeric, DERIVATION_NUMERIC, MY_REPERTOIRE_ASCII); diff --git a/sql/protocol.cc b/sql/protocol.cc index eeb248012ab..ac78ac88ec6 100644 --- a/sql/protocol.cc +++ b/sql/protocol.cc @@ -747,8 +747,7 @@ bool Protocol::send_result_set_metadata(List *list, uint flags) else { /* With conversion */ - ulonglong max_length; - uint32 field_length; + uint32 field_length, max_length; int2store(pos, thd_charset->number); /* For TEXT/BLOB columns, field_length describes the maximum data @@ -771,9 +770,8 @@ bool Protocol::send_result_set_metadata(List *list, uint flags) field.type <= MYSQL_TYPE_BLOB) ? field.length / item->collation.collation->mbminlen : field.length / item->collation.collation->mbmaxlen; - max_length*= thd_charset->mbmaxlen; - field_length= (max_length > UINT_MAX32) ? - UINT_MAX32 : (uint32) max_length; + field_length= char_to_byte_length_safe(max_length, + thd_charset->mbmaxlen); int4store(pos + 2, field_length); } pos[6]= field.type;