From cb9c49a9b20dd8d1eee39641176134e207a4d84f Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Thu, 21 May 2020 18:16:27 +0400 Subject: [PATCH] MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed The code incorrectly assumed in multiple places that TYPELIB values cannot have 0x00 bytes inside. In fact they can: CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY); Note, the TYPELIB value encoding used in FRM is ambiguous about 0x00. So this fix is partial. It fixes 0x00 bytes in many (but not all) places: - In the middle or in the end of a value: CREATE TABLE t1 (a ENUM(0x6100) ...); CREATE TABLE t1 (a ENUM(0x610062) ...); - In the beginning of the first value: CREATE TABLE t1 (a ENUM(0x0061)); CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b')); - In the beginning of the second (and following) value of the *last* ENUM/SET in the table: CREATE TABLE t1 (a ENUM('a',0x0061)); CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061)); However, it does not fix 0x00 when: - 0x00 byte is in the beginning of a value of a non-last ENUM/SET causes an error: CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b')); ERROR 1033 (HY000): Incorrect information in file: './test/t1.frm' This is an ambuguous case and will be fixed separately. We need a new TYPELIB encoding to fix this. Details: - unireg.cc The function pack_header() incorrectly used strlen() to detect a TYPELIB value length. Adding a new function typelib_values_packed_length() which uses TYPELIB::type_lengths[n] to detect the n-th value length, and reusing the new function in pack_header() and packed_fields_length() - table.cc fix_type_pointers() assumed in multiple places that values cannot have 0x00 inside and used strlen(TYPELIB::type_names[n]) to set the corresponding TYPELIB::type_lengths[n]. Also, fix_type_pointers() did not check the encoded data for consistency. Rewriting fix_type_pointers() code to populate TYPELIB::type_names[n] and TYPELIB::type_lengths[n] at the same time, so no additional loop with strlen() is needed any more. Adding many data consistency tests. Fixing the main loop in fix_type_pointers() to use memchr() instead of strchr() to handle 0x00 properly. Fixing create_key_infos() to return the result in a LEX_STRING rather that in a char*. --- mysql-test/r/ctype_binary.result | 93 +++++++++++++++++ mysql-test/t/ctype_binary.test | 60 +++++++++++ sql/table.cc | 174 +++++++++++++++++++++---------- sql/unireg.cc | 23 ++-- 4 files changed, 288 insertions(+), 62 deletions(-) diff --git a/mysql-test/r/ctype_binary.result b/mysql-test/r/ctype_binary.result index 15b44d07ddf..aad9621966f 100644 --- a/mysql-test/r/ctype_binary.result +++ b/mysql-test/r/ctype_binary.result @@ -3171,5 +3171,98 @@ Warnings: Note 1003 select `test`.`t1`.`a` AS `a` from `test`.`t1` where ((`test`.`t1`.`a` = 'a') and (weight_string(`test`.`t1`.`a`) = 'a')) DROP TABLE t1; # +# MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed +# +CREATE TABLE t1(a ENUM(0x6100,0x6200,0x6300) CHARACTER SET 'Binary'); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('a\0','b\0','c\0') CHARACTER SET binary DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (1),(2),(3); +SELECT HEX(a) FROM t1 ORDER BY a; +HEX(a) +6100 +6200 +6300 +DROP TABLE t1; +0x00 in the middle or in the end of a value +CREATE TABLE t1 (a ENUM(0x6100)); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('a\0') DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (1); +SELECT HEX(a) FROM t1; +HEX(a) +6100 +DROP TABLE t1; +CREATE TABLE t1 (a ENUM(0x610062)); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('a\0b') DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (1); +SELECT HEX(a) FROM t1; +HEX(a) +610062 +DROP TABLE t1; +0x00 in the beginning of the first value: +CREATE TABLE t1 (a ENUM(0x0061)); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('\0a') DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES(1); +SELECT * FROM t1; +a +a +DROP TABLE t1; +CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b')); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('\0a') DEFAULT NULL, + `b` enum('b') DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (1,1); +SELECT HEX(a), HEX(b) FROM t1; +HEX(a) HEX(b) +0061 62 +DROP TABLE t1; +# 0x00 in the beginning of the second (and following) value of the *last* ENUM/SET in the table: +CREATE TABLE t1 (a ENUM('a',0x0061)); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('a','\0a') DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (1),(2); +SELECT HEX(a) FROM t1 ORDER BY a; +HEX(a) +61 +0061 +DROP TABLE t1; +CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061)); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `a` enum('a') DEFAULT NULL, + `b` enum('b','\0a') DEFAULT NULL +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +INSERT INTO t1 VALUES (1,1); +INSERT INTO t1 VALUES (1,2); +SELECT HEX(a), HEX(b) FROM t1 ORDER BY a, b; +HEX(a) HEX(b) +61 62 +61 0061 +DROP TABLE t1; +0x00 in the beginning of a value of a non-last ENUM/SET causes an error: +CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b')); +ERROR HY000: Incorrect information in file: 'DIR/t1.frm' +# # End of 10.1 tests # diff --git a/mysql-test/t/ctype_binary.test b/mysql-test/t/ctype_binary.test index 155d8548f77..19a58180187 100644 --- a/mysql-test/t/ctype_binary.test +++ b/mysql-test/t/ctype_binary.test @@ -74,6 +74,66 @@ EXPLAIN EXTENDED SELECT * FROM t1 WHERE COERCIBILITY(a)=2 AND a='a'; EXPLAIN EXTENDED SELECT * FROM t1 WHERE WEIGHT_STRING(a)='a' AND a='a'; DROP TABLE t1; + +--echo # +--echo # MDEV-22111 ERROR 1064 & 1033 and SIGSEGV on CREATE TABLE w/ various charsets on 10.4/5 optimized builds | Assertion `(uint) (table_check_constraints - share->check_constraints) == (uint) (share->table_check_constraints - share->field_check_constraints)' failed +--echo # + +CREATE TABLE t1(a ENUM(0x6100,0x6200,0x6300) CHARACTER SET 'Binary'); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1),(2),(3); +SELECT HEX(a) FROM t1 ORDER BY a; +DROP TABLE t1; + +--echo 0x00 in the middle or in the end of a value + +CREATE TABLE t1 (a ENUM(0x6100)); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a ENUM(0x610062)); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1); +SELECT HEX(a) FROM t1; +DROP TABLE t1; + +--echo 0x00 in the beginning of the first value: + +CREATE TABLE t1 (a ENUM(0x0061)); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES(1); +SELECT * FROM t1; +DROP TABLE t1; + +CREATE TABLE t1 (a ENUM(0x0061), b ENUM('b')); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1,1); +SELECT HEX(a), HEX(b) FROM t1; +DROP TABLE t1; + +--echo # 0x00 in the beginning of the second (and following) value of the *last* ENUM/SET in the table: + +CREATE TABLE t1 (a ENUM('a',0x0061)); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1),(2); +SELECT HEX(a) FROM t1 ORDER BY a; +DROP TABLE t1; + +CREATE TABLE t1 (a ENUM('a'), b ENUM('b',0x0061)); +SHOW CREATE TABLE t1; +INSERT INTO t1 VALUES (1,1); +INSERT INTO t1 VALUES (1,2); +SELECT HEX(a), HEX(b) FROM t1 ORDER BY a, b; +DROP TABLE t1; + +--echo 0x00 in the beginning of a value of a non-last ENUM/SET causes an error: +--replace_regex /'.*t1.frm'/'DIR\/t1.frm'/ +--error ER_NOT_FORM_FILE +CREATE TABLE t1 (a ENUM('a',0x0061), b ENUM('b')); + + --echo # --echo # End of 10.1 tests --echo # diff --git a/sql/table.cc b/sql/table.cc index f1a4b322a80..192faa0d00e 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -64,8 +64,11 @@ LEX_STRING parse_vcol_keyword= { C_STRING_WITH_LEN("PARSE_VCOL_EXPR ") }; /* Functions defined in this file */ -static void fix_type_pointers(const char ***array, TYPELIB *point_to_type, - uint types, char **names); +static bool fix_type_pointers(const char ***typelib_value_names, + uint **typelib_value_lengths, + TYPELIB *point_to_type, uint types, + char *names, size_t names_length); + static uint find_field(Field **fields, uchar *record, uint start, uint length); inline bool is_system_table_name(const char *name, uint length); @@ -670,7 +673,8 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end, uint keys, KEY *keyinfo, uint new_frm_ver, uint &ext_key_parts, TABLE_SHARE *share, uint len, - KEY *first_keyinfo, char* &keynames) + KEY *first_keyinfo, + LEX_STRING *keynames) { uint i, j, n_length; KEY_PART_INFO *key_part= NULL; @@ -813,10 +817,13 @@ static bool create_key_infos(const uchar *strpos, const uchar *frm_image_end, } share->ext_key_parts+= keyinfo->ext_key_parts; } - keynames=(char*) key_part; - strpos+= strnmov(keynames, (char *) strpos, frm_image_end - strpos) - keynames; + keynames->str= (char*) key_part; + keynames->length= strnmov(keynames->str, (char *) strpos, + frm_image_end - strpos) - keynames->str; + strpos+= keynames->length; if (*strpos++) // key names are \0-terminated return 1; + keynames->length++; // Include '\0', to make fix_type_pointers() happy. //reading index comments for (keyinfo= share->key_info, i=0; i < keys; i++, keyinfo++) @@ -918,12 +925,14 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, TABLE_SHARE *share= this; uint new_frm_ver, field_pack_length, new_field_pack_flag; uint interval_count, interval_parts, read_length, int_length; + uint total_typelib_value_count; uint db_create_options, keys, key_parts, n_length; uint com_length, null_bit_pos; uint extra_rec_buf_length; uint i; bool use_hash; - char *keynames, *names, *comment_pos; + LEX_STRING keynames= {NULL, 0}; + char *names, *comment_pos; const uchar *forminfo, *extra2; const uchar *frm_image_end = frm_image + frm_length; uchar *record, *null_flags, *null_pos; @@ -935,6 +944,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, KEY_PART_INFO *key_part= NULL; Field **field_ptr, *reg_field; const char **interval_array; + uint *typelib_value_lengths= NULL; enum legacy_db_type legacy_db_type; my_bitmap_map *bitmaps; bool null_bits_are_used; @@ -1237,7 +1247,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo, new_frm_ver, ext_key_parts, - share, len, &first_keyinfo, keynames)) + share, len, &first_keyinfo, &keynames)) goto err; if (next_chunk + 5 < buff_end) @@ -1330,7 +1340,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, { if (create_key_infos(disk_buff + 6, frm_image_end, keys, keyinfo, new_frm_ver, ext_key_parts, - share, len, &first_keyinfo, keynames)) + share, len, &first_keyinfo, &keynames)) goto err; } @@ -1372,6 +1382,24 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, DBUG_PRINT("info",("i_count: %d i_parts: %d index: %d n_length: %d int_length: %d com_length: %d vcol_screen_length: %d", interval_count,interval_parts, keys,n_length,int_length, com_length, vcol_screen_length)); + /* + We load the following things into TYPELIBs: + - One TYPELIB for field names + - interval_count TYPELIBs for ENUM/SET values + - One TYPELIB for key names + Every TYPELIB requires one extra value with a NULL pointer and zero length, + which is the end-of-values marker. + TODO-10.5+: + Note, we should eventually reuse this total_typelib_value_count + to allocate interval_array. The above code reserves less space + than total_typelib_value_count pointers. So it seems `interval_array` + and `names` overlap in the memory. Too dangerous to fix in 10.1. + */ + total_typelib_value_count= + (share->fields + 1/*end-of-values marker*/) + + (interval_parts + interval_count/*end-of-values markers*/) + + (keys + 1/*end-of-values marker*/); + if (!(field_ptr = (Field **) alloc_root(&share->mem_root, @@ -1383,6 +1411,12 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, vcol_screen_length))))) goto err; /* purecov: inspected */ + + if (!(typelib_value_lengths= (uint *) alloc_root(&share->mem_root, + total_typelib_value_count * + sizeof(uint *)))) + goto err; + share->field= field_ptr; read_length=(uint) (share->fields * field_pack_length + pos+ (uint) (n_length+int_length+com_length+ @@ -1391,6 +1425,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, share->intervals= (TYPELIB*) (field_ptr+share->fields+1); interval_array= (const char **) (share->intervals+interval_count); + // This looks wrong: shouldn't it be (+2+interval_count) instread of (+3) ? names= (char*) (interval_array+share->fields+interval_parts+keys+3); if (!interval_count) share->intervals= 0; // For better debugging @@ -1403,34 +1438,21 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, memcpy(vcol_screen_pos, disk_buff+read_length-vcol_screen_length, vcol_screen_length); - fix_type_pointers(&interval_array, &share->fieldnames, 1, &names); - if (share->fieldnames.count != share->fields) + if (fix_type_pointers(&interval_array, &typelib_value_lengths, + &share->fieldnames, 1, names, n_length) || + share->fieldnames.count != share->fields) goto err; - fix_type_pointers(&interval_array, share->intervals, interval_count, - &names); - { - /* Set ENUM and SET lengths */ - TYPELIB *interval; - for (interval= share->intervals; - interval < share->intervals + interval_count; - interval++) - { - uint count= (uint) (interval->count + 1) * sizeof(uint); - if (!(interval->type_lengths= (uint *) alloc_root(&share->mem_root, - count))) - goto err; - for (count= 0; count < interval->count; count++) - { - char *val= (char*) interval->type_names[count]; - interval->type_lengths[count]= strlen(val); - } - interval->type_lengths[count]= 0; - } - } + if (fix_type_pointers(&interval_array, &typelib_value_lengths, + share->intervals, interval_count, + names + n_length, int_length)) + goto err; - if (keynames) - fix_type_pointers(&interval_array, &share->keynames, 1, &keynames); + if (keynames.length && + (fix_type_pointers(&interval_array, &typelib_value_lengths, + &share->keynames, 1, keynames.str, keynames.length) || + share->keynames.count != keys)) + goto err; /* Allocate handler */ if (!(handler_file= get_new_handler(share, thd->mem_root, @@ -3216,37 +3238,81 @@ void open_table_error(TABLE_SHARE *share, enum open_frm_error error, ** with a '\0' */ -static void -fix_type_pointers(const char ***array, TYPELIB *point_to_type, uint types, - char **names) +static bool +fix_type_pointers(const char ***typelib_value_names, + uint **typelib_value_lengths, + TYPELIB *point_to_type, uint types, + char *ptr, size_t length) { - char *type_name, *ptr; - char chr; + const char *end= ptr + length; - ptr= *names; while (types--) { + char sep; point_to_type->name=0; - point_to_type->type_names= *array; + point_to_type->type_names= *typelib_value_names; + point_to_type->type_lengths= *typelib_value_lengths; - if ((chr= *ptr)) /* Test if empty type */ + /* + Typelib can be encoded as: + 1) 0x00 - empty typelib + 2) 0xFF 0x00 - empty typelib (index names) + 3) sep (value sep)... 0x00 - non-empty typelib (where sep is a separator) + */ + if (length == 2 && ptr[0] == (char) 0xFF && ptr[1] == '\0') { - while ((type_name=strchr(ptr+1,chr)) != NullS) - { - *((*array)++) = ptr+1; - *type_name= '\0'; /* End string */ - ptr=type_name; - } - ptr+=2; /* Skip end mark and last 0 */ + /* + This is a special case #2. + If there are no indexes at all, index names can be encoded + as a two byte sequence: 0xFF 0x00 + TODO: Check if it's a bug in the FRM packing routine. + It should probably write just 0x00 instead of 0xFF00. + */ + ptr+= 2; } - else - ptr++; - point_to_type->count= (uint) (*array - point_to_type->type_names); + else if ((sep= *ptr++)) // A non-empty typelib + { + for ( ; ptr < end; ) + { + // Now scan the next value+sep pair + char *vend= (char*) memchr(ptr, sep, end - ptr); + if (!vend) + return true; // Bad format + *((*typelib_value_names)++)= ptr; + *((*typelib_value_lengths)++)= vend - ptr; + *vend= '\0'; // Change sep to '\0' + ptr= vend + 1; // Shift from sep to the next byte + /* + Now we can have either: + - the end-of-typelib marker (0x00) + - more value+sep pairs + */ + if (!*ptr) + { + /* + We have an ambiguity here. 0x00 can be an end-of-typelib marker, + but it can also be a part of the next value: + CREATE TABLE t1 (a ENUM(0x61, 0x0062) CHARACTER SET BINARY); + If this is the last ENUM/SET in the table and there is still more + packed data left after 0x00, then we know for sure that 0x00 + is a part of the next value. + TODO-10.5+: we should eventually introduce a new unambiguous + typelib encoding for FRM. + */ + if (!types && ptr + 1 < end) + continue; // A binary value starting with 0x00 + ptr++; // Consume the end-of-typelib marker + break; // End of the current typelib + } + } + } + point_to_type->count= (uint) (*typelib_value_names - + point_to_type->type_names); point_to_type++; - *((*array)++)= NullS; /* End of type */ + *((*typelib_value_names)++)= NullS; /* End of type */ + *((*typelib_value_lengths)++)= 0; /* End of type */ } - *names=ptr; /* Update end */ - return; + return ptr != end; } /* fix_type_pointers */ diff --git a/sql/unireg.cc b/sql/unireg.cc index b116218b60e..2ccc440f71b 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -498,6 +498,18 @@ static uint pack_keys(uchar *keybuff, uint key_count, KEY *keyinfo, } /* pack_keys */ +static uint typelib_values_packed_length(const TYPELIB *t) +{ + uint length= 0; + for (uint i= 0; t->type_names[i]; i++) + { + length+= t->type_lengths[i]; + length++; /* Separator */ + } + return length; +} + + /* Make formheader */ static bool pack_header(THD *thd, uchar *forminfo, @@ -619,9 +631,8 @@ static bool pack_header(THD *thd, uchar *forminfo, field->interval_id=get_interval_id(&int_count,create_fields,field); if (old_int_count != int_count) { - for (const char **pos=field->interval->type_names ; *pos ; pos++) - int_length+=(uint) strlen(*pos)+1; // field + suffix prefix - int_parts+=field->interval->count+1; + int_length+= typelib_values_packed_length(field->interval); + int_parts+= field->interval->count + 1; } } if (f_maybe_null(field->pack_flag)) @@ -710,11 +721,7 @@ static size_t packed_fields_length(List &create_fields) { int_count= field->interval_id; length++; - for (int i=0; field->interval->type_names[i]; i++) - { - length+= field->interval->type_lengths[i]; - length++; - } + length+= typelib_values_packed_length(field->interval); length++; } if (field->vcol_info)