From 76e0dc18b66025c273c5ec428e0e08d39f7cc8bf Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Tue, 4 Jun 2024 12:00:20 +0400 Subject: [PATCH] MDEV-34288 SET NAMES DEFAULT crashes `mariadbd --collation-server=utf8mb4_unicode_ci` The @@global.character_set_client variable could erroneously be set to a non-default collation of its character set, which further made the `SET NAMES DEFAULT` statement crash the server. Fixing the code to make sure that the global values of these variables: @@character_set_client @@character_set_connection @@character_set_server @@character_set_database @@character_set_results point to the default compiled collations of their character sets. --- .../main/ctype_utf8mb4_unicode_ci_def.result | 45 +++++++++++++++++++ .../main/ctype_utf8mb4_unicode_ci_def.test | 42 +++++++++++++++++ sql/lex_charset.h | 6 +++ sql/mysqld.cc | 6 ++- sql/sql_lex.cc | 23 +++++++++- sql/sql_lex.h | 3 +- sql/sql_yacc.yy | 11 ++--- sql/sys_vars.cc | 11 ++--- sql/sys_vars.inl | 25 +++++++++++ 9 files changed, 156 insertions(+), 16 deletions(-) diff --git a/mysql-test/main/ctype_utf8mb4_unicode_ci_def.result b/mysql-test/main/ctype_utf8mb4_unicode_ci_def.result index 2e15931248b..a80d6160ed8 100644 --- a/mysql-test/main/ctype_utf8mb4_unicode_ci_def.result +++ b/mysql-test/main/ctype_utf8mb4_unicode_ci_def.result @@ -9,3 +9,48 @@ DROP TABLE t1; # # End of 10.3 tests # +# +# Start of 10.11 tests +# +# +# MDEV-34288 SET NAMES DEFAULT crashes `mariadbd --collation-server=utf8mb4_unicode_ci` +# +SET NAMES DEFAULT COLLATE latin1_bin; +ERROR 42000: COLLATION 'latin1_bin' is not valid for CHARACTER SET 'utf8mb4' +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection @@character_set_results +latin1 latin1_swedish_ci latin1 +SET NAMES DEFAULT COLLATE utf8mb4_bin; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection 
@@character_set_results +utf8mb4 utf8mb4_bin utf8mb4 +SET NAMES DEFAULT COLLATE uca1400_ai_ci; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection @@character_set_results +utf8mb4 utf8mb4_uca1400_ai_ci utf8mb4 +SET @@global.character_set_client=latin1; +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection @@character_set_results +latin1 latin1_swedish_ci latin1 +SET @@global.character_set_client=utf8mb3; +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection @@character_set_results +utf8mb3 utf8mb3_general_ci utf8mb3 +SET @@global.character_set_client=DEFAULT; +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection @@character_set_results +utf8mb4 utf8mb4_general_ci utf8mb4 +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection @@character_set_results +utf8mb4 utf8mb4_general_ci utf8mb4 +SET NAMES DEFAULT COLLATE DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; +@@character_set_connection @@collation_connection @@character_set_results +utf8mb4 utf8mb4_general_ci utf8mb4 +# +# End of 10.11 tests +# diff --git a/mysql-test/main/ctype_utf8mb4_unicode_ci_def.test b/mysql-test/main/ctype_utf8mb4_unicode_ci_def.test index fb7fbe04e3b..3e55d1e6b1e 100644 --- a/mysql-test/main/ctype_utf8mb4_unicode_ci_def.test +++ b/mysql-test/main/ctype_utf8mb4_unicode_ci_def.test @@ -13,3 +13,45 @@ DROP TABLE t1; --echo # --echo # End of 10.3 tests --echo # + + +--echo # +--echo # Start of 10.11 tests +--echo # + +--echo # +--echo # MDEV-34288 SET NAMES DEFAULT 
crashes `mariadbd --collation-server=utf8mb4_unicode_ci` +--echo # + +--error ER_COLLATION_CHARSET_MISMATCH +SET NAMES DEFAULT COLLATE latin1_bin; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + +SET NAMES DEFAULT COLLATE utf8mb4_bin; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + +SET NAMES DEFAULT COLLATE uca1400_ai_ci; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + +SET @@global.character_set_client=latin1; +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + +SET @@global.character_set_client=utf8mb3; +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + +SET @@global.character_set_client=DEFAULT; +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + +SET NAMES DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + +SET NAMES DEFAULT COLLATE DEFAULT; +SELECT @@character_set_connection, @@collation_connection, @@character_set_results; + + +--echo # +--echo # End of 10.11 tests +--echo # diff --git a/sql/lex_charset.h b/sql/lex_charset.h index 2bbeff8a4a6..f593681d27b 100644 --- a/sql/lex_charset.h +++ b/sql/lex_charset.h @@ -288,6 +288,12 @@ public: DBUG_ASSERT(0); return m_ci->coll_name; } + static Lex_extended_collation_st collate_default() + { + Lex_extended_collation_st res; + res.set_collate_default(); + return res; + } void set_collate_default() { m_ci= &my_collation_contextually_typed_default; diff --git a/sql/mysqld.cc b/sql/mysqld.cc index d0619aa26c2..5ae30282729 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -4329,8 +4329,10 @@ static int init_common_variables() if (is_supported_parser_charset(default_charset_info)) { global_system_variables.collation_connection= default_charset_info; - global_system_variables.character_set_results= 
default_charset_info; - global_system_variables.character_set_client= default_charset_info; + global_system_variables.character_set_results= + global_system_variables.character_set_client= + Lex_exact_charset_opt_extended_collate(default_charset_info, true). + find_default_collation(); } else { diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index 7d3810dd326..bbae1f3da99 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -12131,10 +12131,31 @@ bool LEX::sp_create_set_password_instr(THD *thd, } +/* + Handle the SET NAMES statement variants, e.g.: + SET NAMES DEFAULT; + SET NAMES DEFAULT COLLATE DEFAULT; + SET NAMES DEFAULT COLLATE latin1_bin; + SET NAMES latin1; + SET NAMES latin1 COLLATE DEFAULT; + SET NAMES latin1 COLLATE latin1_bin; + SET NAMES utf8mb4 COLLATE uca1400_ai_ci; + + @param pos - The position of the keyword `NAMES` inside the query + @param cs - The character set part, or nullptr if DEFAULT + @param cl - The collation (explicit or contextually typed) + @param no_lookahead - The tokenizer lookahead state +*/ bool LEX::set_names(const char *pos, - const Lex_exact_charset_opt_extended_collate &cscl, + CHARSET_INFO *cs, + const Lex_extended_collation_st &cl, bool no_lookahead) { + CHARSET_INFO *def= global_system_variables.character_set_client; + Lex_exact_charset_opt_extended_collate cscl(cs ? 
cs : def, true); + if (cscl.merge_collation_override(cl)) + return true; + if (sp_create_assignment_lex(thd, pos)) return true; CHARSET_INFO *ci= cscl.collation().charset_info(); diff --git a/sql/sql_lex.h b/sql/sql_lex.h index 73f7e557c49..8eb7c20eb50 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -3873,7 +3873,8 @@ public: int case_stmt_action_then(); bool setup_select_in_parentheses(); bool set_names(const char *pos, - const Lex_exact_charset_opt_extended_collate &cs, + CHARSET_INFO *cs, + const Lex_extended_collation_st &coll, bool no_lookahead); bool set_trigger_new_row(const LEX_CSTRING *name, Item *val); bool set_trigger_field(const LEX_CSTRING *name1, const LEX_CSTRING *name2, diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 577c90e660a..cfd31635643 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -16671,18 +16671,15 @@ option_value_no_option_type: } | NAMES_SYM charset_name_or_default { - CHARSET_INFO *def= global_system_variables.character_set_client; - Lex_exact_charset_opt_extended_collate tmp($2 ? $2 : def, false); - if (Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) + if (Lex->set_names($1.pos(), $2, + Lex_extended_collation_st::collate_default(), + yychar == YYEMPTY)) MYSQL_YYABORT; } | NAMES_SYM charset_name_or_default COLLATE_SYM collation_name_or_default { - CHARSET_INFO *def= global_system_variables.character_set_client; - Lex_exact_charset_opt_extended_collate tmp($2 ? 
$2 : def, false); - if (tmp.merge_collation($4) || - Lex->set_names($1.pos(), tmp, yychar == YYEMPTY)) + if (Lex->set_names($1.pos(), $2, $4, yychar == YYEMPTY)) MYSQL_YYABORT; } | DEFAULT ROLE_SYM grant_role diff --git a/sql/sys_vars.cc b/sql/sys_vars.cc index 8057ecb8626..418507d5c64 100644 --- a/sql/sys_vars.cc +++ b/sql/sys_vars.cc @@ -824,7 +824,7 @@ static Sys_var_struct Sys_character_set_system( READ_ONLY GLOBAL_VAR(system_charset_info), NO_CMD_LINE, offsetof(CHARSET_INFO, cs_name.str), DEFAULT(0)); -static Sys_var_struct Sys_character_set_server( +static Sys_var_charset Sys_character_set_server( "character_set_server", "The default character set", SESSION_VAR(collation_server), NO_CMD_LINE, offsetof(CHARSET_INFO, cs_name.str), DEFAULT(&default_charset_info), @@ -838,7 +838,7 @@ static bool check_charset_db(sys_var *self, THD *thd, set_var *var) var->save_result.ptr= thd->db_charset; return false; } -static Sys_var_struct Sys_character_set_database( +static Sys_var_charset Sys_character_set_database( "character_set_database", "The character set used by the default database", SESSION_VAR(collation_database), NO_CMD_LINE, @@ -862,7 +862,8 @@ static bool fix_thd_charset(sys_var *self, THD *thd, enum_var_type type) thd->update_charset(); return false; } -static Sys_var_struct Sys_character_set_client( + +static Sys_var_charset Sys_character_set_client( "character_set_client", "The character set for statements " "that arrive from the client", NO_SET_STMT SESSION_VAR(character_set_client), NO_CMD_LINE, @@ -872,7 +873,7 @@ static Sys_var_struct Sys_character_set_client( // for check changing export sys_var *Sys_character_set_client_ptr= &Sys_character_set_client; -static Sys_var_struct Sys_character_set_connection( +static Sys_var_charset Sys_character_set_connection( "character_set_connection", "The character set used for " "literals that do not have a character set introducer and for " "number-to-string conversion", @@ -883,7 +884,7 @@ static Sys_var_struct 
Sys_character_set_connection( // for check changing export sys_var *Sys_character_set_connection_ptr= &Sys_character_set_connection; -static Sys_var_struct Sys_character_set_results( +static Sys_var_charset Sys_character_set_results( "character_set_results", "The character set used for returning " "query results to the client", SESSION_VAR(character_set_results), NO_CMD_LINE, diff --git a/sql/sys_vars.inl b/sql/sys_vars.inl index 932b12fcb2e..655d72a6663 100644 --- a/sql/sys_vars.inl +++ b/sql/sys_vars.inl @@ -2166,6 +2166,31 @@ public: { return valptr(thd, *(uchar**)option.def_value); } }; + +/** + The class to store character sets. +*/ +class Sys_var_charset: public Sys_var_struct +{ +public: + using Sys_var_struct::Sys_var_struct; + void global_save_default(THD *thd, set_var *var) + { + /* + The default value can point to an arbitrary collation, + e.g. default_charset_info. + Let's convert it to the compiled default collation. + This makes the code easier in various places such as SET NAMES. + */ + void **default_value= reinterpret_cast<void**>(option.def_value); + var->save_result.ptr= + Lex_exact_charset_opt_extended_collate((CHARSET_INFO *) *default_value, + true). + find_default_collation(); + } +}; + + /** The class for variables that store time zones