From 0f56e21efa68ba3b37d1171d001c21845c3d2b7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 16 Mar 2022 11:49:47 +0200 Subject: [PATCH 01/52] MDEV-28091 PERFORMANCE_SCHEMA unit tests fail due to memory misalignment Let us make the mocked-up pfs_malloc() return aligned memory, just like the actual implementation does. --- storage/perfschema/unittest/stub_pfs_global.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/storage/perfschema/unittest/stub_pfs_global.h b/storage/perfschema/unittest/stub_pfs_global.h index 8a1f9216ba2..b7adbe33504 100644 --- a/storage/perfschema/unittest/stub_pfs_global.h +++ b/storage/perfschema/unittest/stub_pfs_global.h @@ -1,4 +1,5 @@ /* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved. + Copyright (c) 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License, version 2.0, @@ -24,6 +25,9 @@ #include #include #include +#ifdef HAVE_MEMALIGN +# include +#endif bool pfs_initialized= false; @@ -43,7 +47,17 @@ void *pfs_malloc(size_t size, myf) if (--stub_alloc_fails_after_count <= 0) return NULL; +#ifndef PFS_ALIGNEMENT void *ptr= malloc(size); +#elif defined HAVE_MEMALIGN + void *ptr= memalign(PFS_ALIGNEMENT, size); +#elif defined HAVE_ALIGNED_MALLOC + void *ptr= _aligned_malloc(size, PFS_ALIGNEMENT); +#else + void *ptr; + if (posix_memalign(&ptr, PFS_ALIGNEMENT, size)) + ptr= NULL; +#endif if (ptr != NULL) memset(ptr, 0, size); return ptr; From 75e39f3cba01f9f3a835d8e311658bcbf0453d5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 17 Mar 2022 10:13:50 +0200 Subject: [PATCH 02/52] Fix gcc-12 -O2 -Wmaybe-uninitialized --- storage/innobase/fil/fil0fil.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 8075defac4c..6980078f87d 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2021, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3316,10 +3316,11 @@ fil_make_filepath( if (path != NULL) { memcpy(full_name, path, path_len); len = path_len; - full_name[len] = '\0'; - os_normalize_path(full_name); } + full_name[len] = '\0'; + os_normalize_path(full_name); + if (trim_name) { /* Find the offset of the last DIR separator and set it to null in order to strip off the old basename from this path. */ From 118826d1734bc4f650f9ec96b3d0d885eedba9c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 17 Mar 2022 10:20:07 +0200 Subject: [PATCH 03/52] Fix gcc-12 -O2 -Warray-bounds --- sql/handler.h | 4 +-- sql/sql_table.cc | 43 +++++++++++++++++---------------- storage/innobase/fts/fts0fts.cc | 4 +-- 3 files changed, 25 insertions(+), 26 deletions(-) diff --git a/sql/handler.h b/sql/handler.h index 02a4a76c6c1..27836f1735f 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -2,7 +2,7 @@ #define HANDLER_INCLUDED /* Copyright (c) 2000, 2019, Oracle and/or its affiliates. - Copyright (c) 2009, 2021, MariaDB + Copyright (c) 2009, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -4275,7 +4275,7 @@ static inline const char *ha_resolve_storage_engine_name(const handlerton *db_ty static inline bool ha_check_storage_engine_flag(const handlerton *db_type, uint32 flag) { - return db_type == NULL ? FALSE : MY_TEST(db_type->flags & flag); + return db_type && (db_type->flags & flag); } static inline bool ha_storage_engine_is_enabled(const handlerton *db_type) diff --git a/sql/sql_table.cc b/sql/sql_table.cc index e1f752191ae..a2dc5c97aeb 100644 --- a/sql/sql_table.cc +++ b/sql/sql_table.cc @@ -9191,22 +9191,24 @@ bool mysql_alter_table(THD *thd,char *new_db, char *new_name, create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT; } + handlerton * const old_db_type= table->s->db_type(); + handlerton *new_db_type= create_info->db_type; + DBUG_PRINT("info", ("old type: %s new type: %s", - ha_resolve_storage_engine_name(table->s->db_type()), - ha_resolve_storage_engine_name(create_info->db_type))); - if (ha_check_storage_engine_flag(table->s->db_type(), HTON_ALTER_NOT_SUPPORTED)) + ha_resolve_storage_engine_name(old_db_type), + ha_resolve_storage_engine_name(new_db_type))); + if (ha_check_storage_engine_flag(old_db_type, HTON_ALTER_NOT_SUPPORTED)) { DBUG_PRINT("info", ("doesn't support alter")); - my_error(ER_ILLEGAL_HA, MYF(0), hton_name(table->s->db_type())->str, + my_error(ER_ILLEGAL_HA, MYF(0), hton_name(old_db_type)->str, alter_ctx.db, alter_ctx.table_name); DBUG_RETURN(true); } - if (ha_check_storage_engine_flag(create_info->db_type, - HTON_ALTER_NOT_SUPPORTED)) + if (ha_check_storage_engine_flag(new_db_type, HTON_ALTER_NOT_SUPPORTED)) { DBUG_PRINT("info", ("doesn't support alter")); - my_error(ER_ILLEGAL_HA, MYF(0), hton_name(create_info->db_type)->str, + my_error(ER_ILLEGAL_HA, MYF(0), hton_name(new_db_type)->str, alter_ctx.new_db, alter_ctx.new_name); DBUG_RETURN(true); } @@ -9349,6 +9351,17 @@ do_continue:; DBUG_RETURN(true); } } + /* + If the old table had partitions and we are doing ALTER TABLE ... + engine= , the new table must preserve the original + partitioning. This means that the new engine is still the + partitioning engine, not the engine specified in the parser. + This is discovered in prep_alter_part_table, which in such case + updates create_info->db_type. + It's therefore important that the assignment below is done + after prep_alter_part_table. + */ + new_db_type= create_info->db_type; #endif if (mysql_prepare_alter_table(thd, table, create_info, alter_info, @@ -9424,7 +9437,7 @@ do_continue:; Alter_info::ALTER_TABLE_ALGORITHM_INPLACE) || is_inplace_alter_impossible(table, create_info, alter_info) || IF_PARTITIONING((partition_changed && - !(table->s->db_type()->partition_flags() & HA_USE_AUTO_PARTITION)), 0)) + !(old_db_type->partition_flags() & HA_USE_AUTO_PARTITION)), 0)) { if (alter_info->requested_algorithm == Alter_info::ALTER_TABLE_ALGORITHM_INPLACE) @@ -9441,22 +9454,10 @@ do_continue:; request table rebuild. Set ALTER_RECREATE flag to force table rebuild. */ - if (create_info->db_type == table->s->db_type() && + if (new_db_type == old_db_type && create_info->used_fields & HA_CREATE_USED_ENGINE) alter_info->flags|= Alter_info::ALTER_RECREATE; - /* - If the old table had partitions and we are doing ALTER TABLE ... - engine= , the new table must preserve the original - partitioning. This means that the new engine is still the - partitioning engine, not the engine specified in the parser. - This is discovered in prep_alter_part_table, which in such case - updates create_info->db_type. - It's therefore important that the assignment below is done - after prep_alter_part_table. - */ - handlerton *new_db_type= create_info->db_type; - handlerton *old_db_type= table->s->db_type(); TABLE *new_table= NULL; ha_rows copied=0,deleted=0; diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 3cb15d64e91..6f9349a9c35 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -2313,9 +2313,7 @@ fts_trx_table_create( fts_trx_table_t* ftt; ftt = static_cast( - mem_heap_alloc(fts_trx->heap, sizeof(*ftt))); - - memset(ftt, 0x0, sizeof(*ftt)); + mem_heap_zalloc(fts_trx->heap, sizeof *ftt)); ftt->table = table; ftt->fts_trx = fts_trx; From 22fd31c5883622b5c7451cee74bc5d087d81e112 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Wed, 16 Mar 2022 14:37:55 +0400 Subject: [PATCH 04/52] MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets TYPELIBs for ENUM/SET columns could erroneously undergo redundant hex-unescaping at the table open time. Fix: - Prevent multiple unescaping of the same TYPELIB - Prevent sharing TYPELIBs between columns with different mbminlen --- mysql-test/r/ctype_utf32.result | 25 +++++++++++++++++++++++++ mysql-test/t/ctype_utf32.test | 19 +++++++++++++++++++ sql/table.cc | 20 ++++++++++++++++++-- sql/unireg.cc | 11 ++++++++++- 4 files changed, 72 insertions(+), 3 deletions(-) diff --git a/mysql-test/r/ctype_utf32.result b/mysql-test/r/ctype_utf32.result index 143fff9e419..22cea274182 100644 --- a/mysql-test/r/ctype_utf32.result +++ b/mysql-test/r/ctype_utf32.result @@ -2913,5 +2913,30 @@ t1 CREATE TABLE `t1` ( DROP TABLE t1; SET NAMES utf8; # +# MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets +# +CREATE TABLE t1 ( +c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a', +c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a' +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` enum('a','b') CHARACTER SET utf32 DEFAULT 'a', + `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +CREATE TABLE t1 ( +c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin, +c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci +); +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `c1` enum('00000061','00000062') CHARACTER SET latin1 COLLATE latin1_bin DEFAULT '00000061', + `c2` enum('a','b') CHARACTER SET utf32 DEFAULT 'a' +) ENGINE=MyISAM DEFAULT CHARSET=latin1 +DROP TABLE t1; +# # End of 10.2 tests # diff --git a/mysql-test/t/ctype_utf32.test b/mysql-test/t/ctype_utf32.test index 46ff333b5f7..739096ae9cb 100644 --- a/mysql-test/t/ctype_utf32.test +++ b/mysql-test/t/ctype_utf32.test @@ -1067,6 +1067,25 @@ DROP TABLE t1; SET NAMES utf8; +--echo # +--echo # MDEV-28078 Garbage on multiple equal ENUMs with tricky character sets +--echo # + +CREATE TABLE t1 ( + c1 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a', + c2 ENUM ('a','b') CHARACTER SET utf32 DEFAULT 'a' +); +SHOW CREATE TABLE t1; +DROP TABLE t1; + +CREATE TABLE t1 ( + c1 ENUM ('00000061','00000062') DEFAULT '00000061' COLLATE latin1_bin, + c2 ENUM ('a','b') DEFAULT 'a' COLLATE utf32_general_ci +); +SHOW CREATE TABLE t1; +DROP TABLE t1; + + --echo # --echo # End of 10.2 tests --echo # diff --git a/sql/table.cc b/sql/table.cc index ca6ce02e4f2..1f7b6452303 100644 --- a/sql/table.cc +++ b/sql/table.cc @@ -1229,6 +1229,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, plugin_ref se_plugin= 0; MEM_ROOT *old_root= thd->mem_root; Virtual_column_info **table_check_constraints; + bool *interval_unescaped= NULL; DBUG_ENTER("TABLE_SHARE::init_from_binary_frm_image"); keyinfo= &first_keyinfo; @@ -1686,6 +1687,13 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, goto err; + if (interval_count) + { + if (!(interval_unescaped= (bool*) my_alloca(interval_count * sizeof(bool)))) + goto err; + bzero(interval_unescaped, interval_count * sizeof(bool)); + } + field_ptr= share->field; table_check_constraints= share->check_constraints; read_length=(uint) (share->fields * field_pack_length + @@ -1956,11 +1964,17 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, if (share->mysql_version < 100200) pack_flag&= ~FIELDFLAG_LONG_DECIMAL; - if (interval_nr && charset->mbminlen > 1) + if (interval_nr && charset->mbminlen > 1 && + !interval_unescaped[interval_nr - 1]) { - /* Unescape UCS2 intervals from HEX notation */ + /* + Unescape UCS2/UTF16/UTF32 intervals from HEX notation. + Note, ENUM/SET columns with equal value list share a single + copy of TYPELIB. Unescape every TYPELIB only once. + */ TYPELIB *interval= share->intervals + interval_nr - 1; unhex_type2(interval); + interval_unescaped[interval_nr - 1]= true; } #ifndef TO_BE_DELETED_ON_PRODUCTION @@ -2610,6 +2624,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, share->error= OPEN_FRM_OK; thd->status_var.opened_shares++; thd->mem_root= old_root; + my_afree(interval_unescaped); DBUG_RETURN(0); err: @@ -2623,6 +2638,7 @@ int TABLE_SHARE::init_from_binary_frm_image(THD *thd, bool write, open_table_error(share, OPEN_FRM_CORRUPTED, share->open_errno); thd->mem_root= old_root; + my_afree(interval_unescaped); DBUG_RETURN(HA_ERR_NOT_A_TABLE); } diff --git a/sql/unireg.cc b/sql/unireg.cc index 7974255af35..5471290651b 100644 --- a/sql/unireg.cc +++ b/sql/unireg.cc @@ -757,7 +757,16 @@ static uint get_interval_id(uint *int_count,List &create_fields, while ((field=it++) != last_field) { - if (field->interval_id && field->interval->count == interval->count) + /* + ENUM/SET columns with equal value lists share a single + copy of the underlying TYPELIB. + Fields with different mbminlen can't reuse TYPELIBs, because: + - mbminlen==1 are written to FRM as is + - mbminlen>1 are written to FRM in hex-encoded format + */ + if (field->interval_id && + field->interval->count == interval->count && + field->charset->mbminlen == last_field->charset->mbminlen) { const char **a,**b; for (a=field->interval->type_names, b=interval->type_names ; From ecb6f9c894d3ebafeff1c6eb3b65cd248062296f Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 17 Mar 2022 16:57:56 +0100 Subject: [PATCH 05/52] MDEV-28095 crash in multi-update and implicit grouping disallow implicit grouping in multi-update. explicit GROUP BY is not allowed by the grammar. --- mysql-test/main/multi_update_innodb.result | 15 +++++++++++++++ mysql-test/main/multi_update_innodb.test | 19 +++++++++++++++++++ sql/sql_update.cc | 5 +++++ 3 files changed, 39 insertions(+) diff --git a/mysql-test/main/multi_update_innodb.result b/mysql-test/main/multi_update_innodb.result index 2ec7eb3065e..52bbece4fa0 100644 --- a/mysql-test/main/multi_update_innodb.result +++ b/mysql-test/main/multi_update_innodb.result @@ -207,4 +207,19 @@ ERROR 23000: Duplicate entry '0000-00-00 00:00:00' for key 'f2k' DROP VIEW v1; DROP TABLE t3,t4; SET @@sql_mode=@save_sql_mode; +# # End of 10.2 tests +# +# +# MDEV-28095 crash in multi-update and implicit grouping +# +CREATE TABLE t1 (a int) engine=innodb; +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 (b int); +INSERT INTO t2 VALUES (1),(2); +UPDATE t1 NATURAL JOIN t2 SET a = 1 ORDER BY AVG (a) ; +ERROR HY000: Invalid use of group function +DROP TABLE t1, t2; +# +# End of 10.3 tests +# diff --git a/mysql-test/main/multi_update_innodb.test b/mysql-test/main/multi_update_innodb.test index 04736482011..02f6a7a3316 100644 --- a/mysql-test/main/multi_update_innodb.test +++ b/mysql-test/main/multi_update_innodb.test @@ -243,4 +243,23 @@ DROP VIEW v1; DROP TABLE t3,t4; SET @@sql_mode=@save_sql_mode; +--echo # --echo # End of 10.2 tests +--echo # + +--echo # +--echo # MDEV-28095 crash in multi-update and implicit grouping +--echo # +CREATE TABLE t1 (a int) engine=innodb; +INSERT INTO t1 VALUES (1),(2); +CREATE TABLE t2 (b int); +INSERT INTO t2 VALUES (1),(2); +--error ER_INVALID_GROUP_FUNC_USE +UPDATE t1 NATURAL JOIN t2 SET a = 1 ORDER BY AVG (a) ; +DROP TABLE t1, t2; + + +--echo # +--echo # End of 10.3 tests +--echo # + diff --git a/sql/sql_update.cc b/sql/sql_update.cc index 1e997b75c7d..a6a0b78259d 100644 --- a/sql/sql_update.cc +++ b/sql/sql_update.cc @@ -2150,6 +2150,11 @@ multi_update::initialize_tables(JOIN *join) if (unlikely((thd->variables.option_bits & OPTION_SAFE_UPDATES) && error_if_full_join(join))) DBUG_RETURN(1); + if (join->implicit_grouping) + { + my_error(ER_INVALID_GROUP_FUNC_USE, MYF(0)); + DBUG_RETURN(1); + } main_table=join->join_tab->table; table_to_update= 0; From 8e9e1c397902d3c3fdba7311c5889f9ed12a0215 Mon Sep 17 00:00:00 2001 From: sjaakola Date: Tue, 22 Feb 2022 11:37:43 +0200 Subject: [PATCH 06/52] MDEV-27649 Crash with PS execute after BF abort MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit contains a test for reproducing the issue in MDEV-27649, where a transaction, executing a prepared statment, is BF aborted. The scenario, in MDEV-27649 has a transaction which has prepared a PS, but not yet executed it, and this transaction is then BF aborted in this state. When the BF aborted transaction tries to execute the PS, it will receive deadlock error. But, when it tries to execute the PS second time, the node crashes. Mtr test galera.galera_bf_abort_ps_bind, exercises this scenario. However, mtr test platform does not have mechanism to control the execution of PS in required detail. For this purpose, mysqltetst.cc was extended to contain 4 new commands: PS_prepare - to prepare a prepared statement PS_bind - to bind values for parameters for the PS PS_execute - to execute the PS PS_close - to close the PS The support for controlling prepared statments in mtr scripts is quite minimal in this commit. Limitations are: * only one PS can be used by a connection, at a time * only input parameters can be bound for the PS * only varchar, integer or float type of parameters can be bound added the result fixes Reviewed-by: Jan Lindström --- client/mysqltest.cc | 450 ++++++++++++++++++ .../galera/r/galera_bf_abort_ps_bind.result | 37 ++ .../galera/t/galera_bf_abort_ps_bind.cnf | 7 + .../galera/t/galera_bf_abort_ps_bind.test | 58 +++ 4 files changed, 552 insertions(+) create mode 100644 mysql-test/suite/galera/r/galera_bf_abort_ps_bind.result create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_ps_bind.cnf create mode 100644 mysql-test/suite/galera/t/galera_bf_abort_ps_bind.test diff --git a/client/mysqltest.cc b/client/mysqltest.cc index 85beda907c3..814c07ba60f 100644 --- a/client/mysqltest.cc +++ b/client/mysqltest.cc @@ -319,6 +319,7 @@ struct st_connection char *name; size_t name_len; MYSQL_STMT* stmt; + MYSQL_BIND *ps_params; /* Set after send to disallow other queries before reap */ my_bool pending; @@ -393,6 +394,10 @@ enum enum_commands { Q_ENABLE_PREPARE_WARNINGS, Q_DISABLE_PREPARE_WARNINGS, Q_RESET_CONNECTION, Q_OPTIMIZER_TRACE, + Q_PS_PREPARE, + Q_PS_BIND, + Q_PS_EXECUTE, + Q_PS_CLOSE, Q_UNKNOWN, /* Unknown command. */ Q_COMMENT, /* Comments, ignored. */ Q_COMMENT_WITH_COMMAND, @@ -506,6 +511,10 @@ const char *command_names[]= "disable_prepare_warnings", "reset_connection", "optimizer_trace", + "PS_prepare", + "PS_bind", + "PS_execute", + "PS_close", 0 }; @@ -7848,6 +7857,15 @@ static void handle_no_active_connection(struct st_command *command, var_set_errno(2006); } +/* handler functions to execute prepared statement calls in client C API */ +void run_prepare_stmt(struct st_connection *cn, struct st_command *command, const char *query, + size_t query_len, DYNAMIC_STRING *ds, DYNAMIC_STRING *ds_warnings); +void run_bind_stmt(struct st_connection *cn, struct st_command *command, const char *query, + size_t query_len, DYNAMIC_STRING *ds, DYNAMIC_STRING *ds_warnings); +void run_execute_stmt(struct st_connection *cn, struct st_command *command, const char *query, + size_t query_len, DYNAMIC_STRING *ds, DYNAMIC_STRING *ds_warnings); +void run_close_stmt(struct st_connection *cn, struct st_command *command, const char *query, + size_t query_len, DYNAMIC_STRING *ds, DYNAMIC_STRING *ds_warnings); /* Run query using MySQL C API @@ -7879,6 +7897,32 @@ void run_query_normal(struct st_connection *cn, struct st_command *command, DBUG_VOID_RETURN; } + /* handle prepared statement commands */ + switch (command->type) { + case Q_PS_PREPARE: + run_prepare_stmt(cn, command, query, query_len, ds, ds_warnings); + flags &= ~QUERY_SEND_FLAG; + goto end; + break; + case Q_PS_BIND: + run_bind_stmt(cn, command, query, query_len, ds, ds_warnings); + flags &= ~QUERY_SEND_FLAG; + goto end; + break; + case Q_PS_EXECUTE: + run_execute_stmt(cn, command, query, query_len, ds, ds_warnings); + flags &= ~QUERY_SEND_FLAG; + goto end; + break; + case Q_PS_CLOSE: + run_close_stmt(cn, command, query, query_len, ds, ds_warnings); + flags &= ~QUERY_SEND_FLAG; + goto end; + break; + default: /* not a prepared statement command */ + break; + } + if (flags & QUERY_SEND_FLAG) { /* @@ -8434,6 +8478,408 @@ end: DBUG_VOID_RETURN; } +/* + prepare query using prepared statement C API + + SYNPOSIS + run_prepare_stmt + mysql - mysql handle + command - current command pointer + query - query string to execute + query_len - length query string to execute + ds - output buffer where to store result form query + + RETURN VALUE + error - function will not return +*/ + +void run_prepare_stmt(struct st_connection *cn, struct st_command *command, const char *query, size_t query_len, DYNAMIC_STRING *ds, DYNAMIC_STRING *ds_warnings) +{ + + MYSQL *mysql= cn->mysql; + MYSQL_STMT *stmt; + DYNAMIC_STRING ds_prepare_warnings; + DBUG_ENTER("run_prepare_stmt"); + DBUG_PRINT("query", ("'%-.60s'", query)); + + /* + Init a new stmt if it's not already one created for this connection + */ + if(!(stmt= cn->stmt)) + { + if (!(stmt= mysql_stmt_init(mysql))) + die("unable to init stmt structure"); + cn->stmt= stmt; + } + + /* Init dynamic strings for warnings */ + if (!disable_warnings) + { + init_dynamic_string(&ds_prepare_warnings, NULL, 0, 256); + } + + /* + Prepare the query + */ + char* PS_query= command->first_argument; + size_t PS_query_len= command->end - command->first_argument; + if (do_stmt_prepare(cn, PS_query, PS_query_len)) + { + handle_error(command, mysql_stmt_errno(stmt), + mysql_stmt_error(stmt), mysql_stmt_sqlstate(stmt), ds); + goto end; + } + + /* + Get the warnings from mysql_stmt_prepare and keep them in a + separate string + */ + if (!disable_warnings) + append_warnings(&ds_prepare_warnings, mysql); + end: + DBUG_VOID_RETURN; +} + +/* + bind parameters for a prepared statement C API + + SYNPOSIS + run_bind_stmt + mysql - mysql handle + command - current command pointer + query - query string to execute + query_len - length query string to execute + ds - output buffer where to store result form query + + RETURN VALUE + error - function will not return +*/ + +void run_bind_stmt(struct st_connection *cn, struct st_command *command, + const char *query, size_t query_len, DYNAMIC_STRING *ds, + DYNAMIC_STRING *ds_warnings + ) +{ + MYSQL_STMT *stmt= cn->stmt; + DBUG_ENTER("run_bind_stmt"); + DBUG_PRINT("query", ("'%-.60s'", query)); + MYSQL_BIND *ps_params= cn->ps_params; + if (ps_params) + { + for (size_t i=0; iparam_count; i++) + { + my_free(ps_params[i].buffer); + ps_params[i].buffer= NULL; + } + my_free(ps_params); + ps_params= NULL; + } + + /* Init PS-parameters. */ + cn->ps_params= ps_params = (MYSQL_BIND*)my_malloc(sizeof(MYSQL_BIND) * stmt->param_count, + MYF(MY_WME)); + bzero((char *) ps_params, sizeof(MYSQL_BIND) * stmt->param_count); + + int i=0; + char *c; + long *l; + double *d; + + char *p= strtok((char*)command->first_argument, " "); + while (p != nullptr) + { + (void)strtol(p, &c, 10); + if (!*c) + { + ps_params[i].buffer_type= MYSQL_TYPE_LONG; + l= (long*)my_malloc(sizeof(long), MYF(MY_WME)); + *l= strtol(p, &c, 10); + ps_params[i].buffer= (void*)l; + ps_params[i].buffer_length= 8; + } + else + { + (void)strtod(p, &c); + if (!*c) + { + ps_params[i].buffer_type= MYSQL_TYPE_DECIMAL; + d= (double*)my_malloc(sizeof(double), MYF(MY_WME)); + *d= strtod(p, &c); + ps_params[i].buffer= (void*)d; + ps_params[i].buffer_length= 8; + } + else + { + ps_params[i].buffer_type= MYSQL_TYPE_STRING; + ps_params[i].buffer= strdup(p); + ps_params[i].buffer_length= (unsigned long)strlen(p); + } + } + + p= strtok(nullptr, " "); + i++; + } + + int rc= mysql_stmt_bind_param(stmt, ps_params); + if (rc) + { + die("mysql_stmt_bind_param() failed': %d %s", + mysql_stmt_errno(stmt), mysql_stmt_error(stmt)); + } + + DBUG_VOID_RETURN; +} + +/* + execute query using prepared statement C API + + SYNPOSIS + run_axecute_stmt + mysql - mysql handle + command - current command pointer + query - query string to execute + query_len - length query string to execute + ds - output buffer where to store result form query + + RETURN VALUE + error - function will not return +*/ + +void run_execute_stmt(struct st_connection *cn, struct st_command *command, + const char *query, size_t query_len, DYNAMIC_STRING *ds, + DYNAMIC_STRING *ds_warnings + ) +{ + MYSQL_RES *res= NULL; /* Note that here 'res' is meta data result set */ + MYSQL *mysql= cn->mysql; + MYSQL_STMT *stmt= cn->stmt; + DYNAMIC_STRING ds_execute_warnings; + DBUG_ENTER("run_execute_stmt"); + DBUG_PRINT("query", ("'%-.60s'", query)); + + /* Init dynamic strings for warnings */ + if (!disable_warnings) + { + init_dynamic_string(&ds_execute_warnings, NULL, 0, 256); + } + +#if MYSQL_VERSION_ID >= 50000 + if (cursor_protocol_enabled) + { + /* + Use cursor when retrieving result + */ + ulong type= CURSOR_TYPE_READ_ONLY; + if (mysql_stmt_attr_set(stmt, STMT_ATTR_CURSOR_TYPE, (void*) &type)) + die("mysql_stmt_attr_set(STMT_ATTR_CURSOR_TYPE) failed': %d %s", + mysql_stmt_errno(stmt), mysql_stmt_error(stmt)); + } +#endif + + /* + Execute the query + */ + if (do_stmt_execute(cn)) + { + handle_error(command, mysql_stmt_errno(stmt), + mysql_stmt_error(stmt), mysql_stmt_sqlstate(stmt), ds); + goto end; + } + + /* + When running in cursor_protocol get the warnings from execute here + and keep them in a separate string for later. + */ + if (cursor_protocol_enabled && !disable_warnings) + append_warnings(&ds_execute_warnings, mysql); + + /* + We instruct that we want to update the "max_length" field in + mysql_stmt_store_result(), this is our only way to know how much + buffer to allocate for result data + */ + { + my_bool one= 1; + if (mysql_stmt_attr_set(stmt, STMT_ATTR_UPDATE_MAX_LENGTH, (void*) &one)) + die("mysql_stmt_attr_set(STMT_ATTR_UPDATE_MAX_LENGTH) failed': %d %s", + mysql_stmt_errno(stmt), mysql_stmt_error(stmt)); + } + + /* + If we got here the statement succeeded and was expected to do so, + get data. Note that this can still give errors found during execution! + Store the result of the query if if will return any fields + */ + if (mysql_stmt_field_count(stmt) && mysql_stmt_store_result(stmt)) + { + handle_error(command, mysql_stmt_errno(stmt), + mysql_stmt_error(stmt), mysql_stmt_sqlstate(stmt), ds); + goto end; + } + + /* If we got here the statement was both executed and read successfully */ + handle_no_error(command); + if (!disable_result_log) + { + /* + Not all statements creates a result set. If there is one we can + now create another normal result set that contains the meta + data. This set can be handled almost like any other non prepared + statement result set. + */ + if ((res= mysql_stmt_result_metadata(stmt)) != NULL) + { + /* Take the column count from meta info */ + MYSQL_FIELD *fields= mysql_fetch_fields(res); + uint num_fields= mysql_num_fields(res); + + if (display_metadata) + append_metadata(ds, fields, num_fields); + + if (!display_result_vertically) + append_table_headings(ds, fields, num_fields); + + append_stmt_result(ds, stmt, fields, num_fields); + + mysql_free_result(res); /* Free normal result set with meta data */ + + /* + Normally, if there is a result set, we do not show warnings from the + prepare phase. This is because some warnings are generated both during + prepare and execute; this would generate different warning output + between normal and ps-protocol test runs. + + The --enable_prepare_warnings command can be used to change this so + that warnings from both the prepare and execute phase are shown. + */ + } + else + { + /* + This is a query without resultset + */ + } + + /* + Fetch info before fetching warnings, since it will be reset + otherwise. + */ + if (!disable_info) + append_info(ds, mysql_stmt_affected_rows(stmt), mysql_info(mysql)); + + if (display_session_track_info) + append_session_track_info(ds, mysql); + + + if (!disable_warnings) + { + /* Get the warnings from execute */ + + /* Append warnings to ds - if there are any */ + if (append_warnings(&ds_execute_warnings, mysql) || + ds_execute_warnings.length || + ds_warnings->length) + { + dynstr_append_mem(ds, "Warnings:\n", 10); + if (ds_warnings->length) + dynstr_append_mem(ds, ds_warnings->str, + ds_warnings->length); + if (ds_execute_warnings.length) + dynstr_append_mem(ds, ds_execute_warnings.str, + ds_execute_warnings.length); + } + } + } + +end: + if (!disable_warnings) + { + dynstr_free(&ds_execute_warnings); + } + + /* + We save the return code (mysql_stmt_errno(stmt)) from the last call sent + to the server into the mysqltest builtin variable $mysql_errno. This + variable then can be used from the test case itself. + */ + + var_set_errno(mysql_stmt_errno(stmt)); + + revert_properties(); + + /* Close the statement if reconnect, need new prepare */ + { +#ifndef EMBEDDED_LIBRARY + my_bool reconnect; + mysql_get_option(mysql, MYSQL_OPT_RECONNECT, &reconnect); + if (reconnect) +#else + if (mysql->reconnect) +#endif + { + if (cn->ps_params) + { + for (size_t i=0; iparam_count; i++) + { + my_free(cn->ps_params[i].buffer); + cn->ps_params[i].buffer= NULL; + } + my_free(cn->ps_params); + } + mysql_stmt_close(stmt); + cn->stmt= NULL; + cn->ps_params= NULL; + } + } + DBUG_VOID_RETURN; +} + +/* + close a prepared statement C API + + SYNPOSIS + run_close_stmt + mysql - mysql handle + command - current command pointer + query - query string to execute + query_len - length query string to execute + ds - output buffer where to store result form query + + RETURN VALUE + error - function will not return +*/ + +void run_close_stmt(struct st_connection *cn, struct st_command *command, + const char *query, size_t query_len, DYNAMIC_STRING *ds, + DYNAMIC_STRING *ds_warnings + ) +{ + MYSQL_STMT *stmt= cn->stmt; + DBUG_ENTER("run_close_stmt"); + DBUG_PRINT("query", ("'%-.60s'", query)); + + if (cn->ps_params) + { + + for (size_t i=0; iparam_count; i++) + { + my_free(cn->ps_params[i].buffer); + cn->ps_params[i].buffer= NULL; + } + my_free(cn->ps_params); + } + + /* Close the statement */ + if (stmt) + { + mysql_stmt_close(stmt); + cn->stmt= NULL; + } + cn->ps_params= NULL; + + DBUG_VOID_RETURN; +} + /* @@ -9474,6 +9920,10 @@ int main(int argc, char **argv) /* fall through */ case Q_QUERY: case Q_REAP: + case Q_PS_PREPARE: + case Q_PS_BIND: + case Q_PS_EXECUTE: + case Q_PS_CLOSE: { my_bool old_display_result_vertically= display_result_vertically; /* Default is full query, both reap and send */ diff --git a/mysql-test/suite/galera/r/galera_bf_abort_ps_bind.result b/mysql-test/suite/galera/r/galera_bf_abort_ps_bind.result new file mode 100644 index 00000000000..adc7da58eae --- /dev/null +++ b/mysql-test/suite/galera/r/galera_bf_abort_ps_bind.result @@ -0,0 +1,37 @@ +connection node_2; +connection node_1; +CREATE TABLE t (i int primary key auto_increment, j varchar(20) character set utf8); +connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1; +connection node_1a; +SET SESSION wsrep_sync_wait = 0; +connection node_1; +insert into t values (1, 'first'); +PS_prepare INSERT INTO t(j) VALUES (?);; +PS_bind node1; +PS_execute; +PS_execute; +select * from t; +i j +1 first +3 node1 +5 node1 +PS_close; +PS_prepare INSERT INTO t(j) VALUES (?);; +PS_bind node1; +begin; +update t set j='node1' where i=1; +connection node_2; +update t set j='node2' where i=1; +connection node_1a; +connection node_1; +PS_execute; +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +PS_execute; +commit; +select * from t; +i j +1 node2 +3 node1 +5 node1 +7 node1 +drop table t; diff --git a/mysql-test/suite/galera/t/galera_bf_abort_ps_bind.cnf b/mysql-test/suite/galera/t/galera_bf_abort_ps_bind.cnf new file mode 100644 index 00000000000..62cf1854032 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_ps_bind.cnf @@ -0,0 +1,7 @@ +!include ../galera_2nodes.cnf + +[mysqld.1] +wsrep-debug=1 + +[mysqld.2] +wsrep-debug=1 diff --git a/mysql-test/suite/galera/t/galera_bf_abort_ps_bind.test b/mysql-test/suite/galera/t/galera_bf_abort_ps_bind.test new file mode 100644 index 00000000000..a840f612a82 --- /dev/null +++ b/mysql-test/suite/galera/t/galera_bf_abort_ps_bind.test @@ -0,0 +1,58 @@ +--source include/galera_cluster.inc +--source include/have_innodb.inc + +CREATE TABLE t (i int primary key auto_increment, j varchar(20) character set utf8); + +--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1 +--connection node_1a +SET SESSION wsrep_sync_wait = 0; + +--connection node_1 +insert into t values (1, 'first'); + +# prepare a statement for inserting rows into table t +--PS_prepare INSERT INTO t(j) VALUES (?); + +# bind parameter, to insert with column j having value 'node1' +--PS_bind node1 + +# insert two rows with the PS +# this is for showing that two execute commands can follow a bind command +--PS_execute +--PS_execute +select * from t; + +# close the prepared statement, and prepare a new PS, +# this happens to be same as the first PS +# also bind parameter for the PS +--PS_close +--PS_prepare INSERT INTO t(j) VALUES (?); +--PS_bind node1 + +# start a transaction and make one update +# leaving the transaction open +begin; +update t set j='node1' where i=1; + +# replicate a transaction from node2, which BF aborts the open +# transaction in node1 +--connection node_2 +update t set j='node2' where i=1; + +# wait until the BF has completed, and update from node_2 has committed +--connection node_1a +--let $wait_condition = SELECT COUNT(*) = 1 FROM t WHERE j='node2' +--source include/wait_condition.inc + +# continue the open transaction, trying to insert third row, deadlock is now observed +--connection node_1 +--error ER_LOCK_DEADLOCK +--PS_execute + +# try to insert one more row +--PS_execute +commit; + +select * from t; + +drop table t; From 97582f1c064443e7691720b0438e8dc2c6c14f3e Mon Sep 17 00:00:00 2001 From: sjaakola Date: Mon, 31 Jan 2022 18:31:47 +0200 Subject: [PATCH 07/52] MDEV-27649 PS conflict handling causing node crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Handling BF abort for prepared statement execution so that EXECUTE processing will continue until parameter setup is complete, before BF abort bails out the statement execution. THD class has new boolean member: wsrep_delayed_BF_abort, which is set if BF abort is observed in do_command() right after reading client's packet, and if the client has sent PS execute command. In such case, the deadlock error is not returned immediately back to client, but the PS execution will be started. However, the PS execution loop, will now check if wsrep_delayed_BF_abort is set, and stop the PS execution after the type information has been assigned for the PS. With this, the PS protocol type information, which is present in the first PS EXECUTE command, is not lost even if the first PS EXECUTE command was marked to abort. Reviewed-by: Jan Lindström --- sql/sql_class.cc | 1 + sql/sql_class.h | 4 ++++ sql/sql_parse.cc | 19 ++++++++++++++++++- sql/sql_prepare.cc | 8 +++++++- 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/sql/sql_class.cc b/sql/sql_class.cc index f3353135ac8..c9e55fd413e 100644 --- a/sql/sql_class.cc +++ b/sql/sql_class.cc @@ -666,6 +666,7 @@ THD::THD(my_thread_id id, bool is_wsrep_applier) wsrep_replicate_GTID(false), wsrep_ignore_table(false), wsrep_aborter(0), + wsrep_delayed_BF_abort(false), /* wsrep-lib */ m_wsrep_next_trx_id(WSREP_UNDEFINED_TRX_ID), diff --git a/sql/sql_class.h b/sql/sql_class.h index 2afda6e63b7..76deee68a71 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -4897,6 +4897,10 @@ public: /* thread who has started kill for this THD protected by LOCK_thd_data*/ my_thread_id wsrep_aborter; + /* true if BF abort is observed in do_command() right after reading + client's packet, and if the client has sent PS execute command. */ + bool wsrep_delayed_BF_abort; + /* Transaction id: * m_wsrep_next_trx_id is assigned on the first query after diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index a3d44423a69..bd685add60e 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1313,7 +1313,13 @@ bool do_command(THD *thd) DBUG_ASSERT(!thd->mdl_context.has_locks()); DBUG_ASSERT(!thd->get_stmt_da()->is_set()); /* We let COM_QUIT and COM_STMT_CLOSE to execute even if wsrep aborted. */ - if (command != COM_STMT_CLOSE && + if (command == COM_STMT_EXECUTE) + { + WSREP_DEBUG("PS BF aborted at do_command"); + thd->wsrep_delayed_BF_abort= true; + } + if (command != COM_STMT_CLOSE && + command != COM_STMT_EXECUTE && command != COM_QUIT) { my_error(ER_LOCK_DEADLOCK, MYF(0)); @@ -1385,6 +1391,17 @@ out: /* there was a command to process, and before_command() has been called */ wsrep_after_command_after_result(thd); } + + if (thd->wsrep_delayed_BF_abort) + { + my_error(ER_LOCK_DEADLOCK, MYF(0)); + WSREP_DEBUG("Deadlock error for PS query: %s", thd->query()); + thd->reset_killed(); + thd->mysys_var->abort = 0; + thd->wsrep_retry_counter = 0; + + thd->wsrep_delayed_BF_abort= false; + } #endif /* WITH_WSREP */ DBUG_RETURN(return_value); } diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index 49b342d660d..c674cc78719 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -4461,7 +4461,13 @@ Prepared_statement::execute_loop(String *expanded_query, if (set_parameters(expanded_query, packet, packet_end)) return TRUE; - +#ifdef WITH_WSREP + if (thd->wsrep_delayed_BF_abort) + { + WSREP_DEBUG("delayed BF abort, quitting execute_loop, stmt: %d", id); + return TRUE; + } +#endif /* WITH_WSREP */ reexecute: // Make sure that reprepare() did not create any new Items. DBUG_ASSERT(thd->free_list == NULL); From 39ed4005536dc1c76053c564a3a1404eebb199a5 Mon Sep 17 00:00:00 2001 From: Daniele Sciascia Date: Mon, 31 Jan 2022 16:09:26 +0100 Subject: [PATCH 08/52] Fixup for MDEV-27553 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update wsrep-lib which contains a fixup introduced with MDEV-27553. Also, adapt the corresponding test: after apply failure on ROLLBACK, node will disconnect from cluster Reviewed-by: Jan Lindström --- .../suite/galera_sr/r/MDEV-27553.result | 37 +++++++---- mysql-test/suite/galera_sr/t/MDEV-27553.test | 65 ++++++++++++++++--- wsrep-lib | 2 +- 3 files changed, 82 insertions(+), 22 deletions(-) diff --git a/mysql-test/suite/galera_sr/r/MDEV-27553.result b/mysql-test/suite/galera_sr/r/MDEV-27553.result index f6f81bd13f1..5a6a5bd4956 100644 --- a/mysql-test/suite/galera_sr/r/MDEV-27553.result +++ b/mysql-test/suite/galera_sr/r/MDEV-27553.result @@ -1,23 +1,36 @@ connection node_2; connection node_1; -CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); connection node_1; +connection node_2; +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); +connection node_2; SET SESSION wsrep_trx_fragment_size=1; START TRANSACTION; INSERT INTO t1 VALUES (1); -SET @@global.debug_dbug="+d,ha_index_init_fail"; -ROLLBACK; -connection node_2; -SELECT COUNT(*) `Expect 0` FROM mysql.wsrep_streaming_log; -Expect 0 -0 -connection node_1; -SET @@global.debug_dbug=""; SELECT COUNT(*) `Expect 1` FROM mysql.wsrep_streaming_log; Expect 1 1 -SET SESSION wsrep_on=OFF; -DELETE FROM mysql.wsrep_streaming_log; -SET SESSION wsrep_on=ON; +SET @@global.debug_dbug="+d,ha_index_init_fail"; +ROLLBACK; +connection node_1; +SET SESSION wsrep_sync_wait = 0; +SELECT COUNT(*) `Expect 0` FROM mysql.wsrep_streaming_log; +Expect 0 +0 +connection node_2; +SET @@global.debug_dbug=""; +SET SESSION wsrep_sync_wait = 0; +SELECT COUNT(*) `Expect 1` FROM mysql.wsrep_streaming_log; +Expect 1 +1 +connection node_2; +SET GLOBAL wsrep_on=OFF; +# restart +SELECT COUNT(*) `Expect 0` FROM mysql.wsrep_streaming_log; +Expect 0 +0 DROP TABLE t1; CALL mtr.add_suppression("WSREP: Failed to init table for index scan"); +CALL mtr.add_suppression("WSREP: Failed to apply write set"); +CALL mtr.add_suppression("Failed to report last committed"); diff --git a/mysql-test/suite/galera_sr/t/MDEV-27553.test b/mysql-test/suite/galera_sr/t/MDEV-27553.test index d17af175512..5c557db9201 100644 --- a/mysql-test/suite/galera_sr/t/MDEV-27553.test +++ b/mysql-test/suite/galera_sr/t/MDEV-27553.test @@ -5,29 +5,76 @@ --source include/galera_cluster.inc --source include/have_debug.inc +--let $node_1=node_1 +--let $node_2=node_2 +--source suite/galera/include/auto_increment_offset_save.inc + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 + CREATE TABLE t1 (f1 INTEGER PRIMARY KEY); ---connection node_1 ---let $wsrep_cluster_address_orig = `SELECT @@wsrep_cluster_address` +--connection node_2 SET SESSION wsrep_trx_fragment_size=1; START TRANSACTION; INSERT INTO t1 VALUES (1); -# This will result in failure to remove fragments -# from streaming log, in the following ROLLBACK. +SELECT COUNT(*) `Expect 1` FROM mysql.wsrep_streaming_log; + +# +# Issue ROLLBACK and make sure it fails to clean up +# the streaming log. Failure to remove fragments +# results in apply failure of the rollback fragment. +# The node should disconnect from the cluster. +# SET @@global.debug_dbug="+d,ha_index_init_fail"; ROLLBACK; ---connection node_2 + +# +# Expect the cluster to shrink +# +--connection node_1 +SET SESSION wsrep_sync_wait = 0; +--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +# +# ROLLBACK should clean up the streaming log just fine in node 1 +# SELECT COUNT(*) `Expect 0` FROM mysql.wsrep_streaming_log; ---connection node_1 +# +# Expect the failure on ROLLBACK to leave a entry in streaming log +# +--connection node_2 SET @@global.debug_dbug=""; +SET SESSION wsrep_sync_wait = 0; +# Expect node to be disconnected +--let wait_condition = SELECT VARIABLE_VALUE = 'Disconnected' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status'; +--source include/wait_condition.inc + SELECT COUNT(*) `Expect 1` FROM mysql.wsrep_streaming_log; -SET SESSION wsrep_on=OFF; -DELETE FROM mysql.wsrep_streaming_log; -SET SESSION wsrep_on=ON; + +# +# Restart node 2, so that it joins the cluster back +# +--connection node_2 +SET GLOBAL wsrep_on=OFF; +--source include/restart_mysqld.inc + +# +# After restart, the streaming log is empty in node 2 +# +SELECT COUNT(*) `Expect 0` FROM mysql.wsrep_streaming_log; + +# +# Cleanup +# DROP TABLE t1; CALL mtr.add_suppression("WSREP: Failed to init table for index scan"); +CALL mtr.add_suppression("WSREP: Failed to apply write set"); +CALL mtr.add_suppression("Failed to report last committed"); + +--source suite/galera/include/auto_increment_offset_restore.inc diff --git a/wsrep-lib b/wsrep-lib index edd141127c1..23fb8624624 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit edd141127c11d78ef073f9f3ca61708821f20b32 +Subproject commit 23fb8624624c9144c77f3874647fa0f7394b0aa8 From c63eab2c68bd83c7b1f5fec9fc9bb0d7238b9b34 Mon Sep 17 00:00:00 2001 From: Daniele Sciascia Date: Tue, 1 Feb 2022 14:26:24 +0100 Subject: [PATCH 09/52] MDEV-28055: Galera ps-protocol fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix test galera.MW-44 to make it work with --ps-protocol * Skip test galera.MW-328C under --ps-protocol This test relies on wsrep_retry_autocommit, which has no effect under ps-protocol. * Return WSREP related errors on COM_STMT_PREPARE commands Change wsrep_command_no_result() to allow sending back errors when a statement is prepared. For example, to handle deadlock error due to BF aborted transaction during prepare. * Add sync waiting before statement prepare When a statement is prepared, tables used in the statement may be opened and checked for existence. Because of that, some tests (for example galera_create_table_as_select) that CREATE a table in one node and then SELECT from the same table in another node may result in errors due to non existing table. To make tests behave similarly under normal and PS protocol, we add a call to sync wait before preparing statements that would sync wait during normal execution. Reviewed-by: Jan Lindström --- mysql-test/suite/galera/t/MW-328C.test | 1 + mysql-test/suite/galera/t/MW-44.test | 6 ++- sql/sql_parse.cc | 3 +- sql/sql_prepare.cc | 4 ++ sql/wsrep_mysqld.cc | 72 ++++++++++++++++++++++++-- sql/wsrep_mysqld.h | 1 + 6 files changed, 79 insertions(+), 8 deletions(-) diff --git a/mysql-test/suite/galera/t/MW-328C.test b/mysql-test/suite/galera/t/MW-328C.test index 7241dfbdbca..c3370a3decd 100644 --- a/mysql-test/suite/galera/t/MW-328C.test +++ b/mysql-test/suite/galera/t/MW-328C.test @@ -7,6 +7,7 @@ # masks all deadlock errors # +--source include/no_protocol.inc --source include/galera_cluster.inc --source suite/galera/t/MW-328-header.inc diff --git a/mysql-test/suite/galera/t/MW-44.test b/mysql-test/suite/galera/t/MW-44.test index a2acfc57f6c..7b479d45844 100644 --- a/mysql-test/suite/galera/t/MW-44.test +++ b/mysql-test/suite/galera/t/MW-44.test @@ -19,7 +19,11 @@ SET SESSION wsrep_osu_method=RSU; ALTER TABLE t1 ADD COLUMN f2 INTEGER; SET SESSION wsrep_osu_method=TOI; ---let $wait_condition = SELECT COUNT(*) = 2 FROM mysql.general_log WHERE argument LIKE "CREATE%" OR argument LIKE "ALTER%" +--let $wait_condition = SELECT COUNT(*) = 1 FROM mysql.general_log WHERE argument LIKE "CREATE%" AND command_type != 'Prepare' +--let $wait_condition_on_error_output = SELECT * FROM mysql.general_log +--source include/wait_condition_with_debug.inc + +--let $wait_condition = SELECT COUNT(*) = 1 FROM mysql.general_log WHERE argument LIKE "ALTER%" AND command_type != 'Prepare' --let $wait_condition_on_error_output = SELECT * FROM mysql.general_log --source include/wait_condition_with_debug.inc diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index bd685add60e..35310d2655a 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -1167,8 +1167,7 @@ static bool wsrep_tables_accessible_when_detached(const TABLE_LIST *tables) static bool wsrep_command_no_result(char command) { - return (command == COM_STMT_PREPARE || - command == COM_STMT_FETCH || + return (command == COM_STMT_FETCH || command == COM_STMT_SEND_LONG_DATA || command == COM_STMT_CLOSE); } diff --git a/sql/sql_prepare.cc b/sql/sql_prepare.cc index c674cc78719..c6e993b8f6c 100644 --- a/sql/sql_prepare.cc +++ b/sql/sql_prepare.cc @@ -2394,6 +2394,10 @@ static bool check_prepared_statement(Prepared_statement *stmt) goto error; } +#ifdef WITH_WSREP + if (wsrep_sync_wait(thd, sql_command)) + goto error; +#endif switch (sql_command) { case SQLCOM_REPLACE: case SQLCOM_INSERT: diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 087b6c5b74d..8a5b6d915d9 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1176,6 +1176,73 @@ wsrep_sync_wait_upto (THD* thd, return ret; } +bool wsrep_is_show_query(enum enum_sql_command command) +{ + DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; +} + +static bool wsrep_is_diagnostic_query(enum enum_sql_command command) +{ + assert(command >= 0 && command <= SQLCOM_END); + return (sql_command_flags[command] & CF_DIAGNOSTIC_STMT) != 0; +} + +static enum enum_wsrep_sync_wait +wsrep_sync_wait_mask_for_command(enum enum_sql_command command) +{ + switch (command) + { + case SQLCOM_SELECT: + case SQLCOM_CHECKSUM: + return WSREP_SYNC_WAIT_BEFORE_READ; + case SQLCOM_DELETE: + case SQLCOM_DELETE_MULTI: + case SQLCOM_UPDATE: + case SQLCOM_UPDATE_MULTI: + return WSREP_SYNC_WAIT_BEFORE_UPDATE_DELETE; + case SQLCOM_REPLACE: + case SQLCOM_INSERT: + case SQLCOM_REPLACE_SELECT: + case SQLCOM_INSERT_SELECT: + return WSREP_SYNC_WAIT_BEFORE_INSERT_REPLACE; + default: + if (wsrep_is_diagnostic_query(command)) + { + return WSREP_SYNC_WAIT_NONE; + } + if (wsrep_is_show_query(command)) + { + switch (command) + { + case SQLCOM_SHOW_PROFILE: + case SQLCOM_SHOW_PROFILES: + case SQLCOM_SHOW_SLAVE_HOSTS: + case SQLCOM_SHOW_RELAYLOG_EVENTS: + case SQLCOM_SHOW_SLAVE_STAT: + case SQLCOM_SHOW_MASTER_STAT: + case SQLCOM_SHOW_ENGINE_STATUS: + case SQLCOM_SHOW_ENGINE_MUTEX: + case SQLCOM_SHOW_ENGINE_LOGS: + case SQLCOM_SHOW_PROCESSLIST: + case SQLCOM_SHOW_PRIVILEGES: + return WSREP_SYNC_WAIT_NONE; + default: + return WSREP_SYNC_WAIT_BEFORE_SHOW; + } + } + } + return WSREP_SYNC_WAIT_NONE; +} + +bool wsrep_sync_wait(THD* thd, enum enum_sql_command command) +{ + bool res = false; + if (WSREP_CLIENT(thd) && thd->variables.wsrep_sync_wait) + res = wsrep_sync_wait(thd, wsrep_sync_wait_mask_for_command(command)); + return res; +} + void wsrep_keys_free(wsrep_key_arr_t* key_arr) { for (size_t i= 0; i < key_arr->keys_len; ++i) @@ -2706,11 +2773,6 @@ extern bool wsrep_thd_ignore_table(THD *thd) return thd->wsrep_ignore_table; } -bool wsrep_is_show_query(enum enum_sql_command command) -{ - DBUG_ASSERT(command >= 0 && command <= SQLCOM_END); - return (sql_command_flags[command] & CF_STATUS_COMMAND) != 0; -} bool wsrep_create_like_table(THD* thd, TABLE_LIST* table, TABLE_LIST* src_table, HA_CREATE_INFO *create_info) diff --git a/sql/wsrep_mysqld.h b/sql/wsrep_mysqld.h index 279f2bd44aa..12151e9a887 100644 --- a/sql/wsrep_mysqld.h +++ b/sql/wsrep_mysqld.h @@ -208,6 +208,7 @@ extern bool wsrep_start_replication(const char *wsrep_cluster_address); extern void wsrep_shutdown_replication(); extern bool wsrep_must_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); extern bool wsrep_sync_wait (THD* thd, uint mask= WSREP_SYNC_WAIT_BEFORE_READ); +extern bool wsrep_sync_wait (THD* thd, enum enum_sql_command command); extern enum wsrep::provider::status wsrep_sync_wait_upto (THD* thd, wsrep_gtid_t* upto, int timeout); extern int wsrep_check_opts(); From 304f75c97311a1b746d9bb6bc94de415b5daa21c Mon Sep 17 00:00:00 2001 From: mkaruza Date: Wed, 16 Feb 2022 15:05:58 +0100 Subject: [PATCH 10/52] MDEV-27568 Parallel async replication hangs on a Galera node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using parallel slave applying can cause deadlock between between DDL and other events. GTID with lower seqno can be blocked in galera when node entered TOI mode, but DDL GTID which has higher node can be blocked before previous GTIDs are applied locally. Fix is to check prior commits before entering TOI. Reviewed-by: Jan Lindström --- sql/sql_class.h | 5 ++++- sql/wsrep_mysqld.cc | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sql/sql_class.h b/sql/sql_class.h index 76deee68a71..573d70bbe00 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -4932,7 +4932,10 @@ public: { return m_wsrep_next_trx_id; } - + /* + If node is async slave and have parallel execution, wait for prior commits. + */ + bool wsrep_parallel_slave_wait_for_prior_commit(); private: wsrep_trx_id_t m_wsrep_next_trx_id; /* cast from query_id_t */ /* wsrep-lib */ diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 8a5b6d915d9..d53fb7320e4 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -2247,6 +2247,12 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, if (!wsrep_thd_is_local(thd)) return 0; + if (thd->wsrep_parallel_slave_wait_for_prior_commit()) + { + WSREP_WARN("TOI: wait_for_prior_commit() returned error."); + return -1; + } + int ret= 0; mysql_mutex_lock(&thd->LOCK_thd_data); @@ -3064,6 +3070,15 @@ enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit) } } +bool THD::wsrep_parallel_slave_wait_for_prior_commit() +{ + if (rgi_slave && rgi_slave->is_parallel_exec && wait_for_prior_commit()) + { + return 1; + } + return 0; +} + /***** callbacks for wsrep service ************/ my_bool get_wsrep_recovery() From 507030c492c17e4d1e1d3a1c8f68437b1fd8fb09 Mon Sep 17 00:00:00 2001 From: mkaruza Date: Mon, 7 Mar 2022 10:48:24 +0100 Subject: [PATCH 11/52] MDEV-27713 Crash after a conflict of applier thread with stored procedure call by event scheduler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When thread is BF aborted by high priority service, ULL (user level locks need to be removed and released). Calling directly release of lock for MDL_EXPLICIT type doesn't clear also `thd->ull_hash`. Method `mysql_ull_cleanup` will properly clear all information about ULL locks for thread. Reviewed-by: Jan Lindström --- mysql-test/suite/galera/r/MDEV-27713.result | 46 ++++++++++++++ mysql-test/suite/galera/t/MDEV-27713.test | 67 +++++++++++++++++++++ sql/wsrep_client_service.cc | 1 + sql/wsrep_high_priority_service.cc | 1 + 4 files changed, 115 insertions(+) create mode 100644 mysql-test/suite/galera/r/MDEV-27713.result create mode 100644 mysql-test/suite/galera/t/MDEV-27713.test diff --git a/mysql-test/suite/galera/r/MDEV-27713.result b/mysql-test/suite/galera/r/MDEV-27713.result new file mode 100644 index 00000000000..14575cb484d --- /dev/null +++ b/mysql-test/suite/galera/r/MDEV-27713.result @@ -0,0 +1,46 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 ( +f1 INT, +f2 VARCHAR(255) PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8; +INSERT INTO t1 VALUES(1, 'abc'); +connection node_1; +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (2,'def'); +connection node_2; +SET GLOBAL event_scheduler=ON; +CREATE PROCEDURE update_table() +BEGIN +SET AUTOCOMMIT=OFF; +DO GET_LOCK('local_lock', 0); +SET DEBUG_SYNC = 'innodb_row_update_for_mysql_begin SIGNAL blocked WAIT_FOR continue'; +UPDATE t1 SET f2 = 'jkl' WHERE f1 != 2; +DO RELEASE_LOCK('local_lock'); +END| +CREATE DEFINER=current_user +EVENT event +ON SCHEDULE AT CURRENT_TIMESTAMP +ON COMPLETION PRESERVE +ENABLE +DO CALL update_table(); +connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2; +SET DEBUG_SYNC = 'now WAIT_FOR blocked'; +connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2; +SET GLOBAL debug_dbug = "+d,sync.wsrep_apply_cb"; +connection node_1; +COMMIT; +connection node_2b; +SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; +connection node_2a; +SET DEBUG_SYNC = 'now SIGNAL continue'; +connection node_2; +SET GLOBAL event_scheduler=default; +DROP PROCEDURE update_table; +DROP EVENT event; +SET DEBUG_SYNC='reset'; +SET GLOBAL debug_dbug = DEFAULT; +connection node_1; +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/MDEV-27713.test b/mysql-test/suite/galera/t/MDEV-27713.test new file mode 100644 index 00000000000..4bfcd7e3d50 --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-27713.test @@ -0,0 +1,67 @@ +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/big_test.inc + +CREATE TABLE t1 ( + f1 INT, + f2 VARCHAR(255) PRIMARY KEY +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +INSERT INTO t1 VALUES(1, 'abc'); + +--connection node_1 +SET AUTOCOMMIT=OFF; +START TRANSACTION; +INSERT INTO t1 VALUES (2,'def'); + +--connection node_2 + +SET GLOBAL event_scheduler=ON; + +DELIMITER |; +CREATE PROCEDURE update_table() +BEGIN + SET AUTOCOMMIT=OFF; + DO GET_LOCK('local_lock', 0); + SET DEBUG_SYNC = 'innodb_row_update_for_mysql_begin SIGNAL blocked WAIT_FOR continue'; + UPDATE t1 SET f2 = 'jkl' WHERE f1 != 2; + DO RELEASE_LOCK('local_lock'); +END| +DELIMITER ;| + +CREATE DEFINER=current_user + EVENT event + ON SCHEDULE AT CURRENT_TIMESTAMP + ON COMPLETION PRESERVE + ENABLE + DO CALL update_table(); + +--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2 +SET DEBUG_SYNC = 'now WAIT_FOR blocked'; + +# Applier control thread +--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2 +SET GLOBAL debug_dbug = "+d,sync.wsrep_apply_cb"; + +--connection node_1 +COMMIT; + +# Applier control thread +--connection node_2b +SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached"; +SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb"; + +--connection node_2a +SET DEBUG_SYNC = 'now SIGNAL continue'; + +--connection node_2 +SET GLOBAL event_scheduler=default; +DROP PROCEDURE update_table; +DROP EVENT event; +SET DEBUG_SYNC='reset'; +SET GLOBAL debug_dbug = DEFAULT; + +--connection node_1 +DROP TABLE t1; diff --git a/sql/wsrep_client_service.cc b/sql/wsrep_client_service.cc index 8473489d2e1..464296ea6cf 100644 --- a/sql/wsrep_client_service.cc +++ b/sql/wsrep_client_service.cc @@ -342,6 +342,7 @@ int Wsrep_client_service::bf_rollback() m_thd->global_read_lock.unlock_global_read_lock(m_thd); } m_thd->release_transactional_locks(); + mysql_ull_cleanup(m_thd); m_thd->mdl_context.release_explicit_locks(); DBUG_RETURN(ret); diff --git a/sql/wsrep_high_priority_service.cc b/sql/wsrep_high_priority_service.cc index 452242dfd23..8fd7d08d543 100644 --- a/sql/wsrep_high_priority_service.cc +++ b/sql/wsrep_high_priority_service.cc @@ -356,6 +356,7 @@ int Wsrep_high_priority_service::rollback(const wsrep::ws_handle& ws_handle, m_thd->wsrep_cs().prepare_for_ordering(ws_handle, ws_meta, false); int ret= (trans_rollback_stmt(m_thd) || trans_rollback(m_thd)); m_thd->release_transactional_locks(); + mysql_ull_cleanup(m_thd); m_thd->mdl_context.release_explicit_locks(); free_root(m_thd->mem_root, MYF(MY_KEEP_PREALLOC)); From c519aa3d7aea1afb7754ccf70aa5b276ff5fbf87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 15 Mar 2022 12:48:29 +0200 Subject: [PATCH 12/52] MDEV-24143 : Galera nodes "randomly" crashing in Item_func_release_lock::val_int Fixed on MDEV-27713. Added additional test case. --- mysql-test/suite/galera/r/MDEV-24143.result | 23 +++++++++++++++++++++ mysql-test/suite/galera/t/MDEV-24143.test | 20 ++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 mysql-test/suite/galera/r/MDEV-24143.result create mode 100644 mysql-test/suite/galera/t/MDEV-24143.test diff --git a/mysql-test/suite/galera/r/MDEV-24143.result b/mysql-test/suite/galera/r/MDEV-24143.result new file mode 100644 index 00000000000..860d8a35834 --- /dev/null +++ b/mysql-test/suite/galera/r/MDEV-24143.result @@ -0,0 +1,23 @@ +connection node_2; +connection node_1; +CREATE TABLE t1 (c1 BIGINT NOT NULL PRIMARY KEY, c2 BINARY (10), c3 DATETIME); +SELECT get_lock ('test2', 0); +get_lock ('test2', 0) +1 +DROP TABLE t1; +CREATE TABLE t1 (c1 SMALLINT NOT NULL AUTO_INCREMENT PRIMARY KEY); +INSERT INTO t1 VALUES (1); +SET SESSION wsrep_trx_fragment_size=10; +SET SESSION autocommit=0; +SELECT * FROM t1 WHERE c1 <=0 ORDER BY c1 DESC; +c1 +INSERT INTO t1 VALUES (4),(3),(1),(2); +ERROR 40001: Deadlock found when trying to get lock; try restarting transaction +CREATE TABLE t1 (pk INT PRIMARY KEY, b INT) ENGINE=SEQUENCE; +ERROR 42S01: Table 't1' already exists +ALTER TABLE t1 DROP COLUMN c2; +ERROR 42000: Can't DROP COLUMN `c2`; check that it exists +SELECT get_lock ('test', 1.5); +get_lock ('test', 1.5) +1 +DROP TABLE t1; diff --git a/mysql-test/suite/galera/t/MDEV-24143.test b/mysql-test/suite/galera/t/MDEV-24143.test new file mode 100644 index 00000000000..e58f147cb7c --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-24143.test @@ -0,0 +1,20 @@ +--source include/galera_cluster.inc +--source include/have_sequence.inc + +CREATE TABLE t1 (c1 BIGINT NOT NULL PRIMARY KEY, c2 BINARY (10), c3 DATETIME); +SELECT get_lock ('test2', 0); +DROP TABLE t1; +CREATE TABLE t1 (c1 SMALLINT NOT NULL AUTO_INCREMENT PRIMARY KEY); +INSERT INTO t1 VALUES (1); +SET SESSION wsrep_trx_fragment_size=10; +SET SESSION autocommit=0; +SELECT * FROM t1 WHERE c1 <=0 ORDER BY c1 DESC; +--error ER_LOCK_DEADLOCK +INSERT INTO t1 VALUES (4),(3),(1),(2); +--error ER_TABLE_EXISTS_ERROR +CREATE TABLE t1 (pk INT PRIMARY KEY, b INT) ENGINE=SEQUENCE; +--error ER_CANT_DROP_FIELD_OR_KEY +ALTER TABLE t1 DROP COLUMN c2; +SELECT get_lock ('test', 1.5); +DROP TABLE t1; + From 74e668eaeb4271845fd69d2945085383c431e333 Mon Sep 17 00:00:00 2001 From: Monty Date: Thu, 17 Mar 2022 16:58:43 +0200 Subject: [PATCH 13/52] Fixed warning for maria.maria-recovery2 about crashed table The bug was a missing va_start in eprint() which caused a wrong table name to be printed. Patch backported from 10.3. --- storage/maria/ma_recovery_util.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/storage/maria/ma_recovery_util.c b/storage/maria/ma_recovery_util.c index 9443ba90f6c..cb4203392dd 100644 --- a/storage/maria/ma_recovery_util.c +++ b/storage/maria/ma_recovery_util.c @@ -59,11 +59,12 @@ void tprint(FILE *trace_file __attribute__ ((unused)), va_list args; #ifndef DBUG_OFF { - char buff[1024], *end; + char buff[1024]; + size_t length; va_start(args, format); - vsnprintf(buff, sizeof(buff)-1, format, args); - if (*(end= strend(buff)) == '\n') - *end= 0; /* Don't print end \n */ + length= my_vsnprintf(buff, sizeof(buff)-1, format, args); + if (length && buff[length-1] == '\n') + buff[length-1]= 0; /* Don't print end \n */ DBUG_PRINT("info", ("%s", buff)); va_end(args); } @@ -95,6 +96,7 @@ void eprint(FILE *trace_file __attribute__ ((unused)), fputc('\n', trace_file); if (trace_file != stderr) { + va_start(args, format); my_printv_error(HA_ERR_INITIALIZATION, format, MYF(0), args); } va_end(args); From cf86580f2b4ca2b7bd337d27af22b46c82bb25df Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 10 Mar 2022 12:07:38 +0900 Subject: [PATCH 14/52] MDEV-28032 "git submodule update --depth 1" may fail with old Git submodules.cmake: don't use "--depth 1" with old Git Old Git may not work with "--depth 1" when the referenced commit hash is far from HEAD. Newer Git improves the situation. For example: https://github.com/git/git/commit/fb43e31f2b43076e7a30c9cd00d0241cb8cf97eb It's safe to not use "--depth 1" with old Git. Closes #2049 --- cmake/submodules.cmake | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/cmake/submodules.cmake b/cmake/submodules.cmake index b6c64b32f3d..fca67a0508b 100644 --- a/cmake/submodules.cmake +++ b/cmake/submodules.cmake @@ -17,20 +17,29 @@ IF(GIT_EXECUTABLE AND EXISTS "${CMAKE_SOURCE_DIR}/.git") ${GIT_EXECUTABLE} config cmake.update-submodules yes") ELSEIF(git_config_get_result EQUAL 128) SET(update_result 0) - ELSEIF (cmake_update_submodules MATCHES force) - MESSAGE(STATUS "Updating submodules (forced)") - EXECUTE_PROCESS(COMMAND "${GIT_EXECUTABLE}" submodule update --init --force --depth=1 - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" - RESULT_VARIABLE update_result) - ELSEIF (cmake_update_submodules MATCHES yes) - EXECUTE_PROCESS(COMMAND "${GIT_EXECUTABLE}" submodule update --init --depth=1 - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" - RESULT_VARIABLE update_result) ELSE() - MESSAGE(STATUS "Updating submodules") - EXECUTE_PROCESS(COMMAND "${GIT_EXECUTABLE}" submodule update --init --depth=1 - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" - RESULT_VARIABLE update_result) + SET(UPDATE_SUBMODULES_COMMAND + "${GIT_EXECUTABLE}" submodule update --init --recursive) + # Old Git may not work with "--depth 1". + # See also: https://github.com/git/git/commit/fb43e31f2b43076e7a30c9cd00d0241cb8cf97eb + IF(NOT GIT_VERSION_STRING VERSION_LESS "2.8.0") + SET(UPDATE_SUBMODULES_COMMAND ${UPDATE_SUBMODULES_COMMAND} --depth 1) + ENDIF() + IF(cmake_update_submodules MATCHES force) + MESSAGE(STATUS "Updating submodules (forced)") + EXECUTE_PROCESS(COMMAND ${UPDATE_SUBMODULES_COMMAND} --force + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE update_result) + ELSEIF(cmake_update_submodules MATCHES yes) + EXECUTE_PROCESS(COMMAND ${UPDATE_SUBMODULES_COMMAND} + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE update_result) + ELSE() + MESSAGE(STATUS "Updating submodules") + EXECUTE_PROCESS(COMMAND ${UPDATE_SUBMODULES_COMMAND} + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + RESULT_VARIABLE update_result) + ENDIF() ENDIF() ENDIF() From 421153848f0002c9e9b5e204d4c6e85dfcfb5600 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Sun, 20 Mar 2022 21:36:41 +0100 Subject: [PATCH 15/52] MDEV-27980 file-key-management plugin disabled in mysql_install_db breaks automated deployments (and container initialization) Revert "Silence the file-key-management plugin during mysql_install_db" This reverts commit e99d3da6381023395c86f679bb76b00b4385dc2d. --- scripts/mysql_install_db.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mysql_install_db.sh b/scripts/mysql_install_db.sh index 3df48f0eb95..bf2e2617b9b 100644 --- a/scripts/mysql_install_db.sh +++ b/scripts/mysql_install_db.sh @@ -506,7 +506,7 @@ mysqld_install_cmd_line() { "$mysqld_bootstrap" $defaults $defaults_group_suffix "$mysqld_opt" --bootstrap $silent_startup\ "--basedir=$basedir" "--datadir=$ldata" --log-warnings=0 --enforce-storage-engine="" \ - "--plugin-dir=${plugindir}" --loose-disable-plugin-file-key-management \ + "--plugin-dir=${plugindir}" \ $args --max_allowed_packet=8M \ --net_buffer_length=16K } From fbc1cc974e433ad4ee77ef19a2ea199537686a98 Mon Sep 17 00:00:00 2001 From: Oleksandr Byelkin Date: Fri, 18 Mar 2022 11:13:09 +0100 Subject: [PATCH 16/52] MDEV-26009 Server crash when calling twice procedure using FOR-loop The problem was that instructions sp_instr_cursor_copy_struct and sp_instr_copen uses the same lex, adding and removing "tail" of prelocked tables and forgetting that tail of all tables is kept in LEX::query_tables_last. If the LEX used only by one instruction or the query do not have prelocked tables it is not important. But to work correctly in all cases LEX::query_tables_last should be reset to make new tables added in the correct list (after last table in the LEX instead after last table of the prelocking "tail" which was cut). --- mysql-test/main/sp-cursor.result | 63 ++++++++++++++++++++++++++++++++ mysql-test/main/sp-cursor.test | 56 ++++++++++++++++++++++++++++ sql/sp_head.cc | 1 + 3 files changed, 120 insertions(+) diff --git a/mysql-test/main/sp-cursor.result b/mysql-test/main/sp-cursor.result index 2656ef8821d..b1c2b335ea4 100644 --- a/mysql-test/main/sp-cursor.result +++ b/mysql-test/main/sp-cursor.result @@ -737,3 +737,66 @@ rec.en1 c DROP PROCEDURE p1; DROP TABLE t1; +# +# MDEV-26009: Server crash when calling twice procedure using FOR-loop +# +CREATE TABLE t1 ( id int, name varchar(24)); +INSERT INTO t1 values (1, 'x'), (2, 'y'), (3, 'z'); +create function get_name(_id int) returns varchar(24) +return (select name from t1 where id = _id); +select get_name(id) from t1; +get_name(id) +x +y +z +create procedure test_proc() +begin +declare _cur cursor for select get_name(id) from t1; +for row in _cur do select 1; end for; +end; +^^ +call test_proc(); +1 +1 +1 +1 +1 +1 +call test_proc(); +1 +1 +1 +1 +1 +1 +drop procedure test_proc; +drop function get_name; +drop table t1; +CREATE TABLE t1 (id int, name varchar(24)); +INSERT INTO t1 (id, name) VALUES (1, 'x'),(2, 'y'),(3, 'z'); +create function get_name(_id int) returns varchar(24) +return (select name from t1 where id = _id); +create view v1 as select get_name(id) from t1; +create procedure test_proc() +begin +declare _cur cursor for select 1 from v1; +for row in _cur do select 1; end for; +end$$ +call test_proc(); +1 +1 +1 +1 +1 +1 +call test_proc(); +1 +1 +1 +1 +1 +1 +drop procedure test_proc; +drop view v1; +drop function get_name; +drop table t1; diff --git a/mysql-test/main/sp-cursor.test b/mysql-test/main/sp-cursor.test index 97483ef9caf..9794815c784 100644 --- a/mysql-test/main/sp-cursor.test +++ b/mysql-test/main/sp-cursor.test @@ -744,3 +744,59 @@ DELIMITER ;$$ CALL p1(); DROP PROCEDURE p1; DROP TABLE t1; + + +--echo # +--echo # MDEV-26009: Server crash when calling twice procedure using FOR-loop +--echo # + + +CREATE TABLE t1 ( id int, name varchar(24)); +INSERT INTO t1 values (1, 'x'), (2, 'y'), (3, 'z'); + +create function get_name(_id int) returns varchar(24) + return (select name from t1 where id = _id); + +select get_name(id) from t1; + +delimiter ^^; + +create procedure test_proc() +begin + declare _cur cursor for select get_name(id) from t1; + for row in _cur do select 1; end for; +end; +^^ +delimiter ;^^ + +call test_proc(); +call test_proc(); + +drop procedure test_proc; +drop function get_name; +drop table t1; + + +CREATE TABLE t1 (id int, name varchar(24)); +INSERT INTO t1 (id, name) VALUES (1, 'x'),(2, 'y'),(3, 'z'); + +create function get_name(_id int) returns varchar(24) + return (select name from t1 where id = _id); + +create view v1 as select get_name(id) from t1; + +delimiter $$; +create procedure test_proc() +begin + declare _cur cursor for select 1 from v1; + for row in _cur do select 1; end for; +end$$ +delimiter ;$$ + +call test_proc(); +call test_proc(); + +drop procedure test_proc; +drop view v1; +drop function get_name; +drop table t1; diff --git a/sql/sp_head.cc b/sql/sp_head.cc index 57ab31d9edf..1d1199aaa62 100644 --- a/sql/sp_head.cc +++ b/sql/sp_head.cc @@ -3486,6 +3486,7 @@ sp_lex_keeper::reset_lex_and_exec_core(THD *thd, uint *nextp, lex_query_tables_own_last= m_lex->query_tables_own_last; prelocking_tables= *lex_query_tables_own_last; *lex_query_tables_own_last= NULL; + m_lex->query_tables_last= m_lex->query_tables_own_last; m_lex->mark_as_requiring_prelocking(NULL); } thd->rollback_item_tree_changes(); From f54d6380d2662d7bc7f173bf96f5dc3d7cf3aec1 Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Mon, 21 Mar 2022 11:01:40 +0100 Subject: [PATCH 17/52] MDEV-27980 file-key-management plugin disabled in mysql_install_db breaks automated deployments (and container initialization) fix a 2015 typo in build scripts. --without-plugin=plugin_file_key_management translates to -DPLUGIN_PLUGIN_FILE_KEY_MANAGEMENT=NO replace it with a line from 10.4 that builds the plugin dynamically. --- BUILD/SETUP.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index 8a7238c359f..77504522f11 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -211,7 +211,7 @@ fi max_no_embedded_configs="$SSL_LIBRARY --with-plugins=max" max_no_qc_configs="$SSL_LIBRARY --with-plugins=max --without-query-cache" -max_configs="$SSL_LIBRARY --with-plugins=max --with-embedded-server --with-libevent --without-plugin=plugin_file_key_management --with-plugin-rocksdb=dynamic --with-plugin-test_sql_discovery=DYNAMIC" +max_configs="$SSL_LIBRARY --with-plugins=max --with-embedded-server --with-libevent --with-plugin-rocksdb=dynamic --with-plugin-test_sql_discovery=DYNAMIC --with-plugin-file_key_management=DYNAMIC" all_configs="$SSL_LIBRARY --with-plugins=max --with-embedded-server --with-innodb_plugin --with-libevent" # From 6277e7df6b84f6d5931dab66d3edf8859d5b16d3 Mon Sep 17 00:00:00 2001 From: Alexey Botchkov Date: Mon, 22 Nov 2021 09:58:46 +0400 Subject: [PATCH 18/52] MDEV-22742 UBSAN: Many overflow issues in strings/decimal.c - runtime error: signed integer overflow: x * y cannot be represented in type 'long long int' (on optimized builds). Avoid integer overflow, do the check before the calculation. --- strings/decimal.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/strings/decimal.c b/strings/decimal.c index 9d18a9ce52a..78b4c12b0ef 100644 --- a/strings/decimal.c +++ b/strings/decimal.c @@ -1128,13 +1128,21 @@ int decimal2ulonglong(const decimal_t *from, ulonglong *to) for (intg=from->intg; intg > 0; intg-=DIG_PER_DEC1) { - ulonglong y=x; - x=x*DIG_BASE + *buf++; - if (unlikely(y > ((ulonglong) ULONGLONG_MAX/DIG_BASE) || x < y)) + /* + Check that the decimal is bigger than any possible integer. + Do it before we do the x*=DIB_BASE to avoid integer + overflow. + */ + if (unlikely ( + x >= ULONGLONG_MAX/DIG_BASE && + (x > ULONGLONG_MAX/DIG_BASE || + *buf > (dec1) (ULONGLONG_MAX%DIG_BASE)))) { *to=ULONGLONG_MAX; return E_DEC_OVERFLOW; } + + x=x*DIG_BASE + *buf++; } *to=x; for (frac=from->frac; unlikely(frac > 0); frac-=DIG_PER_DEC1) @@ -1151,23 +1159,29 @@ int decimal2longlong(const decimal_t *from, longlong *to) for (intg=from->intg; intg > 0; intg-=DIG_PER_DEC1) { - longlong y=x; /* + Check that the decimal is less than any possible integer. + Do it before we do the x*=DIB_BASE to avoid integer + overflow. Attention: trick! we're calculating -|from| instead of |from| here because |LONGLONG_MIN| > LONGLONG_MAX - so we can convert -9223372036854775808 correctly + so we can convert -9223372036854775808 correctly. */ - x=x*DIG_BASE - *buf++; - if (unlikely(y < (LONGLONG_MIN/DIG_BASE) || x > y)) + if (unlikely ( + x <= LONGLONG_MIN/DIG_BASE && + (x < LONGLONG_MIN/DIG_BASE || + *buf > (dec1) (-(LONGLONG_MIN%DIG_BASE))))) { /* - the decimal is bigger than any possible integer - return border integer depending on the sign + the decimal is bigger than any possible integer + return border integer depending on the sign */ *to= from->sign ? LONGLONG_MIN : LONGLONG_MAX; return E_DEC_OVERFLOW; } + + x=x*DIG_BASE - *buf++; } /* boundary case: 9223372036854775808 */ if (unlikely(from->sign==0 && x == LONGLONG_MIN)) From 0812d0de8dcb1f76d4a03cea3f20bfa30345b83b Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Mon, 21 Mar 2022 16:42:58 +0400 Subject: [PATCH 19/52] MDEV-28131 Unexpected warning while selecting from information_schema.processlist Problem: DECIMAL columns in I_S must be explicitly set of some value. I_S columns do not have `DEFAULT 0` (after MDEV-18918), so during restore_record() their record fragments pointed by Field::ptr are initialized to zero bytes 0x00. But an array of 0x00's is not a valid binary DECIMAL value. So val_decimal() called for such Field_new_decimal generated a warning when seeing a wrong binary encoded DECIMAL value in the record. Fix: Explicitly setting INFORMATION_SCHEMA.PROCESSLIST.PROGRESS to the decimal value of 0 if no progress information is available. --- mysql-test/main/processlist.result | 20 +++++++++++++++++ mysql-test/main/processlist.test | 35 ++++++++++++++++++++++++++++++ sql/sql_show.cc | 10 +++++++++ 3 files changed, 65 insertions(+) diff --git a/mysql-test/main/processlist.result b/mysql-test/main/processlist.result index 2d3228a6d91..d99160f5c74 100644 --- a/mysql-test/main/processlist.result +++ b/mysql-test/main/processlist.result @@ -40,3 +40,23 @@ utf8mb4_string xxx😎yyy # # End of 10.1 tests # +# +# Start of 10.3 tests +# +# +# MDEV-28131 Unexpected warning while selecting from information_schema.processlist +# +connect conn1, localhost, root,,; +connection conn1; +SELECT SLEEP(1000); +connection default; +SELECT progress FROM information_schema.processlist WHERE info='SELECT SLEEP(1000)'; +progress +0.000 +connection conn1; +Got one of the listed errors +connection default; +disconnect conn1; +# +# End of 10.3 tests +# diff --git a/mysql-test/main/processlist.test b/mysql-test/main/processlist.test index 8e98701459a..f419f57ea2f 100644 --- a/mysql-test/main/processlist.test +++ b/mysql-test/main/processlist.test @@ -70,3 +70,38 @@ SELECT INFO, INFO_BINARY, 'xxx😎yyy' AS utf8mb4_string FROM INFORMATION_SCHEMA --echo # --echo # End of 10.1 tests --echo # + +--echo # +--echo # Start of 10.3 tests +--echo # + +--echo # +--echo # MDEV-28131 Unexpected warning while selecting from information_schema.processlist +--echo # + +connect (conn1, localhost, root,,); +connection conn1; +let $ID= `select connection_id()`; +send SELECT SLEEP(1000); +connection default; +let $wait_timeout= 10; +let $wait_condition=select count(*)=1 from information_schema.processlist +where state='User sleep' and info='SELECT SLEEP(1000)'; +--source include/wait_condition.inc +SELECT progress FROM information_schema.processlist WHERE info='SELECT SLEEP(1000)'; +disable_query_log; +eval kill $ID; +enable_query_log; +let $wait_timeout= 10; +let $wait_condition=select count(*)=0 from information_schema.processlist +where state='User sleep' and info='SELECT SLEEP(1000)'; +--source include/wait_condition.inc +connection conn1; +--error 2013,ER_CONNECTION_KILLED +reap; +connection default; +disconnect conn1; + +--echo # +--echo # End of 10.3 tests +--echo # diff --git a/sql/sql_show.cc b/sql/sql_show.cc index e1090d450e8..439cad1c858 100644 --- a/sql/sql_show.cc +++ b/sql/sql_show.cc @@ -3351,6 +3351,16 @@ int fill_schema_processlist(THD* thd, TABLE_LIST* tables, COND* cond) table->field[11]->store((double) tmp->progress.counter / (double) max_counter*100.0); } + else + { + /* + This is a DECIMAL column without DEFAULT. + restore_record() fills its Field::ptr to zero bytes, + according to pack_length(). But an array of zero bytes + is not a valid decimal. Set it explicitly to 0. + */ + table->field[11]->store((longlong) 0, true); + } mysql_mutex_unlock(&tmp->LOCK_thd_data); } From 8f4d7e365e41962370f63294e9ebd8152c3c9d59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 10 Feb 2022 07:33:02 +0200 Subject: [PATCH 20/52] MDEV-27775 : Some Galera tests fail on FreeBSD due to "unknown signal 9" Replace 9 with KILL --- mysql-test/suite/galera/include/kill_galera.inc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mysql-test/suite/galera/include/kill_galera.inc b/mysql-test/suite/galera/include/kill_galera.inc index d7f665df6c7..c8b88496e52 100644 --- a/mysql-test/suite/galera/include/kill_galera.inc +++ b/mysql-test/suite/galera/include/kill_galera.inc @@ -1,5 +1,10 @@ --echo Killing server ... +if (!$kill_signal) +{ +--let $kill_signal = KILL +} + # Write file to make mysql-test-run.pl expect the crash, but don't start it --let $_server_id= `SELECT @@server_id` --let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect @@ -7,13 +12,15 @@ # Kill the connected server --disable_reconnect +--let KILL_SIGNAL_VALUE = $kill_signal --let KILL_NODE_PIDFILE = `SELECT @@pid_file` --perl + my $kill_sig = $ENV{'KILL_SIGNAL_VALUE'} my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; my $mysqld_pid = `cat $pid_filename`; chomp($mysqld_pid); - system("kill -9 $mysqld_pid"); + system("kill -s $kill_sig $mysqld_pid"); exit(0); EOF From cade21b4e054486d18db9212e823ea8ce12ea46a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 22 Mar 2022 09:27:43 +0200 Subject: [PATCH 21/52] MDEV-27775 : Some Galera tests fail on FreeBSD due to "unknown signal 9" Replace 9 with KILL --- mysql-test/suite/galera/include/kill_galera.inc | 2 +- .../galera/r/galera_ist_restart_joiner.result | 1 + .../suite/galera/t/galera_ist_restart_joiner.test | 14 +------------- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/mysql-test/suite/galera/include/kill_galera.inc b/mysql-test/suite/galera/include/kill_galera.inc index c8b88496e52..56118df84f9 100644 --- a/mysql-test/suite/galera/include/kill_galera.inc +++ b/mysql-test/suite/galera/include/kill_galera.inc @@ -16,7 +16,7 @@ if (!$kill_signal) --let KILL_NODE_PIDFILE = `SELECT @@pid_file` --perl - my $kill_sig = $ENV{'KILL_SIGNAL_VALUE'} + my $kill_sig = $ENV{'KILL_SIGNAL_VALUE'}; my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; my $mysqld_pid = `cat $pid_filename`; chomp($mysqld_pid); diff --git a/mysql-test/suite/galera/r/galera_ist_restart_joiner.result b/mysql-test/suite/galera/r/galera_ist_restart_joiner.result index e58d04b30b3..d739558dfd5 100644 --- a/mysql-test/suite/galera/r/galera_ist_restart_joiner.result +++ b/mysql-test/suite/galera/r/galera_ist_restart_joiner.result @@ -16,6 +16,7 @@ SET SESSION wsrep_on=ON; connection node_1; UPDATE t1 SET f2 = 'd' WHERE f1 > 3; connection node_2; +Killing server ... connection node_1; UPDATE t1 SET f2 = 'e' WHERE f1 > 4; connection node_2; diff --git a/mysql-test/suite/galera/t/galera_ist_restart_joiner.test b/mysql-test/suite/galera/t/galera_ist_restart_joiner.test index 15b47a328fc..686fb0b3f76 100644 --- a/mysql-test/suite/galera/t/galera_ist_restart_joiner.test +++ b/mysql-test/suite/galera/t/galera_ist_restart_joiner.test @@ -63,19 +63,7 @@ UPDATE t1 SET f2 = 'd' WHERE f1 > 3; # Kill node #2 while IST is in progress --connection node_2 - -# Kill the connected server ---disable_reconnect - ---perl - my $pid_filename = $ENV{'KILL_NODE_PIDFILE'}; - my $mysqld_pid = `cat $pid_filename`; - chomp($mysqld_pid); - system("kill -9 $mysqld_pid"); - exit(0); -EOF - ---source include/wait_until_disconnected.inc +--source include/kill_galera.inc --connection node_1 --source include/wait_until_connected_again.inc From 8153c974e60901f5f029b925de75afa3a68f3066 Mon Sep 17 00:00:00 2001 From: Ian Gilfillan Date: Tue, 22 Mar 2022 14:45:55 +0200 Subject: [PATCH 22/52] Update contributors --- CREDITS | 2 ++ mysql-test/r/contributors.result | 2 ++ sql/contributors.h | 2 ++ 3 files changed, 6 insertions(+) diff --git a/CREDITS b/CREDITS index f5e87e18752..35092602ccf 100644 --- a/CREDITS +++ b/CREDITS @@ -4,9 +4,11 @@ organization registered in the USA. The current main sponsors of the MariaDB Foundation are: Alibaba Cloud https://www.alibabacloud.com/ (2017) +Intel https://www.intel.com (2022) MariaDB Corporation https://www.mariadb.com (2013) Microsoft https://microsoft.com/ (2017) ServiceNow https://servicenow.com (2019) +SIT https://sit.org (2022) Tencent Cloud https://cloud.tencent.com (2017) Development Bank of Singapore https://dbs.com (2016) IBM https://www.ibm.com (2017) diff --git a/mysql-test/r/contributors.result b/mysql-test/r/contributors.result index 0c7ca03a2c5..8d72373696c 100644 --- a/mysql-test/r/contributors.result +++ b/mysql-test/r/contributors.result @@ -5,6 +5,8 @@ Tencent Cloud https://cloud.tencent.com Platinum Sponsor of the MariaDB Foundati Microsoft https://microsoft.com/ Platinum Sponsor of the MariaDB Foundation MariaDB Corporation https://mariadb.com Founding member, Platinum Sponsor of the MariaDB Foundation ServiceNow https://servicenow.com Platinum Sponsor of the MariaDB Foundation +Intel https://www.intel.com Platinum Sponsor of the MariaDB Foundation +SIT https://sit.org Platinum Sponsor of the MariaDB Foundation Visma https://visma.com Gold Sponsor of the MariaDB Foundation DBS https://dbs.com Gold Sponsor of the MariaDB Foundation IBM https://www.ibm.com Gold Sponsor of the MariaDB Foundation diff --git a/sql/contributors.h b/sql/contributors.h index e16448ee985..bc8ba4eabbb 100644 --- a/sql/contributors.h +++ b/sql/contributors.h @@ -42,6 +42,8 @@ struct show_table_contributors_st show_table_contributors[]= { {"Microsoft", "https://microsoft.com/", "Platinum Sponsor of the MariaDB Foundation"}, {"MariaDB Corporation", "https://mariadb.com", "Founding member, Platinum Sponsor of the MariaDB Foundation"}, {"ServiceNow", "https://servicenow.com", "Platinum Sponsor of the MariaDB Foundation"}, + {"Intel", "https://www.intel.com", "Platinum Sponsor of the MariaDB Foundation"}, + {"SIT", "https://sit.org", "Platinum Sponsor of the MariaDB Foundation"}, {"Visma", "https://visma.com", "Gold Sponsor of the MariaDB Foundation"}, {"DBS", "https://dbs.com", "Gold Sponsor of the MariaDB Foundation"}, {"IBM", "https://www.ibm.com", "Gold Sponsor of the MariaDB Foundation"}, From bbf02c85ba2e850da546199421cb75c224747475 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Wed, 23 Mar 2022 12:45:56 -0700 Subject: [PATCH 23/52] MDEV-24281 Reading from freed memory when running main.view with --ps-protocol This bug could affect prepared statements for the command CREATE VIEW with specification that contained unnamed basic constant in select list. If generation of a valid name for the corresponding view column required resolution of conflicts with names of other columns that were explicitly defined then execution of such prepared statement and following deallocation of this statement led to reading from freed memory. Approved by Oleksandr Byelkin --- mysql-test/main/view.result | 28 ++++++++++++++++++++++++++++ mysql-test/main/view.test | 26 ++++++++++++++++++++++++++ sql/sql_view.cc | 3 ++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/view.result b/mysql-test/main/view.result index a410ab741f5..6483d76bba1 100644 --- a/mysql-test/main/view.result +++ b/mysql-test/main/view.result @@ -6839,5 +6839,33 @@ id bar Drop View v1; Drop table t1; # +# MDEV-24281: Execution of PREPARE from CREATE VIEW statement +# +create table t1 (s1 int); +insert into t1 values (3), (7), (1); +prepare stmt from " +create view v1 as select 's1', s1, 1 as My_exp_s1 from t1; +"; +execute stmt; +deallocate prepare stmt; +show create view v1; +View Create View character_set_client collation_connection +v1 CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v1` AS select 's1' AS `My_exp_1_s1`,`t1`.`s1` AS `s1`,1 AS `My_exp_s1` from `t1` latin1 latin1_swedish_ci +select * from v1; +My_exp_1_s1 s1 My_exp_s1 +s1 3 1 +s1 7 1 +s1 1 1 +drop view v1; +prepare stmt from " +create view v1 as select 's1', s1, 1 as My_exp_s1 from t1; +"; +execute stmt; +execute stmt; +ERROR 42S01: Table 'v1' already exists +deallocate prepare stmt; +drop view v1; +drop table t1; +# # End of 10.3 tests # diff --git a/mysql-test/main/view.test b/mysql-test/main/view.test index 431dfdb86f6..46232b1bcdc 100644 --- a/mysql-test/main/view.test +++ b/mysql-test/main/view.test @@ -6576,6 +6576,32 @@ SELECT v.id, v.foo AS bar FROM v1 v Drop View v1; Drop table t1; +--echo # +--echo # MDEV-24281: Execution of PREPARE from CREATE VIEW statement +--echo # + +create table t1 (s1 int); +insert into t1 values (3), (7), (1); + +prepare stmt from " +create view v1 as select 's1', s1, 1 as My_exp_s1 from t1; +"; +execute stmt; +deallocate prepare stmt; +show create view v1; +select * from v1; +drop view v1; + +prepare stmt from " +create view v1 as select 's1', s1, 1 as My_exp_s1 from t1; +"; +execute stmt; +--error ER_TABLE_EXISTS_ERROR +execute stmt; +deallocate prepare stmt; +drop view v1; +drop table t1; + --echo # --echo # End of 10.3 tests --echo # diff --git a/sql/sql_view.cc b/sql/sql_view.cc index 024bd36f483..b6787a1cb8b 100644 --- a/sql/sql_view.cc +++ b/sql/sql_view.cc @@ -96,7 +96,8 @@ static void make_unique_view_field_name(THD *thd, Item *target, itc.rewind(); } - target->orig_name= target->name.str; + if (!target->orig_name) + target->orig_name= target->name.str; target->set_name(thd, buff, name_len, system_charset_info); } From cf483a7766d0730872232fdedd727d30a493fe29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 24 Mar 2022 09:53:52 +0200 Subject: [PATCH 24/52] MDEV-17441 fixup: Remove unused my_atomic long macros --- include/my_atomic.h | 18 +----------------- wsrep-lib | 2 +- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/include/my_atomic.h b/include/my_atomic.h index 81da9e35cf9..270134a6caf 100644 --- a/include/my_atomic.h +++ b/include/my_atomic.h @@ -2,7 +2,7 @@ #define MY_ATOMIC_INCLUDED /* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - Copyright (c) 2018, 2020, MariaDB + Copyright (c) 2018, 2022, MariaDB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -115,22 +115,6 @@ #include "atomic/gcc_builtins.h" #endif -#if SIZEOF_LONG == 4 -#define my_atomic_addlong(A,B) my_atomic_add32((int32*) (A), (B)) -#define my_atomic_loadlong(A) my_atomic_load32((int32*) (A)) -#define my_atomic_loadlong_explicit(A,O) my_atomic_load32_explicit((int32*) (A), (O)) -#define my_atomic_storelong(A,B) my_atomic_store32((int32*) (A), (B)) -#define my_atomic_faslong(A,B) my_atomic_fas32((int32*) (A), (B)) -#define my_atomic_caslong(A,B,C) my_atomic_cas32((int32*) (A), (int32*) (B), (C)) -#else -#define my_atomic_addlong(A,B) my_atomic_add64((int64*) (A), (B)) -#define my_atomic_loadlong(A) my_atomic_load64((int64*) (A)) -#define my_atomic_loadlong_explicit(A,O) my_atomic_load64_explicit((int64*) (A), (O)) -#define my_atomic_storelong(A,B) my_atomic_store64((int64*) (A), (B)) -#define my_atomic_faslong(A,B) my_atomic_fas64((int64*) (A), (B)) -#define my_atomic_caslong(A,B,C) my_atomic_cas64((int64*) (A), (int64*) (B), (C)) -#endif - #ifndef MY_MEMORY_ORDER_SEQ_CST #define MY_MEMORY_ORDER_RELAXED #define MY_MEMORY_ORDER_CONSUME diff --git a/wsrep-lib b/wsrep-lib index 23fb8624624..edd141127c1 160000 --- a/wsrep-lib +++ b/wsrep-lib @@ -1 +1 @@ -Subproject commit 23fb8624624c9144c77f3874647fa0f7394b0aa8 +Subproject commit edd141127c11d78ef073f9f3ca61708821f20b32 From 32ab6219bed1ca785914af5b9e4e6553e3b7a94f Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Thu, 5 Aug 2021 12:59:37 -0600 Subject: [PATCH 25/52] MDEV-25580: rpl.rpl_semi_sync_slave_compressed_protocol crashes because of wrong packet Problem: ======== When both semi-sync and slave compression are enabled, the numbering on packet headers can become out of sync between the primary and replica servers. More specifically, after the master flushes its write, it should increment the counters that track packets. The bug is such that the master only updates the normal packet counter and leaves the compressed packet counter alone. Solution: ======== After the master flushes, additionally increment the compressed packet counter. Reviewed By: ============ Andrei Elkin: --- sql/semisync_master.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/semisync_master.cc b/sql/semisync_master.cc index b239a9776a7..0f868d4fc7a 100644 --- a/sql/semisync_master.cc +++ b/sql/semisync_master.cc @@ -1229,6 +1229,7 @@ int Repl_semi_sync_master::flush_net(THD *thd, net_clear(net, 0); net->pkt_nr++; + net->compress_pkt_nr++; result = 0; rpl_semi_sync_master_net_wait_num++; From 174f1734a9672c13928dfd03d49d25c4325f0ce3 Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Wed, 22 Sep 2021 11:25:52 -0600 Subject: [PATCH 26/52] MDEV-14608: mysqlbinlog lastest backupfile size is 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: ======== When using mariadb-binlog with --raw and --stop-never, events from the master's currently active log file should be written to their respective log file specified by --result-file, and shown on-disk. There is a bug where mariadb-binlog does not flush the result file to disk when new events are received Solution: ======== Add a function call to flush mariadb-binlog’s result file after receiving an event in --raw mode. Reviewed By: ============ Andrei Elkin --- client/mysqlbinlog.cc | 1 + .../r/binlog_mysqlbinlog_raw_flush.result | 7 +++ .../t/binlog_mysqlbinlog_raw_flush.test | 45 +++++++++++++++++++ 3 files changed, 53 insertions(+) create mode 100644 mysql-test/suite/binlog/r/binlog_mysqlbinlog_raw_flush.result create mode 100644 mysql-test/suite/binlog/t/binlog_mysqlbinlog_raw_flush.test diff --git a/client/mysqlbinlog.cc b/client/mysqlbinlog.cc index 4e28876cdf6..c7c71f6b00d 100644 --- a/client/mysqlbinlog.cc +++ b/client/mysqlbinlog.cc @@ -2510,6 +2510,7 @@ static Exit_status handle_event_raw_mode(PRINT_EVENT_INFO *print_event_info, error("Could not write into log file '%s'", out_file_name); DBUG_RETURN(ERROR_STOP); } + fflush(result_file); DBUG_RETURN(OK_CONTINUE); } diff --git a/mysql-test/suite/binlog/r/binlog_mysqlbinlog_raw_flush.result b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_raw_flush.result new file mode 100644 index 00000000000..9148f0e8c2b --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_mysqlbinlog_raw_flush.result @@ -0,0 +1,7 @@ +CREATE TABLE t1 (a int); +FLUSH LOGS; +INSERT INTO t1 VALUES (1); +# timeout TIMEOUT MYSQL_BINLOG --raw --read-from-remote-server --user=root --host=127.0.0.1 --port=MASTER_MYPORT --stop-never --result-file=MYSQLTEST_VARDIR/tmp/ master-bin.000001 +# MYSQL_BINLOG MYSQLTEST_VARDIR/tmp/master-bin.000002 > MYSQLTEST_VARDIR/tmp/local-bin.000002.out +FOUND 1 /GTID 0-1-2/ in local-bin.000002.out +DROP TABLE t1; diff --git a/mysql-test/suite/binlog/t/binlog_mysqlbinlog_raw_flush.test b/mysql-test/suite/binlog/t/binlog_mysqlbinlog_raw_flush.test new file mode 100644 index 00000000000..f95fc0137a2 --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_mysqlbinlog_raw_flush.test @@ -0,0 +1,45 @@ +# +# Purpose: +# When using mariadb-binlog with options for --raw and --stop-never, events +# from the master's currently active log file should be written to their +# respective log file specified by --result-file, and shown on-disk. This test +# ensures that the log files on disk, created by mariadb-binlog, have the most +# up-to-date events from the master. +# +# Methodology: +# On the master, rotate to a newly active binlog file and write an event to +# it. Read the master's binlog using mariadb-binlog with --raw and --stop-never +# and write the data to an intermediary binlog file (a timeout is used on this +# command to ensure it exits). Read the local intermediary binlog file to ensure +# that the master's most recent event exists in the local file. +# +# References: +# MDEV-14608: mysqlbinlog lastest backupfile size is 0 +# + +--source include/linux.inc +--source include/have_log_bin.inc + +# Create newly active log +CREATE TABLE t1 (a int); +FLUSH LOGS; +INSERT INTO t1 VALUES (1); + +# Read binlog data from master to intermediary result file +--let TIMEOUT=1 +--echo # timeout TIMEOUT MYSQL_BINLOG --raw --read-from-remote-server --user=root --host=127.0.0.1 --port=MASTER_MYPORT --stop-never --result-file=MYSQLTEST_VARDIR/tmp/ master-bin.000001 +--error 124 # Error 124 means timeout was reached +--exec timeout $TIMEOUT $MYSQL_BINLOG --raw --read-from-remote-server --user=root --host=127.0.0.1 --port=$MASTER_MYPORT --stop-never --result-file=$MYSQLTEST_VARDIR/tmp/ master-bin.000001 + +# Ensure the binlog output has the most recent events from the master +--echo # MYSQL_BINLOG MYSQLTEST_VARDIR/tmp/master-bin.000002 > MYSQLTEST_VARDIR/tmp/local-bin.000002.out +--exec $MYSQL_BINLOG $MYSQLTEST_VARDIR/tmp/master-bin.000002 > $MYSQLTEST_VARDIR/tmp/local-bin.000002.out +--let SEARCH_PATTERN= GTID 0-1-2 +--let SEARCH_FILE= $MYSQLTEST_VARDIR/tmp/local-bin.000002.out +--source include/search_pattern_in_file.inc + +# Cleanup +DROP TABLE t1; +--remove_file $MYSQLTEST_VARDIR/tmp/master-bin.000001 +--remove_file $MYSQLTEST_VARDIR/tmp/master-bin.000002 +--remove_file $MYSQLTEST_VARDIR/tmp/local-bin.000002.out From cd88b0831f7bea7e313af7d0bd96b7050f6c9d60 Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Mon, 15 Nov 2021 16:33:45 -0700 Subject: [PATCH 27/52] DBAAS-7828: Primary/replica: configuration change of autocommit=0 can not be applied Problem: ======== When the mysql.gtid_slave_pos table uses the InnoDB engine, and mysqld starts, it reads the table and begins a transaction. After reading the value, it should end the transaction and release all associated locks. The bug reported in DBAAS-7828 shows that when autocommit is off, the locks are not released, resulting in indefinite hangs on future attempts to change gtid_slave_pos. In particular, the transaction was not properly finalized because thd->server_status was not updated to reflect the end of the transaction. Solution: ======== This patch updates the code to properly commit the transaction after reading gtid_slave_pos during mysqld start-up. Reviewed By: ============ Andrei Elkin --- .../r/binlog_autocommit_off_no_hang.result | 6 +++ .../binlog_autocommit_off_no_hang-master.opt | 1 + .../t/binlog_autocommit_off_no_hang.test | 45 +++++++++++++++++++ sql/rpl_rli.cc | 2 +- 4 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 mysql-test/suite/binlog/r/binlog_autocommit_off_no_hang.result create mode 100644 mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang-master.opt create mode 100644 mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang.test diff --git a/mysql-test/suite/binlog/r/binlog_autocommit_off_no_hang.result b/mysql-test/suite/binlog/r/binlog_autocommit_off_no_hang.result new file mode 100644 index 00000000000..71eecd881ca --- /dev/null +++ b/mysql-test/suite/binlog/r/binlog_autocommit_off_no_hang.result @@ -0,0 +1,6 @@ +ALTER TABLE mysql.gtid_slave_pos ENGINE=innodb; +# Restart the server so mysqld reads the gtid_slave_pos using innodb +# Set gtid_slave_pos should not hang +SET GLOBAL gtid_slave_pos=@@gtid_binlog_pos; +COMMIT; +RESET MASTER; diff --git a/mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang-master.opt b/mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang-master.opt new file mode 100644 index 00000000000..e0fa81e6eeb --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang-master.opt @@ -0,0 +1 @@ +--autocommit=0 diff --git a/mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang.test b/mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang.test new file mode 100644 index 00000000000..8f1dbb2a2dd --- /dev/null +++ b/mysql-test/suite/binlog/t/binlog_autocommit_off_no_hang.test @@ -0,0 +1,45 @@ +# +# Purpose: +# When the mysql.gtid_slave_pos table uses the InnoDB engine, and mysqld +# starts, it reads the table and begins a transaction. After mysqld reads the +# value, it should end the transaction and release all associated locks. +# The bug reported in DBAAS-7828 shows that when autocommit is off, the locks +# are not released, resulting in indefinite hangs on future attempts to change +# gtid_slave_pos. This test ensures its fix such that the locks are properly +# released. +# +# References: +# DBAAS-7828: Primary/replica: configuration change of "autocommit=0" can +# not be applied +# + +--source include/have_innodb.inc +--source include/have_log_bin.inc + +# Reading gtid_slave_pos table is format independent so just use one for +# reduced test time +--source include/have_binlog_format_row.inc + +--let old_slave_pos_engine= query_get_value(SHOW TABLE STATUS FROM mysql LIKE 'gtid_slave_pos', Engine, 1) + +# Use a transactional engine +ALTER TABLE mysql.gtid_slave_pos ENGINE=innodb; + +--echo # Restart the server so mysqld reads the gtid_slave_pos using innodb +--exec echo "wait" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--shutdown_server +--source include/wait_until_disconnected.inc +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.1.expect +--enable_reconnect +--source include/wait_until_connected_again.inc + +--echo # Set gtid_slave_pos should not hang +SET GLOBAL gtid_slave_pos=@@gtid_binlog_pos; +COMMIT; + +# Revert table type +--disable_query_log +--eval ALTER TABLE mysql.gtid_slave_pos ENGINE=$old_slave_pos_engine +--enable_query_log + +RESET MASTER; diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc index 88cb8fc5e1e..176801130d7 100644 --- a/sql/rpl_rli.cc +++ b/sql/rpl_rli.cc @@ -1675,7 +1675,7 @@ end: { table->file->ha_index_or_rnd_end(); ha_commit_trans(thd, FALSE); - ha_commit_trans(thd, TRUE); + trans_commit(thd); } if (table_opened) { From dcb2968f90b6849eee079078557b7b25bec160b8 Mon Sep 17 00:00:00 2001 From: Vlad Lesin Date: Mon, 14 Mar 2022 20:46:40 +0300 Subject: [PATCH 28/52] MDEV-27557 InnoDB unnecessarily commits mtr during secondary index search to preserve clustered index latching order New function to release latches till savepoint was added in mtr_t. As there is no longer need to limit MDEV-20605 fix usage for locking reads only, the limitation is removed. --- storage/innobase/include/mtr0mtr.h | 9 +++++ storage/innobase/mtr/mtr0mtr.cc | 59 ++++++++++++++++++++++++++++++ storage/innobase/row/row0sel.cc | 43 +++++++--------------- 3 files changed, 82 insertions(+), 29 deletions(-) diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 3f9777ad225..0c7051ed31a 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -97,6 +97,15 @@ struct mtr_t { /** Commit the mini-transaction. */ void commit(); + /** Release latches till savepoint. To simplify the code only + MTR_MEMO_S_LOCK and MTR_MEMO_PAGE_S_FIX slot types are allowed to be + released, otherwise it would be neccesary to add one more argument in the + function to point out what slot types are allowed for rollback, and this + would be overengineering as currently the function is used only in one place + in the code. + @param savepoint savepoint, can be obtained with get_savepoint */ + void rollback_to_savepoint(ulint savepoint); + /** Commit a mini-transaction that is shrinking a tablespace. @param space tablespace that is being shrunk */ ATTRIBUTE_COLD void commit_shrink(fil_space_t &space); diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index ca97a9e77e4..2daada16a91 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -300,6 +300,50 @@ struct ReleaseAll { } }; +/** Stops iteration is savepoint is reached */ +template struct TillSavepoint +{ + + /** Constructor + @param[in] functor functor which is called if savepoint is not reached + @param[in] savepoint savepoint value to rollback + @param[in] used current position in slots container */ + TillSavepoint(const Functor &functor, ulint savepoint, ulint used) + : functor(functor), + m_slots_count((used - savepoint) / sizeof(mtr_memo_slot_t)) + { + ut_ad(savepoint); + ut_ad(used >= savepoint); + } + + /** @return true if savepoint is not reached, false otherwise */ + bool operator()(mtr_memo_slot_t *slot) + { +#ifdef UNIV_DEBUG + /** This check is added because the code is invoked only from + row_search_mvcc() to release latches acquired during clustered index search + for secondary index record. To make it more universal we could add one more + member in this functor for debug build to pass only certain slot types, + but this is currently not necessary. */ + switch (slot->type) + { + case MTR_MEMO_S_LOCK: + case MTR_MEMO_PAGE_S_FIX: + break; + default: + ut_a(false); + } +#endif + return m_slots_count-- && functor(slot); + } + +private: + /** functor to invoke */ + const Functor &functor; + /** slots count left till savepoint */ + ulint m_slots_count; +}; + #ifdef UNIV_DEBUG /** Check that all slots have been handled. */ struct DebugCheck { @@ -468,6 +512,21 @@ void mtr_t::commit() release_resources(); } +/** Release latches till savepoint. To simplify the code only +MTR_MEMO_S_LOCK and MTR_MEMO_PAGE_S_FIX slot types are allowed to be +released, otherwise it would be neccesary to add one more argument in the +function to point out what slot types are allowed for rollback, and this +would be overengineering as corrently the function is used only in one place +in the code. +@param savepoint savepoint, can be obtained with get_savepoint */ +void mtr_t::rollback_to_savepoint(ulint savepoint) +{ + Iterate> iteration( + TillSavepoint(ReleaseLatches(), savepoint, + get_savepoint())); + m_memo.for_each_block_in_reverse(iteration); +} + /** Shrink a tablespace. */ struct Shrink { diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 6b7397454b3..1dae2edbc01 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -3588,14 +3588,12 @@ record with the same ordering prefix in in the B-tree index @param[in] latch_mode latch mode wished in restoration @param[in] pcur cursor whose position has been stored @param[in] moves_up true if the cursor moves up in the index -@param[in] mtr mtr; CAUTION: may commit mtr temporarily! -@param[in] select_lock_type select lock type +@param[in,out] mtr mtr; CAUTION: may commit mtr temporarily! @return true if we may need to process the record the cursor is now positioned on (i.e. we should not go to the next record yet) */ static bool sel_restore_position_for_mysql(bool *same_user_rec, ulint latch_mode, btr_pcur_t *pcur, - bool moves_up, mtr_t *mtr, - lock_mode select_lock_type) + bool moves_up, mtr_t *mtr) { auto status = btr_pcur_restore_position(latch_mode, pcur, mtr); @@ -3618,8 +3616,7 @@ static bool sel_restore_position_for_mysql(bool *same_user_rec, switch (pcur->rel_pos) { case BTR_PCUR_ON: if (!*same_user_rec && moves_up) { - if (status == btr_pcur_t::SAME_UNIQ - && select_lock_type != LOCK_NONE) + if (status == btr_pcur_t::SAME_UNIQ) return true; next: if (btr_pcur_move_to_next(pcur, mtr) @@ -4303,7 +4300,7 @@ row_search_mvcc( const rec_t* clust_rec; Row_sel_get_clust_rec_for_mysql row_sel_get_clust_rec_for_mysql; ibool unique_search = FALSE; - ibool mtr_has_extra_clust_latch = FALSE; + ulint mtr_extra_clust_savepoint = 0; bool moves_up = false; /* if the returned record was locked and we did a semi-consistent read (fetch the newest committed version), then this is set to @@ -4673,7 +4670,7 @@ wait_table_again: bool need_to_process = sel_restore_position_for_mysql( &same_user_rec, BTR_SEARCH_LEAF, - pcur, moves_up, &mtr, prebuilt->select_lock_type); + pcur, moves_up, &mtr); if (UNIV_UNLIKELY(need_to_process)) { if (UNIV_UNLIKELY(prebuilt->row_read_type @@ -5355,7 +5352,7 @@ requires_clust_rec: /* It was a non-clustered index and we must fetch also the clustered index record */ - mtr_has_extra_clust_latch = TRUE; + mtr_extra_clust_savepoint = mtr.get_savepoint(); ut_ad(!vrow); /* The following call returns 'offsets' associated with @@ -5643,27 +5640,15 @@ next_rec: /* No need to do store restore for R-tree */ mtr.commit(); mtr.start(); - mtr_has_extra_clust_latch = FALSE; - } else if (mtr_has_extra_clust_latch) { - /* If we have extra cluster latch, we must commit - mtr if we are moving to the next non-clustered + mtr_extra_clust_savepoint = 0; + } else if (mtr_extra_clust_savepoint) { + /* We must release any clustered index latches + if we are moving to the next non-clustered index record, because we could break the latching order if we would access a different clustered index page right away without releasing the previous. */ - - btr_pcur_store_position(pcur, &mtr); - mtr.commit(); - mtr_has_extra_clust_latch = FALSE; - - mtr.start(); - - if (sel_restore_position_for_mysql(&same_user_rec, - BTR_SEARCH_LEAF, - pcur, moves_up, &mtr, - prebuilt->select_lock_type) - ) { - goto rec_loop; - } + mtr.rollback_to_savepoint(mtr_extra_clust_savepoint); + mtr_extra_clust_savepoint = 0; } if (moves_up) { @@ -5723,7 +5708,7 @@ page_read_error: lock_table_wait: mtr.commit(); - mtr_has_extra_clust_latch = FALSE; + mtr_extra_clust_savepoint = 0; trx->error_state = err; @@ -5752,7 +5737,7 @@ lock_table_wait: if (!dict_index_is_spatial(index)) { sel_restore_position_for_mysql( &same_user_rec, BTR_SEARCH_LEAF, pcur, - moves_up, &mtr, prebuilt->select_lock_type); + moves_up, &mtr); } if (trx->isolation_level <= TRX_ISO_READ_COMMITTED From 6437b304048d0b42e6b2b8f59631ea04bd3c2891 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 25 Mar 2022 07:05:08 +0400 Subject: [PATCH 29/52] MDEV-28166 sql_mode=ORACLE: fully qualified package function calls do not work: db.pkg.func() Also fixes MDEV-19328 sql_mode=ORACLE: Package function in VIEW --- .../suite/compat/oracle/r/sp-package.result | 210 +++++++++++++++++ .../suite/compat/oracle/t/sp-package.test | 213 ++++++++++++++++++ sql/item.cc | 20 +- sql/sql_class.h | 13 ++ sql/sql_lex.cc | 50 ++++ sql/sql_lex.h | 5 + sql/sql_yacc.yy | 5 + sql/sql_yacc_ora.yy | 5 + 8 files changed, 519 insertions(+), 2 deletions(-) diff --git a/mysql-test/suite/compat/oracle/r/sp-package.result b/mysql-test/suite/compat/oracle/r/sp-package.result index 598c766c808..a8be1a8eb16 100644 --- a/mysql-test/suite/compat/oracle/r/sp-package.result +++ b/mysql-test/suite/compat/oracle/r/sp-package.result @@ -2925,3 +2925,213 @@ END $$ CALL xyz.xyz123(17,18,@R); DROP PACKAGE xyz; DROP TABLE t1; +# +# MDEV-28166 sql_mode=ORACLE: fully qualified package function calls do not work: db.pkg.func() +# +SELECT `db `.pkg.func(); +ERROR 42000: Incorrect database name 'db ' +SELECT db.`pkg `.func(); +ERROR 42000: Incorrect routine name 'pkg ' +SELECT db.pkg.`func `(); +ERROR 42000: Incorrect routine name 'func ' +CREATE DATABASE db1; +USE db1; +CREATE PACKAGE pkg1 AS +FUNCTION f1 RETURN TEXT; +FUNCTION f2_db1_pkg1_f1 RETURN TEXT; +FUNCTION f2_pkg1_f1 RETURN TEXT; +FUNCTION f2_f1 RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY pkg1 +AS +FUNCTION f1 RETURN TEXT IS +BEGIN +RETURN 'This is db1.pkg1.f1'; +END; +FUNCTION f2_db1_pkg1_f1 RETURN TEXT IS +BEGIN +RETURN db1.pkg1.f1(); +END; +FUNCTION f2_pkg1_f1 RETURN TEXT IS +BEGIN +RETURN pkg1.f1(); +END; +FUNCTION f2_f1 RETURN TEXT IS +BEGIN +RETURN f1(); +END; +END; +$$ +USE db1; +SELECT pkg1.f2_db1_pkg1_f1(); +pkg1.f2_db1_pkg1_f1() +This is db1.pkg1.f1 +SELECT pkg1.f2_pkg1_f1(); +pkg1.f2_pkg1_f1() +This is db1.pkg1.f1 +SELECT pkg1.f2_f1(); +pkg1.f2_f1() +This is db1.pkg1.f1 +SELECT db1.pkg1.f2_db1_pkg1_f1(); +db1.pkg1.f2_db1_pkg1_f1() +This is db1.pkg1.f1 +SELECT db1.pkg1.f2_pkg1_f1(); +db1.pkg1.f2_pkg1_f1() +This is db1.pkg1.f1 +SELECT db1.pkg1.f2_f1(); +db1.pkg1.f2_f1() +This is db1.pkg1.f1 +USE test; +SELECT db1.pkg1.f2_db1_pkg1_f1(); +db1.pkg1.f2_db1_pkg1_f1() +This is db1.pkg1.f1 +SELECT db1.pkg1.f2_pkg1_f1(); +db1.pkg1.f2_pkg1_f1() +This is db1.pkg1.f1 +SELECT db1.pkg1.f2_f1(); +db1.pkg1.f2_f1() +This is db1.pkg1.f1 +DROP DATABASE db1; +CREATE DATABASE db1; +CREATE DATABASE db2; +CREATE PACKAGE db1.pkg1 AS +FUNCTION f1 RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY db1.pkg1 AS +FUNCTION f1 RETURN TEXT AS +BEGIN +RETURN 'This is db1.pkg1.f1'; +END; +END; +$$ +CREATE PACKAGE db2.pkg1 AS +FUNCTION f1 RETURN TEXT; +FUNCTION var1 RETURN TEXT; +FUNCTION var2 RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY db2.pkg1 AS +m_var1 TEXT; +m_var2 TEXT; +FUNCTION f1 RETURN TEXT AS +BEGIN +RETURN 'This is db2.pkg1.f1'; +END; +FUNCTION var1 RETURN TEXT AS +BEGIN +RETURN m_var1; +END; +FUNCTION var2 RETURN TEXT AS +BEGIN +RETURN m_var2; +END; +BEGIN +m_var1:= db1.pkg1.f1(); +m_var2:= db2.pkg1.f1(); +END; +$$ +SELECT db2.pkg1.var1(), db2.pkg1.var2(); +db2.pkg1.var1() db2.pkg1.var2() +This is db1.pkg1.f1 This is db2.pkg1.f1 +DROP DATABASE db1; +DROP DATABASE db2; +CREATE PACKAGE pkg1 AS +FUNCTION f1(a TEXT) RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY pkg1 AS +FUNCTION f1(a TEXT) RETURN TEXT AS +BEGIN +RETURN a; +END; +END; +$$ +SELECT test.pkg1.f1('xxx'); +test.pkg1.f1('xxx') +xxx +SELECT test.pkg1.f1('xxx' AS a); +ERROR 42000: You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near 'AS a)' at line 1 +DROP PACKAGE pkg1; +# +# MDEV-19328 sql_mode=ORACLE: Package function in VIEW +# +SET sql_mode=ORACLE; +CREATE PACKAGE test1 AS +FUNCTION f_test RETURN number; +END test1; +$$ +CREATE PACKAGE BODY test1 +AS +FUNCTION f_test RETURN NUMBER IS +BEGIN +RETURN 1; +END; +END test1; +$$ +SET sql_mode=ORACLE; +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test1.f_test(); +SELECT * FROM v_test; +c1 +1 +SHOW CREATE VIEW v_test; +View v_test +Create View CREATE VIEW "v_test" AS select 1 AS "c1" from DUAL where 1 = "test"."test1"."f_test"() +character_set_client latin1 +collation_connection latin1_swedish_ci +SET sql_mode=DEFAULT; +SELECT * FROM v_test; +c1 +1 +SHOW CREATE VIEW v_test; +View v_test +Create View CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v_test` AS select 1 AS `c1` from DUAL where 1 = `test`.`test1`.`f_test`() +character_set_client latin1 +collation_connection latin1_swedish_ci +DROP VIEW v_test; +SET sql_mode=DEFAULT; +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test1.f_test(); +ERROR 42000: FUNCTION test1.f_test does not exist +SET sql_mode=ORACLE; +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test.test1.f_test(); +SELECT * FROM v_test; +c1 +1 +SHOW CREATE VIEW v_test; +View v_test +Create View CREATE VIEW "v_test" AS select 1 AS "c1" from DUAL where 1 = "test"."test1"."f_test"() +character_set_client latin1 +collation_connection latin1_swedish_ci +SET sql_mode=DEFAULT; +SELECT * FROM v_test; +c1 +1 +SHOW CREATE VIEW v_test; +View v_test +Create View CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v_test` AS select 1 AS `c1` from DUAL where 1 = `test`.`test1`.`f_test`() +character_set_client latin1 +collation_connection latin1_swedish_ci +DROP VIEW v_test; +SET sql_mode=DEFAULT; +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test.test1.f_test(); +SELECT * FROM v_test; +c1 +1 +SHOW CREATE VIEW v_test; +View v_test +Create View CREATE ALGORITHM=UNDEFINED DEFINER=`root`@`localhost` SQL SECURITY DEFINER VIEW `v_test` AS select 1 AS `c1` from DUAL where 1 = `test`.`test1`.`f_test`() +character_set_client latin1 +collation_connection latin1_swedish_ci +SET sql_mode=ORACLE; +SELECT * FROM v_test; +c1 +1 +SHOW CREATE VIEW v_test; +View v_test +Create View CREATE VIEW "v_test" AS select 1 AS "c1" from DUAL where 1 = "test"."test1"."f_test"() +character_set_client latin1 +collation_connection latin1_swedish_ci +DROP VIEW v_test; +SET sql_mode=ORACLE; +DROP PACKAGE test1; diff --git a/mysql-test/suite/compat/oracle/t/sp-package.test b/mysql-test/suite/compat/oracle/t/sp-package.test index 5eac3987982..8fcf72d3145 100644 --- a/mysql-test/suite/compat/oracle/t/sp-package.test +++ b/mysql-test/suite/compat/oracle/t/sp-package.test @@ -2682,3 +2682,216 @@ DELIMITER ;$$ CALL xyz.xyz123(17,18,@R); DROP PACKAGE xyz; DROP TABLE t1; + + +--echo # +--echo # MDEV-28166 sql_mode=ORACLE: fully qualified package function calls do not work: db.pkg.func() +--echo # + +--error ER_WRONG_DB_NAME +SELECT `db `.pkg.func(); +--error ER_SP_WRONG_NAME +SELECT db.`pkg `.func(); +--error ER_SP_WRONG_NAME +SELECT db.pkg.`func `(); + + +CREATE DATABASE db1; +USE db1; + +DELIMITER $$; +CREATE PACKAGE pkg1 AS + FUNCTION f1 RETURN TEXT; + FUNCTION f2_db1_pkg1_f1 RETURN TEXT; + FUNCTION f2_pkg1_f1 RETURN TEXT; + FUNCTION f2_f1 RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY pkg1 +AS + FUNCTION f1 RETURN TEXT IS + BEGIN + RETURN 'This is db1.pkg1.f1'; + END; + FUNCTION f2_db1_pkg1_f1 RETURN TEXT IS + BEGIN + RETURN db1.pkg1.f1(); + END; + FUNCTION f2_pkg1_f1 RETURN TEXT IS + BEGIN + RETURN pkg1.f1(); + END; + FUNCTION f2_f1 RETURN TEXT IS + BEGIN + RETURN f1(); + END; +END; +$$ +DELIMITER ;$$ + +USE db1; +SELECT pkg1.f2_db1_pkg1_f1(); +SELECT pkg1.f2_pkg1_f1(); +SELECT pkg1.f2_f1(); + +SELECT db1.pkg1.f2_db1_pkg1_f1(); +SELECT db1.pkg1.f2_pkg1_f1(); +SELECT db1.pkg1.f2_f1(); + +USE test; +SELECT db1.pkg1.f2_db1_pkg1_f1(); +SELECT db1.pkg1.f2_pkg1_f1(); +SELECT db1.pkg1.f2_f1(); + +DROP DATABASE db1; + + +# +# Testing db.pkg.func() in the package initialization section +# + +CREATE DATABASE db1; +CREATE DATABASE db2; + +DELIMITER $$; +CREATE PACKAGE db1.pkg1 AS + FUNCTION f1 RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY db1.pkg1 AS + FUNCTION f1 RETURN TEXT AS + BEGIN + RETURN 'This is db1.pkg1.f1'; + END; +END; +$$ +DELIMITER ;$$ + + +DELIMITER $$; +CREATE PACKAGE db2.pkg1 AS + FUNCTION f1 RETURN TEXT; + FUNCTION var1 RETURN TEXT; + FUNCTION var2 RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY db2.pkg1 AS + m_var1 TEXT; + m_var2 TEXT; + FUNCTION f1 RETURN TEXT AS + BEGIN + RETURN 'This is db2.pkg1.f1'; + END; + FUNCTION var1 RETURN TEXT AS + BEGIN + RETURN m_var1; + END; + FUNCTION var2 RETURN TEXT AS + BEGIN + RETURN m_var2; + END; +BEGIN + m_var1:= db1.pkg1.f1(); + m_var2:= db2.pkg1.f1(); +END; +$$ +DELIMITER ;$$ + +SELECT db2.pkg1.var1(), db2.pkg1.var2(); + +DROP DATABASE db1; +DROP DATABASE db2; + +# +# Make sure fully qualified package function call does not support AS syntax: +# SELECT db.pkg.func(10 AS a); +# + +DELIMITER $$; +CREATE PACKAGE pkg1 AS + FUNCTION f1(a TEXT) RETURN TEXT; +END; +$$ +CREATE PACKAGE BODY pkg1 AS + FUNCTION f1(a TEXT) RETURN TEXT AS + BEGIN + RETURN a; + END; +END; +$$ +DELIMITER ;$$ +SELECT test.pkg1.f1('xxx'); +--error ER_PARSE_ERROR +SELECT test.pkg1.f1('xxx' AS a); +DROP PACKAGE pkg1; + + +--echo # +--echo # MDEV-19328 sql_mode=ORACLE: Package function in VIEW +--echo # + +SET sql_mode=ORACLE; +DELIMITER $$; +CREATE PACKAGE test1 AS + FUNCTION f_test RETURN number; +END test1; +$$ +CREATE PACKAGE BODY test1 +AS + FUNCTION f_test RETURN NUMBER IS + BEGIN + RETURN 1; + END; +END test1; +$$ +DELIMITER ;$$ + + +SET sql_mode=ORACLE; +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test1.f_test(); +SELECT * FROM v_test; +--vertical_results +SHOW CREATE VIEW v_test; +--horizontal_results +SET sql_mode=DEFAULT; +SELECT * FROM v_test; +--vertical_results +SHOW CREATE VIEW v_test; +--horizontal_results +DROP VIEW v_test; + + +SET sql_mode=DEFAULT; +--error ER_SP_DOES_NOT_EXIST +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test1.f_test(); + + +SET sql_mode=ORACLE; +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test.test1.f_test(); +SELECT * FROM v_test; +--vertical_results +SHOW CREATE VIEW v_test; +--horizontal_results +SET sql_mode=DEFAULT; +SELECT * FROM v_test; +--vertical_results +SHOW CREATE VIEW v_test; +--horizontal_results +DROP VIEW v_test; + + +SET sql_mode=DEFAULT; +CREATE VIEW v_test AS SELECT 1 AS c1 FROM DUAL WHERE 1=test.test1.f_test(); +SELECT * FROM v_test; +--vertical_results +SHOW CREATE VIEW v_test; +--horizontal_results +SET sql_mode=ORACLE; +SELECT * FROM v_test; +--vertical_results +SHOW CREATE VIEW v_test; +--horizontal_results +DROP VIEW v_test; + +SET sql_mode=ORACLE; +DROP PACKAGE test1; diff --git a/sql/item.cc b/sql/item.cc index 2fe5411f972..f06055f0a08 100644 --- a/sql/item.cc +++ b/sql/item.cc @@ -2898,9 +2898,11 @@ Item_sp::func_name(THD *thd) const /* Calculate length to avoid reallocation of string for sure */ size_t len= (((m_name->m_explicit_name ? m_name->m_db.length : 0) + m_name->m_name.length)*2 + //characters*quoting - 2 + // ` and ` + 2 + // quotes for the function name + 2 + // quotes for the package name (m_name->m_explicit_name ? 3 : 0) + // '`', '`' and '.' for the db + 1 + // '.' between package and function 1 + // end of string ALIGN_SIZE(1)); // to avoid String reallocation String qname((char *)alloc_root(thd->mem_root, len), len, @@ -2912,7 +2914,21 @@ Item_sp::func_name(THD *thd) const append_identifier(thd, &qname, &m_name->m_db); qname.append('.'); } - append_identifier(thd, &qname, &m_name->m_name); + if (m_sp && m_sp->m_handler == &sp_handler_package_function) + { + /* + In case of a package function split `pkg.func` and print + quoted `pkg` and `func` separately, so the entire result looks like: + `db`.`pkg`.`func` + */ + Database_qualified_name tmp= Database_qualified_name::split(m_name->m_name); + DBUG_ASSERT(tmp.m_db.length); + append_identifier(thd, &qname, &tmp.m_db); + qname.append('.'); + append_identifier(thd, &qname, &tmp.m_name); + } + else + append_identifier(thd, &qname, &m_name->m_name); return qname.c_ptr_safe(); } diff --git a/sql/sql_class.h b/sql/sql_class.h index 311b47aea61..637e16c991b 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -6680,6 +6680,19 @@ public: } void copy(MEM_ROOT *mem_root, const LEX_CSTRING &db, const LEX_CSTRING &name); + + static Database_qualified_name split(const LEX_CSTRING &txt) + { + DBUG_ASSERT(txt.str[txt.length] == '\0'); // Expect 0-terminated input + const char *dot= strchr(txt.str, '.'); + if (!dot) + return Database_qualified_name(NULL, 0, txt.str, txt.length); + size_t dblen= dot - txt.str; + Lex_cstring db(txt.str, dblen); + Lex_cstring name(txt.str + dblen + 1, txt.length - dblen - 1); + return Database_qualified_name(db, name); + } + // Export db and name as a qualified name string: 'db.name' size_t make_qname(char *dst, size_t dstlen) const { diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index d51b9bd5a26..cffc0eb25dd 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -8148,6 +8148,56 @@ Item *LEX::make_item_func_call_generic(THD *thd, Lex_ident_cli_st *cdb, } +/* + Create a 3-step qualified function call. + Currently it's possible for package routines only, e.g.: + SELECT db.pkg.func(); +*/ +Item *LEX::make_item_func_call_generic(THD *thd, + Lex_ident_cli_st *cdb, + Lex_ident_cli_st *cpkg, + Lex_ident_cli_st *cfunc, + List *args) +{ + static Lex_cstring dot(".", 1); + Lex_ident_sys db(thd, cdb), pkg(thd, cpkg), func(thd, cfunc); + Database_qualified_name q_db_pkg(db, pkg); + Database_qualified_name q_pkg_func(pkg, func); + sp_name *qname; + + if (db.is_null() || pkg.is_null() || func.is_null()) + return NULL; // EOM + + if (check_db_name((LEX_STRING*) static_cast(&db))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db.str); + return NULL; + } + if (check_routine_name(&pkg) || + check_routine_name(&func)) + return NULL; + + // Concat `pkg` and `name` to `pkg.name` + LEX_CSTRING pkg_dot_func; + if (q_pkg_func.make_qname(thd->mem_root, &pkg_dot_func) || + check_ident_length(&pkg_dot_func) || + !(qname= new (thd->mem_root) sp_name(&db, &pkg_dot_func, true))) + return NULL; + + sp_handler_package_function.add_used_routine(thd->lex, thd, qname); + sp_handler_package_body.add_used_routine(thd->lex, thd, &q_db_pkg); + + thd->lex->safe_to_cache_query= 0; + + if (args && args->elements > 0) + return new (thd->mem_root) Item_func_sp(thd, thd->lex->current_context(), + qname, &sp_handler_package_function, + *args); + return new (thd->mem_root) Item_func_sp(thd, thd->lex->current_context(), + qname, &sp_handler_package_function); +} + + Item *LEX::create_item_qualified_asterisk(THD *thd, const Lex_ident_sys_st *name) { diff --git a/sql/sql_lex.h b/sql/sql_lex.h index e1f34afa350..a63ec7c9153 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -3697,6 +3697,11 @@ public: Item *make_item_func_substr(THD *thd, Item *a, Item *b); Item *make_item_func_call_generic(THD *thd, Lex_ident_cli_st *db, Lex_ident_cli_st *name, List *args); + Item *make_item_func_call_generic(THD *thd, + Lex_ident_cli_st *db, + Lex_ident_cli_st *pkg, + Lex_ident_cli_st *name, + List *args); my_var *create_outvar(THD *thd, const LEX_CSTRING *name); /* diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index e0e4308ef5c..2852d2efc0c 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -11210,6 +11210,11 @@ function_call_generic: if (unlikely(!($$= Lex->make_item_func_call_generic(thd, &$1, &$3, $5)))) MYSQL_YYABORT; } + | ident_cli '.' ident_cli '.' ident_cli '(' opt_expr_list ')' + { + if (unlikely(!($$= Lex->make_item_func_call_generic(thd, &$1, &$3, &$5, $7)))) + MYSQL_YYABORT; + } ; fulltext_options: diff --git a/sql/sql_yacc_ora.yy b/sql/sql_yacc_ora.yy index b45cee01502..8b96937c955 100644 --- a/sql/sql_yacc_ora.yy +++ b/sql/sql_yacc_ora.yy @@ -11149,6 +11149,11 @@ function_call_generic: if (unlikely(!($$= Lex->make_item_func_call_generic(thd, &$1, &$3, $5)))) MYSQL_YYABORT; } + | ident_cli '.' ident_cli '.' ident_cli '(' opt_expr_list ')' + { + if (unlikely(!($$= Lex->make_item_func_call_generic(thd, &$1, &$3, &$5, $7)))) + MYSQL_YYABORT; + } ; fulltext_options: From e9e6db93550aee9e6567b94ab928da9b13050e98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 25 Mar 2022 09:23:16 +0200 Subject: [PATCH 30/52] Fix g++-12 -O2 -Wstringop-overflow buf_pool_t::watch_unset(): Reorder some code so that no warning will be emitted in CMAKE_BUILD_TYPE=RelWithDebInfo. It is unclear why invoking watch_is_sentinel() before accessing the block descriptor state would make the warning disappear. --- storage/innobase/buf/buf0buf.cc | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 3f4f7888315..24eef21d461 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2160,17 +2160,21 @@ void buf_pool_t::watch_unset(const page_id_t id, buf_pool_t::hash_chain &chain) buf_page_t *w; { transactional_lock_guard g{page_hash.lock_get(chain)}; - /* The page must exist because watch_set() increments buf_fix_count. */ + /* The page must exist because watch_set() did fix(). */ w= page_hash.get(id, chain); - const auto state= w->state(); - ut_ad(state >= buf_page_t::UNFIXED); - ut_ad(~buf_page_t::LRU_MASK & state); ut_ad(w->in_page_hash); - if (state != buf_page_t::UNFIXED + 1 || !watch_is_sentinel(*w)) + if (!watch_is_sentinel(*w)) { - w->unfix(); + no_watch: + ut_d(const auto s=) w->unfix(); + ut_ad(~buf_page_t::LRU_MASK & s); w= nullptr; } + const auto state= w->state(); + ut_ad(~buf_page_t::LRU_MASK & state); + ut_ad(state >= buf_page_t::UNFIXED); + if (state != buf_page_t::UNFIXED + 1) + goto no_watch; } if (!w) From 9b2fa2ae8e26e263714daa96d4b72dd6911994bd Mon Sep 17 00:00:00 2001 From: sjaakola Date: Fri, 11 Mar 2022 10:27:36 +0200 Subject: [PATCH 31/52] MDEV-24845 Oddities around innodb_fatal_semaphore_wait_threshold and global.innodb_disallow_writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a mtr test for reproducing a test scenario where despite of innodb_disallow_writes blocking, writes to file system can still happen. The test launches a garbd node, which triggers one of the cluster node to switch to SST donor state. In this state, all disk activity should be halted, and e.g. innodb_disallow_writes has been set. The test records md5sum aggregate over mariadb data directory when the node enters the donor state, and records another md5sum when the node leaves the donor state. If there is no IO activity in data directory, these hashes should be equal. For this test, the Donor state processing, has beeen instrumented so that, SST donor thread can be stopped when entering the donor state. The test uses this new dbug sync point, to control when to record the md5sums. New SST script was added: wsrep_sst_backup, and garbd uses backup method to lauch the donor node to call this script, and to enter in donor state. The backup script could be later extended as general purpose backup method for the cluster. This commit fixes also one race condition happening in wsrep_sst_rsync, like this: * wsrep_rsync_sst script requests for flush tables, and then waits in a loop until mariadbd has created file tables_flushed, as confirmation that FLUSH TABLES has completed * mariadbd's SST donor thread, wakes for the flush table request and then performs FTWRL, and after this it creates the tables_flushed file * note that SST script will now continue to startup rsync sending * mariadbd's SST donor thread now calls for sst_disallow_writes(), so that innodb would setup disk IO blockage, however rsyncing may already be ongoing at this point This race condition is fixed in this commit, by performing all disk IO blocking before creating the tables_flushed file. Reviewed-by: Jan Lindström --- .../r/galera_garbd_backup.result | 41 ++++++ .../galera_3nodes/t/galera_garbd_backup.cnf | 13 ++ .../galera_3nodes/t/galera_garbd_backup.test | 134 ++++++++++++++++++ scripts/CMakeLists.txt | 1 + scripts/wsrep_sst_backup.sh | 112 +++++++++++++++ sql/wsrep_sst.cc | 80 +++++++---- 6 files changed, 350 insertions(+), 31 deletions(-) create mode 100644 mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result create mode 100644 mysql-test/suite/galera_3nodes/t/galera_garbd_backup.cnf create mode 100644 mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test create mode 100644 scripts/wsrep_sst_backup.sh diff --git a/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result b/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result new file mode 100644 index 00000000000..f176ef1dd7f --- /dev/null +++ b/mysql-test/suite/galera_3nodes/r/galera_garbd_backup.result @@ -0,0 +1,41 @@ +connection node_1; +connection node_1; +connection node_2; +connection node_3; +connection node_1; +SET GLOBAL innodb_max_dirty_pages_pct=99; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; +connection node_1; +CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB; +CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; +INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); +INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; +connection node_2; +Killing node #3 to free ports for garbd ... +connection node_3; +connection node_1; +SET GLOBAL debug_dbug = "+d,sync.wsrep_donor_state"; +Starting garbd ... +SET SESSION debug_sync = "now WAIT_FOR sync.wsrep_donor_state_reached"; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=0; +SET GLOBAL innodb_max_dirty_pages_pct=0; +SET SESSION debug_sync = "now SIGNAL signal.wsrep_donor_state"; +SET GLOBAL debug_dbug = ""; +SET debug_sync='RESET'; +connection node_2; +Killing garbd ... +connection node_1; +connection node_2; +DROP TABLE t1; +DROP TABLE ten; +Restarting node #3 to satisfy MTR's end-of-test checks +connection node_3; +connection node_1; +SET GLOBAL innodb_max_dirty_pages_pct = 75.000000; +SET GLOBAL innodb_max_dirty_pages_pct_lwm = 0.000000; +connection node_1; +CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)"); +connection node_2; +CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)"); +connection node_3; +CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)"); diff --git a/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.cnf b/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.cnf new file mode 100644 index 00000000000..8b7cb948a87 --- /dev/null +++ b/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.cnf @@ -0,0 +1,13 @@ +!include ../galera_3nodes.cnf + +[mysqld] +wsrep_sst_method=rsync + +[mysqld.1] +wsrep_node_name=node1 + +[mysqld.2] +wsrep_node_name=node2 + +[mysqld.3] +wsrep_node_name=node3 diff --git a/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test b/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test new file mode 100644 index 00000000000..302bf430dde --- /dev/null +++ b/mysql-test/suite/galera_3nodes/t/galera_garbd_backup.test @@ -0,0 +1,134 @@ +# +# A very basic test for the galera arbitrator. We shut down node #3 and use its port allocation to start garbd. +# As MTR does not allow multiple servers to be down at the same time, we are limited as to what we can test. +# + +--source include/galera_cluster.inc +--source include/have_innodb.inc +--source include/have_garbd.inc +--source include/big_test.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +--connection node_1 +# Save original auto_increment_offset values. +--let $node_1=node_1 +--let $node_2=node_2 +--let $node_3=node_3 + +--let $galera_connection_name = node_3 +--let $galera_server_number = 3 +--source include/galera_connect.inc +--source suite/galera/include/galera_base_port.inc +--let $NODE_GALERAPORT_3 = $_NODE_GALERAPORT + +--source ../galera/include/auto_increment_offset_save.inc + +# Save galera ports +--connection node_1 +--source suite/galera/include/galera_base_port.inc +--let $NODE_GALERAPORT_1 = $_NODE_GALERAPORT +--let $datadir= `SELECT @@datadir` + +--let $innodb_max_dirty_pages_pct = `SELECT @@innodb_max_dirty_pages_pct` +--let $innodb_max_dirty_pages_pct_lwm = `SELECT @@innodb_max_dirty_pages_pct_lwm` + +SET GLOBAL innodb_max_dirty_pages_pct=99; +SET GLOBAL innodb_max_dirty_pages_pct_lwm=99; + +--connection node_1 +CREATE TABLE t1 (f1 INTEGER, f2 varchar(1024)) Engine=InnoDB; +CREATE TABLE ten (f1 INTEGER) ENGINE=InnoDB; +INSERT INTO ten VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10); +INSERT INTO t1 (f2) SELECT REPEAT('x', 1024) FROM ten AS a1, ten AS a2, ten AS a3, ten AS a4; + +--connection node_2 +--source suite/galera/include/galera_base_port.inc +--let $NODE_GALERAPORT_2 = $_NODE_GALERAPORT + +--echo Killing node #3 to free ports for garbd ... +--connection node_3 +--source include/shutdown_mysqld.inc + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +# stop SST donor thread when node is in donor state +SET GLOBAL debug_dbug = "+d,sync.wsrep_donor_state"; + +--echo Starting garbd ... +--exec $MTR_GARBD_EXE --address "gcomm://127.0.0.1:$NODE_GALERAPORT_1" --group my_wsrep_cluster --donor node1 --sst backup --options 'base_port=$NODE_GALERAPORT_3' > $MYSQL_TMP_DIR/garbd.log 2>&1 & + +SET SESSION debug_sync = "now WAIT_FOR sync.wsrep_donor_state_reached"; + +# +# get hash of data directory contents before BP dirty page flushing +# +--exec find $datadir -type f ! -name tables_flushed ! -name backup_sst_complete -exec md5sum {} \; | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_before + +# this should force buffer pool flushing, if not already done by donor state change transfer +SET GLOBAL innodb_max_dirty_pages_pct_lwm=0; +SET GLOBAL innodb_max_dirty_pages_pct=0; + +--disable_query_log +--disable_result_log +select f1 from t1; +select * from ten; +--enable_result_log +--enable_query_log + +# +# +# record the hash of data directory contents after BP dirty page flushing +# +--exec find $datadir -type f ! -name tables_flushed ! -name backup_sst_complete -exec md5sum {} \; | md5sum >$MYSQLTEST_VARDIR/tmp/innodb_after + +# there should be no disk writes +--diff_files $MYSQLTEST_VARDIR/tmp/innodb_before $MYSQLTEST_VARDIR/tmp/innodb_after + +SET SESSION debug_sync = "now SIGNAL signal.wsrep_donor_state"; +SET GLOBAL debug_dbug = ""; +SET debug_sync='RESET'; + +--connection node_2 + +# +# garbd will die automatically, because of the backup SST script +# but just to be sure, sending explicit kill here, as well +# +--echo Killing garbd ... +# FreeBSD's /bin/pkill only supports short versions of the options: +# -o Select only the oldest (least recently started) +# -f Match against full argument lists +--error 0,1 +--exec pkill -o -f garbd.*$NODE_GALERAPORT_3 + +--connection node_1 +--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size' +--source include/wait_condition.inc + +--connection node_2 + +DROP TABLE t1; +DROP TABLE ten; + +--echo Restarting node #3 to satisfy MTR's end-of-test checks +--connection node_3 +let $restart_noprint=2; +--source include/start_mysqld.inc + +--connection node_1 +--eval SET GLOBAL innodb_max_dirty_pages_pct = $innodb_max_dirty_pages_pct +--eval SET GLOBAL innodb_max_dirty_pages_pct_lwm = $innodb_max_dirty_pages_pct_lwm + +--source ../galera/include/auto_increment_offset_restore.inc + +--connection node_1 +CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)"); + +--connection node_2 +CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)"); + +--connection node_3 +CALL mtr.add_suppression("WSREP: Protocol violation\. JOIN message sender 1\.0 \(.*\) is not in state transfer \(SYNCED\)"); diff --git a/scripts/CMakeLists.txt b/scripts/CMakeLists.txt index 41b4e556835..bd8aac00012 100644 --- a/scripts/CMakeLists.txt +++ b/scripts/CMakeLists.txt @@ -275,6 +275,7 @@ ELSE() wsrep_sst_mysqldump wsrep_sst_rsync wsrep_sst_mariabackup + wsrep_sst_backup ) # The following script is sourced from other SST scripts, so it should # not be made executable. diff --git a/scripts/wsrep_sst_backup.sh b/scripts/wsrep_sst_backup.sh new file mode 100644 index 00000000000..55e11ddffc0 --- /dev/null +++ b/scripts/wsrep_sst_backup.sh @@ -0,0 +1,112 @@ +#!/usr/bin/env bash + +set -ue + +# Copyright (C) 2017-2021 MariaDB +# Copyright (C) 2010-2014 Codership Oy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; see the file COPYING. If not, write to the +# Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston +# MA 02110-1335 USA. + +# This is a reference script for rsync-based state snapshot transfer + +RSYNC_REAL_PID=0 # rsync process id +STUNNEL_REAL_PID=0 # stunnel process id + +OS="$(uname)" +[ "$OS" = 'Darwin' ] && export -n LD_LIBRARY_PATH + +# Setting the path for lsof on CentOS +export PATH="/usr/sbin:/sbin:$PATH" + +. $(dirname "$0")/wsrep_sst_common + +MAGIC_FILE="$WSREP_SST_OPT_DATA/backup_sst_complete" +rm -rf "$MAGIC_FILE" + +WSREP_LOG_DIR=${WSREP_LOG_DIR:-""} +# if WSREP_LOG_DIR env. variable is not set, try to get it from my.cnf +if [ -z "$WSREP_LOG_DIR" ]; then + WSREP_LOG_DIR=$(parse_cnf mysqld innodb-log-group-home-dir '') +fi + +if [ -n "$WSREP_LOG_DIR" ]; then + # handle both relative and absolute paths + WSREP_LOG_DIR=$(cd $WSREP_SST_OPT_DATA; mkdir -p "$WSREP_LOG_DIR"; cd $WSREP_LOG_DIR; pwd -P) +else + # default to datadir + WSREP_LOG_DIR=$(cd $WSREP_SST_OPT_DATA; pwd -P) +fi + +if [ "$WSREP_SST_OPT_ROLE" = 'donor' ] +then + + [ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" + + RC=0 + + if [ $WSREP_SST_OPT_BYPASS -eq 0 ]; then + + FLUSHED="$WSREP_SST_OPT_DATA/tables_flushed" + ERROR="$WSREP_SST_OPT_DATA/sst_error" + + [ -f "$FLUSHED" ] && rm -f "$FLUSHED" + [ -f "$ERROR" ] && rm -f "$ERROR" + + echo "flush tables" + + # Wait for : + # (a) Tables to be flushed, AND + # (b) Cluster state ID & wsrep_gtid_domain_id to be written to the file, OR + # (c) ERROR file, in case flush tables operation failed. + + while [ ! -r "$FLUSHED" ] && \ + ! grep -q -F ':' '--' "$FLUSHED" >/dev/null 2>&1 + do + # Check whether ERROR file exists. + if [ -f "$ERROR" ]; then + # Flush tables operation failed. + rm -f "$ERROR" + exit 255 + fi + sleep 0.2 + done + + STATE=$(cat "$FLUSHED") + rm -f "$FLUSHED" + + + else # BYPASS + + wsrep_log_info "Bypassing state dump." + fi + + echo 'continue' # now server can resume updating data + + echo "$STATE" > "$MAGIC_FILE" + + echo "done $STATE" + +elif [ "$WSREP_SST_OPT_ROLE" = 'joiner' ] +then + wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'" + exit 22 # EINVAL + + +else + wsrep_log_error "Unrecognized role: '$WSREP_SST_OPT_ROLE'" + exit 22 # EINVAL +fi + +exit 0 diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc index dbebe91ec5b..0549d3e1c1d 100644 --- a/sql/wsrep_sst.cc +++ b/sql/wsrep_sst.cc @@ -1,4 +1,4 @@ -/* Copyright 2008-2020 Codership Oy +/* Copyright 2008-2022 Codership Oy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -30,6 +30,7 @@ #include "wsrep_xid.h" #include #include +#include "debug_sync.h" #include @@ -1415,6 +1416,33 @@ static int run_sql_command(THD *thd, const char *query) return 0; } +static void sst_disallow_writes (THD* thd, bool yes) +{ + char query_str[64]= { 0, }; + ssize_t const query_max= sizeof(query_str) - 1; + CHARSET_INFO *current_charset; + + current_charset= thd->variables.character_set_client; + + if (!is_supported_parser_charset(current_charset)) + { + /* Do not use non-supported parser character sets */ + WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); + thd->variables.character_set_client= &my_charset_latin1; + WSREP_WARN("For SST temporally setting character set to : %s", + my_charset_latin1.csname); + } + + snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", + yes ? 1 : 0); + + if (run_sql_command(thd, query_str)) + { + WSREP_ERROR("Failed to disallow InnoDB writes"); + } + thd->variables.character_set_client= current_charset; +} + static int sst_flush_tables(THD* thd) { @@ -1477,6 +1505,10 @@ static int sst_flush_tables(THD* thd) { WSREP_INFO("Tables flushed."); + /* disable further disk IO */ + sst_disallow_writes(thd, true); + WSREP_INFO("Disabled further disk IO."); + /* Tables have been flushed. Create a file with cluster state ID and wsrep_gtid_domain_id. @@ -1485,39 +1517,14 @@ static int sst_flush_tables(THD* thd) snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid, (long long)wsrep_locked_seqno, wsrep_gtid_domain_id); err= sst_create_file(flush_success, content); + + if(err) + WSREP_INFO("Creating file for flush_success failed %d",err); } return err; } - -static void sst_disallow_writes (THD* thd, bool yes) -{ - char query_str[64] = { 0, }; - ssize_t const query_max = sizeof(query_str) - 1; - CHARSET_INFO *current_charset; - - current_charset = thd->variables.character_set_client; - - if (!is_supported_parser_charset(current_charset)) - { - /* Do not use non-supported parser character sets */ - WSREP_WARN("Current client character set is non-supported parser character set: %s", current_charset->csname); - thd->variables.character_set_client = &my_charset_latin1; - WSREP_WARN("For SST temporally setting character set to : %s", - my_charset_latin1.csname); - } - - snprintf (query_str, query_max, "SET GLOBAL innodb_disallow_writes=%d", - yes ? 1 : 0); - - if (run_sql_command(thd, query_str)) - { - WSREP_ERROR("Failed to disallow InnoDB writes"); - } - thd->variables.character_set_client = current_charset; -} - static void* sst_donor_thread (void* a) { sst_thread_arg* arg= (sst_thread_arg*)a; @@ -1565,8 +1572,7 @@ wait_signal: err= sst_flush_tables (thd.ptr); if (!err) { - sst_disallow_writes (thd.ptr, true); - /* + /* Lets also keep statements that modify binary logs (like RESET LOGS, RESET MASTER) from proceeding until the files have been transferred to the joiner node. @@ -1577,6 +1583,18 @@ wait_signal: } locked= true; + + WSREP_INFO("Donor state reached"); + + DBUG_EXECUTE_IF("sync.wsrep_donor_state", + { + const char act[]= + "now " + "SIGNAL sync.wsrep_donor_state_reached " + "WAIT_FOR signal.wsrep_donor_state"; + assert(!debug_sync_set_action(thd.ptr, + STRING_WITH_LEN(act))); + };); goto wait_signal; } } From 9f4ba624e2f7ad6cd35c842dbb07605f0751f4aa Mon Sep 17 00:00:00 2001 From: Sachin Kumar Date: Mon, 24 May 2021 11:23:03 +0100 Subject: [PATCH 32/52] MDEV-24667 LOAD DATA INFILE on temporary table not written to slave binlog Problem: In regular replication, when master binlogged using statement format slave might not have written an event to its binary log when the Query event aimed at a temporary table. Specifically this was observed with LOAD DATA INFILE. This effect was possible because unlike master slave holds temporary tables in its pool and the master side check of existence of a temporary table at the format bin-logging decision did not apply. Solution: replace THD::has_thd_temporary_tables() with THD::has_temporary_tables which allows to identify temporary table presence on either side. -- Reviewed by Andrei Elkin. --- mysql-test/suite/rpl/r/mdev_24667.result | 30 +++++++++++++ mysql-test/suite/rpl/t/mdev_24667.cnf | 8 ++++ mysql-test/suite/rpl/t/mdev_24667.test | 56 ++++++++++++++++++++++++ sql/sql_class.h | 4 +- sql/temporary_tables.cc | 2 +- 5 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 mysql-test/suite/rpl/r/mdev_24667.result create mode 100644 mysql-test/suite/rpl/t/mdev_24667.cnf create mode 100644 mysql-test/suite/rpl/t/mdev_24667.test diff --git a/mysql-test/suite/rpl/r/mdev_24667.result b/mysql-test/suite/rpl/r/mdev_24667.result new file mode 100644 index 00000000000..7c7342d63d6 --- /dev/null +++ b/mysql-test/suite/rpl/r/mdev_24667.result @@ -0,0 +1,30 @@ +include/rpl_init.inc [topology=1->2->3] +call mtr.add_suppression('Unsafe statement written to the binary log using '); +connection server_1; +set binlog_format=statement; +#first bug +create table t1 (a int); +create temporary table tmp like t1; +load data local infile 'MYSQLTEST_VARDIR/load_data' INTO TABLE tmp; +insert into t1 select * from tmp; +#second bug +create table t2 (a int); +create temporary table tmp2 like t2; +insert into tmp2 values(10); +update tmp2 set a = 20 limit 1; +Warnings: +Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. The statement is unsafe because it uses a LIMIT clause. This is unsafe because the set of rows included cannot be predicted +insert into t2 select * from tmp2; +connection server_2; +connection server_3; +#t1 should have 2 rows +select count(*) = 2 from t1; +count(*) = 2 +1 +#t2 should have 1 rows with a = 20 +select * from t2; +a +20 +connection server_1; +drop table t1, t2, tmp, tmp2; +include/rpl_end.inc diff --git a/mysql-test/suite/rpl/t/mdev_24667.cnf b/mysql-test/suite/rpl/t/mdev_24667.cnf new file mode 100644 index 00000000000..58b605ad928 --- /dev/null +++ b/mysql-test/suite/rpl/t/mdev_24667.cnf @@ -0,0 +1,8 @@ +!include ../my.cnf + +[mysqld.3] +log-slave-updates + +[ENV] +SERVER_MYPORT_3= @mysqld.3.port +SERVER_MYSOCK_3= @mysqld.3.socket diff --git a/mysql-test/suite/rpl/t/mdev_24667.test b/mysql-test/suite/rpl/t/mdev_24667.test new file mode 100644 index 00000000000..d8490b335db --- /dev/null +++ b/mysql-test/suite/rpl/t/mdev_24667.test @@ -0,0 +1,56 @@ +# +# MDEV-24667 LOAD DATA INFILE/inserted rows not written to binlog +# +# In this test we will have a replication configuration like 1->2->3 +# 1 will have statement format +# 2 and 3 will have mixed format +# We will make some updates on temporary table which are unsafe , So 2 must +# Log these queries in row format, Since it is on tmp table , It wont be logged +# So the next query which copies the data from tmp table to normal must be logged +# into the row format. Instead of checking for the binlog We will compare the +# results on the 3, If no binlog is lost(ie it is logged into row format), There +# should not be any data loss. +--let $rpl_topology=1->2->3 +--source include/rpl_init.inc +--source include/have_binlog_format_mixed.inc +call mtr.add_suppression('Unsafe statement written to the binary log using '); +--connection server_1 + +set binlog_format=statement; +--echo #first bug +create table t1 (a int); +create temporary table tmp like t1; +--write_file $MYSQLTEST_VARDIR/load_data +1 +2 +EOF +--replace_result $MYSQLTEST_VARDIR MYSQLTEST_VARDIR +eval load data local infile '$MYSQLTEST_VARDIR/load_data' INTO TABLE tmp; +insert into t1 select * from tmp; + +--echo #second bug +create table t2 (a int); +#insert into t2 values(10); +create temporary table tmp2 like t2; +insert into tmp2 values(10); +update tmp2 set a = 20 limit 1; +insert into t2 select * from tmp2; +--save_master_pos + +--connection server_2 +--sync_with_master +--save_master_pos + +--connection server_3 +--sync_with_master +--echo #t1 should have 2 rows +select count(*) = 2 from t1; +--echo #t2 should have 1 rows with a = 20 +select * from t2; + + +# cleanup +--connection server_1 +drop table t1, t2, tmp, tmp2; +--remove_file $MYSQLTEST_VARDIR/load_data +--source include/rpl_end.inc diff --git a/sql/sql_class.h b/sql/sql_class.h index a748def9b56..d3d54e11671 100644 --- a/sql/sql_class.h +++ b/sql/sql_class.h @@ -3960,13 +3960,13 @@ public: */ DBUG_PRINT("debug", ("temporary_tables: %s, in_sub_stmt: %s, system_thread: %s", - YESNO(has_thd_temporary_tables()), YESNO(in_sub_stmt), + YESNO(has_temporary_tables()), YESNO(in_sub_stmt), show_system_thread(system_thread))); if (in_sub_stmt == 0) { if (wsrep_binlog_format() == BINLOG_FORMAT_ROW) set_current_stmt_binlog_format_row(); - else if (!has_thd_temporary_tables()) + else if (!has_temporary_tables()) set_current_stmt_binlog_format_stmt(); } DBUG_VOID_RETURN; diff --git a/sql/temporary_tables.cc b/sql/temporary_tables.cc index 005a520ff64..fb28ac40aa6 100644 --- a/sql/temporary_tables.cc +++ b/sql/temporary_tables.cc @@ -866,7 +866,7 @@ void THD::restore_tmp_table_share(TMP_TABLE_SHARE *share) @return false Temporary tables exist true No temporary table exist */ -inline bool THD::has_temporary_tables() +bool THD::has_temporary_tables() { DBUG_ENTER("THD::has_temporary_tables"); bool result= (rgi_slave From fbcf0225e195bae2679272569e5a6310557ec853 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Fri, 25 Mar 2022 13:52:32 +0400 Subject: [PATCH 33/52] MDEV-19804 sql_mode=ORACLE: call procedure in packages Adding support for the fully qualified package procedure calls: BEGIN CALL db.pkg.proc(args); -- SQL/PSM call style db.pkg.proc(args); -- PL/SQL call style END; --- .../suite/compat/oracle/r/sp-package.result | 98 +++++++++++++++ .../suite/compat/oracle/t/sp-package.test | 114 ++++++++++++++++++ sql/sql_lex.cc | 34 ++++++ sql/sql_lex.h | 3 + sql/sql_yacc.yy | 24 +++- sql/sql_yacc_ora.yy | 42 ++++++- 6 files changed, 311 insertions(+), 4 deletions(-) diff --git a/mysql-test/suite/compat/oracle/r/sp-package.result b/mysql-test/suite/compat/oracle/r/sp-package.result index a8be1a8eb16..20e3d43fb85 100644 --- a/mysql-test/suite/compat/oracle/r/sp-package.result +++ b/mysql-test/suite/compat/oracle/r/sp-package.result @@ -3135,3 +3135,101 @@ collation_connection latin1_swedish_ci DROP VIEW v_test; SET sql_mode=ORACLE; DROP PACKAGE test1; +# +# MDEV-19804 sql_mode=ORACLE: call procedure in packages +# +CALL `db1 `.pkg.p; +ERROR 42000: Incorrect database name 'db1 ' +CALL db1.`pkg `.p; +ERROR 42000: Incorrect routine name 'pkg ' +CALL db1.pkg.`p `; +ERROR 42000: Incorrect routine name 'p ' +SET sql_mode=ORACLE; +CREATE PACKAGE pkg1 as +PROCEDURE p1(); +END; +$$ +CREATE PACKAGE BODY pkg1 as +PROCEDURE p1() as +BEGIN +SELECT 'test-function' AS c1; +END; +END; +$$ +CALL pkg1.p1; +c1 +test-function +CALL test.pkg1.p1; +c1 +test-function +SET sql_mode=DEFAULT; +CALL test.pkg1.p1; +c1 +test-function +SET sql_mode=ORACLE; +BEGIN +CALL pkg1.p1; +CALL test.pkg1.p1; +END +$$ +c1 +test-function +c1 +test-function +BEGIN +pkg1.p1; +test.pkg1.p1; +END +$$ +c1 +test-function +c1 +test-function +DROP PACKAGE pkg1; +CREATE DATABASE db1; +CREATE PACKAGE db1.pkg1 AS +PROCEDURE p1(a OUT TEXT); +END; +$$ +CREATE PACKAGE BODY db1.pkg1 AS +PROCEDURE p1(a OUT TEXT) AS +BEGIN +a:= 'This is db1.pkg1.p1'; +END; +END; +$$ +CREATE DATABASE db2; +CREATE PACKAGE db2.pkg1 AS +FUNCTION var1 RETURN TEXT; +PROCEDURE p1(a OUT TEXT); +PROCEDURE p2_db1_pkg1_p1; +END; +$$ +CREATE PACKAGE BODY db2.pkg1 AS +m_var1 TEXT; +FUNCTION var1 RETURN TEXT AS +BEGIN +RETURN m_var1; +END; +PROCEDURE p1(a OUT TEXT) AS +BEGIN +a:= 'This is db2.pkg1.p1'; +END; +PROCEDURE p2_db1_pkg1_p1 AS +a TEXT; +BEGIN +db1.pkg1.p1(a); +SELECT a; +END; +BEGIN +db1.pkg1.p1(m_var1); +END; +$$ +SELECT db2.pkg1.var1(); +db2.pkg1.var1() +This is db1.pkg1.p1 +CALL db2.pkg1.p2_db1_pkg1_p1; +a +This is db1.pkg1.p1 +DROP DATABASE db1; +DROP DATABASE db2; diff --git a/mysql-test/suite/compat/oracle/t/sp-package.test b/mysql-test/suite/compat/oracle/t/sp-package.test index 8fcf72d3145..578ee4e0073 100644 --- a/mysql-test/suite/compat/oracle/t/sp-package.test +++ b/mysql-test/suite/compat/oracle/t/sp-package.test @@ -2895,3 +2895,117 @@ DROP VIEW v_test; SET sql_mode=ORACLE; DROP PACKAGE test1; + + +--echo # +--echo # MDEV-19804 sql_mode=ORACLE: call procedure in packages +--echo # + +--error ER_WRONG_DB_NAME +CALL `db1 `.pkg.p; +--error ER_SP_WRONG_NAME +CALL db1.`pkg `.p; +--error ER_SP_WRONG_NAME +CALL db1.pkg.`p `; + + +SET sql_mode=ORACLE; +DELIMITER $$; +CREATE PACKAGE pkg1 as + PROCEDURE p1(); +END; +$$ +CREATE PACKAGE BODY pkg1 as + PROCEDURE p1() as + BEGIN + SELECT 'test-function' AS c1; + END; +END; +$$ +DELIMITER ;$$ + +CALL pkg1.p1; +CALL test.pkg1.p1; + +# In sql_mode=DEFAULT we support fully qualified package function names +# (this is needed for VIEWs). Let's make sure we also support fully +# qualified package procedure names, for symmetry + +SET sql_mode=DEFAULT; +CALL test.pkg1.p1; +SET sql_mode=ORACLE; + +DELIMITER $$; +BEGIN + CALL pkg1.p1; + CALL test.pkg1.p1; +END +$$ +DELIMITER ;$$ + +DELIMITER $$; +BEGIN + pkg1.p1; + test.pkg1.p1; +END +$$ +DELIMITER ;$$ + +DROP PACKAGE pkg1; + + +# +# Testing packages in different databases calling each other +# in routines and in the initialization section. +# + +CREATE DATABASE db1; +DELIMITER $$; +CREATE PACKAGE db1.pkg1 AS + PROCEDURE p1(a OUT TEXT); +END; +$$ +CREATE PACKAGE BODY db1.pkg1 AS + PROCEDURE p1(a OUT TEXT) AS + BEGIN + a:= 'This is db1.pkg1.p1'; + END; +END; +$$ +DELIMITER ;$$ + +CREATE DATABASE db2; +DELIMITER $$; +CREATE PACKAGE db2.pkg1 AS + FUNCTION var1 RETURN TEXT; + PROCEDURE p1(a OUT TEXT); + PROCEDURE p2_db1_pkg1_p1; +END; +$$ +CREATE PACKAGE BODY db2.pkg1 AS + m_var1 TEXT; + FUNCTION var1 RETURN TEXT AS + BEGIN + RETURN m_var1; + END; + PROCEDURE p1(a OUT TEXT) AS + BEGIN + a:= 'This is db2.pkg1.p1'; + END; + PROCEDURE p2_db1_pkg1_p1 AS + a TEXT; + BEGIN + db1.pkg1.p1(a); + SELECT a; + END; +BEGIN + db1.pkg1.p1(m_var1); +END; +$$ +DELIMITER ;$$ + +SELECT db2.pkg1.var1(); +CALL db2.pkg1.p2_db1_pkg1_p1; + +DROP DATABASE db1; +DROP DATABASE db2; diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index cffc0eb25dd..c70fef9709f 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -7893,6 +7893,40 @@ bool LEX::call_statement_start(THD *thd, const LEX_CSTRING *name1, } +bool LEX::call_statement_start(THD *thd, const LEX_CSTRING &db, + const LEX_CSTRING &pkg, + const LEX_CSTRING &proc) +{ + Database_qualified_name q_db_pkg(db, pkg); + Database_qualified_name q_pkg_proc(pkg, proc); + sp_name *spname; + + sql_command= SQLCOM_CALL; + + if (check_db_name((LEX_STRING*) const_cast(&db))) + { + my_error(ER_WRONG_DB_NAME, MYF(0), db.str); + return NULL; + } + if (check_routine_name(&pkg) || + check_routine_name(&proc)) + return NULL; + + // Concat `pkg` and `name` to `pkg.name` + LEX_CSTRING pkg_dot_proc; + if (q_pkg_proc.make_qname(thd->mem_root, &pkg_dot_proc) || + check_ident_length(&pkg_dot_proc) || + !(spname= new (thd->mem_root) sp_name(&db, &pkg_dot_proc, true))) + return NULL; + + sp_handler_package_function.add_used_routine(thd->lex, thd, spname); + sp_handler_package_body.add_used_routine(thd->lex, thd, &q_db_pkg); + + return !(m_sql_cmd= new (thd->mem_root) Sql_cmd_call(spname, + &sp_handler_package_procedure)); +} + + sp_package *LEX::get_sp_package() const { return sphead ? sphead->get_package() : NULL; diff --git a/sql/sql_lex.h b/sql/sql_lex.h index a63ec7c9153..b5cc9604a8f 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -3478,6 +3478,9 @@ public: bool call_statement_start(THD *thd, const LEX_CSTRING *name); bool call_statement_start(THD *thd, const LEX_CSTRING *name1, const LEX_CSTRING *name2); + bool call_statement_start(THD *thd, const LEX_CSTRING &name1, + const LEX_CSTRING &name2, + const LEX_CSTRING &name3); sp_variable *find_variable(const LEX_CSTRING *name, sp_pcontext **ctx, const Sp_rcontext_handler **rh) const; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index 2852d2efc0c..a427c7a40c5 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -3345,9 +3345,29 @@ sp_suid: ; call: - CALL_SYM sp_name + CALL_SYM ident { - if (unlikely(Lex->call_statement_start(thd, $2))) + if (unlikely(Lex->call_statement_start(thd, &$2))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | CALL_SYM ident '.' ident + { + if (unlikely(Lex->call_statement_start(thd, &$2, &$4))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | CALL_SYM ident '.' ident '.' ident + { + if (unlikely(Lex->call_statement_start(thd, $2, $4, $6))) MYSQL_YYABORT; } opt_sp_cparam_list diff --git a/sql/sql_yacc_ora.yy b/sql/sql_yacc_ora.yy index 8b96937c955..f0e6b5b54c7 100644 --- a/sql/sql_yacc_ora.yy +++ b/sql/sql_yacc_ora.yy @@ -2999,9 +2999,29 @@ sp_suid: ; call: - CALL_SYM sp_name + CALL_SYM ident { - if (unlikely(Lex->call_statement_start(thd, $2))) + if (unlikely(Lex->call_statement_start(thd, &$2))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | CALL_SYM ident '.' ident + { + if (unlikely(Lex->call_statement_start(thd, &$2, &$4))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | CALL_SYM ident '.' ident '.' ident + { + if (unlikely(Lex->call_statement_start(thd, $2, $4, $6))) MYSQL_YYABORT; } opt_sp_cparam_list @@ -3922,12 +3942,30 @@ sp_statement: MYSQL_YYABORT; } opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } | ident_directly_assignable '.' ident { if (unlikely(Lex->call_statement_start(thd, &$1, &$3))) MYSQL_YYABORT; } opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } + | ident_directly_assignable '.' ident '.' ident + { + if (unlikely(Lex->call_statement_start(thd, $1, $3, $5))) + MYSQL_YYABORT; + } + opt_sp_cparam_list + { + if (Lex->check_cte_dependencies_and_resolve_references()) + MYSQL_YYABORT; + } ; sp_proc_stmt_statement: From 549a71e74b2fa494efcd79635a5db8af0d541f99 Mon Sep 17 00:00:00 2001 From: Rucha Deodhar Date: Fri, 25 Mar 2022 18:29:39 +0530 Subject: [PATCH 34/52] MDEV-21873: 10.2 to 10.3 upgrade doesn't remove semi-sync reference from mysql.plugin table Fix: Since mysql_upgrade runs commands from mysql_system_tables.fix, added sql commands to check for semisync plugins in INFORMATION_SCHEMA.PLUGINS and if they aren't there then delete them from mysql.plugin. --- scripts/mysql_system_tables_fix.sql | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/mysql_system_tables_fix.sql b/scripts/mysql_system_tables_fix.sql index f87f1aa66f4..79866d149bb 100644 --- a/scripts/mysql_system_tables_fix.sql +++ b/scripts/mysql_system_tables_fix.sql @@ -796,3 +796,10 @@ ALTER TABLE help_topic MODIFY url TEXT NOT NULL; # MDEV-7383 - varbinary on mix/max of column_stats alter table column_stats modify min_value varbinary(255) DEFAULT NULL, modify max_value varbinary(255) DEFAULT NULL; + +# MDEV-21873: 10.2 to 10.3 upgrade doesn't remove semi-sync reference from +# mysql.plugin table. +# As per suggested fix, check INFORMATION_SCHEMA.PLUGINS +# and if semisync plugins aren't there, delete them from mysql.plugin. +DELETE FROM mysql.plugin WHERE name="rpl_semi_sync_master" AND NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME="rpl_semi_sync_master"); +DELETE FROM mysql.plugin WHERE name="rpl_semi_sync_slave" AND NOT EXISTS (SELECT * FROM INFORMATION_SCHEMA.PLUGINS WHERE PLUGIN_NAME="rpl_semi_sync_slave"); From f92388fa14236663d476fffa7ad7650d5706dd4c Mon Sep 17 00:00:00 2001 From: Sergei Golubchik Date: Thu, 24 Mar 2022 15:43:20 +0100 Subject: [PATCH 35/52] MDEV-27900 fixes * prevent infinite recursion in beyond-EOF reads (when pread returns 0) * reduce code duplication followup for d78173828e9 and f4fb6cb3fe6 --- tpool/aio_liburing.cc | 3 +-- tpool/aio_linux.cc | 3 +-- tpool/tpool.h | 12 +++++++++++- tpool/tpool_generic.cc | 15 +-------------- 4 files changed, 14 insertions(+), 19 deletions(-) diff --git a/tpool/aio_liburing.cc b/tpool/aio_liburing.cc index b8666482193..8192a5b7fed 100644 --- a/tpool/aio_liburing.cc +++ b/tpool/aio_liburing.cc @@ -161,8 +161,7 @@ private: } io_uring_cqe_seen(&aio->uring_, cqe); - if (iocb->m_ret_len != iocb->m_len && !iocb->m_err) - finish_synchronous(iocb); + finish_synchronous(iocb); // If we need to resubmit the IO operation, but the ring is full, // we will follow the same path as for any other error codes. diff --git a/tpool/aio_linux.cc b/tpool/aio_linux.cc index fc6e5b53e1a..5d01c588a88 100644 --- a/tpool/aio_linux.cc +++ b/tpool/aio_linux.cc @@ -128,8 +128,7 @@ class aio_linux final : public aio { iocb->m_ret_len= event.res; iocb->m_err= 0; - if (iocb->m_ret_len != iocb->m_len) - finish_synchronous(iocb); + finish_synchronous(iocb); } iocb->m_internal_task.m_func= iocb->m_callback; iocb->m_internal_task.m_arg= iocb; diff --git a/tpool/tpool.h b/tpool/tpool.h index 2c61c2d62b2..87a0122adce 100644 --- a/tpool/tpool.h +++ b/tpool/tpool.h @@ -173,7 +173,17 @@ public: protected: static void synchronous(aiocb *cb); /** finish a partial read/write callback synchronously */ - static void finish_synchronous(aiocb *cb); + static inline void finish_synchronous(aiocb *cb) + { + if (!cb->m_err && cb->m_ret_len != cb->m_len) + { + /* partial read/write */ + cb->m_buffer= (char *) cb->m_buffer + cb->m_ret_len; + cb->m_len-= (unsigned int) cb->m_ret_len; + cb->m_offset+= cb->m_ret_len; + synchronous(cb); + } + } }; class timer diff --git a/tpool/tpool_generic.cc b/tpool/tpool_generic.cc index a1b9a3ce945..5720c5b48aa 100644 --- a/tpool/tpool_generic.cc +++ b/tpool/tpool_generic.cc @@ -85,24 +85,11 @@ void aio::synchronous(aiocb *cb) #endif cb->m_ret_len = ret_len; cb->m_err = err; - if (!err && cb->m_ret_len != cb->m_len) + if (ret_len) finish_synchronous(cb); } -/** - A partial read/write has occured, continue synchronously. -*/ -void aio::finish_synchronous(aiocb *cb) -{ - assert(cb->m_ret_len != (unsigned int) cb->m_len && !cb->m_err); - /* partial read/write */ - cb->m_buffer= (char *) cb->m_buffer + cb->m_ret_len; - cb->m_len-= (unsigned int) cb->m_ret_len; - cb->m_offset+= cb->m_ret_len; - synchronous(cb); -} - /** Implementation of generic threadpool. This threadpool consists of the following components From e048289e557315b068a15083267329c443faadd3 Mon Sep 17 00:00:00 2001 From: Igor Babaev Date: Fri, 25 Mar 2022 11:04:56 -0700 Subject: [PATCH 36/52] MDEV-27937 Assertion failure when executing prepared statement with ? in IN list This bug affected queries with IN predicates that contain parameter markers in the value list. Such queries are executed via prepared statements. The problem appeared only if the number of elements in the value list was greater than the set value of the system variable in_predicate_conversion_threshold. The patch unconditionally prohibits conversion of an IN predicate to the equivalent IN predicand if the value list of the IN predicate contains parameters markers. Approved by Oleksandr Byelkin --- mysql-test/main/opt_tvc.result | 27 +++++++++++++++++++++++++++ mysql-test/main/opt_tvc.test | 26 ++++++++++++++++++++++++++ sql/item_cmpfunc.cc | 7 ++++--- sql/item_cmpfunc.h | 2 ++ sql/sql_tvc.cc | 27 ++++++++++++++++++++++++--- 5 files changed, 83 insertions(+), 6 deletions(-) diff --git a/mysql-test/main/opt_tvc.result b/mysql-test/main/opt_tvc.result index a68e70e8a25..02d9096ed09 100644 --- a/mysql-test/main/opt_tvc.result +++ b/mysql-test/main/opt_tvc.result @@ -732,3 +732,30 @@ a b 4 4 drop table t1; SET @@in_predicate_conversion_threshold= default; +# +# MDEV-27937: Prepared statement with ? in the list if IN predicate +# +set in_predicate_conversion_threshold=2; +create table t1 (id int, a int, b int); +insert into t1 values (1,3,30), (2,7,70), (3,1,10); +prepare stmt from " +select * from t1 where a in (7, ?, 5, 1); +"; +execute stmt using 3; +id a b +1 3 30 +2 7 70 +3 1 10 +deallocate prepare stmt; +prepare stmt from " +select * from t1 where (a,b) in ((7,70), (3,?), (5,50), (1,10)); +"; +execute stmt using 30; +id a b +1 3 30 +2 7 70 +3 1 10 +deallocate prepare stmt; +drop table t1; +set in_predicate_conversion_threshold=default; +# End of 10.3 tests diff --git a/mysql-test/main/opt_tvc.test b/mysql-test/main/opt_tvc.test index e4e8c6d7919..f8469f22aa1 100644 --- a/mysql-test/main/opt_tvc.test +++ b/mysql-test/main/opt_tvc.test @@ -428,3 +428,29 @@ eval $query; drop table t1; SET @@in_predicate_conversion_threshold= default; +--echo # +--echo # MDEV-27937: Prepared statement with ? in the list if IN predicate +--echo # + +set in_predicate_conversion_threshold=2; + +create table t1 (id int, a int, b int); +insert into t1 values (1,3,30), (2,7,70), (3,1,10); + +prepare stmt from " +select * from t1 where a in (7, ?, 5, 1); +"; +execute stmt using 3; +deallocate prepare stmt; + +prepare stmt from " +select * from t1 where (a,b) in ((7,70), (3,?), (5,50), (1,10)); +"; +execute stmt using 30; +deallocate prepare stmt; + +drop table t1; + +set in_predicate_conversion_threshold=default; + +--echo # End of 10.3 tests diff --git a/sql/item_cmpfunc.cc b/sql/item_cmpfunc.cc index 38f0a285e84..f41414f8ae9 100644 --- a/sql/item_cmpfunc.cc +++ b/sql/item_cmpfunc.cc @@ -4472,10 +4472,11 @@ void Item_func_in::mark_as_condition_AND_part(TABLE_LIST *embedding) Query_arena *arena, backup; arena= thd->activate_stmt_arena_if_needed(&backup); - if (to_be_transformed_into_in_subq(thd)) + if (!transform_into_subq_checked) { - transform_into_subq= true; - thd->lex->current_select->in_funcs.push_back(this, thd->mem_root); + if ((transform_into_subq= to_be_transformed_into_in_subq(thd))) + thd->lex->current_select->in_funcs.push_back(this, thd->mem_root); + transform_into_subq_checked= true; } if (arena) diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h index 4c88f5b274f..f3d3be44b62 100644 --- a/sql/item_cmpfunc.h +++ b/sql/item_cmpfunc.h @@ -2299,6 +2299,7 @@ protected: SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param, Field *field, Item *value); bool transform_into_subq; + bool transform_into_subq_checked; public: /// An array of values, created when the bisection lookup method is used in_vector *array; @@ -2321,6 +2322,7 @@ public: Item_func_opt_neg(thd, list), Predicant_to_list_comparator(thd, arg_count - 1), transform_into_subq(false), + transform_into_subq_checked(false), array(0), have_null(0), arg_types_compatible(FALSE), emb_on_expr_nest(0) { } diff --git a/sql/sql_tvc.cc b/sql/sql_tvc.cc index 3866b7c9352..13efd973326 100644 --- a/sql/sql_tvc.cc +++ b/sql/sql_tvc.cc @@ -900,8 +900,6 @@ Item *Item_func_in::in_predicate_to_in_subs_transformer(THD *thd, if (!transform_into_subq) return this; - transform_into_subq= false; - List values; LEX *lex= thd->lex; @@ -1058,15 +1056,38 @@ uint32 Item_func_in::max_length_of_left_expr() bool Item_func_in::to_be_transformed_into_in_subq(THD *thd) { + bool is_row_list= args[1]->type() == Item::ROW_ITEM; uint values_count= arg_count-1; - if (args[1]->type() == Item::ROW_ITEM) + if (is_row_list) values_count*= ((Item_row *)(args[1]))->cols(); if (thd->variables.in_subquery_conversion_threshold == 0 || thd->variables.in_subquery_conversion_threshold > values_count) return false; + if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE)) + return true; + + /* Occurence of '?' in IN list is checked only for PREPARE commands */ + for (uint i=1; i < arg_count; i++) + { + if (!is_row_list) + { + if (args[i]->type() == Item::PARAM_ITEM) + return false; + } + else + { + Item_row *row_list= (Item_row *)(args[i]); + for (uint j=0; j < row_list->cols(); j++) + { + if (row_list->element_index(j)->type() == Item::PARAM_ITEM) + return false; + } + } + } + return true; } From 9d6d1221230e2acf9fac2ab6fe685c0a2a7845aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Kek=C3=A4l=C3=A4inen?= Date: Thu, 24 Mar 2022 00:43:04 -0700 Subject: [PATCH 37/52] Deb: Fix Salsa-CI autopkgtest failure The autopkgtest was failing due to missing *.changes file. This is part of source build, so revert autobake-deb.sh back to NOT using -b for Gitlab-CI/Salsa-CI runs. --- debian/autobake-deb.sh | 7 ------- 1 file changed, 7 deletions(-) diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh index ce36b9abd37..08c83a86349 100755 --- a/debian/autobake-deb.sh +++ b/debian/autobake-deb.sh @@ -105,13 +105,6 @@ dch -b -D "${CODENAME}" -v "${VERSION}" "Automatic build with ${LOGSTRING}." --c echo "Creating package version ${VERSION} ... " -# On Gitlab-CI, use -b to build binary only packages as there is -# no need to waste time on generating the source package. -if [[ $GITLAB_CI ]] -then - BUILDPACKAGE_FLAGS="-b" -fi - # Use eatmydata is available to build faster with less I/O, skipping fsync() # during the entire build process (safe because a build can always be restarted) if which eatmydata > /dev/null From 7af133cc111c0fa8f5afa99055d2a22eaac6d94e Mon Sep 17 00:00:00 2001 From: hongdongjian Date: Fri, 25 Mar 2022 19:47:40 +0800 Subject: [PATCH 38/52] MDEV-28177: server_audit; Update the offset of dbName on the aarch64 platform. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On the aarch64 platform, MySQL 5.7.33 cannot install this version of the audit plugin, but X86_64 can run well。 --- plugin/server_audit/server_audit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/plugin/server_audit/server_audit.c b/plugin/server_audit/server_audit.c index 14f5b2f98b4..81ec33973e3 100644 --- a/plugin/server_audit/server_audit.c +++ b/plugin/server_audit/server_audit.c @@ -2328,6 +2328,9 @@ int get_db_mysql57(MYSQL_THD thd, char **name, int *len) #ifdef __x86_64__ db_off= 608; db_len_off= 616; +#elif __aarch64__ + db_off= 632; + db_len_off= 640; #else db_off= 0; db_len_off= 0; @@ -2338,6 +2341,9 @@ int get_db_mysql57(MYSQL_THD thd, char **name, int *len) #ifdef __x86_64__ db_off= 536; db_len_off= 544; +#elif __aarch64__ + db_off= 552; + db_len_off= 560; #else db_off= 0; db_len_off= 0; From a6dbb6b264109434da79c4b8aa5f332051f67cf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 28 Mar 2022 08:33:52 +0300 Subject: [PATCH 39/52] Fix main.create_or_replace better InnoDB background statistics recalculation may acquire a metadata also on the table itself, not only on the tables that store the statistics. Hence, it is better to disable InnoDB persistent statistics altogether. This fixes up commit 9b8d9a1db32e7526e2b290bdb1f232eb444ae259. --- mysql-test/main/create_or_replace.result | 45 +++++++++-------------- mysql-test/main/create_or_replace.test | 47 ++++++++++-------------- 2 files changed, 36 insertions(+), 56 deletions(-) diff --git a/mysql-test/main/create_or_replace.result b/mysql-test/main/create_or_replace.result index 294b0623fc1..178b7182666 100644 --- a/mysql-test/main/create_or_replace.result +++ b/mysql-test/main/create_or_replace.result @@ -1,3 +1,5 @@ +SET @save_persistent=@@GLOBAL.innodb_stats_persistent; +SET GLOBAL innodb_stats_persistent=OFF; CREATE TABLE t2 (a int); INSERT INTO t2 VALUES(1),(2),(3); # @@ -258,8 +260,7 @@ Note 1051 Unknown table 'test.t1,mysqltest2.t2' create table test.t1 (i int) engine=myisam; create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -272,8 +273,7 @@ ERROR 42000: A table must have at least 1 column show tables; Tables_in_test t2 -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -282,16 +282,14 @@ THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_SHARED_NO_READ_WRITE NULL Table metadata lock mysqltest2 t2 create or replace table mysqltest2.t2; ERROR 42000: A table must have at least 1 column -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME create table t1 (i int); drop table t1; create table test.t1 (i int); create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -304,8 +302,7 @@ ERROR 42S21: Duplicate column name 'a' show tables; Tables_in_test t2 -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -314,16 +311,14 @@ THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_SHARED_NO_READ_WRITE NULL Table metadata lock mysqltest2 t2 create or replace table mysqltest2.t2 (a int) select 1 as 'a', 2 as 'a'; ERROR 42S21: Duplicate column name 'a' -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME create table t1 (i int); drop table t1; create table test.t1 (i int) engine=innodb; create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_INTENTION_EXCLUSIVE NULL Schema metadata lock mysqltest2 @@ -335,8 +330,7 @@ drop table test.t1,mysqltest2.t2; create table test.t1 (i int) engine=aria transactional=1 checksum=1; create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_INTENTION_EXCLUSIVE NULL Schema metadata lock mysqltest2 @@ -353,8 +347,7 @@ drop table test.t1; # create table t1 (i int); lock table t1 write; -select * from information_schema.metadata_lock_info -where table_schema!='mysql' or table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -365,8 +358,7 @@ ERROR 22001: Data too long for column 'a' at row 1 show tables; Tables_in_test t2 -select * from information_schema.metadata_lock_info -where table_schema!='mysql' or table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME create table t1 (i int); drop table t1; @@ -454,8 +446,7 @@ drop view t1; # create table t1 (a int); lock table t1 write, t2 read; -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -463,8 +454,7 @@ THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_SHARED_NO_READ_WRITE NULL Table metadata lock test t1 # MDL_SHARED_READ NULL Table metadata lock test t2 create or replace table t1 (i int); -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -472,8 +462,7 @@ THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_SHARED_NO_READ_WRITE NULL Table metadata lock test t1 # MDL_SHARED_READ NULL Table metadata lock test t2 create or replace table t1 like t2; -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -481,8 +470,7 @@ THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_SHARED_NO_READ_WRITE NULL Table metadata lock test t1 # MDL_SHARED_READ NULL Table metadata lock test t2 create or replace table t1 select 1 as f1; -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; THREAD_ID LOCK_MODE LOCK_DURATION LOCK_TYPE TABLE_SCHEMA TABLE_NAME # MDL_BACKUP_DDL NULL Backup lock # MDL_BACKUP_DML NULL Backup lock @@ -580,3 +568,4 @@ ERROR HY000: Table 't3' was not locked with LOCK TABLES UNLOCK TABLES; DROP TABLE t3; # End of 10.4 tests +SET GLOBAL innodb_stats_persistent=@save_persistent; diff --git a/mysql-test/main/create_or_replace.test b/mysql-test/main/create_or_replace.test index 7fa08d13847..573e0e177c2 100644 --- a/mysql-test/main/create_or_replace.test +++ b/mysql-test/main/create_or_replace.test @@ -5,6 +5,9 @@ --source include/have_innodb.inc --source include/have_metadata_lock_info.inc +SET @save_persistent=@@GLOBAL.innodb_stats_persistent; +SET GLOBAL innodb_stats_persistent=OFF; + # # Create help table # @@ -212,21 +215,18 @@ create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; --error ER_TABLE_MUST_HAVE_COLUMNS create or replace table test.t1; show tables; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; --error ER_TABLE_MUST_HAVE_COLUMNS create or replace table mysqltest2.t2; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; create table t1 (i int); drop table t1; @@ -235,21 +235,18 @@ create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; --error ER_DUP_FIELDNAME create or replace table test.t1 (a int) select 1 as 'a', 2 as 'a'; show tables; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; --error ER_DUP_FIELDNAME create or replace table mysqltest2.t2 (a int) select 1 as 'a', 2 as 'a'; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; create table t1 (i int); drop table t1; @@ -258,8 +255,7 @@ create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; unlock tables; drop table test.t1,mysqltest2.t2; @@ -268,8 +264,7 @@ create table mysqltest2.t2 like test.t1; lock table test.t1 write, mysqltest2.t2 write; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; unlock tables; drop table t1; @@ -285,15 +280,13 @@ create table t1 (i int); lock table t1 write; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_schema!='mysql' or table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; --error ER_DATA_TOO_LONG create or replace table t1 (a char(1)) engine=Innodb select 'foo' as a; show tables; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_schema!='mysql' or table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; create table t1 (i int); drop table t1; @@ -371,24 +364,20 @@ create table t1 (a int); lock table t1 write, t2 read; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; create or replace table t1 (i int); --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; create or replace table t1 like t2; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; create or replace table t1 select 1 as f1; --replace_column 1 # --sorted_result -select * from information_schema.metadata_lock_info -where table_name not like 'innodb_%_stats'; +select * from information_schema.metadata_lock_info; drop table t1; unlock tables; @@ -520,3 +509,5 @@ UNLOCK TABLES; DROP TABLE t3; --echo # End of 10.4 tests + +SET GLOBAL innodb_stats_persistent=@save_persistent; From 2ab941084309ebad211357d08d383e0040fefc85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 28 Mar 2022 08:34:17 +0300 Subject: [PATCH 40/52] Cleanup: Invoke sql_print_error() directly --- storage/innobase/dict/dict0crea.cc | 62 +++++++++++++++--------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index 0f5e1761940..4532ccc1a3d 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -846,7 +846,7 @@ uint32_t dict_drop_index_tree(btr_pcur_t *pcur, trx_t *trx, mtr_t *mtr) len > DICT_NUM_FIELDS__SYS_INDEXES) { rec_corrupted: - ib::error() << "Corrupted SYS_INDEXES record"; + sql_print_error("InnoDB: Corrupted SYS_INDEXES record"); return 0; } @@ -1330,7 +1330,7 @@ bool dict_sys_t::load_sys_tables() { sys_foreign= nullptr; mismatch= true; - ib::error() << "Invalid definition of SYS_FOREIGN"; + sql_print_error("InnoDB: Invalid definition of SYS_FOREIGN"); } if (!(sys_foreign_cols= load_table(SYS_TABLE[SYS_FOREIGN_COLS], DICT_ERR_IGNORE_FK_NOKEY))); @@ -1342,7 +1342,7 @@ bool dict_sys_t::load_sys_tables() { sys_foreign_cols= nullptr; mismatch= true; - ib::error() << "Invalid definition of SYS_FOREIGN_COLS"; + sql_print_error("InnoDB: Invalid definition of SYS_FOREIGN_COLS"); } if (!(sys_virtual= load_table(SYS_TABLE[SYS_VIRTUAL], DICT_ERR_IGNORE_FK_NOKEY))); @@ -1353,7 +1353,7 @@ bool dict_sys_t::load_sys_tables() { sys_virtual= nullptr; mismatch= true; - ib::error() << "Invalid definition of SYS_VIRTUAL"; + sql_print_error("InnoDB: Invalid definition of SYS_VIRTUAL"); } unlock(); return mismatch; @@ -1369,8 +1369,8 @@ dberr_t dict_sys_t::create_or_check_sys_tables() if (load_sys_tables()) { - ib::info() << "Set innodb_read_only=1 or innodb_force_recovery=3" - " to start up"; + sql_print_information("InnoDB: Set innodb_read_only=1 " + "or innodb_force_recovery=3 to start up"); return DB_CORRUPTION; } @@ -1402,7 +1402,7 @@ dberr_t dict_sys_t::create_or_check_sys_tables() const auto srv_file_per_table_backup= srv_file_per_table; srv_file_per_table= 0; dberr_t error; - const char *tablename; + span tablename; if (!sys_foreign) { @@ -1420,9 +1420,11 @@ dberr_t dict_sys_t::create_or_check_sys_tables() "END;\n", trx); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - tablename= SYS_TABLE[SYS_FOREIGN].data(); + tablename= SYS_TABLE[SYS_FOREIGN]; err_exit: - ib::error() << "Creation of " << tablename << " failed: " << error; + sql_print_error("InnoDB: Creation of %.*s failed: %s", + int(tablename.size()), tablename.data(), + ut_strerr(error)); trx->rollback(); row_mysql_unlock_data_dictionary(trx); trx->free(); @@ -1442,7 +1444,7 @@ err_exit: "END;\n", trx); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - tablename= SYS_TABLE[SYS_FOREIGN_COLS].data(); + tablename= SYS_TABLE[SYS_FOREIGN_COLS]; goto err_exit; } } @@ -1457,7 +1459,7 @@ err_exit: "END;\n", trx); if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - tablename= SYS_TABLE[SYS_VIRTUAL].data(); + tablename= SYS_TABLE[SYS_VIRTUAL]; goto err_exit; } } @@ -1471,10 +1473,11 @@ err_exit: if (sys_foreign); else if (!(sys_foreign= load_table(SYS_TABLE[SYS_FOREIGN]))) { - tablename= SYS_TABLE[SYS_FOREIGN].data(); + tablename= SYS_TABLE[SYS_FOREIGN]; load_fail: unlock(); - ib::error() << "Failed to CREATE TABLE " << tablename; + sql_print_error("InnoDB: Failed to CREATE TABLE %.*s", + int(tablename.size()), tablename.data()); return DB_TABLE_NOT_FOUND; } else @@ -1483,7 +1486,7 @@ load_fail: if (sys_foreign_cols); else if (!(sys_foreign_cols= load_table(SYS_TABLE[SYS_FOREIGN_COLS]))) { - tablename= SYS_TABLE[SYS_FOREIGN_COLS].data(); + tablename= SYS_TABLE[SYS_FOREIGN_COLS]; goto load_fail; } else @@ -1492,7 +1495,7 @@ load_fail: if (sys_virtual); else if (!(sys_virtual= load_table(SYS_TABLE[SYS_VIRTUAL]))) { - tablename= SYS_TABLE[SYS_VIRTUAL].data(); + tablename= SYS_TABLE[SYS_VIRTUAL]; goto load_fail; } else @@ -1515,12 +1518,14 @@ dict_foreign_eval_sql( const char* id, /*!< in: foreign key id */ trx_t* trx) /*!< in/out: transaction */ { - dberr_t error; FILE* ef = dict_foreign_err_file; - error = que_eval_sql(info, sql, trx); + dberr_t error = que_eval_sql(info, sql, trx); - if (error == DB_DUPLICATE_KEY) { + switch (error) { + case DB_SUCCESS: + break; + case DB_DUPLICATE_KEY: mysql_mutex_lock(&dict_foreign_err_mutex); rewind(ef); ut_print_timestamp(ef); @@ -1540,15 +1545,11 @@ dict_foreign_eval_sql( "names can occur. Workaround: name your constraints\n" "explicitly with unique names.\n", ef); - - mysql_mutex_unlock(&dict_foreign_err_mutex); - - return(error); - } - - if (UNIV_UNLIKELY(error != DB_SUCCESS)) { - ib::error() << "Foreign key constraint creation failed: " - << error; + goto release; + default: + sql_print_error("InnoDB: " + "Foreign key constraint creation failed: %s", + ut_strerr(error)); mysql_mutex_lock(&dict_foreign_err_mutex); ut_print_timestamp(ef); @@ -1558,12 +1559,11 @@ dict_foreign_eval_sql( fputs(".\n" "See the MariaDB .err log in the datadir" " for more information.\n", ef); +release: mysql_mutex_unlock(&dict_foreign_err_mutex); - - return(error); } - return(DB_SUCCESS); + return error; } /********************************************************************//** From 8f8ba758559e473f643baa0a0601d321c42517b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 21 Mar 2022 17:15:06 +0200 Subject: [PATCH 41/52] MDEV-27234: Data dictionary recovery was not READ COMMITTED This also fixes MDEV-20198: Instant ALTER TABLE is not crash safe InnoDB dictionary recovery wrongly used the READ UNCOMMITTED isolation level, causing some mismatch. For example, if a table was renamed or replaced in a transaction, according to READ UNCOMMITTED the table might not exist at all. We implement READ COMMITTED isolation level for accessing the dictionary tables SYS_TABLES, SYS_COLUMNS, SYS_INDEXES, SYS_FIELDS, SYS_VIRTUAL, SYS_FOREIGN, SYS_FOREIGN_COLS. For most of these tables, no secondary index exists. For the secondary indexes (on SYS_TABLES.ID, SYS_FOREIGN.FOR_NAME, SYS_FOREIGN.REF_NAME), we will always look up the primary key in the clustered index and check if the record actually is a committed version. dict_check_sys_tables(): Recover tablespaces also from delete-marked committed records, so that if a matching .ibd file exists, it will be removed by fil_delete_tablespace() when the committed delete-marked SYS_INDEXES record of the clustered index is purged in row_purge_remove_clust_if_poss_low(). fil_ibd_open(): Change the Boolean parameter "validate" to a ternary one, to suppress error messages when the file might not exist. It is possible that a .ibd file was deleted and the server shut down before the SYS_INDEXES and SYS_TABLES records were purged. Hence, if dict_check_sys_tables() finds a committed delete-marked record, we must not complain if the tablespace file is not found. On Windows, we msut treat ERROR_PATH_NOT_FOUND (directory not found) in the same way as ERROR_FILE_NOT_FOUND. This fixes a few failures where a previous test successfully executed DROP DATABASE (and deleted all files and the directory), but a committed delete-marked SYS_TABLES record had not been purged before server restart. dict_getnext_system_low(): Do not filter out delete-marked records. dict_startscan_system(), dict_getnext_system(): Do filter out delete-marked records, for accessing the INFORMATION_SCHEMA tables. dict_sys_tables_rec_read(): Return the DB_TRX_ID of the committed version of the record. This is needed in dict_load_table_low(). dict_load_foreign_cols(), dict_load_foreign(): Add a parameter for the current transaction identifier. In some DDL operations, the FOREIGN KEY constraints are being loaded from the data dictionary before the DDL transaction has been committed. For SYS_FOREIGN and SYS_FOREIGN_COLS, we must implement the special case of READ COMMITTED that the changes of the uncommitted current transaction are visible. dict_load_foreign(): Validate the table name. We could find a SYS_FOREIGN.ID via a committed delete-marked secondary index record that does not match the REF_NAME or FOR_NAME of the secondary index record. dict_load_index_low(): Optionally take the table as a parameter, so that table->def_trx_id can be updated in case of a committed delete-marked SYS_INDEXES record corresponding to DROP INDEX, but not corresponding to an index stub of ADD INDEX. dict_load_indexes(): Do not update table->def_trx_id in case of delete-marked records. rec_is_metadata(), rec_offs_make_valid(), rec_get_offsets_func(), row_build_low(): Relax some assertions. We may now have !index->is_instant() even if a metadata record is present in the index. Previously, the recovery of instant ADD/DROP COLUMN assumed that READ UNCOMMITTED of the data dictionary will be performed. Now, we will have a READ COMMITTED copy of the data dictionary cache, and a READ UNCOMMITTED copy of the metadata record. btr_page_reorganize_low(): Correctly update the FIL_PAGE_TYPE when rolling back an instant ADD/DROP COLUMN operation. row_rec_to_index_entry_impl(): Relax some assertions, and disallow accessing "extra" fields. This fixes the recovery of a crash during an instant ADD COLUMN after a successful instant DROP COLUMN, in the test innodb.instant_alter_crash. Tested by: Matthias Leich --- .../innodb/r/row_format_redundant.result | 23 +- .../suite/innodb/t/alter_crash_rebuild.test | 26 + .../suite/innodb/t/row_format_redundant.test | 27 +- storage/innobase/btr/btr0btr.cc | 24 +- storage/innobase/dict/dict0load.cc | 642 +++++++++++------- storage/innobase/fil/fil0fil.cc | 37 +- storage/innobase/handler/ha_innodb.cc | 7 +- storage/innobase/handler/handler0alter.cc | 4 +- storage/innobase/handler/i_s.cc | 17 +- storage/innobase/include/dict0load.h | 7 +- storage/innobase/include/fil0fil.h | 7 +- storage/innobase/include/rem0rec.h | 8 +- storage/innobase/rem/rem0rec.cc | 11 +- storage/innobase/row/row0import.cc | 4 +- storage/innobase/row/row0mysql.cc | 2 +- storage/innobase/row/row0row.cc | 16 +- 16 files changed, 519 insertions(+), 343 deletions(-) create mode 100644 mysql-test/suite/innodb/t/alter_crash_rebuild.test diff --git a/mysql-test/suite/innodb/r/row_format_redundant.result b/mysql-test/suite/innodb/r/row_format_redundant.result index 8a629d06dd8..b798832e96f 100644 --- a/mysql-test/suite/innodb/r/row_format_redundant.result +++ b/mysql-test/suite/innodb/r/row_format_redundant.result @@ -1,3 +1,4 @@ +SET GLOBAL innodb_fast_shutdown=0; # restart: --innodb-data-home-dir=MYSQLTEST_VARDIR/tmp/row_format_redundant --innodb-log-group-home-dir=MYSQLTEST_VARDIR/tmp/row_format_redundant --innodb-data-file-path=ibdata1:1M:autoextend --innodb-undo-tablespaces=0 --innodb-stats-persistent=0 SET GLOBAL innodb_file_per_table=1; # @@ -8,25 +9,17 @@ SET GLOBAL innodb_file_per_table=ON; create table t1 (a int not null, d varchar(15) not null, b varchar(198) not null, c char(156)) engine=InnoDB row_format=redundant; -insert into t1 values(123, 'abcdef', 'jghikl', 'mnop'); -insert into t1 values(456, 'abcdef', 'jghikl', 'mnop'); -insert into t1 values(789, 'abcdef', 'jghikl', 'mnop'); -insert into t1 values(134, 'kasdfsdsadf', 'adfjlasdkfjasd', 'adfsadflkasdasdfljasdf'); -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; +create temporary table t like t1; +insert into t values(123, 'abcdef', 'jghikl', 'mnop'); +insert into t values(456, 'abcdef', 'jghikl', 'mnop'); +insert into t values(789, 'abcdef', 'jghikl', 'mnop'); +insert into t values(134, 'kasdfsdsadf', 'adfjlasdkfjasd', 'adfsadflkasdasdfljasdf'); +insert into t1 select a,d,b,c from t, seq_1_to_1024; SET GLOBAL innodb_file_per_table=OFF; create table t2 (a int not null, d varchar(15) not null, b varchar(198) not null, c char(156), fulltext ftsic(c)) engine=InnoDB row_format=redundant; -insert into t2 select * from t1; +insert into t2 select a,d,b,c from t, seq_1_to_1024; create table t3 (a int not null, d varchar(15) not null, b varchar(198), c varchar(150), index k1(c(99), b(56)), index k2(b(5), c(10))) engine=InnoDB row_format=redundant; diff --git a/mysql-test/suite/innodb/t/alter_crash_rebuild.test b/mysql-test/suite/innodb/t/alter_crash_rebuild.test new file mode 100644 index 00000000000..500cd28e5c5 --- /dev/null +++ b/mysql-test/suite/innodb/t/alter_crash_rebuild.test @@ -0,0 +1,26 @@ +--source include/have_innodb.inc +--source include/have_debug.inc +--source include/have_debug_sync.inc + +CREATE TABLE t1 (a INT NOT NULL) ENGINE=InnoDB STATS_PERSISTENT=0; + +connect ddl,localhost,root; +SET DEBUG_SYNC='after_trx_committed_in_memory SIGNAL stuck WAIT_FOR ever EXECUTE 2'; +send ALTER TABLE t1 ADD PRIMARY KEY(a); + +connection default; +SET DEBUG_SYNC='now WAIT_FOR stuck'; +SET DEBUG_SYNC='now SIGNAL ever'; +SET DEBUG_SYNC='now WAIT_FOR stuck'; + +SET GLOBAL innodb_log_checkpoint_now=ON; + +--let $shutdown_timeout=0 +--source include/restart_mysqld.inc + +disconnect ddl; + +SHOW CREATE TABLE t1; +SELECT * FROM t1; +DROP TABLE t1; +--source include/wait_all_purged.inc diff --git a/mysql-test/suite/innodb/t/row_format_redundant.test b/mysql-test/suite/innodb/t/row_format_redundant.test index aadbfd2b773..6de7597e983 100644 --- a/mysql-test/suite/innodb/t/row_format_redundant.test +++ b/mysql-test/suite/innodb/t/row_format_redundant.test @@ -1,6 +1,7 @@ --source include/have_innodb.inc # Embedded mode doesn't allow restarting --source include/not_embedded.inc +--source include/have_sequence.inc --disable_query_log call mtr.add_suppression("InnoDB: Table `mysql`\\.`innodb_table_stats` not found"); @@ -21,6 +22,8 @@ let bugdir= $MYSQLTEST_VARDIR/tmp/row_format_redundant; --let $d=$d --innodb-data-file-path=ibdata1:1M:autoextend --let $d=$d --innodb-undo-tablespaces=0 --innodb-stats-persistent=0 --let $restart_parameters= $d +# Ensure that any DDL records from previous tests have been purged. +SET GLOBAL innodb_fast_shutdown=0; --source include/restart_mysqld.inc SET GLOBAL innodb_file_per_table=1; @@ -35,27 +38,21 @@ create table t1 (a int not null, d varchar(15) not null, b varchar(198) not null, c char(156)) engine=InnoDB row_format=redundant; -insert into t1 values(123, 'abcdef', 'jghikl', 'mnop'); -insert into t1 values(456, 'abcdef', 'jghikl', 'mnop'); -insert into t1 values(789, 'abcdef', 'jghikl', 'mnop'); -insert into t1 values(134, 'kasdfsdsadf', 'adfjlasdkfjasd', 'adfsadflkasdasdfljasdf'); -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; -insert into t1 select * from t1; +create temporary table t like t1; + +insert into t values(123, 'abcdef', 'jghikl', 'mnop'); +insert into t values(456, 'abcdef', 'jghikl', 'mnop'); +insert into t values(789, 'abcdef', 'jghikl', 'mnop'); +insert into t values(134, 'kasdfsdsadf', 'adfjlasdkfjasd', 'adfsadflkasdasdfljasdf'); + +insert into t1 select a,d,b,c from t, seq_1_to_1024; SET GLOBAL innodb_file_per_table=OFF; create table t2 (a int not null, d varchar(15) not null, b varchar(198) not null, c char(156), fulltext ftsic(c)) engine=InnoDB row_format=redundant; -insert into t2 select * from t1; +insert into t2 select a,d,b,c from t, seq_1_to_1024; create table t3 (a int not null, d varchar(15) not null, b varchar(198), c varchar(150), index k1(c(99), b(56)), index k2(b(5), c(10))) engine=InnoDB diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 743a2f1fbfa..2d83f16b537 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2021, MariaDB Corporation. +Copyright (c) 2014, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1334,11 +1334,23 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, else ut_ad(cursor->rec == page_get_infimum_rec(block->page.frame)); - if (block->page.id().page_no() == index->page && - fil_page_get_type(old->page.frame) == FIL_PAGE_TYPE_INSTANT) + mtr->set_log_mode(log_mode); + + if (block->page.id().page_no() != index->page || + fil_page_get_type(old->page.frame) != FIL_PAGE_TYPE_INSTANT) + ut_ad(!memcmp(old->page.frame, block->page.frame, PAGE_HEADER)); + else if (!index->is_instant()) + { + ut_ad(!memcmp(old->page.frame, block->page.frame, FIL_PAGE_TYPE)); + ut_ad(!memcmp(old->page.frame + FIL_PAGE_TYPE + 2, + block->page.frame + FIL_PAGE_TYPE + 2, + PAGE_HEADER - FIL_PAGE_TYPE - 2)); + mtr->write<2,mtr_t::FORCED>(*block, FIL_PAGE_TYPE + block->page.frame, + FIL_PAGE_INDEX); + } + else { /* Preserve the PAGE_INSTANT information. */ - ut_ad(index->is_instant()); memcpy_aligned<2>(FIL_PAGE_TYPE + block->page.frame, FIL_PAGE_TYPE + old->page.frame, 2); memcpy_aligned<2>(PAGE_HEADER + PAGE_INSTANT + block->page.frame, @@ -1358,9 +1370,10 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, memcpy(PAGE_OLD_SUPREMUM + block->page.frame, PAGE_OLD_SUPREMUM + old->page.frame, 8); } + + ut_ad(!memcmp(old->page.frame, block->page.frame, PAGE_HEADER)); } - ut_ad(!memcmp(old->page.frame, block->page.frame, PAGE_HEADER)); ut_ad(!memcmp(old->page.frame + PAGE_MAX_TRX_ID + PAGE_HEADER, block->page.frame + PAGE_MAX_TRX_ID + PAGE_HEADER, PAGE_DATA - (PAGE_MAX_TRX_ID + PAGE_HEADER))); @@ -1369,7 +1382,6 @@ static void btr_page_reorganize_low(page_cur_t *cursor, dict_index_t *index, lock_move_reorganize_page(block, old); /* Write log for the changes, if needed. */ - mtr->set_log_mode(log_mode); if (log_mode == MTR_LOG_ALL) { /* Check and log the changes in the page header. */ diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 155f2e55057..67aa4e4dcb6 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2021, MariaDB Corporation. +Copyright (c) 2016, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -43,6 +43,7 @@ Created 4/24/1996 Heikki Tuuri #include "srv0start.h" #include "srv0srv.h" #include "fts0opt.h" +#include "row0vers.h" /** Loads a table definition and also all its index definitions. @@ -65,22 +66,20 @@ static dict_table_t *dict_load_table_one(const span &name, dict_names_t &fk_tables); /** Load an index definition from a SYS_INDEXES record to dict_index_t. -If allocate=TRUE, we will create a dict_index_t structure and fill it -accordingly. If allocated=FALSE, the dict_index_t will be supplied by -the caller and filled with information read from the record. @return error message @retval NULL on success */ static const char* dict_load_index_low( byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if allocate=TRUE - and "out" when allocate=FALSE */ + an "in" value if mtr + and "out" when !mtr */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool allocate, /*!< in: TRUE=allocate *index, - FALSE=fill in a pre-allocated - *index */ + mtr_t* mtr, /*!< in/out: mini-transaction, + or nullptr if a pre-allocated + *index is to be filled in */ + dict_table_t* table, /*!< in/out: table, or NULL */ dict_index_t** index); /*!< out,own: index, or NULL */ /** Load a table column definition from a SYS_COLUMNS record to dict_table_t. @@ -100,6 +99,7 @@ dict_load_column_low( table_id_t* table_id, /*!< out: table id */ const char** col_name, /*!< out: column name */ const rec_t* rec, /*!< in: SYS_COLUMNS record */ + mtr_t* mtr, /*!< in/out: mini-transaction */ ulint* nth_v_col); /*!< out: if not NULL, this records the "n" of "nth" virtual column */ @@ -143,6 +143,7 @@ dict_load_field_low( byte* last_index_id, /*!< in: last index id */ mem_heap_t* heap, /*!< in/out: memory heap for temporary storage */ + mtr_t* mtr, /*!< in/out: mini-transaction */ const rec_t* rec); /*!< in: SYS_FIELDS record */ #ifdef UNIV_DEBUG @@ -179,7 +180,7 @@ dict_getnext_system_low( { rec_t* rec = NULL; - while (!rec || rec_get_deleted_flag(rec, 0)) { + while (!rec) { btr_pcur_move_to_next_user_rec(pcur, mtr); rec = btr_pcur_get_rec(pcur); @@ -209,9 +210,13 @@ dict_startscan_system( mtr_t* mtr, /*!< in: the mini-transaction */ dict_table_t* table) /*!< in: system table */ { - btr_pcur_open_at_index_side(true, table->indexes.start, - BTR_SEARCH_LEAF, pcur, true, 0, mtr); - return dict_getnext_system_low(pcur, mtr); + btr_pcur_open_at_index_side(true, table->indexes.start, BTR_SEARCH_LEAF, + pcur, true, 0, mtr); + const rec_t *rec; + do + rec= dict_getnext_system_low(pcur, mtr); + while (rec && rec_get_deleted_flag(rec, 0)); + return rec; } /********************************************************************//** @@ -230,7 +235,9 @@ dict_getnext_system( pcur->restore_position(BTR_SEARCH_LEAF, mtr); /* Get the next record */ - rec = dict_getnext_system_low(pcur, mtr); + do { + rec = dict_getnext_system_low(pcur, mtr); + } while (rec && rec_get_deleted_flag(rec, 0)); return(rec); } @@ -249,14 +256,13 @@ dict_process_sys_indexes_rec( table_id_t* table_id) /*!< out: index table id */ { const char* err_msg; - byte* buf; + byte buf[8]; ut_d(index->is_dummy = true); ut_d(index->in_instant_init = false); - buf = static_cast(mem_heap_alloc(heap, 8)); /* Parse the record, and get "dict_index_t" struct filled */ - err_msg = dict_load_index_low(buf, heap, rec, FALSE, &index); + err_msg = dict_load_index_low(buf, heap, rec, nullptr, nullptr, &index); *table_id = mach_read_from_8(buf); @@ -282,7 +288,8 @@ dict_process_sys_columns_rec( /* Parse the record, and get "dict_col_t" struct filled */ err_msg = dict_load_column_low(NULL, heap, column, - table_id, col_name, rec, nth_v_col); + table_id, col_name, rec, nullptr, + nth_v_col); return(err_msg); } @@ -301,13 +308,7 @@ dict_process_sys_virtual_rec( ulint* pos, ulint* base_pos) { - const char* err_msg; - - /* Parse the record, and get "dict_col_t" struct filled */ - err_msg = dict_load_virtual_low(NULL, NULL, table_id, - pos, base_pos, rec); - - return(err_msg); + return dict_load_virtual_low(nullptr, nullptr, table_id, pos, base_pos, rec); } /********************************************************************//** @@ -325,17 +326,14 @@ dict_process_sys_fields_rec( index_id_t* index_id, /*!< out: current index id */ index_id_t last_id) /*!< in: previous index id */ { - byte* buf; - byte* last_index_id; + byte buf[8]; + byte last_index_id[8]; const char* err_msg; - buf = static_cast(mem_heap_alloc(heap, 8)); - - last_index_id = static_cast(mem_heap_alloc(heap, 8)); mach_write_to_8(last_index_id, last_id); err_msg = dict_load_field_low(buf, NULL, sys_field, - pos, last_index_id, heap, rec); + pos, last_index_id, heap, nullptr, rec); *index_id = mach_read_from_8(buf); @@ -507,10 +505,6 @@ dict_sys_tables_rec_check( ut_ad(dict_sys.locked()); - if (rec_get_deleted_flag(rec, 0)) { - return("delete-marked record in SYS_TABLES"); - } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) { return("wrong number of columns in SYS_TABLES record"); } @@ -637,30 +631,76 @@ dict_sys_tables_type_to_tf(ulint type, bool not_redundant) return(flags); } +/** Outcome of dict_sys_tables_rec_read() */ +enum table_read_status { READ_OK= 0, READ_ERROR, READ_NOT_FOUND }; + /** Read and return 5 integer fields from a SYS_TABLES record. @param[in] rec A record of SYS_TABLES -@param[in] name SYS_TABLES.NAME +@param[in] mtr mini-transaction @param[out] table_id Pointer to the table_id for this table @param[out] space_id Pointer to the space_id for this table @param[out] n_cols Pointer to number of columns for this table. @param[out] flags Pointer to table flags @param[out] flags2 Pointer to table flags2 -@return true if the record was read correctly, false if not. */ +@param[out] trx_id DB_TRX_ID of the committed SYS_TABLES record, + or nullptr to perform READ UNCOMMITTED +@return whether the record was read correctly */ MY_ATTRIBUTE((warn_unused_result)) static -bool +table_read_status dict_sys_tables_rec_read( const rec_t* rec, - const span& name, + mtr_t* mtr, table_id_t* table_id, ulint* space_id, ulint* n_cols, ulint* flags, - ulint* flags2) + ulint* flags2, + trx_id_t* trx_id) { const byte* field; ulint len; ulint type; + mem_heap_t* heap = nullptr; + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); + ut_ad(len == 6 || len == UNIV_SQL_NULL); + trx_id_t id = len == 6 ? trx_read_trx_id(field) : 0; + if (id && trx_sys.find(nullptr, id, false)) { + heap = mem_heap_create(1024); + dict_index_t* index = UT_LIST_GET_FIRST( + dict_sys.sys_tables->indexes); + rec_offs* offsets = rec_get_offsets( + rec, index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, index, &offsets, &heap, + heap, &old_vers, nullptr); + rec = old_vers; + if (!rec) { + mem_heap_free(heap); + return READ_NOT_FOUND; + } + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len); + if (UNIV_UNLIKELY(len != 6)) { + mem_heap_free(heap); + return READ_ERROR; + } + id = trx_read_trx_id(field); + } + + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(id); + if (trx_id) { + return READ_NOT_FOUND; + } + } + + if (trx_id) { + *trx_id = id; + } field = rec_get_nth_field_old( rec, DICT_FLD__SYS_TABLES__ID, &len); @@ -769,8 +809,13 @@ dict_sys_tables_rec_read( " data dictionary contains invalid flags." " SYS_TABLES.TYPE=" ULINTPF " SYS_TABLES.N_COLS=" ULINTPF, - int(name.size()), name.data(), type, *n_cols); - return(false); + int(rec_get_field_start_offs(rec, 1)), rec, + type, *n_cols); +err_exit: + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + return READ_ERROR; } *flags = dict_sys_tables_type_to_tf(type, not_redundant); @@ -794,9 +839,10 @@ dict_sys_tables_rec_read( " contains invalid flags." " SYS_TABLES.TYPE=" ULINTPF " SYS_TABLES.MIX_LEN=" ULINTPF, - int(name.size()), name.data(), + int(rec_get_field_start_offs(rec, 1)), + rec, type, *flags2); - return(false); + goto err_exit; } /* DICT_TF2_FTS will be set when indexes are being loaded */ @@ -808,7 +854,11 @@ dict_sys_tables_rec_read( *flags2 = 0; } - return(true); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + + return READ_OK; } /** Load and check each non-predefined tablespace mentioned in SYS_TABLES. @@ -844,7 +894,6 @@ static ulint dict_check_sys_tables() continue; } - /* Copy the table name from rec */ const char *field = reinterpret_cast( rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__NAME, &len)); @@ -852,10 +901,9 @@ static ulint dict_check_sys_tables() DBUG_PRINT("dict_check_sys_tables", ("name: %*.s", static_cast(len), field)); - const span name{field, len}; - - if (!dict_sys_tables_rec_read(rec, name, &table_id, &space_id, - &n_cols, &flags, &flags2) + if (dict_sys_tables_rec_read(rec, &mtr, &table_id, &space_id, + &n_cols, &flags, &flags2, nullptr) + != READ_OK || space_id == TRX_SYS_SPACE) { continue; } @@ -880,13 +928,18 @@ static ulint dict_check_sys_tables() continue; } + const span name{field, len}; + char* filepath = fil_make_filepath(nullptr, name, IBD, false); + const bool not_dropped{!rec_get_deleted_flag(rec, 0)}; + /* Check that the .ibd file exists. */ - if (fil_ibd_open(false, FIL_TYPE_TABLESPACE, + if (fil_ibd_open(not_dropped, FIL_TYPE_TABLESPACE, space_id, dict_tf_to_fsp_flags(flags), name, filepath)) { + } else if (!not_dropped) { } else if (srv_operation == SRV_OPERATION_NORMAL && srv_start_after_restore && srv_force_recovery < SRV_FORCE_NO_BACKGROUND @@ -899,8 +952,7 @@ static ulint dict_check_sys_tables() sql_print_warning("InnoDB: Ignoring tablespace for" " %.*s because it" " could not be opened.", - static_cast(name.size()), - name.data()); + static_cast(len), field); } max_space_id = ut_max(max_space_id, space_id); @@ -972,6 +1024,7 @@ dict_load_column_low( table_id_t* table_id, /*!< out: table id */ const char** col_name, /*!< out: column name */ const rec_t* rec, /*!< in: SYS_COLUMNS record */ + mtr_t* mtr, /*!< in/out: mini-transaction */ ulint* nth_v_col) /*!< out: if not NULL, this records the "n" of "nth" virtual column */ @@ -987,10 +1040,6 @@ dict_load_column_low( ut_ad(!table == !!column); - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_column_del); - } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) { return("wrong number of columns in SYS_COLUMNS record"); } @@ -1022,7 +1071,28 @@ err_len: goto err_len; } - const trx_id_t trx_id = mach_read_from_6(field); + const trx_id_t trx_id = trx_read_trx_id(field); + + if (trx_id && mtr && trx_sys.find(nullptr, trx_id, false)) { + dict_index_t* index = UT_LIST_GET_FIRST( + dict_sys.sys_columns->indexes); + rec_offs* offsets = rec_get_offsets( + rec, index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, index, &offsets, &heap, + heap, &old_vers, nullptr); + rec = old_vers; + if (!old_vers) { + return dict_load_column_none; + } + ut_ad(!rec_get_deleted_flag(rec, 0)); + } + + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(trx_id); + return dict_load_column_del; + } rec_get_nth_field_offs_old( rec, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR, &len); @@ -1036,11 +1106,7 @@ err_len: goto err_len; } - name = mem_heap_strdupl(heap, (const char*) field, len); - - if (col_name) { - *col_name = name; - } + *col_name = name = mem_heap_strdupl(heap, (const char*) field, len); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_COLUMNS__MTYPE, &len); @@ -1155,10 +1221,6 @@ dict_load_virtual_low( ulint len; ulint base; - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_virtual_del); - } - if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_VIRTUAL) { return("wrong number of columns in SYS_VIRTUAL record"); } @@ -1198,7 +1260,7 @@ err_len: *base_pos = base; } - rec_get_nth_field_offs_old( + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_VIRTUAL__DB_TRX_ID, &len); if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { goto err_len; @@ -1210,6 +1272,17 @@ err_len: goto err_len; } + const trx_id_t trx_id = trx_read_trx_id(field); + + if (trx_id && column && trx_sys.find(nullptr, trx_id, false)) { + if (!rec_get_deleted_flag(rec, 0)) { + return dict_load_virtual_none; + } + } else if (rec_get_deleted_flag(rec, 0)) { + ut_ad(trx_id != 0); + return dict_load_virtual_del; + } + if (column != NULL) { *column = dict_table_get_nth_col(table, base); } @@ -1274,7 +1347,7 @@ dict_load_columns( err_msg = btr_pcur_is_on_user_rec(&pcur) ? dict_load_column_low(table, heap, NULL, NULL, - &name, rec, &nth_v_col) + &name, rec, &mtr, &nth_v_col) : dict_load_column_none; if (!err_msg) { @@ -1282,8 +1355,8 @@ dict_load_columns( n_skipped++; goto next_rec; } else if (err_msg == dict_load_column_none - && strstr(table->name.m_name, - "/" TEMP_FILE_PREFIX_INNODB)) { + && strstr(table->name.m_name, + "/" TEMP_FILE_PREFIX_INNODB)) { break; } else { ib::fatal() << err_msg << " for table " << table->name; @@ -1463,6 +1536,7 @@ dict_load_field_low( byte* last_index_id, /*!< in: last index id */ mem_heap_t* heap, /*!< in/out: memory heap for temporary storage */ + mtr_t* mtr, /*!< in/out: mini-transaction */ const rec_t* rec) /*!< in: SYS_FIELDS record */ { const byte* field; @@ -1473,11 +1547,8 @@ dict_load_field_low( ulint position; /* Either index or sys_field is supplied, not both */ - ut_a((!index) || (!sys_field)); - - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_field_del); - } + ut_ad((!index) != (!sys_field)); + ut_ad((!index) == !mtr); if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FIELDS) { return("wrong number of columns in SYS_FIELDS record"); @@ -1531,7 +1602,7 @@ err_len: position = pos_and_prefix_len & 0xFFFFUL; } - rec_get_nth_field_offs_old( + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FIELDS__DB_TRX_ID, &len); if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { goto err_len; @@ -1542,6 +1613,29 @@ err_len: goto err_len; } + const trx_id_t trx_id = trx_read_trx_id(field); + + if (!trx_id) { + ut_ad(!rec_get_deleted_flag(rec, 0)); + } else if (mtr && trx_sys.find(nullptr, trx_id, false)) { + dict_index_t* sys_field = UT_LIST_GET_FIRST( + dict_sys.sys_fields->indexes); + rec_offs* offsets = rec_get_offsets( + rec, sys_field, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, sys_field, &offsets, &heap, + heap, &old_vers, nullptr); + rec = old_vers; + if (!old_vers || rec_get_deleted_flag(rec, 0)) { + return dict_load_field_none; + } + } + + if (rec_get_deleted_flag(rec, 0)) { + return(dict_load_field_del); + } + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FIELDS__COL_NAME, &len); if (len == 0 || len == UNIV_SQL_NULL) { @@ -1553,9 +1647,6 @@ err_len: index, mem_heap_strdupl(heap, (const char*) field, len), prefix_len); } else { - ut_a(sys_field); - ut_a(pos); - sys_field->name = mem_heap_strdupl( heap, (const char*) field, len); sys_field->prefix_len = prefix_len & ((1U << 12) - 1); @@ -1606,7 +1697,8 @@ dict_load_fields( for (i = 0; i < index->n_fields; i++) { const char *err_msg = btr_pcur_is_on_user_rec(&pcur) ? dict_load_field_low(buf, index, NULL, NULL, NULL, - heap, btr_pcur_get_rec(&pcur)) + heap, &mtr, + btr_pcur_get_rec(&pcur)) : dict_load_field_none; if (!err_msg) { @@ -1645,36 +1737,30 @@ static const char *dict_load_index_none= "SYS_INDEXES record not found"; static const char *dict_load_table_flags= "incorrect flags in SYS_TABLES"; /** Load an index definition from a SYS_INDEXES record to dict_index_t. -If allocate=TRUE, we will create a dict_index_t structure and fill it -accordingly. If allocated=FALSE, the dict_index_t will be supplied by -the caller and filled with information read from the record. @return error message @retval NULL on success */ static const char* dict_load_index_low( byte* table_id, /*!< in/out: table id (8 bytes), - an "in" value if allocate=TRUE - and "out" when allocate=FALSE */ + an "in" value if mtr + and "out" when !mtr */ mem_heap_t* heap, /*!< in/out: temporary memory heap */ const rec_t* rec, /*!< in: SYS_INDEXES record */ - ibool allocate, /*!< in: TRUE=allocate *index, - FALSE=fill in a pre-allocated - *index */ + mtr_t* mtr, /*!< in/out: mini-transaction, + or nullptr if a pre-allocated + *index is to be filled in */ + dict_table_t* table, /*!< in/out: table, or NULL */ dict_index_t** index) /*!< out,own: index, or NULL */ { const byte* field; ulint len; - ulint name_len; - char* name_buf; index_id_t id; ulint n_fields; ulint type; unsigned merge_threshold; - if (allocate) { - /* If allocate=TRUE, no dict_index_t will - be supplied. Initialize "*index" to NULL */ + if (mtr) { *index = NULL; } @@ -1709,7 +1795,7 @@ err_len: return("incorrect column length in SYS_INDEXES"); } - if (!allocate) { + if (!mtr) { /* We are reading a SYS_INDEXES record. Copy the table_id */ memcpy(table_id, (const char*) field, 8); } else if (memcmp(field, table_id, 8)) { @@ -1726,7 +1812,7 @@ err_len: id = mach_read_from_8(field); - rec_get_nth_field_offs_old( + field = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__DB_TRX_ID, &len); if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) { goto err_len; @@ -1737,15 +1823,29 @@ err_len: goto err_len; } - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_INDEXES__NAME, &name_len); - if (name_len == 0 || name_len == UNIV_SQL_NULL) { - goto err_len; + const trx_id_t trx_id = trx_read_trx_id(field); + if (!trx_id) { + ut_ad(!rec_get_deleted_flag(rec, 0)); + } else if (!mtr) { + } else if (trx_sys.find(nullptr, trx_id, false)) { + dict_index_t* sys_index = UT_LIST_GET_FIRST( + dict_sys.sys_indexes->indexes); + rec_offs* offsets = rec_get_offsets( + rec, sys_index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, mtr, sys_index, &offsets, &heap, + heap, &old_vers, nullptr); + rec = old_vers; + if (!old_vers || rec_get_deleted_flag(rec, 0)) { + return dict_load_index_none; + } + } else if (rec_get_deleted_flag(rec, 0) + && rec[8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN] + != static_cast(*TEMP_INDEX_PREFIX_STR) + && table->def_trx_id < trx_id) { + table->def_trx_id = trx_id; } - ut_ad(field == &rec[8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN]); - - name_buf = mem_heap_strdupl(heap, (const char*) field, - name_len); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_INDEXES__N_FIELDS, &len); @@ -1770,16 +1870,27 @@ err_len: goto err_len; } - if (rec_get_deleted_flag(rec, 0)) { - return(dict_load_index_del); + ut_d(const auto name_offs =) + rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_INDEXES__NAME, &len); + ut_ad(name_offs == 8 + 8 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + + if (len == 0 || len == UNIV_SQL_NULL) { + goto err_len; } - if (allocate) { - *index = dict_mem_index_create(NULL, name_buf, type, n_fields); - } else { - ut_a(*index); + if (rec_get_deleted_flag(rec, 0)) { + return dict_load_index_del; + } - dict_mem_fill_index_struct(*index, NULL, name_buf, + char* name = mem_heap_strdupl(heap, reinterpret_cast(rec) + + (8 + 8 + DATA_TRX_ID_LEN + + DATA_ROLL_PTR_LEN), + len); + + if (mtr) { + *index = dict_mem_index_create(table, name, type, n_fields); + } else { + dict_mem_fill_index_struct(*index, nullptr, name, type, n_fields); } @@ -1811,7 +1922,7 @@ dict_load_indexes( dtuple_t* tuple; dfield_t* dfield; const rec_t* rec; - byte* buf; + byte buf[8]; mtr_t mtr; dberr_t error = DB_SUCCESS; @@ -1829,7 +1940,6 @@ dict_load_indexes( tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); - buf = static_cast(mem_heap_alloc(heap, 8)); mach_write_to_8(buf, table->id); dfield_set_data(dfield, buf, 8); @@ -1865,7 +1975,8 @@ dict_load_indexes( } } - err_msg = dict_load_index_low(buf, heap, rec, TRUE, &index); + err_msg = dict_load_index_low(buf, heap, rec, &mtr, table, + &index); ut_ad(!index == !!err_msg); if (err_msg == dict_load_index_none) { @@ -1875,10 +1986,6 @@ dict_load_indexes( } if (err_msg == dict_load_index_del) { - const trx_id_t id = mach_read_from_6(rec + 8 + 8); - if (id > table->def_trx_id) { - table->def_trx_id = id; - } goto next_rec; } else if (err_msg) { ib::error() << err_msg; @@ -1891,7 +1998,7 @@ dict_load_indexes( == static_cast(*TEMP_INDEX_PREFIX_STR)) { goto next_rec; } else { - const trx_id_t id = mach_read_from_6(rec + 8 + 8); + const trx_id_t id = trx_read_trx_id(rec + 8 + 8); if (id > table->def_trx_id) { table->def_trx_id = id; } @@ -1953,7 +2060,6 @@ corrupted: dictionary cache for such metadata corruption, since we would always be able to set it when loading the dictionary cache */ - index->table = table; dict_set_corrupted_index_cache_only(index); } else if (!dict_index_is_clust(index) && NULL == dict_table_get_first_index(table)) { @@ -1972,7 +2078,6 @@ corrupted: of the database server */ dict_mem_index_free(index); } else { - index->table = table; dict_load_fields(index, heap); /* The data dictionary tables should never contain @@ -2034,12 +2139,12 @@ func_exit: /** Load a table definition from a SYS_TABLES record to dict_table_t. Do not load any columns or indexes. -@param[in] name Table name +@param[in,out] mtr mini-transaction @param[in] rec SYS_TABLES record @param[out,own] table table, or nullptr @return error message @retval nullptr on success */ -const char *dict_load_table_low(const span &name, +const char *dict_load_table_low(mtr_t *mtr, const rec_t *rec, dict_table_t **table) { table_id_t table_id; @@ -2048,6 +2153,7 @@ const char *dict_load_table_low(const span &name, ulint t_num; ulint flags; ulint flags2; + trx_id_t trx_id; ulint n_v_col; if (const char* error_text = dict_sys_tables_rec_check(rec)) { @@ -2055,26 +2161,23 @@ const char *dict_load_table_low(const span &name, return(error_text); } - if (!dict_sys_tables_rec_read(rec, name, &table_id, &space_id, - &t_num, &flags, &flags2)) { + if (auto r = dict_sys_tables_rec_read(rec, mtr, &table_id, &space_id, + &t_num, &flags, &flags2, + &trx_id)) { *table = NULL; - return(dict_load_table_flags); + return r == READ_ERROR ? dict_load_table_flags : nullptr; } dict_table_decode_n_col(t_num, &n_cols, &n_v_col); - *table = dict_table_t::create(name, nullptr, n_cols + n_v_col, - n_v_col, flags, flags2); + *table = dict_table_t::create( + span(reinterpret_cast(rec), + rec_get_field_start_offs(rec, 1)), + nullptr, n_cols + n_v_col, n_v_col, flags, flags2); (*table)->space_id = space_id; (*table)->id = table_id; (*table)->file_unreadable = !!(flags2 & DICT_TF2_DISCARDED); - - ulint len; - (*table)->def_trx_id = mach_read_from_6( - rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, - &len)); - ut_ad(len == DATA_TRX_ID_LEN); - static_assert(DATA_TRX_ID_LEN == 6, "compatibility"); + (*table)->def_trx_id = trx_id; return(NULL); } @@ -2160,7 +2263,7 @@ dict_load_tablespace( } table->space = fil_ibd_open( - true, FIL_TYPE_TABLESPACE, table->space_id, + 2, FIL_TYPE_TABLESPACE, table->space_id, dict_tf_to_fsp_flags(table->flags), {table->name.m_name, strlen(table->name.m_name)}, filepath); @@ -2198,8 +2301,6 @@ static dict_table_t *dict_load_table_one(const span &name, mem_heap_t* heap; dfield_t* dfield; const rec_t* rec; - const byte* field; - ulint len; mtr_t mtr; DBUG_ENTER("dict_load_table_one"); @@ -2235,8 +2336,7 @@ static dict_table_t *dict_load_table_one(const span &name, BTR_SEARCH_LEAF, &pcur, &mtr); rec = btr_pcur_get_rec(&pcur); - if (!btr_pcur_is_on_user_rec(&pcur) - || rec_get_deleted_flag(rec, 0)) { + if (!btr_pcur_is_on_user_rec(&pcur)) { /* Not found */ err_exit: btr_pcur_close(&pcur); @@ -2246,21 +2346,22 @@ err_exit: DBUG_RETURN(NULL); } - field = rec_get_nth_field_old( - rec, DICT_FLD__SYS_TABLES__NAME, &len); - /* Check if the table name in record is the searched one */ - if (len != name.size() || memcmp(name.data(), field, len)) { + if (rec_get_field_start_offs(rec, 1) != name.size() + || memcmp(name.data(), rec, name.size())) { goto err_exit; } dict_table_t* table; - if (const char* err_msg = dict_load_table_low(name, rec, &table)) { + if (const char* err_msg = dict_load_table_low(&mtr, rec, &table)) { if (err_msg != dict_load_table_flags) { ib::error() << err_msg; } goto err_exit; } + if (!table) { + goto err_exit; + } btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -2293,7 +2394,7 @@ err_exit: err = dict_load_indexes(table, heap, index_load_err); - if (err == DB_INDEX_CORRUPT) { + if (err == DB_INDEX_CORRUPT || !UT_LIST_GET_FIRST(table->indexes)) { /* Refuse to load the table if the table has a corrupted cluster index */ ut_ad(index_load_err != DICT_ERR_IGNORE_DROP); @@ -2364,9 +2465,8 @@ corrupted: if (!table->is_readable()) { /* Don't attempt to load the indexes from disk. */ } else if (err == DB_SUCCESS) { - err = dict_load_foreigns(table->name.m_name, NULL, - true, true, - ignore_err, fk_tables); + err = dict_load_foreigns(table->name.m_name, nullptr, + 0, true, ignore_err, fk_tables); if (err != DB_SUCCESS) { ib::warn() << "Load table " << table->name @@ -2488,11 +2588,16 @@ check_rec: /* Check if the table id in record is the one searched for */ if (table_id == mach_read_from_8(field)) { - if (rec_get_deleted_flag(rec, 0)) { - /* Until purge has completed, there - may be delete-marked duplicate records - for the same SYS_TABLES.ID, but different - SYS_TABLES.NAME. */ + field = rec_get_nth_field_old(rec, + DICT_FLD__SYS_TABLE_IDS__NAME, &len); + table = dict_sys.load_table( + {reinterpret_cast(field), + len}, ignore_err); + if (table && table->id != table_id) { + ut_ad(rec_get_deleted_flag(rec, 0)); + table = nullptr; + } + if (!table) { while (btr_pcur_move_to_next(&pcur, &mtr)) { rec = btr_pcur_get_rec(&pcur); @@ -2500,13 +2605,6 @@ check_rec: goto check_rec; } } - } else { - /* Now we get the table name from the record */ - field = rec_get_nth_field_old(rec, - DICT_FLD__SYS_TABLE_IDS__NAME, &len); - table = dict_sys.load_table( - {reinterpret_cast(field), - len}, ignore_err); } } } @@ -2547,11 +2645,7 @@ Members that will be created and set by this function: foreign->foreign_col_names[i] foreign->referenced_col_names[i] (for i=0..foreign->n_fields-1) */ -static -void -dict_load_foreign_cols( -/*===================*/ - dict_foreign_t* foreign)/*!< in/out: foreign constraint object */ +static void dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) { btr_pcur_t pcur; dtuple_t* tuple; @@ -2586,14 +2680,45 @@ dict_load_foreign_cols( dfield_set_data(dfield, foreign->id, id_len); dict_index_copy_types(tuple, sys_index, 1); + mem_heap_t* heap = nullptr; btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); for (i = 0; i < foreign->n_fields; i++) { +retry: + ut_a(btr_pcur_is_on_user_rec(&pcur)); rec = btr_pcur_get_rec(&pcur); - ut_a(btr_pcur_is_on_user_rec(&pcur)); - ut_a(!rec_get_deleted_flag(rec, 0)); + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID, &len); + ut_a(len == DATA_TRX_ID_LEN); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_empty(heap); + } + + const trx_id_t id = trx_read_trx_id(field); + if (!id) { + } else if (id != trx_id && trx_sys.find(nullptr, id, false)) { + rec_offs* offsets = rec_get_offsets( + rec, sys_index, nullptr, true, ULINT_UNDEFINED, + &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, &mtr, sys_index, &offsets, &heap, + heap, &old_vers, nullptr); + rec = old_vers; + if (!rec || rec_get_deleted_flag(rec, 0)) { + goto next; + } + } + + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(id); +next: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + goto retry; + } field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len); @@ -2654,23 +2779,26 @@ dict_load_foreign_cols( } btr_pcur_close(&pcur); - mtr_commit(&mtr); + mtr.commit(); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } } /***********************************************************************//** Loads a foreign key constraint to the dictionary cache. If the referenced table is not yet loaded, it is added in the output parameter (fk_tables). @return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1), warn_unused_result)) +static MY_ATTRIBUTE((warn_unused_result)) dberr_t dict_load_foreign( /*==============*/ - const char* id, - /*!< in: foreign constraint id, must be - '\0'-terminated */ + const char* table_name, /*!< in: table name */ const char** col_names, /*!< in: column names, or NULL to use foreign->foreign_table->col_names */ + trx_id_t trx_id, + /*!< in: current transaction id, or 0 */ bool check_recursive, /*!< in: whether to record the foreign table parent count to avoid unlimited recursive @@ -2678,6 +2806,8 @@ dict_load_foreign( bool check_charsets, /*!< in: whether to check charset compatibility */ + span id, + /*!< in: foreign constraint id */ dict_err_ignore_t ignore_err, /*!< in: error to be ignored */ dict_names_t& fk_tables) @@ -2690,81 +2820,80 @@ dict_load_foreign( { dict_foreign_t* foreign; btr_pcur_t pcur; - dtuple_t* tuple; - mem_heap_t* heap2; - dfield_t* dfield; - const rec_t* rec; const byte* field; ulint len; mtr_t mtr; dict_table_t* for_table; dict_table_t* ref_table; - size_t id_len; + byte dtuple_buf[DTUPLE_EST_ALLOC(1)]; DBUG_ENTER("dict_load_foreign"); DBUG_PRINT("dict_load_foreign", - ("id: '%s', check_recursive: %d", id, check_recursive)); + ("id: '%.*s', check_recursive: %d", + int(id.size()), id.data(), check_recursive)); ut_ad(dict_sys.locked()); - id_len = strlen(id); - - heap2 = mem_heap_create(1000); - - mtr_start(&mtr); - dict_index_t* sys_index = dict_sys.sys_foreign->indexes.start; ut_ad(!dict_sys.sys_foreign->not_redundant()); - tuple = dtuple_create(heap2, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, id, id_len); + dtuple_t* tuple = dtuple_create_from_mem(dtuple_buf, sizeof dtuple_buf, + 1, 0); + dfield_set_data(dtuple_get_nth_field(tuple, 0), id.data(), id.size()); dict_index_copy_types(tuple, sys_index, 1); + mtr.start(); + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur) - || rec_get_deleted_flag(rec, 0)) { - /* Not found */ - - ib::error() << "Cannot load foreign constraint " << id - << ": could not find the relevant record in " - "SYS_FOREIGN"; + const rec_t* rec = btr_pcur_get_rec(&pcur); + mem_heap_t* heap = nullptr; + if (!btr_pcur_is_on_user_rec(&pcur)) { + not_found: btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap2); - - DBUG_RETURN(DB_ERROR); + mtr.commit(); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + DBUG_RETURN(DB_NOT_FOUND); } + static_assert(DICT_FLD__SYS_FOREIGN__ID == 0, "compatibility"); field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len); /* Check if the id in record is the searched one */ - if (len != id_len || memcmp(id, field, len)) { - { - ib::error err; - err << "Cannot load foreign constraint " << id - << ": found "; - err.write(field, len); - err << " instead in SYS_FOREIGN"; + if (len != id.size() || memcmp(id.data(), field, id.size())) { + goto not_found; + } + + field = rec_get_nth_field_old( + rec, DICT_FLD__SYS_FOREIGN__DB_TRX_ID, &len); + ut_a(len == DATA_TRX_ID_LEN); + + const trx_id_t tid = trx_read_trx_id(field); + + if (tid && tid != trx_id && trx_sys.find(nullptr, tid, false)) { + rec_offs* offsets = rec_get_offsets( + rec, sys_index, nullptr, true, ULINT_UNDEFINED, &heap); + const rec_t* old_vers; + row_vers_build_for_semi_consistent_read( + nullptr, rec, &mtr, sys_index, &offsets, &heap, + heap, &old_vers, nullptr); + rec = old_vers; + if (!rec) { + goto not_found; } + } - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap2); - - DBUG_RETURN(DB_ERROR); + if (rec_get_deleted_flag(rec, 0)) { + ut_ad(tid); + goto not_found; } /* Read the table names and the number of columns associated with the constraint */ - mem_heap_free(heap2); - foreign = dict_mem_foreign_create(); uint32_t n_fields_and_type = mach_read_from_4( @@ -2778,7 +2907,7 @@ dict_load_foreign( foreign->type = (n_fields_and_type >> 24) & ((1U << 6) - 1); foreign->n_fields = n_fields_and_type & dict_index_t::MAX_N_FIELDS; - foreign->id = mem_heap_strdupl(foreign->heap, id, id_len); + foreign->id = mem_heap_strdupl(foreign->heap, id.data(), id.size()); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len); @@ -2787,18 +2916,34 @@ dict_load_foreign( foreign->heap, (char*) field, len); dict_mem_foreign_table_name_lookup_set(foreign, TRUE); - const ulint foreign_table_name_len = len; + const size_t foreign_table_name_len = len; + const size_t table_name_len = strlen(table_name); field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len); + + if (!my_charset_latin1.strnncoll(table_name, table_name_len, + foreign->foreign_table_name, + foreign_table_name_len)) { + } else if (!check_recursive + && !my_charset_latin1.strnncoll(table_name, table_name_len, + (const char*) field, len)) { + } else { + dict_foreign_free(foreign); + goto not_found; + } + foreign->referenced_table_name = mem_heap_strdupl( - foreign->heap, (char*) field, len); + foreign->heap, (const char*) field, len); dict_mem_referenced_table_name_lookup_set(foreign, TRUE); btr_pcur_close(&pcur); - mtr_commit(&mtr); + mtr.commit(); + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } - dict_load_foreign_cols(foreign); + dict_load_foreign_cols(foreign, trx_id); ref_table = dict_sys.find_table( {foreign->referenced_table_name_lookup, @@ -2853,7 +2998,8 @@ dict_load_foreigns( const char* table_name, /*!< in: table name */ const char** col_names, /*!< in: column names, or NULL to use table->col_names */ - bool check_recursive,/*!< in: Whether to check + trx_id_t trx_id, /*!< in: DDL transaction id, + or 0 to check recursive load of tables chained by FK */ bool check_charsets, /*!< in: whether to check @@ -2870,10 +3016,6 @@ dict_load_foreigns( btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; - const rec_t* rec; - const byte* field; - ulint len; - dberr_t err; mtr_t mtr; DBUG_ENTER("dict_load_foreigns"); @@ -2890,12 +3032,14 @@ dict_load_foreigns( } ut_ad(!dict_sys.sys_foreign->not_redundant()); - mtr_start(&mtr); dict_index_t *sec_index = dict_table_get_next_index( dict_table_get_first_index(dict_sys.sys_foreign)); ut_ad(!strcmp(sec_index->fields[0].name, "FOR_NAME")); + bool check_recursive = !trx_id; + start_load: + mtr.start(); tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1, 0); dfield = dtuple_get_nth_field(tuple, 0); @@ -2906,7 +3050,9 @@ start_load: btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); loop: - rec = btr_pcur_get_rec(&pcur); + const rec_t* rec = btr_pcur_get_rec(&pcur); + const byte* field; + const auto maybe_deleted = rec_get_deleted_flag(rec, 0); if (!btr_pcur_is_on_user_rec(&pcur)) { /* End of index */ @@ -2917,6 +3063,7 @@ loop: /* Now we have the record in the secondary index containing a table name and a foreign constraint ID */ + ulint len; field = rec_get_nth_field_old( rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len); @@ -2941,10 +3088,6 @@ loop: may not be the same case, but the previous comparison showed that they match with no-case. */ - if (rec_get_deleted_flag(rec, 0)) { - goto next_rec; - } - if (lower_case_table_names != 2 && memcmp(field, table_name, len)) { goto next_rec; } @@ -2959,26 +3102,33 @@ loop: ut_a(len <= MAX_TABLE_NAME_LEN); memcpy(fk_id, field, len); - fk_id[len] = '\0'; btr_pcur_store_position(&pcur, &mtr); - mtr_commit(&mtr); + mtr.commit(); /* Load the foreign constraint definition to the dictionary cache */ - err = dict_load_foreign(fk_id, col_names, - check_recursive, check_charsets, ignore_err, - fk_tables); - - if (err != DB_SUCCESS) { + switch (dberr_t err + = dict_load_foreign(table_name, col_names, trx_id, + check_recursive, check_charsets, + {fk_id, len}, ignore_err, fk_tables)) { + case DB_SUCCESS: + break; + case DB_NOT_FOUND: + if (maybe_deleted) { + break; + } + sql_print_error("InnoDB: Cannot load foreign constraint %.*s:" + " could not find the relevant record in " + "SYS_FOREIGN", int(len), fk_id); + /* fall through */ + default: btr_pcur_close(&pcur); - DBUG_RETURN(err); } - mtr_start(&mtr); - + mtr.start(); pcur.restore_position(BTR_SEARCH_LEAF, &mtr); next_rec: btr_pcur_move_to_next_user_rec(&pcur, &mtr); @@ -2991,15 +3141,11 @@ load_next_index: sec_index = dict_table_get_next_index(sec_index); - if (sec_index != NULL) { - - mtr_start(&mtr); - + if (sec_index) { /* Switch to scan index on REF_NAME, fk_max_recusive_level already been updated when scanning FOR_NAME index, no need to update again */ - check_recursive = FALSE; - + check_recursive = false; goto start_load; } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index dca3964aad8..230c6efa743 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2136,7 +2136,7 @@ a remote tablespace is found it will be changed to true. If the fix_dict boolean is set, then it is safe to use an internal SQL statement to update the dictionary tables if they are incorrect. -@param[in] validate true if we should validate the tablespace +@param[in] validate 0=maybe missing, 1=do not validate, 2=validate @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY @param[in] id tablespace ID @param[in] flags expected FSP_SPACE_FLAGS @@ -2148,7 +2148,7 @@ If file-per-table, it is the table name in the databasename/tablename format @retval NULL if the tablespace could not be opened */ fil_space_t* fil_ibd_open( - bool validate, + unsigned validate, fil_type_t purpose, ulint id, ulint flags, @@ -2160,7 +2160,7 @@ fil_ibd_open( fil_space_t* space = fil_space_get_by_id(id); mysql_mutex_unlock(&fil_system.mutex); if (space) { - if (validate && !srv_read_only_mode) { + if (validate > 1 && !srv_read_only_mode) { fsp_flags_try_adjust(space, flags & ~FSP_FLAGS_MEM_MASK); } @@ -2197,8 +2197,9 @@ func_exit: /* Look for a filepath embedded in an ISL where the default file would be. */ - if (df_remote.open_link_file(name)) { - validate = true; + bool must_validate = df_remote.open_link_file(name); + + if (must_validate) { if (df_remote.open_read_only(true) == DB_SUCCESS) { ut_ad(df_remote.is_open()); ++tablespaces_found; @@ -2211,15 +2212,12 @@ func_exit: << df_remote.filepath() << "' could not be opened read-only."; } - } - - /* Attempt to open the tablespace at the dictionary filepath. */ - if (path_in) { - if (!df_default.same_filepath_as(path_in)) { - /* Dict path is not the default path. Always validate - remote files. If default is opened, it was moved. */ - validate = true; - } + } else if (path_in && !df_default.same_filepath_as(path_in)) { + /* Dict path is not the default path. Always validate + remote files. If default is opened, it was moved. */ + must_validate = true; + } else if (validate > 1) { + must_validate = true; } /* Always look for a file at the default location. But don't log @@ -2231,7 +2229,7 @@ func_exit: the first server startup. The tables ought to be dropped by drop_garbage_tables_after_restore() a little later. */ - const bool strict = !tablespaces_found + const bool strict = validate && !tablespaces_found && !(srv_operation == SRV_OPERATION_NORMAL && srv_start_after_restore && srv_force_recovery < SRV_FORCE_NO_BACKGROUND @@ -2257,7 +2255,7 @@ func_exit: normal, we only found 1. */ /* For encrypted tablespace, we need to check the encryption in header of first page. */ - if (!validate && tablespaces_found == 1) { + if (!must_validate && tablespaces_found == 1) { goto skip_validate; } @@ -2273,7 +2271,8 @@ func_exit: First, bail out if no tablespace files were found. */ if (valid_tablespaces_found == 0) { if (!strict - && IF_WIN(GetLastError() == ERROR_FILE_NOT_FOUND, + && IF_WIN(GetLastError() == ERROR_FILE_NOT_FOUND + || GetLastError() == ERROR_PATH_NOT_FOUND, errno == ENOENT)) { /* Suppress a message about a missing file. */ goto corrupted; @@ -2286,7 +2285,7 @@ func_exit: TROUBLESHOOT_DATADICT_MSG); goto corrupted; } - if (!validate) { + if (!must_validate) { goto skip_validate; } @@ -2369,7 +2368,7 @@ skip_validate: df_remote.is_open() ? df_remote.filepath() : df_default.filepath(), OS_FILE_CLOSED, 0, false, true); - if (validate && !srv_read_only_mode) { + if (must_validate && !srv_read_only_mode) { df_remote.close(); df_default.close(); if (space->acquire()) { diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 72300f83c9c..8151352d3ed 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -12793,7 +12793,8 @@ int create_table_info_t::create_table(bool create_fk) if (err == DB_SUCCESS) { /* Check that also referencing constraints are ok */ dict_names_t fk_tables; - err = dict_load_foreigns(m_table_name, NULL, false, true, + err = dict_load_foreigns(m_table_name, nullptr, + m_trx->id, true, DICT_ERR_IGNORE_NONE, fk_tables); while (err == DB_SUCCESS && !fk_tables.empty()) { dict_sys.load_table( @@ -13245,9 +13246,7 @@ ha_innobase::create( } if (error) { - /* Drop the being-created table before rollback, - so that rollback can possibly rename back a table - that could have been renamed before the failed creation. */ + /* Rollback will drop the being-created table. */ trx_rollback_for_mysql(trx); row_mysql_unlock_data_dictionary(trx); } else { diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index ff069777ec4..f8167b86582 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -9730,7 +9730,7 @@ innobase_update_foreign_cache( dict_names_t fk_tables; err = dict_load_foreigns(user_table->name.m_name, - ctx->col_names, false, true, + ctx->col_names, 1, true, DICT_ERR_IGNORE_NONE, fk_tables); @@ -9741,7 +9741,7 @@ innobase_update_foreign_cache( loaded with "foreign_key checks" off, so let's retry the loading with charset_check is off */ err = dict_load_foreigns(user_table->name.m_name, - ctx->col_names, false, false, + ctx->col_names, 1, false, DICT_ERR_IGNORE_NONE, fk_tables); diff --git a/storage/innobase/handler/i_s.cc b/storage/innobase/handler/i_s.cc index 175b07551b0..97f7013465c 100644 --- a/storage/innobase/handler/i_s.cc +++ b/storage/innobase/handler/i_s.cc @@ -4808,12 +4808,13 @@ i_s_dict_fill_sys_tables( /** Convert one SYS_TABLES record to dict_table_t. @param pcur persistent cursor position on SYS_TABLES record +@param mtr mini-transaction (nullptr=use the dict_sys cache) @param rec record to read from (nullptr=use the dict_sys cache) @param table the converted dict_table_t @return error message @retval nullptr on success */ -static const char *i_s_sys_tables_rec(const btr_pcur_t &pcur, const rec_t *rec, - dict_table_t **table) +static const char *i_s_sys_tables_rec(const btr_pcur_t &pcur, mtr_t *mtr, + const rec_t *rec, dict_table_t **table) { static_assert(DICT_FLD__SYS_TABLES__NAME == 0, "compatibility"); size_t len; @@ -4831,12 +4832,11 @@ static const char *i_s_sys_tables_rec(const btr_pcur_t &pcur, const rec_t *rec, return "corrupted SYS_TABLES.NAME"; } - const spanname{reinterpret_cast(pcur.old_rec), len}; - if (rec) - return dict_load_table_low(name, rec, table); + return dict_load_table_low(mtr, rec, table); - *table= dict_sys.load_table(name); + *table= dict_sys.load_table + (span{reinterpret_cast(pcur.old_rec), len}); return *table ? nullptr : "Table not found in cache"; } @@ -4878,7 +4878,7 @@ i_s_sys_tables_fill_table( /* Create and populate a dict_table_t structure with information from SYS_TABLES row */ - err_msg = i_s_sys_tables_rec(pcur, rec, &table_rec); + err_msg = i_s_sys_tables_rec(pcur, &mtr, rec, &table_rec); mtr.commit(); dict_sys.unlock(); @@ -5116,7 +5116,8 @@ i_s_sys_tables_fill_table_stats( mtr.commit(); /* Fetch the dict_table_t structure corresponding to this SYS_TABLES record */ - err_msg = i_s_sys_tables_rec(pcur, nullptr, &table_rec); + err_msg = i_s_sys_tables_rec(pcur, nullptr, nullptr, + &table_rec); if (UNIV_LIKELY(!err_msg)) { bool evictable = dict_sys.prevent_eviction(table_rec); diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h index 43e732263fd..33095eb8dbc 100644 --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -89,7 +89,8 @@ dict_load_foreigns( const char* table_name, /*!< in: table name */ const char** col_names, /*!< in: column names, or NULL to use table->col_names */ - bool check_recursive,/*!< in: Whether to check + trx_id_t trx_id, /*!< in: DDL transaction id, + or 0 to check recursive load of tables chained by FK */ bool check_charsets, /*!< in: whether to check @@ -123,12 +124,12 @@ dict_getnext_system( /** Load a table definition from a SYS_TABLES record to dict_table_t. Do not load any columns or indexes. -@param[in] name Table name +@param[in,out] mtr mini-transaction @param[in] rec SYS_TABLES record @param[out,own] table table, or nullptr @return error message @retval nullptr on success */ -const char *dict_load_table_low(const span &name, +const char *dict_load_table_low(mtr_t *mtr, const rec_t *rec, dict_table_t **table) MY_ATTRIBUTE((nonnull, warn_unused_result)); diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index cf6873f8850..2041b857a48 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1740,10 +1740,7 @@ file inode probably is much faster (the OS caches them) than accessing the first page of the file. This boolean may be initially false, but if a remote tablespace is found it will be changed to true. -If the fix_dict boolean is set, then it is safe to use an internal SQL -statement to update the dictionary tables if they are incorrect. - -@param[in] validate true if we should validate the tablespace +@param[in] validate 0=maybe missing, 1=do not validate, 2=validate @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY @param[in] id tablespace ID @param[in] flags expected FSP_SPACE_FLAGS @@ -1755,7 +1752,7 @@ If file-per-table, it is the table name in the databasename/tablename format @retval NULL if the tablespace could not be opened */ fil_space_t* fil_ibd_open( - bool validate, + unsigned validate, fil_type_t purpose, ulint id, ulint flags, diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index c2ebad91ecd..7faf0ca06bd 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -727,11 +727,9 @@ in the clustered index for instant ADD COLUMN or ALTER TABLE. @param[in] rec leaf page record @param[in] index index of the record @return whether the record is the metadata pseudo-record */ -inline bool rec_is_metadata(const rec_t* rec, const dict_index_t& index) +inline bool rec_is_metadata(const rec_t *rec, const dict_index_t &index) { - bool is = rec_is_metadata(rec, dict_table_is_comp(index.table)); - ut_ad(!is || index.is_instant()); - return is; + return rec_is_metadata(rec, index.table->not_redundant()); } /** Determine if the record is the metadata pseudo-record diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc index 902f3f2d5ca..bd572372aca 100644 --- a/storage/innobase/rem/rem0rec.cc +++ b/storage/innobase/rem/rem0rec.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -478,7 +478,7 @@ rec_offs_make_valid( { const bool is_alter_metadata = leaf && rec_is_alter_metadata(rec, *index); - ut_ad(is_alter_metadata + ut_ad((leaf && rec_is_metadata(rec, *index)) || index->is_dummy || index->is_ibuf() || (leaf ? rec_offs_n_fields(offsets) @@ -572,7 +572,8 @@ rec_offs_validate( } /* index->n_def == 0 for dummy indexes if !comp */ ut_ad(!comp || index->n_def); - ut_ad(!index->n_def || i <= max_n_fields); + ut_ad(!index->n_def || i <= max_n_fields + || rec_is_metadata(rec, *index)); } while (i--) { ulint curr = get_value(rec_offs_base(offsets)[1 + i]); @@ -897,9 +898,7 @@ rec_get_offsets_func( ut_ad(!is_user_rec || !n_core || index->is_dummy || dict_index_is_ibuf(index) || n == n_fields /* btr_pcur_restore_position() */ - || (n + (index->id == DICT_INDEXES_ID) - >= n_core && n <= index->n_fields - + unsigned(rec_is_alter_metadata(rec, false)))); + || (n + (index->id == DICT_INDEXES_ID) >= n_core)); if (is_user_rec && n_core && n < index->n_fields) { ut_ad(!index->is_dummy); diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index e9034c05b89..1d53ede4e9c 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2021, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4517,7 +4517,7 @@ row_import_for_mysql( ulint fsp_flags = dict_tf_to_fsp_flags(table->flags); table->space = fil_ibd_open( - true, FIL_TYPE_IMPORT, table->space_id, + 2, FIL_TYPE_IMPORT, table->space_id, fsp_flags, name, filepath, &err); ut_ad((table->space == NULL) == (err != DB_SUCCESS)); diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 07ec578738b..2d5ef06fbb9 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -2912,7 +2912,7 @@ row_rename_table_for_mysql( dict_names_t fk_tables; err = dict_load_foreigns( - new_name, NULL, false, + new_name, nullptr, trx->id, !old_is_tmp || trx->check_foreigns, use_fk ? DICT_ERR_IGNORE_NONE diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index 19870906df8..4cd1c3a4d26 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -531,7 +531,11 @@ row_build_low( continue; } - ut_ad(ind_field < &index->fields[index->n_fields]); + if (UNIV_UNLIKELY(ind_field + >= &index->fields[index->n_fields])) { + ut_ad(rec_is_metadata(rec, *index)); + continue; + } const dict_col_t* col = dict_field_get_col(ind_field); @@ -745,11 +749,15 @@ row_rec_to_index_entry_impl( if (mblob == 2) { ut_ad(info_bits == REC_INFO_METADATA_ALTER || info_bits == REC_INFO_METADATA_ADD); - ut_ad(rec_len <= ulint(index->n_fields + got)); if (pad) { + ut_ad(rec_len <= ulint(index->n_fields + got)); rec_len = ulint(index->n_fields) + (info_bits == REC_INFO_METADATA_ALTER); - } else if (!got && info_bits == REC_INFO_METADATA_ALTER) { + } else if (got) { + rec_len = std::min(rec_len, + ulint(index->n_fields + got)); + } else if (info_bits == REC_INFO_METADATA_ALTER) { + ut_ad(rec_len <= index->n_fields); rec_len++; } } else { From b2fa874e462e7352be173a3075a65e49f2fcc404 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 28 Mar 2022 11:35:10 +0300 Subject: [PATCH 42/52] MDEV-28181 The innochecksum -w option was inadvertently removed In commit 7a4fbb55b02b449a135fe935f624422eaacfdd7c (MDEV-25105) the innochecksum option --write (-w) was removed altogether. It should have been made a Boolean option, so that old data files may be converted to a format that is compatible with innodb_checksum_algorithm=strict_crc32 by executing the following: innochecksum -n -w ibdata* */*.ibd It would be better to use an older-version innochecksum for such a conversion, so that page checksums will be validated before updating the checksum. It never was possible for innochecksum to convert files to the innodb_checksum_algorithm=full_crc32 format that is the default for new InnoDB data files. --- extra/innochecksum.cc | 2 ++ mysql-test/suite/innodb_zip/r/innochecksum.result | 3 ++- mysql-test/suite/innodb_zip/r/innochecksum_2.result | 3 +++ mysql-test/suite/innodb_zip/r/innochecksum_3.result | 1 + mysql-test/suite/innodb_zip/t/innochecksum.test | 8 +++++++- 5 files changed, 15 insertions(+), 2 deletions(-) diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc index 6c732dbdb48..65224c8c3f5 100644 --- a/extra/innochecksum.cc +++ b/extra/innochecksum.cc @@ -1194,6 +1194,8 @@ static struct my_option innochecksum_options[] = { {"allow-mismatches", 'a', "Maximum checksum mismatch allowed.", &allow_mismatches, &allow_mismatches, 0, GET_ULL, REQUIRED_ARG, 0, 0, ULLONG_MAX, 0, 1, 0}, + {"write", 'w', "Rewrite the checksum.", + &do_write, &do_write, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"page-type-summary", 'S', "Display a count of each page type " "in a tablespace.", &page_type_summary, &page_type_summary, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, diff --git a/mysql-test/suite/innodb_zip/r/innochecksum.result b/mysql-test/suite/innodb_zip/r/innochecksum.result index e6525af4b52..bb94de7a369 100644 --- a/mysql-test/suite/innodb_zip/r/innochecksum.result +++ b/mysql-test/suite/innodb_zip/r/innochecksum.result @@ -14,7 +14,8 @@ FOUND 1 /Error: --no-check must be associated with --write option./ in my_restar FOUND 1 /unknown variable 'strict-check=innodb'/ in my_restart.err [7]: check the innochecksum with short form strict-check & no-check , an error is expected FOUND 1 /unknown option '-C'/ in my_restart.err -FOUND 1 /unknown variable 'write=crc32'/ in my_restart.err +FOUND 1 /ignoring option '--write' due to invalid value 'crc32'/ in my_restart.err +FOUND 1 /Error: --no-check must be associated with --write option/ in my_restart.err # restart SELECT * FROM tab1; c1 c2 diff --git a/mysql-test/suite/innodb_zip/r/innochecksum_2.result b/mysql-test/suite/innodb_zip/r/innochecksum_2.result index 7be6f6f9af3..681d8e1f4c7 100644 --- a/mysql-test/suite/innodb_zip/r/innochecksum_2.result +++ b/mysql-test/suite/innodb_zip/r/innochecksum_2.result @@ -27,6 +27,7 @@ end-page 0 page 0 no-check FALSE allow-mismatches 0 +write FALSE page-type-summary FALSE page-type-dump MYSQLTEST_VARDIR/tmp/dump.txt per-page-details FALSE @@ -54,6 +55,7 @@ See https://mariadb.com/kb/en/library/innochecksum/ for usage hints. -n, --no-check Ignore the checksum verification. -a, --allow-mismatches=# Maximum checksum mismatch allowed. + -w, --write Rewrite the checksum. -S, --page-type-summary Display a count of each page type in a tablespace. -D, --page-type-dump=name @@ -75,6 +77,7 @@ end-page 0 page 0 no-check FALSE allow-mismatches 0 +write FALSE page-type-summary FALSE page-type-dump (No default value) per-page-details FALSE diff --git a/mysql-test/suite/innodb_zip/r/innochecksum_3.result b/mysql-test/suite/innodb_zip/r/innochecksum_3.result index 280528f4200..03a31194c63 100644 --- a/mysql-test/suite/innodb_zip/r/innochecksum_3.result +++ b/mysql-test/suite/innodb_zip/r/innochecksum_3.result @@ -133,6 +133,7 @@ end-page 0 page 0 no-check FALSE allow-mismatches 0 +write FALSE page-type-summary FALSE page-type-dump MYSQLTEST_VARDIR/tmp/dump.txt per-page-details FALSE diff --git a/mysql-test/suite/innodb_zip/t/innochecksum.test b/mysql-test/suite/innodb_zip/t/innochecksum.test index 60860adeac8..b78cd4329de 100644 --- a/mysql-test/suite/innodb_zip/t/innochecksum.test +++ b/mysql-test/suite/innodb_zip/t/innochecksum.test @@ -57,9 +57,15 @@ let SEARCH_PATTERN= unknown option '-C'; --error 1 --exec $INNOCHECKSUM --no-check --write=crc32 $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE -let SEARCH_PATTERN= unknown variable 'write=crc32'; +--let SEARCH_PATTERN= ignoring option '--write' due to invalid value 'crc32' --source include/search_pattern_in_file.inc +--error 1 +--exec $INNOCHECKSUM --no-check $MYSQLD_DATADIR/test/tab1.ibd 2> $SEARCH_FILE +--let SEARCH_PATTERN= Error: --no-check must be associated with --write option +--source include/search_pattern_in_file.inc + +--exec $INNOCHECKSUM --no-check --write $MYSQLD_DATADIR/test/tab1.ibd --source include/start_mysqld.inc SELECT * FROM tab1; From 303448bc912486f4766129cc407a5077a3ca4359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 28 Mar 2022 13:36:36 +0300 Subject: [PATCH 43/52] MDEV-27931: buf_page_is_corrupted() wrongly claims corruption In commit 437da7bc54daa131b46900128ebe3ad2ca25c11a (MDEV-19534), the default value of the global variable srv_checksum_algorithm in innochecksum was changed from SRV_CHECKSUM_ALGORITHM_INNODB to implied 0 (innodb_checksum_algorithm=crc32). As a result, the function buf_page_is_corrupted() would by default invoke buf_calc_page_crc32() in innochecksum, and crc32_inited would hold. This would cause "innochecksum" to fail on a particular page. The actual problem is older, introduced in 2011 in mysql/mysql-server@17e497bdb793bc6b8360aa1c626dcd8bb5cfad1b (MySQL 5.6.3). It should affect the validation of pages of old data files that were written with innodb_checksum_algorithm=innodb. When using innodb_checksum_algorithm=crc32 (the default setting since MariaDB Server 10.2), some valid pages would be rejected only because exactly one of the two checksum fields accidentally matches the innodb_checksum_algorithm=crc32 value. buf_page_is_corrupted(): Simplify the logic of non-strict checksum validation, by always invoking buf_calc_page_crc32(). Remove a bogus condition that if only one of the checksum fields contains the value returned by buf_calc_page_crc32(), the page is corrupted. --- storage/innobase/buf/buf0buf.cc | 94 ++++++++------------------------- 1 file changed, 23 insertions(+), 71 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 505539f0217..679f12ca6d1 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -977,8 +977,6 @@ buf_page_is_corrupted( #endif size_t checksum_field1 = 0; size_t checksum_field2 = 0; - uint32_t crc32 = 0; - bool crc32_inited = false; ulint page_type = mach_read_from_2(read_buf + FIL_PAGE_TYPE); @@ -1104,8 +1102,13 @@ buf_page_is_corrupted( case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: return !buf_page_is_checksum_valid_none( read_buf, checksum_field1, checksum_field2); + case SRV_CHECKSUM_ALGORITHM_NONE: + /* should have returned false earlier */ + break; case SRV_CHECKSUM_ALGORITHM_CRC32: case SRV_CHECKSUM_ALGORITHM_INNODB: + const uint32_t crc32 = buf_calc_page_crc32(read_buf); + if (buf_page_is_checksum_valid_none(read_buf, checksum_field1, checksum_field2)) { #ifdef UNIV_INNOCHECKSUM @@ -1121,7 +1124,7 @@ buf_page_is_corrupted( " crc32 = " UINT32PF "; recorded = " ULINTPF ";\n", cur_page_num, buf_calc_page_new_checksum(read_buf), - buf_calc_page_crc32(read_buf), + crc32, checksum_field1); } #endif /* UNIV_INNOCHECKSUM */ @@ -1138,84 +1141,33 @@ buf_page_is_corrupted( != mach_read_from_4(read_buf + FIL_PAGE_LSN) && checksum_field2 != BUF_NO_CHECKSUM_MAGIC) { - if (curr_algo == SRV_CHECKSUM_ALGORITHM_CRC32) { - DBUG_EXECUTE_IF( - "page_intermittent_checksum_mismatch", { - static int page_counter; - if (page_counter++ == 2) { - checksum_field2++; - } - }); + DBUG_EXECUTE_IF( + "page_intermittent_checksum_mismatch", { + static int page_counter; + if (page_counter++ == 2) return true; + }); - crc32 = buf_page_check_crc32(read_buf, - checksum_field2); - crc32_inited = true; - - if (checksum_field2 != crc32 - && checksum_field2 - != buf_calc_page_old_checksum(read_buf)) { - return true; - } - } else { - ut_ad(curr_algo - == SRV_CHECKSUM_ALGORITHM_INNODB); - - if (checksum_field2 - != buf_calc_page_old_checksum(read_buf)) { - crc32 = buf_page_check_crc32( - read_buf, checksum_field2); - crc32_inited = true; - - if (checksum_field2 != crc32) { - return true; - } - } + if ((checksum_field1 != crc32 + || checksum_field2 != crc32) + && checksum_field2 + != buf_calc_page_old_checksum(read_buf)) { + return true; } } - if (checksum_field1 == 0 - || checksum_field1 == BUF_NO_CHECKSUM_MAGIC) { - } else if (curr_algo == SRV_CHECKSUM_ALGORITHM_CRC32) { - if (!crc32_inited) { - crc32 = buf_page_check_crc32( - read_buf, checksum_field2); - crc32_inited = true; - } - - if (checksum_field1 != crc32 + switch (checksum_field1) { + case 0: + case BUF_NO_CHECKSUM_MAGIC: + break; + default: + if ((checksum_field1 != crc32 + || checksum_field2 != crc32) && checksum_field1 != buf_calc_page_new_checksum(read_buf)) { return true; } - } else { - ut_ad(curr_algo == SRV_CHECKSUM_ALGORITHM_INNODB); - - if (checksum_field1 - != buf_calc_page_new_checksum(read_buf)) { - - if (!crc32_inited) { - crc32 = buf_page_check_crc32( - read_buf, checksum_field2); - crc32_inited = true; - } - - if (checksum_field1 != crc32) { - return true; - } - } } - if (crc32_inited - && ((checksum_field1 == crc32 - && checksum_field2 != crc32) - || (checksum_field1 != crc32 - && checksum_field2 == crc32))) { - return true; - } - - break; - case SRV_CHECKSUM_ALGORITHM_NONE: - /* should have returned false earlier */ break; } From 739002eec90efa73a3b77db1cc46b313e8ac1bfd Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 28 Mar 2022 09:42:26 +0200 Subject: [PATCH 44/52] MDEV-28178 Windows : sporadic ER_ERROR_ON_RENAME .. (errno: 13 "Permission denied") On affected machine, the error happens sporadically in innodb.instant_alter_limit. Procmon shows SetRenameInformationFile failing with ERROR_ACCESS_DENIED. In this case, the destination file was previously opened rsp oplocked by Windows defender antivirus. The fix is to retry MoveFileEx on ERROR_ACCESS_DENIED. --- mysys/my_rename.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mysys/my_rename.c b/mysys/my_rename.c index 5702af94272..23dbec2d7ff 100644 --- a/mysys/my_rename.c +++ b/mysys/my_rename.c @@ -46,12 +46,15 @@ static BOOL win_rename_with_retries(const char *from, const char *to) for (int retry= RENAME_MAX_RETRIES; retry--;) { - DWORD ret = MoveFileEx(from, to, + BOOL ret= MoveFileEx(from, to, MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING); - DBUG_ASSERT(fp == NULL || (ret == FALSE && GetLastError() == ERROR_SHARING_VIOLATION)); + if (ret) + return ret; - if (!ret && (GetLastError() == ERROR_SHARING_VIOLATION)) + DWORD last_error= GetLastError(); + if (last_error == ERROR_SHARING_VIOLATION || + last_error == ERROR_ACCESS_DENIED) { #ifndef DBUG_OFF /* From 97f237e66dc58a617b6293d6a2624378333e8065 Mon Sep 17 00:00:00 2001 From: mkaruza Date: Thu, 24 Mar 2022 17:14:28 +0100 Subject: [PATCH 45/52] MDEV-25912 wsrep does not identify checksummed events correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For GTID consistenty, GTID events was artificialy added before replication happned. This event should not contain CHECKSUM calculated. Reviewed-by: Jan Lindström --- sql/log.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/log.cc b/sql/log.cc index 5fbfa0f75d4..1fb4d45af27 100644 --- a/sql/log.cc +++ b/sql/log.cc @@ -5735,6 +5735,8 @@ THD::binlog_start_trans_and_stmt() this->variables.gtid_domain_id, true, LOG_EVENT_SUPPRESS_USE_F, true, 0); + // Replicated events in writeset doesn't have checksum + gtid_event.checksum_alg= BINLOG_CHECKSUM_ALG_OFF; gtid_event.server_id= this->variables.server_id; writer.write(>id_event); } From 088b37b5eaa8c3198c7f8ea0358d15135833f6bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 29 Mar 2022 11:45:19 +0300 Subject: [PATCH 46/52] Disable failing Galera tests --- mysql-test/suite/galera/disabled.def | 1 + mysql-test/suite/galera_sr/disabled.def | 1 + 2 files changed, 2 insertions(+) diff --git a/mysql-test/suite/galera/disabled.def b/mysql-test/suite/galera/disabled.def index d6cbf1a2f7f..6db86abaa3b 100644 --- a/mysql-test/suite/galera/disabled.def +++ b/mysql-test/suite/galera/disabled.def @@ -36,3 +36,4 @@ partition : MDEV-19958 Galera test failure on galera.partition query_cache: MDEV-15805 Test failure on galera.query_cache versioning_trx_id: MDEV-18590: galera.versioning_trx_id: Test failure: mysqltest: Result content mismatch galera_bf_abort_at_after_statement : Unstable +galera_bf_abort_ps_bind : MDEV-28193 Galera test failure on galera_bf_abort_ps_bind diff --git a/mysql-test/suite/galera_sr/disabled.def b/mysql-test/suite/galera_sr/disabled.def index 932b48eaae4..49b6f90e762 100644 --- a/mysql-test/suite/galera_sr/disabled.def +++ b/mysql-test/suite/galera_sr/disabled.def @@ -11,3 +11,4 @@ ############################################################################## galera-features#56 : MDEV-24896 +GCF-1060 : MDEV-26528 wrong usage of mutex LOCK_thd_kill and LOCK_thd_kill From c14f60a72f28bb5102d529b3946c2daf8f3b590b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 29 Mar 2022 12:59:38 +0300 Subject: [PATCH 47/52] Fix g++-12 -O2 -Wstringop-overflow buf_pool_t::watch_unset(): Reorder some code so that no warning will be emitted in CMAKE_BUILD_TYPE=RelWithDebInfo. It is unclear why invoking watch_is_sentinel() before buf_fix_count() would make the warning disappear. --- storage/innobase/buf/buf0buf.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 8330ba36520..8bea22b3e85 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2383,10 +2383,8 @@ void buf_pool_t::watch_unset(const page_id_t id) page_hash_latch *hash_lock= page_hash.lock(fold); /* The page must exist because watch_set() increments buf_fix_count. */ buf_page_t *w= page_hash_get_low(id, fold); - const auto buf_fix_count= w->buf_fix_count(); - ut_ad(buf_fix_count); - const bool must_remove= buf_fix_count == 1 && watch_is_sentinel(*w); ut_ad(w->in_page_hash); + const bool must_remove= watch_is_sentinel(*w) && w->buf_fix_count() == 1; if (!must_remove) w->unfix(); hash_lock->write_unlock(); From b7016bd37929854a9c65b32a29e7314746da6ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 29 Mar 2022 14:53:51 +0300 Subject: [PATCH 48/52] MDEV-26626 fixup: SIGFPE during startup srv_start(): Set srv_startup_is_before_trx_rollback_phase before starting the buf_flush_page_cleaner() thread, so that it will not invoke log_checkpoint() before the log file has been created. This race condition was reproduced with https://rr-project.org. This fixes up commit 15efb7ed48265b8d40897a13c0b8e09c6bdd34c9 --- storage/innobase/srv/srv0start.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index afb7834c9b9..82153d4cb3b 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1243,14 +1243,13 @@ dberr_t srv_start(bool create_new_db) recv_sys.create(); lock_sys.create(srv_lock_table_size); + srv_startup_is_before_trx_rollback_phase = true; if (!srv_read_only_mode) { buf_flush_page_cleaner_init(); ut_ad(buf_page_cleaner_is_active); } - srv_startup_is_before_trx_rollback_phase = true; - /* Check if undo tablespaces and redo log files exist before creating a new system tablespace */ if (create_new_db) { From 42609c240d7b0b5a418e7c77e74fa8274dda0bca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 29 Mar 2022 14:56:44 +0300 Subject: [PATCH 49/52] Cleanup: Replace log_sys.n_pending_checkpoint_writes with a Boolean Only one checkpoint may be in progress at a time. The counter log_sys.n_pending_checkpoint_writes was being protected by log_sys.mutex. Let us replace it with the Boolean log_sys.checkpoint_pending. --- storage/innobase/buf/buf0flu.cc | 2 +- storage/innobase/include/log0log.h | 9 ++++----- storage/innobase/log/log0log.cc | 15 ++++++++------- storage/innobase/srv/srv0mon.cc | 7 ++----- 4 files changed, 15 insertions(+), 18 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 81183d42f7c..32348c26e9f 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1820,7 +1820,7 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) ut_ad(log_sys.get_flushed_lsn() >= flush_lsn); - if (log_sys.n_pending_checkpoint_writes) + if (log_sys.checkpoint_pending) { /* A checkpoint write is running */ mysql_mutex_unlock(&log_sys.mutex); diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index b40237acea8..870f5da0925 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -595,11 +595,10 @@ public: /*!< next checkpoint number */ /** latest completed checkpoint (protected by log_sys.mutex) */ Atomic_relaxed last_checkpoint_lsn; - lsn_t next_checkpoint_lsn; - /*!< next checkpoint lsn */ - ulint n_pending_checkpoint_writes; - /*!< number of currently pending - checkpoint writes */ + /** next checkpoint LSN (protected by log_sys.mutex) */ + lsn_t next_checkpoint_lsn; + /** whether a checkpoint is pending */ + Atomic_relaxed checkpoint_pending; /** buffer for checkpoint header */ byte *checkpoint_buf; diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index a03e79d27d8..69b5599811d 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -211,7 +211,7 @@ void log_t::create() max_checkpoint_age= 0; next_checkpoint_no= 0; next_checkpoint_lsn= 0; - n_pending_checkpoint_writes= 0; + checkpoint_pending= false; log_block_init(buf, LOG_START_LSN); log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE); @@ -939,7 +939,8 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn) ut_ad(LOG_CHECKPOINT_1 < srv_page_size); ut_ad(LOG_CHECKPOINT_2 < srv_page_size); - ++log_sys.n_pending_checkpoint_writes; + ut_ad(!log_sys.checkpoint_pending); + log_sys.checkpoint_pending = true; mysql_mutex_unlock(&log_sys.mutex); @@ -954,8 +955,8 @@ ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn) mysql_mutex_lock(&log_sys.mutex); - --log_sys.n_pending_checkpoint_writes; - ut_ad(log_sys.n_pending_checkpoint_writes == 0); + ut_ad(log_sys.checkpoint_pending); + log_sys.checkpoint_pending = false; log_sys.next_checkpoint_no++; @@ -1149,8 +1150,8 @@ wait_suspend_loop: if (log_sys.is_initialised()) { mysql_mutex_lock(&log_sys.mutex); - const ulint n_write = log_sys.n_pending_checkpoint_writes; - const ulint n_flush = log_sys.pending_flushes; + const size_t n_write{log_sys.checkpoint_pending}; + const size_t n_flush{log_sys.get_pending_flushes()}; mysql_mutex_unlock(&log_sys.mutex); if (n_write || n_flush) { @@ -1291,7 +1292,7 @@ log_print( ULINTPF " pending chkp writes\n" ULINTPF " log i/o's done, %.2f log i/o's/second\n", log_sys.pending_flushes.load(), - log_sys.n_pending_checkpoint_writes, + ulint{log_sys.checkpoint_pending}, log_sys.n_log_ios, static_cast( log_sys.n_log_ios - log_sys.n_log_ios_old) diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index f13af13c8e3..7a9974d4842 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -2,7 +2,7 @@ Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2021, MariaDB Corporation. +Copyright (c) 2013, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1909,10 +1909,7 @@ srv_mon_process_existing_counter( break; case MONITOR_PENDING_CHECKPOINT_WRITE: - mysql_mutex_lock(&log_sys.mutex); - value = static_cast( - log_sys.n_pending_checkpoint_writes); - mysql_mutex_unlock(&log_sys.mutex); + value = log_sys.checkpoint_pending; break; case MONITOR_LOG_IO: From 792972a6f7cb9bfc36220a21227c7d67c4ce950f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 29 Mar 2022 16:33:41 +0300 Subject: [PATCH 50/52] MDEV-27234 fixup with MDEV-27557 Whenever we retrieve an older version for READ COMMITTED, it is better to release the undo page latches so that we can freely move to the next clustered index record without potentially violating any latching order. --- storage/innobase/dict/dict0load.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 67aa4e4dcb6..49ebc63d24c 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -668,6 +668,7 @@ dict_sys_tables_rec_read( ut_ad(len == 6 || len == UNIV_SQL_NULL); trx_id_t id = len == 6 ? trx_read_trx_id(field) : 0; if (id && trx_sys.find(nullptr, id, false)) { + const auto savepoint = mtr->get_savepoint(); heap = mem_heap_create(1024); dict_index_t* index = UT_LIST_GET_FIRST( dict_sys.sys_tables->indexes); @@ -677,6 +678,7 @@ dict_sys_tables_rec_read( row_vers_build_for_semi_consistent_read( nullptr, rec, mtr, index, &offsets, &heap, heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); rec = old_vers; if (!rec) { mem_heap_free(heap); @@ -1074,6 +1076,7 @@ err_len: const trx_id_t trx_id = trx_read_trx_id(field); if (trx_id && mtr && trx_sys.find(nullptr, trx_id, false)) { + const auto savepoint = mtr->get_savepoint(); dict_index_t* index = UT_LIST_GET_FIRST( dict_sys.sys_columns->indexes); rec_offs* offsets = rec_get_offsets( @@ -1082,6 +1085,7 @@ err_len: row_vers_build_for_semi_consistent_read( nullptr, rec, mtr, index, &offsets, &heap, heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); rec = old_vers; if (!old_vers) { return dict_load_column_none; @@ -1618,6 +1622,7 @@ err_len: if (!trx_id) { ut_ad(!rec_get_deleted_flag(rec, 0)); } else if (mtr && trx_sys.find(nullptr, trx_id, false)) { + const auto savepoint = mtr->get_savepoint(); dict_index_t* sys_field = UT_LIST_GET_FIRST( dict_sys.sys_fields->indexes); rec_offs* offsets = rec_get_offsets( @@ -1626,6 +1631,7 @@ err_len: row_vers_build_for_semi_consistent_read( nullptr, rec, mtr, sys_field, &offsets, &heap, heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); rec = old_vers; if (!old_vers || rec_get_deleted_flag(rec, 0)) { return dict_load_field_none; @@ -1828,6 +1834,7 @@ err_len: ut_ad(!rec_get_deleted_flag(rec, 0)); } else if (!mtr) { } else if (trx_sys.find(nullptr, trx_id, false)) { + const auto savepoint = mtr->get_savepoint(); dict_index_t* sys_index = UT_LIST_GET_FIRST( dict_sys.sys_indexes->indexes); rec_offs* offsets = rec_get_offsets( @@ -1836,6 +1843,7 @@ err_len: row_vers_build_for_semi_consistent_read( nullptr, rec, mtr, sys_index, &offsets, &heap, heap, &old_vers, nullptr); + mtr->rollback_to_savepoint(savepoint); rec = old_vers; if (!old_vers || rec_get_deleted_flag(rec, 0)) { return dict_load_index_none; @@ -2700,6 +2708,7 @@ retry: const trx_id_t id = trx_read_trx_id(field); if (!id) { } else if (id != trx_id && trx_sys.find(nullptr, id, false)) { + const auto savepoint = mtr.get_savepoint(); rec_offs* offsets = rec_get_offsets( rec, sys_index, nullptr, true, ULINT_UNDEFINED, &heap); @@ -2707,6 +2716,7 @@ retry: row_vers_build_for_semi_consistent_read( nullptr, rec, &mtr, sys_index, &offsets, &heap, heap, &old_vers, nullptr); + mtr.rollback_to_savepoint(savepoint); rec = old_vers; if (!rec || rec_get_deleted_flag(rec, 0)) { goto next; @@ -2874,12 +2884,14 @@ dict_load_foreign( const trx_id_t tid = trx_read_trx_id(field); if (tid && tid != trx_id && trx_sys.find(nullptr, tid, false)) { + const auto savepoint = mtr.get_savepoint(); rec_offs* offsets = rec_get_offsets( rec, sys_index, nullptr, true, ULINT_UNDEFINED, &heap); const rec_t* old_vers; row_vers_build_for_semi_consistent_read( nullptr, rec, &mtr, sys_index, &offsets, &heap, heap, &old_vers, nullptr); + mtr.rollback_to_savepoint(savepoint); rec = old_vers; if (!rec) { goto not_found; From d875c50bf4d70e50583b2830aeec91fe6f941142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 29 Mar 2022 19:41:38 +0300 Subject: [PATCH 51/52] MDEV-17841 fixup: GCC -Wmaybe-uninitialized Let us remove a redundant condition when the S3 plugin is disabled during compilation time. --- storage/maria/ma_open.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index cf5bf2a5fea..e3385a73f84 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -296,9 +296,9 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags, #ifndef WITH_S3_STORAGE_ENGINE DBUG_ASSERT(!s3); -#endif /* WITH_S3_STORAGE_ENGINE */ - +#else if (!s3) +#endif /* WITH_S3_STORAGE_ENGINE */ { realpath_err= my_realpath(name_buff, fn_format(org_name, name, "", MARIA_NAME_IEXT, From 7d7bdd4aaabb88ee13e21644d078c7dfb7cf41ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 29 Mar 2022 19:42:10 +0300 Subject: [PATCH 52/52] MDEV-28185 InnoDB generates redundant log checkpoints The comparison on the checkpoint age (number of log bytes written since the previous checkpoint) is inaccurate, because the previous FILE_CHECKPOINT record could span two 512-byte log blocks, which will cause the LSN to increase by the size of the log block header and footer. We will still generate a redudant checkpoint if the previous checkpoint wrote some FILE_MODIFY records before the FILE_CHECKPOINT record. --- storage/innobase/buf/buf0flu.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 32348c26e9f..00d96935461 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1774,9 +1774,16 @@ static bool log_checkpoint_low(lsn_t oldest_lsn, lsn_t end_lsn) ut_ad(!recv_no_log_write); ut_ad(oldest_lsn >= log_sys.last_checkpoint_lsn); + const lsn_t age= oldest_lsn - log_sys.last_checkpoint_lsn; - if (oldest_lsn > log_sys.last_checkpoint_lsn + SIZE_OF_FILE_CHECKPOINT) + + if (age > SIZE_OF_FILE_CHECKPOINT + log_sys.framing_size()) /* Some log has been written since the previous checkpoint. */; + else if (age > SIZE_OF_FILE_CHECKPOINT && + !((log_sys.log.calc_lsn_offset(oldest_lsn) ^ + log_sys.log.calc_lsn_offset(log_sys.last_checkpoint_lsn)) & + ~lsn_t{OS_FILE_LOG_BLOCK_SIZE - 1})) + /* Some log has been written to the same log block. */; else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) /* MariaDB startup expects the redo log file to be logically empty (not even containing a FILE_CHECKPOINT record) after a clean shutdown.