From 6d9c9d92cc518fe429bd4aa866ad0655bb56c11e Mon Sep 17 00:00:00 2001 From: Denis Protivensky Date: Mon, 27 Nov 2023 11:45:34 +0300 Subject: [PATCH] MDEV-32938: DDL must check if not aborted before entering TOI Signed-off-by: Julius Goryavsky --- mysql-test/suite/galera/r/MDEV-32938.result | 21 ++++++++ mysql-test/suite/galera/t/MDEV-32938.test | 57 +++++++++++++++++++++ sql/sql_alter.cc | 19 +++---- sql/wsrep_mysqld.cc | 31 ++++++++++- 4 files changed, 118 insertions(+), 10 deletions(-) create mode 100644 mysql-test/suite/galera/r/MDEV-32938.result create mode 100644 mysql-test/suite/galera/t/MDEV-32938.test diff --git a/mysql-test/suite/galera/r/MDEV-32938.result b/mysql-test/suite/galera/r/MDEV-32938.result new file mode 100644 index 00000000000..5e310eb7843 --- /dev/null +++ b/mysql-test/suite/galera/r/MDEV-32938.result @@ -0,0 +1,21 @@ +connection node_2; +connection node_1; +connect con1,127.0.0.1,root,,test,$NODE_MYPORT_1; +call mtr.add_suppression("WSREP: ALTER TABLE isolation failure"); +CREATE TABLE t1(c1 INT PRIMARY KEY, c2 INT) ENGINE=InnoDB; +SET DEBUG_SYNC = 'wsrep_append_fk_toi_keys_before_close_tables SIGNAL may_alter WAIT_FOR bf_abort'; +ALTER TABLE t1 DROP COLUMN c2; +connection node_1; +SET DEBUG_SYNC = 'now WAIT_FOR may_alter'; +ALTER TABLE t1 ADD COLUMN c3 INT; +connection con1; +ERROR 70100: Query execution was interrupted +INSERT INTO t1 (c1, c2, c3) VALUES (1, 0, 0); +connection node_2; +INSERT INTO t1 (c1, c2, c3) VALUES (2, 0, 0); +connection node_1; +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t1; +disconnect con1; +disconnect node_2; +disconnect node_1; diff --git a/mysql-test/suite/galera/t/MDEV-32938.test b/mysql-test/suite/galera/t/MDEV-32938.test new file mode 100644 index 00000000000..cb41f21a58a --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-32938.test @@ -0,0 +1,57 @@ +# +# MDEV-32938: ALTER command is replicated and successfully applied while being BF-aborted locally. +# +# Why it happend: +# - ALTER went to prepare FK-referenced tables as TOI keys +# - to do this, it would open the main table with SHARED_HIGH_PRIO MDL lock which disregarded any +# other locks (including X-lock) waiting in the queue in case someone was already holding a +# compatible lock type (like any DML operation) +# - if there was other TOI operation on the same table, it would go through BF-abort cycle to grab +# the lock for itself +# - since the initial ALTER had not reached TOI yet, it would loose to real TOI operation and got +# BF-aborted with its THD marked as killed +# - then, ALTER would enter TOI and get replicated with no checks that it has already been aborted +# - after entering TOI mode, it would later find it'd been killed, and complete with an error +# - at the same time, the command would successfully apply on every other node except the initiator. +# +# Fixed by checking killed state on THD before entering TOI. +# + +--source include/galera_cluster.inc +--source include/have_debug_sync.inc +--source include/have_debug.inc + +--connect con1,127.0.0.1,root,,test,$NODE_MYPORT_1 + +call mtr.add_suppression("WSREP: ALTER TABLE isolation failure"); + +CREATE TABLE t1(c1 INT PRIMARY KEY, c2 INT) ENGINE=InnoDB; + +# Run ALTER DROP COLUMN and hang before closing tables on adding FK keys and before entering TOI. +# Wait until it gets BF-aborted. +SET DEBUG_SYNC = 'wsrep_append_fk_toi_keys_before_close_tables SIGNAL may_alter WAIT_FOR bf_abort'; +--send + ALTER TABLE t1 DROP COLUMN c2; + +--connection node_1 +# Run ALTER ADD COLUMN and BF-abort the previous ALTER DROP COLUMN. +SET DEBUG_SYNC = 'now WAIT_FOR may_alter'; +ALTER TABLE t1 ADD COLUMN c3 INT; + +--connection con1 +# ALTER DROP COLUMN gets BF aborted. +--error ER_QUERY_INTERRUPTED +--reap + +INSERT INTO t1 (c1, c2, c3) VALUES (1, 0, 0); + +--connection node_2 +# ALTER DROP COLUMN must not be replicated. +INSERT INTO t1 (c1, c2, c3) VALUES (2, 0, 0); + +# Cleanup. +--connection node_1 +SET DEBUG_SYNC = 'RESET'; +DROP TABLE t1; +--disconnect con1 +--source include/galera_end.inc diff --git a/sql/sql_alter.cc b/sql/sql_alter.cc index f83f629a715..b619f9ed741 100644 --- a/sql/sql_alter.cc +++ b/sql/sql_alter.cc @@ -490,17 +490,18 @@ bool Sql_cmd_alter_table::execute(THD *thd) } wsrep::key_array keys; - wsrep_append_fk_parent_table(thd, first_table, &keys); - - WSREP_TO_ISOLATION_BEGIN_ALTER((lex->name.str ? select_lex->db.str : NULL), - (lex->name.str ? lex->name.str : NULL), - first_table, &alter_info, &keys) + if (!wsrep_append_fk_parent_table(thd, first_table, &keys)) { - WSREP_WARN("ALTER TABLE isolation failure"); - DBUG_RETURN(TRUE); - } + WSREP_TO_ISOLATION_BEGIN_ALTER((lex->name.str ? select_lex->db.str : NULL), + (lex->name.str ? lex->name.str : NULL), + first_table, &alter_info, &keys) + { + WSREP_WARN("ALTER TABLE isolation failure"); + DBUG_RETURN(TRUE); + } - DEBUG_SYNC(thd, "wsrep_alter_table_after_toi"); + DEBUG_SYNC(thd, "wsrep_alter_table_after_toi"); + } } #endif diff --git a/sql/wsrep_mysqld.cc b/sql/wsrep_mysqld.cc index 688b0061389..7c8bb683aab 100644 --- a/sql/wsrep_mysqld.cc +++ b/sql/wsrep_mysqld.cc @@ -1288,7 +1288,7 @@ static void wsrep_keys_free(wsrep_key_arr_t* key_arr) * @param tables list of tables * @param keys prepared keys - * @return true if parent table append was successfull, otherwise false. + * @return 0 if parent table append was successful, non-zero otherwise. */ bool wsrep_append_fk_parent_table(THD* thd, TABLE_LIST* tables, wsrep::key_array* keys) @@ -1337,6 +1337,8 @@ wsrep_append_fk_parent_table(THD* thd, TABLE_LIST* tables, wsrep::key_array* key } exit: + DEBUG_SYNC(thd, "wsrep_append_fk_toi_keys_before_close_tables"); + /* close the table and release MDL locks */ close_thread_tables(thd); thd->mdl_context.rollback_to_savepoint(mdl_savepoint); @@ -1347,6 +1349,24 @@ exit: table->mdl_request.ticket= NULL; } + /* + MDEV-32938: Check if DDL operation has been killed before. + + It may be that during collecting foreign keys this operation gets BF-aborted + by another already-running TOI operation because it got MDL locks on the same + table for checking foreign keys. + After `close_thread_tables()` has been called it's safe to assume that no-one + can BF-abort this operation as it's not holding any MDL locks any more. + */ + if (!fail) + { + mysql_mutex_lock(&thd->LOCK_thd_kill); + if (thd->killed) + { + fail= true; + } + mysql_mutex_unlock(&thd->LOCK_thd_kill); + } return fail; } @@ -2294,6 +2314,15 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_, const TABLE_LIST* table_list, Alter_info* alter_info, wsrep::key_array* fk_tables) { + mysql_mutex_lock(&thd->LOCK_thd_kill); + const killed_state killed = thd->killed; + mysql_mutex_unlock(&thd->LOCK_thd_kill); + if (killed) + { + DBUG_ASSERT(FALSE); + return -1; + } + /* No isolation for applier or replaying threads. */