MDEV-32938: DDL must check if not aborted before entering TOI

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
Denis Protivensky 2023-11-27 11:45:34 +03:00 committed by Julius Goryavsky
parent dc7138cbed
commit 6d9c9d92cc
4 changed files with 118 additions and 10 deletions

View file

@ -0,0 +1,21 @@
connection node_2;
connection node_1;
connect con1,127.0.0.1,root,,test,$NODE_MYPORT_1;
call mtr.add_suppression("WSREP: ALTER TABLE isolation failure");
CREATE TABLE t1(c1 INT PRIMARY KEY, c2 INT) ENGINE=InnoDB;
SET DEBUG_SYNC = 'wsrep_append_fk_toi_keys_before_close_tables SIGNAL may_alter WAIT_FOR bf_abort';
ALTER TABLE t1 DROP COLUMN c2;
connection node_1;
SET DEBUG_SYNC = 'now WAIT_FOR may_alter';
ALTER TABLE t1 ADD COLUMN c3 INT;
connection con1;
ERROR 70100: Query execution was interrupted
INSERT INTO t1 (c1, c2, c3) VALUES (1, 0, 0);
connection node_2;
INSERT INTO t1 (c1, c2, c3) VALUES (2, 0, 0);
connection node_1;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
disconnect con1;
disconnect node_2;
disconnect node_1;

View file

@ -0,0 +1,57 @@
#
# MDEV-32938: ALTER command is replicated and successfully applied while being BF-aborted locally.
#
# Why it happend:
# - ALTER went to prepare FK-referenced tables as TOI keys
# - to do this, it would open the main table with SHARED_HIGH_PRIO MDL lock which disregarded any
# other locks (including X-lock) waiting in the queue in case someone was already holding a
# compatible lock type (like any DML operation)
# - if there was other TOI operation on the same table, it would go through BF-abort cycle to grab
# the lock for itself
# - since the initial ALTER had not reached TOI yet, it would loose to real TOI operation and got
# BF-aborted with its THD marked as killed
# - then, ALTER would enter TOI and get replicated with no checks that it has already been aborted
# - after entering TOI mode, it would later find it'd been killed, and complete with an error
# - at the same time, the command would successfully apply on every other node except the initiator.
#
# Fixed by checking killed state on THD before entering TOI.
#
--source include/galera_cluster.inc
--source include/have_debug_sync.inc
--source include/have_debug.inc
--connect con1,127.0.0.1,root,,test,$NODE_MYPORT_1
call mtr.add_suppression("WSREP: ALTER TABLE isolation failure");
CREATE TABLE t1(c1 INT PRIMARY KEY, c2 INT) ENGINE=InnoDB;
# Run ALTER DROP COLUMN and hang before closing tables on adding FK keys and before entering TOI.
# Wait until it gets BF-aborted.
SET DEBUG_SYNC = 'wsrep_append_fk_toi_keys_before_close_tables SIGNAL may_alter WAIT_FOR bf_abort';
--send
ALTER TABLE t1 DROP COLUMN c2;
--connection node_1
# Run ALTER ADD COLUMN and BF-abort the previous ALTER DROP COLUMN.
SET DEBUG_SYNC = 'now WAIT_FOR may_alter';
ALTER TABLE t1 ADD COLUMN c3 INT;
--connection con1
# ALTER DROP COLUMN gets BF aborted.
--error ER_QUERY_INTERRUPTED
--reap
INSERT INTO t1 (c1, c2, c3) VALUES (1, 0, 0);
--connection node_2
# ALTER DROP COLUMN must not be replicated.
INSERT INTO t1 (c1, c2, c3) VALUES (2, 0, 0);
# Cleanup.
--connection node_1
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
--disconnect con1
--source include/galera_end.inc

View file

@ -490,17 +490,18 @@ bool Sql_cmd_alter_table::execute(THD *thd)
}
wsrep::key_array keys;
wsrep_append_fk_parent_table(thd, first_table, &keys);
WSREP_TO_ISOLATION_BEGIN_ALTER((lex->name.str ? select_lex->db.str : NULL),
(lex->name.str ? lex->name.str : NULL),
first_table, &alter_info, &keys)
if (!wsrep_append_fk_parent_table(thd, first_table, &keys))
{
WSREP_WARN("ALTER TABLE isolation failure");
DBUG_RETURN(TRUE);
}
WSREP_TO_ISOLATION_BEGIN_ALTER((lex->name.str ? select_lex->db.str : NULL),
(lex->name.str ? lex->name.str : NULL),
first_table, &alter_info, &keys)
{
WSREP_WARN("ALTER TABLE isolation failure");
DBUG_RETURN(TRUE);
}
DEBUG_SYNC(thd, "wsrep_alter_table_after_toi");
DEBUG_SYNC(thd, "wsrep_alter_table_after_toi");
}
}
#endif

View file

@ -1288,7 +1288,7 @@ static void wsrep_keys_free(wsrep_key_arr_t* key_arr)
* @param tables list of tables
* @param keys prepared keys
* @return true if parent table append was successfull, otherwise false.
* @return 0 if parent table append was successful, non-zero otherwise.
*/
bool
wsrep_append_fk_parent_table(THD* thd, TABLE_LIST* tables, wsrep::key_array* keys)
@ -1337,6 +1337,8 @@ wsrep_append_fk_parent_table(THD* thd, TABLE_LIST* tables, wsrep::key_array* key
}
exit:
DEBUG_SYNC(thd, "wsrep_append_fk_toi_keys_before_close_tables");
/* close the table and release MDL locks */
close_thread_tables(thd);
thd->mdl_context.rollback_to_savepoint(mdl_savepoint);
@ -1347,6 +1349,24 @@ exit:
table->mdl_request.ticket= NULL;
}
/*
MDEV-32938: Check if DDL operation has been killed before.
It may be that during collecting foreign keys this operation gets BF-aborted
by another already-running TOI operation because it got MDL locks on the same
table for checking foreign keys.
After `close_thread_tables()` has been called it's safe to assume that no-one
can BF-abort this operation as it's not holding any MDL locks any more.
*/
if (!fail)
{
mysql_mutex_lock(&thd->LOCK_thd_kill);
if (thd->killed)
{
fail= true;
}
mysql_mutex_unlock(&thd->LOCK_thd_kill);
}
return fail;
}
@ -2294,6 +2314,15 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_,
const TABLE_LIST* table_list,
Alter_info* alter_info, wsrep::key_array* fk_tables)
{
mysql_mutex_lock(&thd->LOCK_thd_kill);
const killed_state killed = thd->killed;
mysql_mutex_unlock(&thd->LOCK_thd_kill);
if (killed)
{
DBUG_ASSERT(FALSE);
return -1;
}
/*
No isolation for applier or replaying threads.
*/