The problem was that the wsrep_handle_mdl_conflict function based its
decision on the thd->lex->sql_command value of the thread holding the
granted MDL lock.

There are two possible schedules:
(1) FLUSH TABLES ... FOR EXPORT takes the MDL lock (granted_thd).
An INSERT from another node is the conflicting operation (request_thd)
and sees the MDL conflict. Because granted_thd has not executed anything
else, thd->lex->sql_command == SQLCOM_FLUSH, and this case was handled
correctly in wsrep_handle_mdl_conflict, i.e. the INSERT needs to wait.
(2) FLUSH TABLES ... FOR EXPORT takes the MDL lock (granted_thd).
SET SESSION wsrep_sync_wait=0; (granted_thd)
An INSERT from another node is the conflicting operation (request_thd).
However, thd->lex->sql_command is not stored with the taken MDL lock, and
because granted_thd is now executing SET, thd->lex->sql_command !=
SQLCOM_FLUSH. The BF INSERT therefore aborts granted_thd, which means the
FTFE is also killed and the MDL lock released. This is incorrect, as the
FTFE has already written a file to the filesystem and cannot really be
killed.
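For illustration, schedule (2) corresponds to a statement sequence like the
following MTR-style sketch (the table name t1 and the exact statements are
illustrative assumptions, not the actual regression test):

    --connection node_1
    CREATE TABLE t1 (f1 INT PRIMARY KEY) ENGINE=InnoDB;
    # granted_thd: takes the MDL lock and writes the .cfg export file
    FLUSH TABLES t1 FOR EXPORT;
    # any further statement changes thd->lex->sql_command away from SQLCOM_FLUSH
    SET SESSION wsrep_sync_wait=0;

    --connection node_2
    # applied on node_1 as a BF operation (request_thd); before the fix it
    # BF-aborted the FTFE holder, with the fix it waits for the MDL lock
    INSERT INTO t1 VALUES (1);

    --connection node_1
    UNLOCK TABLES;

With the refactored check the outcome no longer depends on which statement
granted_thd happened to parse last.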
In this fix wsrep_handle_mdl_conflict is refactored so that it no longer
uses thd->lex->sql_command for its decisions. Instead, the connection
state is determined via THD members, e.g.:
* wsrep_thd_is_toi() || wsrep_thd_is_applying() - ongoing TOI or applier
* wsrep_thd_is_BF() - thread is brute force
* wsrep_thd_is_SR() - thread is a streaming replication thread
* thd->current_backup_stage != BACKUP_FINISHED - there is an ongoing BACKUP
* thd->global_read_lock.is_acquired() - ongoing FTWRL
* thd->locked_tables_mode == LTM_LOCK_TABLES - ongoing FTFE or LOCK TABLES
#
# MDEV-32938: ALTER command is replicated and successfully applied while being BF-aborted locally.
#
# Why it happened:
# - ALTER went to prepare FK-referenced tables as TOI keys
# - to do this, it would open the main table with a SHARED_HIGH_PRIO MDL lock which disregarded any
#   other locks (including X-lock) waiting in the queue in case someone was already holding a
#   compatible lock type (like any DML operation)
# - if there was another TOI operation on the same table, it would go through the BF-abort cycle to
#   grab the lock for itself
# - since the initial ALTER had not reached TOI yet, it would lose to the real TOI operation and get
#   BF-aborted with its THD marked as killed
# - then, ALTER would enter TOI and get replicated with no checks that it had already been aborted
# - after entering TOI mode, it would later find it had been killed, and complete with an error
# - at the same time, the command would successfully apply on every other node except the initiator.
#
#
# NOTE: After the MDL-lock conflict resolution rework the behaviour of this
# test has changed.
#

--source include/galera_cluster.inc
--source include/have_debug_sync.inc
--source include/have_debug.inc

--connect con1,127.0.0.1,root,,test,$NODE_MYPORT_1

call mtr.add_suppression("WSREP: ALTER TABLE isolation failure");

CREATE TABLE t1(c1 INT PRIMARY KEY, c2 INT) ENGINE=InnoDB;

# Run ALTER DROP COLUMN and hang before closing tables on adding FK keys and before entering TOI.
SET DEBUG_SYNC = 'wsrep_append_fk_toi_keys_before_close_tables SIGNAL may_alter';
--send
ALTER TABLE t1 DROP COLUMN c2;

--connection node_1
# Run ALTER ADD COLUMN
SET DEBUG_SYNC = 'now WAIT_FOR may_alter';
ALTER TABLE t1 ADD COLUMN c3 INT;

--connection con1
# ALTER DROP COLUMN waits
--reap

INSERT INTO t1 (c1, c3) VALUES (1, 0);

--connection node_2
# ALTER DROP COLUMN must be replicated.
INSERT INTO t1 (c1, c3) VALUES (2, 0);

# Cleanup.
--connection node_1
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
--disconnect con1
--source include/galera_end.inc