mariadb/mysql-test/suite/galera/t/MDEV-33133.test
Denis Protivensky 235f33e360 MDEV-33133: MDL conflict handling code should skip BF-aborted trxs
It's possible that MDL conflict handling code is called more
than once for a transaction when:
- it holds more than one conflicting MDL lock
- reschedule_waiters() is executed,
which results in repeated attempts to BF-abort already aborted
transaction.
In such situations, it might be that BF-aborting logic sees
a partially rolled back transaction and erroneously decides
on future actions for such a transaction.

The specific situation tested and fixed is when a SR transaction
applied in the node gets BF-aborted by a started TOI operation.
It's then caught with the server transaction already rolled back,
but with no MDL locks yet released. This caused wrong state
detection for such a transaction during repeated MDL conflict
handling code execution.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
2024-09-01 16:19:59 +02:00

80 lines
2.5 KiB
Text

#
# MDEV-33133: MDL conflict handling code should skip transactions
# BF-aborted before.
#
# It's possible that MDL conflict handling code is called more
# than once for a transaction when:
# - it holds more than one conflicting MDL lock
# - reschedule_waiters() is executed,
# which results in repeated attempts to BF-abort already aborted
# transaction.
# In such situations, it might be that BF-aborting logic sees
# a partially rolled back transaction and erroneously decides
# on future actions for such a transaction.
#
# The specific situation tested and fixed is when a SR transaction
# applied in the node gets BF-aborted by a started TOI operation.
# It's then caught with the server transaction already rolled back,
# but with no MDL locks yet released. This caused wrong state
# detection for such a transaction during repeated MDL conflict
# handling code execution.
#
--source include/galera_cluster.inc
--source include/have_debug_sync.inc
--source include/have_debug.inc
--connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1
--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
--connection node_2
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO t1 VALUES (1);
--connection node_1a
# Sync wait for SR transaction to replicate and apply fragment.
SELECT COUNT(*) FROM t1;
SET SESSION wsrep_retry_autocommit = 0;
SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
--send
INSERT INTO t1 VALUES (2);
--connection node_1
SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
# BF-abort SR transaction and wait until it reaches the point
# prior to release MDL locks.
# Then abort local INSERT, which will go through rescedule_waiters()
# and see SR transaction holding MDL locks but already rolled back.
# In this case SR transaction should be skipped in MDL conflict
# handling code.
SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
--send
TRUNCATE TABLE t1;
--connection node_1a
# Local INSERT gets aborted.
--error ER_LOCK_DEADLOCK
--reap
# Let the aborted SR transaction continue and finally release MDL locks,
# which in turn allows TRUNCATE to complete.
SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
--connection node_2
# SR transaction has been BF-aborted.
--error ER_LOCK_DEADLOCK
INSERT INTO t1 VALUES (3);
--connection node_1
# TRUNCATE completes.
--reap
# Cleanup
SET GLOBAL DEBUG_DBUG = '';
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
--disconnect node_1a
--source include/galera_end.inc