mariadb/mysql-test/suite/galera/t/MDEV-29512.test
sjaakola 07a06022c4 MDEV-29512 deadlock between commit monitor and THD::LOCK_thd_data mutex
This commit contains only a mtr test for reproducing the issue in MDEV-29512
The actual fix will be pushed in wsrep-lib repository

The hanging in MDEV-29512 happens when binlog purging is attempted, and there is
one local BF aborted transaction waiting for commit monitor.

The test will launch two node cluster and enable binlogging with expire log days,
to force binlog purging to happen.
A local transaction is executed so that will become BF abort victim, and has advanced
to replication stage waiting for commit monitor for final cleanup (to mark position in innodb)
after that, applier is released to complete the BF abort and due to binlog configuration,
starting the binlog purging. This is where the hanging would occur, if code is buggy

Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
2022-11-29 08:44:23 +02:00

91 lines
3.1 KiB
Text

#
# This test is for reproducing the issue in:
# https://jira.mariadb.org/browse/MDEV-29512
#
# The hanging in MDEV-29512 happens when binlog purging is attempted, and there is
# one local BF aborted transaction waiting for commit monitor.
#
# The test will launch two node cluster and enable binlogging with expire log days,
# to force binlog purging to happen.
# A local transaction is executed so that will become BF abort victim, and has advanced
# to replication stage waiting for commit monitor for final cleanup (to mark position in innodb)
# after that, applier is released to complete the BF abort and due to binlog configuration,
# starting the binlog purging. This is where the hanging would occur, if code is buggy
#
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/have_debug_sync.inc
--source include/galera_have_debug_sync.inc
#
# binlog size is limited to 4096 bytes, we will create enough events to
# cause binlog rotation
#
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 varchar(2000));
INSERT INTO t1 VALUES (1, 0, REPEAT('1234567890', 200));
INSERT INTO t1 VALUES (3, 3, REPEAT('1234567890', 200));
SET SESSION wsrep_sync_wait=0;
# set sync point for replication applier
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
# Control connection to manage sync points for appliers
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
--connection node_1a
SET SESSION wsrep_sync_wait=0;
# starting local transaction, only select so far,
# write will happen later and this will be ordered after the transaction in node_2
--connection node_1
begin;
select f1,f2 from t1;
# send from node 2 an UPDATE transaction, which will BF abort the transaction in node_1
--connection node_2
--let $wait_condition=select count(*)=2 from t1
--source include/wait_condition.inc
UPDATE t1 SET f2=2 WHERE f1=3;
--connection node_1a
# wait to see the UPDATE from node_2 in apply_cb sync point
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
--connection node_1
# now issuing conflicting update
UPDATE t1 SET f2=1 WHERE f1=3;
# Block the local commit, send final COMMIT and wait until it gets blocked
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_set_sync_point.inc
--send COMMIT
--connection node_1a
# wait for the local commit to enter in commit monitor wait state
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# release the local transaction to continue with commit
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_signal_sync_point.inc
# and now release the applier, it should force local trx to abort
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
--connection node_1
--error ER_LOCK_DEADLOCK
--reap
# wait until applying is complete
--let $wait_condition = SELECT COUNT(*)=1 FROM t1 WHERE f2=2
--source include/wait_condition.inc
# final read to verify what we got
select f1,f2 from t1;
DROP TABLE t1;