mariadb/mysql-test/suite/rpl/t/rpl_parallel_deadlock_victim2.test
Kristian Nielsen 900c4d6920 MDEV-31655: Parallel replication deadlock victim preference code errorneously removed
Restore code to make InnoDB choose the second transaction as a deadlock
victim if two transactions deadlock that need to commit in-order for
parallel replication. This code was erroneously removed when VATS was
implemented in InnoDB.

Also add a test case for InnoDB choosing the right deadlock victim.
Also fixes this bug, with testcase that reliably reproduces:

MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master

Note: This should be null-merged to 10.6, as a different fix is needed
there due to InnoDB locking code changes.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2023-08-15 16:35:30 +02:00

83 lines
3 KiB
Text

--source include/master-slave.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_binlog_format_statement.inc
--connection master
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1(a INT) ENGINE=INNODB;
INSERT INTO t1 VALUES(1);
--source include/save_master_gtid.inc
--connection slave
--source include/sync_with_master_gtid.inc
--source include/stop_slave.inc
--let $save_transaction_retries= `SELECT @@global.slave_transaction_retries`
--let $save_slave_parallel_threads= `SELECT @@global.slave_parallel_threads`
--let $save_slave_parallel_mode= `SELECT @@global.slave_parallel_mode`
set @@global.slave_parallel_threads= 2;
set @@global.slave_parallel_mode= OPTIMISTIC;
set @@global.slave_transaction_retries= 2;
--echo *** MDEV-28776: rpl.rpl_mark_optimize_tbl_ddl fails with timeout on sync_with_master
# This was a failure where a transaction T1 could deadlock multiple times
# with T2, eventually exceeding the default --slave-transaction-retries=10.
# Root cause was MDEV-31655, causing InnoDB to wrongly choose T1 as deadlock
# victim over T2. If thread scheduling is right, it was possible for T1 to
# repeatedly deadlock, roll back, and have time to grab an S lock again before
# T2 woke up and got its waiting X lock, thus repeating the same deadlock over
# and over.
# Once the bug is fixed, it is not possible to re-create the same execution
# and thread scheduling. Instead we inject small sleeps in a way that
# triggered the problem when the bug was there, to demonstrate that the
# problem no longer occurs.
--connection master
# T1
SET @@gtid_seq_no= 100;
INSERT INTO t1 SELECT 1+a FROM t1;
# T2
SET @@gtid_seq_no= 200;
INSERT INTO t1 SELECT 2+a FROM t1;
SELECT * FROM t1 ORDER BY a;
--source include/save_master_gtid.inc
--connection slave
SET @save_dbug= @@GLOBAL.debug_dbug;
# Inject various delays to hint thread scheduling to happen in the way that
# triggered MDEV-28776.
# Small delay starting T1 so it will be the youngest trx and be chosen over
# T2 as the deadlock victim by default in InnoDB.
SET GLOBAL debug_dbug="+d,rpl_parallel_delay_gtid_0_x_100_start";
# Small delay before taking insert X lock to give time for both T1 and T2 to
# get the S lock first and cause a deadlock.
SET GLOBAL debug_dbug="+d,rpl_write_record_small_sleep_gtid_100_200";
# Small delay after T2's wait on the X lock, to give time for T1 retry to
# re-aquire the T1 S lock first.
SET GLOBAL debug_dbug="+d,small_sleep_after_lock_wait";
# Delay deadlock kill of T2.
SET GLOBAL debug_dbug="+d,rpl_delay_deadlock_kill";
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
SET GLOBAL debug_dbug= @save_dbug;
SELECT * FROM t1 ORDER BY a;
# Cleanup.
--connection slave
--source include/stop_slave.inc
eval SET @@global.slave_parallel_threads= $save_slave_parallel_threads;
eval SET @@global.slave_parallel_mode= $save_slave_parallel_mode;
eval SET @@global.slave_transaction_retries= $save_transaction_retries;
--source include/start_slave.inc
--connection master
DROP TABLE t1;
--source include/rpl_end.inc