mariadb/mysql-test/suite/rpl/t/parallel_backup.test
Monty 9d53fbef7c MDEV-19749 MDL scalability regression after backup locks
The MDL_BACKUP_COMMIT lock used in handler.cc:ha_commit_trans(), which is
used to block commits as part of FLUSH TABLES WITH READ LOCK and BACKUP
STAGE BLOCK_COMMIT, can have a notable performance impact.
We need the commit block to be able to ensure consistent backups.

The purpose of this commit is to avoid taking an expensive MDL lock to
protect each commit against an unlikely backup or FTWRL. Instead, we
add markers to threads that are doing a commit, and when BLOCK_COMMIT or
an FTWRL lock is used we do slightly more work to ensure that all
threads are taking MDL locks before taking the requested lock.

When backup and FTWRL are not running we now increment a global
variable and set a marker in the THD to signal that the thread is
in the 'protected commit code block'.

When FTWRL or BACKUP STAGE START is executed we enable MDL locking for
commits and wait until all threads are outside of the protected block
before continuing.
When FTWRL and BACKUP STAGE end, we mark that MDL protection is no
longer needed.

The effect is that we change an MDL lock to two atomic increments,
two memory assignments and one extra if. This speeds up commits
but causes FTWRL and BACKUP STAGE START to be slower to start as
they have to wait for active commits to complete.

Most of the new logic can be found in the functions:
enable_backup_commit_locks(), protect_against_backup() and
unprotect_against_backup().

Other things:
- The changes in the tests were needed because BACKUP STAGE START will
  now block if there are threads inside the 'protected commit block'.
  Before, the tests waited for BACKUP STAGE BLOCK_COMMIT. I have
  now changed the tests to either wait for BACKUP STAGE START or to
  do BACKUP STAGE START early and wait for BACKUP STAGE BLOCK_COMMIT.
- Added MDL_request mdl_backup to THD to avoid initializing a new
  MDL_request for each commit and to simplify some code.
  This added 448 bytes to the THD.
2025-04-11 07:41:58 +03:00

150 lines
3.8 KiB
Text

# Setup: pull in the prerequisites (InnoDB, MIXED binlog format, a
# master-slave replication pair), create the test table and configure the
# slave for parallel apply with two worker threads.
--source include/have_innodb.inc
# The test is not format specific, MIXED is required to optimize testing time
--source include/have_binlog_format_mixed.inc
--source include/master-slave.inc
--echo #
--echo # MDEV-21953: deadlock between BACKUP STAGE BLOCK_COMMIT and parallel
--echo # replication
--echo #
--connection master
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE = innodb;
# MDEV-515 takes X-lock on the table for the first insert.
# So concurrent insert won't happen on the table
INSERT INTO t1 VALUES(100);
# sync_slave_with_master leaves the slave connection current, so the
# statements below run on the slave.
--sync_slave_with_master
# Keep these messages out of the error-log check; presumably they are
# produced when the parallel workers are blocked by the local transactions
# opened further down -- TODO confirm.
call mtr.add_suppression("Deadlock found when trying to get lock");
call mtr.add_suppression("Commit failed due to failure of an earlier commit");
--source include/stop_slave.inc
# Remember the current parallel-replication settings; they are restored in
# the clean-up section at the end of the test.
SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
# Two workers in optimistic mode, so the two master transactions created by
# each test case can be applied concurrently.
SET @@global.slave_parallel_threads= 2;
SET @@global.slave_parallel_mode = 'optimistic';
# Case 1: BACKUP STAGE START issued while one parallel worker is waiting
# for the prior transaction to commit.
--connection master
# Two independent single-row transactions, one for each slave worker.
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
--save_master_pos
# The plot:
# Block the 1st of two workers and, once the 2nd one is waiting for the
# prior commit, issue BACKUP commands.
# BACKUP STAGE START may hang, so it is issued with --send.
# Release the 1st worker to observe a deadlock unless it's fixed.
--connect (aux_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,)
BEGIN;
# block the 1st worker and wait for the 2nd ready to commit
# (this open local transaction inserts the same key, a=1, that the first
# replicated INSERT uses)
INSERT INTO t1 VALUES (1);
--connection slave
--source include/start_slave.inc
--connection aux_slave
# Poll the processlist until some thread reports that it is waiting for
# the prior transaction to commit.
--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
--source include/wait_condition.inc
# While the 1st worker is locked out run backup
--connect (backup_slave,127.0.0.1,root,,test,$SLAVE_MYPORT,)
# Sent asynchronously; the result is collected by --reap below.
--send BACKUP STAGE START
# release the 1st worker
--connection aux_slave
# NOTE(review): fixed one-second delay, presumably to let the sent
# BACKUP STAGE START get under way before the blocker is released -- confirm.
--sleep 1
ROLLBACK;
--connection backup_slave
# Collect the result of the BACKUP STAGE START sent above.
--reap
BACKUP STAGE END;
--connection slave
# Wait for the slave to reach the master position saved above, then check
# that t1 is identical on master and slave.
--sync_with_master
--let $diff_tables= master:t1,slave:t1
--source include/diff_tables.inc
# Case 2: same scenario as above, but BACKUP STAGE START is taken up front
# while the slave is stopped, and the statement issued while a worker is
# waiting is BACKUP STAGE BLOCK_COMMIT.
--echo #
--echo # Test with blocking BLOCK_COMMIT
--echo #
--connection slave
--source include/stop_slave.inc
--connection backup_slave
# Start the backup before any replicated transaction is being applied.
BACKUP STAGE START;
--connection master
# Two more single-row transactions for the two slave workers.
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
--save_master_pos
--connection aux_slave
BEGIN;
# Open local transaction inserting the same key, a=3, as the first
# replicated INSERT, to block the worker applying it.
INSERT INTO t1 VALUES (3);
--connection slave
--source include/start_slave.inc
--connection aux_slave
# Poll the processlist until some thread reports that it is waiting for
# the prior transaction to commit.
--let $wait_condition= SELECT COUNT(*) > 0 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
--source include/wait_condition.inc
--connection backup_slave
# Sent asynchronously; the result is collected by --reap below.
--send BACKUP STAGE BLOCK_COMMIT;
--connection aux_slave
# NOTE(review): fixed one-second delay, presumably to let the sent
# BLOCK_COMMIT get under way before the blocker is released -- confirm.
--sleep 1
ROLLBACK;
--connection backup_slave
# Collect the result of the BACKUP STAGE BLOCK_COMMIT sent above.
--reap
BACKUP STAGE END;
--connection slave
# Wait for the slave to reach the master position saved above, then check
# that t1 is identical on master and slave.
--sync_with_master
--let $diff_tables= master:t1,slave:t1
--source include/diff_tables.inc
--echo #
--echo # MDEV-30423: dealock XA COMMIT vs BACKUP
--echo #
# Prove XA "COMPLETE" 'xid' does not deadlock, similarly to the normal trx
# case. The slave binlog group commit leader is blocked by a local trx like
# in the above normal trx case.
# [Notice a reuse of t1,aux_conn from above.]
# NOTE(review): no connection named aux_conn is opened in this file; this
# presumably refers to aux_slave -- confirm against parallel_backup_xa.inc.
#
# The include file is run four times, parameterised by $complete
# (COMMIT or ROLLBACK) and, for the last two runs, with $slave_ooo_error set.
--let $complete = COMMIT
--source parallel_backup_xa.inc
--let $complete = ROLLBACK
--source parallel_backup_xa.inc
# NOTE(review): the meaning of $slave_ooo_error is defined in
# parallel_backup_xa.inc, not here. Since the slave is restarted after each
# of the following runs, it presumably makes the slave stop with an error --
# TODO confirm.
--let $slave_ooo_error = 1
--let $complete = COMMIT
--source parallel_backup_xa.inc
--connection slave
# Restart replication and wait until the slave has caught up by GTID.
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
# Set again before the ROLLBACK run; whether the include file resets the
# variable is not visible from this file.
--let $slave_ooo_error = 1
--let $complete = ROLLBACK
--source parallel_backup_xa.inc
--connection slave
# Restart replication and wait until the slave has caught up by GTID.
--source include/start_slave.inc
--source include/sync_with_master_gtid.inc
# Clean up.
# Restore the slave's parallel-replication settings saved during setup.
# The slave is stopped before they are changed and started again afterwards.
--connection slave
--source include/stop_slave.inc
SET @@global.slave_parallel_threads= @old_parallel_threads;
SET @@global.slave_parallel_mode = @old_parallel_mode;
--source include/start_slave.inc
# Drop the test table through connection server_1 (presumably the master
# server, as set up by master-slave.inc -- confirm), then run the standard
# replication test epilogue.
--connection server_1
DROP TABLE t1;
--source include/rpl_end.inc