mariadb/mysql-test/suite/rpl/t/rpl_gtid_reconnect.test
Kristian Nielsen 0a68328673 MDEV-34705: Binlog-in-engine: Protect against concurrent RESET MASTER and dump threads
This is actually an existing problem in the old binlog implementation, and
this patch is applicable to old binlog also. The problem is that RESET
MASTER can run concurrently with binlog dump threads / connected slaves.
This will remove the binlog from under the feet of the reader, which can
cause all sorts of strange behaviour.

This patch fixes the problem by disallowing to run RESET MASTER when dump
threads (or other RESET MASTER or SHOW BINARY LOGS) are running. An error is
thrown in this case, user must stop slaves and/or kill dump threads to make
the RESET MASTER go through. A slave that connects in the middle of RESET
MASTER will wait for it to complete.

Fix a lot of test cases to kill any lingering dump threads before doing
RESET MASTER, mostly just by sourcing include/kill_binlog_dump_threads.inc.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2025-06-11 11:32:10 +02:00

204 lines
5.1 KiB
Text

--let $rpl_topology=1->2
--source include/rpl_init.inc
--source include/have_innodb.inc
--source include/have_debug.inc
--connection server_1
CREATE TABLE t1 (a INT);
FLUSH LOGS;
--save_master_pos
--connection server_2
--sync_with_master
# Prepare a string of events and have the slave replicate all of it.
--connection server_1
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (1);
INSERT INTO t1 VALUES (2);
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (3);
INSERT INTO t1 VALUES (4);
INSERT INTO t1 VALUES (5);
--save_master_pos
--connection server_2
--sync_with_master
--source include/stop_slave.inc
SELECT * FROM t1 ORDER BY a;
# Now start the slave again, but force a reconnect. There was a bug that this
# reconnect would cause duplicate events.
--connection server_1
# Make sure to get rid of any old binlog dump thread so it does not
# interfere with our DBUG error injection.
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (10);
SET @old_debug= @@GLOBAL.debug_dbug;
SET GLOBAL debug_dbug="+d,dummy_disable_default_dbug_output";
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
--source include/kill_binlog_dump_threads.inc
RESET MASTER;
# A1 B1 A2 B2 A3 B3, slave reached A1 and B3 and stopped. Slave starts,
# reconnects at A2. There was a bug that B2 would be duplicated.
SET gtid_domain_id=10;
SET gtid_seq_no=50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id=10;
SET gtid_seq_no=100;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id=11;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id=10;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id=11;
SET gtid_seq_no=200;
INSERT INTO t1 VALUES (13);
--connection server_2
START SLAVE UNTIL master_gtid_pos="10-1-50,11-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
--connection server_1
--source include/kill_binlog_dump_threads.inc
INSERT INTO t1 VALUES (20);
SET GLOBAL debug_dbug="+d,dummy_disable_default_dbug_output";
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
--source include/kill_binlog_dump_threads.inc
RESET MASTER;
# A1 B1 A2 B2 A3 B3. START SLAVE UNTIL A1,B3, gets reconnect at B2.
# There was a bug that the UNTIL would be ignored, and A2 would be lost.
--source include/kill_binlog_dump_threads.inc
SET gtid_domain_id= 9;
SET gtid_seq_no= 50;
INSERT INTO t1 VALUES (1);
SET gtid_domain_id= 10;
INSERT INTO t1 VALUES (11);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (2);
SET gtid_domain_id= 10;
SET gtid_seq_no= 100;
INSERT INTO t1 VALUES (12);
SET gtid_domain_id= 9;
INSERT INTO t1 VALUES (3);
SET gtid_domain_id= 10;
SET gtid_seq_no= 200;
INSERT INTO t1 VALUES (13);
SET gtid_domain_id= 10;
SET GLOBAL debug_dbug="+d,dummy_disable_default_dbug_output";
SET GLOBAL debug_dbug="+d,gtid_force_reconnect_at_10_1_100";
--connection server_2
START SLAVE UNTIL master_gtid_pos="9-1-50,10-1-200";
--source include/wait_for_slave_to_stop.inc
SELECT * FROM t1 ORDER BY a;
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
INSERT INTO t1 VALUES (20);
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t1 ORDER BY a;
--echo *** Test when slave IO thread needs to reconnect in the middle of an event group. ***
--connection server_2
--source include/stop_slave.inc
TRUNCATE t1;
RESET MASTER;
SET GLOBAL gtid_slave_pos= "";
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
TRUNCATE t1;
--source include/kill_binlog_dump_threads.inc
RESET MASTER;
--source include/kill_binlog_dump_threads.inc
SET GLOBAL debug_dbug="+d,dummy_disable_default_dbug_output";
SET GLOBAL debug_dbug="+d,binlog_force_reconnect_after_22_events";
# 4 events for FD, fake rotate, gtid list, binlog checkpoint.
# 2 events for GTID, create table
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
# 3 events for BEGIN/query/COMMIT
INSERT INTO t2 VALUES (1);
# 4 events for BEGIN/query/query/COMMIT
BEGIN;
INSERT INTO t2 VALUES (10);
INSERT INTO t2 VALUES (11);
COMMIT;
# So this event group starts after 4+2+4+3=13 events. Or 16 in row-based.
BEGIN;
INSERT INTO t2 VALUES (20);
INSERT INTO t2 VALUES (21);
INSERT INTO t2 VALUES (22);
INSERT INTO t2 VALUES (23);
INSERT INTO t2 VALUES (24);
INSERT INTO t2 VALUES (25);
INSERT INTO t2 VALUES (26);
INSERT INTO t2 VALUES (27);
INSERT INTO t2 VALUES (28);
INSERT INTO t2 VALUES (29);
COMMIT;
--save_master_pos
--connection server_2
--source include/start_slave.inc
--sync_with_master
SELECT * FROM t2 ORDER BY a;
--connection server_1
SET GLOBAL debug_dbug= @old_debug;
# Clean up.
--connection server_1
DROP TABLE t1, t2;
--source include/rpl_end.inc