mariadb/mysql-test/suite/rpl/include/rpl_start_stop_slave.test
Kristian Nielsen 0a68328673 MDEV-34705: Binlog-in-engine: Protect against concurrent RESET MASTER and dump threads
This is actually an existing problem in the old binlog implementation, and
this patch is applicable to old binlog also. The problem is that RESET
MASTER can run concurrently with binlog dump threads / connected slaves.
This will remove the binlog from under the feet of the reader, which can
cause all sorts of strange behaviour.

This patch fixes the problem by disallowing to run RESET MASTER when dump
threads (or other RESET MASTER or SHOW BINARY LOGS) are running. An error is
thrown in this case, user must stop slaves and/or kill dump threads to make
the RESET MASTER go through. A slave that connects in the middle of RESET
MASTER will wait for it to complete.

Fix a lot of test cases to kill any lingering dump threads before doing
RESET MASTER, mostly just by sourcing include/kill_binlog_dump_threads.inc.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2025-06-11 11:32:10 +02:00

219 lines
5.4 KiB
Text

# Slow test, don't run during staging part
source include/not_staging.inc;
#
# Bug#6148 ()
#
# Let the master do lots of insertions
connection master;
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
connection slave;
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
connection master;
create table t1(n int);
sync_slave_with_master;
stop slave;
--source include/wait_for_slave_to_stop.inc
connection master;
let $1=2500;
disable_query_log;
while ($1)
{
eval insert into t1 values($1);
dec $1;
}
enable_query_log;
save_master_pos;
connection slave;
start slave;
let $wait_condition= SELECT COUNT(*) > 0 FROM t1;
source include/wait_condition.inc;
stop slave io_thread;
start slave io_thread;
source include/wait_for_slave_to_start.inc;
sync_with_master;
connection master;
drop table t1;
sync_slave_with_master;
#
# Bug#38205 Row-based Replication (RBR) causes inconsistencies...
# Bug#319 if while a non-transactional slave is replicating a transaction...
#
# Verifying that STOP SLAVE does not interrupt excution of a group
# execution of events if the group can not roll back.
# Killing the sql thread continues to provide a "hard" stop (the
# part II, moved to the bugs suite as it's hard to make it
# deterministic with KILL).
#
#
# Part I. The being stopped sql thread finishes first the current group of
# events if the group contains an event on a non-transaction table.
connection master;
create table t1i(n int primary key) engine=innodb;
create table t2m(n int primary key) engine=myisam;
begin;
insert into t1i values (1);
insert into t1i values (2);
insert into t1i values (3);
commit;
sync_slave_with_master;
connection slave;
begin;
insert into t1i values (5);
connection master;
let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
begin;
insert into t1i values (4);
insert into t2m values (1); # non-ta update to process
insert into t1i values (5); # to block at. to be played with stopped
commit;
connection slave;
# slave sql thread must be locked out by the conn `slave' explicit lock
let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--disable_query_log
eval select $pos0_master - $pos0_slave as zero;
--enable_query_log
connection slave1;
let $count= 1;
let $table= t2m;
source include/wait_until_rows_count.inc;
send stop slave;
connection slave;
rollback; # release the sql thread
connection slave1;
reap;
source include/wait_for_slave_to_stop.inc;
let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
--echo *** sql thread is *not* running: $sql_status ***
connection master;
let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
connection slave;
let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--echo *** the prove: the stopped slave has finished the current transaction ***
--disable_query_log
select count(*) as five from t1i;
eval select $pos1_master - $pos1_slave as zero;
eval select $pos1_slave > $pos0_slave as one;
--enable_query_log
source include/start_slave.inc;
# clean-up
connection master;
drop table t1i, t2m;
sync_slave_with_master;
--echo #
--echo # Bug#56096 STOP SLAVE hangs if executed in parallel with user sleep
--echo #
--connection master
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1 (a INT );
sync_slave_with_master;
--connection slave1
--echo # lock table for synchronization
LOCK TABLES t1 WRITE;
--connection master
--echo # insert into the table
INSERT INTO t1 SELECT SLEEP(4);
--connection slave
--echo # Slave: wait for the insert
let $wait_condition=
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE STATE = "Waiting for table metadata lock"
AND INFO = "INSERT INTO t1 SELECT SLEEP(4)";
--source include/wait_condition.inc
--echo # send slave stop
--send STOP SLAVE
--connection slave1
--echo # wait for stop slave
let $wait_condition=
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE INFO = "STOP SLAVE";
--source include/wait_condition.inc
--echo # Slave1: unlock the table
UNLOCK TABLES;
--connection slave
--echo # wait for the slave to stop
--reap
--source include/wait_for_slave_to_stop.inc
--echo # Start slave again
--source include/start_slave.inc
--echo # Clean up
--connection master
DROP TABLE t1;
sync_slave_with_master;
#
# bug#3593869-64035 attempt to read a member of event_coordinates
# referenced by NULL pointer crashes server.
# Testing how out of valid range position value is handled with an error.
#
--connection slave
--source include/stop_slave.inc
--connection master
--source include/kill_binlog_dump_threads.inc
RESET MASTER;
let $master_pos= query_get_value(SHOW MASTER STATUS, Position, 1);
let $master_pos= `SELECT $master_pos + 1`;
--connection slave
--replace_regex /[0-9]+/MASTER_POS/
eval CHANGE MASTER TO master_log_pos=$master_pos, master_use_gtid=no;
START SLAVE;
# ER_MASTER_FATAL_ERROR_READING_BINLOG 1236
--let $slave_param=Last_IO_Errno
--let $slave_param_value=1236
--source include/wait_for_slave_param.inc
--let $slave_field_result_replace= / at [0-9]*/ at XXX/
--let $status_items= Last_IO_Errno, Last_IO_Error
--source include/show_slave_status.inc
--source include/stop_slave_sql.inc
RESET SLAVE;
--connection master
--source include/kill_binlog_dump_threads.inc
RESET MASTER;
# Slave is stopped by bug#3593869-64035 tests.