mariadb/mysql-test/extra/rpl_tests/rpl_start_stop_slave.test
Davi Arnaut 1d43e72a0b Bug#56096: STOP SLAVE hangs if executed in parallel with user sleep
The root of the problem is that to interrupt a slave SQL thread
wait, the STOP SLAVE implementation uses thd->awake(THD::NOT_KILLED).
This appears as a spurious wakeup (e.g. from a sleep on a
condition variable) to the code that the slave SQL thread is
executing at the time of the STOP. If the code is not written
to be spurious-wakeup safe, unexpected behavior can occur. For
the reported case, this problem led to an infinite loop around
the interruptible_wait() function in item_func.cc (SLEEP()
function implementation).  The loop was not being properly
restarted and, consequently, would not come to an end. Since the
SLEEP function sleeps on a timed event in order to be killable
and to perform periodic checks until the requested time has
elapsed, the spurious wake up was causing the requested sleep
time to be reset every two seconds.

The solution is to calculate the requested absolute time only
once and to ensure that the thread only sleeps until this
time is elapsed. In case of a spurious wake up, the sleep is
restarted using the previously calculated absolute time. This
restores the behavior present in previous releases. If a slave
thread is executing a SLEEP function, a STOP SLAVE statement
will wait until the time requested in the sleep function
has elapsed.

mysql-test/extra/rpl_tests/rpl_start_stop_slave.test:
  Add test case for Bug#56096.
mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result:
  Add test case result for Bug#56096.
sql/item_func.cc:
  Reorganize interruptible_wait into a class so that the absolute
  time can be preserved across calls to the wait function. This
  allows the sleep to be properly restarted in the presence of
  spurious wake ups, including those generated by a STOP SLAVE.
2010-10-13 22:54:07 -03:00

181 lines
4.4 KiB
Text

#
# Bug#6148 ()
#
# Let the master do lots of insertions
connection master;
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
connection slave;
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT");
connection master;
create table t1(n int);
sync_slave_with_master;
stop slave;
--source include/wait_for_slave_to_stop.inc
connection master;
let $1=5000;
disable_query_log;
while ($1)
{
eval insert into t1 values($1);
dec $1;
}
enable_query_log;
save_master_pos;
connection slave;
start slave;
let $wait_condition= SELECT COUNT(*) > 0 FROM t1;
source include/wait_condition.inc;
stop slave io_thread;
start slave io_thread;
source include/wait_for_slave_to_start.inc;
sync_with_master;
connection master;
drop table t1;
sync_slave_with_master;
#
# Bug#38205 Row-based Replication (RBR) causes inconsistencies...
# Bug#319 if while a non-transactional slave is replicating a transaction...
#
# Verifying that STOP SLAVE does not interrupt excution of a group
# execution of events if the group can not roll back.
# Killing the sql thread continues to provide a "hard" stop (the
# part II, moved to the bugs suite as it's hard to make it
# deterministic with KILL).
#
#
# Part I. The being stopped sql thread finishes first the current group of
# events if the group contains an event on a non-transaction table.
connection master;
create table t1i(n int primary key) engine=innodb;
create table t2m(n int primary key) engine=myisam;
begin;
insert into t1i values (1);
insert into t1i values (2);
insert into t1i values (3);
commit;
sync_slave_with_master;
connection slave;
begin;
insert into t1i values (5);
connection master;
let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
begin;
insert into t1i values (4);
insert into t2m values (1); # non-ta update to process
insert into t1i values (5); # to block at. to be played with stopped
commit;
connection slave;
# slave sql thread must be locked out by the conn `slave' explicit lock
let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--disable_query_log
eval select $pos0_master - $pos0_slave as zero;
--enable_query_log
connection slave1;
let $count= 1;
let $table= t2m;
source include/wait_until_rows_count.inc;
send stop slave;
connection slave;
rollback; # release the sql thread
connection slave1;
reap;
source include/wait_for_slave_to_stop.inc;
let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
--echo *** sql thread is *not* running: $sql_status ***
connection master;
let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
connection slave;
let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
--echo *** the prove: the stopped slave has finished the current transaction ***
--disable_query_log
select count(*) as five from t1i;
eval select $pos1_master - $pos1_slave as zero;
eval select $pos1_slave > $pos0_slave as one;
--enable_query_log
source include/start_slave.inc;
# clean-up
connection master;
drop table t1i, t2m;
sync_slave_with_master;
--echo #
--echo # Bug#56096 STOP SLAVE hangs if executed in parallel with user sleep
--echo #
--connection master
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
CREATE TABLE t1 (a INT );
sync_slave_with_master;
--connection slave1
--echo # Slave1: lock table for synchronization
LOCK TABLES t1 WRITE;
--connection master
--echo # Master: insert into the table
INSERT INTO t1 SELECT SLEEP(4);
--connection slave
--echo # Slave: wait for the insert
let $wait_condition=
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE STATE = "Waiting for table metadata lock"
AND INFO = "INSERT INTO t1 SELECT SLEEP(4)";
--source include/wait_condition.inc
--echo # Slave: send slave stop
--send STOP SLAVE
--connection slave1
--echo # Slave1: wait for stop slave
let $wait_condition=
SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.PROCESSLIST
WHERE INFO = "STOP SLAVE";
--source include/wait_condition.inc
--echo # Slave1: unlock the table
UNLOCK TABLES;
--connection slave
--echo # Slave: wait for the slave to stop
--reap
--source include/wait_for_slave_to_stop.inc
--echo # Start slave again
--source include/start_slave.inc
--echo # Clean up
--connection master
DROP TABLE t1;
sync_slave_with_master;
# End of tests