mirror of
https://github.com/MariaDB/server.git
synced 2025-02-09 23:24:11 +01:00
68938d2b42
The failing test case validates Seconds_Behind_Master for a delayed slave, while STOP SLAVE is executed during a delay. The test fixes initially added to the test (commitb04c857596
) added a table lock to ensure a transaction could not finish before validating the Seconds_Behind_Master field after SLAVE START, but did not address a possibility that the transaction could finish before running the STOP SLAVE command, which invalidates the validations for the rest of the test case. Specifically, this would result in 1) a timeout in “Waiting for table metadata lock” on the replica, which expects the transaction to retry after slave restart and hit a lock conflict on the locked tables (added inb04c857596
), and 2) that Seconds_Behind_Master should have increased, but did not. The failure can be reproduced by synchronizing the slave to the master before the MDEV-32265 echo statement (i.e. before the SLAVE STOP). This patch fixes the test by adding a mechanism to use DEBUG_SYNC to synchronize a MASTER_DELAY, rather than continually increase the duration of the delay each time the test fails on buildbot. This is to ensure that on slow machines, a delay does not pass before the test gets a chance to validate results. Additionally, it decreases overall test time because the test can continue immediately after validation, thereby bypassing the remainder of a full delay for each transaction.
266 lines
9.8 KiB
Text
266 lines
9.8 KiB
Text
#
|
|
# Ensure that Seconds_Behind_Master works correctly on the parallel replica.
|
|
#
|
|
--source include/have_log_bin.inc
|
|
--source include/have_debug.inc
|
|
--source include/have_debug_sync.inc
|
|
--source include/master-slave.inc
|
|
|
|
--echo #
|
|
--echo # MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel Replicas
|
|
--echo #
|
|
|
|
# This test ensures that after a delayed parallel slave has idled, i.e.
|
|
# executed everything in its relay log, the next event group that the SQL
|
|
# thread reads from the relay log will immediately be used in the
|
|
# Seconds_Behind_Master. In particular, it ensures that the calculation for
|
|
# Seconds_Behind_Master is based on the timestamp of the new transaction,
|
|
# rather than the last committed transaction.
|
|
#
|
|
# Note that the test doesn't actually use the value of MASTER_DELAY, but
|
|
# rather uses debug_sync to coordinate the end of the delay. This is to ensure
|
|
# that on slow machines, a delay doesn't pass before the test gets a chance to
|
|
# validate results. Additionally, it lets us continue testing after validation
|
|
# so we don't have to wait out a full delay unnecessarily. The debug_sync point
|
|
# is enabled via sql_delay_by_debug_sync, which will delay transactions based
|
|
# only on GTID events, so only one synchronization is needed per transaction.
|
|
#
|
|
|
|
--connection slave
|
|
--source include/stop_slave.inc
|
|
--let $save_dbug= `SELECT @@GLOBAL.debug_dbug`
|
|
--let $save_parallel_mode= `SELECT @@GLOBAL.slave_parallel_mode`
|
|
set @@GLOBAL.slave_parallel_mode= CONSERVATIVE;
|
|
set @@GLOBAL.debug_dbug= "d,negate_clock_diff_with_master,sql_delay_by_debug_sync";
|
|
|
|
--let $master_delay= 1
|
|
--eval change master to master_delay=$master_delay, master_use_gtid=Slave_Pos
|
|
--source include/start_slave.inc
|
|
|
|
--connection master
|
|
--let insert_ctr= 0
|
|
create table t1 (a int);
|
|
create table t2 (a int);
|
|
--source include/save_master_gtid.inc
|
|
--connection slave
|
|
--let $slave_sync_method= gtid
|
|
--let $num_event_groups= 2
|
|
--source include/sync_with_master_sql_delay_debug_sync.inc
|
|
|
|
--echo #
|
|
--echo # Pt 1) Ensure SBM is updated immediately upon arrival of the next event
|
|
|
|
--connection master
|
|
--echo # Sleep 2 to allow a buffer between events for SBM check
|
|
sleep 2;
|
|
|
|
--let $ts_trx_before_ins= `SELECT UNIX_TIMESTAMP()`
|
|
--eval insert into t1 values ($insert_ctr)
|
|
--inc $insert_ctr
|
|
--source include/save_master_gtid.inc
|
|
|
|
--connection slave
|
|
|
|
--echo # Waiting for transaction to arrive on slave and begin SQL Delay..
|
|
set debug_sync= "now WAIT_FOR at_sql_delay";
|
|
|
|
--echo # Validating SBM is updated on event arrival..
|
|
--let $sbm_trx1_arrive= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
|
|
--let $seconds_since_idling= `SELECT UNIX_TIMESTAMP() - $ts_trx_before_ins`
|
|
if (`SELECT $sbm_trx1_arrive > ($seconds_since_idling + 1)`)
|
|
{
|
|
--echo # SBM was $sbm_trx1_arrive yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
|
|
--die Seconds_Behind_Master should reset after idling
|
|
}
|
|
--echo # ..done
|
|
|
|
|
|
--echo # MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently
|
|
--echo # delaying a transaction; then when the reciprocal START SLAVE occurs,
|
|
--echo # if the event is still to be delayed, SBM should resume accordingly
|
|
|
|
--connection server_2
|
|
--echo # Ensure the kill from STOP SLAVE will be received before continuing the
|
|
--echo # SQL thread
|
|
set debug_sync="after_thd_awake_kill SIGNAL slave_notified_of_kill";
|
|
--send STOP SLAVE
|
|
|
|
--connection slave
|
|
set debug_sync= "now WAIT_FOR slave_notified_of_kill";
|
|
set debug_sync= "now SIGNAL continue_sql_thread";
|
|
|
|
--connection server_2
|
|
--reap
|
|
--source include/wait_for_slave_to_stop.inc
|
|
set debug_sync="RESET";
|
|
|
|
--echo # Lock t1 on slave to ensure the event can't finish (and thereby update
|
|
--echo # Seconds_Behind_Master) so slow running servers don't accidentally
|
|
--echo # catch up to the master before checking SBM.
|
|
--connection server_2
|
|
LOCK TABLES t1 WRITE;
|
|
|
|
--source include/start_slave.inc
|
|
|
|
--connection slave
|
|
|
|
--echo # SQL delay has no impact for the rest of the test case, so ignore it
|
|
--let $slave_sync_method= none
|
|
--source include/sync_with_master_sql_delay_debug_sync.inc
|
|
|
|
--echo # Waiting for replica to get blocked by the table lock
|
|
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock';
|
|
--source include/wait_condition.inc
|
|
|
|
--echo # Sleeping 1s to increment SBM
|
|
sleep 1;
|
|
|
|
--echo # Ensuring Seconds_Behind_Master increases after sleeping..
|
|
--let $sbm_trx1_after_1s_sleep= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
|
|
if (`SELECT $sbm_trx1_after_1s_sleep <= $sbm_trx1_arrive`)
|
|
{
|
|
--echo # ..failed
|
|
--die Seconds_Behind_Master did not increase after sleeping, but should have
|
|
}
|
|
--echo # ..done
|
|
|
|
--connection server_2
|
|
UNLOCK TABLES;
|
|
|
|
--source include/sync_with_master_gtid.inc
|
|
|
|
--echo #
|
|
--echo # Pt 2) If the worker threads have not entered an idle state, ensure
|
|
--echo # following events do not update SBM
|
|
|
|
--connection slave
|
|
LOCK TABLES t1 WRITE;
|
|
|
|
--connection master
|
|
--echo # Sleep 2 to allow a buffer between events for SBM check
|
|
sleep 2;
|
|
--let $ts_trxpt2_before_ins= `SELECT UNIX_TIMESTAMP()`
|
|
--eval insert into t1 values ($insert_ctr)
|
|
--inc $insert_ctr
|
|
--echo # Sleep 3 to create gap between events
|
|
sleep 3;
|
|
--eval insert into t1 values ($insert_ctr)
|
|
--inc $insert_ctr
|
|
--let $ts_trx_after_ins= `SELECT UNIX_TIMESTAMP()`
|
|
--source include/save_master_pos.inc
|
|
|
|
--connection slave
|
|
--echo # Wait for first transaction to complete SQL delay and begin execution..
|
|
--let $slave_sync_method= none
|
|
--source include/sync_with_master_sql_delay_debug_sync.inc
|
|
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock%' AND command LIKE 'Slave_Worker';
|
|
--source include/wait_condition.inc
|
|
|
|
--echo # Wait for second transaction to complete SQL delay..
|
|
--source include/sync_with_master_sql_delay_debug_sync.inc
|
|
|
|
--echo # Validate SBM calculation doesn't use the second transaction because worker threads shouldn't have gone idle..
|
|
--let $sbm_after_trx_no_idle= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
|
|
--let $timestamp_trxpt2_arrive= `SELECT UNIX_TIMESTAMP()`
|
|
if (`SELECT $sbm_after_trx_no_idle < $timestamp_trxpt2_arrive - $ts_trx_after_ins - 1`)
|
|
{
|
|
--let $cmpv= `SELECT $timestamp_trxpt2_arrive - $ts_trx_after_ins`
|
|
--echo # SBM $sbm_after_trx_no_idle was more recent than time since last transaction ($cmpv seconds)
|
|
--die Seconds_Behind_Master should not have used second transaction timestamp
|
|
}
|
|
--let $seconds_since_idling= `SELECT ($timestamp_trxpt2_arrive - $ts_trxpt2_before_ins)`
|
|
--echo # ..and that SBM wasn't calculated using prior committed transactions
|
|
if (`SELECT $sbm_after_trx_no_idle > ($seconds_since_idling + 1)`)
|
|
{
|
|
--echo # SBM was $sbm_after_trx_no_idle yet shouldn't have been larger than $seconds_since_idling + 1 (for possible negative clock_diff_with_master)
|
|
--die Seconds_Behind_Master calculation should not have used prior committed transaction
|
|
}
|
|
--echo # ..done
|
|
|
|
--connection slave
|
|
UNLOCK TABLES;
|
|
--source include/sync_with_master.inc
|
|
|
|
--echo # Cleanup
|
|
--source include/stop_slave.inc
|
|
set debug_sync= "RESET";
|
|
set @@GLOBAL.debug_dbug= "-d,sql_delay_by_debug_sync";
|
|
--eval CHANGE MASTER TO master_delay=0
|
|
--source include/start_slave.inc
|
|
|
|
|
|
--echo #
|
|
--echo # MDEV-30619: Parallel Slave SQL Thread Can Update Seconds_Behind_Master with Active Workers
|
|
--echo #
|
|
|
|
# This test ensures that a parallel slave will not update
|
|
# Seconds_Behind_Master after the SQL Thread has idled if the worker threads
|
|
# are still executing events. To test this, two events are executed on the
|
|
# primary with $sleep seconds in-between them. Once the second event begins
|
|
# execution on the replica, Seconds_Behind_Master is queried to ensure it
|
|
# reflects the value of the first transaction, rather than the second.
|
|
|
|
--connection slave
|
|
--echo # Ensure the replica is fully idle before starting transactions
|
|
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Slave has read all relay log%';
|
|
--source include/wait_condition.inc
|
|
--let $wait_condition= SELECT count(*)=2 FROM information_schema.processlist WHERE state LIKE 'Waiting for work from SQL thread';
|
|
--source include/wait_condition.inc
|
|
|
|
--echo # Lock t1 on slave so the first received transaction does not complete/commit
|
|
LOCK TABLES t1 WRITE;
|
|
|
|
--connection master
|
|
--let $ts_t1_before_master_ins= `SELECT UNIX_TIMESTAMP()`
|
|
--eval insert into t1 values ($insert_ctr)
|
|
--inc $insert_ctr
|
|
--source include/save_master_gtid.inc
|
|
|
|
--connection slave
|
|
--echo # Waiting for first transaction to begin..
|
|
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for table metadata lock';
|
|
--source include/wait_condition.inc
|
|
|
|
--let $sbm_1= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
|
|
|
|
--connection master
|
|
--let $sleep = 2
|
|
--echo # Sleep $sleep sec to create a gap between events
|
|
sleep $sleep;
|
|
INSERT INTO t2 VALUES (1);
|
|
--source include/save_master_gtid.inc
|
|
|
|
--connection slave
|
|
--echo # Waiting for second transaction to begin..
|
|
--let $wait_condition= SELECT count(*) FROM information_schema.processlist WHERE state LIKE 'Waiting for prior transaction to start commit%';
|
|
--source include/wait_condition.inc
|
|
|
|
--let $sbm_2= query_get_value(SHOW SLAVE STATUS, Seconds_Behind_Master, 1)
|
|
|
|
if (`SELECT $sbm_1 + $sleep > $sbm_2`)
|
|
{
|
|
--echo # Seconds_Behind_Masters: $sbm_1 $sbm_2_0
|
|
--die Two successive Seconds_Behind_Master timestamps must be separated by the sleep parameter value or greater
|
|
}
|
|
|
|
--connection slave
|
|
UNLOCK TABLES;
|
|
--source include/sync_with_master_gtid.inc
|
|
|
|
|
|
--echo #
|
|
--echo # Cleanup
|
|
|
|
--connection master
|
|
DROP TABLE t1, t2;
|
|
--source include/save_master_gtid.inc
|
|
|
|
--connection slave
|
|
--source include/sync_with_master_gtid.inc
|
|
--source include/stop_slave.inc
|
|
--eval set @@GLOBAL.debug_dbug= "$save_dbug"
|
|
--evalp set @@GLOBAL.slave_parallel_mode= "$save_parallel_mode"
|
|
--source include/start_slave.inc
|
|
|
|
--source include/rpl_end.inc
|
|
--echo # End of rpl_parallel_sbm.test
|