mariadb/mysql-test/suite/rpl/r/rpl_parallel_sbm.result
Brandon Nesterenko 01fafd45eb MDEV-35939: rpl.rpl_parallel_sbm: "Slave_last_event_time is not equal to Master_last_event_time"
The test rpl.rpl_parallel_sbm would fail because
Slave_last_event_time would not match Master_last_event_time after
syncing with the master (i.e. via sync_with_master_gtid.inc). This
happens because the timing statistics are updated after updating
gtid_slave_pos. That is, gtid_slave_pos is updated with the
transaction commit, whereas the timing statistics are updated when
the relay log position is updated (i.e. after the commit). This is
by design. For the test, this means there is a small amount of time
after sync_with_master_gtid.inc where the SHOW SLAVE STATUS output
lags behind the data state of the slave. This would cause the test
to fail if comparing Slave_last_event_time to Master_last_event_time
during this period, because Slave_last_event_time would not yet
reflect the latest committed transaction.

The fix is to add a wait_condition before the comparison to wait for
the SHOW SLAVE STATUS statistics to be updated. The actual check is
to wait for Seconds_Behind_Master == 0, because the slave should be
idle at this point, which forces the value to be 0.

The test failure can be reproduced with the following patch:

diff --git a/sql/rpl_gtid.cc b/sql/rpl_gtid.cc
index db5c26b6237..b346a32b573 100644
--- a/sql/rpl_gtid.cc
+++ b/sql/rpl_gtid.cc
@@ -300,6 +300,7 @@ rpl_slave_state::update(uint32 domain_id, uint32 server_id, uint64 sub_id,
   mysql_mutex_lock(&LOCK_slave_state);
   res= update_nolock(domain_id, server_id, sub_id, seq_no, hton, rgi);
   mysql_mutex_unlock(&LOCK_slave_state);
+  sleep(1);
   return res;
 }
2025-01-26 09:17:23 -07:00

140 lines
5 KiB
Text

include/master-slave.inc
[connection master]
#
# MDEV-29639: Seconds_Behind_Master is incorrect for Delayed, Parallel Replicas
#
connection slave;
include/stop_slave.inc
set @@GLOBAL.slave_parallel_mode= CONSERVATIVE;
set @@GLOBAL.debug_dbug= "d,negate_clock_diff_with_master,sql_delay_by_debug_sync";
change master to master_delay=1, master_use_gtid=Slave_Pos;
include/start_slave.inc
connection master;
create table t1 (a int);
create table t2 (a int);
include/save_master_gtid.inc
connection slave;
include/sync_with_master_sql_delay_debug_sync.inc
set debug_sync= "now WAIT_FOR at_sql_delay";
set debug_sync= "now SIGNAL continue_sql_thread";
set debug_sync= "now WAIT_FOR at_sql_delay";
set debug_sync= "now SIGNAL continue_sql_thread";
#
# Pt 1) Ensure SBM is updated immediately upon arrival of the next event
connection master;
# Sleep 2 to allow a buffer between events for SBM check
insert into t1 values (0);
include/save_master_gtid.inc
connection slave;
# Waiting for transaction to arrive on slave and begin SQL Delay..
set debug_sync= "now WAIT_FOR at_sql_delay";
# Validating SBM is updated on event arrival..
# ..done
# MDEV-33856: New definition for Seconds_Behind_Master
# Validating Master_last_event_time is updated on event arrival..
# ..done
# Validating Slave_last_event_time is still from the last transaction..
# ..done
# MDEV-32265. At time of STOP SLAVE, if the SQL Thread is currently
# delaying a transaction; then when the reciprocal START SLAVE occurs,
# if the event is still to be delayed, SBM should resume accordingly
connection server_2;
# Ensure the kill from STOP SLAVE will be received before continuing the
# SQL thread
set debug_sync="after_thd_awake_kill SIGNAL slave_notified_of_kill";
STOP SLAVE;
connection slave;
set debug_sync= "now WAIT_FOR slave_notified_of_kill";
set debug_sync= "now SIGNAL continue_sql_thread";
connection server_2;
include/wait_for_slave_to_stop.inc
set debug_sync="RESET";
# Lock t1 on slave to ensure the event can't finish (and thereby update
# Seconds_Behind_Master) so slow running servers don't accidentally
# catch up to the master before checking SBM.
connection server_2;
LOCK TABLES t1 WRITE;
include/start_slave.inc
connection slave;
# SQL delay has no impact for the rest of the test case, so ignore it
include/sync_with_master_sql_delay_debug_sync.inc
set debug_sync= "now WAIT_FOR at_sql_delay";
set debug_sync= "now SIGNAL continue_sql_thread";
# Waiting for replica to get blocked by the table lock
# Sleeping 1s to increment SBM
# Ensuring Seconds_Behind_Master increases after sleeping..
# ..done
connection server_2;
UNLOCK TABLES;
include/sync_with_master_gtid.inc
# MDEV-33856: New definition for Seconds_Behind_Master
# Waiting for slave timing statistics to update ..
# Ensuring Slave_last_event_time is now up-to-date once event is executed
# ..done
#
# Pt 2) If the worker threads have not entered an idle state, ensure
# following events do not update SBM
connection slave;
LOCK TABLES t1 WRITE;
connection master;
# Sleep 2 to allow a buffer between events for SBM check
insert into t1 values (1);
# Sleep 3 to create gap between events
insert into t1 values (2);
include/save_master_pos.inc
connection slave;
# Wait for first transaction to complete SQL delay and begin execution..
include/sync_with_master_sql_delay_debug_sync.inc
set debug_sync= "now WAIT_FOR at_sql_delay";
set debug_sync= "now SIGNAL continue_sql_thread";
# Wait for second transaction to complete SQL delay..
include/sync_with_master_sql_delay_debug_sync.inc
set debug_sync= "now WAIT_FOR at_sql_delay";
set debug_sync= "now SIGNAL continue_sql_thread";
# Validate SBM calculation doesn't use the second transaction because worker threads shouldn't have gone idle..
# ..and that SBM wasn't calculated using prior committed transactions
# ..done
connection slave;
UNLOCK TABLES;
include/wait_for_slave_param.inc [Relay_Master_Log_File]
include/wait_for_slave_param.inc [Exec_Master_Log_Pos]
# Cleanup
include/stop_slave.inc
set debug_sync= "RESET";
set @@GLOBAL.debug_dbug= "-d,sql_delay_by_debug_sync";
CHANGE MASTER TO master_delay=0;
include/start_slave.inc
#
# MDEV-30619: Parallel Slave SQL Thread Can Update Seconds_Behind_Master with Active Workers
#
connection slave;
# Ensure the replica is fully idle before starting transactions
# Lock t1 on slave so the first received transaction does not complete/commit
LOCK TABLES t1 WRITE;
connection master;
insert into t1 values (3);
include/save_master_gtid.inc
connection slave;
# Waiting for first transaction to begin..
connection master;
# Sleep 2 sec to create a gap between events
INSERT INTO t2 VALUES (1);
include/save_master_gtid.inc
connection slave;
# Waiting for second transaction to begin..
connection slave;
UNLOCK TABLES;
include/sync_with_master_gtid.inc
#
# Cleanup
connection master;
DROP TABLE t1, t2;
include/save_master_gtid.inc
connection slave;
include/sync_with_master_gtid.inc
include/stop_slave.inc
set @@GLOBAL.debug_dbug= "";
set @@GLOBAL.slave_parallel_mode= "$save_parallel_mode";
include/start_slave.inc
include/rpl_end.inc
# End of rpl_parallel_sbm.test