MDEV-33327: rpl_seconds_behind_master_spike Sensitive to IO Thread Stop Position

rpl.rpl_seconds_behind_master_spike uses the DEBUG_SYNC mechanism to
count how many format descriptor events (FDEs) have been executed,
to attempt to pause on a specific relay log FDE after executing
transactions. However, depending on when the IO thread is stopped,
it can send an extra FDE before sending the transactions, forcing
the test to pause before executing any transactions, resulting in a
table not existing, that is attempted to be read for COUNT.

This patch fixes this by no longer counting FDEs, but rather by
programmatically waiting until the SQL thread has executed the
transaction and then automatically activating the DEBUG_SYNC point
to trigger at the next relay log FDE.
This commit is contained in:
Brandon Nesterenko 2024-01-29 15:17:57 -07:00
parent c768ac6208
commit e4f221a5f2
3 changed files with 17 additions and 15 deletions

View file

@ -3,7 +3,8 @@ include/master-slave.inc
connection slave;
include/stop_slave.inc
SET @save_dbug= @@GLOBAL.debug_dbug;
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde,negate_clock_diff_with_master";
SET @@global.debug_dbug="+d,pause_sql_thread_on_relay_fde_after_trans";
SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
include/start_slave.inc
# Future events must be logged at least 2 seconds after
# the slave starts
@ -15,11 +16,6 @@ insert into t1 values (1);
# event in its relay log
flush logs;
connection slave;
# Ignore FDEs that happen before the CREATE/INSERT commands
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
# On the next FDE, the slave should have the master CREATE/INSERT events
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
select count(*)=1 from t1;

View file

@ -27,7 +27,8 @@
--connection slave
--source include/stop_slave.inc
SET @save_dbug= @@GLOBAL.debug_dbug;
SET @@global.debug_dbug="+d,pause_sql_thread_on_fde,negate_clock_diff_with_master";
SET @@global.debug_dbug="+d,pause_sql_thread_on_relay_fde_after_trans";
SET @@global.debug_dbug="+d,negate_clock_diff_with_master";
--source include/start_slave.inc
--let $sleep_time=2
@ -46,12 +47,6 @@ insert into t1 values (1);
flush logs;
--connection slave
--echo # Ignore FDEs that happen before the CREATE/INSERT commands
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
SET DEBUG_SYNC='now SIGNAL sql_thread_continue';
--echo # On the next FDE, the slave should have the master CREATE/INSERT events
SET DEBUG_SYNC='now WAIT_FOR paused_on_fde';
select count(*)=1 from t1;

View file

@ -4301,6 +4301,15 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
{
Gtid_log_event *gev= static_cast<Gtid_log_event *>(ev);
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF(
"pause_sql_thread_on_relay_fde_after_trans",
{
DBUG_SET("-d,pause_sql_thread_on_relay_fde_after_trans");
DBUG_SET("+d,pause_sql_thread_on_next_relay_fde");
});
#endif
/*
For GTID, allocate a new sub_id for the given domain_id.
The sub_id must be allocated in increasing order of binlog order.
@ -4451,12 +4460,14 @@ static int exec_relay_log_event(THD* thd, Relay_log_info* rli,
#endif /* WITH_WSREP */
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF(
"pause_sql_thread_on_fde",
if (ev && typ == FORMAT_DESCRIPTION_EVENT) {
"pause_sql_thread_on_next_relay_fde",
if (ev && typ == FORMAT_DESCRIPTION_EVENT &&
((Format_description_log_event *) ev)->is_relay_log_event()) {
DBUG_ASSERT(!debug_sync_set_action(
thd,
STRING_WITH_LEN(
"now SIGNAL paused_on_fde WAIT_FOR sql_thread_continue")));
DBUG_SET("-d,pause_sql_thread_on_next_relay_fde");
});
#endif