MDEV-35109 PREP: simulate_delay_semisync_slave_reply use debug_sync

This is a preparatory commit for MDEV-35109 to make its
testing code cleaner (and harden other tests too).

The DEBUG_DBUG point simulate_delay_semisync_slave_reply
up to this patch used my_sleep() to delay an ACK response,
but sleeps are prone to test failures on machines that
run tests when already having a heavy load (e.g. on
buildbot).

This patch changes this DEBUG_DBUG sleep to use DEBUG_SYNC
to coordinate exactly when a slave should send its reply,
which is safer and faster.

As DEBUG_SYNC can't be used while a server is shutting
down, to synchronize threads with SHUTDOWN WAIT FOR SLAVES
logic, we use and extend wait_for_pattern_in_file.inc to
wait for an informational error message in the logic to
indicate that the shutdown process has reached the
intended state (i.e. indicating that the shutdown has
been delayed to await semi-sync ACKs). Specifically, the
extensions are as follows:

 1. wait_for_pattern_in_file.inc is extended with parameter
    wait_for_pattern_count as a number that indicates the
    number of times a pattern should occur in the file before
    return control back to the calling script.

 2. search_for_pattern_in_file.inc is extended with parameter
    SEARCH_ABORT_IS_SUCCESS to inverse the error/success
    logic, so the SEARCH_ABORT condition can be used to
    indicate success, rather than error.
This commit is contained in:
Brandon Nesterenko 2024-11-01 13:13:07 -06:00
parent f2bb2ab58c
commit 5290fa043b
8 changed files with 206 additions and 24 deletions

View file

@ -18,6 +18,23 @@
# Optionally, SEARCH_ABORT can be set to "FOUND" or "NOT FOUND" and this
# will abort if the search result doesn't match the requested one.
#
# Optionally, SEARCH_ABORT_IS_SUCCESS can be set to inverse the error logic
# when the SEARCH_ABORT condition is met. The specific cases and behaviors
# become:
#
# 1) In the normal case (SEARCH_ABORT_IS_SUCCESS is NOT set), when the
# matching result (FOUND N or NOT FOUND) matches SEARCH_ABORT, exit in
# error (via die); otherwise, we assume success and print the matching
# result. This is used in the normal search_pattern_in_file.inc case,
# as well as wait_for_pattern_in_file.inc when searching that some
# pattern exists at all (i.e. a result of NOT FOUND will trigger die,
# so the script can continue waiting until the result is found, and
# finally print the result).
#
# 2) If SEARCH_ABORT_IS_SUCCESS is set, then we want to inverse the logic
# from case (1). That is, if the match result is SEARCH_ABORT, this
# is the success case, and we die for all other matching results.
#
# Optionally, SEARCH_OUTPUT can be set to control the format of output.
# Supported formats:
# - (default) : "FOUND n /pattern/ in FILE " or "NOT FOUND ..."
@ -82,14 +99,26 @@ perl;
$ENV{SEARCH_FILE} =~ s{^.*?([^/\\]+)$}{$1};
my $print_func= sub { print($_[0]); };
my $die_func= sub { die($_[0]); };
my $abort_func;
my $match_func;
if ($ENV{SEARCH_ABORT} and $ENV{SEARCH_ABORT_IS_SUCCESS}) {
$abort_func= \&$print_func;
$match_func= \&$die_func;
} else {
$abort_func= \&$die_func;
$match_func= \&$print_func;
}
if ($ENV{SEARCH_OUTPUT} eq "matches") {
foreach (@matches) {
print $_ . "\n";
}
}
elsif ($ENV{SEARCH_ABORT} and $res =~ /^$ENV{SEARCH_ABORT}/) {
die "$res /$search_pattern/ in $ENV{SEARCH_FILE}\n";
&$abort_func("$res /$search_pattern/ in $ENV{SEARCH_FILE}\n");
} else {
print "$res /$search_pattern/ in $ENV{SEARCH_FILE}\n";
&$match_func("$res /$search_pattern/ in $ENV{SEARCH_FILE}\n");
}
EOF

View file

@ -7,6 +7,7 @@
# ==== Usage ====
#
# [--let $timeout= NUMBER in seconds]
# [--let $wait_for_pattern_count= Optional NUMBER of occurences to wait for]
# For other parameters, check search_pattern_in_file.inc
--let $wait_save_keep_include_silent=$keep_include_silent
@ -24,8 +25,21 @@ if (!$_timeout)
}
}
if (!$wait_for_pattern_count)
{
# If we don't care about the number of occurrences of a pattern, use
# NOT FOUND as the abort case
let SEARCH_ABORT=NOT FOUND;
let SEARCH_ABORT_IS_SUCCESS= 0;
}
if ($wait_for_pattern_count)
{
# If we care about waiting for an exact number of occurrences, use that
let SEARCH_ABORT=FOUND $wait_for_pattern_count;
let SEARCH_ABORT_IS_SUCCESS= 1;
}
let $_timeout_counter=`SELECT $_timeout * 10`;
let SEARCH_ABORT=NOT FOUND;
let $_continue= 1;
disable_abort_on_error;
while ($_continue)

View file

@ -22,11 +22,14 @@ connection server_1;
create table t1 (a int);
connection server_2;
# Verifying server_2 did not send ACK
set debug_sync= "now wait_for io_thd_at_slave_reply";
connection server_3;
# Verifying server_3 did send ACK
connection server_1;
# Verifying master's semi-sync status is still ON (This failed pre-MDEV-32960 fixes)
# Verifying rpl_semi_sync_master_yes_tx incremented
connection server_2;
set debug_sync= "now signal io_thd_do_reply";
#
# Cleanup
connection server_2;

View file

@ -83,12 +83,27 @@ connection server_1;
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
connection server_2;
set debug_sync= "now wait_for io_thd_at_slave_reply";
connection server_3;
set debug_sync= "now wait_for io_thd_at_slave_reply";
connection server_1_con2;
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_2;
# Waitng for shutdown to be delayed..
include/wait_for_pattern_in_file.inc
FOUND 1 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection server_2;
set debug_sync= "now signal io_thd_do_reply";
connection server_3;
set debug_sync= "now signal io_thd_do_reply";
# Reaping transaction..
connection server_1;
ERROR HY000: Lost connection to server during query
# Check logs to ensure shutdown was delayed
FOUND 1 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection server_2;
# Reaping shutdown..
connection server_1_con2;
# Validate slave data is in correct state
connection server_2;
select count(*)=1 from t1;
@ -184,12 +199,19 @@ connection server_1;
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
connection server_1_con2;
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_2;
# Waitng for shutdown to be delayed..
include/wait_for_pattern_in_file.inc
FOUND 2 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
# Reaping transaction..
connection server_1;
ERROR HY000: Lost connection to server during query
# Check logs to ensure shutdown was delayed
FOUND 2 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection server_2;
# Reaping shutdown..
connection server_1_con2;
# Validate slave data is in correct state
connection server_2;
select count(*)=0 from t1;
@ -298,12 +320,23 @@ connection server_1;
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
connection server_3;
set debug_sync= "now wait_for io_thd_at_slave_reply";
connection server_1_con2;
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_2;
# Waitng for shutdown to be delayed..
include/wait_for_pattern_in_file.inc
FOUND 3 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection server_3;
set debug_sync= "now signal io_thd_do_reply";
# Reaping transaction..
connection server_1;
ERROR HY000: Lost connection to server during query
# Check logs to ensure shutdown was delayed
FOUND 3 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection server_2;
# Reaping shutdown..
connection server_1_con2;
# Validate slave data is in correct state
connection server_2;
select count(*)=0 from t1;
@ -412,12 +445,23 @@ connection server_1;
INSERT INTO t1 VALUES (1);
connection server_1_con2;
#-- Wait until master recognizes a connection is awaiting semi-sync ACK
connection server_3;
set debug_sync= "now wait_for io_thd_at_slave_reply";
connection server_1_con2;
#-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
connection server_2;
# Waitng for shutdown to be delayed..
include/wait_for_pattern_in_file.inc
FOUND 4 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection server_3;
set debug_sync= "now signal io_thd_do_reply";
# Reaping transaction..
connection server_1;
ERROR HY000: Lost connection to server during query
# Check logs to ensure shutdown was delayed
FOUND 4 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection server_2;
# Reaping shutdown..
connection server_1_con2;
# Validate slave data is in correct state
connection server_2;
select count(*)=0 from t1;
@ -501,15 +545,21 @@ insert into t1 values (2);
connection server_1;
# Wait for thd to begin semi-sync wait..
# ..done
connection server_2;
set debug_sync= "now wait_for io_thd_at_slave_reply";
disconnect con1;
connection default;
connection con2;
SHUTDOWN WAIT FOR ALL SLAVES;
# Ensure the primary waited for the ACK of the killed thread
# Waitng for shutdown to be delayed..
connection server_2;
include/wait_for_pattern_in_file.inc
FOUND 5 /Delaying shutdown to await semi-sync ACK/ in mysqld.1.err
connection default;
connection server_1;
connection con2;
connection server_2;
set debug_sync= "now signal io_thd_do_reply";
include/stop_slave.inc
connection server_3;
include/stop_slave.inc

View file

@ -61,6 +61,7 @@ create table t1 (a int);
--connection server_2
--echo # Verifying server_2 did not send ACK
set debug_sync= "now wait_for io_thd_at_slave_reply";
--let $slave1_sent_ack= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_slave_send_ack', Value, 1)
if (`SELECT $slave1_sent_ack`)
{
@ -99,6 +100,9 @@ if (`SELECT $cur_master_yes_tx != ($init_master_yes_tx + 1)`)
--die rpl_semi_sync_master_yes_tx should have been incremented by primary
}
--connection server_2
set debug_sync= "now signal io_thd_do_reply";
--echo #
--echo # Cleanup

View file

@ -12,10 +12,20 @@
# replica after the shutdown
# server_3_expect_row_count (int) The number of rows expected on the second
# replica after the shutdown
# server_2_sync_slave_reply (bool) True if server_2_dbug is configured to use
# debug_sync to synchronize the slave reply
# (i.e. to pause before the reply)
# server_3_sync_slave_reply (bool) True if server_3_dbug is configured to use
# debug_sync to synchronize the slave reply
# (i.e. to pause before the reply)
#
--connection server_1
let $log_error_file= `SELECT @@GLOBAL.log_error`;
if (!$n_logged_delayed_shutdown_notes)
{
let $n_logged_delayed_shutdown_notes= 0;
}
--echo #--
--echo #-- Semi-sync Setup
@ -78,19 +88,66 @@ let $status_var= Rpl_semi_sync_master_wait_sessions;
let $status_var_value= 1;
source include/wait_for_status_var.inc;
if ($server_2_sync_slave_reply)
{
--connection server_2
set debug_sync= "now wait_for io_thd_at_slave_reply";
}
if ($server_3_sync_slave_reply)
{
--connection server_3
set debug_sync= "now wait_for io_thd_at_slave_reply";
}
--connection server_1_con2
--echo #-- Begin master shutdown
SHUTDOWN WAIT FOR ALL SLAVES;
--source include/wait_until_disconnected.inc
# Use server_2 to search error log because 1 is down
--connection server_2
--echo # Waitng for shutdown to be delayed..
# Increment the number of notes to find each test case to ensure the pattern is
# ours
--inc $n_logged_delayed_shutdown_notes
let SEARCH_FILE= $log_error_file;
let SEARCH_PATTERN=Delaying shutdown to await semi-sync ACK;
--let $wait_for_pattern_count= $n_logged_delayed_shutdown_notes
source include/wait_for_pattern_in_file.inc;
if (`SELECT $server_2_sync_slave_reply AND $server_2_expect_row_count`)
{
--connection server_2
set debug_sync= "now signal io_thd_do_reply";
}
if (`SELECT $server_3_sync_slave_reply AND $server_3_expect_row_count`)
{
--connection server_3
set debug_sync= "now signal io_thd_do_reply";
}
--echo # Reaping transaction..
--connection server_1
--error 2013
--reap
--source include/wait_until_disconnected.inc
--echo # Check logs to ensure shutdown was delayed
--let SEARCH_FILE=$log_error_file
--let SEARCH_PATTERN=Delaying shutdown to await semi-sync ACK
--source include/search_pattern_in_file.inc
# Timeout should be hit from prior reap
--connection server_2
if (`SELECT $server_2_sync_slave_reply AND NOT $server_2_expect_row_count`)
{
--connection server_2
set debug_sync= "now signal io_thd_do_reply";
}
if (`SELECT $server_3_sync_slave_reply AND NOT $server_3_expect_row_count`)
{
--connection server_3
set debug_sync= "now signal io_thd_do_reply";
}
--echo # Reaping shutdown..
--connection server_1_con2
--source include/wait_until_disconnected.inc
--echo # Validate slave data is in correct state
--connection server_2

View file

@ -113,7 +113,9 @@ while (`SELECT $i <= $slave_last`)
--echo # allowed timeout, the primary should delay killing the Ack_thread
--echo # until an ACK is received.
--echo #
--let server_2_sync_slave_reply=1
--let server_2_dbug= "+d,simulate_delay_semisync_slave_reply"
--let server_3_sync_slave_reply=1
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 1
@ -125,7 +127,9 @@ while (`SELECT $i <= $slave_last`)
--echo # the primary should delay killing the Ack_thread until the
--echo # timeout is reached.
--echo #
--let server_2_sync_slave_reply=0
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_sync_slave_reply=0
--let server_3_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let semisync_timeout= 500
--let server_2_expect_row_count= 0
@ -138,7 +142,9 @@ while (`SELECT $i <= $slave_last`)
--echo # primary should delay killing the Ack_thread until it receives an
--echo # ACK from the delayed slave.
--echo #
--let server_2_sync_slave_reply=0
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_sync_slave_reply=1
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 0
@ -156,7 +162,9 @@ while (`SELECT $i <= $slave_last`)
--echo # sent to kill an active connection. This test case validates that the
--echo # slave does not send a `QUIT` in this case.
--echo #
--let server_2_sync_slave_reply=0
--let server_2_dbug= "+d,corrupt_queue_event,delay_semisync_kill_connection_for_mdev_28141"
--let server_3_sync_slave_reply=1
--let server_3_dbug= "+d,simulate_delay_semisync_slave_reply"
--let semisync_timeout= 1600
--let server_2_expect_row_count= 0
@ -208,6 +216,9 @@ SET GLOBAL debug_dbug="+d,simulate_delay_semisync_slave_reply";
--source include/wait_condition.inc
--echo # ..done
--connection server_2
set debug_sync= "now wait_for io_thd_at_slave_reply";
--disconnect con1
--connection default
@ -217,12 +228,14 @@ EOF
--connection con2
SHUTDOWN WAIT FOR ALL SLAVES;
--source include/wait_until_disconnected.inc
--echo # Ensure the primary waited for the ACK of the killed thread
--let $SEARCH_PATTERN= Delaying shutdown to await semi-sync ACK
--let $SEARCH_FILE= $MYSQLTEST_VARDIR/log/mysqld.1.err
--source include/search_pattern_in_file.inc
--echo # Waitng for shutdown to be delayed..
--connection server_2
--inc $n_logged_delayed_shutdown_notes
let SEARCH_FILE= $log_error_file;
let SEARCH_PATTERN=Delaying shutdown to await semi-sync ACK;
--let $wait_for_pattern_count= $n_logged_delayed_shutdown_notes
source include/wait_for_pattern_in_file.inc;
--connection default
--source include/wait_until_disconnected.inc
@ -230,7 +243,11 @@ SHUTDOWN WAIT FOR ALL SLAVES;
--connection server_1
--source include/wait_until_disconnected.inc
--connection con2
--source include/wait_until_disconnected.inc
--connection server_2
set debug_sync= "now signal io_thd_do_reply";
--let $rpl_allow_error= 1
source include/stop_slave.inc;
--connection server_3

View file

@ -5079,8 +5079,16 @@ Stopping slave I/O thread due to out-of-memory error from master");
mi->semi_sync_reply_enabled &&
(mi->semi_ack & SEMI_SYNC_NEED_ACK))
{
DBUG_EXECUTE_IF("simulate_delay_semisync_slave_reply",
my_sleep(800000););
#ifdef ENABLED_DEBUG_SYNC
DBUG_EXECUTE_IF("simulate_delay_semisync_slave_reply", {
const char act[]= "now "
"signal io_thd_at_slave_reply "
"wait_for io_thd_do_reply";
DBUG_ASSERT(debug_sync_service);
DBUG_ASSERT(
!debug_sync_set_action(current_thd, STRING_WITH_LEN(act)));
};);
#endif
if (repl_semisync_slave.slave_reply(mi))
{
/*