From a06d81ff3f3124abf7d889fcd3b9de98f9f6a4dc Mon Sep 17 00:00:00 2001 From: Brandon Nesterenko Date: Thu, 21 Nov 2024 11:28:59 -0700 Subject: [PATCH] MDEV-35477: rpl_semi_sync_no_missed_ack_after_add_slave fails after MDEV-35109 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MTR test rpl_semi_sync_no_missed_ack_after_add_slave fails on buildbot after the preparatory commit for MDEV-35109 (5290fa043b1), which changed a sleep to a debug_sync point. The problem is that the debug_sync point would time out on a slave while waiting to enter the logic to send an ACK reply. More specifically, the test config is a primary with two replicas, and the test waits on one of the replicas to start sending an ACK. If the other replica was able to receive the event and respond with an ACK before the binlog dump thread of the timing-out server prepared to send the event, that dump thread wouldn't set the SEMI_SYNC_NEED_ACK flag, and the replica wouldn't even try to respond with an ACK. The fix is to use debug_sync for both replicas such that both replicas are held before sending their ACK, so one can’t temporarily disable semi-sync for the other before that replica receives the transaction. 
--- ..._sync_no_missed_ack_after_add_slave.result | 24 ++++++++++---- ...mi_sync_no_missed_ack_after_add_slave.test | 32 +++++++++++++------ 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_no_missed_ack_after_add_slave.result b/mysql-test/suite/rpl/r/rpl_semi_sync_no_missed_ack_after_add_slave.result index 4b21435d2b5..8034314c90f 100644 --- a/mysql-test/suite/rpl/r/rpl_semi_sync_no_missed_ack_after_add_slave.result +++ b/mysql-test/suite/rpl/r/rpl_semi_sync_no_missed_ack_after_add_slave.result @@ -3,7 +3,7 @@ connection server_1; set @old_enabled= @@global.rpl_semi_sync_master_enabled; set @old_timeout= @@global.rpl_semi_sync_master_timeout; set global rpl_semi_sync_master_enabled= 1; -set global rpl_semi_sync_master_timeout= 500; +set global rpl_semi_sync_master_timeout= 2000; connection server_2; include/stop_slave.inc set @old_enabled= @@global.rpl_semi_sync_slave_enabled; @@ -14,15 +14,26 @@ include/start_slave.inc connection server_3; include/stop_slave.inc set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set @old_dbug= @@global.debug_dbug; set global rpl_semi_sync_slave_enabled= 1; +set global debug_dbug="+d,simulate_delay_semisync_slave_reply"; include/start_slave.inc # Ensure primary recognizes both replicas are semi-sync connection server_1; -connection server_1; -create table t1 (a int); +connection default; +create table t1 (a int);; +# Ensure both slaves get the event with the need_ack flag set (i.e. one +# slave shouldn't be able to receive the event and send an ACK before +# the dump thread for the other server prepares the event to send). 
+connection server_3; +set debug_sync= "now wait_for io_thd_at_slave_reply"; +connection server_2; +set debug_sync= "now wait_for io_thd_at_slave_reply"; +connection server_3; +set debug_sync= "now signal io_thd_do_reply"; +connection default; connection server_2; # Verifying server_2 did not send ACK -set debug_sync= "now wait_for io_thd_at_slave_reply"; connection server_3; # Verifying server_3 did send ACK connection server_1; @@ -33,12 +44,13 @@ set debug_sync= "now signal io_thd_do_reply"; # # Cleanup connection server_2; +include/stop_slave.inc set global rpl_semi_sync_slave_enabled= @old_enabled; set global debug_dbug= @old_dbug; -include/stop_slave.inc connection server_3; -set global rpl_semi_sync_slave_enabled= @old_enabled; include/stop_slave.inc +set global rpl_semi_sync_slave_enabled= @old_enabled; +set global debug_dbug= @old_dbug; connection server_1; set global rpl_semi_sync_master_enabled= @old_enabled; set global rpl_semi_sync_master_timeout= @old_timeout; diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.test b/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.test index 2a74bfc80e5..9d8f87b4345 100644 --- a/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.test +++ b/mysql-test/suite/rpl/t/rpl_semi_sync_no_missed_ack_after_add_slave.test @@ -26,7 +26,7 @@ set @old_enabled= @@global.rpl_semi_sync_master_enabled; set @old_timeout= @@global.rpl_semi_sync_master_timeout; set global rpl_semi_sync_master_enabled= 1; -set global rpl_semi_sync_master_timeout= 500; +set global rpl_semi_sync_master_timeout= 2000; # 2s --connection server_2 --source include/stop_slave.inc @@ -39,7 +39,9 @@ set global debug_dbug="+d,simulate_delay_semisync_slave_reply"; --connection server_3 --source include/stop_slave.inc set @old_enabled= @@global.rpl_semi_sync_slave_enabled; +set @old_dbug= @@global.debug_dbug; set global rpl_semi_sync_slave_enabled= 1; +set global 
debug_dbug="+d,simulate_delay_semisync_slave_reply"; --source include/start_slave.inc --echo # Ensure primary recognizes both replicas are semi-sync @@ -55,13 +57,26 @@ if (`SELECT strcmp("$master_ss_status", "ON") != 0`) --die rpl_semi_sync_master_status should be ON to start } ---connection server_1 +--connection default --let $init_master_yes_tx= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_master_yes_tx', Value, 1) -create table t1 (a int); +--send create table t1 (a int); + +--echo # Ensure both slaves get the event with the need_ack flag set (i.e. one +--echo # slave shouldn't be able to receive the event and send an ACK before +--echo # the dump thread for the other server prepares the event to send). +--connection server_3 +set debug_sync= "now wait_for io_thd_at_slave_reply"; +--connection server_2 +set debug_sync= "now wait_for io_thd_at_slave_reply"; +--connection server_3 +set debug_sync= "now signal io_thd_do_reply"; + +--connection default +--reap + --connection server_2 --echo # Verifying server_2 did not send ACK -set debug_sync= "now wait_for io_thd_at_slave_reply"; --let $slave1_sent_ack= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_slave_send_ack', Value, 1) if (`SELECT $slave1_sent_ack`) { @@ -71,10 +86,6 @@ if (`SELECT $slave1_sent_ack`) --connection server_3 --echo # Verifying server_3 did send ACK ---let $status_var= Rpl_semi_sync_slave_send_ack ---let $status_var_comparsion= > ---let $status_var_value= 0 ---source include/wait_for_status_var.inc --let $slave2_sent_ack= query_get_value(SHOW STATUS LIKE 'rpl_semi_sync_slave_send_ack', Value, 1) if (`SELECT NOT $slave2_sent_ack`) { @@ -108,13 +119,14 @@ set debug_sync= "now signal io_thd_do_reply"; --echo # Cleanup --connection server_2 +--source include/stop_slave.inc set global rpl_semi_sync_slave_enabled= @old_enabled; set global debug_dbug= @old_dbug; ---source include/stop_slave.inc --connection server_3 -set global rpl_semi_sync_slave_enabled= @old_enabled; --source 
include/stop_slave.inc +set global rpl_semi_sync_slave_enabled= @old_enabled; +set global debug_dbug= @old_dbug; --connection server_1 set global rpl_semi_sync_master_enabled= @old_enabled;