mariadb/mysql-test/suite/galera/t/galera_vote_during_ist.test
Alexey Yurchenko 77391482bd MDEV-34998: master can stop responding after cluster vote to evict a node
Additional corrections: there is a natural race between closing connection
to cluster in case of applying error and finishing the IST and sometimes
IST finishes and tries to send JOIN message over a closed connection.
This does not affect the correctness of the test or node behavior.
Added error message suppression.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
2025-04-14 18:20:25 +02:00

165 lines
4.7 KiB
Text

#
# Test a case where a joiner encounters an error during IST
# Instead of voting it should assume error and bail out.
#
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
# Make sure that the test is operating on the right version of galera library.
--let $galera_version=26.4.19
source ../wsrep/include/check_galera_version.inc;
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3
--let $node_4=node_4
--source ../include/auto_increment_offset_save.inc
# create table t1 and procedure p1 to generate wirtesets
--connection node_1
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
DELIMITER |;
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
DELIMITER ;|
CALL p1(130);
--connection node_4
--echo Shutting down server 4...
--let $node_4_server_id= `SELECT @@server_id`
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
--let $node_4_pid_file= `SELECT @@pid_file`
--source include/shutdown_mysqld.inc
# Wait for node #4 to leave cluster
--let $members = 3
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--echo Server 4 left the cluster
# Create some writesets for IST
--connection node_1
CALL p1(130);
# Create a writeset that node 4 won't be able to apply by creating a table
# that won't be present in the replication stream
--connection node_1
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_2
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--connection node_3
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
# This should cause error during IST
INSERT INTO t2 VALUES (DEFAULT);
# make sure nodes 1,2,3 progress far enough for commit cut update
CALL p1(130);
--connection node_1
# prepare to stop SST donor thread when it receives a request from starting node #4
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
--echo Restarting server 4
# Need to use this form instead of start_mysqld.inc because the latter is blocking
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
--echo Wait for server 1 to become a donor
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
--echo Server 1 got SST request from server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
#
# After this point node #4 shall proceed to IST and bail out
#
--echo Waiting for server 4 to leave the cluster
--let $members = 3
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--connection node_4
--echo Server 4 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_4_expect_file_name
--let KILL_NODE_PIDFILE = $node_4_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 4...
--source include/wait_until_disconnected.inc
--echo Restarting server 4...
--source include/start_mysqld.inc
--source include/galera_wait_ready.inc
# Confirm node #4 has rejoined
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
# Confirm that all is good and all nodes have identical data
--connection node_1
SELECT count(*) AS expect1_390 FROM t1;
SELECT count(*) AS expect1_1 FROM t2;
--connection node_2
SELECT count(*) AS expect2_390 FROM t1;
SELECT count(*) AS expect2_1 FROM t2;
--connection node_3
SELECT count(*) AS expect3_390 FROM t1;
SELECT count(*) AS expect3_1 FROM t2;
--connection node_4
SELECT count(*) AS expect4_390 FROM t1;
SELECT count(*) AS expect4_1 FROM t2;
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;
CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table ");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
CALL mtr.add_suppression("Failed to apply write set");
CALL mtr.add_suppression("Sending JOIN failed: -103");
CALL mtr.add_suppression("Failed to JOIN the cluster after SST");
--source ../include/auto_increment_offset_restore.inc