mariadb/mysql-test/suite/galera/t/galera_vote_during_ist.test
Jan Lindström d7457b4076 MDEV-36628 : galera_vote_during_ist test failed
Test changes only. Added wait_conditions to wait for expected
database state.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
2025-05-20 12:30:40 +02:00

204 lines
6.2 KiB
Text

#
# Test a case where a joiner encounters an error during IST
# Instead of voting it should assume error and bail out.
#
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
# Make sure that the test is operating on the right version of galera library.
--let $galera_version=26.4.19
source ../wsrep/include/check_galera_version.inc;
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3
--let $node_4=node_4
--source ../include/auto_increment_offset_save.inc
# create table t1 and procedure p1 to generate wirtesets
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);
DELIMITER |;
CREATE PROCEDURE p1(IN max INT)
BEGIN
DECLARE i INT;
DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;
SET i = 0;
WHILE i < max DO
INSERT IGNORE INTO t1 VALUES (DEFAULT);
SET i = i + 1;
END WHILE;
END|
DELIMITER ;|
CALL p1(130);
--connection node_4
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 130 FROM t1;
--source include/wait_condition.inc
--echo Shutting down server 4...
--let $node_4_server_id= `SELECT @@server_id`
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
--let $node_4_pid_file= `SELECT @@pid_file`
--source include/shutdown_mysqld.inc
--source include/wait_until_disconnected.inc
# Wait for node #4 to leave cluster
--let $members = 3
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--echo Server 4 left the cluster
# Create some writesets for IST
--connection node_1
CALL p1(130);
# Create a writeset that node 4 won't be able to apply by creating a table
# that won't be present in the replication stream
--connection node_1
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--source include/wait_until_ready.inc
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 260 FROM t1;
--source include/wait_condition.inc
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--source include/wait_until_ready.inc
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 260 FROM t1;
--source include/wait_condition.inc
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--source include/wait_until_ready.inc
# This should cause error during IST
INSERT INTO t2 VALUES (DEFAULT);
# make sure nodes 1,2,3 progress far enough for commit cut update
CALL p1(130);
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 390 FROM t1;
--source include/wait_condition.inc
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 390 FROM t1;
--source include/wait_condition.inc
--connection node_1
# prepare to stop SST donor thread when it receives a request from starting node #4
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";
--echo Restarting server 4
# Need to use this form instead of start_mysqld.inc because the latter is blocking
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name
--echo Wait for server 1 to become a donor
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
--echo Server 1 got SST request from server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
#
# After this point node #4 shall proceed to IST and bail out
#
--echo Waiting for server 4 to leave the cluster
--let $members = 3
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--connection node_4
--echo Server 4 left the cluster, killing it...
# Kill the connected server
--exec echo "wait" > $node_4_expect_file_name
--let KILL_NODE_PIDFILE = $node_4_pid_file
--perl
my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
my $mysqld_pid = `cat $pid_filename`;
chomp($mysqld_pid);
system("kill -9 $mysqld_pid");
exit(0);
EOF
--echo Killed server 4...
--source include/wait_until_disconnected.inc
--connection node_1
--source include/wait_until_ready.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment';
--let $wait_condition_on_error_output = SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment';
--source include/wait_condition_with_debug.inc
--connection node_4
--echo Restarting server 4...
--source include/start_mysqld.inc
--source include/galera_wait_ready.inc
# Confirm node #4 has rejoined
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
# Confirm that all is good and all nodes have identical data
--connection node_1
SELECT count(*) AS expect1_390 FROM t1;
SELECT count(*) AS expect1_1 FROM t2;
--connection node_2
SELECT count(*) AS expect2_390 FROM t1;
SELECT count(*) AS expect2_1 FROM t2;
--connection node_3
SELECT count(*) AS expect3_390 FROM t1;
SELECT count(*) AS expect3_1 FROM t2;
--connection node_4
--let $wait_condition = SELECT COUNT(*) = 390 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1 FROM t2;
--source include/wait_condition.inc
SELECT count(*) AS expect4_390 FROM t1;
SELECT count(*) AS expect4_1 FROM t2;
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;
CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table ");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
CALL mtr.add_suppression("Failed to apply write set");
CALL mtr.add_suppression("Sending JOIN failed: -103");
CALL mtr.add_suppression("Failed to JOIN the cluster after SST");
--source ../include/auto_increment_offset_restore.inc