mariadb/mysql-test/suite/galera/t/galera_vote_during_ist.test

#
# Test a case where a joiner encounters an error during IST.
# Instead of voting it should assume error and bail out.
#

# Test prerequisites. The debug and debug_sync includes are needed because the
# test later sets a "+d,..." debug keyword and uses DEBUG_SYNC signalling.
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc

# Make sure that the test is operating on the right version of galera library.
--let $galera_version=26.4.19
source ../wsrep/include/check_galera_version.inc;

# Name all four nodes for auto_increment_offset_save.inc; the matching
# auto_increment_offset_restore.inc is sourced at the very end of the test.
--let $node_1=node_1
--let $node_2=node_2
--let $node_3=node_3
--let $node_4=node_4
--source ../include/auto_increment_offset_save.inc

# Create table t1 and procedure p1 to generate writesets
--connection node_1

# The scenario needs all four nodes in the cluster before it starts.
--let $members = 4
--source include/wsrep_wait_membership.inc

CREATE TABLE t1(pk INT AUTO_INCREMENT PRIMARY KEY);

# p1(max) attempts `max` single-row inserts into t1, one INSERT statement per
# loop iteration. INSERT IGNORE together with the empty CONTINUE handler means
# a failing insert does not abort the loop; the row-count waits further down
# are what actually verify that every insert succeeded.
DELIMITER |;
CREATE PROCEDURE p1(IN max INT)
BEGIN
  DECLARE i INT;
  DECLARE CONTINUE HANDLER FOR SQLEXCEPTION BEGIN END;

  SET i = 0;
  WHILE i < max DO
    INSERT IGNORE INTO t1 VALUES (DEFAULT);
    SET i = i + 1;
  END WHILE;
END|
DELIMITER ;|

# First batch of 130 rows, written while node 4 is still a cluster member.
CALL p1(130);

--connection node_4
# Make sure node 4 has received both the t1 DDL and all 130 rows before it is
# taken down, so that it leaves the cluster with a known state.
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 130 FROM t1;
--source include/wait_condition.inc

--echo Shutting down server 4...
# Capture node 4's expect-file path and pid-file path while the server is
# still reachable; they are used later for the non-blocking restart and for
# the kill -9.
# NOTE(review): the expect-file name assumes the mtr instance is called
# mysqld.<server_id> - confirm against the suite configuration.
--let $node_4_server_id= `SELECT @@server_id`
--let $node_4_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$node_4_server_id.expect
--let $node_4_pid_file= `SELECT @@pid_file`
--source include/shutdown_mysqld.inc
--source include/wait_until_disconnected.inc

# Wait for node #4 to leave cluster
# $members is set once and reused: each of the three remaining nodes must
# report a cluster size of 3 before the test goes on.
--let $members = 3
--connection node_1
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc
--echo Server 4 left the cluster

# Create some writesets for IST
# Node 4 is down at this point, so this second batch of 130 rows (260 in
# total) is something it will have to catch up on when it rejoins.
--connection node_1
CALL p1(130);

# Create a writeset that node 4 won't be able to apply by creating a table
# that won't be present in the replication stream
#
# The same three steps are repeated on nodes 1, 2 and 3: switch wsrep off for
# the session, create t2 locally, switch wsrep back on. As a result t2 exists
# on every running node, but its CREATE TABLE is never replicated.
--connection node_1
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--source include/wait_until_ready.inc

--connection node_2
# Wait for the second p1 batch from node_1 to arrive before doing the local DDL.
--let $wait_condition = SELECT COUNT(*) = 260 FROM t1;
--source include/wait_condition.inc
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--source include/wait_until_ready.inc

--connection node_3
# Wait for the second p1 batch from node_1 to arrive before doing the local DDL.
--let $wait_condition = SELECT COUNT(*) = 260 FROM t1;
--source include/wait_condition.inc
SET SESSION wsrep_on = OFF;
CREATE TABLE t2(pk INT AUTO_INCREMENT PRIMARY KEY);
SET SESSION wsrep_on = ON;
--source include/wait_until_ready.inc

# This should cause error during IST
# (still on connection node_3, with wsrep_on back to ON: the row is written to
# t2, a table that was created locally on nodes 1-3 only, so a joiner that
# lacks t2 cannot apply it)
INSERT INTO t2 VALUES (DEFAULT);

# make sure nodes 1,2,3 progress far enough for commit cut update
# Third batch of 130 rows, issued from node_3; t1 should now hold 390 rows.
CALL p1(130);

# Confirm the 390 rows on node_2 and on node_3 (the node that ran the CALL).
# NOTE(review): node_1 is not waited on in this section.
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 390 FROM t1;
--source include/wait_condition.inc
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 390 FROM t1;
--source include/wait_condition.inc

--connection node_1
# The third p1 batch and the t2 insert were issued on node_3, and only nodes 2
# and 3 were waited on afterwards. Make sure node_1 - the node this test
# synchronises on as donor - has applied them as well before node 4 is
# restarted; otherwise the "nodes 1,2,3 progress far enough" precondition is
# not actually guaranteed for node_1.
--let $wait_condition = SELECT COUNT(*) = 390 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1 FROM t2;
--source include/wait_condition.inc

# prepare to stop SST donor thread when it receives a request from starting node #4
SET GLOBAL debug = "+d,sync.wsrep_sst_donor_after_donation";

--echo Restarting server 4
# Need to use this form instead of start_mysqld.inc because the latter is blocking
# (the expect-file path was captured from node 4 before it was shut down)
--exec echo "restart:$start_mysqld_params" > $node_4_expect_file_name

--echo Wait for server 1 to become a donor
# Block until the donor thread on node_1 has reached the sync point armed
# above, then let it continue.
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_sst_donor_after_donation_reached";
--echo Server 1 got SST request from server 4
SET SESSION DEBUG_SYNC = "now SIGNAL signal.wsrep_sst_donor_after_donation_continue";
# Disarm the debug keyword and clear DEBUG_SYNC state so that the later
# restart of node 4 is not stopped at the same sync point.
SET GLOBAL debug = "";
SET DEBUG_SYNC='RESET';
#
# After this point node #4 shall proceed to IST and bail out
#

--echo Waiting for server 4 to leave the cluster
# Nodes 2 and 3 must both see the cluster at 3 members, i.e. without node 4,
# which is expected to have bailed out of IST.
--let $members = 3
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc

--connection node_4
--echo Server 4 left the cluster, killing it...

# Kill the connected server
# Step 1: write "wait" into node 4's expect file.
# NOTE(review): presumably this tells mtr not to restart the server on its own
# once the process dies - confirm against mysql-test-run behaviour.
# Step 2: hand the pid-file path (captured before the first shutdown) to the
# Perl snippet through the environment; the snippet reads the pid from that
# file, sends SIGKILL via `kill -9`, and always exits with status 0.
--exec echo "wait" > $node_4_expect_file_name
--let KILL_NODE_PIDFILE = $node_4_pid_file
--perl
        my $pid_filename = $ENV{'KILL_NODE_PIDFILE'};
        my $mysqld_pid = `cat $pid_filename`;
        chomp($mysqld_pid);
        system("kill -9 $mysqld_pid");
        exit(0);
EOF
--echo Killed server 4...
--source include/wait_until_disconnected.inc

--connection node_1
# Before node 4 is started again, wait for node_1 to be ready and to report
# wsrep_local_state_comment = 'Synced'. If the wait fails, the debug variant of
# wait_condition is given a query that prints the actual state.
--source include/wait_until_ready.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'Synced' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment';
--let $wait_condition_on_error_output = SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_state_comment';
--source include/wait_condition_with_debug.inc

--connection node_4
--echo Restarting server 4...
# This time the blocking start_mysqld.inc is used: nothing else has to happen
# on other connections while node 4 starts up.
--source include/start_mysqld.inc
--source include/galera_wait_ready.inc


# Confirm node #4 has rejoined
# All three other nodes must see a four-member cluster again.
--connection node_1
--let $members = 4
--source include/wsrep_wait_membership.inc
--connection node_2
--source include/wsrep_wait_membership.inc
--connection node_3
--source include/wsrep_wait_membership.inc

# Confirm that all is good and all nodes have identical data
#
# The column aliases (expect<node>_<count>) carry the expected value: 390 rows
# in t1 (three p1 batches of 130) and 1 row in t2 on every node.

--connection node_1
SELECT count(*) AS expect1_390 FROM t1;
SELECT count(*) AS expect1_1 FROM t2;

--connection node_2
SELECT count(*) AS expect2_390 FROM t1;
SELECT count(*) AS expect2_1 FROM t2;

--connection node_3
SELECT count(*) AS expect3_390 FROM t1;
SELECT count(*) AS expect3_1 FROM t2;

--connection node_4
# Node 4 has only just rejoined, so wait for both tables to reach the expected
# row counts before the SELECTs whose output is recorded.
--let $wait_condition = SELECT COUNT(*) = 390 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1 FROM t2;
--source include/wait_condition.inc
SELECT count(*) AS expect4_390 FROM t1;
SELECT count(*) AS expect4_1 FROM t2;

# Cleanup, issued from connection node_4 (the current connection).
DROP TABLE t1;
DROP TABLE t2;
DROP PROCEDURE p1;

# Error-log messages that the deliberately failed IST is expected to produce;
# they are registered on the current connection (node_4), the node that
# attempted the IST.
# NOTE(review): 1146 is presumably the "table doesn't exist" error raised for
# the missing t2 - confirm.
CALL mtr.add_suppression("BF applier thread=.+ failed to open_and_lock_tables for Table ");
CALL mtr.add_suppression("Event 3 Write_rows_v1 apply failed: 1146");
CALL mtr.add_suppression("Inconsistency detected: Failed on preordered");
CALL mtr.add_suppression("Failed to apply write set");
CALL mtr.add_suppression("Sending JOIN failed: -103");
CALL mtr.add_suppression("Failed to JOIN the cluster after SST");

# Counterpart of auto_increment_offset_save.inc sourced at the top of the test.
--source ../include/auto_increment_offset_restore.inc