mariadb/mysql-test/suite/galera/t/galera_restart_replica.test
Kristian Nielsen 0a68328673 MDEV-34705: Binlog-in-engine: Protect against concurrent RESET MASTER and dump threads
This is actually an existing problem in the old binlog implementation, and
this patch is applicable to old binlog also. The problem is that RESET
MASTER can run concurrently with binlog dump threads / connected slaves.
This will remove the binlog from under the feet of the reader, which can
cause all sorts of strange behaviour.

This patch fixes the problem by disallowing to run RESET MASTER when dump
threads (or other RESET MASTER or SHOW BINARY LOGS) are running. An error is
thrown in this case, user must stop slaves and/or kill dump threads to make
the RESET MASTER go through. A slave that connects in the middle of RESET
MASTER will wait for it to complete.

Fix a lot of test cases to kill any lingering dump threads before doing
RESET MASTER, mostly just by sourcing include/kill_binlog_dump_threads.inc.

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
2025-06-11 11:32:10 +02:00

287 lines
10 KiB
Text

#
# Test Galera as a replica to a MariaDB async replication
#
# MariaDB
# primary ---async replication--->galera node_2 (replica)<----galera replication---> galera node1
#
# Test outline:
#
# - Create user for async replication
# - Create table and some data in primary
# - Verify that table and data is replicated to galera nodes
# - Verify that mysql.gtid_slave_pos has some rows in all Galera nodes
# - Verify that gtid_slave_pos, gtid_binlog_pos and gtid_current_pos are
# same in all Galera nodes
# - Verify that we can shutdown and restart Galera replica (node #2)
# - Verify that gtid_slave_pos, gtid_binlog_pos and gtid_current_pos are
# same in all Galera nodes
# - Verify that mysql.gtid_slave_pos table has limited amount of rows
# - Veruft that ddl works (drop table)
#
# Similar test is done so that new rows are added to table in
# primary while async replica (node #2) is down.
#
# The galera/galera_2node_slave.cnf describes the setup of the nodes
#
--source include/force_restart.inc
--source include/galera_cluster.inc
--source include/have_innodb.inc
# In this test we mark node #2 as replica
--connect replica, 127.0.0.1, root, , test, $NODE_MYPORT_2
# As node #3 is not a Galera node, and galera_cluster.inc does not open connetion to it
# we open the primary connection her
--connect primary, 127.0.0.1, root, , test, $NODE_MYPORT_3
create user repl@'%' identified by 'repl';
grant all on *.* to repl@'%';
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
--let $node_1 = node_1
--let $node_2 = replica
--let $node_3 = primary
--source include/auto_increment_offset_save.inc
--connection replica
--disable_query_log
--eval CHANGE MASTER TO master_host='127.0.0.1', master_user='repl', master_password='repl', master_ssl_verify_server_cert=0, master_port=$NODE_MYPORT_3, master_use_gtid=slave_pos;
--enable_query_log
START SLAVE;
--connection primary
CREATE TABLE t1 (id bigint auto_increment primary key, msg varchar(100)) engine=innodb;
--echo # Intentionally generate 1k GTID-events
--let $inserts=1000
--let $count=0
--disable_query_log
while($count < $inserts)
{
--eval insert into t1 values (NULL,'test1')
--inc $count
}
--enable_query_log
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
--connection replica
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1000 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1000 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
--connection replica
--echo # Verify that graceful shutdown succeeds in replica.
--source include/shutdown_mysqld.inc
--echo # Force SST
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--connection node_1
--echo # Waiting until replica is not part of cluster anymore
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
--connection replica
--echo # Start replica again
--source include/start_mysqld.inc
--echo # Wait until replica is back on cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
--connection primary
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
#
# Cleanup
#
drop table t1;
--connection replica
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
#
# Case 2 : While slave is down add writes to master
#
--connection primary
CREATE TABLE t1 (id bigint auto_increment primary key, msg varchar(100)) engine=innodb;
--echo # Intentionally generate 1k GTID-events
--let $inserts=1000
--let $count=0
--disable_query_log
while($count < $inserts)
{
--eval insert into t1 values (NULL,'test1')
--inc $count
}
--enable_query_log
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
--connection replica
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1000 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
SELECT COUNT(*) AS EXPECT_1000 FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1000 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
--connection replica
--echo # Verify that graceful shutdown succeeds in replica.
--source include/shutdown_mysqld.inc
--echo # Force SST
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--connection node_1
--echo # Waiting until replica is not part of cluster anymore
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
--echo # Add writes to primary
--connection primary
--echo # Intentionally generate 1k GTID-events
--let $inserts=1000
--let $count=0
--disable_query_log
while($count < $inserts)
{
--eval insert into t1 values (NULL,'test1')
--inc $count
}
--enable_query_log
SELECT COUNT(*) AS EXPECT_2000 FROM t1;
--connection replica
--echo # Start replica again
--source include/start_mysqld.inc
--echo # Wait until replica is back on cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 2000 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
SELECT COUNT(*) AS EXPECT_2000 FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 2000 FROM t1;
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) < 1000 FROM mysql.gtid_slave_pos;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) < 1000 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT @@gtid_slave_pos,@@gtid_binlog_pos,@@gtid_current_pos;
SELECT COUNT(*) AS EXPECT_2000 FROM t1;
--connection primary
SELECT COUNT(*) AS EXPECT_2000 FROM t1;
#
# Cleanup
#
drop table t1;
--connection replica
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--connection replica
STOP SLAVE;
RESET SLAVE ALL;
--connection primary
--source include/kill_binlog_dump_threads.inc
RESET MASTER;
--source include/auto_increment_offset_restore.inc
--connection node_1
--disconnect primary
--disconnect replica
--source include/galera_end.inc
--echo # End of test