mariadb/mysql-test/suite/galera/r/galera_restart_replica.result
Jan Lindström 277968aa4c MDEV-31413 : Node has been dropped from the cluster on Startup / Shutdown with async replica
There was two related problems:

(1) Galera node that is defined as a slave to async MariaDB
master at restart might do SST (state stransfer) and
part of that it will copy mysql.gtid_slave_pos table.
Problem is that updates on that table are not replicated
on a cluster. Therefore, table from donor that is not
slave is copied and joiner looses gtid position it was
and start executing events from wrong position of the binlog.
This incorrect position could break replication and
causes node to be dropped and requiring user action.

(2) Slave sql thread might start executing events before
galera is ready (wsrep_ready=ON) and that could also
cause node to be dropped from the cluster.

In this fix we enable replication of mysql.gtid_slave_pos
table on a cluster. In this way all nodes in a cluster
will know gtid slave position and even after SST joiner
knows correct gtid position to start.

Furthermore, we wait galera to be ready before slave
sql thread executes any events to prevent too early
execution.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
2023-08-08 03:25:56 +02:00

122 lines
2.7 KiB
Text

connection node_2;
connection node_1;
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
create user repl@'%' identified by 'repl';
grant all on *.* to repl@'%';
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
connection node_1;
connection node_2;
connection node_2;
START SLAVE;
connection node_3;
CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
# Verify that graceful shutdown succeeds.
# Force SST
connection node_1;
# Waiting until node_2 is not part of cluster anymore
connection node_2;
# Start node_2 again
¤ Wait until node_2 is back on cluster
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_3;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_3;
drop table t1;
connection node_2;
connection node_1;
connection node_3;
CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
# Verify that graceful shutdown succeeds.
# Force SST
connection node_1;
# Waiting until node_2 is not part of cluster anymore
connection node_3;
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_2;
# Start node_2 again
¤ Wait until node_2 is back on cluster
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_3;
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_3;
drop table t1;
connection node_2;
connection node_1;
connection node_2;
STOP SLAVE;
RESET SLAVE ALL;
connection node_3;
RESET MASTER;
connection node_1;
disconnect node_3;
disconnect node_2;
disconnect node_1;
# End of test