MDEV-31413 : Node has been dropped from the cluster on Startup / Shutdown with async replica

There was two related problems:

(1) Galera node that is defined as a slave to async MariaDB
master at restart might do SST (state stransfer) and
part of that it will copy mysql.gtid_slave_pos table.
Problem is that updates on that table are not replicated
on a cluster. Therefore, table from donor that is not
slave is copied and joiner looses gtid position it was
and start executing events from wrong position of the binlog.
This incorrect position could break replication and
causes node to be dropped and requiring user action.

(2) Slave sql thread might start executing events before
galera is ready (wsrep_ready=ON) and that could also
cause node to be dropped from the cluster.

In this fix we enable replication of mysql.gtid_slave_pos
table on a cluster. In this way all nodes in a cluster
will know gtid slave position and even after SST joiner
knows correct gtid position to start.

Furthermore, we wait galera to be ready before slave
sql thread executes any events to prevent too early
execution.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
Jan Lindström 2023-06-20 14:57:04 +03:00 committed by Julius Goryavsky
parent b54e4bf00b
commit 277968aa4c
12 changed files with 433 additions and 19 deletions

View file

@ -17,12 +17,15 @@ SELECT LENGTH(@@global.gtid_binlog_state) > 1;
LENGTH(@@global.gtid_binlog_state) > 1
1
connection node_1;
SELECT COUNT(*) AS EXPECT_1 FROM t1;
EXPECT_1
1
gtid_binlog_state_equal
0
connection node_2;
SELECT COUNT(*) AS EXPECT_0 FROM t1;
EXPECT_0
0
SELECT COUNT(*) AS EXPECT_1 FROM t1;
EXPECT_1
1
gtid_binlog_state_equal
0
#cleanup

View file

@ -0,0 +1,122 @@
connection node_2;
connection node_1;
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
create user repl@'%' identified by 'repl';
grant all on *.* to repl@'%';
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
connection node_1;
connection node_2;
connection node_2;
START SLAVE;
connection node_3;
CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
# Verify that graceful shutdown succeeds.
# Force SST
connection node_1;
# Waiting until node_2 is not part of cluster anymore
connection node_2;
# Start node_2 again
¤ Wait until node_2 is back on cluster
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_3;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_3;
drop table t1;
connection node_2;
connection node_1;
connection node_3;
CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
EXPECT_10000
10000
connection node_2;
# Verify that graceful shutdown succeeds.
# Force SST
connection node_1;
# Waiting until node_2 is not part of cluster anymore
connection node_3;
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_2;
# Start node_2 again
¤ Wait until node_2 is back on cluster
connection node_2;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_1;
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
EXPECT_1
1
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_3;
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
EXPECT_20000
20000
connection node_3;
drop table t1;
connection node_2;
connection node_1;
connection node_2;
STOP SLAVE;
RESET SLAVE ALL;
connection node_3;
RESET MASTER;
connection node_1;
disconnect node_3;
disconnect node_2;
disconnect node_1;
# End of test

View file

@ -4,3 +4,4 @@
log-bin=mysqld-bin
log-slave-updates
binlog-format=ROW
wsrep-replicate-myisam=ON

View file

@ -44,12 +44,20 @@ SELECT LENGTH(@@global.gtid_binlog_state) > 1;
--let $wait_condition = SELECT COUNT(*) = 1 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) AS EXPECT_1 FROM t1;
--disable_query_log
--eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal;
--enable_query_log
--connection node_2
SELECT COUNT(*) AS EXPECT_0 FROM t1;
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 1 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) AS EXPECT_1 FROM t1;
--disable_query_log
--eval SELECT '$gtid_binlog_state_node1' = @@global.gtid_binlog_state AS gtid_binlog_state_equal;
@ -59,8 +67,6 @@ SELECT COUNT(*) AS EXPECT_0 FROM t1;
--connection node_3
DROP TABLE t1;
--sleep 1
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc

View file

@ -0,0 +1,20 @@
!include ../galera_2nodes_as_slave.cnf
[mysqld]
wsrep-debug=1
[mysqld.1]
server_id=15
wsrep_gtid_mode=1
wsrep_gtid_domain_id=16
gtid_domain_id=11
gtid_strict_mode=1
[mysqld.2]
skip-slave-start=OFF
server_id=15
wsrep_gtid_mode=1
wsrep_gtid_domain_id=16
gtid_domain_id=11
gtid_strict_mode=1

View file

@ -0,0 +1,212 @@
#
# Test Galera as a replica to a MySQL async replication
#
# The galera/galera_2node_slave.cnf describes the setup of the nodes
#
--source include/big_test.inc
--source include/force_restart.inc
--source include/galera_cluster.inc
--source include/have_sequence.inc
# As node #3 is not a Galera node, and galera_cluster.inc does not open connetion to it
# we open the node_3 connection here
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
create user repl@'%' identified by 'repl';
grant all on *.* to repl@'%';
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
--let $node_1 = node_1
--let $node_2 = node_2
--source include/auto_increment_offset_save.inc
--connection node_2
--disable_query_log
--eval CHANGE MASTER TO master_host='127.0.0.1', master_user='repl', master_password='repl', master_port=$NODE_MYPORT_3, master_use_gtid=slave_pos;
--enable_query_log
START SLAVE;
--connection node_3
CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb;
--disable_query_log
INSERT INTO t1 SELECT seq, 'test' from seq_1_to_10000;
--enable_query_log
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_2
--echo # Verify that graceful shutdown succeeds.
--source include/shutdown_mysqld.inc
--echo # Force SST
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--connection node_1
--echo # Waiting until node_2 is not part of cluster anymore
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
--connection node_2
--echo # Start node_2 again
--source include/start_mysqld.inc
--echo ¤ Wait until node_2 is back on cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready';
--source include/wait_condition.inc
--connection node_2
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_1
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_3
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
#
# Cleanup
#
--connection node_3
drop table t1;
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
#
# Case 2 : While slave is down add writes to master
#
--connection node_3
CREATE TABLE t1 (id bigint primary key, msg varchar(100)) engine=innodb;
--disable_query_log
INSERT INTO t1 SELECT seq, 'test' from seq_1_to_10000;
--enable_query_log
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--let $wait_condition = SELECT COUNT(*) = 10000 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_10000 FROM t1;
--connection node_2
--echo # Verify that graceful shutdown succeeds.
--source include/shutdown_mysqld.inc
--echo # Force SST
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--connection node_1
--echo # Waiting until node_2 is not part of cluster anymore
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
# Add writes to master
--connection node_3
--disable_query_log
INSERT INTO t1 SELECT seq, 'test' from seq_20001_to_30000;
--enable_query_log
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
--connection node_2
--echo # Start node_2 again
--source include/start_mysqld.inc
--echo ¤ Wait until node_2 is back on cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 'Primary' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_status';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--let $wait_condition = SELECT VARIABLE_VALUE = 'ON' FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_ready';
--source include/wait_condition.inc
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 20000 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 20000 FROM t1;
--source include/wait_condition.inc
SELECT COUNT(*) > 0 AS EXPECT_1 FROM mysql.gtid_slave_pos;
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
--connection node_3
SELECT COUNT(*) AS EXPECT_20000 FROM t1;
#
# Cleanup
#
--connection node_3
drop table t1;
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--connection node_1
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1';
--source include/wait_condition.inc
--connection node_2
STOP SLAVE;
RESET SLAVE ALL;
--connection node_3
RESET MASTER;
--connection node_1
--disconnect node_3
--source include/auto_increment_offset_restore.inc
--source include/galera_end.inc
--echo # End of test

View file

@ -28,6 +28,10 @@
#include "rpl_rli.h"
#include "slave.h"
#include "log_event.h"
#ifdef WITH_WSREP
#include "wsrep_mysqld.h" // wsrep_thd_is_local
#include "wsrep_trans_observer.h" // wsrep_start_trx_if_not_started
#endif
const LEX_CSTRING rpl_gtid_slave_state_table_name=
{ STRING_WITH_LEN("gtid_slave_pos") };
@ -690,10 +694,18 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
#ifdef WITH_WSREP
/*
Updates in slave state table should not be appended to galera transaction
writeset.
We should replicate local gtid_slave_pos updates to other nodes.
In applier we should not append them to galera writeset.
*/
thd->wsrep_ignore_table= true;
if (WSREP_ON_ && wsrep_thd_is_local(thd))
{
thd->wsrep_ignore_table= false;
wsrep_start_trx_if_not_started(thd);
}
else
{
thd->wsrep_ignore_table= true;
}
#endif
if (!in_transaction)
@ -859,9 +871,20 @@ rpl_slave_state::gtid_delete_pending(THD *thd,
#ifdef WITH_WSREP
/*
Updates in slave state table should not be appended to galera transaction
writeset.
We should replicate local gtid_slave_pos updates to other nodes.
In applier we should not append them to galera writeset.
*/
if (WSREP_ON_ && wsrep_thd_is_local(thd) &&
thd->wsrep_cs().state() != wsrep::client_state::s_none)
{
if (thd->wsrep_trx().active() == false)
{
if (thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID)
thd->set_query_id(next_query_id());
wsrep_start_transaction(thd, thd->wsrep_next_trx_id());
}
thd->wsrep_ignore_table= false;
}
thd->wsrep_ignore_table= true;
#endif

View file

@ -5484,6 +5484,8 @@ pthread_handler_t handle_slave_sql(void *arg)
mysql_mutex_unlock(&rli->data_lock);
#ifdef WITH_WSREP
wsrep_open(thd);
if (WSREP_ON_)
wsrep_wait_ready(thd);
if (wsrep_before_command(thd))
{
WSREP_WARN("Slave SQL wsrep_before_command() failed");

View file

@ -1,4 +1,4 @@
/* Copyright 2008-2022 Codership Oy <http://www.codership.com>
/* Copyright 2008-2023 Codership Oy <http://www.codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -3151,3 +3151,27 @@ bool wsrep_consistency_check(THD *thd)
{
return thd->wsrep_consistency_check == CONSISTENCY_CHECK_RUNNING;
}
// Wait until wsrep has reached ready state
void wsrep_wait_ready(THD *thd)
{
mysql_mutex_lock(&LOCK_wsrep_ready);
while(!wsrep_ready)
{
WSREP_INFO("Waiting to reach ready state");
mysql_cond_wait(&COND_wsrep_ready, &LOCK_wsrep_ready);
}
WSREP_INFO("ready state reached");
mysql_mutex_unlock(&LOCK_wsrep_ready);
}
void wsrep_ready_set(bool ready_value)
{
WSREP_DEBUG("Setting wsrep_ready to %d", ready_value);
mysql_mutex_lock(&LOCK_wsrep_ready);
wsrep_ready= ready_value;
// Signal if we have reached ready state
if (wsrep_ready)
mysql_cond_signal(&COND_wsrep_ready);
mysql_mutex_unlock(&LOCK_wsrep_ready);
}

View file

@ -1,4 +1,4 @@
/* Copyright 2008-2022 Codership Oy <http://www.codership.com>
/* Copyright 2008-2023 Codership Oy <http://www.codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -442,6 +442,8 @@ enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit);
wsrep::key wsrep_prepare_key_for_toi(const char* db, const char* table,
enum wsrep::key::type type);
void wsrep_wait_ready(THD *thd);
void wsrep_ready_set(bool ready_value);
#else /* !WITH_WSREP */
/* These macros are needed to compile MariaDB without WSREP support

View file

@ -1,4 +1,4 @@
/* Copyright 2010 Codership Oy <http://www.codership.com>
/* Copyright 2010-2023 Codership Oy <http://www.codership.com>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -22,8 +22,6 @@
#include "wsrep_api.h"
#include "wsrep/server_state.hpp"
my_bool wsrep_ready_set (my_bool x);
ssize_t wsrep_sst_prepare (void** msg);
wsrep_cb_status wsrep_sst_donate_cb (void* app_ctx,
void* recv_ctx,

View file

@ -342,6 +342,7 @@ void Wsrep_server_service::log_state_change(
case Wsrep_server_state::s_synced:
wsrep_ready= TRUE;
WSREP_INFO("Synchronized with group, ready for connections");
wsrep_ready_set(true);
/* fall through */
case Wsrep_server_state::s_joined:
case Wsrep_server_state::s_donor:
@ -349,16 +350,16 @@ void Wsrep_server_service::log_state_change(
break;
case Wsrep_server_state::s_connected:
wsrep_cluster_status= "non-Primary";
wsrep_ready= FALSE;
wsrep_ready_set(false);
wsrep_connected= TRUE;
break;
case Wsrep_server_state::s_disconnected:
wsrep_ready= FALSE;
wsrep_ready_set(false);
wsrep_connected= FALSE;
wsrep_cluster_status= "Disconnected";
break;
default:
wsrep_ready= FALSE;
wsrep_ready_set(false);
wsrep_cluster_status= "non-Primary";
break;
}