MDEV-31905 GTID inconsistency

This commit fixes a GTID inconsistency that was introduced by mariabackup SST.
The donor node now writes a new info file, donor_galera_info, which is streamed
along with the mariabackup donation to the joiner node. The donor_galera_info
file contains both the GTID and the gtid domain_id, and the joiner uses these
to initialize its GTID state.

The commit adds a new mtr test case, galera_3nodes.galera_gtid_consistency,
which exercises potentially harmful mariabackup SST scenarios. The test also
includes a scenario with IST joining.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
sjaakola 2023-11-21 15:43:11 +02:00 committed by Julius Goryavsky
parent 569381df83
commit c89f769f24
17 changed files with 692 additions and 15 deletions

View file

@ -1674,6 +1674,7 @@ ibx_copy_incremental_over_full()
NULL};
const char *sup_files[] = {"xtrabackup_binlog_info",
"xtrabackup_galera_info",
"donor_galera_info",
"xtrabackup_slave_info",
"xtrabackup_info",
"ib_lru_dump",

View file

@ -9,6 +9,7 @@
/* special files */
#define XTRABACKUP_SLAVE_INFO "xtrabackup_slave_info"
#define XTRABACKUP_GALERA_INFO "xtrabackup_galera_info"
#define XTRABACKUP_DONOR_GALERA_INFO "donor_galera_info"
#define XTRABACKUP_BINLOG_INFO "xtrabackup_binlog_info"
#define XTRABACKUP_INFO "xtrabackup_info"

View file

@ -1424,6 +1424,7 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{
char *state_uuid = NULL, *state_uuid55 = NULL;
char *last_committed = NULL, *last_committed55 = NULL;
char *domain_id = NULL, *domain_id55 = NULL;
bool result;
mysql_variable status[] = {
@ -1434,6 +1435,12 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
{NULL, NULL}
};
mysql_variable value[] = {
{"Wsrep_gtid_domain_id", &domain_id},
{"wsrep_gtid_domain_id", &domain_id55},
{NULL, NULL}
};
/* When backup locks are supported by the server, we should skip
creating xtrabackup_galera_info file on the backup stage, because
wsrep_local_state_uuid and wsrep_last_committed will be inconsistent
@ -1452,9 +1459,26 @@ write_galera_info(ds_ctxt *datasink, MYSQL *connection)
goto cleanup;
}
read_mysql_variables(connection, "SHOW VARIABLES LIKE 'wsrep%'", value, true);
if (domain_id == NULL && domain_id55 == NULL) {
msg("Warning: failed to get master wsrep state from SHOW VARIABLES.");
result = true;
goto cleanup;
}
result = datasink->backup_file_printf(XTRABACKUP_GALERA_INFO,
"%s:%s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55);
"%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55,
domain_id ? domain_id : domain_id55);
if (result)
{
result= datasink->backup_file_printf(XTRABACKUP_DONOR_GALERA_INFO,
"%s:%s %s\n", state_uuid ? state_uuid : state_uuid55,
last_committed ? last_committed : last_committed55,
domain_id ? domain_id : domain_id55);
}
if (result)
{
write_current_binlog_file(datasink, connection);

View file

@ -53,6 +53,7 @@ permission notice:
/*! Name of file where Galera info is stored on recovery */
#define XB_GALERA_INFO_FILENAME "xtrabackup_galera_info"
#define XB_GALERA_DONOR_INFO_FILENAME "donor_galera_info"
/***********************************************************************
Store Galera checkpoint info in the 'xtrabackup_galera_info' file, if that
@ -67,7 +68,7 @@ xb_write_galera_info(bool incremental_prepare)
long long seqno;
MY_STAT statinfo;
/* Do not overwrite existing an existing file to be compatible with
/* Do not overwrite an existing file to be compatible with
servers with older server versions */
if (!incremental_prepare &&
my_stat(XB_GALERA_INFO_FILENAME, &statinfo, MYF(0)) != NULL) {
@ -101,10 +102,11 @@ xb_write_galera_info(bool incremental_prepare)
seqno = wsrep_xid_seqno(&xid);
msg("mariabackup: Recovered WSREP position: %s:%lld\n",
uuid_str, (long long) seqno);
msg("mariabackup: Recovered WSREP position: %s:%lld domain_id: %lld\n",
uuid_str, (long long) seqno, (long long)wsrep_get_domain_id());
if (fprintf(fp, "%s:%lld", uuid_str, (long long) seqno) < 0) {
if (fprintf(fp, "%s:%lld %lld", uuid_str, (long long) seqno,
(long long)wsrep_get_domain_id()) < 0) {
die(
"could not write to " XB_GALERA_INFO_FILENAME

View file

@ -92,6 +92,7 @@ extern struct wsrep_service_st {
void (*wsrep_thd_kill_LOCK_func)(const MYSQL_THD thd);
void (*wsrep_thd_kill_UNLOCK_func)(const MYSQL_THD thd);
void (*wsrep_thd_set_wsrep_PA_unsafe_func)(MYSQL_THD thd);
uint32 (*wsrep_get_domain_id_func)();
} *wsrep_service;
#define MYSQL_SERVICE_WSREP_INCLUDED
@ -139,6 +140,7 @@ extern struct wsrep_service_st {
#define wsrep_thd_set_ignored_error(T,V) wsrep_service->wsrep_thd_set_ignored_error_func(T,V)
#define wsrep_report_bf_lock_wait(T,I) wsrep_service->wsrep_report_bf_lock_wait(T,I)
#define wsrep_thd_set_PA_unsafe(T) wsrep_service->wsrep_thd_set_PA_unsafe_func(T)
/* Fetch the Galera GTID domain id through the wsrep service table.
   Takes no arguments, matching the uint32 (*)() signature of
   wsrep_get_domain_id_func. */
#define wsrep_get_domain_id() wsrep_service->wsrep_get_domain_id_func()
#else
#define MYSQL_SERVICE_WSREP_STATIC_INCLUDED
@ -241,5 +243,6 @@ extern "C" void wsrep_report_bf_lock_wait(const THD *thd,
unsigned long long trx_id);
/* declare parallel applying unsafety for the THD */
extern "C" void wsrep_thd_set_PA_unsafe(MYSQL_THD thd);
extern "C" uint32 wsrep_get_domain_id();
#endif
#endif /* MYSQL_SERVICE_WSREP_INCLUDED */

View file

@ -0,0 +1,5 @@
[rsync]
wsrep-sst-method=rsync
[mariabackup]
wsrep_sst_method=mariabackup

View file

@ -0,0 +1,4 @@
# The goal of including this file is to enable galera_sst_method combinations
# (see include/galera_sst_method.combinations)
--source include/have_innodb.inc

View file

@ -14,6 +14,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
@@wsrep_gtid_domain_id @@wsrep_node_name
100 node3
connection node_3;
connection node_1;
connection node_2;
connection node_1;
connection node_1;

View file

@ -0,0 +1,219 @@
connection node_2;
connection node_1;
connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3;
connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2;
set wsrep_sync_wait=0;
connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1;
set wsrep_sync_wait=0;
connection node_1;
CREATE PROCEDURE insert_row (IN node varchar(10), IN repeat_count int)
BEGIN
DECLARE current_num int;
SET current_num = 0;
WHILE current_num < repeat_count do
INSERT INTO t1(node, name) VALUES (node, UUID());
SET current_num = current_num + 1;
END WHILE;
END|
CREATE TABLE t1 (id bigint not null primary key auto_increment, node VARCHAR(10), name VARCHAR(64)) ENGINE=innodb;
# node_1
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_2;
# node_2
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_3;
# node_3
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2
connection node_1;
CALL insert_row('node1', 500);;
connection node_2;
CALL insert_row('node2', 500);;
connection node_3;
CALL insert_row('node3', 500);;
connection node_2;
# Shutdown node_2, force SST
connection node_2b;
# Wait until node_2 leaves cluster
connection node_1b;
connection node_1;
connection node_3;
connection node_1;
CALL insert_row('node1', 500);
connection node_3;
CALL insert_row('node3', 500);
CREATE TABLE t2(i int primary key) engine=innodb;
connection node_2;
# Restart node_2
# restart
connection node_1b;
# Wait until node_2 is back in cluster
# node2 has joined
# GTID in node1
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
connection node_2;
# GTID in node2
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
connection node_3;
# GTID in node3
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2503
# Shutdown node_3
connection node_3;
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1';
# Wait until node_3 leaves cluster
connection node_1b;
connection node_1;
CALL insert_row('node1', 50);
CREATE TABLE t3(i int primary key) engine=innodb;
connection node_3;
# Rejoin node_3
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 0';
connection node_1b;
# Wait until node_3 is back in cluster
# node3 has joined
connection node_1;
# GTID in node1
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
connection node_2;
# GTID in node2
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
connection node_3;
# GTID in node3
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
# One by one shutdown all nodes
connection node_3;
# shutdown node_3
connection node_2;
# wait until node_3 is out of cluster
# shutdown node_2
connection node_1;
# wait until node_2 is out of cluster
# shutdown node_1
# Bootstrap from node_1
connection node_1;
# restart: --wsrep_new_cluster
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2554
ANALYZE TABLE t2;
Table Op Msg_type Msg_text
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
CALL insert_row('node1', 100);;
# Restart node_2
connection node_2;
# restart
connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1;
set wsrep_sync_wait=0;
connection node_1c;
# wait until node_1 and node_2 are in cluster
connection node_2;
ALTER TABLE t2 ADD COLUMN (k int);
CALL insert_row('node2', 100);;
# Restart node_3
connection node_3;
# restart
connection node_1c;
# wait until all nodes are back in cluster
after cluster restart
connection node_2;
connection node_1;
connection node_1;
node1 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_2;
node2 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_3;
node3 GTID
show variables like 'wsrep_gtid_domain_id';
Variable_name Value
wsrep_gtid_domain_id 1111
show variables like '%gtid_binlog_pos%';
Variable_name Value
gtid_binlog_pos 1111-1-2756
connection node_1;
table size in node1
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_2;
table size in node2
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_3;
table size in node3
SELECT COUNT(*) FROM t1;
COUNT(*)
2750
connection node_2;
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
connection node_3;
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
# cleanup
connection node_1;
DROP PROCEDURE insert_row;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
connection node_3;
connection node_2;
disconnect node_3;
disconnect node_2b;
disconnect node_1b;
disconnect node_1c;

View file

@ -4,11 +4,13 @@
#
--source include/galera_cluster.inc
--source include/have_innodb.inc
--source include/galera_sst_method.inc
--source include/force_restart.inc
#
# Initially wsrep gtid domain id is 100
#
--connection node_1
select @@wsrep_gtid_domain_id,@@wsrep_node_name;
@ -26,6 +28,10 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
--connection node_3
--source include/shutdown_mysqld.inc
--connection node_1
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--connection node_2
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
@ -36,6 +42,7 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
--source include/wait_condition.inc
--source include/shutdown_mysqld.inc
--sleep 5
#
# Bootstrap from node_1 and change wsrep_gtid_domain_id to 200
@ -45,12 +52,11 @@ select @@wsrep_gtid_domain_id,@@wsrep_node_name;
--source include/start_mysqld.inc
show variables like 'wsrep_gtid_domain_id';
#
# Restart node_2, expect that wsrep_gtid_domain_id has changed to 200
#
--connection node_2
--let $restart_parameters =
--let $restart_parameters =
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
--source include/start_mysqld.inc
show variables like 'wsrep_gtid_domain_id';

View file

@ -0,0 +1,35 @@
!include ../galera_3nodes.cnf
[mysqld.1]
wsrep-node-name="node1"
wsrep_gtid_domain_id=1111
gtid_domain_id=2
server_id=10999
wsrep_sst_auth="root:"
wsrep_sst_method=mariabackup
log_slave_updates=ON
log_bin=mariadb-bin-log
binlog-format=row
wsrep-gtid-mode=ON
[mysqld.2]
wsrep-node-name="node2"
wsrep_gtid_domain_id=1112
gtid_domain_id=3
wsrep_sst_auth="root:"
wsrep_sst_method=mariabackup
log_slave_updates=ON
log_bin=mariadb-bin-log
binlog-format=row
wsrep-gtid-mode=ON
[mysqld.3]
wsrep-node-name="node3"
wsrep_gtid_domain_id=1113
gtid_domain_id=4
wsrep_sst_auth="root:"
wsrep_sst_method=mariabackup
log_slave_updates=ON
log_bin=mariadb-bin-log
binlog-format=row
wsrep-gtid-mode=ON

View file

@ -0,0 +1,346 @@
--source include/galera_cluster.inc
--source include/big_test.inc
--source include/force_restart.inc
#
# Testing gtid consistency in 3 node cluster when nodes drop
# and join back to cluster.
# The tests verify that wsrep_gtid_domain_id and gtid_binlog_pos
# remain the same across the cluster.
# In the configuration, nodes have different wsrep_gtid_domain_id values,
# but all nodes are supposed to receive the effective domain id
# from the bootstrap node (node_1), and use it
#
--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3
--connect node_2b, 127.0.0.1, root, , test, $NODE_MYPORT_2
set wsrep_sync_wait=0;
--connect node_1b, 127.0.0.1, root, , test, $NODE_MYPORT_1
set wsrep_sync_wait=0;
--connection node_1
DELIMITER |;
CREATE PROCEDURE insert_row (IN node varchar(10), IN repeat_count int)
BEGIN
DECLARE current_num int;
SET current_num = 0;
WHILE current_num < repeat_count do
INSERT INTO t1(node, name) VALUES (node, UUID());
SET current_num = current_num + 1;
END WHILE;
END|
DELIMITER ;|
CREATE TABLE t1 (id bigint not null primary key auto_increment, node VARCHAR(10), name VARCHAR(64)) ENGINE=innodb;
#
# report initial gtid positions after table t1 is created
#
--echo # node_1
show variables like '%gtid_binlog_pos%';
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'
--source include/wait_condition.inc
--echo # node_2
show variables like '%gtid_binlog_pos%';
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 1 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'
--source include/wait_condition.inc
--echo # node_3
show variables like '%gtid_binlog_pos%';
###########################################
# scenario: join node 2 by SST
##########################################
#
# start concurrent insert load and stop node2 while the load is on
#
--connection node_1
--send CALL insert_row('node1', 500);
--connection node_2
--send CALL insert_row('node2', 500);
--connection node_3
--send CALL insert_row('node3', 500);
#
# stop load to node 2 and shutdown the node, force SST
#
--connection node_2
--reap
--echo # Shutdown node_2, force SST
--connection node_2b
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--echo # Wait until node_2 leaves cluster
--connection node_1b
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
#
# stop the remaining load to node 1 and 3
#
--connection node_1
--reap
--connection node_3
--reap
#
# some more inserts and DDL to nodes 1 and 3
# while node 2 is absent
#
--connection node_1
CALL insert_row('node1', 500);
--connection node_3
CALL insert_row('node3', 500);
CREATE TABLE t2(i int primary key) engine=innodb;
#
# restart node 2, should join by SST
#
--connection node_2
--echo # Restart node_2
--source include/start_mysqld.inc
--connection node_1b
--echo # Wait until node_2 is back in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # node2 has joined
#
# check gtid positions in all nodes
#
--echo # GTID in node1
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_2
--echo # GTID in node2
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_3
--echo # GTID in node3
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
###########################################
# scenario: join node 3 by IST
##########################################
--echo # Shutdown node_3
--connection node_3
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 1';
--echo # Wait until node_3 leaves cluster
--connection node_1b
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
#
# do some inserts and DDL to node 1
# while node 3 is absent
#
--connection node_1
CALL insert_row('node1', 50);
CREATE TABLE t3(i int primary key) engine=innodb;
#
# remove isolation in node 3, should join by IST
#
--connection node_3
--echo # Rejoin node_3
SET GLOBAL wsrep_provider_options = 'gmcast.isolate = 0';
--connection node_1b
--echo # Wait until node_3 is back in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # node3 has joined
#
# check gtid positions in all nodes
#
--connection node_1
--echo # GTID in node1
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_2
--echo # GTID in node2
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_3
--echo # GTID in node3
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
###########################################
# scenario: restart full cluster
##########################################
#
# stop all nodes, one by one
#
--echo # One by one shutdown all nodes
--connection node_3
--echo # shutdown node_3
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.3/data/grastate.dat
--connection node_2
--echo # wait until node_3 is out of cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # shutdown node_2
--source include/shutdown_mysqld.inc
--remove_file $MYSQLTEST_VARDIR/mysqld.2/data/grastate.dat
--connection node_1
--echo # wait until node_2 is out of cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo # shutdown node_1
--source include/shutdown_mysqld.inc
#
# bootstrap cluster in order node1 - node2 - node3
# send some inserts and DDL after each node started
#
--sleep 5
--echo # Bootstrap from node_1
--connection node_1
--let $restart_parameters = --wsrep_new_cluster
--source include/start_mysqld.inc
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
ANALYZE TABLE t2;
--send CALL insert_row('node1', 100);
--echo # Restart node_2
--connection node_2
--let $restart_parameters =
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
--source include/start_mysqld.inc
#
# connection node_1b may no longer be functional after node_1 was
# shut down; open node_1c for controlling node 1 state
#
--connect node_1c, 127.0.0.1, root, , test, $NODE_MYPORT_1
set wsrep_sync_wait=0;
--connection node_1c
--echo # wait until node_1 and node_2 are in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 2 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--connection node_2
ALTER TABLE t2 ADD COLUMN (k int);
--send CALL insert_row('node2', 100);
--echo # Restart node_3
--connection node_3
--let $restart_parameters =
--let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.3.expect
--source include/start_mysqld.inc
--connection node_1c
--echo # wait until all nodes are back in cluster
--let $wait_condition = SELECT VARIABLE_VALUE = 3 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_cluster_size';
--source include/wait_condition.inc
--echo after cluster restart
# stop load for nodes 1 and 2
--connection node_2
--reap
--connection node_1
--reap
#
# check gtid positions in all nodes
#
--connection node_1
--echo node1 GTID
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_2
--echo node2 GTID
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
--connection node_3
--echo node3 GTID
show variables like 'wsrep_gtid_domain_id';
show variables like '%gtid_binlog_pos%';
#
# check table size in all nodes
#
--connection node_1
--echo table size in node1
SELECT COUNT(*) FROM t1;
--connection node_2
--echo table size in node2
SELECT COUNT(*) FROM t1;
--connection node_3
--echo table size in node3
SELECT COUNT(*) FROM t1;
#
# cleanups
#
--connection node_2
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
--connection node_3
call mtr.add_suppression("WSREP: Ignoring server id for non bootstrap node");
call mtr.add_suppression("WSREP: Sending JOIN failed:.*");
call mtr.add_suppression("Sending JOIN failed:.*");
call mtr.add_suppression("WSREP: Failed to JOIN the cluster after SST.*");
--echo # cleanup
--connection node_1
DROP PROCEDURE insert_row;
DROP TABLE t1;
DROP TABLE t2;
DROP TABLE t3;
--connection node_3
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2'
--source include/wait_condition.inc
--connection node_2
--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't2'
--source include/wait_condition.inc
--disconnect node_3
--disconnect node_2b
--disconnect node_1b
--disconnect node_1c

View file

@ -104,8 +104,10 @@ fi
DATA="$WSREP_SST_OPT_DATA"
INFO_FILE='xtrabackup_galera_info'
DONOR_INFO_FILE='donor_galera_info'
IST_FILE='xtrabackup_ist'
MAGIC_FILE="$DATA/$INFO_FILE"
DONOR_MAGIC_FILE="$DATA/$DONOR_INFO_FILE"
INNOAPPLYLOG="$DATA/mariabackup.prepare.log"
INNOMOVELOG="$DATA/mariabackup.move.log"
@ -651,14 +653,14 @@ get_stream()
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
strmcmd="'$STREAM_BIN' -x"
else
strmcmd="'$STREAM_BIN' -c '$INFO_FILE'"
strmcmd="'$STREAM_BIN' -c '$INFO_FILE' '$DONOR_INFO_FILE'"
fi
else
sfmt='tar'
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
strmcmd='tar xfi -'
else
strmcmd="tar cf - '$INFO_FILE'"
strmcmd="tar cf - '$INFO_FILE' '$DONOR_INFO_FILE'"
fi
fi
wsrep_log_info "Streaming with $sfmt"
@ -680,6 +682,7 @@ cleanup_at_exit()
if [ $estatus -ne 0 ]; then
wsrep_log_error "Removing $MAGIC_FILE file due to signal"
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE" || :
[ -f "$DONOR_MAGIC_FILE" ] && rm -f "$DONOR_MAGIC_FILE" || :
fi
if [ "$WSREP_SST_OPT_ROLE" = 'joiner' ]; then
@ -916,6 +919,7 @@ monitor_process()
}
# Remove any pre-existing info ("magic") files before the configuration is
# read. Both are regular files (guarded by -f), so plain 'rm -f' is enough.
[ -f "$MAGIC_FILE" ] && rm -f "$MAGIC_FILE"
[ -f "$DONOR_MAGIC_FILE" ] && rm -f "$DONOR_MAGIC_FILE"
read_cnf
setup_ports
@ -1048,7 +1052,7 @@ send_magic()
# Store donor's wsrep GTID (state ID) and wsrep_gtid_domain_id
# (separated by a space).
echo "$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID" > "$MAGIC_FILE"
echo "$WSREP_SST_OPT_GTID $WSREP_SST_OPT_GTID_DOMAIN_ID" > "$DONOR_MAGIC_FILE"
if [ -n "$WSREP_SST_OPT_REMOTE_PSWD" ]; then
# Let joiner know that we know its secret
echo "$SECRET_TAG $WSREP_SST_OPT_REMOTE_PSWD" >> "$MAGIC_FILE"
@ -1594,9 +1598,16 @@ else # joiner
exit 2
fi
# use donor magic file, if present
# if IST was used, donor magic file was not created
# Remove special tags from the magic file, and from the output:
coords=$(head -n1 "$MAGIC_FILE")
wsrep_log_info "Galera co-ords from recovery: $coords"
if [ -r "$DONOR_MAGIC_FILE" ]; then
coords=$(head -n1 "$DONOR_MAGIC_FILE")
wsrep_log_info "Galera co-ords from donor: $coords"
else
coords=$(head -n1 "$MAGIC_FILE")
wsrep_log_info "Galera co-ords from recovery: $coords"
fi
echo "$coords" # Output : UUID:seqno wsrep_gtid_domain_id
wsrep_log_info "Total time on joiner: $totime seconds"

View file

@ -409,3 +409,8 @@ extern "C" void wsrep_thd_set_PA_unsafe(THD *thd)
WSREP_DEBUG("session does not have active transaction, can not mark as PA unsafe");
}
}
/* Service accessor: report the current value of the global
   wsrep_gtid_domain_id as a uint32. */
extern "C" uint32 wsrep_get_domain_id()
{
  return (uint32) wsrep_gtid_domain_id;
}

View file

@ -181,7 +181,8 @@ static struct wsrep_service_st wsrep_handler = {
wsrep_report_bf_lock_wait,
wsrep_thd_kill_LOCK,
wsrep_thd_kill_UNLOCK,
wsrep_thd_set_PA_unsafe
wsrep_thd_set_PA_unsafe,
wsrep_get_domain_id
};
static struct thd_specifics_service_st thd_specifics_handler=

View file

@ -161,3 +161,5 @@ void wsrep_report_bf_lock_wait(const THD*,
void wsrep_thd_set_PA_unsafe(THD*)
{}
/* Dummy implementation: always reports domain id 0. */
uint32 wsrep_get_domain_id()
{
  return 0;
}

View file

@ -652,7 +652,9 @@ static void* sst_joiner_thread (void* a)
else
{
// Read state ID (UUID:SEQNO) followed by wsrep_gtid_domain_id (if any).
unsigned long int domain_id= wsrep_gtid_domain_id;
const char *pos= strchr(out, ' ');
WSREP_DEBUG("SST state ID tmp=%s out=%s pos=%p", tmp, out, pos);
if (!pos) {
@ -662,6 +664,13 @@ static void* sst_joiner_thread (void* a)
WSREP_WARN("Did not find domain ID from SST script output '%s'. "
"Domain ID must be set manually to keep binlog consistent",
out);
if (wsrep_gtid_domain_id)
{
WSREP_INFO("This node is configured to use wsrep_gtid_domain_id=%lu by user.",
domain_id);
wsrep_gtid_server.domain_id= (uint32)domain_id;
wsrep_gtid_domain_id= (uint32)domain_id;
}
}
err= sst_scan_uuid_seqno (out, &ret_uuid, &ret_seqno);
@ -1688,6 +1697,8 @@ static int sst_flush_tables(THD* thd)
char content[100];
snprintf(content, sizeof(content), "%s:%lld %d\n", wsrep_cluster_state_uuid,
(long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id);
WSREP_DEBUG("sst_flush_tables : %s:%lld %d", wsrep_cluster_state_uuid,
(long long)wsrep_locked_seqno, wsrep_gtid_server.domain_id);
err= sst_create_file(flush_success, content);
if (err)