MDEV-32633: Fix Galera cluster <-> native replication interaction

It's possible to establish Galera multi-cluster setups connected
through the native replication when every Galera cluster is configured
to have a separate domain ID.
For this setup to work, we need to replace domain ID values in generated
GTID events when they are written at transaction commit to the values
configured by Wsrep replication.

At the same time, it's possible that the GTID event already contains
a correct domain ID if it comes through the native replication from
another Galera cluster.
In this case, when such an event is applied either through a native
replication slave thread or through Wsrep applier, we write GTID event
on transaction start and avoid writing it during transaction commit.

The code contained multiple problems that were fixed:
- applying GTID events didn't work because they are applied without a
running server transaction, and the Wsrep transaction was not started
- the GTID event generated on transaction start didn't carry the proper
"standalone" and "is_transactional" flags that the original applied
GTID event contained
- the condition that detects a GTID event already written on transaction
start (so that it is not written again on commit) relied on that GTID
event being the first one found in the transaction/statement caches,
which wasn't the case and resulted in duplicate GTID events being written
- instead of relying on the caches to find a GTID event, a simple check
is introduced that follows the exact rules for checking if event is
written at transaction start as described above
- the test case is improved to check that exact GTID events are
applied after two Galera clusters have synced.

Signed-off-by: Julius Goryavsky <julius.goryavsky@mariadb.com>
This commit is contained in:
Denis Protivensky 2024-03-25 14:40:55 +01:00 committed by Julius Goryavsky
parent a6b7203d65
commit 0cc9b49751
6 changed files with 98 additions and 18 deletions

View file

@ -11,7 +11,6 @@
##############################################################################
galera_2_cluster : MDEV-32631 galera_2_cluster: before_rollback(): Assertion `0' failed
galera_gtid_2_cluster : MDEV-32633 galera_gtid_2_cluster: Assertion `thd->wsrep_next_trx_id() != (0x7fffffffffffffffLL * 2ULL + 1)'
galera_ssl_reload : MDEV-32778 galera_ssl_reload failed with warning message
galera_ipv6_mariabackup : temporarily disabled at the request of Codership
galera_pc_bootstrap : temporarily disabled at the request of Codership

View file

@ -75,15 +75,15 @@ insert into t1 values (2, 21, 1);
select @@gtid_binlog_state;
@@gtid_binlog_state
1-11-2,2-21-1
select * from t1;
cluster_domain_id node_server_id seq_no
1 11 2
2 21 1
#wait for sync cluster 1 and 2
connection node_1;
include/save_master_gtid.inc
connection node_4;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
2 21 1
cluster 1 node 2
connection node_2;
select @@gtid_binlog_state;
@ -98,6 +98,11 @@ connection node_1;
include/save_master_gtid.inc
connection node_4;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
2 21 1
cluster 1 node 3
connection node_3;
select @@gtid_binlog_state;
@ -112,6 +117,12 @@ connection node_1;
include/save_master_gtid.inc
connection node_4;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
1 13 4
2 21 1
cluster 2 node 2
connection node_5;
select @@gtid_binlog_state;
@ -126,6 +137,13 @@ connection node_4;
include/save_master_gtid.inc
connection node_1;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
1 13 4
2 21 1
2 22 2
cluster 2 node 3
connection node_6;
select @@gtid_binlog_state;
@ -140,6 +158,14 @@ connection node_4;
include/save_master_gtid.inc
connection node_1;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
1 13 4
2 21 1
2 22 2
2 23 3
cluster 1 node 1
connection node_1;
select @@gtid_binlog_state;
@ -220,15 +246,15 @@ insert into t1 values (2, 21, 1);
select @@gtid_binlog_state;
@@gtid_binlog_state
1-11-7,2-21-4
select * from t1;
cluster_domain_id node_server_id seq_no
1 11 2
2 21 1
#wait for sync cluster 1 and 2
connection node_1;
include/save_master_gtid.inc
connection node_4;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
2 21 1
cluster 1 node 2
connection node_2;
select @@gtid_binlog_state;
@ -243,6 +269,11 @@ connection node_1;
include/save_master_gtid.inc
connection node_4;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
2 21 1
cluster 1 node 3
connection node_3;
select @@gtid_binlog_state;
@ -257,6 +288,12 @@ connection node_1;
include/save_master_gtid.inc
connection node_4;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
1 13 4
2 21 1
cluster 2 node 2
connection node_5;
select @@gtid_binlog_state;
@ -271,6 +308,13 @@ connection node_4;
include/save_master_gtid.inc
connection node_1;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
1 13 4
2 21 1
2 22 2
cluster 2 node 3
connection node_6;
select @@gtid_binlog_state;
@ -285,6 +329,14 @@ connection node_4;
include/save_master_gtid.inc
connection node_1;
include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
cluster_domain_id node_server_id seq_no
1 11 2
1 12 3
1 13 4
2 21 1
2 22 2
2 23 3
cluster 1 node 1
connection node_1;
select @@gtid_binlog_state;

View file

@ -75,12 +75,12 @@ select @@gtid_binlog_state;
select @@gtid_binlog_state;
insert into t1 values (2, 21, 1);
select @@gtid_binlog_state;
select * from t1;
--echo #wait for sync cluster 1 and 2
--connection node_1
--source include/save_master_gtid.inc
--connection node_4
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 1 node 2
@ -94,6 +94,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_4
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 1 node 3
--connection node_3
@ -106,6 +107,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_4
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 2 node 2
--connection node_5
@ -118,6 +120,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_1
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 2 node 3
--connection node_6
@ -130,6 +133,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_1
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 1 node 1
@ -226,13 +230,13 @@ select @@gtid_binlog_state;
--connection node_4
insert into t1 values (2, 21, 1);
select @@gtid_binlog_state;
select * from t1;
--echo #wait for sync cluster 1 and 2
--connection node_1
--source include/save_master_gtid.inc
--connection node_4
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 1 node 2
@ -246,6 +250,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_4
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 1 node 3
--connection node_3
@ -258,6 +263,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_4
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 2 node 2
--connection node_5
@ -270,6 +276,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_1
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 2 node 3
--connection node_6
@ -282,6 +289,7 @@ select @@gtid_binlog_state;
--source include/save_master_gtid.inc
--connection node_1
--source include/sync_with_master_gtid.inc
select * from t1 order by 1, 2, 3;
--echo cluster 1 node 1

View file

@ -5254,6 +5254,16 @@ MYSQL_BIN_LOG::is_xidlist_idle_nolock()
return true;
}
#ifdef WITH_WSREP
/**
  Check whether the GTID event for the current transaction is written at
  transaction start rather than at commit.

  All of the following must hold:
   - wsrep_gtid_mode is enabled and WSREP(thd) is true;
   - the session carries an explicit sequence number, i.e. either
     gtid_seq_no or wsrep_gtid_seq_no is non-zero;
   - for a slave thread wsrep_thd_is_local(thd) is true, while for any
     other thread wsrep_thd_is_applying(thd) is true.

  @param thd  session to inspect

  @return true if the conditions above are met, false otherwise
*/
static bool is_gtid_written_on_trans_start(const THD *thd)
{
  /* Nothing to do unless Wsrep GTID handling is active for this session. */
  if (!wsrep_gtid_mode || !WSREP(thd))
    return false;

  /* An explicit sequence number must have been provided for the event. */
  if (!thd->variables.gtid_seq_no && !thd->variables.wsrep_gtid_seq_no)
    return false;

  /* Slave threads and all other threads are checked by different helpers. */
  if (thd->slave_thread)
    return wsrep_thd_is_local(thd);

  return wsrep_thd_is_applying(thd);
}
#endif
/**
Create a new log file name.
@ -5893,9 +5903,7 @@ THD::binlog_start_trans_and_stmt()
Ha_trx_info *ha_info;
ha_info= this->ha_data[binlog_hton->slot].ha_info + (mstmt_mode ? 1 : 0);
if (!ha_info->is_started() &&
(this->variables.gtid_seq_no || this->variables.wsrep_gtid_seq_no) &&
wsrep_on(this) &&
if (!ha_info->is_started() && is_gtid_written_on_trans_start(this) &&
(this->wsrep_cs().mode() == wsrep::client_state::m_local))
{
uchar *buf= 0;
@ -5914,8 +5922,14 @@ THD::binlog_start_trans_and_stmt()
domain_id= wsrep_gtid_server.domain_id;
server_id= wsrep_gtid_server.server_id;
}
Gtid_log_event gtid_event(this, seqno, domain_id, true,
LOG_EVENT_SUPPRESS_USE_F, true, 0);
rpl_group_info* rgi = this->slave_thread ? this->rgi_slave : this->wsrep_rgi;
const bool standalone =
rgi->gtid_ev_flags2 & Gtid_log_event::FL_STANDALONE;
const bool is_transactional =
rgi->gtid_ev_flags2 & Gtid_log_event::FL_TRANSACTIONAL;
Gtid_log_event gtid_event(this, seqno, domain_id,
standalone, LOG_EVENT_SUPPRESS_USE_F,
is_transactional, 0);
// Replicated events in a writeset don't have a checksum
gtid_event.checksum_alg= BINLOG_CHECKSUM_ALG_OFF;
gtid_event.server_id= server_id;

View file

@ -704,6 +704,8 @@ rpl_slave_state::record_gtid(THD *thd, const rpl_gtid *gtid, uint64 sub_id,
{
thd->wsrep_ignore_table= false;
table->file->row_logging= 1; // replication requires binary logging
if (thd->wsrep_next_trx_id() == WSREP_UNDEFINED_TRX_ID)
thd->set_query_id(next_query_id());
wsrep_start_trx_if_not_started(thd);
}
else

View file

@ -1827,8 +1827,13 @@ int wsrep_to_buf_helper(
domain_id= wsrep_gtid_server.domain_id;
server_id= wsrep_gtid_server.server_id;
}
Gtid_log_event gtid_event(thd, seqno, domain_id, true,
LOG_EVENT_SUPPRESS_USE_F, true, 0);
rpl_group_info* rgi = thd->slave_thread ? thd->rgi_slave : thd->wsrep_rgi;
const bool standalone =
rgi->gtid_ev_flags2 & Gtid_log_event::FL_STANDALONE;
const bool is_transactional =
rgi->gtid_ev_flags2 & Gtid_log_event::FL_TRANSACTIONAL;
Gtid_log_event gtid_event(thd, seqno, domain_id, standalone,
LOG_EVENT_SUPPRESS_USE_F, is_transactional, 0);
gtid_event.server_id= server_id;
if (!gtid_event.is_valid()) ret= 0;
ret= writer.write(&gtid_event);