From 8d12dd8f503282179a078f2f883b88f6ccee5ebd Mon Sep 17 00:00:00 2001 From: Daniele Sciascia Date: Wed, 11 May 2022 14:33:20 +0200 Subject: [PATCH] MDEV-28053 Sysbench data load crashes Galera secondary node in async master slave setup This patch fixes a problem that arises when a Galera node acts as a replica for native replication. When parallel applying is enabled, it is possible to end up with attempts to write binlog events with gtids out of order. This happens because when multiple events are delivered from the native replication stream and applied in concurrently, it is for them to be replicated to the Galera cluster in an order which is different from the original order in which they were committed in the aync replication master. To correct this behavior we now wait_for_prior_commit() before replicating changes though galera. As a consequence, parallel appliers may apply events in parallel until the galera replication step, which is now serialized. --- mysql-test/suite/galera/r/MDEV-28053.result | 14 +++++ mysql-test/suite/galera/t/MDEV-28053.cnf | 6 ++ mysql-test/suite/galera/t/MDEV-28053.test | 61 +++++++++++++++++++++ sql/wsrep_trans_observer.h | 4 ++ 4 files changed, 85 insertions(+) create mode 100644 mysql-test/suite/galera/r/MDEV-28053.result create mode 100644 mysql-test/suite/galera/t/MDEV-28053.cnf create mode 100644 mysql-test/suite/galera/t/MDEV-28053.test diff --git a/mysql-test/suite/galera/r/MDEV-28053.result b/mysql-test/suite/galera/r/MDEV-28053.result new file mode 100644 index 00000000000..b3f93688dd0 --- /dev/null +++ b/mysql-test/suite/galera/r/MDEV-28053.result @@ -0,0 +1,14 @@ +connection node_2; +connection node_1; +connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3; +connection node_3; +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB; +connection node_2; +connection node_3; +DROP TABLE t1; +connection node_2; +connection node_2; +STOP SLAVE; +RESET SLAVE ALL; +connection node_3; +RESET MASTER; diff --git a/mysql-test/suite/galera/t/MDEV-28053.cnf b/mysql-test/suite/galera/t/MDEV-28053.cnf new file mode 100644 index 00000000000..2a500639d1d --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-28053.cnf @@ -0,0 +1,6 @@ +!include ../galera_2nodes_as_slave.cnf + +[mysqld] +slave_parallel_threads=4 +slave_parallel_mode=optimistic +gtid_strict_mode=1 diff --git a/mysql-test/suite/galera/t/MDEV-28053.test b/mysql-test/suite/galera/t/MDEV-28053.test new file mode 100644 index 00000000000..85cb20c7e10 --- /dev/null +++ b/mysql-test/suite/galera/t/MDEV-28053.test @@ -0,0 +1,61 @@ +# +# MDEV-28053 - Sysbench data load crashes Galera secondary node in +# async master slave setup +# +# Setup: node 3 is a regular MariaDB server, nodes 1 and 2 are members +# of a Galera cluster. Node 2 connects to node 3 through async replication. +# +# Test uses multiple parallel async applier threads (see MDEV-28053.cnf) +# + +--source include/have_innodb.inc +--source include/galera_cluster.inc + +--connect node_3, 127.0.0.1, root, , test, $NODE_MYPORT_3 + +--connection node_3 +CREATE TABLE t1 (f1 INTEGER PRIMARY KEY AUTO_INCREMENT) ENGINE=InnoDB; + +# +# Execute a few INSERTs, to simulate sysbench data load phase +# +--let $counter=100 +--disable_query_log +while ($counter) { + --connection node_3 + INSERT INTO t1 VALUES(); + --dec $counter +} +--enable_query_log +--let gtid = `SELECT @@last_gtid` + +# +# Start async replication on node 2. +# If bug is present, expect a crash when applying +# events concurrently. +# +--connection node_2 +--disable_query_log +--disable_result_log +--eval CHANGE MASTER TO MASTER_HOST='127.0.0.1', MASTER_USER='root', MASTER_PORT=$NODE_MYPORT_3; +START SLAVE; +--eval SELECT MASTER_GTID_WAIT('$gtid', 600) +--enable_result_log +--enable_query_log + +# +# Cleanup +# +--connection node_3 +DROP TABLE t1; + +--connection node_2 +--let $wait_condition = SELECT COUNT(*) = 0 FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 't1'; +--source include/wait_condition.inc + +--connection node_2 +STOP SLAVE; +RESET SLAVE ALL; + +--connection node_3 +RESET MASTER; diff --git a/sql/wsrep_trans_observer.h b/sql/wsrep_trans_observer.h index 837bad4c50f..cde8163cf4f 100644 --- a/sql/wsrep_trans_observer.h +++ b/sql/wsrep_trans_observer.h @@ -229,6 +229,10 @@ static inline int wsrep_before_prepare(THD* thd, bool all) WSREP_DEBUG("wsrep_before_prepare: %d", wsrep_is_real(thd, all)); int ret= 0; DBUG_ASSERT(wsrep_run_commit_hook(thd, all)); + if ((ret= thd->wsrep_parallel_slave_wait_for_prior_commit())) + { + DBUG_RETURN(ret); + } if ((ret= thd->wsrep_cs().before_prepare()) == 0) { DBUG_ASSERT(!thd->wsrep_trx().ws_meta().gtid().is_undefined());