MDEV-27568 Parallel async replication hangs on a Galera node

Parallel slave applying can cause a deadlock between DDL and other events.
A GTID with a lower seqno can be blocked in Galera once the node has entered
TOI mode, while the DDL GTID, which has a higher seqno, can be blocked
until the previous GTIDs have been applied locally.

Fix is to check prior commits before entering TOI.

Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
This commit is contained in:
mkaruza 2022-02-16 15:05:58 +01:00 committed by Jan Lindström
parent c63eab2c68
commit 304f75c973
2 changed files with 19 additions and 1 deletions

View file

@ -4932,7 +4932,10 @@ public:
{
return m_wsrep_next_trx_id;
}
/*
  If this thread is a replication slave applier running in parallel
  execution mode (rgi_slave is set and rgi_slave->is_parallel_exec is true),
  wait for prior commits via wait_for_prior_commit().

  @return true  if the wait was attempted and wait_for_prior_commit()
                returned non-zero
  @return false otherwise, including when the thread is not a parallel
                slave applier and no wait is performed
*/
bool wsrep_parallel_slave_wait_for_prior_commit();
private:
wsrep_trx_id_t m_wsrep_next_trx_id; /* cast from query_id_t */
/* wsrep-lib */

View file

@ -2247,6 +2247,12 @@ int wsrep_to_isolation_begin(THD *thd, const char *db_, const char *table_,
if (!wsrep_thd_is_local(thd))
return 0;
if (thd->wsrep_parallel_slave_wait_for_prior_commit())
{
WSREP_WARN("TOI: wait_for_prior_commit() returned error.");
return -1;
}
int ret= 0;
mysql_mutex_lock(&thd->LOCK_thd_data);
@ -3064,6 +3070,15 @@ enum wsrep::streaming_context::fragment_unit wsrep_fragment_unit(ulong unit)
}
}
/*
  Wait for prior commits when this thread is a parallel slave applier.

  The wait is attempted only when rgi_slave is set and flagged as parallel
  execution; in every other case the function returns false immediately.

  @return true  if wait_for_prior_commit() was called and returned non-zero
  @return false otherwise
*/
bool THD::wsrep_parallel_slave_wait_for_prior_commit()
{
  const bool parallel_applier= rgi_slave && rgi_slave->is_parallel_exec;
  if (!parallel_applier)
    return false;

  /* Any non-zero result from the wait is reported to the caller as true. */
  return wait_for_prior_commit() != 0;
}
/***** callbacks for wsrep service ************/
my_bool get_wsrep_recovery()