mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
MDEV-35110 Deadlock on Replica during BACKUP STAGE BLOCK_COMMIT on XA transactions
This is an extension of MDEV-30423 "Deadlock on Replica during BACKUP STAGE BLOCK_COMMIT on XA transactions" The original commit in MDEV-30423 was not complete as some usage in XA of MDL_BACKUP_COMMIT locks did not set thd->backup_commit_lock. This is required to be set when using parallel replication. Fixed by ensuring that all usage of BACKUP_COMMIT lock i XA is uniform and all sets thd->backup_commit_lock. I also changed all locks to be MDL_EXPLICIT to keep also that part uniform. A regression test is added.
This commit is contained in:
parent
8c7786e7d5
commit
066f920484
4 changed files with 168 additions and 68 deletions
42
mysql-test/suite/rpl/r/parallel_backup_xa_debug.result
Normal file
42
mysql-test/suite/rpl/r/parallel_backup_xa_debug.result
Normal file
|
@ -0,0 +1,42 @@
|
|||
include/master-slave.inc
|
||||
[connection master]
|
||||
connection master;
|
||||
CREATE TABLE t (a INT) ENGINE = innodb;
|
||||
connection slave;
|
||||
include/stop_slave.inc
|
||||
SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
|
||||
SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
|
||||
SET @@global.slave_parallel_threads= 2;
|
||||
SET @@global.slave_parallel_mode = 'optimistic';
|
||||
connection master;
|
||||
# MDEV-35110
|
||||
SET @@gtid_seq_no=100;
|
||||
insert into t set a=1;
|
||||
xa start 'x';
|
||||
insert into t set a=2;
|
||||
xa end 'x';
|
||||
xa prepare 'x';
|
||||
connection slave;
|
||||
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
|
||||
start slave;
|
||||
connection slave1;
|
||||
backup stage start;
|
||||
backup stage block_commit;
|
||||
connection slave;
|
||||
SET debug_sync = 'now SIGNAL continue_worker';
|
||||
SET debug_sync = RESET;
|
||||
connection slave1;
|
||||
backup stage end;
|
||||
connection master;
|
||||
xa rollback 'x';
|
||||
connection slave;
|
||||
# Clean up.
|
||||
connection slave;
|
||||
include/stop_slave.inc
|
||||
SET @@global.debug_dbug="";
|
||||
SET @@global.slave_parallel_threads= @old_parallel_threads;
|
||||
SET @@global.slave_parallel_mode = @old_parallel_mode;
|
||||
include/start_slave.inc
|
||||
connection server_1;
|
||||
DROP TABLE t;
|
||||
include/rpl_end.inc
|
64
mysql-test/suite/rpl/t/parallel_backup_xa_debug.test
Normal file
64
mysql-test/suite/rpl/t/parallel_backup_xa_debug.test
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Verify deadlock between XA-PREPARE and BACKUP on the optimistic slave
|
||||
--source include/have_debug.inc
|
||||
--source include/have_debug_sync.inc
|
||||
--source include/have_innodb.inc
|
||||
# The test is not format specific, MIXED is required to optimize testing time
|
||||
--source include/have_binlog_format_mixed.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
--connection master
|
||||
CREATE TABLE t (a INT) ENGINE = innodb;
|
||||
|
||||
--sync_slave_with_master
|
||||
--source include/stop_slave.inc
|
||||
SET @old_parallel_threads= @@GLOBAL.slave_parallel_threads;
|
||||
SET @old_parallel_mode = @@GLOBAL.slave_parallel_mode;
|
||||
SET @@global.slave_parallel_threads= 2;
|
||||
SET @@global.slave_parallel_mode = 'optimistic';
|
||||
|
||||
--connection master
|
||||
--echo # MDEV-35110
|
||||
SET @@gtid_seq_no=100;
|
||||
insert into t set a=1;
|
||||
xa start 'x';
|
||||
insert into t set a=2;
|
||||
xa end 'x';
|
||||
xa prepare 'x';
|
||||
|
||||
--connection slave
|
||||
SET @@global.debug_dbug="+d,hold_worker_on_schedule";
|
||||
start slave;
|
||||
--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for prior transaction to commit"
|
||||
--source include/wait_condition.inc
|
||||
|
||||
--connection slave1
|
||||
backup stage start;
|
||||
--send backup stage block_commit
|
||||
|
||||
--connection slave
|
||||
--let $wait_condition= SELECT count(*) = 1 FROM information_schema.processlist WHERE state LIKE "Waiting for backup lock"
|
||||
SET debug_sync = 'now SIGNAL continue_worker';
|
||||
SET debug_sync = RESET;
|
||||
|
||||
--connection slave1
|
||||
reap;
|
||||
backup stage end;
|
||||
|
||||
--connection master
|
||||
xa rollback 'x';
|
||||
|
||||
--sync_slave_with_master
|
||||
|
||||
--echo # Clean up.
|
||||
--connection slave
|
||||
--source include/stop_slave.inc
|
||||
SET @@global.debug_dbug="";
|
||||
SET @@global.slave_parallel_threads= @old_parallel_threads;
|
||||
SET @@global.slave_parallel_mode = @old_parallel_mode;
|
||||
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection server_1
|
||||
DROP TABLE t;
|
||||
|
||||
--source include/rpl_end.inc
|
|
@ -1781,7 +1781,13 @@ int ha_commit_trans(THD *thd, bool all)
|
|||
DBUG_PRINT("info", ("is_real_trans: %d rw_trans: %d rw_ha_count: %d",
|
||||
is_real_trans, rw_trans, rw_ha_count));
|
||||
|
||||
if (rw_trans)
|
||||
/*
|
||||
backup_commit_lock may have already been set.
|
||||
This can happen in case of spider that does xa_commit() by
|
||||
calling ha_commit_trans() from spader_commit().
|
||||
*/
|
||||
|
||||
if (rw_trans && !thd->backup_commit_lock)
|
||||
{
|
||||
/*
|
||||
Acquire a metadata lock which will ensure that COMMIT is blocked
|
||||
|
@ -2052,8 +2058,8 @@ end:
|
|||
not needed.
|
||||
*/
|
||||
thd->mdl_context.release_lock(mdl_backup.ticket);
|
||||
thd->backup_commit_lock= 0;
|
||||
}
|
||||
thd->backup_commit_lock= 0;
|
||||
#ifdef WITH_WSREP
|
||||
if (wsrep_is_active(thd) && is_real_trans && !error &&
|
||||
(rw_ha_count == 0 || all) &&
|
||||
|
|
120
sql/xa.cc
120
sql/xa.cc
|
@ -507,6 +507,40 @@ bool trans_xa_end(THD *thd)
|
|||
}
|
||||
|
||||
|
||||
/*
|
||||
Get the BACKUP_COMMIT lock for the duration of the XA.
|
||||
|
||||
The metadata lock which will ensure that COMMIT is blocked
|
||||
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
||||
progress blocks FTWRL) and also by MDL_BACKUP_WAIT_COMMIT.
|
||||
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
||||
|
||||
Note that the function sets thd->backup_lock on sucess. The caller needs
|
||||
to reset thd->backup_commit_lock before returning!
|
||||
*/
|
||||
|
||||
static bool trans_xa_get_backup_lock(THD *thd, MDL_request *mdl_request)
|
||||
{
|
||||
DBUG_ASSERT(thd->backup_commit_lock == 0);
|
||||
MDL_REQUEST_INIT(mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
||||
MDL_EXPLICIT);
|
||||
if (thd->mdl_context.acquire_lock(mdl_request,
|
||||
thd->variables.lock_wait_timeout))
|
||||
return 1;
|
||||
thd->backup_commit_lock= mdl_request;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void trans_xa_release_backup_lock(THD *thd)
|
||||
{
|
||||
if (thd->backup_commit_lock)
|
||||
{
|
||||
thd->mdl_context.release_lock(thd->backup_commit_lock->ticket);
|
||||
thd->backup_commit_lock= 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
Put a XA transaction in the PREPARED state.
|
||||
|
||||
|
@ -529,22 +563,15 @@ bool trans_xa_prepare(THD *thd)
|
|||
my_error(ER_XAER_NOTA, MYF(0));
|
||||
else
|
||||
{
|
||||
/*
|
||||
Acquire metadata lock which will ensure that COMMIT is blocked
|
||||
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
||||
progress blocks FTWRL).
|
||||
|
||||
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
||||
*/
|
||||
MDL_request mdl_request;
|
||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
||||
MDL_STATEMENT);
|
||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout) ||
|
||||
if (trans_xa_get_backup_lock(thd, &mdl_request) ||
|
||||
ha_prepare(thd))
|
||||
{
|
||||
if (!mdl_request.ticket)
|
||||
{
|
||||
/* Failed to get the backup lock */
|
||||
ha_rollback_trans(thd, TRUE);
|
||||
}
|
||||
thd->variables.option_bits&= ~(OPTION_BEGIN | OPTION_KEEP_LOG);
|
||||
thd->transaction->all.reset();
|
||||
thd->server_status&=
|
||||
|
@ -572,6 +599,7 @@ bool trans_xa_prepare(THD *thd)
|
|||
res= thd->variables.pseudo_slave_mode || thd->slave_thread ?
|
||||
slave_applier_reset_xa_trans(thd) : 0;
|
||||
}
|
||||
trans_xa_release_backup_lock(thd);
|
||||
}
|
||||
|
||||
DBUG_RETURN(res);
|
||||
|
@ -635,19 +663,8 @@ bool trans_xa_commit(THD *thd)
|
|||
res= 1;
|
||||
goto _end_external_xid;
|
||||
}
|
||||
|
||||
res= xa_trans_rolled_back(xs);
|
||||
/*
|
||||
Acquire metadata lock which will ensure that COMMIT is blocked
|
||||
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
||||
progress blocks FTWRL).
|
||||
|
||||
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
||||
*/
|
||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
||||
MDL_EXPLICIT);
|
||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout))
|
||||
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||
{
|
||||
/*
|
||||
We can't rollback an XA transaction on lock failure due to
|
||||
|
@ -659,14 +676,11 @@ bool trans_xa_commit(THD *thd)
|
|||
res= true;
|
||||
goto _end_external_xid;
|
||||
}
|
||||
else
|
||||
{
|
||||
thd->backup_commit_lock= &mdl_request;
|
||||
}
|
||||
DBUG_ASSERT(!xid_state.xid_cache_element);
|
||||
|
||||
xid_state.xid_cache_element= xs;
|
||||
ha_commit_or_rollback_by_xid(thd->lex->xid, !res);
|
||||
|
||||
if (!res && thd->is_error())
|
||||
{
|
||||
// hton completion error retains xs/xid in the cache,
|
||||
|
@ -682,11 +696,7 @@ bool trans_xa_commit(THD *thd)
|
|||
res= res || thd->is_error();
|
||||
if (!xid_deleted)
|
||||
xs->acquired_to_recovered();
|
||||
if (mdl_request.ticket)
|
||||
{
|
||||
thd->mdl_context.release_lock(mdl_request.ticket);
|
||||
thd->backup_commit_lock= 0;
|
||||
}
|
||||
trans_xa_release_backup_lock(thd);
|
||||
}
|
||||
else
|
||||
my_error(ER_XAER_NOTA, MYF(0));
|
||||
|
@ -709,7 +719,8 @@ bool trans_xa_commit(THD *thd)
|
|||
if ((res= MY_TEST(r)))
|
||||
my_error(r == 1 ? ER_XA_RBROLLBACK : ER_XAER_RMERR, MYF(0));
|
||||
}
|
||||
else if (thd->transaction->xid_state.xid_cache_element->xa_state == XA_PREPARED)
|
||||
else if (thd->transaction->xid_state.xid_cache_element->xa_state ==
|
||||
XA_PREPARED)
|
||||
{
|
||||
MDL_request mdl_request;
|
||||
if (thd->lex->xa_opt != XA_NONE)
|
||||
|
@ -718,18 +729,7 @@ bool trans_xa_commit(THD *thd)
|
|||
DBUG_RETURN(TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
Acquire metadata lock which will ensure that COMMIT is blocked
|
||||
by active FLUSH TABLES WITH READ LOCK (and vice versa COMMIT in
|
||||
progress blocks FTWRL).
|
||||
|
||||
We allow FLUSHer to COMMIT; we assume FLUSHer knows what it does.
|
||||
*/
|
||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
||||
MDL_TRANSACTION);
|
||||
|
||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout))
|
||||
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||
{
|
||||
/*
|
||||
We can't rollback an XA transaction on lock failure due to
|
||||
|
@ -756,6 +756,7 @@ bool trans_xa_commit(THD *thd)
|
|||
}
|
||||
|
||||
thd->m_transaction_psi= NULL;
|
||||
trans_xa_release_backup_lock(thd);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -793,7 +794,8 @@ bool trans_xa_commit(THD *thd)
|
|||
bool trans_xa_rollback(THD *thd)
|
||||
{
|
||||
XID_STATE &xid_state= thd->transaction->xid_state;
|
||||
|
||||
MDL_request mdl_request;
|
||||
bool error;
|
||||
DBUG_ENTER("trans_xa_rollback");
|
||||
|
||||
if (!xid_state.is_explicit_XA() ||
|
||||
|
@ -814,7 +816,6 @@ bool trans_xa_rollback(THD *thd)
|
|||
{
|
||||
bool res;
|
||||
bool xid_deleted= false;
|
||||
MDL_request mdl_request;
|
||||
bool rw_trans= (xs->rm_error != ER_XA_RBROLLBACK);
|
||||
|
||||
if (rw_trans && thd->is_read_only_ctx())
|
||||
|
@ -824,10 +825,7 @@ bool trans_xa_rollback(THD *thd)
|
|||
goto _end_external_xid;
|
||||
}
|
||||
|
||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
||||
MDL_EXPLICIT);
|
||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout))
|
||||
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||
{
|
||||
/*
|
||||
We can't rollback an XA transaction on lock failure due to
|
||||
|
@ -838,10 +836,6 @@ bool trans_xa_rollback(THD *thd)
|
|||
|
||||
goto _end_external_xid;
|
||||
}
|
||||
else
|
||||
{
|
||||
thd->backup_commit_lock= &mdl_request;
|
||||
}
|
||||
res= xa_trans_rolled_back(xs);
|
||||
DBUG_ASSERT(!xid_state.xid_cache_element);
|
||||
|
||||
|
@ -858,11 +852,7 @@ bool trans_xa_rollback(THD *thd)
|
|||
xid_state.xid_cache_element= 0;
|
||||
if (!xid_deleted)
|
||||
xs->acquired_to_recovered();
|
||||
if (mdl_request.ticket)
|
||||
{
|
||||
thd->mdl_context.release_lock(mdl_request.ticket);
|
||||
thd->backup_commit_lock= 0;
|
||||
}
|
||||
trans_xa_release_backup_lock(thd);
|
||||
}
|
||||
else
|
||||
my_error(ER_XAER_NOTA, MYF(0));
|
||||
|
@ -879,11 +869,7 @@ bool trans_xa_rollback(THD *thd)
|
|||
DBUG_RETURN(TRUE);
|
||||
}
|
||||
|
||||
MDL_request mdl_request;
|
||||
MDL_REQUEST_INIT(&mdl_request, MDL_key::BACKUP, "", "", MDL_BACKUP_COMMIT,
|
||||
MDL_STATEMENT);
|
||||
if (thd->mdl_context.acquire_lock(&mdl_request,
|
||||
thd->variables.lock_wait_timeout))
|
||||
if (trans_xa_get_backup_lock(thd, &mdl_request))
|
||||
{
|
||||
/*
|
||||
We can't rollback an XA transaction on lock failure due to
|
||||
|
@ -894,7 +880,9 @@ bool trans_xa_rollback(THD *thd)
|
|||
DBUG_RETURN(true);
|
||||
}
|
||||
|
||||
DBUG_RETURN(xa_trans_force_rollback(thd));
|
||||
error= xa_trans_force_rollback(thd);
|
||||
trans_xa_release_backup_lock(thd);
|
||||
DBUG_RETURN(error);
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue