mirror of
https://github.com/MariaDB/server.git
synced 2025-01-29 02:05:57 +01:00
MDEV-25717 Assertion `owning_thread_id_ == wsrep::this_thread::get_id()'
A test case to reproduce the issue. The actual fix is in the Galera library. Reviewed-by: Jan Lindström <jan.lindstrom@mariadb.com>
This commit is contained in:
parent
112b23969a
commit
ac2857a5fb
5 changed files with 193 additions and 3 deletions
47
mysql-test/suite/galera_sr/r/MDEV-25717.result
Normal file
47
mysql-test/suite/galera_sr/r/MDEV-25717.result
Normal file
|
@ -0,0 +1,47 @@
|
|||
connection node_2;
|
||||
connection node_1;
|
||||
connection node_1;
|
||||
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) Engine=InnoDB;
|
||||
INSERT INTO t1 VALUES (1), (2), (3);
|
||||
connection node_2;
|
||||
SET SESSION wsrep_trx_fragment_size = 1;
|
||||
START TRANSACTION;
|
||||
INSERT INTO t1 VALUES (4);
|
||||
connection node_1;
|
||||
SELECT COUNT(*) FROM t1;
|
||||
COUNT(*)
|
||||
3
|
||||
connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2;
|
||||
connection node_2a;
|
||||
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_toi";
|
||||
connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1;
|
||||
connection node_1a;
|
||||
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_bf_abort";
|
||||
connection node_1;
|
||||
TRUNCATE TABLE t1;
|
||||
connection node_1a;
|
||||
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_bf_abort_reached";
|
||||
connection node_2a;
|
||||
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_toi_reached";
|
||||
connection node_2;
|
||||
INSERT INTO t1 VALUES (5);
|
||||
connection node_2a;
|
||||
SET SESSION wsrep_sync_wait = 0;
|
||||
SET SESSION wsrep_sync_wait = DEFAULT;
|
||||
SET GLOBAL DEBUG_DBUG = "";
|
||||
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_toi";
|
||||
connection node_2;
|
||||
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
|
||||
connection node_1a;
|
||||
SET SESSION wsrep_sync_wait=0;
|
||||
SET GLOBAL DEBUG_DBUG = "+d,sync.wsrep_log_dummy_write_set";
|
||||
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_bf_abort";
|
||||
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_log_dummy_write_set_reached";
|
||||
connection node_1;
|
||||
connection node_2;
|
||||
SET GLOBAL DEBUG_DBUG = "";
|
||||
SET DEBUG_SYNC = "RESET";
|
||||
connection node_1;
|
||||
SET GLOBAL DEBUG_DBUG = "";
|
||||
SET DEBUG_SYNC = "RESET";
|
||||
DROP TABLE t1;
|
113
mysql-test/suite/galera_sr/t/MDEV-25717.test
Normal file
113
mysql-test/suite/galera_sr/t/MDEV-25717.test
Normal file
|
@ -0,0 +1,113 @@
|
|||
#
|
||||
# MDEV-25717 Assertion `owning_thread_id_ == wsrep::this_thread::get_id()'
|
||||
#
|
||||
# This test exposes a race condition between rollbacker thread and rollback
|
||||
# fragment processing.
|
||||
#
|
||||
|
||||
--source include/galera_cluster.inc
|
||||
--source include/have_debug_sync.inc
|
||||
|
||||
--connection node_1
|
||||
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) Engine=InnoDB;
|
||||
INSERT INTO t1 VALUES (1), (2), (3);
|
||||
|
||||
#
|
||||
# On node_2 we start an SR transaction; it is going to
|
||||
# be BF aborted later on
|
||||
#
|
||||
--connection node_2
|
||||
SET SESSION wsrep_trx_fragment_size = 1;
|
||||
START TRANSACTION;
|
||||
INSERT INTO t1 VALUES (4);
|
||||
|
||||
--connection node_1
|
||||
SELECT COUNT(*) FROM t1; # Sync wait
|
||||
|
||||
#
|
||||
# Issue a conflicting TRUNCATE statement on node_1:
|
||||
# - on node_2, block it before it is going to apply
|
||||
# - on node_1, block it before it BF aborts the INSERT
|
||||
#
|
||||
--connect node_2a, 127.0.0.1, root, , test, $NODE_MYPORT_2
|
||||
--connection node_2a
|
||||
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_toi";
|
||||
|
||||
--connect node_1a, 127.0.0.1, root, , test, $NODE_MYPORT_1
|
||||
--connection node_1a
|
||||
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_bf_abort";
|
||||
|
||||
--connection node_1
|
||||
--send TRUNCATE TABLE t1
|
||||
|
||||
--connection node_1a
|
||||
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_bf_abort_reached";
|
||||
|
||||
--connection node_2a
|
||||
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_toi_reached";
|
||||
|
||||
#
|
||||
# Generate one more fragment on the SR transaction.
|
||||
# This is going to fail certification and result
|
||||
# in a rollback fragment.
|
||||
#
|
||||
--connection node_2
|
||||
--let $expected_cert_failures = `SELECT VARIABLE_VALUE + 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'`
|
||||
|
||||
--send INSERT INTO t1 VALUES (5)
|
||||
|
||||
#
|
||||
# Wait until after certify and observe the certification
|
||||
# failure. Let both continue and we are done on node_2.
|
||||
#
|
||||
--connection node_2a
|
||||
SET SESSION wsrep_sync_wait = 0;
|
||||
--let $wait_condition = SELECT VARIABLE_VALUE = $expected_cert_failures FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_cert_failures'
|
||||
--source include/wait_condition.inc
|
||||
SET SESSION wsrep_sync_wait = DEFAULT;
|
||||
|
||||
SET GLOBAL DEBUG_DBUG = "";
|
||||
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_toi";
|
||||
|
||||
--connection node_2
|
||||
--error ER_LOCK_DEADLOCK
|
||||
--reap
|
||||
|
||||
#
|
||||
# On node_1 we expect the following things:
|
||||
# - the TRUNCATE should successfully bf abort the transaction
|
||||
# - A rollback fragment should be delivered as a result of
|
||||
# certification failure. We expect the rollback fragment to
|
||||
# be delivered after TRUNCATE has bf aborted, therefore rollback
|
||||
# fragment logs a dummy writeset.
|
||||
#
|
||||
--connection node_1a
|
||||
SET SESSION wsrep_sync_wait=0;
|
||||
SET GLOBAL DEBUG_DBUG = "+d,sync.wsrep_log_dummy_write_set";
|
||||
|
||||
# Signal the TRUNCATE to continue and observe the BF abort
|
||||
--let $expected_bf_aborts = `SELECT VARIABLE_VALUE + 1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_bf_aborts'`
|
||||
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_bf_abort";
|
||||
|
||||
# Expect a timeout if the bug is present
|
||||
--let $wait_condition = SELECT VARIABLE_VALUE = $expected_bf_aborts FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_bf_aborts'
|
||||
--source include/wait_condition.inc
|
||||
|
||||
# Observe logging of dummy writeset
|
||||
SET DEBUG_SYNC = "now WAIT_FOR sync.wsrep_log_dummy_write_set_reached";
|
||||
|
||||
# TRUNCATE succeeds
|
||||
--connection node_1
|
||||
--reap
|
||||
|
||||
#
|
||||
# Cleanup
|
||||
#
|
||||
--connection node_2
|
||||
SET GLOBAL DEBUG_DBUG = "";
|
||||
SET DEBUG_SYNC = "RESET";
|
||||
|
||||
--connection node_1
|
||||
SET GLOBAL DEBUG_DBUG = "";
|
||||
SET DEBUG_SYNC = "RESET";
|
||||
DROP TABLE t1;
|
|
@ -379,6 +379,16 @@ int Wsrep_high_priority_service::apply_toi(const wsrep::ws_meta& ws_meta,
|
|||
WSREP_DEBUG("Wsrep_high_priority_service::apply_toi: %lld",
|
||||
client_state.toi_meta().seqno().get());
|
||||
|
||||
DBUG_EXECUTE_IF("sync.wsrep_apply_toi",
|
||||
{
|
||||
const char act[]=
|
||||
"now "
|
||||
"SIGNAL sync.wsrep_apply_toi_reached "
|
||||
"WAIT_FOR signal.wsrep_apply_toi";
|
||||
DBUG_ASSERT(!debug_sync_set_action(thd,
|
||||
STRING_WITH_LEN(act)));
|
||||
};);
|
||||
|
||||
int ret= wsrep_apply_events(thd, m_rli, data.data(), data.size());
|
||||
if (ret != 0 || thd->wsrep_has_ignored_error)
|
||||
{
|
||||
|
@ -427,6 +437,15 @@ int Wsrep_high_priority_service::log_dummy_write_set(const wsrep::ws_handle& ws_
|
|||
DBUG_PRINT("info",
|
||||
("Wsrep_high_priority_service::log_dummy_write_set: seqno=%lld",
|
||||
ws_meta.seqno().get()));
|
||||
DBUG_EXECUTE_IF("sync.wsrep_log_dummy_write_set",
|
||||
{
|
||||
const char act[]=
|
||||
"now "
|
||||
"SIGNAL sync.wsrep_log_dummy_write_set_reached ";
|
||||
DBUG_ASSERT(!debug_sync_set_action(m_thd,
|
||||
STRING_WITH_LEN(act)));
|
||||
};);
|
||||
|
||||
if (ws_meta.ordered())
|
||||
{
|
||||
wsrep::client_state& cs(m_thd->wsrep_cs());
|
||||
|
|
|
@ -340,11 +340,20 @@ int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal)
|
|||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
bool wsrep_bf_abort(const THD* bf_thd, THD* victim_thd)
|
||||
bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd)
|
||||
{
|
||||
WSREP_LOG_THD(bf_thd, "BF aborter before");
|
||||
WSREP_LOG_THD(victim_thd, "victim before");
|
||||
wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno());
|
||||
|
||||
DBUG_EXECUTE_IF("sync.wsrep_bf_abort",
|
||||
{
|
||||
const char act[]=
|
||||
"now "
|
||||
"SIGNAL sync.wsrep_bf_abort_reached "
|
||||
"WAIT_FOR signal.wsrep_bf_abort";
|
||||
DBUG_ASSERT(!debug_sync_set_action(bf_thd,
|
||||
STRING_WITH_LEN(act)));
|
||||
};);
|
||||
|
||||
if (WSREP(victim_thd) && !victim_thd->wsrep_trx().active())
|
||||
{
|
||||
|
@ -362,6 +371,8 @@ bool wsrep_bf_abort(const THD* bf_thd, THD* victim_thd)
|
|||
}
|
||||
|
||||
bool ret;
|
||||
wsrep::seqno bf_seqno(bf_thd->wsrep_trx().ws_meta().seqno());
|
||||
|
||||
if (wsrep_thd_is_toi(bf_thd))
|
||||
{
|
||||
ret= victim_thd->wsrep_cs().total_order_bf_abort(bf_seqno);
|
||||
|
|
|
@ -87,7 +87,7 @@ int wsrep_show_bf_aborts (THD *thd, SHOW_VAR *var, char *buff,
|
|||
bool wsrep_create_appliers(long threads, bool mutex_protected=false);
|
||||
void wsrep_create_rollbacker();
|
||||
|
||||
bool wsrep_bf_abort(const THD*, THD*);
|
||||
bool wsrep_bf_abort(THD* bf_thd, THD* victim_thd);
|
||||
int wsrep_abort_thd(THD *bf_thd_ptr, THD *victim_thd_ptr, my_bool signal);
|
||||
|
||||
extern void wsrep_thd_set_PA_safe(void *thd_ptr, my_bool safe);
|
||||
|
|
Loading…
Add table
Reference in a new issue