mirror of
https://github.com/MariaDB/server.git
synced 2025-03-24 16:08:42 +01:00
MDEV-34122: Assertion `entry' failed in Active_tranx::assert_thd_is_waiter
If semi-sync is switched off then on while a transaction is in-between binlogging and waiting for an ACK, the semi-sync state of the transaction is removed, leading to a debug assertion that indicates the transaction tried to wait, but cannot receive an ACK signal. More specifically, when semi-sync is switched off, the Active_tranx list is cleared (where a transaction adds an entry to this list during binlogging), and each entry in this list saves the thread which will wait for an ACK, and the thread has the COND variable to signal to wake itself. So if the entry is lost, the Ack_receiver thread won’t be able to find the thread to wake up when an ACK comes in The fix is to ensure that the entry exists before awaiting the ACK, and if there is no entry, skip the wait. In debug builds, an informative message is written explaining that the transaction is skipping its wait. Additional debug-build only logic is added to ensure that the cause of the missing entry is due to semi-sync being turned off and on Reviewed By: ============ Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
parent
e8db5c8760
commit
1ed30e08af
6 changed files with 243 additions and 18 deletions
|
@ -32,4 +32,75 @@ set @@global.binlog_commit_wait_count=@save_bgc_count;
|
|||
set @@global.binlog_commit_wait_usec=@save_bgc_usec;
|
||||
set @@global.debug_dbug=@save_debug_dbug;
|
||||
drop table t1, t2, t3;
|
||||
#
|
||||
# MDEV-34122
|
||||
# If semi-sync is switched off then on while a transaction is
|
||||
# in-between binlogging and waiting for an ACK, ensure that the
|
||||
# transaction skips the wait altogether (otherwise it would time-out).
|
||||
# Note that prior to MDEV-34122, there was a debug assertion that would
|
||||
# trigger if the transaction tried to wait but could not receive an ACK
|
||||
# signal.
|
||||
#
|
||||
# MDEV-34122.a: Test wait_point = AFTER_SYNC
|
||||
# Here, debug_sync is used to pause the leader thread between reporting
|
||||
# the binlogging to semi-sync, and starting the wait for ACK; and during
|
||||
# this pause, semi-sync is manually switched off and on.
|
||||
connection master;
|
||||
set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
|
||||
set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
|
||||
set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
|
||||
set @@global.rpl_semi_sync_master_wait_no_slave= 1;
|
||||
set @@global.debug_dbug="+d,semisync_log_skip_trx_wait";
|
||||
# Waiting for semi-sync to turn on..
|
||||
connection slave;
|
||||
connection master;
|
||||
create table t (a int) engine=innodb;
|
||||
connection slave;
|
||||
# Wait no slave is on, so we don't need to slave on to wait for the
|
||||
# ACK, and this way we guarantee the ACK doesn't come, so we can verify
|
||||
# the informational message is displayed
|
||||
connection slave;
|
||||
include/stop_slave.inc
|
||||
connection server_1;
|
||||
start transaction;
|
||||
insert into t values (0);
|
||||
set debug_sync= "commit_after_release_LOCK_log SIGNAL trx_binlogged WAIT_FOR continue_commit";
|
||||
commit;
|
||||
connection master;
|
||||
set debug_sync= "now WAIT_FOR trx_binlogged";
|
||||
# Switching semi-sync off/on
|
||||
set @@global.rpl_semi_sync_master_enabled= 0;
|
||||
set @@global.rpl_semi_sync_master_enabled= 1;
|
||||
# Resuming transaction to await ACK
|
||||
set debug_sync= "now SIGNAL continue_commit";
|
||||
connection server_1;
|
||||
include/assert_grep.inc [Ensure error log shows that transaction is skipping its semi-sync wait]
|
||||
# Cleanup
|
||||
connection slave;
|
||||
include/start_slave.inc
|
||||
connection master;
|
||||
drop table t;
|
||||
connection slave;
|
||||
#
|
||||
# MDEV-34122.b: Test wait_point = AFTER_COMMIT
|
||||
# Here, use a transaction with a non-transactional statement to write to
|
||||
# the binlog directly, and turn off/on semi-sync before committing the
|
||||
# transaction.
|
||||
connection master;
|
||||
set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
|
||||
# Waiting for semi-sync to turn on..
|
||||
connection slave;
|
||||
connection master;
|
||||
create table tn (a int) engine=Aria;
|
||||
start transaction;
|
||||
insert into tn values (NULL);
|
||||
set @@global.rpl_semi_sync_master_enabled= 0;
|
||||
set @@global.rpl_semi_sync_master_enabled= 1;
|
||||
commit;
|
||||
# Cleanup
|
||||
connection master;
|
||||
drop table tn;
|
||||
set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
|
||||
set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
|
||||
set @@global.debug_dbug=@save_debug_dbug;
|
||||
include/rpl_end.inc
|
||||
|
|
|
@ -19,9 +19,12 @@
|
|||
# References:
|
||||
# MDEV-33551: Semi-sync Wait Point AFTER_COMMIT Slow on Workloads with Heavy
|
||||
# Concurrency
|
||||
# MDEV-34122: Assertion `entry' failed in Active_tranx::assert_thd_is_waiter
|
||||
#
|
||||
--source include/have_binlog_format_row.inc
|
||||
--source include/have_innodb.inc
|
||||
--source include/have_debug.inc
|
||||
--source include/have_debug_sync.inc
|
||||
--source include/master-slave.inc
|
||||
|
||||
--connection master
|
||||
|
@ -77,4 +80,118 @@ set @@global.binlog_commit_wait_usec=@save_bgc_usec;
|
|||
set @@global.debug_dbug=@save_debug_dbug;
|
||||
drop table t1, t2, t3;
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-34122
|
||||
--echo # If semi-sync is switched off then on while a transaction is
|
||||
--echo # in-between binlogging and waiting for an ACK, ensure that the
|
||||
--echo # transaction skips the wait altogether (otherwise it would time-out).
|
||||
--echo # Note that prior to MDEV-34122, there was a debug assertion that would
|
||||
--echo # trigger if the transaction tried to wait but could not receive an ACK
|
||||
--echo # signal.
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-34122.a: Test wait_point = AFTER_SYNC
|
||||
--echo # Here, debug_sync is used to pause the leader thread between reporting
|
||||
--echo # the binlogging to semi-sync, and starting the wait for ACK; and during
|
||||
--echo # this pause, semi-sync is manually switched off and on.
|
||||
|
||||
|
||||
--connection master
|
||||
set @old_master_wait_point= @@global.rpl_semi_sync_master_wait_point;
|
||||
set @old_master_wait_no_slave= @@global.rpl_semi_sync_master_wait_no_slave;
|
||||
set @@global.rpl_semi_sync_master_wait_point= AFTER_SYNC;
|
||||
set @@global.rpl_semi_sync_master_wait_no_slave= 1;
|
||||
--eval set @@global.debug_dbug="+d,semisync_log_skip_trx_wait"
|
||||
|
||||
--echo # Waiting for semi-sync to turn on..
|
||||
let $status_var= rpl_semi_sync_master_status;
|
||||
let $status_var_value= ON;
|
||||
source include/wait_for_status_var.inc;
|
||||
--connection slave
|
||||
let $status_var= rpl_semi_sync_slave_status;
|
||||
let $status_var_value= ON;
|
||||
source include/wait_for_status_var.inc;
|
||||
|
||||
--connection master
|
||||
create table t (a int) engine=innodb;
|
||||
--sync_slave_with_master
|
||||
|
||||
--echo # Wait no slave is on, so we don't need to slave on to wait for the
|
||||
--echo # ACK, and this way we guarantee the ACK doesn't come, so we can verify
|
||||
--echo # the informational message is displayed
|
||||
--connection slave
|
||||
--source include/stop_slave.inc
|
||||
|
||||
--connection server_1
|
||||
start transaction;
|
||||
insert into t values (0);
|
||||
set debug_sync= "commit_after_release_LOCK_log SIGNAL trx_binlogged WAIT_FOR continue_commit";
|
||||
--send commit
|
||||
|
||||
--connection master
|
||||
set debug_sync= "now WAIT_FOR trx_binlogged";
|
||||
|
||||
--echo # Switching semi-sync off/on
|
||||
set @@global.rpl_semi_sync_master_enabled= 0;
|
||||
set @@global.rpl_semi_sync_master_enabled= 1;
|
||||
let $status_var= rpl_semi_sync_master_status;
|
||||
let $status_var_value= ON;
|
||||
source include/wait_for_status_var.inc;
|
||||
|
||||
--echo # Resuming transaction to await ACK
|
||||
set debug_sync= "now SIGNAL continue_commit";
|
||||
|
||||
--connection server_1
|
||||
--reap
|
||||
|
||||
--let $assert_text= Ensure error log shows that transaction is skipping its semi-sync wait
|
||||
--let $assert_select=Skipping semi-sync wait for transaction
|
||||
--let $assert_file= $MYSQLTEST_VARDIR/log/mysqld.1.err
|
||||
--let $assert_count= 1
|
||||
--let $assert_only_after=CURRENT_TEST
|
||||
--source include/assert_grep.inc
|
||||
|
||||
--echo # Cleanup
|
||||
--connection slave
|
||||
--source include/start_slave.inc
|
||||
|
||||
--connection master
|
||||
drop table t;
|
||||
--sync_slave_with_master
|
||||
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-34122.b: Test wait_point = AFTER_COMMIT
|
||||
--echo # Here, use a transaction with a non-transactional statement to write to
|
||||
--echo # the binlog directly, and turn off/on semi-sync before committing the
|
||||
--echo # transaction.
|
||||
|
||||
--connection master
|
||||
set @@global.rpl_semi_sync_master_wait_point= AFTER_COMMIT;
|
||||
|
||||
--echo # Waiting for semi-sync to turn on..
|
||||
let $status_var= rpl_semi_sync_master_status;
|
||||
let $status_var_value= ON;
|
||||
source include/wait_for_status_var.inc;
|
||||
--connection slave
|
||||
let $status_var= rpl_semi_sync_slave_status;
|
||||
let $status_var_value= ON;
|
||||
source include/wait_for_status_var.inc;
|
||||
|
||||
--connection master
|
||||
create table tn (a int) engine=Aria;
|
||||
start transaction;
|
||||
insert into tn values (NULL);
|
||||
set @@global.rpl_semi_sync_master_enabled= 0;
|
||||
set @@global.rpl_semi_sync_master_enabled= 1;
|
||||
commit;
|
||||
|
||||
--echo # Cleanup
|
||||
--connection master
|
||||
drop table tn;
|
||||
set @@global.rpl_semi_sync_master_wait_point= @old_master_wait_point;
|
||||
set @@global.rpl_semi_sync_master_wait_no_slave= @old_master_wait_no_slave;
|
||||
set @@global.debug_dbug=@save_debug_dbug;
|
||||
|
||||
--source include/rpl_end.inc
|
||||
|
|
|
@ -360,10 +360,8 @@ void Active_tranx::unlink_thd_as_waiter(const char *log_file_name,
|
|||
DBUG_VOID_RETURN;
|
||||
}
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
void Active_tranx::assert_thd_is_waiter(THD *thd_to_check,
|
||||
const char *log_file_name,
|
||||
my_off_t log_file_pos)
|
||||
bool Active_tranx::is_thd_waiter(THD *thd_to_check, const char *log_file_name,
|
||||
my_off_t log_file_pos)
|
||||
{
|
||||
DBUG_ENTER("Active_tranx::assert_thd_is_waiter");
|
||||
mysql_mutex_assert_owner(m_lock);
|
||||
|
@ -379,13 +377,8 @@ void Active_tranx::assert_thd_is_waiter(THD *thd_to_check,
|
|||
entry = entry->hash_next;
|
||||
}
|
||||
|
||||
DBUG_ASSERT(entry);
|
||||
DBUG_ASSERT(entry->thd);
|
||||
DBUG_ASSERT(entry->thd->thread_id == thd_to_check->thread_id);
|
||||
|
||||
DBUG_VOID_RETURN;
|
||||
DBUG_RETURN(static_cast<bool>(entry));
|
||||
}
|
||||
#endif
|
||||
|
||||
/*******************************************************************************
|
||||
*
|
||||
|
@ -922,6 +915,36 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
|
|||
}
|
||||
}
|
||||
|
||||
/* In between the binlogging of this transaction and this wait, it is
|
||||
* possible that our entry in Active_tranx was removed (i.e. if
|
||||
* semi-sync was switched off and on). It is also possible that the
|
||||
* event was already sent to a replica; however, we don't know if
|
||||
* semi-sync was on or off at that time, so an ACK may never come. So
|
||||
* skip the wait. Note that rpl_semi_sync_master_request_acks was
|
||||
* already incremented in report_binlog_update(), so to keep
|
||||
* rpl_semi_sync_master_yes/no_tx consistent with it, we check for a
|
||||
* semi-sync restart _after_ checking the reply state.
|
||||
*/
|
||||
if (unlikely(!m_active_tranxs->is_thd_waiter(thd, trx_wait_binlog_name,
|
||||
trx_wait_binlog_pos)))
|
||||
{
|
||||
DBUG_EXECUTE_IF(
|
||||
"semisync_log_skip_trx_wait",
|
||||
sql_print_information(
|
||||
"Skipping semi-sync wait for transaction at pos %s, %lu. This "
|
||||
"should be because semi-sync turned off and on during the "
|
||||
"lifetime of this transaction.",
|
||||
trx_wait_binlog_name, trx_wait_binlog_pos););
|
||||
|
||||
/* The only known reason for a missing entry at this point is if
|
||||
* semi-sync was turned off then on, so on debug builds, we track
|
||||
* the number of times semi-sync turned off at binlogging, and compare
|
||||
* to the current value. */
|
||||
DBUG_ASSERT(rpl_semi_sync_master_off_times > thd->expected_semi_sync_offs);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/* Let us update the info about the minimum binlog position of waiting
|
||||
* threads.
|
||||
*/
|
||||
|
@ -968,10 +991,6 @@ int Repl_semi_sync_master::commit_trx(const char *trx_wait_binlog_name,
|
|||
m_wait_timeout,
|
||||
m_wait_file_name, (ulong)m_wait_file_pos));
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
m_active_tranxs->assert_thd_is_waiter(thd, trx_wait_binlog_name,
|
||||
trx_wait_binlog_pos);
|
||||
#endif
|
||||
create_timeout(&abstime, &start_ts);
|
||||
wait_result= mysql_cond_timedwait(&thd->COND_wakeup_ready, &LOCK_binlog,
|
||||
&abstime);
|
||||
|
@ -1307,6 +1326,10 @@ int Repl_semi_sync_master::write_tranx_in_binlog(THD *thd,
|
|||
else
|
||||
{
|
||||
rpl_semi_sync_master_request_ack++;
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
thd->expected_semi_sync_offs= rpl_semi_sync_master_off_times;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -377,14 +377,12 @@ public:
|
|||
*/
|
||||
void unlink_thd_as_waiter(const char *log_file_name, my_off_t log_file_pos);
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
/* Uses DBUG_ASSERT statements to ensure that the argument thd_to_check
|
||||
* matches the thread of the respective Tranx_node::thd of the passed in
|
||||
* log_file_name and log_file_pos.
|
||||
*/
|
||||
void assert_thd_is_waiter(THD *thd_to_check, const char *log_file_name,
|
||||
my_off_t log_file_pos);
|
||||
#endif
|
||||
bool is_thd_waiter(THD *thd_to_check, const char *log_file_name,
|
||||
my_off_t log_file_pos);
|
||||
|
||||
/* Given a position, check to see whether the position is an active
|
||||
* transaction's ending position by probing the hash table.
|
||||
|
|
|
@ -851,6 +851,11 @@ THD::THD(my_thread_id id, bool is_wsrep_applier)
|
|||
query_id= 0;
|
||||
query_name_consts= 0;
|
||||
semisync_info= 0;
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
expected_semi_sync_offs= 0;
|
||||
#endif
|
||||
|
||||
db_charset= global_system_variables.collation_database;
|
||||
bzero((void*) ha_data, sizeof(ha_data));
|
||||
mysys_var=0;
|
||||
|
|
|
@ -2922,6 +2922,17 @@ public:
|
|||
|
||||
/* Needed by MariaDB semi sync replication */
|
||||
Trans_binlog_info *semisync_info;
|
||||
|
||||
#ifndef DBUG_OFF
|
||||
/*
|
||||
If Active_tranx is missing an entry for a transaction which is planning to
|
||||
await an ACK, this ensures that the reason is because semi-sync was turned
|
||||
off then on in-between the binlogging of the transaction, and before it had
|
||||
started waiting for the ACK.
|
||||
*/
|
||||
ulong expected_semi_sync_offs;
|
||||
#endif
|
||||
|
||||
/* If this is a semisync slave connection. */
|
||||
bool semi_sync_slave;
|
||||
ulonglong client_capabilities; /* What the client supports */
|
||||
|
|
Loading…
Add table
Reference in a new issue