MDEV-27850: MTR tests can hang due to DEBUG_SYNC race condition

A DEBUG_SYNC signal can get lost in certain tests when a later
DEBUG_SYNC command (another SIGNAL or a RESET) overwrites it
before the waiting thread has received it. This patch addresses
these issues in three tests: main.query_cache_debug,
main.partition_debug_sync, and
rpl.rpl_dump_request_retry_warning.

Additionally, main.partition_debug_sync needed changes to the
result file (the others did not). The test synchronizes two
statements that run on separate connections, an ALTER TABLE and
a DROP TABLE. A new thread/connection was needed to orchestrate
the DEBUG_SYNC actions between these statements, and its
statements appear in the result file. Additional comments were
added for clarification.

Reviewed By:
============
Andrei Elkin <andrei.elkin@mariadb.com>
This commit is contained in:
Brandon Nesterenko 2022-02-22 08:54:54 -07:00
parent ed691eca99
commit 905baa646d
4 changed files with 40 additions and 3 deletions

View file

@ -53,9 +53,14 @@ SET DEBUG_SYNC= 'alter_table_before_rename_result_table WAIT_FOR delete_done';
ALTER TABLE t2 REMOVE PARTITIONING;
connection default;
SET DEBUG_SYNC= 'now WAIT_FOR removing_partitions';
SET DEBUG_SYNC= 'rm_table_no_locks_before_delete_table SIGNAL waiting_for_alter';
SET DEBUG_SYNC= 'rm_table_no_locks_before_delete_table SIGNAL waiting_for_alter WAIT_FOR mtr_continue';
SET DEBUG_SYNC= 'rm_table_no_locks_before_binlog SIGNAL delete_done';
DROP TABLE IF EXISTS t2;
# Create a new connection thread to orchestrate the ALTER and DROP
# threads' DEBUG_SYNC points to avoid lost signals
connect con2,localhost,root,,;
SET DEBUG_SYNC= 'now SIGNAL mtr_continue';
connection default;
connection con1;
ERROR 42S02: Table 'test.t2' doesn't exist
SET DEBUG_SYNC= 'RESET';

View file

@ -43,6 +43,12 @@ START SLAVE;
SET DEBUG_SYNC= 'now WAIT_FOR parked';
SET @@GLOBAL.debug_dbug = @saved_dbug;
SET DEBUG_SYNC= 'now SIGNAL continue';
# Ensure the last DEBUG_SYNC signal was received by the target thread before
# reset; otherwise, the reset can drop the last signal before it gets
# acknowledged
let $wait_condition= select count(*)=0 from information_schema.processlist where state like "%debug%";
source include/wait_condition.inc;
SET DEBUG_SYNC= 'RESET';
--source include/wait_for_slave_io_to_start.inc

View file

@ -66,12 +66,27 @@ SET DEBUG_SYNC= 'alter_table_before_rename_result_table WAIT_FOR delete_done';
--send ALTER TABLE t2 REMOVE PARTITIONING
connection default;
SET DEBUG_SYNC= 'now WAIT_FOR removing_partitions';
SET DEBUG_SYNC= 'rm_table_no_locks_before_delete_table SIGNAL waiting_for_alter';
SET DEBUG_SYNC= 'rm_table_no_locks_before_delete_table SIGNAL waiting_for_alter WAIT_FOR mtr_continue';
SET DEBUG_SYNC= 'rm_table_no_locks_before_binlog SIGNAL delete_done';
DROP TABLE IF EXISTS t2;
--send DROP TABLE IF EXISTS t2
--echo # Create a new connection thread to orchestrate the ALTER and DROP
--echo # threads' DEBUG_SYNC points to avoid lost signals
connect(con2,localhost,root,,);
let $wait_condition= select count(*)=0 from information_schema.processlist where state like "debug sync point: alter_table_before_open_tables";
source include/wait_condition.inc;
SET DEBUG_SYNC= 'now SIGNAL mtr_continue';
# Return execution for DROP TABLE
connection default;
--reap
# Return execution for ALTER TABLE
connection con1;
--error ER_NO_SUCH_TABLE
--reap
SET DEBUG_SYNC= 'RESET';
disconnect con1;
--source include/wait_until_disconnected.inc

View file

@ -197,8 +197,17 @@ SET DEBUG_SYNC="now WAIT_FOR parked1_2";
--echo ** and finally release the mutex. The threads will continue to wait
--echo ** until a broadcast signal reaches them causing both threads to
--echo ** come alive and check the condition.
# Before sending signals back-to-back, we have to ensure the previous signal
# was received
let $wait_condition= select count(*)=3 from information_schema.processlist where state like "%debug%";
source include/wait_condition.inc;
SET DEBUG_SYNC="now SIGNAL go2";
let $wait_condition= select count(*)=2 from information_schema.processlist where state like "%debug%";
source include/wait_condition.inc;
SET DEBUG_SYNC="now SIGNAL go3";
let $wait_condition= select count(*)=1 from information_schema.processlist where state like "%debug%";
source include/wait_condition.inc;
--echo **
--echo ** Finally signal the DELETE statement on THD1 one last time.
@ -208,6 +217,8 @@ SET DEBUG_SYNC="now SIGNAL go3";
--echo ** invalidations and a broadcast signal will be sent to the thread
--echo ** group holding result set writers.
SET DEBUG_SYNC="now SIGNAL go1_2";
let $wait_condition= select count(*)=0 from information_schema.processlist where state like "%debug%";
source include/wait_condition.inc;
--echo **
--echo *************************************************************************