mirror of
https://github.com/MariaDB/server.git
synced 2025-02-07 06:12:18 +01:00
![Vlad Lesin](/assets/img/avatar_default.png)
lock_rec_unlock_unmodified() is executed either under lock_sys.wr_lock() or under a combination of lock_sys.rd_lock() + record locks hash table cell latch. It also requests a page latch to check whether the locked records were changed by the current transaction. Usually InnoDB requests a page latch to find a certain record on the page, and then requests lock_sys and/or the record lock hash cell latch to request a record lock. lock_rec_unlock_unmodified() requests the latches in the opposite order, which causes deadlocks. One possible scenario for the deadlock is the following: thread 1 - lock_rec_unlock_unmodified() is invoked under the locks hash table cell latch, the latch is acquired; thread 2 - the purge thread acquires the page latch and tries to remove a delete-marked record; it invokes lock_update_delete(), which requests the locks hash table cell latch held by thread 1; thread 1 - requests the page latch held by thread 2. To fix it we need to release lock_sys.latch and/or the lock hash cell latch, acquire the page latch and re-acquire the lock_sys related latches. When lock_sys.latch and/or the lock hash cell latch are released in lock_release_on_prepare() and lock_release_on_prepare_try(), the page on which the current lock is held can be merged. In this case the bitmap of the current lock must be cleared, and the new lock must be added to the end of the trx->lock.trx_locks list, or the bitmap of an already existing lock must be changed. The new field trx_lock_t::set_nth_bit_calls indicates whether new locks (bits in existing lock bitmaps or new lock objects) were created during the period when lock_sys was released in the trx->lock.trx_locks list iteration loop in lock_release_on_prepare() or lock_release_on_prepare_try(). If so, we traverse the list again. The block can be freed during page merging, which causes an assertion failure in buf_page_get_gen(), as btr_block_get() passes BUF_GET as the page get mode to it. 
That's why the page_get_mode parameter was added to btr_block_get(): to pass BUF_GET_POSSIBLY_FREED from lock_release_on_prepare() and lock_release_on_prepare_try() to buf_page_get_gen(). As searching for the id of the trx which modified a secondary index record is quite an expensive operation, restrict its usage on the master. A system variable was added to remove the restriction in order to simplify testing. The variable exists only in debug builds or in builds with the -DINNODB_ENABLE_XAP_UNLOCK_UNMODIFIED_FOR_PRIMARY option, to increase the probability of catching bugs in release builds with RQG. Note that the code which does the primary index lookup to find out which transaction modified a secondary index record is necessary only when there is no primary key and no unique secondary key on a replica with row-based replication, because only in this case can extra X locks on unmodified records be set during the scan phase. Reviewed by Marko Mäkelä.
86 lines
2.6 KiB
Text
86 lines
2.6 KiB
Text
# Regression test for a latching-order deadlock between XA PREPARE and the
# purge worker.
#
# lock_rec_unlock_unmodified() is executed under lock_sys latches and also
# requests a page latch, while the purge worker requests the page latch first
# and the lock_sys related latches afterwards, in lock_update_delete().
#
# The scenario is driven with debug_dbug keywords and DEBUG_SYNC points.
--source include/have_innodb.inc
--source include/count_sessions.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc

# Remember the current value so that it can be restored at the end of the
# test, then switch the debug variable on.
# NOTE(review): presumably this is what makes XA PREPARE release locks on
# unmodified records on this server - confirm against the variable's
# definition.
SET @old_innodb_enable_xap_unlock_unmodified_for_primary_debug=
  @@GLOBAL.innodb_enable_xap_unlock_unmodified_for_primary_debug;
SET GLOBAL innodb_enable_xap_unlock_unmodified_for_primary_debug= 1;

# Saved because the test adds and removes debug_dbug keywords later on.
SET @saved_dbug = @@GLOBAL.debug_dbug;

CREATE TABLE t(id INT PRIMARY KEY) ENGINE=InnoDB STATS_PERSISTENT=0;

# Loop counter: the scenario is executed for $i = 2 and then for $i = 1.
--let $i = 2

# Run the scenario once per value of $i (2, then 1). The $i == 1 pass sets an
# additional debug keyword so that both lock release functions are covered.
while ($i) {
  # Begin every pass only after this include returns (judging by its name, it
  # waits until purge has processed everything).
  --source include/wait_all_purged.inc

  INSERT INTO t VALUES (10), (20), (30);

  # As the connection name indicates, this open snapshot is meant to keep
  # purge away from the record that is delete-marked below, until the
  # connection commits.
  --connect(prevent_purge,localhost,root,,)
  start transaction with consistent snapshot;

  --connection default
  DELETE FROM t WHERE id = 20;

  # Enable the sync point at which the purge worker is stopped further down.
  SET @@GLOBAL.debug_dbug=
    "+d,enable_row_purge_remove_clust_if_poss_low_sync_point";

  # Cover both the lock_release_on_prepare() and the
  # lock_release_on_prepare_try() functions.
  if ($i == 1) {
    SET @@GLOBAL.debug_dbug="+d,skip_lock_release_on_prepare_try";
  }

  # The XA transaction whose PREPARE is raced against purge.
  XA START '1';
  UPDATE t SET id=40 WHERE id=30;
  XA END '1';

  # Close the snapshot opened above.
  --connection prevent_purge
  COMMIT;

  # Stop the purge worker after it has requested the page X-latch, but before
  # the lock_update_delete() call.
  SET DEBUG_SYNC=
    'now WAIT_FOR row_purge_remove_clust_if_poss_low_before_delete';
  # The purge worker is parked now, so the keyword is not needed any more.
  SET @@GLOBAL.debug_dbug=
    "-d,enable_row_purge_remove_clust_if_poss_low_sync_point";

  --connection default
  # lock_rec_unlock_unmodified() is executed either under lock_sys.wr_lock()
  # or under a combination of lock_sys.rd_lock() + record locks hash table
  # cell latch. Stop it before the page latch request.
  # NOTE(review): lock_sys_latched is signalled here, but nothing in this
  # script waits for it - confirm that this is intentional.
  SET DEBUG_SYNC=
    "lock_rec_unlock_unmodified_start SIGNAL lock_sys_latched WAIT_FOR cont";
  # Asynchronous, because the statement waits for 'cont' at the sync point.
  --send XA PREPARE '1'

  --connection prevent_purge
  # Let the purge thread continue execution and invoke lock_update_delete(),
  # which, in turn, requests lock_sys related latches.
  SET DEBUG_SYNC= 'now SIGNAL row_purge_remove_clust_if_poss_low_cont';
  # Resume XA PREPARE, which waits for 'cont' at
  # lock_rec_unlock_unmodified_start.
  SET DEBUG_SYNC= 'now SIGNAL cont';
  --disconnect prevent_purge

  --connection default
  # Deadlock if the bug is not fixed, as lock_rec_unlock_unmodified() requests
  # the page latch acquired by the purge worker, and the purge worker requests
  # the lock_sys related latches in the lock_update_delete() call, which were
  # acquired by the current XA in the lock_rec_unlock_unmodified() caller.
  --reap
  XA COMMIT '1';

  # Clear the sync point actions and signals before the next pass.
  SET DEBUG_SYNC="RESET";

  # Empty the table so that the next pass can insert the same rows again.
  TRUNCATE TABLE t;

  --dec $i
}

# Cleanup: undo the debug_dbug changes and drop the test table.
SET @@GLOBAL.debug_dbug = @saved_dbug;
DROP TABLE t;

# Restore the value that was saved at the beginning of the test.
SET GLOBAL innodb_enable_xap_unlock_unmodified_for_primary_debug=
  @old_innodb_enable_xap_unlock_unmodified_for_primary_debug;

# Counterpart of include/count_sessions.inc at the top of the test
# (presumably waits until the extra connections opened by the test are gone).
--source include/wait_until_count_sessions.inc