MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock

The code was backported from 10.6 bd03c0e516
commit. See that commit message for details.

Apart from the above commit trx_lock_t::wait_trx was also backported from
MDEV-24738. trx_lock_t::wait_trx is protected with lock_sys.wait_mutex
in 10.6, but that mutex was implemented only in MDEV-24789. As there is no
need to backport MDEV-24789 for MDEV-27025,
trx_lock_t::wait_trx is protected with the same mutexes as
trx_lock_t::wait_lock.

This fix should not break innodb-lock-schedule-algorithm=VATS. This
algorithm uses an Eldest-Transaction-First (ETF) heuristic, which prefers
older transactions over new ones. In this fix we just insert granted lock
just before the last granted lock of the same transaction, what does not
change transactions execution order.

The changes in lock_rec_create_low() should not break Galera Cluster,
there is a big "if" branch for WSREP. This branch is necessary to provide
the correct transactions execution order, and should not be changed for
the current bug fix.
This commit is contained in:
Vlad Lesin 2022-01-11 18:19:39 +03:00
parent e44439ab73
commit be8113861c
12 changed files with 408 additions and 199 deletions

View file

@ -0,0 +1,27 @@
#
# MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
#
CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
connect prevent_purge,localhost,root,,;
start transaction with consistent snapshot;
connection default;
INSERT INTO t VALUES (20,20);
DELETE FROM t WHERE b = 20;
connect con_ins,localhost,root,,;
SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
INSERT INTO t VALUES(10, 20);
connect con_del,localhost,root,,;
SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
DELETE FROM t WHERE b = 20;
connection default;
SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
SET DEBUG_SYNC = 'now SIGNAL ins_cont';
connection con_ins;
disconnect con_ins;
connection con_del;
disconnect con_del;
disconnect prevent_purge;
connection default;
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;

View file

@ -0,0 +1,60 @@
--source include/have_innodb.inc
--source include/count_sessions.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--echo #
--echo # MDEV-27025 insert-intention lock conflicts with waiting ORDINARY lock
--echo #
# The test checks the ability to acquire exclusive record lock if the acquiring
# transaction already holds a shared lock on the record and another transaction
# is waiting for a lock.
CREATE TABLE t (a INT PRIMARY KEY, b INT NOT NULL UNIQUE) ENGINE=InnoDB;
--connect(prevent_purge,localhost,root,,)
start transaction with consistent snapshot;
--connection default
INSERT INTO t VALUES (20,20);
DELETE FROM t WHERE b = 20;
--connect(con_ins,localhost,root,,)
SET DEBUG_SYNC = 'row_ins_sec_index_entry_dup_locks_created SIGNAL ins_set_locks WAIT_FOR ins_cont';
send
INSERT INTO t VALUES(10, 20);
--connect(con_del,localhost,root,,)
SET DEBUG_SYNC = 'now WAIT_FOR ins_set_locks';
SET DEBUG_SYNC = 'lock_wait_suspend_thread_enter SIGNAL del_locked';
###############################################################################
# This DELETE creates waiting ORDINARY X-lock for heap_no 2 as the record is
# delete-marked, this lock conflicts with ORDINARY S-lock set by the the last
# INSERT. After the last INSERT creates insert-intention lock on
# heap_no 2, this lock will conflict with waiting ORDINARY X-lock of this
# DELETE, what causes DEADLOCK error for this DELETE.
###############################################################################
send
DELETE FROM t WHERE b = 20;
--connection default
SET DEBUG_SYNC = 'now WAIT_FOR del_locked';
SET DEBUG_SYNC = 'now SIGNAL ins_cont';
--connection con_ins
--reap
--disconnect con_ins
--connection con_del
# Without the fix, ER_LOCK_DEADLOCK would be reported here.
--reap
--disconnect con_del
--disconnect prevent_purge
--connection default
SET DEBUG_SYNC = 'RESET';
DROP TABLE t;
--source include/wait_until_count_sessions.inc

View file

@ -283,7 +283,6 @@ connection default;
update t1 set b = 'foo';
connection con1;
update t1 set a = 'bar';
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
disconnect con1;
connection default;
drop table t1;

View file

@ -186,7 +186,9 @@ send update t1 set b = 'foo';
connection con1;
let $wait_condition= select count(*) from information_schema.innodb_lock_waits;
source include/wait_condition.inc;
error ER_LOCK_DEADLOCK;
# There must no be DEADLOCK here as con1 transaction already holds locks, and
# default's transaction lock is waiting, so the locks of the following "UPDATE"
# must not conflict with waiting lock.
update t1 set a = 'bar';
disconnect con1;
connection default;

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2018, 2020, MariaDB Corporation.
Copyright (c) 2018, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -28,8 +28,29 @@ Created 5/20/1997 Heikki Tuuri
#include "ut0rnd.h"
struct hash_table_t;
struct hash_cell_t{
void* node; /*!< hash chain node, NULL if none */
struct hash_cell_t
{
/** singly-linked, nullptr terminated list of hash buckets */
void *node;
/** Insert an element after another.
@tparam T type of the element
@param after the element after which to insert
@param insert the being-inserted element
@param next the next-element pointer in T */
template<typename T>
void insert_after(T &after, T &insert, T *T::*next)
{
#ifdef UNIV_DEBUG
for (const T *c= static_cast<const T*>(node); c; c= c->*next)
if (c == &after)
goto found;
ut_error;
found:
#endif
insert.*next= after.*next;
after.*next= &insert;
}
};
typedef void* hash_node_t;

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2021, MariaDB Corporation.
Copyright (c) 2017, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -832,26 +832,29 @@ public:
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
@param[in] c_lock conflicting lock
@param[in] thr thread owning trx
@param[in] type_mode lock mode and wait flag, type is ignored and replaced by
LOCK_REC
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] index index of record
@param[in,out] trx transaction
@param[in] caller_owns_trx_mutex TRUE if caller owns trx mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */
UNIV_INLINE
lock_t*
lock_rec_create(
/*============*/
lock_t *lock_rec_create(lock_t *c_lock,
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
que_thr_t* thr, /*!< thread owning trx */
que_thr_t *thr,
#endif
unsigned type_mode,/*!< in: lock mode and wait
flag, type is ignored and
replaced by LOCK_REC */
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in,out: transaction */
bool caller_owns_trx_mutex);
/*!< in: true if caller owns
trx mutex */
unsigned type_mode, const buf_block_t *block,
ulint heap_no, dict_index_t *index, trx_t *trx,
bool caller_owns_trx_mutex,
bool insert_before_waiting= false);
/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue. */
@ -864,6 +867,7 @@ lock_rec_discard(
/** Create a new record lock and inserts it to the lock queue,
without checking for deadlocks or conflicts.
@param[in] c_lock conflicting lock
@param[in] type_mode lock mode and wait flag; type will be replaced
with LOCK_REC
@param[in] page_id index page number
@ -872,11 +876,15 @@ without checking for deadlocks or conflicts.
@param[in] index the index tree
@param[in,out] trx transaction
@param[in] holds_trx_mutex whether the caller holds trx->mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */
lock_t*
lock_rec_create_low(
lock_t* c_lock,
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
que_thr_t* thr, /*!< thread owning trx */
#endif
unsigned type_mode,
@ -885,7 +893,9 @@ lock_rec_create_low(
ulint heap_no,
dict_index_t* index,
trx_t* trx,
bool holds_trx_mutex);
bool holds_trx_mutex,
bool insert_before_waiting = false);
/** Enqueue a waiting request for a lock which cannot be granted immediately.
Check for deadlocks.
@param[in] type_mode the requested lock mode (LOCK_S or LOCK_X)
@ -906,9 +916,7 @@ Check for deadlocks.
(or it happened to commit) */
dberr_t
lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
#endif
unsigned type_mode,
const buf_block_t* block,
ulint heap_no,

View file

@ -72,32 +72,35 @@ lock_hash_get(
/*********************************************************************//**
Creates a new record lock and inserts it to the lock queue. Does NOT check
for deadlocks or lock compatibility!
@param[in] c_lock conflicting lock
@param[in] thr thread owning trx
@param[in] type_mode lock mode and wait flag, type is ignored and replaced by
LOCK_REC
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] index index of record
@param[in,out] trx transaction
@param[in] caller_owns_trx_mutex TRUE if caller owns trx mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */
UNIV_INLINE
lock_t*
lock_rec_create(
/*============*/
lock_t *lock_rec_create(lock_t *c_lock,
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
que_thr_t* thr, /*!< thread owning trx */
que_thr_t *thr,
#endif
unsigned type_mode,/*!< in: lock mode and wait
flag, type is ignored and
replaced by LOCK_REC */
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in,out: transaction */
bool caller_owns_trx_mutex)
/*!< in: TRUE if caller owns
trx mutex */
unsigned type_mode, const buf_block_t *block,
ulint heap_no, dict_index_t *index, trx_t *trx,
bool caller_owns_trx_mutex,
bool insert_before_waiting)
{
btr_assert_not_corrupted(block, index);
return lock_rec_create_low(
return lock_rec_create_low(c_lock,
#ifdef WITH_WSREP
c_lock, thr,
thr,
#endif
type_mode, block->page.id(), block->frame, heap_no,
index, trx, caller_owns_trx_mutex);
index, trx, caller_owns_trx_mutex, insert_before_waiting);
}

View file

@ -562,14 +562,13 @@ lock_rec_get_next_const(
/*********************************************************************//**
Gets the first explicit lock request on a record.
@param[in] hash hash chain the lock on
@param[in] page_id page id
@param[in] heap_no heap number of the record
@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first(
/*===============*/
hash_table_t* hash, /*!< in: hash chain the lock on */
const buf_block_t* block, /*!< in: block containing the record */
ulint heap_no);/*!< in: heap number of the record */
lock_t *lock_rec_get_first(hash_table_t *hash, page_id_t page_id,
ulint heap_no);
/*********************************************************************//**
Gets the mode of a lock.
@ -623,15 +622,26 @@ lock_table_has(
/** Set the wait status of a lock.
@param[in,out] lock lock that will be waited for
@param[in,out] trx transaction that will wait for the lock */
inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx)
@param[in,out] trx transaction that will wait for the lock
@param[in] c_lock conflicting lock */
inline void lock_set_lock_and_trx_wait(lock_t* lock, trx_t* trx,
const lock_t *c_lock)
{
ut_ad(lock);
ut_ad(lock->trx == trx);
ut_ad(trx->lock.wait_lock == NULL);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
if (trx->lock.wait_trx) {
ut_ad(!c_lock || trx->lock.wait_trx == c_lock->trx);
ut_ad(trx->lock.wait_lock);
ut_ad((*trx->lock.wait_lock).trx == trx);
} else {
ut_ad(c_lock);
trx->lock.wait_trx = c_lock->trx;
ut_ad(!trx->lock.wait_lock);
}
trx->lock.wait_lock = lock;
lock->type_mode |= LOCK_WAIT;
}
@ -644,6 +654,7 @@ inline void lock_reset_lock_and_trx_wait(lock_t* lock)
ut_ad(lock_mutex_own());
ut_ad(lock->trx->lock.wait_lock == NULL
|| lock->trx->lock.wait_lock == lock);
lock->trx->lock.wait_trx= nullptr;
lock->trx->lock.wait_lock = NULL;
lock->type_mode &= ~LOCK_WAIT;
}

View file

@ -156,16 +156,15 @@ lock_rec_get_next_const(
/*********************************************************************//**
Gets the first explicit lock request on a record.
@return first lock, NULL if none exists */
@param[in] hash hash chain the lock on
@param[in] page_id page id
@param[in] heap_no heap number of the record
@return first lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_first(
/*===============*/
hash_table_t* hash, /*!< in: hash chain the lock on */
const buf_block_t* block, /*!< in: block containing the record */
ulint heap_no)/*!< in: heap number of the record */
lock_t *lock_rec_get_first(hash_table_t *hash, page_id_t page_id,
ulint heap_no)
{
for (lock_t *lock= lock_sys.get_first(*hash, block->page.id());
for (lock_t *lock= lock_sys.get_first(*hash, page_id);
lock; lock= lock_rec_get_next_on_page(lock))
if (lock_rec_get_nth_bit(lock, heap_no))
return lock;

View file

@ -426,7 +426,9 @@ struct trx_lock_t {
trx_que_t que_state; /*!< valid when trx->state
== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
TRX_QUE_LOCK_WAIT, ... */
/** Transaction being waited for; protected by the same mutexes as
wait_lock */
trx_t* wait_trx;
lock_t* wait_lock; /*!< if trx execution state is
TRX_QUE_LOCK_WAIT, this points to
the lock request, otherwise this is

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2021, MariaDB Corporation.
Copyright (c) 2014, 2022, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -970,7 +970,8 @@ lock_rec_has_expl(
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X);
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block->page.id(),
heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@ -1024,7 +1025,7 @@ lock_rec_other_has_expl_req(
}
for (lock_t* lock = lock_rec_get_first(&lock_sys.rec_hash,
block, heap_no);
block->page.id(), heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@ -1075,19 +1076,18 @@ static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock)
/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@param[in] mode LOCK_S or LOCK_X, possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP,
LOCK_INSERT_INTENTION
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] trx our transaction
@param[out] was_ignored true if conflicting locks waiting for the current
transaction were ignored
@return lock or NULL */
static
lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
unsigned mode, /*!< in: LOCK_S or LOCK_X,
possibly ORed to LOCK_GAP or
LOC_REC_NOT_GAP,
LOCK_INSERT_INTENTION */
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
const trx_t* trx) /*!< in: our transaction */
static lock_t *lock_rec_other_has_conflicting(unsigned mode,
const buf_block_t *block,
ulint heap_no, const trx_t *trx,
bool *was_ignored= nullptr)
{
lock_t* lock;
@ -1095,10 +1095,21 @@ lock_rec_other_has_conflicting(
bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block->page.id(),
heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
/* There can't be lock loops for one record, because
all waiting locks of the record will always wait for the same
lock of the record in a cell array, and check for
conflicting lock will always start with the first lock for the
heap_no, and go ahead with the same order(the order of the
locks in the cell array) */
if (lock_get_wait(lock) && lock->trx->lock.wait_trx == trx) {
if (was_ignored) *was_ignored= true;
continue;
}
if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
#ifdef WITH_WSREP
if (trx->is_wsrep()) {
@ -1255,6 +1266,7 @@ static void check_trx_state(const trx_t *trx)
/** Create a new record lock and inserts it to the lock queue,
without checking for deadlocks or conflicts.
@param[in] c_lock conflicting lock
@param[in] type_mode lock mode and wait flag; type will be replaced
with LOCK_REC
@param[in] page_id index page number
@ -1263,11 +1275,15 @@ without checking for deadlocks or conflicts.
@param[in] index the index tree
@param[in,out] trx transaction
@param[in] holds_trx_mutex whether the caller holds trx->mutex
@param[in] insert_before_waiting if true, inserts new B-tree record lock
just after the last non-waiting lock of the current transaction which is
located before the first waiting for the current transaction lock, otherwise
the lock is inserted at the end of the queue
@return created lock */
lock_t*
lock_rec_create_low(
lock_t* c_lock,
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
que_thr_t* thr, /*!< thread owning trx */
#endif
unsigned type_mode,
@ -1276,7 +1292,8 @@ lock_rec_create_low(
ulint heap_no,
dict_index_t* index,
trx_t* trx,
bool holds_trx_mutex)
bool holds_trx_mutex,
bool insert_before_waiting)
{
lock_t* lock;
ulint n_bits;
@ -1385,7 +1402,7 @@ lock_rec_create_low(
}
trx->lock.que_state = TRX_QUE_LOCK_WAIT;
lock_set_lock_and_trx_wait(lock, trx);
lock_set_lock_and_trx_wait(lock, trx, c_lock);
UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
trx->lock.wait_thr = thr;
@ -1413,15 +1430,46 @@ lock_rec_create_low(
trx_mutex_exit(c_lock->trx);
} else
#endif /* WITH_WSREP */
if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE))
&& innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !thd_is_replication_slave_thread(trx->mysql_thd)) {
HASH_PREPEND(lock_t, hash, &lock_sys.rec_hash,
page_id.fold(), lock);
} else {
HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
page_id.fold(), lock);
if (insert_before_waiting
&& !(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE))) {
/* Try to insert the lock just after the last non-waiting
lock of the current transaction which immediately
precedes the first waiting lock request. */
hash_table_t *lock_hash = lock_hash_get(type_mode);
hash_cell_t& cell = lock_hash->array[lock_hash->calc_hash(
page_id.fold())];
lock_t* last_non_waiting = nullptr;
for (lock_t* l = lock_rec_get_first(lock_hash, page_id,
heap_no); l; l = lock_rec_get_next(heap_no, l)) {
if (lock_get_wait(lock)
&& l->trx->lock.wait_trx == trx) {
break;
}
if (l->trx == trx) {
last_non_waiting = l;
}
}
if (!last_non_waiting) {
goto append_last;
}
cell.insert_after(*last_non_waiting, *lock, &lock_t::hash);
}
else {
append_last:
if (!(type_mode & (LOCK_WAIT | LOCK_PREDICATE | LOCK_PRDT_PAGE))
&& innodb_lock_schedule_algorithm
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
&& !thd_is_replication_slave_thread(trx->mysql_thd)) {
HASH_PREPEND(lock_t, hash, &lock_sys.rec_hash,
page_id.fold(), lock);
} else {
HASH_INSERT(lock_t, hash, lock_hash_get(type_mode),
page_id.fold(), lock);
}
}
if (!holds_trx_mutex) {
@ -1429,7 +1477,7 @@ lock_rec_create_low(
}
ut_ad(trx_mutex_own(trx));
if (type_mode & LOCK_WAIT) {
lock_set_lock_and_trx_wait(lock, trx);
lock_set_lock_and_trx_wait(lock, trx, c_lock);
}
UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
if (!holds_trx_mutex) {
@ -1576,6 +1624,7 @@ lock_rec_insert_to_head(
/** Enqueue a waiting request for a lock which cannot be granted immediately.
Check for deadlocks.
@param[in] c_lock conflicting lock
@param[in] type_mode the requested lock mode (LOCK_S or LOCK_X)
possibly ORed with LOCK_GAP or
LOCK_REC_NOT_GAP, ORed with
@ -1594,9 +1643,7 @@ Check for deadlocks.
(or it happened to commit) */
dberr_t
lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
lock_t* c_lock, /*!< conflicting lock */
#endif
lock_t* c_lock,
unsigned type_mode,
const buf_block_t* block,
ulint heap_no,
@ -1634,9 +1681,9 @@ lock_rec_enqueue_waiting(
/* Enqueue the lock request that will wait to be granted, note that
we already own the trx mutex. */
lock_t* lock = lock_rec_create(
lock_t* lock = lock_rec_create(c_lock,
#ifdef WITH_WSREP
c_lock, thr,
thr,
#endif
type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);
@ -1730,22 +1777,20 @@ on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility!
@param[in] type_mode lock mode, wait, gap etc. flags; type is ignored and
replaced by LOCK_REC
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] index index of record
@param[in/out] trx transaction
@param[in] caller_owns_trx_mutex, TRUE if caller owns the transaction mutex
@param[in] insert_before_waiting true=insert B-tree record lock right before
a waiting lock request; false=insert the lock at the end of the queue
@return lock where the bit was set */
static
void
lock_rec_add_to_queue(
/*==================*/
unsigned type_mode,/*!< in: lock mode, wait, gap
etc. flags; type is ignored
and replaced by LOCK_REC */
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
dict_index_t* index, /*!< in: index of record */
trx_t* trx, /*!< in/out: transaction */
bool caller_owns_trx_mutex)
/*!< in: TRUE if caller owns the
transaction mutex */
static void lock_rec_add_to_queue(unsigned type_mode, const buf_block_t *block,
ulint heap_no, dict_index_t *index,
trx_t *trx, bool caller_owns_trx_mutex,
bool insert_before_waiting= false)
{
#ifdef UNIV_DEBUG
ut_ad(lock_mutex_own());
@ -1834,11 +1879,16 @@ lock_rec_add_to_queue(
}
}
lock_rec_create(
/* Note: We will not pass any conflicting lock to lock_rec_create(),
because we should be moving an existing waiting lock request. */
ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx);
lock_rec_create(NULL,
#ifdef WITH_WSREP
NULL, NULL,
NULL,
#endif
type_mode, block, heap_no, index, trx, caller_owns_trx_mutex);
type_mode, block, heap_no, index, trx, caller_owns_trx_mutex,
insert_before_waiting);
}
/*********************************************************************//**
@ -1896,28 +1946,23 @@ lock_rec_lock(
/* Do nothing if the trx already has a strong enough lock on rec */
if (!lock_rec_has_expl(mode, block, heap_no, trx))
{
if (
#ifdef WITH_WSREP
lock_t *c_lock=
#endif
lock_rec_other_has_conflicting(mode, block, heap_no, trx))
bool was_ignored = false;
if (lock_t *c_lock= lock_rec_other_has_conflicting(
mode, block, heap_no, trx, &was_ignored))
{
/*
If another transaction has a non-gap conflicting
request in the queue, as this transaction does not
have a lock strong enough already granted on the
record, we have to wait. */
err = lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
c_lock,
#endif /* WITH_WSREP */
mode, block, heap_no, index, thr, NULL);
err = lock_rec_enqueue_waiting(c_lock, mode, block, heap_no, index,
thr, NULL);
}
else if (!impl)
{
/* Set the requested lock on the record. */
lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx,
true);
true, was_ignored);
err= DB_SUCCESS_LOCKED_REC;
}
}
@ -1943,9 +1988,9 @@ lock_rec_lock(
Note that we don't own the trx mutex.
*/
if (!impl)
lock_rec_create(
lock_rec_create(NULL,
#ifdef WITH_WSREP
NULL, NULL,
NULL,
#endif
mode, block, heap_no, index, trx, false);
@ -2168,8 +2213,17 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock)
if (!lock_get_wait(lock)) {
continue;
}
const lock_t* c = lock_rec_has_to_wait_in_queue(lock);
if (!c) {
ut_ad(lock->trx->lock.wait_trx);
ut_ad(lock->trx->lock.wait_lock);
if (const lock_t* c = lock_rec_has_to_wait_in_queue(
lock)) {
trx_mutex_enter(lock->trx);
lock->trx->lock.wait_trx = c->trx;
trx_mutex_exit(lock->trx);
}
else {
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
@ -2259,7 +2313,7 @@ lock_rec_reset_and_release_wait_low(
ut_ad(lock_mutex_own());
for (lock = lock_rec_get_first(hash, block, heap_no);
for (lock = lock_rec_get_first(hash, block->page.id(), heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@ -2321,7 +2375,8 @@ lock_rec_inherit_to_gap(
DO want S-locks/X-locks(taken for replace) set by a consistency
constraint to be inherited also then. */
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block->page.id(),
heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@ -2357,7 +2412,8 @@ lock_rec_inherit_to_gap_if_gap_lock(
lock_mutex_enter();
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block->page.id(),
heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
@ -2400,12 +2456,12 @@ lock_rec_move_low(
/* If the lock is predicate lock, it resides on INFIMUM record */
ut_ad(lock_rec_get_first(
lock_hash, receiver, receiver_heap_no) == NULL
lock_hash, receiver->page.id(), receiver_heap_no) == NULL
|| lock_hash == &lock_sys.prdt_hash
|| lock_hash == &lock_sys.prdt_page_hash);
for (lock = lock_rec_get_first(lock_hash,
donator, donator_heap_no);
donator->page.id(), donator_heap_no);
lock != NULL;
lock = lock_rec_get_next(donator_heap_no, lock)) {
@ -2414,7 +2470,8 @@ lock_rec_move_low(
lock_rec_reset_nth_bit(lock, donator_heap_no);
if (type_mode & LOCK_WAIT) {
lock_reset_lock_and_trx_wait(lock);
ut_ad(lock->trx->lock.wait_lock == lock);
lock->type_mode &= ~LOCK_WAIT;
}
/* Note that we FIRST reset the bit, and then set the lock:
@ -2426,7 +2483,7 @@ lock_rec_move_low(
}
ut_ad(!lock_rec_get_first(&lock_sys.rec_hash,
donator, donator_heap_no));
donator->page.id(), donator_heap_no));
}
/** Move all the granted locks to the front of the given lock list.
@ -2531,8 +2588,8 @@ lock_move_reorganize_page(
lock_rec_bitmap_reset(lock);
if (lock_get_wait(lock)) {
lock_reset_lock_and_trx_wait(lock);
ut_ad(lock->trx->lock.wait_lock == lock);
lock->type_mode&= ~LOCK_WAIT;
}
lock = lock_rec_get_next_on_page(lock);
@ -2708,7 +2765,9 @@ lock_move_rec_list_end(
ut_ad(!page_rec_is_metadata(orec));
if (type_mode & LOCK_WAIT) {
lock_reset_lock_and_trx_wait(lock);
ut_ad(lock->trx->lock.wait_lock ==
lock);
lock->type_mode&= ~LOCK_WAIT;
}
lock_rec_add_to_queue(
@ -2806,7 +2865,9 @@ lock_move_rec_list_start(
ut_ad(!page_rec_is_metadata(prev));
if (type_mode & LOCK_WAIT) {
lock_reset_lock_and_trx_wait(lock);
ut_ad(lock->trx->lock.wait_lock
== lock);
lock->type_mode&= ~LOCK_WAIT;
}
lock_rec_add_to_queue(
@ -2902,7 +2963,9 @@ lock_rtr_move_rec_list(
if (rec1_heap_no < lock->un_member.rec_lock.n_bits
&& lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
if (type_mode & LOCK_WAIT) {
lock_reset_lock_and_trx_wait(lock);
ut_ad(lock->trx->lock.wait_lock
== lock);
lock->type_mode&= ~LOCK_WAIT;
}
lock_rec_add_to_queue(
@ -3348,10 +3411,8 @@ lock_table_create(
in dictionary cache */
unsigned type_mode,/*!< in: lock mode possibly ORed with
LOCK_WAIT */
trx_t* trx /*!< in: trx */
#ifdef WITH_WSREP
, lock_t* c_lock = NULL /*!< in: conflicting lock */
#endif
trx_t* trx, /*!< in: trx */
lock_t* c_lock = NULL /*!< in: conflicting lock */
)
{
lock_t* lock;
@ -3434,8 +3495,7 @@ lock_table_create(
ut_list_append(table->locks, lock, TableLockGetNode());
if (type_mode & LOCK_WAIT) {
lock_set_lock_and_trx_wait(lock, trx);
lock_set_lock_and_trx_wait(lock, trx, c_lock);
}
lock->trx->lock.table_locks.push_back(lock);
@ -3590,10 +3650,8 @@ lock_table_enqueue_waiting(
unsigned mode, /*!< in: lock mode this transaction is
requesting */
dict_table_t* table, /*!< in/out: table */
que_thr_t* thr /*!< in: query thread */
#ifdef WITH_WSREP
, lock_t* c_lock /*!< in: conflicting lock or NULL */
#endif
que_thr_t* thr, /*!< in: query thread */
lock_t* c_lock /*!< in: conflicting lock or NULL */
)
{
trx_t* trx;
@ -3624,11 +3682,7 @@ lock_table_enqueue_waiting(
#endif /* WITH_WSREP */
/* Enqueue the lock request that will wait to be granted */
lock = lock_table_create(table, mode | LOCK_WAIT, trx
#ifdef WITH_WSREP
, c_lock
#endif
);
lock = lock_table_create(table, mode | LOCK_WAIT, trx, c_lock);
const trx_t* victim_trx =
DeadlockChecker::check_and_resolve(lock, trx);
@ -3784,11 +3838,7 @@ lock_table(
if (wait_for != NULL) {
err = lock_table_enqueue_waiting(flags | mode, table,
thr
#ifdef WITH_WSREP
, wait_for
#endif
);
thr, wait_for);
} else {
lock_table_create(table, flags | mode, trx);
@ -3836,7 +3886,7 @@ lock_table_ix_resurrect(
Checks if a waiting table lock request still has to wait in a queue.
@return TRUE if still has to wait */
static
bool
const lock_t*
lock_table_has_to_wait_in_queue(
/*============================*/
const lock_t* wait_lock) /*!< in: waiting table lock */
@ -3855,11 +3905,11 @@ lock_table_has_to_wait_in_queue(
if (lock_has_to_wait(wait_lock, lock)) {
return(true);
return(lock);
}
}
return(false);
return(NULL);
}
/*************************************************************//**
@ -3888,9 +3938,17 @@ lock_table_dequeue(
lock != NULL;
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
if (lock_get_wait(lock)
&& !lock_table_has_to_wait_in_queue(lock)) {
if (!lock_get_wait(lock))
continue;
ut_ad(lock->trx->lock.wait_trx);
ut_ad(lock->trx->lock.wait_lock);
if (const lock_t *c = lock_table_has_to_wait_in_queue(lock)) {
trx_mutex_enter(lock->trx);
lock->trx->lock.wait_trx = c->trx;
trx_mutex_exit(lock->trx);
} else {
/* Grant the lock */
ut_ad(in_lock->trx != lock->trx);
lock_grant(lock);
@ -4035,7 +4093,8 @@ lock_rec_unlock(
lock_mutex_enter();
trx_mutex_enter(trx);
first_lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
first_lock = lock_rec_get_first(&lock_sys.rec_hash, block->page.id(),
heap_no);
/* Find the last lock with the same lock_mode and transaction
on the record. */
@ -4078,8 +4137,16 @@ released:
if (!lock_get_wait(lock)) {
continue;
}
const lock_t* c = lock_rec_has_to_wait_in_queue(lock);
if (!c) {
ut_ad(lock->trx->lock.wait_trx);
ut_ad(lock->trx->lock.wait_lock);
if (const lock_t* c = lock_rec_has_to_wait_in_queue(
lock)) {
if (lock->trx != trx)
trx_mutex_enter(lock->trx);
lock->trx->lock.wait_trx = c->trx;
if (lock->trx != trx)
trx_mutex_exit(lock->trx);
} else {
/* Grant the lock */
ut_ad(trx != lock->trx);
lock_grant(lock);
@ -4773,7 +4840,7 @@ lock_rec_queue_validate(
if (!page_rec_is_user_rec(rec)) {
for (lock = lock_rec_get_first(&lock_sys.rec_hash,
block, heap_no);
block->page.id(), heap_no);
lock != NULL;
lock = lock_rec_get_next_const(heap_no, lock)) {
@ -4847,7 +4914,7 @@ func_exit:
wsrep_report_bf_lock_wait(impl_trx->mysql_thd, impl_trx->id);
wsrep_report_bf_lock_wait(other_lock->trx->mysql_thd, other_lock->trx->id);
if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
if (!lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP,
block, heap_no,
impl_trx)) {
ib::info() << "WSREP impl BF lock conflict";
@ -4856,7 +4923,20 @@ func_exit:
#endif /* WITH_WSREP */
{
ut_ad(lock_get_wait(other_lock));
ut_ad(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
/* After MDEV-27025 fix the following case is
possible:
1. trx 1 acquires S-lock;
2. trx 2 creates X-lock waiting for trx 1;
3. trx 1 creates implicit lock, as
lock_rec_other_has_conflicting() returns no
conflicting trx 2 X-lock, the explicit lock
will not be created;
4. trx 3 creates waiting X-lock,
it will wait for S-lock of trx 1.
That is why we relaxing the condition here and
check only for S-lock.
*/
ut_ad(lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP,
block, heap_no, impl_trx));
}
}
@ -4864,7 +4944,8 @@ func_exit:
mutex_exit(&impl_trx->mutex);
}
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block->page.id(),
heap_no);
lock != NULL;
lock = lock_rec_get_next_const(heap_no, lock)) {
ut_ad(!lock->trx->read_only
@ -5199,7 +5280,8 @@ lock_rec_insert_check_and_lock(
BTR_NO_LOCKING_FLAG and skip the locking altogether. */
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
lock = lock_rec_get_first(&lock_sys.rec_hash, block->page.id(),
heap_no);
if (lock == NULL) {
/* We optimize CPU time usage in the simplest case */
@ -5238,19 +5320,13 @@ lock_rec_insert_check_and_lock(
const unsigned type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
if (
#ifdef WITH_WSREP
lock_t* c_lock =
#endif /* WITH_WSREP */
if (lock_t* c_lock =
lock_rec_other_has_conflicting(type_mode, block, heap_no, trx)) {
/* Note that we may get DB_SUCCESS also here! */
trx_mutex_enter(trx);
err = lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
c_lock,
#endif /* WITH_WSREP */
type_mode, block, heap_no, index, thr, NULL);
err = lock_rec_enqueue_waiting(c_lock, type_mode, block,
heap_no, index, thr, NULL);
trx_mutex_exit(trx);
} else {
@ -5327,7 +5403,7 @@ lock_rec_convert_impl_to_expl_for_trx(
&& !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, trx)) {
lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
block, heap_no, index, trx, true);
block, heap_no, index, trx, true, true);
}
lock_mutex_exit();

View file

@ -243,7 +243,7 @@ lock_prdt_has_lock(
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
for (lock = lock_rec_get_first(
lock_hash_get(type_mode), block, PRDT_HEAPNO);
lock_hash_get(type_mode), block->page.id(), PRDT_HEAPNO);
lock != NULL;
lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
ut_ad(lock->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
@ -298,7 +298,7 @@ lock_prdt_other_has_conflicting(
ut_ad(lock_mutex_own());
for (lock_t* lock = lock_rec_get_first(
lock_hash_get(mode), block, PRDT_HEAPNO);
lock_hash_get(mode), block->page.id(), PRDT_HEAPNO);
lock != NULL;
lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {
@ -489,9 +489,13 @@ lock_prdt_add_to_queue(
}
}
lock = lock_rec_create(
/* Note: We will not pass any conflicting lock to lock_rec_create(),
because we should be moving an existing waiting lock request. */
ut_ad(!(type_mode & LOCK_WAIT) || trx->lock.wait_trx);
lock = lock_rec_create(NULL,
#ifdef WITH_WSREP
NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
type_mode, block, PRDT_HEAPNO, index, trx,
caller_owns_trx_mutex);
@ -543,7 +547,8 @@ lock_prdt_insert_check_and_lock(
lock_t* lock;
/* Only need to check locks on prdt_hash */
lock = lock_rec_get_first(&lock_sys.prdt_hash, block, PRDT_HEAPNO);
lock = lock_rec_get_first(&lock_sys.prdt_hash, block->page.id(),
PRDT_HEAPNO);
if (lock == NULL) {
lock_mutex_exit();
@ -581,9 +586,7 @@ lock_prdt_insert_check_and_lock(
trx_mutex_enter(trx);
err = lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
LOCK_X | LOCK_PREDICATE | LOCK_INSERT_INTENTION,
block, PRDT_HEAPNO, index, thr, prdt);
@ -822,9 +825,9 @@ lock_prdt_lock(
lock_t* lock = lock_sys.get_first(hash, block->page.id());
if (lock == NULL) {
lock = lock_rec_create(
lock = lock_rec_create(NULL,
#ifdef WITH_WSREP
NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
prdt_mode, block, PRDT_HEAPNO,
index, trx, FALSE);
@ -854,10 +857,8 @@ lock_prdt_lock(
if (wait_for != NULL) {
err = lock_rec_enqueue_waiting(
#ifdef WITH_WSREP
NULL, /* FIXME: replicate
SPATIAL INDEX locks */
#endif
prdt_mode,
block, PRDT_HEAPNO,
index, thr, prdt);
@ -937,9 +938,9 @@ lock_place_prdt_page_lock(
}
if (lock == NULL) {
lock = lock_rec_create_low(
lock = lock_rec_create_low(NULL,
#ifdef WITH_WSREP
NULL, NULL, /* FIXME: replicate SPATIAL INDEX locks */
NULL, /* FIXME: replicate SPATIAL INDEX locks */
#endif
mode, page_id, NULL, PRDT_HEAPNO,
index, trx, FALSE);
@ -985,7 +986,7 @@ lock_prdt_rec_move(
lock_mutex_enter();
for (lock_t *lock = lock_rec_get_first(&lock_sys.prdt_hash,
donator, PRDT_HEAPNO);
donator->page.id(), PRDT_HEAPNO);
lock != NULL;
lock = lock_rec_get_next(PRDT_HEAPNO, lock)) {