MDEV-28709 unexpected X lock on Supremum in READ COMMITTED

The lock is created during page splitting, after moving records and
locks (lock_move_rec_list_(start|end)()) to the new page and inheriting
the locks to the supremum of the left page from the successor of the
infimum on the right page.

There is no need for such inheritance for the READ COMMITTED isolation
level and not-gap locks, so the fix is to add the corresponding condition
to the gap lock inheritance function.

One more fix is to forbid gap lock inheritance once the XA transaction has
been prepared. The most significant bit of trx_t::n_ref is used to indicate
that gap lock inheritance is forbidden. This fix is based on
mysql/mysql-server@b063e52a83
This commit is contained in:
Vlad Lesin 2022-06-29 17:03:56 +03:00
parent ce2825a867
commit 8128a46827
5 changed files with 208 additions and 58 deletions

View file

@ -0,0 +1,34 @@
CREATE TABLE t (
`a` INT NOT NULL,
`b` INT NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB;
SET GLOBAL innodb_limit_optimistic_insert_debug = 3;
INSERT INTO t VALUES(10, 0);
INSERT INTO t VALUES(20, 0);
INSERT INTO t VALUES(30, 0);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '1';
REPLACE INTO t VALUES(10, 1);
REPLACE INTO t VALUES(20, 1);
SET DEBUG_SYNC= 'ib_after_row_insert SIGNAL inserted WAIT_FOR cont';
REPLACE INTO t VALUES(30, 1);
connect con1,localhost,root;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '2';
SET DEBUG_SYNC= 'now WAIT_FOR inserted';
INSERT INTO t VALUES(40, 2);
SET DEBUG_SYNC= 'now SIGNAL cont';
connection default;
XA END '1';
XA PREPARE '1';
connection default;
XA COMMIT '1';
connection con1;
XA END '2';
XA PREPARE '2';
XA COMMIT '2';
disconnect con1;
connection default;
SET DEBUG_SYNC= "RESET";
DROP TABLE t;

View file

@ -0,0 +1,76 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/count_sessions.inc
CREATE TABLE t (
`a` INT NOT NULL,
`b` INT NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB;
--disable_query_log
SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug;
--enable_query_log
SET GLOBAL innodb_limit_optimistic_insert_debug = 3;
INSERT INTO t VALUES(10, 0);
INSERT INTO t VALUES(20, 0);
INSERT INTO t VALUES(30, 0);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '1';
REPLACE INTO t VALUES(10, 1);
REPLACE INTO t VALUES(20, 1);
# We need the following sync point because mysql_insert() resets
# trx->duplicates with the following condition:
#
# if (duplic == DUP_REPLACE &&
# (!table->triggers || !table->triggers->has_delete_triggers()))
# table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
#
# and ha_innobase::extra() resets trx_t::duplicates, but we need
# lock_update_split_right() to be invoked while trx->duplicates is set in
# order to reproduce the bug. So the transaction will wait just after the
# row_insert_for_mysql() call until another transaction inserts a new row
# and splits the page.
SET DEBUG_SYNC= 'ib_after_row_insert SIGNAL inserted WAIT_FOR cont';
--send REPLACE INTO t VALUES(30, 1)
connect (con1,localhost,root);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '2';
SET DEBUG_SYNC= 'now WAIT_FOR inserted';
# The following statement will cause a page split, and (20, ...) will be the
# split record. As the previous REPLACE set a non-gap X-lock on it,
# lock_update_split_right() and lock_rec_inherit_to_gap() will 'inherit' the
# lock from the very first record (20, ...) of the new right page to the
# supremum of the old left page, which should not happen at the READ COMMITTED
# isolation level.
INSERT INTO t VALUES(40, 2);
SET DEBUG_SYNC= 'now SIGNAL cont';
--connection default
--reap
XA END '1';
# Without the fix, this causes an assertion failure, because the supremum of
# the left page has an X-lock.
XA PREPARE '1';
--connection default
XA COMMIT '1';
--connection con1
XA END '2';
XA PREPARE '2';
XA COMMIT '2';
--disconnect con1
--connection default
SET DEBUG_SYNC= "RESET";
DROP TABLE t;
--disable_query_log
SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug;
--enable_query_log
--source include/wait_until_count_sessions.inc

View file

@ -646,14 +646,19 @@ struct trx_rsegs_t {
struct trx_t : ilist_node<> {
private:
/**
Count of references.
The least significant 31 bits hold the count of references.
We can't release the locks nor commit the transaction until this reference
is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
that it is no longer "active".
*/
Atomic_counter<int32_t> n_ref;
If the most significant bit is set, this transaction should stop inheriting
(GAP) locks. The bit is generally set during transaction prepare for READ
COMMITTED or lower isolation levels, if requested. This is needed for
replication replay, where we do not want to get blocked on GAP locks taken
to protect a concurrent unique insert or replace operation.
*/
Atomic_relaxed<uint32_t> skip_lock_inheritance_and_n_ref;
public:
@ -983,27 +988,47 @@ public:
/** Commit the transaction. */
void commit();
bool is_referenced() const { return n_ref > 0; }
bool is_referenced() const
{
return (skip_lock_inheritance_and_n_ref & ~(1U << 31)) > 0;
}
void reference()
{
#ifdef UNIV_DEBUG
auto old_n_ref=
#endif
n_ref++;
ut_ad(old_n_ref >= 0);
ut_d(auto old_n_ref =)
skip_lock_inheritance_and_n_ref.fetch_add(1);
ut_ad(int32_t(old_n_ref << 1) >= 0);
}
void release_reference()
{
#ifdef UNIV_DEBUG
auto old_n_ref=
ut_d(auto old_n_ref =)
skip_lock_inheritance_and_n_ref.fetch_sub(1);
ut_ad(int32_t(old_n_ref << 1) > 0);
}
bool is_not_inheriting_locks() const
{
return skip_lock_inheritance_and_n_ref >> 31;
}
void set_skip_lock_inheritance()
{
ut_d(auto old_n_ref=) skip_lock_inheritance_and_n_ref.fetch_add(1U << 31);
ut_ad(!(old_n_ref >> 31));
}
/** Atomically clear the "skip lock inheritance" flag (bit 31) of
skip_lock_inheritance_and_n_ref, leaving the reference count kept in the
least significant 31 bits untouched. */
void reset_skip_lock_inheritance()
{
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
__asm__("lock btrl $31, %0" : : "m"(skip_lock_inheritance_and_n_ref));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
_interlockedbittestandreset(
reinterpret_cast<volatile long *>(&skip_lock_inheritance_and_n_ref),
31);
#else
/* Unary ~ binds tighter than <<, so the mask must be parenthesized:
~1U << 31 evaluates to 0 and would also wipe the reference count,
while ~(1U << 31) clears only bit 31. */
skip_lock_inheritance_and_n_ref.fetch_and(~(1U << 31));
#endif
n_ref--;
ut_ad(old_n_ref > 0);
}
/** @return whether the table has lock on
@ -1031,6 +1056,7 @@ public:
ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0);
ut_ad(dict_operation == TRX_DICT_OP_NONE);
ut_ad(!is_not_inheriting_locks());
}
/** @return whether this is a non-locking autocommit transaction */

View file

@ -2291,50 +2291,54 @@ lock_rec_reset_and_release_wait(
&lock_sys.prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
}
/*************************************************************//**
Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
/** Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
static
void
lock_rec_inherit_to_gap(
/*====================*/
const buf_block_t* heir_block, /*!< in: block containing the
record which inherits */
const buf_block_t* block, /*!< in: block containing the
record from which inherited;
does NOT reset the locks on
this record */
ulint heir_heap_no, /*!< in: heap_no of the
inheriting record */
ulint heap_no) /*!< in: heap_no of the
donating record */
GRANTED gap locks.
@param heir_block block containing the record which inherits
@param block block containing the record from which inherited; does NOT reset
the locks on this record
@param heir_heap_no heap_no of the inheriting record
@param heap_no heap_no of the donating record
@tparam from_split true if the function is invoked from
lock_update_split_(left|right)(); in this case not-gap
locks are not inherited to the supremum if the transaction
isolation level is less than or equal to READ COMMITTED */
template <bool from_split= false>
static void lock_rec_inherit_to_gap(const buf_block_t *heir_block,
const buf_block_t *block,
ulint heir_heap_no, ulint heap_no)
{
lock_t* lock;
ut_ad(lock_mutex_own());
ut_ad(!from_split || heir_heap_no == PAGE_HEAP_NO_SUPREMUM);
ut_ad(lock_mutex_own());
/* At READ UNCOMMITTED or READ COMMITTED isolation level,
we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we
DO want S-locks/X-locks(taken for replace) set by a consistency
constraint to be inherited also then. */
/* At READ UNCOMMITTED or READ COMMITTED isolation level,
we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we
DO want S-locks/X-locks(taken for replace) set by a consistency
constraint to be inherited also then. */
for (lock_t *lock= lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
lock != NULL; lock= lock_rec_get_next(heap_no, lock))
{
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
if (!lock_rec_get_insert_intention(lock)
&& (lock->trx->isolation_level > TRX_ISO_READ_COMMITTED
|| lock_get_mode(lock) !=
(lock->trx->duplicates ? LOCK_S : LOCK_X))) {
lock_rec_add_to_queue(
LOCK_REC | LOCK_GAP | lock_get_mode(lock),
heir_block, heir_heap_no, lock->index,
lock->trx, FALSE);
}
}
if (!lock->trx->is_not_inheriting_locks() &&
!lock_rec_get_insert_intention(lock) &&
(lock->trx->isolation_level > TRX_ISO_READ_COMMITTED ||
/* When we are in a page split (not purge), we do not set a lock on the
supremum if the donor lock type is LOCK_REC_NOT_GAP. That is, do not
create bogus gap locks for non-gap locks at the READ UNCOMMITTED and
READ COMMITTED isolation levels. LOCK_ORDINARY and LOCK_GAP require the
gap before the record to be locked, which is why setting a lock on the
supremum is necessary for them. */
((!from_split || !lock->is_record_not_gap()) &&
(lock_get_mode(lock) != (lock->trx->duplicates ? LOCK_S : LOCK_X)))))
{
lock_rec_add_to_queue(LOCK_REC | LOCK_GAP | lock_get_mode(lock),
heir_block, heir_heap_no, lock->index, lock->trx,
FALSE);
}
}
}
/*************************************************************//**
@ -2361,7 +2365,8 @@ lock_rec_inherit_to_gap_if_gap_lock(
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
if (!lock_rec_get_insert_intention(lock)
if (!lock->trx->is_not_inheriting_locks()
&& !lock_rec_get_insert_intention(lock)
&& (heap_no == PAGE_HEAP_NO_SUPREMUM
|| !lock_rec_get_rec_not_gap(lock))) {
@ -2943,7 +2948,7 @@ lock_update_split_right(
/* Inherit the locks to the supremum of left page from the successor
of the infimum on right page */
lock_rec_inherit_to_gap(left_block, right_block,
lock_rec_inherit_to_gap<true>(left_block, right_block,
PAGE_HEAP_NO_SUPREMUM, heap_no);
lock_mutex_exit();
@ -3063,7 +3068,7 @@ lock_update_split_left(
/* Inherit the locks to the supremum of the left page from the
successor of the infimum on the right page */
lock_rec_inherit_to_gap(left_block, right_block,
lock_rec_inherit_to_gap<true>(left_block, right_block,
PAGE_HEAP_NO_SUPREMUM, heap_no);
lock_mutex_exit();
@ -4251,6 +4256,11 @@ void lock_release_on_prepare(trx_t *trx)
{
ut_ad(trx->dict_operation ||
lock->index->table->id >= DICT_HDR_FIRST_ID);
ut_ad(lock->trx->isolation_level > TRX_ISO_READ_COMMITTED ||
/* Insert-intention lock is valid for supremum for isolation
level > TRX_ISO_READ_COMMITTED */
lock_get_mode(lock) == LOCK_X ||
!lock_rec_get_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM));
retain_lock:
lock= UT_LIST_GET_PREV(trx_locks, lock);
continue;
@ -4287,6 +4297,8 @@ retain_lock:
}
lock_mutex_exit();
trx->set_skip_lock_inheritance();
}
/* True if a lock mode is S or X */

View file

@ -412,7 +412,8 @@ void trx_t::free()
mod_tables.clear();
MEM_NOACCESS(&n_ref, sizeof n_ref);
MEM_NOACCESS(&skip_lock_inheritance_and_n_ref,
sizeof skip_lock_inheritance_and_n_ref);
/* do not poison mutex */
MEM_NOACCESS(&id, sizeof id);
MEM_NOACCESS(&state, sizeof state);
@ -518,6 +519,7 @@ inline void trx_t::release_locks()
}
lock.table_locks.clear();
reset_skip_lock_inheritance();
}
/** At shutdown, frees a transaction object. */