MDEV-28709 unexpected X lock on Supremum in READ COMMITTED

The lock is created during page splitting, after moving records and
locks (lock_move_rec_list_(start|end)()) to the new page, and inheriting
the locks to the supremum of the left page from the successor of the
infimum on the right page.

There is no need for such inheritance at the READ COMMITTED isolation level
for not-gap locks, so the fix is to add the corresponding condition to the
gap lock inheritance function.

One more fix is to forbid gap lock inheritance if XA was prepared. Use the
most significant bit of trx_t::n_ref to indicate that gap lock inheritance
is forbidden. This fix is based on
mysql/mysql-server@b063e52a83
This commit is contained in:
Vlad Lesin 2022-06-29 17:03:56 +03:00
parent ce2825a867
commit 8128a46827
5 changed files with 208 additions and 58 deletions

View file

@ -0,0 +1,34 @@
CREATE TABLE t (
`a` INT NOT NULL,
`b` INT NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB;
SET GLOBAL innodb_limit_optimistic_insert_debug = 3;
INSERT INTO t VALUES(10, 0);
INSERT INTO t VALUES(20, 0);
INSERT INTO t VALUES(30, 0);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '1';
REPLACE INTO t VALUES(10, 1);
REPLACE INTO t VALUES(20, 1);
SET DEBUG_SYNC= 'ib_after_row_insert SIGNAL inserted WAIT_FOR cont';
REPLACE INTO t VALUES(30, 1);
connect con1,localhost,root;
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '2';
SET DEBUG_SYNC= 'now WAIT_FOR inserted';
INSERT INTO t VALUES(40, 2);
SET DEBUG_SYNC= 'now SIGNAL cont';
connection default;
XA END '1';
XA PREPARE '1';
connection default;
XA COMMIT '1';
connection con1;
XA END '2';
XA PREPARE '2';
XA COMMIT '2';
disconnect con1;
connection default;
SET DEBUG_SYNC= "RESET";
DROP TABLE t;

View file

@ -0,0 +1,76 @@
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/count_sessions.inc
CREATE TABLE t (
`a` INT NOT NULL,
`b` INT NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB;
--disable_query_log
SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug;
--enable_query_log
SET GLOBAL innodb_limit_optimistic_insert_debug = 3;
INSERT INTO t VALUES(10, 0);
INSERT INTO t VALUES(20, 0);
INSERT INTO t VALUES(30, 0);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '1';
REPLACE INTO t VALUES(10, 1);
REPLACE INTO t VALUES(20, 1);
# We need the following sync point because mysql_insert() resets
# trx->duplicates with the following condition:
#
# if (duplic == DUP_REPLACE &&
# (!table->triggers || !table->triggers->has_delete_triggers()))
# table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
#
# and ha_innobase::extra() resets trx_t::duplicates, but we need
# lock_update_split_right() to be invoked when trx->duplicates is set to
# repeat the bug. So the transaction will hang just after
# row_insert_for_mysql() call until another transaction inserts new row and
# splits the page.
SET DEBUG_SYNC= 'ib_after_row_insert SIGNAL inserted WAIT_FOR cont';
--send REPLACE INTO t VALUES(30, 1)
connect (con1,localhost,root);
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
XA START '2';
SET DEBUG_SYNC= 'now WAIT_FOR inserted';
# The following statement will cause page split and (20, ...) will be split
# record. As the previous REPLACE set non-gap X-lock on it,
# lock_update_split_right() and lock_rec_inherit_to_gap() will 'inherit' the
# lock from the very first (20, ...) new right page record to the supremum of
# the old left page, what should not be for READ COMMITTED isolation level
INSERT INTO t VALUES(40, 2);
SET DEBUG_SYNC= 'now SIGNAL cont';
--connection default
--reap
XA END '1';
# This will cause the assertion failure, because the supremum of the left page
# has X-lock.
XA PREPARE '1';
--connection default
XA COMMIT '1';
--connection con1
XA END '2';
XA PREPARE '2';
XA COMMIT '2';
--disconnect con1
--connection default
SET DEBUG_SYNC= "RESET";
DROP TABLE t;
--disable_query_log
SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug;
--enable_query_log
--source include/wait_until_count_sessions.inc

View file

@ -646,14 +646,19 @@ struct trx_rsegs_t {
struct trx_t : ilist_node<> { struct trx_t : ilist_node<> {
private: private:
/** /**
Count of references. Least significant 31 bits is count of references.
We can't release the locks nor commit the transaction until this reference We can't release the locks nor commit the transaction until this reference
is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
that it is no longer "active". that it is no longer "active".
*/
Atomic_counter<int32_t> n_ref; If the most significant bit is set this transaction should stop inheriting
(GAP)locks. Generally set to true during transaction prepare for RC or lower
isolation, if requested. Needed for replication replay where
we don't want to get blocked on GAP locks taken for protecting
concurrent unique insert or replace operation.
*/
Atomic_relaxed<uint32_t> skip_lock_inheritance_and_n_ref;
public: public:
@ -983,27 +988,47 @@ public:
/** Commit the transaction. */ /** Commit the transaction. */
void commit(); void commit();
bool is_referenced() const
bool is_referenced() const { return n_ref > 0; } {
return (skip_lock_inheritance_and_n_ref & ~(1U << 31)) > 0;
}
void reference() void reference()
{ {
#ifdef UNIV_DEBUG ut_d(auto old_n_ref =)
auto old_n_ref= skip_lock_inheritance_and_n_ref.fetch_add(1);
#endif ut_ad(int32_t(old_n_ref << 1) >= 0);
n_ref++;
ut_ad(old_n_ref >= 0);
} }
void release_reference() void release_reference()
{ {
#ifdef UNIV_DEBUG ut_d(auto old_n_ref =)
auto old_n_ref= skip_lock_inheritance_and_n_ref.fetch_sub(1);
ut_ad(int32_t(old_n_ref << 1) > 0);
}
bool is_not_inheriting_locks() const
{
return skip_lock_inheritance_and_n_ref >> 31;
}
void set_skip_lock_inheritance()
{
ut_d(auto old_n_ref=) skip_lock_inheritance_and_n_ref.fetch_add(1U << 31);
ut_ad(!(old_n_ref >> 31));
}
void reset_skip_lock_inheritance()
{
#if defined __GNUC__ && (defined __i386__ || defined __x86_64__)
__asm__("lock btrl $31, %0" : : "m"(skip_lock_inheritance_and_n_ref));
#elif defined _MSC_VER && (defined _M_IX86 || defined _M_X64)
_interlockedbittestandreset(
reinterpret_cast<volatile long *>(&skip_lock_inheritance_and_n_ref),
31);
#else
skip_lock_inheritance_and_n_ref.fetch_and(~1U << 31);
#endif #endif
n_ref--;
ut_ad(old_n_ref > 0);
} }
/** @return whether the table has lock on /** @return whether the table has lock on
@ -1031,6 +1056,7 @@ public:
ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks)); ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0); ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0);
ut_ad(dict_operation == TRX_DICT_OP_NONE); ut_ad(dict_operation == TRX_DICT_OP_NONE);
ut_ad(!is_not_inheriting_locks());
} }
/** @return whether this is a non-locking autocommit transaction */ /** @return whether this is a non-locking autocommit transaction */

View file

@ -2291,50 +2291,54 @@ lock_rec_reset_and_release_wait(
&lock_sys.prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM); &lock_sys.prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
} }
/*************************************************************//** /** Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */ GRANTED gap locks.
static @param heir_block block containing the record which inherits
void @param block block containing the record from which inherited; does NOT reset
lock_rec_inherit_to_gap( the locks on this record
/*====================*/ @param heir_heap_no heap_no of the inheriting record
const buf_block_t* heir_block, /*!< in: block containing the @param heap_no heap_no of the donating record
record which inherits */ @tparam from_split true if the function is invoked from
const buf_block_t* block, /*!< in: block containing the lock_update_split_(left|right)(), in this case not-gap
record from which inherited; locks are not inherited to supremum if transaction
does NOT reset the locks on isolation level less or equal to READ COMMITTED */
this record */ template <bool from_split= false>
ulint heir_heap_no, /*!< in: heap_no of the static void lock_rec_inherit_to_gap(const buf_block_t *heir_block,
inheriting record */ const buf_block_t *block,
ulint heap_no) /*!< in: heap_no of the ulint heir_heap_no, ulint heap_no)
donating record */
{ {
lock_t* lock; ut_ad(lock_mutex_own());
ut_ad(!from_split || heir_heap_no == PAGE_HEAP_NO_SUPREMUM);
ut_ad(lock_mutex_own()); /* At READ UNCOMMITTED or READ COMMITTED isolation level,
we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we
DO want S-locks/X-locks(taken for replace) set by a consistency
constraint to be inherited also then. */
/* At READ UNCOMMITTED or READ COMMITTED isolation level, for (lock_t *lock= lock_rec_get_first(&lock_sys.rec_hash, block, heap_no);
we do not want locks set lock != NULL; lock= lock_rec_get_next(heap_no, lock))
by an UPDATE or a DELETE to be inherited as gap type locks. But we {
DO want S-locks/X-locks(taken for replace) set by a consistency
constraint to be inherited also then. */
for (lock = lock_rec_get_first(&lock_sys.rec_hash, block, heap_no); if (!lock->trx->is_not_inheriting_locks() &&
lock != NULL; !lock_rec_get_insert_intention(lock) &&
lock = lock_rec_get_next(heap_no, lock)) { (lock->trx->isolation_level > TRX_ISO_READ_COMMITTED ||
/* When we are in a page split (not purge), then we don't set a lock
if (!lock_rec_get_insert_intention(lock) on supremum if the donor lock type is LOCK_REC_NOT_GAP. That is, do
&& (lock->trx->isolation_level > TRX_ISO_READ_COMMITTED not create bogus gap locks for non-gap locks for READ UNCOMMITTED and
|| lock_get_mode(lock) != READ COMMITTED isolation levels. LOCK_ORDINARY and
(lock->trx->duplicates ? LOCK_S : LOCK_X))) { LOCK_GAP require a gap before the record to be locked, that is why
lock_rec_add_to_queue( setting lock on supremmum is necessary. */
LOCK_REC | LOCK_GAP | lock_get_mode(lock), ((!from_split || !lock->is_record_not_gap()) &&
heir_block, heir_heap_no, lock->index, (lock_get_mode(lock) != (lock->trx->duplicates ? LOCK_S : LOCK_X)))))
lock->trx, FALSE); {
} lock_rec_add_to_queue(LOCK_REC | LOCK_GAP | lock_get_mode(lock),
} heir_block, heir_heap_no, lock->index, lock->trx,
FALSE);
}
}
} }
/*************************************************************//** /*************************************************************//**
@ -2361,7 +2365,8 @@ lock_rec_inherit_to_gap_if_gap_lock(
lock != NULL; lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) { lock = lock_rec_get_next(heap_no, lock)) {
if (!lock_rec_get_insert_intention(lock) if (!lock->trx->is_not_inheriting_locks()
&& !lock_rec_get_insert_intention(lock)
&& (heap_no == PAGE_HEAP_NO_SUPREMUM && (heap_no == PAGE_HEAP_NO_SUPREMUM
|| !lock_rec_get_rec_not_gap(lock))) { || !lock_rec_get_rec_not_gap(lock))) {
@ -2943,7 +2948,7 @@ lock_update_split_right(
/* Inherit the locks to the supremum of left page from the successor /* Inherit the locks to the supremum of left page from the successor
of the infimum on right page */ of the infimum on right page */
lock_rec_inherit_to_gap(left_block, right_block, lock_rec_inherit_to_gap<true>(left_block, right_block,
PAGE_HEAP_NO_SUPREMUM, heap_no); PAGE_HEAP_NO_SUPREMUM, heap_no);
lock_mutex_exit(); lock_mutex_exit();
@ -3063,7 +3068,7 @@ lock_update_split_left(
/* Inherit the locks to the supremum of the left page from the /* Inherit the locks to the supremum of the left page from the
successor of the infimum on the right page */ successor of the infimum on the right page */
lock_rec_inherit_to_gap(left_block, right_block, lock_rec_inherit_to_gap<true>(left_block, right_block,
PAGE_HEAP_NO_SUPREMUM, heap_no); PAGE_HEAP_NO_SUPREMUM, heap_no);
lock_mutex_exit(); lock_mutex_exit();
@ -4251,6 +4256,11 @@ void lock_release_on_prepare(trx_t *trx)
{ {
ut_ad(trx->dict_operation || ut_ad(trx->dict_operation ||
lock->index->table->id >= DICT_HDR_FIRST_ID); lock->index->table->id >= DICT_HDR_FIRST_ID);
ut_ad(lock->trx->isolation_level > TRX_ISO_READ_COMMITTED ||
/* Insert-intention lock is valid for supremum for isolation
level > TRX_ISO_READ_COMMITTED */
lock_get_mode(lock) == LOCK_X ||
!lock_rec_get_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM));
retain_lock: retain_lock:
lock= UT_LIST_GET_PREV(trx_locks, lock); lock= UT_LIST_GET_PREV(trx_locks, lock);
continue; continue;
@ -4287,6 +4297,8 @@ retain_lock:
} }
lock_mutex_exit(); lock_mutex_exit();
trx->set_skip_lock_inheritance();
} }
/* True if a lock mode is S or X */ /* True if a lock mode is S or X */

View file

@ -412,7 +412,8 @@ void trx_t::free()
mod_tables.clear(); mod_tables.clear();
MEM_NOACCESS(&n_ref, sizeof n_ref); MEM_NOACCESS(&skip_lock_inheritance_and_n_ref,
sizeof skip_lock_inheritance_and_n_ref);
/* do not poison mutex */ /* do not poison mutex */
MEM_NOACCESS(&id, sizeof id); MEM_NOACCESS(&id, sizeof id);
MEM_NOACCESS(&state, sizeof state); MEM_NOACCESS(&state, sizeof state);
@ -518,6 +519,7 @@ inline void trx_t::release_locks()
} }
lock.table_locks.clear(); lock.table_locks.clear();
reset_skip_lock_inheritance();
} }
/** At shutdown, frees a transaction object. */ /** At shutdown, frees a transaction object. */