mirror of
https://github.com/MariaDB/server.git
synced 2025-01-22 06:44:16 +01:00
bfffe571ac
buf_page_io_complete(): Do not test bpage for NULL, because it is declared (and always passed) as nonnull. buf_flush_batch(): Remove the constant local variable count=0. fil_ibd_load(): Use magic comment to suppress -Wimplicit-fallthrough. ut_stage_alter_t::inc(ulint): Disable references to an unused parameter. lock_queue_validate(), sync_array_find_thread(), rbt_check_ordering(): Define only in debug builds.
8580 lines
227 KiB
C++
8580 lines
227 KiB
C++
/*****************************************************************************
|
|
|
|
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
|
Copyright (c) 2014, 2017, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
|
|
|
*****************************************************************************/
|
|
|
|
/**************************************************//**
|
|
@file lock/lock0lock.cc
|
|
The transaction lock system
|
|
|
|
Created 5/7/1996 Heikki Tuuri
|
|
*******************************************************/
|
|
|
|
#define LOCK_MODULE_IMPLEMENTATION
|
|
|
|
|
|
#include "ha_prototypes.h"
|
|
|
|
#include <mysql/service_thd_error_context.h>
|
|
#include <sql_class.h>
|
|
|
|
#include "lock0lock.h"
|
|
#include "lock0priv.h"
|
|
#include "dict0mem.h"
|
|
#include "usr0sess.h"
|
|
#include "trx0purge.h"
|
|
#include "trx0sys.h"
|
|
#include "srv0mon.h"
|
|
#include "ut0vec.h"
|
|
#include "btr0btr.h"
|
|
#include "dict0boot.h"
|
|
#include "ut0new.h"
|
|
#include "row0sel.h"
|
|
#include "row0mysql.h"
|
|
#include "pars0pars.h"
|
|
|
|
#include <set>
|
|
|
|
#ifdef WITH_WSREP
|
|
#include <mysql/service_wsrep.h>
|
|
#endif /* WITH_WSREP */
|
|
|
|
/** Lock scheduling algorithm */
|
|
ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
|
|
|
|
/** The value of innodb_deadlock_detect */
|
|
my_bool innobase_deadlock_detect;
|
|
|
|
/** Total number of cached record locks */
|
|
static const ulint REC_LOCK_CACHE = 8;
|
|
|
|
/** Maximum record lock size in bytes */
|
|
static const ulint REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;
|
|
|
|
/** Total number of cached table locks */
|
|
static const ulint TABLE_LOCK_CACHE = 8;
|
|
|
|
/** Size in bytes, of the table lock instance */
|
|
static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t);
|
|
|
|
/*********************************************************************//**
|
|
Checks if a waiting record lock request still has to wait in a queue.
|
|
@return lock that is causing the wait */
|
|
static
|
|
const lock_t*
|
|
lock_rec_has_to_wait_in_queue(
|
|
/*==========================*/
|
|
const lock_t* wait_lock); /*!< in: waiting record lock */
|
|
|
|
/*************************************************************//**
|
|
Grants a lock to a waiting lock request and releases the waiting transaction.
|
|
The caller must hold lock_sys->mutex. */
|
|
static
|
|
void
|
|
lock_grant(
|
|
/*=======*/
|
|
lock_t* lock, /*!< in/out: waiting lock request */
|
|
bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */
|
|
|
|
extern "C" void thd_rpl_deadlock_check(MYSQL_THD thd, MYSQL_THD other_thd);
|
|
extern "C" int thd_need_wait_reports(const MYSQL_THD thd);
|
|
extern "C" int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
|
|
|
|
extern "C" int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
|
|
|
|
/** Print info of a table lock.
|
|
@param[in,out] file output stream
|
|
@param[in] lock table lock */
|
|
static
|
|
void
|
|
lock_table_print(FILE* file, const lock_t* lock);
|
|
|
|
/** Print info of a record lock.
|
|
@param[in,out] file output stream
|
|
@param[in] lock record lock */
|
|
static
|
|
void
|
|
lock_rec_print(FILE* file, const lock_t* lock);
|
|
|
|
/** Deadlock checker. */
|
|
class DeadlockChecker {
|
|
public:
|
|
/** Checks if a joining lock request results in a deadlock. If
|
|
a deadlock is found this function will resolve the deadlock
|
|
by choosing a victim transaction and rolling it back. It
|
|
will attempt to resolve all deadlocks. The returned transaction
|
|
id will be the joining transaction id or 0 if some other
|
|
transaction was chosen as a victim and rolled back or no
|
|
deadlock found.
|
|
|
|
@param lock lock the transaction is requesting
|
|
@param trx transaction requesting the lock
|
|
|
|
@return id of transaction chosen as victim or 0 */
|
|
static const trx_t* check_and_resolve(
|
|
const lock_t* lock,
|
|
trx_t* trx);
|
|
|
|
private:
|
|
/** Do a shallow copy. Default destructor OK.
|
|
@param trx the start transaction (start node)
|
|
@param wait_lock lock that a transaction wants
|
|
@param mark_start visited node counter */
|
|
DeadlockChecker(
|
|
const trx_t* trx,
|
|
const lock_t* wait_lock,
|
|
ib_uint64_t mark_start,
|
|
bool report_waiters)
|
|
:
|
|
m_cost(),
|
|
m_start(trx),
|
|
m_too_deep(),
|
|
m_wait_lock(wait_lock),
|
|
m_mark_start(mark_start),
|
|
m_n_elems(),
|
|
m_report_waiters(report_waiters)
|
|
{
|
|
}
|
|
|
|
/** Check if the search is too deep. */
|
|
bool is_too_deep() const
|
|
{
|
|
return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
|
|
|| m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
|
|
}
|
|
|
|
/** Save current state.
|
|
@param lock lock to push on the stack.
|
|
@param heap_no the heap number to push on the stack.
|
|
@return false if stack is full. */
|
|
bool push(const lock_t* lock, ulint heap_no)
|
|
{
|
|
ut_ad((lock_get_type_low(lock) & LOCK_REC)
|
|
|| (lock_get_type_low(lock) & LOCK_TABLE));
|
|
|
|
ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
|
|
== (heap_no == ULINT_UNDEFINED));
|
|
|
|
/* Ensure that the stack is bounded. */
|
|
if (m_n_elems >= UT_ARR_SIZE(s_states)) {
|
|
return(false);
|
|
}
|
|
|
|
state_t& state = s_states[m_n_elems++];
|
|
|
|
state.m_lock = lock;
|
|
state.m_wait_lock = m_wait_lock;
|
|
state.m_heap_no =heap_no;
|
|
|
|
return(true);
|
|
}
|
|
|
|
/** Restore state.
|
|
@param[out] lock current lock
|
|
@param[out] heap_no current heap_no */
|
|
void pop(const lock_t*& lock, ulint& heap_no)
|
|
{
|
|
ut_a(m_n_elems > 0);
|
|
|
|
const state_t& state = s_states[--m_n_elems];
|
|
|
|
lock = state.m_lock;
|
|
heap_no = state.m_heap_no;
|
|
m_wait_lock = state.m_wait_lock;
|
|
}
|
|
|
|
/** Check whether the node has been visited.
|
|
@param lock lock to check
|
|
@return true if the node has been visited */
|
|
bool is_visited(const lock_t* lock) const
|
|
{
|
|
return(lock->trx->lock.deadlock_mark > m_mark_start);
|
|
}
|
|
|
|
/** Get the next lock in the queue that is owned by a transaction
|
|
whose sub-tree has not already been searched.
|
|
Note: "next" here means PREV for table locks.
|
|
@param lock Lock in queue
|
|
@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
|
|
@return next lock or NULL if at end of queue */
|
|
const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;
|
|
|
|
/** Get the first lock to search. The search starts from the current
|
|
wait_lock. What we are really interested in is an edge from the
|
|
current wait_lock's owning transaction to another transaction that has
|
|
a lock ahead in the queue. We skip locks where the owning transaction's
|
|
sub-tree has already been searched.
|
|
|
|
Note: The record locks are traversed from the oldest lock to the
|
|
latest. For table locks we go from latest to oldest.
|
|
|
|
For record locks, we first position the iterator on first lock on
|
|
the page and then reposition on the actual heap_no. This is required
|
|
due to the way the record lock has is implemented.
|
|
|
|
@param[out] heap_no if rec lock, else ULINT_UNDEFINED.
|
|
|
|
@return first lock or NULL */
|
|
const lock_t* get_first_lock(ulint* heap_no) const;
|
|
|
|
/** Notify that a deadlock has been detected and print the conflicting
|
|
transaction info.
|
|
@param lock lock causing deadlock */
|
|
void notify(const lock_t* lock) const;
|
|
|
|
/** Select the victim transaction that should be rolledback.
|
|
@return victim transaction */
|
|
const trx_t* select_victim() const;
|
|
|
|
/** Rollback transaction selected as the victim. */
|
|
void trx_rollback();
|
|
|
|
/** Looks iteratively for a deadlock. Note: the joining transaction
|
|
may have been granted its lock by the deadlock checks.
|
|
|
|
@return 0 if no deadlock else the victim transaction.*/
|
|
const trx_t* search();
|
|
|
|
/** Print transaction data to the deadlock file and possibly to stderr.
|
|
@param trx transaction
|
|
@param max_query_len max query length to print */
|
|
static void print(const trx_t* trx, ulint max_query_len);
|
|
|
|
/** rewind(3) the file used for storing the latest detected deadlock
|
|
and print a heading message to stderr if printing of all deadlocks to
|
|
stderr is enabled. */
|
|
static void start_print();
|
|
|
|
/** Print lock data to the deadlock file and possibly to stderr.
|
|
@param lock record or table type lock */
|
|
static void print(const lock_t* lock);
|
|
|
|
/** Print a message to the deadlock file and possibly to stderr.
|
|
@param msg message to print */
|
|
static void print(const char* msg);
|
|
|
|
/** Print info about transaction that was rolled back.
|
|
@param trx transaction rolled back
|
|
@param lock lock trx wants */
|
|
static void rollback_print(const trx_t* trx, const lock_t* lock);
|
|
|
|
private:
|
|
/** DFS state information, used during deadlock checking. */
|
|
struct state_t {
|
|
const lock_t* m_lock; /*!< Current lock */
|
|
const lock_t* m_wait_lock; /*!< Waiting for lock */
|
|
ulint m_heap_no; /*!< heap number if rec lock */
|
|
};
|
|
|
|
/** Used in deadlock tracking. Protected by lock_sys->mutex. */
|
|
static ib_uint64_t s_lock_mark_counter;
|
|
|
|
/** Calculation steps thus far. It is the count of the nodes visited. */
|
|
ulint m_cost;
|
|
|
|
/** Joining transaction that is requesting a lock in an
|
|
incompatible mode */
|
|
const trx_t* m_start;
|
|
|
|
/** TRUE if search was too deep and was aborted */
|
|
bool m_too_deep;
|
|
|
|
/** Lock that trx wants */
|
|
const lock_t* m_wait_lock;
|
|
|
|
/** Value of lock_mark_count at the start of the deadlock check. */
|
|
ib_uint64_t m_mark_start;
|
|
|
|
/** Number of states pushed onto the stack */
|
|
size_t m_n_elems;
|
|
|
|
/** This is to avoid malloc/free calls. */
|
|
static state_t s_states[MAX_STACK_SIZE];
|
|
|
|
/** Set if thd_rpl_deadlock_check() should be called for waits. */
|
|
bool m_report_waiters;
|
|
};
|
|
|
|
/** Counter to mark visited nodes during deadlock search. */
|
|
ib_uint64_t DeadlockChecker::s_lock_mark_counter = 0;
|
|
|
|
/** The stack used for deadlock searches. */
|
|
DeadlockChecker::state_t DeadlockChecker::s_states[MAX_STACK_SIZE];
|
|
|
|
#ifdef UNIV_DEBUG
/** Validate the lock system.
@return TRUE if ok */
static
bool
lock_validate();

/** Validate the record lock queues on a page.
@param[in]	block	buffer block
@return TRUE if ok */
static
ibool
lock_rec_validate_page(
	const buf_block_t*	block)
	MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */
|
|
|
|
/* The lock system */
|
|
lock_sys_t* lock_sys = NULL;
|
|
|
|
/** We store info on the latest deadlock error to this buffer. InnoDB
|
|
Monitor will then fetch it and print */
|
|
static bool lock_deadlock_found = false;
|
|
|
|
/** Only created if !srv_read_only_mode */
|
|
static FILE* lock_latest_err_file;
|
|
|
|
/*********************************************************************//**
|
|
Reports that a transaction id is insensible, i.e., in the future. */
|
|
void
|
|
lock_report_trx_id_insanity(
|
|
/*========================*/
|
|
trx_id_t trx_id, /*!< in: trx id */
|
|
const rec_t* rec, /*!< in: user record */
|
|
dict_index_t* index, /*!< in: index */
|
|
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
|
|
trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
|
|
{
|
|
ib::error()
|
|
<< "Transaction id " << trx_id
|
|
<< " associated with record" << rec_offsets_print(rec, offsets)
|
|
<< " in index " << index->name
|
|
<< " of table " << index->table->name
|
|
<< " is greater than the global counter " << max_trx_id
|
|
<< "! The table is corrupted.";
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks that a transaction id is sensible, i.e., not in the future.
|
|
@return true if ok */
|
|
#ifdef UNIV_DEBUG
|
|
|
|
#else
|
|
static MY_ATTRIBUTE((warn_unused_result))
|
|
#endif
|
|
bool
|
|
lock_check_trx_id_sanity(
|
|
/*=====================*/
|
|
trx_id_t trx_id, /*!< in: trx id */
|
|
const rec_t* rec, /*!< in: user record */
|
|
dict_index_t* index, /*!< in: index */
|
|
const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
|
|
{
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
|
|
trx_id_t max_trx_id = trx_sys_get_max_trx_id();
|
|
bool is_ok = trx_id < max_trx_id;
|
|
|
|
if (!is_ok) {
|
|
lock_report_trx_id_insanity(
|
|
trx_id, rec, index, offsets, max_trx_id);
|
|
}
|
|
|
|
return(is_ok);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks that a record is seen in a consistent read.
|
|
@return true if sees, or false if an earlier version of the record
|
|
should be retrieved */
|
|
bool
|
|
lock_clust_rec_cons_read_sees(
|
|
/*==========================*/
|
|
const rec_t* rec, /*!< in: user record which should be read or
|
|
passed over by a read cursor */
|
|
dict_index_t* index, /*!< in: clustered index */
|
|
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
|
|
ReadView* view) /*!< in: consistent read view */
|
|
{
|
|
ut_ad(dict_index_is_clust(index));
|
|
ut_ad(page_rec_is_user_rec(rec));
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
|
|
/* Temp-tables are not shared across connections and multiple
|
|
transactions from different connections cannot simultaneously
|
|
operate on same temp-table and so read of temp-table is
|
|
always consistent read. */
|
|
if (srv_read_only_mode || dict_table_is_temporary(index->table)) {
|
|
ut_ad(view == 0 || dict_table_is_temporary(index->table));
|
|
return(true);
|
|
}
|
|
|
|
/* NOTE that we call this function while holding the search
|
|
system latch. */
|
|
|
|
trx_id_t trx_id = row_get_rec_trx_id(rec, index, offsets);
|
|
|
|
return(view->changes_visible(trx_id, index->table->name));
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks that a non-clustered index record is seen in a consistent read.
|
|
|
|
NOTE that a non-clustered index page contains so little information on
|
|
its modifications that also in the case false, the present version of
|
|
rec may be the right, but we must check this from the clustered index
|
|
record.
|
|
|
|
@return true if certainly sees, or false if an earlier version of the
|
|
clustered index record might be needed */
|
|
bool
|
|
lock_sec_rec_cons_read_sees(
|
|
/*========================*/
|
|
const rec_t* rec, /*!< in: user record which
|
|
should be read or passed over
|
|
by a read cursor */
|
|
const dict_index_t* index, /*!< in: index */
|
|
const ReadView* view) /*!< in: consistent read view */
|
|
{
|
|
ut_ad(page_rec_is_user_rec(rec));
|
|
|
|
/* NOTE that we might call this function while holding the search
|
|
system latch. */
|
|
|
|
if (recv_recovery_is_on()) {
|
|
|
|
return(false);
|
|
|
|
} else if (dict_table_is_temporary(index->table)) {
|
|
|
|
/* Temp-tables are not shared across connections and multiple
|
|
transactions from different connections cannot simultaneously
|
|
operate on same temp-table and so read of temp-table is
|
|
always consistent read. */
|
|
|
|
return(true);
|
|
}
|
|
|
|
trx_id_t max_trx_id = page_get_max_trx_id(page_align(rec));
|
|
|
|
ut_ad(max_trx_id > 0);
|
|
|
|
return(view->sees(max_trx_id));
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Creates the lock system at database start. */
|
|
void
|
|
lock_sys_create(
|
|
/*============*/
|
|
ulint n_cells) /*!< in: number of slots in lock hash table */
|
|
{
|
|
ulint lock_sys_sz;
|
|
|
|
lock_sys_sz = sizeof(*lock_sys) + OS_THREAD_MAX_N * sizeof(srv_slot_t);
|
|
|
|
lock_sys = static_cast<lock_sys_t*>(ut_zalloc_nokey(lock_sys_sz));
|
|
|
|
void* ptr = &lock_sys[1];
|
|
|
|
lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr);
|
|
|
|
lock_sys->last_slot = lock_sys->waiting_threads;
|
|
|
|
mutex_create(LATCH_ID_LOCK_SYS, &lock_sys->mutex);
|
|
|
|
mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);
|
|
|
|
lock_sys->timeout_event = os_event_create(0);
|
|
|
|
lock_sys->rec_hash = hash_create(n_cells);
|
|
lock_sys->prdt_hash = hash_create(n_cells);
|
|
lock_sys->prdt_page_hash = hash_create(n_cells);
|
|
|
|
if (!srv_read_only_mode) {
|
|
lock_latest_err_file = os_file_create_tmpfile(NULL);
|
|
ut_a(lock_latest_err_file);
|
|
}
|
|
}
|
|
|
|
/** Calculates the fold value of a lock: used in migrating the hash table.
|
|
@param[in] lock record lock object
|
|
@return folded value */
|
|
static
|
|
ulint
|
|
lock_rec_lock_fold(
|
|
const lock_t* lock)
|
|
{
|
|
return(lock_rec_fold(lock->un_member.rec_lock.space,
|
|
lock->un_member.rec_lock.page_no));
|
|
}
|
|
|
|
/** Resize the lock hash tables.
|
|
@param[in] n_cells number of slots in lock hash table */
|
|
void
|
|
lock_sys_resize(
|
|
ulint n_cells)
|
|
{
|
|
hash_table_t* old_hash;
|
|
|
|
lock_mutex_enter();
|
|
|
|
old_hash = lock_sys->rec_hash;
|
|
lock_sys->rec_hash = hash_create(n_cells);
|
|
HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash,
|
|
lock_rec_lock_fold);
|
|
hash_table_free(old_hash);
|
|
|
|
old_hash = lock_sys->prdt_hash;
|
|
lock_sys->prdt_hash = hash_create(n_cells);
|
|
HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash,
|
|
lock_rec_lock_fold);
|
|
hash_table_free(old_hash);
|
|
|
|
old_hash = lock_sys->prdt_page_hash;
|
|
lock_sys->prdt_page_hash = hash_create(n_cells);
|
|
HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
|
|
lock_rec_lock_fold);
|
|
hash_table_free(old_hash);
|
|
|
|
/* need to update block->lock_hash_val */
|
|
for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
|
|
buf_pool_t* buf_pool = buf_pool_from_array(i);
|
|
|
|
buf_pool_mutex_enter(buf_pool);
|
|
buf_page_t* bpage;
|
|
bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
|
|
|
|
while (bpage != NULL) {
|
|
if (buf_page_get_state(bpage)
|
|
== BUF_BLOCK_FILE_PAGE) {
|
|
buf_block_t* block;
|
|
block = reinterpret_cast<buf_block_t*>(
|
|
bpage);
|
|
|
|
block->lock_hash_val
|
|
= lock_rec_hash(
|
|
bpage->id.space(),
|
|
bpage->id.page_no());
|
|
}
|
|
bpage = UT_LIST_GET_NEXT(LRU, bpage);
|
|
}
|
|
buf_pool_mutex_exit(buf_pool);
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Closes the lock system at database shutdown. */
|
|
void
|
|
lock_sys_close(void)
|
|
/*================*/
|
|
{
|
|
if (lock_latest_err_file != NULL) {
|
|
fclose(lock_latest_err_file);
|
|
lock_latest_err_file = NULL;
|
|
}
|
|
|
|
hash_table_free(lock_sys->rec_hash);
|
|
hash_table_free(lock_sys->prdt_hash);
|
|
hash_table_free(lock_sys->prdt_page_hash);
|
|
|
|
os_event_destroy(lock_sys->timeout_event);
|
|
|
|
mutex_destroy(&lock_sys->mutex);
|
|
mutex_destroy(&lock_sys->wait_mutex);
|
|
|
|
srv_slot_t* slot = lock_sys->waiting_threads;
|
|
|
|
for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
|
|
if (slot->event != NULL) {
|
|
os_event_destroy(slot->event);
|
|
}
|
|
}
|
|
|
|
ut_free(lock_sys);
|
|
|
|
lock_sys = NULL;
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Gets the size of a lock struct.
|
|
@return size in bytes */
|
|
ulint
|
|
lock_get_size(void)
|
|
/*===============*/
|
|
{
|
|
return((ulint) sizeof(lock_t));
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Gets the source table of an ALTER TABLE transaction. The table must be
|
|
covered by an IX or IS table lock.
|
|
@return the source table of transaction, if it is covered by an IX or
|
|
IS table lock; dest if there is no source table, and NULL if the
|
|
transaction is locking more than two tables or an inconsistency is
|
|
found */
|
|
dict_table_t*
|
|
lock_get_src_table(
|
|
/*===============*/
|
|
trx_t* trx, /*!< in: transaction */
|
|
dict_table_t* dest, /*!< in: destination of ALTER TABLE */
|
|
lock_mode* mode) /*!< out: lock mode of the source table */
|
|
{
|
|
dict_table_t* src;
|
|
lock_t* lock;
|
|
|
|
ut_ad(!lock_mutex_own());
|
|
|
|
src = NULL;
|
|
*mode = LOCK_NONE;
|
|
|
|
/* The trx mutex protects the trx_locks for our purposes.
|
|
Other transactions could want to convert one of our implicit
|
|
record locks to an explicit one. For that, they would need our
|
|
trx mutex. Waiting locks can be removed while only holding
|
|
lock_sys->mutex, but this is a running transaction and cannot
|
|
thus be holding any waiting locks. */
|
|
trx_mutex_enter(trx);
|
|
|
|
for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
|
|
lock_table_t* tab_lock;
|
|
lock_mode lock_mode;
|
|
if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
|
|
/* We are only interested in table locks. */
|
|
continue;
|
|
}
|
|
tab_lock = &lock->un_member.tab_lock;
|
|
if (dest == tab_lock->table) {
|
|
/* We are not interested in the destination table. */
|
|
continue;
|
|
} else if (!src) {
|
|
/* This presumably is the source table. */
|
|
src = tab_lock->table;
|
|
if (UT_LIST_GET_LEN(src->locks) != 1
|
|
|| UT_LIST_GET_FIRST(src->locks) != lock) {
|
|
/* We only support the case when
|
|
there is only one lock on this table. */
|
|
src = NULL;
|
|
goto func_exit;
|
|
}
|
|
} else if (src != tab_lock->table) {
|
|
/* The transaction is locking more than
|
|
two tables (src and dest): abort */
|
|
src = NULL;
|
|
goto func_exit;
|
|
}
|
|
|
|
/* Check that the source table is locked by
|
|
LOCK_IX or LOCK_IS. */
|
|
lock_mode = lock_get_mode(lock);
|
|
if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
|
|
if (*mode != LOCK_NONE && *mode != lock_mode) {
|
|
/* There are multiple locks on src. */
|
|
src = NULL;
|
|
goto func_exit;
|
|
}
|
|
*mode = lock_mode;
|
|
}
|
|
}
|
|
|
|
if (!src) {
|
|
/* No source table lock found: flag the situation to caller */
|
|
src = dest;
|
|
}
|
|
|
|
func_exit:
|
|
trx_mutex_exit(trx);
|
|
return(src);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Determine if the given table is exclusively "owned" by the given
|
|
transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
|
|
on the table.
|
|
@return TRUE if table is only locked by trx, with LOCK_IX, and
|
|
possibly LOCK_AUTO_INC */
|
|
ibool
|
|
lock_is_table_exclusive(
|
|
/*====================*/
|
|
const dict_table_t* table, /*!< in: table */
|
|
const trx_t* trx) /*!< in: transaction */
|
|
{
|
|
const lock_t* lock;
|
|
ibool ok = FALSE;
|
|
|
|
ut_ad(table);
|
|
ut_ad(trx);
|
|
|
|
lock_mutex_enter();
|
|
|
|
for (lock = UT_LIST_GET_FIRST(table->locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
|
|
if (lock->trx != trx) {
|
|
/* A lock on the table is held
|
|
by some other transaction. */
|
|
goto not_ok;
|
|
}
|
|
|
|
if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
|
|
/* We are interested in table locks only. */
|
|
continue;
|
|
}
|
|
|
|
switch (lock_get_mode(lock)) {
|
|
case LOCK_IX:
|
|
ok = TRUE;
|
|
break;
|
|
case LOCK_AUTO_INC:
|
|
/* It is allowed for trx to hold an
|
|
auto_increment lock. */
|
|
break;
|
|
default:
|
|
not_ok:
|
|
/* Other table locks than LOCK_IX are not allowed. */
|
|
ok = FALSE;
|
|
goto func_exit;
|
|
}
|
|
}
|
|
|
|
func_exit:
|
|
lock_mutex_exit();
|
|
|
|
return(ok);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Sets the wait flag of a lock and the back pointer in trx to lock. */
|
|
UNIV_INLINE
|
|
void
|
|
lock_set_lock_and_trx_wait(
|
|
/*=======================*/
|
|
lock_t* lock, /*!< in: lock */
|
|
trx_t* trx) /*!< in/out: trx */
|
|
{
|
|
ut_ad(lock);
|
|
ut_ad(lock->trx == trx);
|
|
ut_ad(trx->lock.wait_lock == NULL);
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_mutex_own(trx));
|
|
|
|
trx->lock.wait_lock = lock;
|
|
lock->type_mode |= LOCK_WAIT;
|
|
}
|
|
|
|
/**********************************************************************//**
|
|
The back pointer to a waiting lock request in the transaction is set to NULL
|
|
and the wait bit in lock type_mode is reset. */
|
|
UNIV_INLINE
|
|
void
|
|
lock_reset_lock_and_trx_wait(
|
|
/*=========================*/
|
|
lock_t* lock) /*!< in/out: record lock */
|
|
{
|
|
ut_ad(lock_get_wait(lock));
|
|
ut_ad(lock_mutex_own());
|
|
|
|
if (lock->trx->lock.wait_lock &&
|
|
lock->trx->lock.wait_lock != lock) {
|
|
const char* stmt=NULL;
|
|
const char* stmt2=NULL;
|
|
size_t stmt_len;
|
|
trx_id_t trx_id = 0;
|
|
stmt = lock->trx->mysql_thd
|
|
? innobase_get_stmt_unsafe(
|
|
lock->trx->mysql_thd, &stmt_len)
|
|
: NULL;
|
|
|
|
if (lock->trx->lock.wait_lock &&
|
|
lock->trx->lock.wait_lock->trx) {
|
|
trx_id = lock->trx->lock.wait_lock->trx->id;
|
|
stmt2 = lock->trx->lock.wait_lock->trx->mysql_thd
|
|
? innobase_get_stmt_unsafe(
|
|
lock->trx->lock.wait_lock
|
|
->trx->mysql_thd, &stmt_len)
|
|
: NULL;
|
|
}
|
|
|
|
ib::error() <<
|
|
"Trx id " << lock->trx->id
|
|
<< " is waiting a lock in statement "
|
|
<< (stmt ? stmt : "NULL")
|
|
<< " for this trx id " << trx_id
|
|
<< " and statement "
|
|
<< (stmt2 ? stmt2 : "NULL")
|
|
<< "wait_lock " << lock->trx->lock.wait_lock;
|
|
ut_ad(0);
|
|
}
|
|
|
|
lock->trx->lock.wait_lock = NULL;
|
|
lock->type_mode &= ~LOCK_WAIT;
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Gets the gap flag of a record lock.
|
|
@return LOCK_GAP or 0 */
|
|
UNIV_INLINE
|
|
ulint
|
|
lock_rec_get_gap(
|
|
/*=============*/
|
|
const lock_t* lock) /*!< in: record lock */
|
|
{
|
|
ut_ad(lock);
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
return(lock->type_mode & LOCK_GAP);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Gets the LOCK_REC_NOT_GAP flag of a record lock.
|
|
@return LOCK_REC_NOT_GAP or 0 */
|
|
UNIV_INLINE
|
|
ulint
|
|
lock_rec_get_rec_not_gap(
|
|
/*=====================*/
|
|
const lock_t* lock) /*!< in: record lock */
|
|
{
|
|
ut_ad(lock);
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
return(lock->type_mode & LOCK_REC_NOT_GAP);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Gets the waiting insert flag of a record lock.
|
|
@return LOCK_INSERT_INTENTION or 0 */
|
|
UNIV_INLINE
|
|
ulint
|
|
lock_rec_get_insert_intention(
|
|
/*==========================*/
|
|
const lock_t* lock) /*!< in: record lock */
|
|
{
|
|
ut_ad(lock);
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
return(lock->type_mode & LOCK_INSERT_INTENTION);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if a lock request for a new lock has to wait for request lock2.
|
|
@return TRUE if new lock has to wait for lock2 to be removed */
|
|
UNIV_INLINE
|
|
ibool
|
|
lock_rec_has_to_wait(
|
|
/*=================*/
|
|
bool for_locking,
|
|
/*!< in is called locking or releasing */
|
|
const trx_t* trx, /*!< in: trx of new lock */
|
|
ulint type_mode,/*!< in: precise mode of the new lock
|
|
to set: LOCK_S or LOCK_X, possibly
|
|
ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
|
|
LOCK_INSERT_INTENTION */
|
|
const lock_t* lock2, /*!< in: another record lock; NOTE that
|
|
it is assumed that this has a lock bit
|
|
set on the same record as in the new
|
|
lock we are setting */
|
|
bool lock_is_on_supremum)
|
|
/*!< in: TRUE if we are setting the
|
|
lock on the 'supremum' record of an
|
|
index page: we know then that the lock
|
|
request is really for a 'gap' type lock */
|
|
{
|
|
ut_ad(trx && lock2);
|
|
ut_ad(lock_get_type_low(lock2) == LOCK_REC);
|
|
|
|
if (trx != lock2->trx
|
|
&& !lock_mode_compatible(static_cast<lock_mode>(
|
|
LOCK_MODE_MASK & type_mode),
|
|
lock_get_mode(lock2))) {
|
|
|
|
/* We have somewhat complex rules when gap type record locks
|
|
cause waits */
|
|
|
|
if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
|
|
&& !(type_mode & LOCK_INSERT_INTENTION)) {
|
|
|
|
/* Gap type locks without LOCK_INSERT_INTENTION flag
|
|
do not need to wait for anything. This is because
|
|
different users can have conflicting lock types
|
|
on gaps. */
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
if (!(type_mode & LOCK_INSERT_INTENTION)
|
|
&& lock_rec_get_gap(lock2)) {
|
|
|
|
/* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP
|
|
does not need to wait for a gap type lock */
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
if ((type_mode & LOCK_GAP)
|
|
&& lock_rec_get_rec_not_gap(lock2)) {
|
|
|
|
/* Lock on gap does not need to wait for
|
|
a LOCK_REC_NOT_GAP type lock */
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
if (lock_rec_get_insert_intention(lock2)) {
|
|
|
|
/* No lock request needs to wait for an insert
|
|
intention lock to be removed. This is ok since our
|
|
rules allow conflicting locks on gaps. This eliminates
|
|
a spurious deadlock caused by a next-key lock waiting
|
|
for an insert intention lock; when the insert
|
|
intention lock was granted, the insert deadlocked on
|
|
the waiting next-key lock.
|
|
|
|
Also, insert intention locks do not disturb each
|
|
other. */
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2)) &&
|
|
!thd_need_ordering_with(trx->mysql_thd,
|
|
lock2->trx->mysql_thd)) {
|
|
/* If the upper server layer has already decided on the
|
|
commit order between the transaction requesting the
|
|
lock and the transaction owning the lock, we do not
|
|
need to wait for gap locks. Such ordering by the upper
|
|
server layer happens in parallel replication, where the
|
|
commit order is fixed to match the original order on the
|
|
master.
|
|
|
|
Such gap locks are mainly needed to get serialisability
|
|
between transactions so that they will be binlogged in
|
|
the correct order so that statement-based replication
|
|
will give the correct results. Since the right order
|
|
was already determined on the master, we do not need
|
|
to enforce it again here.
|
|
|
|
Skipping the locks is not essential for correctness,
|
|
since in case of deadlock we will just kill the later
|
|
transaction and retry it. But it can save some
|
|
unnecessary rollbacks and retries. */
|
|
|
|
return (FALSE);
|
|
}
|
|
|
|
#ifdef WITH_WSREP
|
|
/* if BF thread is locking and has conflict with another BF
|
|
thread, we need to look at trx ordering and lock types */
|
|
if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
|
|
wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {
|
|
|
|
if (wsrep_debug) {
|
|
ib::info() <<
|
|
"BF-BF lock conflict, locking: " << for_locking;
|
|
lock_rec_print(stderr, lock2);
|
|
ib::info() << " SQL1: "
|
|
<< wsrep_thd_query(trx->mysql_thd);
|
|
ib::info() << " SQL2: "
|
|
<< wsrep_thd_query(lock2->trx->mysql_thd);
|
|
}
|
|
|
|
if (wsrep_trx_order_before(trx->mysql_thd,
|
|
lock2->trx->mysql_thd) &&
|
|
(type_mode & LOCK_MODE_MASK) == LOCK_X &&
|
|
(lock2->type_mode & LOCK_MODE_MASK) == LOCK_X) {
|
|
if (for_locking || wsrep_debug) {
|
|
/* exclusive lock conflicts are not
|
|
accepted */
|
|
ib::info() <<
|
|
"BF-BF X lock conflict,"
|
|
"mode: " << type_mode <<
|
|
" supremum: " << lock_is_on_supremum;
|
|
ib::info() <<
|
|
"conflicts states: my "
|
|
<< wsrep_thd_conflict_state(trx->mysql_thd, FALSE)
|
|
<< " locked "
|
|
<< wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE);
|
|
lock_rec_print(stderr, lock2);
|
|
ib::info() << " SQL1: "
|
|
<< wsrep_thd_query(trx->mysql_thd);
|
|
ib::info() << " SQL2: "
|
|
<< wsrep_thd_query(lock2->trx->mysql_thd);
|
|
|
|
if (for_locking) {
|
|
return FALSE;
|
|
}
|
|
}
|
|
} else {
|
|
/* if lock2->index->n_uniq <=
|
|
lock2->index->n_user_defined_cols
|
|
operation is on uniq index
|
|
*/
|
|
if (wsrep_debug) {
|
|
ib::info() <<
|
|
"BF conflict, modes: "
|
|
<< type_mode << ":" << lock2->type_mode
|
|
<< " idx: " << lock2->index->name()
|
|
<< " table: " << lock2->index->table->name.m_name
|
|
<< " n_uniq: " << lock2->index->n_uniq
|
|
<< " n_user: " << lock2->index->n_user_defined_cols;
|
|
ib::info() << " SQL1: "
|
|
<< wsrep_thd_query(trx->mysql_thd);
|
|
ib::info() << " SQL2: "
|
|
<< wsrep_thd_query(lock2->trx->mysql_thd);
|
|
}
|
|
return FALSE;
|
|
}
|
|
}
|
|
#endif /* WITH_WSREP */
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if a lock request lock1 has to wait for request lock2.
|
|
@return TRUE if lock1 has to wait for lock2 to be removed */
|
|
ibool
|
|
lock_has_to_wait(
|
|
/*=============*/
|
|
const lock_t* lock1, /*!< in: waiting lock */
|
|
const lock_t* lock2) /*!< in: another lock; NOTE that it is
|
|
assumed that this has a lock bit set
|
|
on the same record as in lock1 if the
|
|
locks are record locks */
|
|
{
|
|
ut_ad(lock1 && lock2);
|
|
|
|
if (lock1->trx != lock2->trx
|
|
&& !lock_mode_compatible(lock_get_mode(lock1),
|
|
lock_get_mode(lock2))) {
|
|
if (lock_get_type_low(lock1) == LOCK_REC) {
|
|
ut_ad(lock_get_type_low(lock2) == LOCK_REC);
|
|
|
|
/* If this lock request is for a supremum record
|
|
then the second bit on the lock bitmap is set */
|
|
|
|
if (lock1->type_mode
|
|
& (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
|
|
return(lock_prdt_has_to_wait(
|
|
lock1->trx, lock1->type_mode,
|
|
lock_get_prdt_from_lock(lock1),
|
|
lock2));
|
|
} else {
|
|
return(lock_rec_has_to_wait(false,
|
|
lock1->trx, lock1->type_mode, lock2,
|
|
lock_rec_get_nth_bit(lock1, true)));
|
|
}
|
|
}
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
return(FALSE);
|
|
}
|
|
|
|
/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
|
|
|
|
/**********************************************************************//**
|
|
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
|
|
if none found.
|
|
@return bit index == heap number of the record, or ULINT_UNDEFINED if
|
|
none found */
|
|
ulint
|
|
lock_rec_find_set_bit(
|
|
/*==================*/
|
|
const lock_t* lock) /*!< in: record lock with at least one bit set */
|
|
{
|
|
for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
|
|
|
|
if (lock_rec_get_nth_bit(lock, i)) {
|
|
|
|
return(i);
|
|
}
|
|
}
|
|
|
|
return(ULINT_UNDEFINED);
|
|
}
|
|
|
|
/** Reset the nth bit of a record lock.
|
|
@param[in,out] lock record lock
|
|
@param[in] i index of the bit that will be reset
|
|
@return previous value of the bit */
|
|
UNIV_INLINE
|
|
byte
|
|
lock_rec_reset_nth_bit(
|
|
lock_t* lock,
|
|
ulint i)
|
|
{
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
ut_ad(i < lock->un_member.rec_lock.n_bits);
|
|
|
|
byte* b = reinterpret_cast<byte*>(&lock[1]) + (i >> 3);
|
|
byte mask = static_cast<byte>(1U << (i & 7));
|
|
byte bit = *b & mask;
|
|
*b &= ~mask;
|
|
|
|
if (bit != 0) {
|
|
ut_ad(lock->trx->lock.n_rec_locks > 0);
|
|
--lock->trx->lock.n_rec_locks;
|
|
}
|
|
|
|
return(bit);
|
|
}
|
|
|
|
/** Reset the nth bit of a record lock.
|
|
@param[in,out] lock record lock
|
|
@param[in] i index of the bit that will be reset
|
|
@param[in] type whether the lock is in wait mode */
|
|
void
|
|
lock_rec_trx_wait(
|
|
lock_t* lock,
|
|
ulint i,
|
|
ulint type)
|
|
{
|
|
lock_rec_reset_nth_bit(lock, i);
|
|
|
|
if (type & LOCK_WAIT) {
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
}
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Determines if there are explicit record locks on a page.
|
|
@return an explicit record lock on the page, or NULL if there are none */
|
|
lock_t*
|
|
lock_rec_expl_exist_on_page(
|
|
/*========================*/
|
|
ulint space, /*!< in: space id */
|
|
ulint page_no)/*!< in: page number */
|
|
{
|
|
lock_t* lock;
|
|
|
|
lock_mutex_enter();
|
|
/* Only used in ibuf pages, so rec_hash is good enough */
|
|
lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash,
|
|
space, page_no);
|
|
lock_mutex_exit();
|
|
|
|
return(lock);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
|
|
pointer in the transaction! This function is used in lock object creation
|
|
and resetting. */
|
|
static
|
|
void
|
|
lock_rec_bitmap_reset(
|
|
/*==================*/
|
|
lock_t* lock) /*!< in: record lock */
|
|
{
|
|
ulint n_bytes;
|
|
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
/* Reset to zero the bitmap which resides immediately after the lock
|
|
struct */
|
|
|
|
n_bytes = lock_rec_get_n_bits(lock) / 8;
|
|
|
|
ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
|
|
|
|
memset(&lock[1], 0, n_bytes);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Copies a record lock to heap.
|
|
@return copy of lock */
|
|
static
|
|
lock_t*
|
|
lock_rec_copy(
|
|
/*==========*/
|
|
const lock_t* lock, /*!< in: record lock */
|
|
mem_heap_t* heap) /*!< in: memory heap */
|
|
{
|
|
ulint size;
|
|
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
|
|
|
|
return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size)));
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Gets the previous record lock set on a record.
|
|
@return previous lock on the same record, NULL if none exists */
|
|
const lock_t*
|
|
lock_rec_get_prev(
|
|
/*==============*/
|
|
const lock_t* in_lock,/*!< in: record lock */
|
|
ulint heap_no)/*!< in: heap number of the record */
|
|
{
|
|
lock_t* lock;
|
|
ulint space;
|
|
ulint page_no;
|
|
lock_t* found_lock = NULL;
|
|
hash_table_t* hash;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
|
|
|
|
space = in_lock->un_member.rec_lock.space;
|
|
page_no = in_lock->un_member.rec_lock.page_no;
|
|
|
|
hash = lock_hash_get(in_lock->type_mode);
|
|
|
|
for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
|
|
/* No op */;
|
|
lock = lock_rec_get_next_on_page(lock)) {
|
|
|
|
ut_ad(lock);
|
|
|
|
if (lock == in_lock) {
|
|
|
|
return(found_lock);
|
|
}
|
|
|
|
if (lock_rec_get_nth_bit(lock, heap_no)) {
|
|
|
|
found_lock = lock;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
|
|
|
|
/*********************************************************************//**
|
|
Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
|
|
to precise_mode.
|
|
@return lock or NULL */
|
|
UNIV_INLINE
|
|
lock_t*
|
|
lock_rec_has_expl(
|
|
/*==============*/
|
|
ulint precise_mode,/*!< in: LOCK_S or LOCK_X
|
|
possibly ORed to LOCK_GAP or
|
|
LOCK_REC_NOT_GAP, for a
|
|
supremum record we regard this
|
|
always a gap type request */
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no,/*!< in: heap number of the record */
|
|
const trx_t* trx) /*!< in: transaction */
|
|
{
|
|
lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
|
|
|| (precise_mode & LOCK_MODE_MASK) == LOCK_X);
|
|
ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
|
|
|
|
for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next(heap_no, lock)) {
|
|
|
|
if (lock->trx == trx
|
|
&& !lock_rec_get_insert_intention(lock)
|
|
&& lock_mode_stronger_or_eq(
|
|
lock_get_mode(lock),
|
|
static_cast<lock_mode>(
|
|
precise_mode & LOCK_MODE_MASK))
|
|
&& !lock_get_wait(lock)
|
|
&& (!lock_rec_get_rec_not_gap(lock)
|
|
|| (precise_mode & LOCK_REC_NOT_GAP)
|
|
|| heap_no == PAGE_HEAP_NO_SUPREMUM)
|
|
&& (!lock_rec_get_gap(lock)
|
|
|| (precise_mode & LOCK_GAP)
|
|
|| heap_no == PAGE_HEAP_NO_SUPREMUM)) {
|
|
|
|
return(lock);
|
|
}
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a non-gap lock request in the queue
of the record whose mode is stronger than or equal to the given mode.
@return lock or NULL */
static
const lock_t*
lock_rec_other_has_expl_req(
/*========================*/
	lock_mode		mode,	/*!< in: LOCK_S or LOCK_X */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	bool			wait,	/*!< in: whether also waiting locks
					are taken into account */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction, or NULL if
					requests by all transactions
					are taken into account */
{
	ut_ad(lock_mutex_own());
	ut_ad(mode == LOCK_X || mode == LOCK_S);

	/* Only GAP lock can be on SUPREMUM, and we are not looking for
	GAP lock */
	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		return(NULL);
	}

	const lock_t*	req = lock_rec_get_first(
		lock_sys->rec_hash, block, heap_no);

	while (req != NULL) {

		const bool	matches = req->trx != trx
			&& !lock_rec_get_gap(req)
			&& (wait || !lock_get_wait(req))
			&& lock_mode_stronger_or_eq(lock_get_mode(req), mode);

		if (matches) {
			break;
		}

		req = lock_rec_get_next_const(heap_no, req);
	}

	return(req);
}
#endif /* UNIV_DEBUG */
|
|
|
|
#ifdef WITH_WSREP
/** Called when trx has found a conflicting lock. Only acts if wsrep is on
for trx and trx is a BF thread (wsrep_thd_is_BF()); the lock holder must
either not be BF, or be BF and ordered after trx according to
wsrep_trx_order_before(). A holder that is itself in TRX_QUE_LOCK_WAIT is
left alone; otherwise a holder other than trx is handed to
wsrep_innobase_kill_one_trx().
The caller must hold the lock mutex and the mutex of lock->trx.
@param[in]	trx	transaction requesting the lock
@param[in]	lock	conflicting lock */
static
void
wsrep_kill_victim(
/*==============*/
	const trx_t * const trx,
	const lock_t *lock)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(lock->trx));

	/* quit for native mysql */
	if (!wsrep_on(trx->mysql_thd)) {
		return;
	}

	my_bool bf_this  = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
	my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE);

	/* Only a BF requester may kill anything. */
	if (!bf_this) {
		return;
	}

	/* Between two BF threads, the requester must be ordered first. */
	if (bf_other
	    && !wsrep_trx_order_before(trx->mysql_thd,
				       lock->trx->mysql_thd)) {
		return;
	}

	if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
		if (wsrep_debug) {
			ib::info() << "WSREP: BF victim waiting\n";
		}
		/* cannot release lock, until our lock
		is in the queue*/
		return;
	}

	if (lock->trx == trx) {
		return;
	}

	if (wsrep_log_conflicts) {
		const char*	this_role = bf_this
			? "*** Priority TRANSACTION:"
			: "*** Victim TRANSACTION:";
		const char*	other_role = bf_other
			? "*** Priority TRANSACTION:"
			: "*** Victim TRANSACTION:";

		ib::info() << this_role;
		wsrep_trx_print_locking(stderr, trx, 3000);

		ib::info() << other_role;
		wsrep_trx_print_locking(stderr, lock->trx, 3000);

		ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";

		if (lock_get_type(lock) == LOCK_REC) {
			lock_rec_print(stderr, lock);
		} else {
			lock_table_print(stderr, lock);
		}

		ib::info() << " SQL1: "
			   << wsrep_thd_query(trx->mysql_thd);
		ib::info() << " SQL2: "
			   << wsrep_thd_query(lock->trx->mysql_thd);
	}

	/* abort_type is TRX_WSREP_ABORT only for the duration of the
	kill call and is then set to TRX_SERVER_ABORT. */
	lock->trx->abort_type = TRX_WSREP_ABORT;
	wsrep_innobase_kill_one_trx(trx->mysql_thd,
				    (const trx_t*) trx, lock->trx, TRUE);
	lock->trx->abort_type = TRX_SERVER_ABORT;
}
#endif /* WITH_WSREP */
|
|
|
|
/*********************************************************************//**
|
|
Checks if some other transaction has a conflicting explicit lock request
|
|
in the queue, so that we have to wait.
|
|
@return lock or NULL */
|
|
static
|
|
const lock_t*
|
|
lock_rec_other_has_conflicting(
|
|
/*===========================*/
|
|
ulint mode, /*!< in: LOCK_S or LOCK_X,
|
|
possibly ORed to LOCK_GAP or
|
|
LOC_REC_NOT_GAP,
|
|
LOCK_INSERT_INTENTION */
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no,/*!< in: heap number of the record */
|
|
const trx_t* trx) /*!< in: our transaction */
|
|
{
|
|
const lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
bool is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
|
|
|
|
for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next_const(heap_no, lock)) {
|
|
|
|
if (lock_rec_has_to_wait(true, trx, mode, lock, is_supremum)) {
|
|
#ifdef WITH_WSREP
|
|
if (wsrep_on(trx->mysql_thd)) {
|
|
trx_mutex_enter(lock->trx);
|
|
wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
|
|
trx_mutex_exit(lock->trx);
|
|
}
|
|
#endif /* WITH_WSREP */
|
|
return(lock);
|
|
}
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if some transaction has an implicit x-lock on a record in a secondary
|
|
index.
|
|
@return transaction id of the transaction which has the x-lock, or 0;
|
|
NOTE that this function can return false positives but never false
|
|
negatives. The caller must confirm all positive results by calling
|
|
trx_is_active(). */
|
|
static
|
|
trx_t*
|
|
lock_sec_rec_some_has_impl(
|
|
/*=======================*/
|
|
const rec_t* rec, /*!< in: user record */
|
|
dict_index_t* index, /*!< in: secondary index */
|
|
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
|
|
{
|
|
trx_t* trx;
|
|
trx_id_t max_trx_id;
|
|
const page_t* page = page_align(rec);
|
|
|
|
ut_ad(!lock_mutex_own());
|
|
ut_ad(!trx_sys_mutex_own());
|
|
ut_ad(!dict_index_is_clust(index));
|
|
ut_ad(page_rec_is_user_rec(rec));
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
|
|
max_trx_id = page_get_max_trx_id(page);
|
|
|
|
/* Some transaction may have an implicit x-lock on the record only
|
|
if the max trx id for the page >= min trx id for the trx list, or
|
|
database recovery is running. We do not write the changes of a page
|
|
max trx id to the log, and therefore during recovery, this value
|
|
for a page may be incorrect. */
|
|
|
|
if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {
|
|
|
|
trx = 0;
|
|
|
|
} else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {
|
|
|
|
/* The page is corrupt: try to avoid a crash by returning 0 */
|
|
trx = 0;
|
|
|
|
/* In this case it is possible that some transaction has an implicit
|
|
x-lock. We have to look in the clustered index. */
|
|
|
|
} else {
|
|
trx = row_vers_impl_x_locked(rec, index, offsets);
|
|
}
|
|
|
|
return(trx);
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some transaction, other than given trx_id, has an explicit
lock on the given rec, in the given precise_mode. The search is only
performed if trx_rw_is_active() finds trx->id; it then scans
trx_sys->rw_trx_list under the trx_sys mutex. The lock mutex is held for
the whole call.
@return the transaction, whose id is not equal to trx_id, that has an
explicit lock on the given rec, in the given precise_mode or NULL.*/
static
trx_t*
lock_rec_other_trx_holds_expl(
/*==========================*/
	ulint			precise_mode,	/*!< in: LOCK_S or LOCK_X
						possibly ORed to LOCK_GAP or
						LOCK_REC_NOT_GAP. */
	trx_t*			trx,		/*!< in: trx holding implicit
						lock on rec */
	const rec_t*		rec,		/*!< in: user record */
	const buf_block_t*	block)		/*!< in: buffer block
						containing the record */
{
	trx_t*	other_holder = NULL;

	lock_mutex_enter();

	trx_t* const	impl_trx = trx_rw_is_active(trx->id, NULL, false);

	if (impl_trx != NULL) {
		const ulint	heap_no = page_rec_get_heap_no(rec);

		mutex_enter(&trx_sys->mutex);

		trx_t*	t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);

		while (t != NULL) {

			lock_t*	expl_lock = lock_rec_has_expl(
				precise_mode, block, heap_no, t);

			if (expl_lock && expl_lock->trx != impl_trx) {
				/* An explicit lock is held by trx other than
				the trx holding the implicit lock. */
				other_holder = expl_lock->trx;
				break;
			}

			t = UT_LIST_GET_NEXT(trx_list, t);
		}

		mutex_exit(&trx_sys->mutex);
	}

	lock_mutex_exit();

	return(other_holder);
}
#endif /* UNIV_DEBUG */
|
|
|
|
/*********************************************************************//**
|
|
Return approximate number or record locks (bits set in the bitmap) for
|
|
this transaction. Since delete-marked records may be removed, the
|
|
record count will not be precise.
|
|
The caller must be holding lock_sys->mutex. */
|
|
ulint
|
|
lock_number_of_rows_locked(
|
|
/*=======================*/
|
|
const trx_lock_t* trx_lock) /*!< in: transaction locks */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
return(trx_lock->n_rec_locks);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Return the number of table locks for a transaction.
|
|
The caller must be holding lock_sys->mutex. */
|
|
ulint
|
|
lock_number_of_tables_locked(
|
|
/*=========================*/
|
|
const trx_lock_t* trx_lock) /*!< in: transaction locks */
|
|
{
|
|
const lock_t* lock;
|
|
ulint n_tables = 0;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
|
|
|
|
if (lock_get_type_low(lock) == LOCK_TABLE) {
|
|
n_tables++;
|
|
}
|
|
}
|
|
|
|
return(n_tables);
|
|
}
|
|
|
|
/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
|
|
|
|
#ifdef WITH_WSREP
/** Debug output: when wsrep_debug is set and the transaction owning c_lock
is not waiting for c_lock itself, print the query of that transaction,
c_lock, and the lock the transaction is actually waiting for.
@param[in]	c_lock	conflicting lock to print */
static
void
wsrep_print_wait_locks(
/*===================*/
	lock_t*	c_lock) /* conflicting lock to print */
{
	if (!wsrep_debug || c_lock->trx->lock.wait_lock == c_lock) {
		return;
	}

	ib::info() << "WSREP: c_lock != wait lock";
	ib::info() << " SQL: "
		   << wsrep_thd_query(c_lock->trx->mysql_thd);

	if (lock_get_type_low(c_lock) & LOCK_TABLE) {
		lock_table_print(stderr, c_lock);
	} else {
		lock_rec_print(stderr, c_lock);
	}

	const lock_t*	wait_lock = c_lock->trx->lock.wait_lock;

	if (lock_get_type_low(wait_lock) & LOCK_TABLE) {
		lock_table_print(stderr, wait_lock);
	} else {
		lock_rec_print(stderr, wait_lock);
	}
}
#endif /* WITH_WSREP */
|
|
|
|
/**
|
|
Check of the lock is on m_rec_id.
|
|
@param[in] lock Lock to compare with
|
|
@return true if the record lock is on m_rec_id*/
|
|
/**
|
|
@param[in] rhs Lock to compare with
|
|
@return true if the record lock equals rhs */
|
|
bool
|
|
RecLock::is_on_row(const lock_t* lock) const
|
|
{
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
const lock_rec_t& other = lock->un_member.rec_lock;
|
|
|
|
return(other.space == m_rec_id.m_space_id
|
|
&& other.page_no == m_rec_id.m_page_no
|
|
&& lock_rec_get_nth_bit(lock, m_rec_id.m_heap_no));
|
|
}
|
|
|
|
/**
|
|
Do some checks and prepare for creating a new record lock */
|
|
void
|
|
RecLock::prepare() const
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(m_trx == thr_get_trx(m_thr));
|
|
|
|
/* Test if there already is some other reason to suspend thread:
|
|
we do not enqueue a lock request if the query thread should be
|
|
stopped anyway */
|
|
|
|
if (que_thr_stop(m_thr)) {
|
|
ut_error;
|
|
}
|
|
|
|
switch (trx_get_dict_operation(m_trx)) {
|
|
case TRX_DICT_OP_NONE:
|
|
break;
|
|
case TRX_DICT_OP_TABLE:
|
|
case TRX_DICT_OP_INDEX:
|
|
ib::error() << "A record lock wait happens in a dictionary"
|
|
" operation. index " << m_index->name
|
|
<< " of table " << m_index->table->name
|
|
<< ". " << BUG_REPORT_MSG;
|
|
ut_ad(0);
|
|
}
|
|
|
|
ut_ad(m_index->table->n_ref_count > 0
|
|
|| !m_index->table->can_be_evicted);
|
|
}
|
|
|
|
/**
|
|
Create the lock instance
|
|
@param[in, out] trx The transaction requesting the lock
|
|
@param[in, out] index Index on which record lock is required
|
|
@param[in] mode The lock mode desired
|
|
@param[in] rec_id The record id
|
|
@param[in] size Size of the lock + bitmap requested
|
|
@return a record lock instance */
|
|
lock_t*
|
|
RecLock::lock_alloc(
|
|
trx_t* trx,
|
|
dict_index_t* index,
|
|
ulint mode,
|
|
const RecID& rec_id,
|
|
ulint size)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
lock_t* lock;
|
|
|
|
if (trx->lock.rec_cached >= trx->lock.rec_pool.size()
|
|
|| sizeof(*lock) + size > REC_LOCK_SIZE) {
|
|
|
|
ulint n_bytes = size + sizeof(*lock);
|
|
mem_heap_t* heap = trx->lock.lock_heap;
|
|
|
|
lock = reinterpret_cast<lock_t*>(mem_heap_alloc(heap, n_bytes));
|
|
} else {
|
|
|
|
lock = trx->lock.rec_pool[trx->lock.rec_cached];
|
|
++trx->lock.rec_cached;
|
|
}
|
|
|
|
lock->trx = trx;
|
|
|
|
lock->index = index;
|
|
|
|
/* Setup the lock attributes */
|
|
|
|
lock->type_mode = uint32_t(LOCK_REC | (mode & ~LOCK_TYPE_MASK));
|
|
|
|
lock_rec_t& rec_lock = lock->un_member.rec_lock;
|
|
|
|
/* Predicate lock always on INFIMUM (0) */
|
|
|
|
if (is_predicate_lock(mode)) {
|
|
|
|
rec_lock.n_bits = 8;
|
|
|
|
memset(&lock[1], 0x0, 1);
|
|
|
|
} else {
|
|
ut_ad(8 * size < UINT32_MAX);
|
|
rec_lock.n_bits = static_cast<uint32_t>(8 * size);
|
|
|
|
memset(&lock[1], 0x0, size);
|
|
}
|
|
|
|
rec_lock.space = rec_id.m_space_id;
|
|
|
|
rec_lock.page_no = rec_id.m_page_no;
|
|
|
|
/* Set the bit corresponding to rec */
|
|
|
|
lock_rec_set_nth_bit(lock, rec_id.m_heap_no);
|
|
|
|
MONITOR_INC(MONITOR_NUM_RECLOCK);
|
|
|
|
MONITOR_INC(MONITOR_RECLOCK_CREATED);
|
|
|
|
return(lock);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Check if lock1 has higher priority than lock2.
|
|
NULL has lowest priority.
|
|
If neither of them is wait lock, the first one has higher priority.
|
|
If only one of them is a wait lock, it has lower priority.
|
|
If either is a high priority transaction, the lock has higher priority.
|
|
Otherwise, the one with an older transaction has higher priority.
|
|
@returns true if lock1 has higher priority, false otherwise. */
|
|
bool
|
|
has_higher_priority(
|
|
lock_t *lock1,
|
|
lock_t *lock2)
|
|
{
|
|
if (lock1 == NULL) {
|
|
return false;
|
|
} else if (lock2 == NULL) {
|
|
return true;
|
|
}
|
|
// Granted locks has higher priority.
|
|
if (!lock_get_wait(lock1)) {
|
|
return true;
|
|
} else if (!lock_get_wait(lock2)) {
|
|
return false;
|
|
}
|
|
if (trx_is_high_priority(lock1->trx)) {
|
|
return false;
|
|
}
|
|
return lock1->trx->start_time_micro <= lock2->trx->start_time_micro;
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Insert a lock to the hash list according to the mode (whether it is a wait
|
|
lock) and the age of the transaction the it is associated with.
|
|
If the lock is not a wait lock, insert it to the head of the hash list.
|
|
Otherwise, insert it to the middle of the wait locks according to the age of
|
|
the transaciton. */
|
|
static
|
|
dberr_t
|
|
lock_rec_insert_by_trx_age(
|
|
lock_t *in_lock) /*!< in: lock to be insert */{
|
|
ulint space;
|
|
ulint page_no;
|
|
ulint rec_fold;
|
|
lock_t* node;
|
|
lock_t* next;
|
|
hash_table_t* hash;
|
|
hash_cell_t* cell;
|
|
|
|
space = in_lock->un_member.rec_lock.space;
|
|
page_no = in_lock->un_member.rec_lock.page_no;
|
|
rec_fold = lock_rec_fold(space, page_no);
|
|
hash = lock_hash_get(in_lock->type_mode);
|
|
cell = hash_get_nth_cell(hash,
|
|
hash_calc_hash(rec_fold, hash));
|
|
|
|
node = (lock_t *) cell->node;
|
|
// If in_lock is not a wait lock, we insert it to the head of the list.
|
|
if (node == NULL || !lock_get_wait(in_lock) || has_higher_priority(in_lock, node)) {
|
|
cell->node = in_lock;
|
|
in_lock->hash = node;
|
|
if (lock_get_wait(in_lock)) {
|
|
lock_grant(in_lock, true);
|
|
return DB_SUCCESS_LOCKED_REC;
|
|
}
|
|
return DB_SUCCESS;
|
|
}
|
|
while (node != NULL && has_higher_priority((lock_t *) node->hash,
|
|
in_lock)) {
|
|
node = (lock_t *) node->hash;
|
|
}
|
|
next = (lock_t *) node->hash;
|
|
node->hash = in_lock;
|
|
in_lock->hash = next;
|
|
|
|
if (lock_get_wait(in_lock) && !lock_rec_has_to_wait_in_queue(in_lock)) {
|
|
lock_grant(in_lock, true);
|
|
if (cell->node != in_lock) {
|
|
// Move it to the front of the queue
|
|
node->hash = in_lock->hash;
|
|
next = (lock_t *) cell->node;
|
|
cell->node = in_lock;
|
|
in_lock->hash = next;
|
|
}
|
|
return DB_SUCCESS_LOCKED_REC;
|
|
}
|
|
|
|
return DB_SUCCESS;
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
/** Debug check of the hash chain that in_lock belongs to: asserts that no
granted lock follows a waiting lock in the chain.
@param[in]	in_lock	lock whose hash list is to be validated, or NULL
@return true (violations trip a debug assertion instead) */
static
bool
lock_queue_validate(
	const lock_t	*in_lock) /*!< in: lock whose hash list is to be validated */
{
	if (in_lock == NULL) {
		return true;
	}

	const ulint	space = in_lock->un_member.rec_lock.space;
	const ulint	page_no = in_lock->un_member.rec_lock.page_no;
	const ulint	rec_fold = lock_rec_fold(space, page_no);
	hash_table_t*	hash = lock_hash_get(in_lock->type_mode);
	hash_cell_t*	cell = hash_get_nth_cell(
		hash, hash_calc_hash(rec_fold, hash));

	bool	seen_waiting = false;

	for (lock_t* cur = (lock_t *) cell->node;
	     cur != NULL;
	     cur = cur->hash) {

		if (lock_get_wait(cur)) {
			seen_waiting = true;
		} else {
			// If this is a granted lock, check that there's no
			// wait lock before it.
			ut_ad(!seen_waiting);
		}
	}

	return true;
}
#endif /* UNIV_DEBUG */
|
|
|
|
static
|
|
void
|
|
lock_rec_insert_to_head(
|
|
lock_t *in_lock, /*!< in: lock to be insert */
|
|
ulint rec_fold) /*!< in: rec_fold of the page */
|
|
{
|
|
hash_table_t* hash;
|
|
hash_cell_t* cell;
|
|
lock_t* node;
|
|
|
|
if (in_lock == NULL) {
|
|
return;
|
|
}
|
|
|
|
hash = lock_hash_get(in_lock->type_mode);
|
|
cell = hash_get_nth_cell(hash,
|
|
hash_calc_hash(rec_fold, hash));
|
|
node = (lock_t *) cell->node;
|
|
if (node != in_lock) {
|
|
cell->node = in_lock;
|
|
in_lock->hash = node;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Add the lock to the record lock hash and the transaction's lock list
|
|
@param[in,out] lock Newly created record lock to add to the rec hash
|
|
@param[in] add_to_hash If the lock should be added to the hash table */
|
|
void
|
|
RecLock::lock_add(lock_t* lock, bool add_to_hash)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_mutex_own(lock->trx));
|
|
|
|
bool wait_lock = m_mode & LOCK_WAIT;
|
|
|
|
if (add_to_hash) {
|
|
ulint key = m_rec_id.fold();
|
|
hash_table_t *lock_hash = lock_hash_get(m_mode);
|
|
|
|
++lock->index->table->n_rec_locks;
|
|
|
|
if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
|
|
&& !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
|
|
if (wait_lock) {
|
|
HASH_INSERT(lock_t, hash, lock_hash, key, lock);
|
|
} else {
|
|
lock_rec_insert_to_head(lock, m_rec_id.fold());
|
|
}
|
|
} else {
|
|
HASH_INSERT(lock_t, hash, lock_hash, key, lock);
|
|
}
|
|
}
|
|
|
|
if (wait_lock) {
|
|
lock_set_lock_and_trx_wait(lock, lock->trx);
|
|
}
|
|
|
|
UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock);
|
|
}
|
|
|
|
/**
|
|
Create a new lock.
|
|
@param[in,out] trx Transaction requesting the lock
|
|
@param[in] owns_trx_mutex true if caller owns the trx_t::mutex
|
|
@param[in] add_to_hash add the lock to hash table
|
|
@param[in] prdt Predicate lock (optional)
|
|
@return a new lock instance */
|
|
lock_t*
|
|
RecLock::create(trx_t* trx, bool owns_trx_mutex, bool add_to_hash, const lock_prdt_t* prdt)
|
|
{
|
|
return create(NULL, trx, owns_trx_mutex, add_to_hash, prdt);
|
|
}
|
|
/**
Create a new lock.
In WITH_WSREP builds, when a conflicting lock is given and trx is a wsrep
BF thread, the new lock is linked into the hash chain directly after
c_lock (behind any immediately following BF locks that are ordered before
trx) instead of being added through lock_add()'s hash insertion. If the
owner of c_lock is itself in TRX_QUE_LOCK_WAIT, it is flagged as deadlock
victim, its wait is cancelled, and the new lock is returned in waiting
state. In every other case the lock is added through lock_add().
@param[in]	c_lock		Conflicting lock, or NULL
@param[in,out]	trx		Transaction requesting the lock
@param[in]	owns_trx_mutex	true if caller owns the trx_t::mutex
@param[in]	add_to_hash	add the lock to hash table
@param[in]	prdt		Predicate lock (optional)
@return a new lock instance */
lock_t*
RecLock::create(
	lock_t* const		c_lock,
	trx_t*			trx,
	bool			owns_trx_mutex,
	bool			add_to_hash,
	const lock_prdt_t*	prdt)
{
	ut_ad(lock_mutex_own());
	ut_ad(owns_trx_mutex == trx_mutex_own(trx));

	/* Create the explicit lock instance and initialise it. */
	lock_t*	lock = lock_alloc(trx, m_index, m_mode, m_rec_id, m_size);

	if (prdt != NULL && (m_mode & LOCK_PREDICATE)) {

		lock_prdt_set_prdt(lock, prdt);
	}

#ifdef WITH_WSREP
	if (c_lock
	    && wsrep_on(trx->mysql_thd)
	    && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {

		/* Find the insertion point in the chain after c_lock. */
		lock_t*	succ = (lock_t *) c_lock->hash;
		lock_t*	pred = NULL;

		while (succ
		       && wsrep_thd_is_BF(succ->trx->mysql_thd, TRUE)
		       && wsrep_trx_order_before(succ->trx->mysql_thd,
						 trx->mysql_thd)) {
			pred = succ;
			succ = (lock_t *) succ->hash;
		}

		lock->hash = succ;

		if (pred != NULL) {
			pred->hash = lock;
		} else {
			c_lock->hash = lock;
		}

		/*
		 * delayed conflict resolution '...kill_one_trx' was not called,
		 * if victim was waiting for some other lock
		 */
		trx_mutex_enter(c_lock->trx);

		if (c_lock->trx->lock.que_state != TRX_QUE_LOCK_WAIT) {

			trx_mutex_exit(c_lock->trx);

			/* we don't want to add to hash anymore, but need
			other updates from lock_add */
			++lock->index->table->n_rec_locks;
			lock_add(lock, false);

			return(lock);
		}

		c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;

		if (wsrep_debug) {
			wsrep_print_wait_locks(c_lock);
		}

		trx->lock.que_state = TRX_QUE_LOCK_WAIT;
		lock_set_lock_and_trx_wait(lock, trx);
		UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);

		ut_ad(m_thr != NULL);
		trx->lock.wait_thr = m_thr;
		m_thr->state = QUE_THR_LOCK_WAIT;

		/* have to release trx mutex for the duration of
		victim lock release. This will eventually call
		lock_grant, which wants to grant trx mutex again
		*/
		if (owns_trx_mutex) {
			trx_mutex_exit(trx);
		}

		lock_cancel_waiting_and_release(
			c_lock->trx->lock.wait_lock);

		if (owns_trx_mutex) {
			trx_mutex_enter(trx);
		}

		/* trx might not wait for c_lock, but some other lock
		does not matter if wait_lock was released above
		*/
		/* NOTE(review): the message below says "some other lock"
		while the condition tests equality with c_lock - confirm
		which of the two is intended. */
		if (c_lock->trx->lock.wait_lock == c_lock) {
			if (wsrep_debug) {
				ib::info() <<
					"victim trx waits for some other lock than c_lock";
			}
			lock_reset_lock_and_trx_wait(lock);
		}

		trx_mutex_exit(c_lock->trx);

		if (wsrep_debug) {
			ib::info() << "WSREP: c_lock canceled " << c_lock->trx->id;
			ib::info() << " SQL1: "
				   << wsrep_thd_query(c_lock->trx->mysql_thd);
			ib::info() << " SQL2: "
				   << wsrep_thd_query(trx->mysql_thd);
		}

		++lock->index->table->n_rec_locks;

		/* have to bail out here to avoid lock_set_lock... */
		return(lock);
	}
#endif /* WITH_WSREP */

	/* Ensure that another transaction doesn't access the trx
	lock state and lock data structures while we are adding the
	lock and changing the transaction state to LOCK_WAIT */

	if (!owns_trx_mutex) {
		trx_mutex_enter(trx);
	}

	lock_add(lock, add_to_hash);

	if (!owns_trx_mutex) {
		trx_mutex_exit(trx);
	}

	return(lock);
}
|
|
|
|
/**
|
|
Check the outcome of the deadlock check
|
|
@param[in,out] victim_trx Transaction selected for rollback
|
|
@param[in,out] lock Lock being requested
|
|
@return DB_LOCK_WAIT, DB_DEADLOCK or DB_SUCCESS_LOCKED_REC */
|
|
dberr_t
|
|
RecLock::check_deadlock_result(const trx_t* victim_trx, lock_t* lock)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(m_trx == lock->trx);
|
|
ut_ad(trx_mutex_own(m_trx));
|
|
|
|
if (victim_trx != NULL) {
|
|
|
|
ut_ad(victim_trx == m_trx);
|
|
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
|
|
lock_rec_reset_nth_bit(lock, m_rec_id.m_heap_no);
|
|
|
|
return(DB_DEADLOCK);
|
|
|
|
} else if (m_trx->lock.wait_lock == NULL) {
|
|
|
|
/* If there was a deadlock but we chose another
|
|
transaction as a victim, it is possible that we
|
|
already have the lock now granted! */
|
|
|
|
return(DB_SUCCESS_LOCKED_REC);
|
|
}
|
|
|
|
return(DB_LOCK_WAIT);
|
|
}
|
|
|
|
/**
|
|
Check and resolve any deadlocks
|
|
@param[in, out] lock The lock being acquired
|
|
@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
|
|
DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
|
|
there was a deadlock, but another transaction was chosen
|
|
as a victim, and we got the lock immediately: no need to
|
|
wait then */
|
|
dberr_t
|
|
RecLock::deadlock_check(lock_t* lock)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock->trx == m_trx);
|
|
ut_ad(trx_mutex_own(m_trx));
|
|
|
|
const trx_t* victim_trx =
|
|
DeadlockChecker::check_and_resolve(lock, m_trx);
|
|
|
|
/* Check the outcome of the deadlock test. It is possible that
|
|
the transaction that blocked our lock was rolled back and we
|
|
were granted our lock. */
|
|
|
|
dberr_t err = check_deadlock_result(victim_trx, lock);
|
|
|
|
if (err == DB_LOCK_WAIT) {
|
|
|
|
set_wait_state(lock);
|
|
|
|
MONITOR_INC(MONITOR_LOCKREC_WAIT);
|
|
}
|
|
|
|
return(err);
|
|
}
|
|
|
|
/**
|
|
Collect the transactions that will need to be rolled back asynchronously
|
|
@param[in, out] trx Transaction to be rolled back */
|
|
void
|
|
RecLock::mark_trx_for_rollback(trx_t* trx)
|
|
{
|
|
trx->abort = true;
|
|
|
|
ut_ad(!trx->read_only);
|
|
ut_ad(trx_mutex_own(m_trx));
|
|
ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
|
|
ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
|
|
ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));
|
|
|
|
/* Note that we will attempt an async rollback. The _ASYNC
|
|
flag will be cleared if the transaction is rolled back
|
|
synchronously before we get a chance to do it. */
|
|
|
|
trx->in_innodb |= TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC;
|
|
|
|
ut_a(!trx->killed_by);
|
|
my_atomic_storelong(&trx->killed_by, (long) os_thread_get_curr_id());
|
|
|
|
m_trx->hit_list.push_back(hit_list_t::value_type(trx));
|
|
|
|
#ifdef UNIV_DEBUG
|
|
THD* thd = trx->mysql_thd;
|
|
|
|
if (thd != NULL) {
|
|
|
|
char buffer[1024];
|
|
ib::info() << "Blocking transaction: ID: " << trx->id << " - "
|
|
<< " Blocked transaction ID: "<< m_trx->id << " - "
|
|
<< thd_get_error_context_description(thd, buffer, sizeof(buffer),
|
|
512);
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
}
|
|
|
|
/**
|
|
Setup the requesting transaction state for lock grant
|
|
@param[in,out] lock Lock for which to change state */
|
|
void
|
|
RecLock::set_wait_state(lock_t* lock)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(m_trx == lock->trx);
|
|
ut_ad(trx_mutex_own(m_trx));
|
|
ut_ad(lock_get_wait(lock));
|
|
|
|
m_trx->lock.wait_started = ut_time();
|
|
|
|
m_trx->lock.que_state = TRX_QUE_LOCK_WAIT;
|
|
|
|
m_trx->lock.was_chosen_as_deadlock_victim = false;
|
|
|
|
bool stopped = que_thr_stop(m_thr);
|
|
ut_a(stopped);
|
|
}
|
|
|
|
/**
|
|
Enqueue a lock wait for normal transaction. If it is a high priority transaction
|
|
then jump the record lock wait queue and if the transaction at the head of the
|
|
queue is itself waiting roll it back, also do a deadlock check and resolve.
|
|
@param[in, out] wait_for The lock that the joining transaction is
|
|
waiting for
|
|
@param[in] prdt Predicate [optional]
|
|
@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
|
|
DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
|
|
there was a deadlock, but another transaction was chosen
|
|
as a victim, and we got the lock immediately: no need to
|
|
wait then */
|
|
dberr_t
|
|
RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(m_trx == thr_get_trx(m_thr));
|
|
ut_ad(trx_mutex_own(m_trx));
|
|
|
|
DEBUG_SYNC_C("rec_lock_add_to_waitq");
|
|
|
|
m_mode |= LOCK_WAIT;
|
|
|
|
/* Do the preliminary checks, and set query thread state */
|
|
|
|
prepare();
|
|
|
|
bool high_priority = trx_is_high_priority(m_trx);
|
|
|
|
/* Don't queue the lock to hash table, if high priority transaction. */
|
|
lock_t* lock = create(m_trx, true, !high_priority, prdt);
|
|
|
|
/* Attempt to jump over the low priority waiting locks. */
|
|
if (high_priority && jump_queue(lock, wait_for)) {
|
|
|
|
/* Lock is granted */
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
ut_ad(lock_get_wait(lock));
|
|
|
|
dberr_t err = deadlock_check(lock);
|
|
|
|
ut_ad(trx_mutex_own(m_trx));
|
|
|
|
// Move it only when it does not cause a deadlock.
|
|
if (err != DB_DEADLOCK
|
|
&& innodb_lock_schedule_algorithm
|
|
== INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
|
|
&& !thd_is_replication_slave_thread(lock->trx->mysql_thd)
|
|
&& !trx_is_high_priority(lock->trx)) {
|
|
|
|
HASH_DELETE(lock_t, hash, lock_hash_get(lock->type_mode),
|
|
m_rec_id.fold(), lock);
|
|
dberr_t res = lock_rec_insert_by_trx_age(lock);
|
|
if (res != DB_SUCCESS) {
|
|
return res;
|
|
}
|
|
}
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Adds a record lock request in the record queue. The request is normally
|
|
added as the last in the queue, but if there are no waiting lock requests
|
|
on the record, and the request to be added is not a waiting request, we
|
|
can reuse a suitable record lock object already existing on the same page,
|
|
just setting the appropriate bit in its bitmap. This is a low-level function
|
|
which does NOT check for deadlocks or lock compatibility!
|
|
@return lock where the bit was set */
|
|
static
|
|
void
|
|
lock_rec_add_to_queue(
|
|
/*==================*/
|
|
ulint type_mode,/*!< in: lock mode, wait, gap
|
|
etc. flags; type is ignored
|
|
and replaced by LOCK_REC */
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no,/*!< in: heap number of the record */
|
|
dict_index_t* index, /*!< in: index of record */
|
|
trx_t* trx, /*!< in/out: transaction */
|
|
bool caller_owns_trx_mutex)
|
|
/*!< in: TRUE if caller owns the
|
|
transaction mutex */
|
|
{
|
|
#ifdef UNIV_DEBUG
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
|
|
ut_ad(dict_index_is_clust(index)
|
|
|| dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
|
|
switch (type_mode & LOCK_MODE_MASK) {
|
|
case LOCK_X:
|
|
case LOCK_S:
|
|
break;
|
|
default:
|
|
ut_error;
|
|
}
|
|
|
|
if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
|
|
lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
|
|
? LOCK_X
|
|
: LOCK_S;
|
|
const lock_t* other_lock
|
|
= lock_rec_other_has_expl_req(
|
|
mode, block, false, heap_no, trx);
|
|
#ifdef WITH_WSREP
|
|
//ut_a(!other_lock || (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
|
|
// wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)));
|
|
if (other_lock &&
|
|
wsrep_on(trx->mysql_thd) &&
|
|
!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
|
|
!wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)) {
|
|
|
|
ib::info() << "WSREP BF lock conflict for my lock:\n BF:" <<
|
|
((wsrep_thd_is_BF(trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
|
|
wsrep_thd_exec_mode(trx->mysql_thd) << " conflict: " <<
|
|
wsrep_thd_conflict_state(trx->mysql_thd, false) << " seqno: " <<
|
|
wsrep_thd_trx_seqno(trx->mysql_thd) << " SQL: " <<
|
|
wsrep_thd_query(trx->mysql_thd);
|
|
trx_t* otrx = other_lock->trx;
|
|
ib::info() << "WSREP other lock:\n BF:" <<
|
|
((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
|
|
wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
|
|
wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
|
|
wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
|
|
wsrep_thd_query(otrx->mysql_thd);
|
|
}
|
|
#else
|
|
ut_a(!other_lock);
|
|
#endif /* WITH_WSREP */
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
type_mode |= LOCK_REC;
|
|
|
|
/* If rec is the supremum record, then we can reset the gap bit, as
|
|
all locks on the supremum are automatically of the gap type, and we
|
|
try to avoid unnecessary memory consumption of a new record lock
|
|
struct for a gap type lock */
|
|
|
|
if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
|
|
ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
|
|
|
|
/* There should never be LOCK_REC_NOT_GAP on a supremum
|
|
record, but let us play safe */
|
|
|
|
type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
|
|
}
|
|
|
|
lock_t* lock;
|
|
lock_t* first_lock;
|
|
hash_table_t* hash = lock_hash_get(type_mode);
|
|
|
|
/* Look for a waiting lock request on the same record or on a gap */
|
|
|
|
for (first_lock = lock = lock_rec_get_first_on_page(hash, block);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next_on_page(lock)) {
|
|
|
|
if (lock_get_wait(lock)
|
|
&& lock_rec_get_nth_bit(lock, heap_no)) {
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (lock == NULL && !(type_mode & LOCK_WAIT)) {
|
|
|
|
/* Look for a similar record lock on the same page:
|
|
if one is found and there are no waiting lock requests,
|
|
we can just set the bit */
|
|
|
|
lock = lock_rec_find_similar_on_page(
|
|
type_mode, heap_no, first_lock, trx);
|
|
|
|
if (lock != NULL) {
|
|
|
|
lock_rec_set_nth_bit(lock, heap_no);
|
|
|
|
return;
|
|
}
|
|
}
|
|
|
|
RecLock rec_lock(index, block, heap_no, type_mode);
|
|
|
|
rec_lock.create(trx, caller_owns_trx_mutex, true);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
This is a fast routine for locking a record in the most common cases:
|
|
there are no explicit locks on the page, or there is just one lock, owned
|
|
by this transaction, and of the right type_mode. This is a low-level function
|
|
which does NOT look at implicit locks! Checks lock compatibility within
|
|
explicit locks. This function sets a normal next-key lock, or in the case of
|
|
a page supremum record, a gap type lock.
|
|
@return whether the locking succeeded */
|
|
UNIV_INLINE
|
|
lock_rec_req_status
|
|
lock_rec_lock_fast(
|
|
/*===============*/
|
|
bool impl, /*!< in: if TRUE, no lock is set
|
|
if no wait is necessary: we
|
|
assume that the caller will
|
|
set an implicit lock */
|
|
ulint mode, /*!< in: lock mode: LOCK_X or
|
|
LOCK_S possibly ORed to either
|
|
LOCK_GAP or LOCK_REC_NOT_GAP */
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no,/*!< in: heap number of record */
|
|
dict_index_t* index, /*!< in: index of record */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(!srv_read_only_mode);
|
|
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|
|
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)
|
|
|| srv_read_only_mode);
|
|
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|
|
|| (LOCK_MODE_MASK & mode) == LOCK_X);
|
|
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|
|
|| mode - (LOCK_MODE_MASK & mode) == 0
|
|
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
|
|
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
|
|
|
|
DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););
|
|
|
|
lock_t* lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
|
|
|
|
trx_t* trx = thr_get_trx(thr);
|
|
|
|
lock_rec_req_status status = LOCK_REC_SUCCESS;
|
|
|
|
if (lock == NULL) {
|
|
|
|
if (!impl) {
|
|
RecLock rec_lock(index, block, heap_no, mode);
|
|
|
|
/* Note that we don't own the trx mutex. */
|
|
rec_lock.create(trx, false, true);
|
|
}
|
|
|
|
status = LOCK_REC_SUCCESS_CREATED;
|
|
} else {
|
|
trx_mutex_enter(trx);
|
|
|
|
if (lock_rec_get_next_on_page(lock)
|
|
|| lock->trx != trx
|
|
|| lock->type_mode != (mode | LOCK_REC)
|
|
|| lock_rec_get_n_bits(lock) <= heap_no) {
|
|
|
|
status = LOCK_REC_FAIL;
|
|
} else if (!impl) {
|
|
/* If the nth bit of the record lock is already set
|
|
then we do not set a new lock bit, otherwise we do
|
|
set */
|
|
if (!lock_rec_get_nth_bit(lock, heap_no)) {
|
|
lock_rec_set_nth_bit(lock, heap_no);
|
|
status = LOCK_REC_SUCCESS_CREATED;
|
|
}
|
|
}
|
|
|
|
trx_mutex_exit(trx);
|
|
}
|
|
|
|
return(status);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
This is the general, and slower, routine for locking a record. This is a
|
|
low-level function which does NOT look at implicit locks! Checks lock
|
|
compatibility within explicit locks. This function sets a normal next-key
|
|
lock, or in the case of a page supremum record, a gap type lock.
|
|
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
|
|
or DB_QUE_THR_SUSPENDED */
|
|
static
|
|
dberr_t
|
|
lock_rec_lock_slow(
|
|
/*===============*/
|
|
ibool impl, /*!< in: if TRUE, no lock is set
|
|
if no wait is necessary: we
|
|
assume that the caller will
|
|
set an implicit lock */
|
|
ulint mode, /*!< in: lock mode: LOCK_X or
|
|
LOCK_S possibly ORed to either
|
|
LOCK_GAP or LOCK_REC_NOT_GAP */
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no,/*!< in: heap number of record */
|
|
dict_index_t* index, /*!< in: index of record */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(!srv_read_only_mode);
|
|
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|
|
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|
|
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|
|
|| (LOCK_MODE_MASK & mode) == LOCK_X);
|
|
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|
|
|| mode - (LOCK_MODE_MASK & mode) == 0
|
|
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
|
|
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
|
|
|
|
DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););
|
|
|
|
dberr_t err;
|
|
trx_t* trx = thr_get_trx(thr);
|
|
|
|
trx_mutex_enter(trx);
|
|
|
|
if (lock_rec_has_expl(mode, block, heap_no, trx)) {
|
|
|
|
/* The trx already has a strong enough lock on rec: do
|
|
nothing */
|
|
|
|
err = DB_SUCCESS;
|
|
|
|
} else {
|
|
|
|
const lock_t* wait_for = lock_rec_other_has_conflicting(
|
|
mode, block, heap_no, trx);
|
|
|
|
if (wait_for != NULL) {
|
|
|
|
/* If another transaction has a non-gap conflicting
|
|
request in the queue, as this transaction does not
|
|
have a lock strong enough already granted on the
|
|
record, we may have to wait. */
|
|
|
|
RecLock rec_lock(thr, index, block, heap_no, mode);
|
|
|
|
err = rec_lock.add_to_waitq(wait_for);
|
|
|
|
} else if (!impl) {
|
|
|
|
/* Set the requested lock on the record, note that
|
|
we already own the transaction mutex. */
|
|
|
|
lock_rec_add_to_queue(
|
|
LOCK_REC | mode, block, heap_no, index, trx,
|
|
true);
|
|
|
|
err = DB_SUCCESS_LOCKED_REC;
|
|
} else {
|
|
err = DB_SUCCESS;
|
|
}
|
|
}
|
|
|
|
trx_mutex_exit(trx);
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Tries to lock the specified record in the mode requested. If not immediately
|
|
possible, enqueues a waiting lock request. This is a low-level function
|
|
which does NOT look at implicit locks! Checks lock compatibility within
|
|
explicit locks. This function sets a normal next-key lock, or in the case
|
|
of a page supremum record, a gap type lock.
|
|
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
|
|
or DB_QUE_THR_SUSPENDED */
|
|
static
|
|
dberr_t
|
|
lock_rec_lock(
|
|
/*==========*/
|
|
bool impl, /*!< in: if true, no lock is set
|
|
if no wait is necessary: we
|
|
assume that the caller will
|
|
set an implicit lock */
|
|
ulint mode, /*!< in: lock mode: LOCK_X or
|
|
LOCK_S possibly ORed to either
|
|
LOCK_GAP or LOCK_REC_NOT_GAP */
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no,/*!< in: heap number of record */
|
|
dict_index_t* index, /*!< in: index of record */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(!srv_read_only_mode);
|
|
ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|
|
ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|
|
ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
|
|
|| (LOCK_MODE_MASK & mode) == LOCK_X);
|
|
ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
|
|
|| mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
|
|
|| mode - (LOCK_MODE_MASK & mode) == 0);
|
|
ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
|
|
|
|
/* We try a simplified and faster subroutine for the most
|
|
common cases */
|
|
switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
|
|
case LOCK_REC_SUCCESS:
|
|
return(DB_SUCCESS);
|
|
case LOCK_REC_SUCCESS_CREATED:
|
|
return(DB_SUCCESS_LOCKED_REC);
|
|
case LOCK_REC_FAIL:
|
|
return(lock_rec_lock_slow(impl, mode, block,
|
|
heap_no, index, thr));
|
|
}
|
|
|
|
ut_error;
|
|
return(DB_ERROR);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if a waiting record lock request still has to wait in a queue.
|
|
@return lock that is causing the wait */
|
|
static
|
|
const lock_t*
|
|
lock_rec_has_to_wait_in_queue(
|
|
/*==========================*/
|
|
const lock_t* wait_lock) /*!< in: waiting record lock */
|
|
{
|
|
const lock_t* lock;
|
|
ulint space;
|
|
ulint page_no;
|
|
ulint heap_no;
|
|
ulint bit_mask;
|
|
ulint bit_offset;
|
|
hash_table_t* hash;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock_get_wait(wait_lock));
|
|
ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
|
|
|
|
space = wait_lock->un_member.rec_lock.space;
|
|
page_no = wait_lock->un_member.rec_lock.page_no;
|
|
heap_no = lock_rec_find_set_bit(wait_lock);
|
|
|
|
bit_offset = heap_no / 8;
|
|
bit_mask = static_cast<ulint>(1) << (heap_no % 8);
|
|
|
|
hash = lock_hash_get(wait_lock->type_mode);
|
|
|
|
for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
|
|
lock != wait_lock;
|
|
lock = lock_rec_get_next_on_page_const(lock)) {
|
|
|
|
const byte* p = (const byte*) &lock[1];
|
|
|
|
if (heap_no < lock_rec_get_n_bits(lock)
|
|
&& (p[bit_offset] & bit_mask)
|
|
&& lock_has_to_wait(wait_lock, lock)) {
|
|
#ifdef WITH_WSREP
|
|
if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) &&
|
|
wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) {
|
|
/* don't wait for another BF lock */
|
|
continue;
|
|
}
|
|
#endif /* WITH_WSREP */
|
|
|
|
return(lock);
|
|
}
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Grants a lock to a waiting lock request and releases the waiting transaction.
|
|
The caller must hold lock_sys->mutex but not lock->trx->mutex. */
|
|
static
|
|
void
|
|
lock_grant(
|
|
/*=======*/
|
|
lock_t* lock, /*!< in/out: waiting lock request */
|
|
bool owns_trx_mutex) /*!< in: whether lock->trx->mutex is owned */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_mutex_own(lock->trx) == owns_trx_mutex);
|
|
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
|
|
if (!owns_trx_mutex) {
|
|
trx_mutex_enter(lock->trx);
|
|
}
|
|
|
|
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
|
|
dict_table_t* table = lock->un_member.tab_lock.table;
|
|
|
|
if (table->autoinc_trx == lock->trx) {
|
|
ib::error() << "Transaction already had an"
|
|
<< " AUTO-INC lock!";
|
|
} else {
|
|
table->autoinc_trx = lock->trx;
|
|
|
|
ib_vector_push(lock->trx->autoinc_locks, &lock);
|
|
}
|
|
}
|
|
|
|
DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
|
|
trx_get_id_for_print(lock->trx)));
|
|
|
|
/* If we are resolving a deadlock by choosing another transaction
|
|
as a victim, then our original transaction may not be in the
|
|
TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
|
|
for it */
|
|
|
|
if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
|
|
que_thr_t* thr;
|
|
|
|
thr = que_thr_end_lock_wait(lock->trx);
|
|
|
|
if (thr != NULL) {
|
|
lock_wait_release_thread_if_suspended(thr);
|
|
}
|
|
}
|
|
|
|
if (!owns_trx_mutex) {
|
|
trx_mutex_exit(lock->trx);
|
|
}
|
|
}
|
|
|
|
/**
|
|
Jump the queue for the record over all low priority transactions and
|
|
add the lock. If all current granted locks are compatible, grant the
|
|
lock. Otherwise, mark all granted transaction for asynchronous
|
|
rollback and add to hit list.
|
|
@param[in, out] lock Lock being requested
|
|
@param[in] conflict_lock First conflicting lock from the head
|
|
@return true if the lock is granted */
|
|
bool
|
|
RecLock::jump_queue(
|
|
lock_t* lock,
|
|
const lock_t* conflict_lock)
|
|
{
|
|
ut_ad(m_trx == lock->trx);
|
|
ut_ad(trx_mutex_own(m_trx));
|
|
ut_ad(conflict_lock->trx != m_trx);
|
|
ut_ad(trx_is_high_priority(m_trx));
|
|
ut_ad(m_rec_id.m_heap_no != ULINT32_UNDEFINED);
|
|
|
|
bool high_priority = false;
|
|
|
|
/* Find out the position to add the lock. If there are other high
|
|
priority transactions in waiting state then we should add it after
|
|
the last high priority transaction. Otherwise, we can add it after
|
|
the last granted lock jumping over the wait queue. */
|
|
bool grant_lock = lock_add_priority(lock, conflict_lock,
|
|
&high_priority);
|
|
|
|
if (grant_lock) {
|
|
|
|
ut_ad(conflict_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT);
|
|
ut_ad(conflict_lock->trx->lock.wait_lock == conflict_lock);
|
|
|
|
DBUG_LOG("trx",
|
|
"Granting High Priority Transaction "
|
|
<< lock->trx->id << " a lock jumping over"
|
|
<< " waiting Transaction " << conflict_lock->trx->id);
|
|
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
return(true);
|
|
}
|
|
|
|
/* If another high priority transaction is found waiting
|
|
victim transactions are already marked for rollback. */
|
|
if (high_priority) {
|
|
|
|
return(false);
|
|
}
|
|
|
|
/* The lock is placed after the last granted lock in the queue. Check and add
|
|
low priority transactinos to hit list for ASYNC rollback. */
|
|
make_trx_hit_list(lock, conflict_lock);
|
|
|
|
return(false);
|
|
}
|
|
|
|
/** Find position in lock queue and add the high priority transaction
|
|
lock. Intention and GAP only locks can be granted even if there are
|
|
waiting locks in front of the queue. To add the High priority
|
|
transaction in a safe position we keep the following rule.
|
|
|
|
1. If the lock can be granted, add it before the first waiting lock
|
|
in the queue so that all currently waiting locks need to do conflict
|
|
check before getting granted.
|
|
|
|
2. If the lock has to wait, add it after the last granted lock or the
|
|
last waiting high priority transaction in the queue whichever is later.
|
|
This ensures that the transaction is granted only after doing conflict
|
|
check with all granted transactions.
|
|
@param[in] lock Lock being requested
|
|
@param[in] conflict_lock First conflicting lock from the head
|
|
@param[out] high_priority high priority transaction ahead in queue
|
|
@return true if the lock can be granted */
|
|
bool
|
|
RecLock::lock_add_priority(
|
|
lock_t* lock,
|
|
const lock_t* conflict_lock,
|
|
bool* high_priority)
|
|
{
|
|
ut_ad(high_priority);
|
|
|
|
*high_priority = false;
|
|
|
|
/* If the first conflicting lock is waiting for the current row,
|
|
then all other granted locks are compatible and the lock can be
|
|
directly granted if no other high priority transactions are
|
|
waiting. We need to recheck with all granted transaction as there
|
|
could be granted GAP or Intention locks down the queue. */
|
|
bool grant_lock = (conflict_lock->is_waiting());
|
|
lock_t* lock_head = NULL;
|
|
lock_t* grant_position = NULL;
|
|
lock_t* add_position = NULL;
|
|
|
|
HASH_SEARCH(hash, lock_sys->rec_hash, m_rec_id.fold(), lock_t*,
|
|
lock_head, ut_ad(lock_head->is_record_lock()), true);
|
|
|
|
ut_ad(lock_head);
|
|
|
|
for (lock_t* next = lock_head; next != NULL; next = next->hash) {
|
|
|
|
/* check only for locks on the current row */
|
|
if (!is_on_row(next)) {
|
|
continue;
|
|
}
|
|
|
|
if (next->is_waiting()) {
|
|
/* grant lock position is the granted lock just before
|
|
the first wait lock in the queue. */
|
|
if (grant_position == NULL) {
|
|
grant_position = add_position;
|
|
}
|
|
|
|
if (trx_is_high_priority(next->trx)) {
|
|
|
|
*high_priority = true;
|
|
grant_lock = false;
|
|
add_position = next;
|
|
}
|
|
} else {
|
|
|
|
add_position = next;
|
|
/* Cannot grant lock if there is any conflicting
|
|
granted lock. */
|
|
if (grant_lock && lock_has_to_wait(lock, next)) {
|
|
grant_lock = false;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* If the lock is to be granted it is safe to add before the first
|
|
waiting lock in the queue. */
|
|
if (grant_lock) {
|
|
|
|
ut_ad(!lock_has_to_wait(lock, grant_position));
|
|
add_position = grant_position;
|
|
}
|
|
|
|
ut_ad(add_position != NULL);
|
|
|
|
/* Add the lock to lock hash table. */
|
|
lock->hash = add_position->hash;
|
|
add_position->hash = lock;
|
|
++lock->index->table->n_rec_locks;
|
|
|
|
return(grant_lock);
|
|
}
|
|
|
|
/** Iterate over the granted locks and prepare the hit list for ASYNC Rollback.
|
|
If the transaction is waiting for some other lock then wake up with deadlock error.
|
|
Currently we don't mark following transactions for ASYNC Rollback.
|
|
1. Read only transactions
|
|
2. Background transactions
|
|
3. Other High priority transactions
|
|
@param[in] lock Lock being requested
|
|
@param[in] conflict_lock First conflicting lock from the head */
|
|
void
|
|
RecLock::make_trx_hit_list(
|
|
lock_t* lock,
|
|
const lock_t* conflict_lock)
|
|
{
|
|
const lock_t* next;
|
|
|
|
for (next = conflict_lock; next != NULL; next = next->hash) {
|
|
|
|
/* All locks ahead in the queue are checked. */
|
|
if (next == lock) {
|
|
|
|
ut_ad(next->is_waiting());
|
|
break;
|
|
}
|
|
|
|
trx_t* trx = next->trx;
|
|
/* Check only for conflicting, granted locks on the current row.
|
|
Currently, we don't rollback read only transactions, transactions
|
|
owned by background threads. */
|
|
if (trx == lock->trx
|
|
|| !is_on_row(next)
|
|
|| next->is_waiting()
|
|
|| trx->read_only
|
|
|| trx->mysql_thd == NULL
|
|
|| !lock_has_to_wait(lock, next)) {
|
|
|
|
continue;
|
|
}
|
|
|
|
trx_mutex_enter(trx);
|
|
|
|
/* Skip high priority transactions, if already marked for abort
|
|
by some other transaction or if ASYNC rollback is disabled. A
|
|
transaction must complete kill/abort of a victim transaction once
|
|
marked and added to hit list. */
|
|
if (trx_is_high_priority(trx)
|
|
|| (trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE) != 0
|
|
|| trx->abort) {
|
|
|
|
trx_mutex_exit(trx);
|
|
continue;
|
|
}
|
|
|
|
/* If the transaction is waiting on some other resource then
|
|
wake it up with DEAD_LOCK error so that it can rollback. */
|
|
if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
|
|
|
|
/* Assert that it is not waiting for current record. */
|
|
ut_ad(trx->lock.wait_lock != next);
|
|
|
|
DBUG_LOG("trx", "High Priority Transaction "
|
|
<< lock->trx->id
|
|
<< " waking up blocking transaction "
|
|
<< trx->id);
|
|
|
|
trx->lock.was_chosen_as_deadlock_victim = true;
|
|
lock_cancel_waiting_and_release(trx->lock.wait_lock);
|
|
trx_mutex_exit(trx);
|
|
continue;
|
|
}
|
|
|
|
/* Mark for ASYNC Rollback and add to hit list. */
|
|
mark_trx_for_rollback(trx);
|
|
trx_mutex_exit(trx);
|
|
}
|
|
|
|
ut_ad(next == lock);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Cancels a waiting record lock request and releases the waiting transaction
|
|
that requested it. NOTE: does NOT check if waiting lock requests behind this
|
|
one can now be granted! */
|
|
static
|
|
void
|
|
lock_rec_cancel(
|
|
/*============*/
|
|
lock_t* lock) /*!< in: waiting record lock request */
|
|
{
|
|
que_thr_t* thr;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
/* Reset the bit (there can be only one set bit) in the lock bitmap */
|
|
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
|
|
|
|
/* Reset the wait flag and the back pointer to lock in trx */
|
|
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
|
|
/* The following function releases the trx from lock wait */
|
|
|
|
trx_mutex_enter(lock->trx);
|
|
|
|
thr = que_thr_end_lock_wait(lock->trx);
|
|
|
|
if (thr != NULL) {
|
|
lock_wait_release_thread_if_suspended(thr);
|
|
}
|
|
|
|
trx_mutex_exit(lock->trx);
|
|
}
|
|
|
|
static
|
|
void
|
|
lock_grant_and_move_on_page(
|
|
hash_table_t* lock_hash,
|
|
ulint space,
|
|
ulint page_no)
|
|
{
|
|
lock_t* lock;
|
|
lock_t* previous;
|
|
ulint rec_fold = lock_rec_fold(space, page_no);
|
|
|
|
previous = (lock_t *) hash_get_nth_cell(lock_hash,
|
|
hash_calc_hash(rec_fold, lock_hash))->node;
|
|
if (previous == NULL) {
|
|
return;
|
|
}
|
|
if (previous->un_member.rec_lock.space == space &&
|
|
previous->un_member.rec_lock.page_no == page_no) {
|
|
lock = previous;
|
|
}
|
|
else {
|
|
while (previous->hash &&
|
|
(previous->hash->un_member.rec_lock.space != space ||
|
|
previous->hash->un_member.rec_lock.page_no != page_no)) {
|
|
previous = previous->hash;
|
|
}
|
|
lock = previous->hash;
|
|
}
|
|
|
|
ut_ad(previous->hash == lock || previous == lock);
|
|
/* Grant locks if there are no conflicting locks ahead.
|
|
Move granted locks to the head of the list. */
|
|
for (;lock != NULL;) {
|
|
/* If the lock is a wait lock on this page, and it does not need to wait. */
|
|
if ((lock->un_member.rec_lock.space == space)
|
|
&& (lock->un_member.rec_lock.page_no == page_no)
|
|
&& lock_get_wait(lock)
|
|
&& !lock_rec_has_to_wait_in_queue(lock)) {
|
|
|
|
|
|
bool exit_trx_mutex = false;
|
|
|
|
if (lock->trx->abort_type != TRX_SERVER_ABORT) {
|
|
ut_ad(trx_mutex_own(lock->trx));
|
|
trx_mutex_exit(lock->trx);
|
|
exit_trx_mutex = true;
|
|
}
|
|
|
|
lock_grant(lock, false);
|
|
|
|
if (exit_trx_mutex) {
|
|
ut_ad(!trx_mutex_own(lock->trx));
|
|
trx_mutex_enter(lock->trx);
|
|
}
|
|
|
|
if (previous != NULL) {
|
|
/* Move the lock to the head of the list. */
|
|
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
|
|
lock_rec_insert_to_head(lock, rec_fold);
|
|
} else {
|
|
/* Already at the head of the list. */
|
|
previous = lock;
|
|
}
|
|
/* Move on to the next lock. */
|
|
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
|
|
} else {
|
|
previous = lock;
|
|
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
|
|
}
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes a record lock request, waiting or granted, from the queue and
|
|
grants locks to other transactions in the queue if they now are entitled
|
|
to a lock. NOTE: all record locks contained in in_lock are removed. */
|
|
static
|
|
void
|
|
lock_rec_dequeue_from_page(
|
|
/*=======================*/
|
|
lock_t* in_lock) /*!< in: record lock object: all
|
|
record locks which are contained in
|
|
this lock object are removed;
|
|
transactions waiting behind will
|
|
get their lock requests granted,
|
|
if they are now qualified to it */
|
|
{
|
|
ulint space;
|
|
ulint page_no;
|
|
lock_t* lock;
|
|
trx_lock_t* trx_lock;
|
|
hash_table_t* lock_hash;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
|
|
/* We may or may not be holding in_lock->trx->mutex here. */
|
|
|
|
trx_lock = &in_lock->trx->lock;
|
|
|
|
space = in_lock->un_member.rec_lock.space;
|
|
page_no = in_lock->un_member.rec_lock.page_no;
|
|
|
|
ut_ad(in_lock->index->table->n_rec_locks > 0);
|
|
in_lock->index->table->n_rec_locks--;
|
|
|
|
lock_hash = lock_hash_get(in_lock->type_mode);
|
|
|
|
HASH_DELETE(lock_t, hash, lock_hash,
|
|
lock_rec_fold(space, page_no), in_lock);
|
|
|
|
UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
|
|
|
|
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
|
|
MONITOR_DEC(MONITOR_NUM_RECLOCK);
|
|
|
|
if (innodb_lock_schedule_algorithm
|
|
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
|
|
thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) {
|
|
|
|
/* Check if waiting locks in the queue can now be granted:
|
|
grant locks if there are no conflicting locks ahead. Stop at
|
|
the first X lock that is waiting or has been granted. */
|
|
|
|
for (lock = lock_rec_get_first_on_page_addr(lock_hash, space,
|
|
page_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next_on_page(lock)) {
|
|
|
|
if (lock_get_wait(lock)
|
|
&& !lock_rec_has_to_wait_in_queue(lock)) {
|
|
|
|
/* Grant the lock */
|
|
ut_ad(lock->trx != in_lock->trx);
|
|
|
|
bool exit_trx_mutex = false;
|
|
|
|
if (lock->trx->abort_type != TRX_SERVER_ABORT) {
|
|
ut_ad(trx_mutex_own(lock->trx));
|
|
trx_mutex_exit(lock->trx);
|
|
exit_trx_mutex = true;
|
|
}
|
|
|
|
lock_grant(lock, false);
|
|
|
|
if (exit_trx_mutex) {
|
|
ut_ad(!trx_mutex_own(lock->trx));
|
|
trx_mutex_enter(lock->trx);
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
lock_grant_and_move_on_page(lock_hash, space, page_no);
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes a record lock request, waiting or granted, from the queue. */
|
|
void
|
|
lock_rec_discard(
|
|
/*=============*/
|
|
lock_t* in_lock) /*!< in: record lock object: all
|
|
record locks which are contained
|
|
in this lock object are removed */
|
|
{
|
|
ulint space;
|
|
ulint page_no;
|
|
trx_lock_t* trx_lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
|
|
|
|
trx_lock = &in_lock->trx->lock;
|
|
|
|
space = in_lock->un_member.rec_lock.space;
|
|
page_no = in_lock->un_member.rec_lock.page_no;
|
|
|
|
ut_ad(in_lock->index->table->n_rec_locks > 0);
|
|
in_lock->index->table->n_rec_locks--;
|
|
|
|
HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
|
|
lock_rec_fold(space, page_no), in_lock);
|
|
|
|
UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
|
|
|
|
MONITOR_INC(MONITOR_RECLOCK_REMOVED);
|
|
MONITOR_DEC(MONITOR_NUM_RECLOCK);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes record lock objects set on an index page which is discarded. This
|
|
function does not move locks, or check for waiting locks, therefore the
|
|
lock bitmaps must already be reset when this function is called. */
|
|
static
|
|
void
|
|
lock_rec_free_all_from_discard_page_low(
|
|
/*====================================*/
|
|
ulint space,
|
|
ulint page_no,
|
|
hash_table_t* lock_hash)
|
|
{
|
|
lock_t* lock;
|
|
lock_t* next_lock;
|
|
|
|
lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
|
|
|
|
while (lock != NULL) {
|
|
ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
|
|
ut_ad(!lock_get_wait(lock));
|
|
|
|
next_lock = lock_rec_get_next_on_page(lock);
|
|
|
|
lock_rec_discard(lock);
|
|
|
|
lock = next_lock;
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes record lock objects set on an index page which is discarded. This
|
|
function does not move locks, or check for waiting locks, therefore the
|
|
lock bitmaps must already be reset when this function is called. */
|
|
void
|
|
lock_rec_free_all_from_discard_page(
|
|
/*================================*/
|
|
const buf_block_t* block) /*!< in: page to be discarded */
|
|
{
|
|
ulint space;
|
|
ulint page_no;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
space = block->page.id.space();
|
|
page_no = block->page.id.page_no();
|
|
|
|
lock_rec_free_all_from_discard_page_low(
|
|
space, page_no, lock_sys->rec_hash);
|
|
lock_rec_free_all_from_discard_page_low(
|
|
space, page_no, lock_sys->prdt_hash);
|
|
lock_rec_free_all_from_discard_page_low(
|
|
space, page_no, lock_sys->prdt_page_hash);
|
|
}
|
|
|
|
/*============= RECORD LOCK MOVING AND INHERITING ===================*/
|
|
|
|
/*************************************************************//**
|
|
Resets the lock bits for a single record. Releases transactions waiting for
|
|
lock requests here. */
|
|
static
|
|
void
|
|
lock_rec_reset_and_release_wait_low(
|
|
/*================================*/
|
|
hash_table_t* hash, /*!< in: hash table */
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no)/*!< in: heap number of record */
|
|
{
|
|
lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
for (lock = lock_rec_get_first(hash, block, heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next(heap_no, lock)) {
|
|
|
|
if (lock_get_wait(lock)) {
|
|
lock_rec_cancel(lock);
|
|
} else {
|
|
lock_rec_reset_nth_bit(lock, heap_no);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Resets the lock bits for a single record. Releases transactions waiting for
|
|
lock requests here. */
|
|
static
|
|
void
|
|
lock_rec_reset_and_release_wait(
|
|
/*============================*/
|
|
const buf_block_t* block, /*!< in: buffer block containing
|
|
the record */
|
|
ulint heap_no)/*!< in: heap number of record */
|
|
{
|
|
lock_rec_reset_and_release_wait_low(
|
|
lock_sys->rec_hash, block, heap_no);
|
|
|
|
lock_rec_reset_and_release_wait_low(
|
|
lock_sys->prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
|
|
lock_rec_reset_and_release_wait_low(
|
|
lock_sys->prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
|
|
of another record as gap type locks, but does not reset the lock bits of
|
|
the other record. Also waiting lock requests on rec are inherited as
|
|
GRANTED gap locks. */
|
|
static
|
|
void
|
|
lock_rec_inherit_to_gap(
|
|
/*====================*/
|
|
const buf_block_t* heir_block, /*!< in: block containing the
|
|
record which inherits */
|
|
const buf_block_t* block, /*!< in: block containing the
|
|
record from which inherited;
|
|
does NOT reset the locks on
|
|
this record */
|
|
ulint heir_heap_no, /*!< in: heap_no of the
|
|
inheriting record */
|
|
ulint heap_no) /*!< in: heap_no of the
|
|
donating record */
|
|
{
|
|
lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
/* If srv_locks_unsafe_for_binlog is TRUE or session is using
|
|
READ COMMITTED isolation level, we do not want locks set
|
|
by an UPDATE or a DELETE to be inherited as gap type locks. But we
|
|
DO want S-locks/X-locks(taken for replace) set by a consistency
|
|
constraint to be inherited also then. */
|
|
|
|
for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next(heap_no, lock)) {
|
|
|
|
if (!lock_rec_get_insert_intention(lock)
|
|
&& !((srv_locks_unsafe_for_binlog
|
|
|| lock->trx->isolation_level
|
|
<= TRX_ISO_READ_COMMITTED)
|
|
&& lock_get_mode(lock) ==
|
|
(lock->trx->duplicates ? LOCK_S : LOCK_X))) {
|
|
lock_rec_add_to_queue(
|
|
LOCK_REC | LOCK_GAP | lock_get_mode(lock),
|
|
heir_block, heir_heap_no, lock->index,
|
|
lock->trx, FALSE);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type)
|
|
of another record as gap type locks, but does not reset the lock bits of the
|
|
other record. Also waiting lock requests are inherited as GRANTED gap locks. */
|
|
static
|
|
void
|
|
lock_rec_inherit_to_gap_if_gap_lock(
|
|
/*================================*/
|
|
const buf_block_t* block, /*!< in: buffer block */
|
|
ulint heir_heap_no, /*!< in: heap_no of
|
|
record which inherits */
|
|
ulint heap_no) /*!< in: heap_no of record
|
|
from which inherited;
|
|
does NOT reset the locks
|
|
on this record */
|
|
{
|
|
lock_t* lock;
|
|
|
|
lock_mutex_enter();
|
|
|
|
for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next(heap_no, lock)) {
|
|
|
|
if (!lock_rec_get_insert_intention(lock)
|
|
&& (heap_no == PAGE_HEAP_NO_SUPREMUM
|
|
|| !lock_rec_get_rec_not_gap(lock))) {
|
|
|
|
lock_rec_add_to_queue(
|
|
LOCK_REC | LOCK_GAP | lock_get_mode(lock),
|
|
block, heir_heap_no, lock->index,
|
|
lock->trx, FALSE);
|
|
}
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Moves the locks of a record to another record and resets the lock bits of
|
|
the donating record. */
|
|
static
|
|
void
|
|
lock_rec_move_low(
|
|
/*==============*/
|
|
hash_table_t* lock_hash, /*!< in: hash table to use */
|
|
const buf_block_t* receiver, /*!< in: buffer block containing
|
|
the receiving record */
|
|
const buf_block_t* donator, /*!< in: buffer block containing
|
|
the donating record */
|
|
ulint receiver_heap_no,/*!< in: heap_no of the record
|
|
which gets the locks; there
|
|
must be no lock requests
|
|
on it! */
|
|
ulint donator_heap_no)/*!< in: heap_no of the record
|
|
which gives the locks */
|
|
{
|
|
lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
/* If the lock is predicate lock, it resides on INFIMUM record */
|
|
ut_ad(lock_rec_get_first(
|
|
lock_hash, receiver, receiver_heap_no) == NULL
|
|
|| lock_hash == lock_sys->prdt_hash
|
|
|| lock_hash == lock_sys->prdt_page_hash);
|
|
|
|
for (lock = lock_rec_get_first(lock_hash,
|
|
donator, donator_heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next(donator_heap_no, lock)) {
|
|
|
|
const ulint type_mode = lock->type_mode;
|
|
|
|
lock_rec_reset_nth_bit(lock, donator_heap_no);
|
|
|
|
if (type_mode & LOCK_WAIT) {
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
}
|
|
|
|
/* Note that we FIRST reset the bit, and then set the lock:
|
|
the function works also if donator == receiver */
|
|
|
|
lock_rec_add_to_queue(
|
|
type_mode, receiver, receiver_heap_no,
|
|
lock->index, lock->trx, FALSE);
|
|
}
|
|
|
|
ut_ad(lock_rec_get_first(lock_sys->rec_hash,
|
|
donator, donator_heap_no) == NULL);
|
|
}
|
|
|
|
/** Move all the granted locks to the front of the given lock list.
|
|
All the waiting locks will be at the end of the list.
|
|
@param[in,out] lock_list the given lock list. */
|
|
static
|
|
void
|
|
lock_move_granted_locks_to_front(
|
|
UT_LIST_BASE_NODE_T(lock_t)& lock_list)
|
|
{
|
|
lock_t* lock;
|
|
|
|
bool seen_waiting_lock = false;
|
|
|
|
for (lock = UT_LIST_GET_FIRST(lock_list); lock;
|
|
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
|
|
|
|
if (!seen_waiting_lock) {
|
|
if (lock->is_waiting()) {
|
|
seen_waiting_lock = true;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
ut_ad(seen_waiting_lock);
|
|
|
|
if (!lock->is_waiting()) {
|
|
lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock);
|
|
ut_a(prev);
|
|
UT_LIST_MOVE_TO_FRONT(lock_list, lock);
|
|
lock = prev;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Moves the locks of a record to another record and resets the lock bits of
|
|
the donating record. */
|
|
UNIV_INLINE
|
|
void
|
|
lock_rec_move(
|
|
/*==========*/
|
|
const buf_block_t* receiver, /*!< in: buffer block containing
|
|
the receiving record */
|
|
const buf_block_t* donator, /*!< in: buffer block containing
|
|
the donating record */
|
|
ulint receiver_heap_no,/*!< in: heap_no of the record
|
|
which gets the locks; there
|
|
must be no lock requests
|
|
on it! */
|
|
ulint donator_heap_no)/*!< in: heap_no of the record
|
|
which gives the locks */
|
|
{
|
|
lock_rec_move_low(lock_sys->rec_hash, receiver, donator,
|
|
receiver_heap_no, donator_heap_no);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when we have reorganized a page. NOTE: we copy
|
|
also the locks set on the infimum of the page; the infimum may carry
|
|
locks if an update of a record is occurring on the page, and its locks
|
|
were temporarily stored on the infimum. */
|
|
void
|
|
lock_move_reorganize_page(
|
|
/*======================*/
|
|
const buf_block_t* block, /*!< in: old index page, now
|
|
reorganized */
|
|
const buf_block_t* oblock) /*!< in: copy of the old, not
|
|
reorganized page */
|
|
{
|
|
lock_t* lock;
|
|
UT_LIST_BASE_NODE_T(lock_t) old_locks;
|
|
mem_heap_t* heap = NULL;
|
|
ulint comp;
|
|
|
|
lock_mutex_enter();
|
|
|
|
/* FIXME: This needs to deal with predicate lock too */
|
|
lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
|
|
|
|
if (lock == NULL) {
|
|
lock_mutex_exit();
|
|
|
|
return;
|
|
}
|
|
|
|
heap = mem_heap_create(256);
|
|
|
|
/* Copy first all the locks on the page to heap and reset the
|
|
bitmaps in the original locks; chain the copies of the locks
|
|
using the trx_locks field in them. */
|
|
|
|
UT_LIST_INIT(old_locks, &lock_t::trx_locks);
|
|
|
|
do {
|
|
/* Make a copy of the lock */
|
|
lock_t* old_lock = lock_rec_copy(lock, heap);
|
|
|
|
UT_LIST_ADD_LAST(old_locks, old_lock);
|
|
|
|
/* Reset bitmap of lock */
|
|
lock_rec_bitmap_reset(lock);
|
|
|
|
if (lock_get_wait(lock)) {
|
|
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
}
|
|
|
|
lock = lock_rec_get_next_on_page(lock);
|
|
} while (lock != NULL);
|
|
|
|
comp = page_is_comp(block->frame);
|
|
ut_ad(comp == page_is_comp(oblock->frame));
|
|
|
|
lock_move_granted_locks_to_front(old_locks);
|
|
|
|
DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
|
|
UT_LIST_REVERSE(old_locks););
|
|
|
|
for (lock = UT_LIST_GET_FIRST(old_locks); lock;
|
|
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
|
|
|
|
/* NOTE: we copy also the locks set on the infimum and
|
|
supremum of the page; the infimum may carry locks if an
|
|
update of a record is occurring on the page, and its locks
|
|
were temporarily stored on the infimum */
|
|
const rec_t* rec1 = page_get_infimum_rec(
|
|
buf_block_get_frame(block));
|
|
const rec_t* rec2 = page_get_infimum_rec(
|
|
buf_block_get_frame(oblock));
|
|
|
|
/* Set locks according to old locks */
|
|
for (;;) {
|
|
ulint old_heap_no;
|
|
ulint new_heap_no;
|
|
|
|
if (comp) {
|
|
old_heap_no = rec_get_heap_no_new(rec2);
|
|
new_heap_no = rec_get_heap_no_new(rec1);
|
|
|
|
rec1 = page_rec_get_next_low(rec1, TRUE);
|
|
rec2 = page_rec_get_next_low(rec2, TRUE);
|
|
} else {
|
|
old_heap_no = rec_get_heap_no_old(rec2);
|
|
new_heap_no = rec_get_heap_no_old(rec1);
|
|
ut_ad(!memcmp(rec1, rec2,
|
|
rec_get_data_size_old(rec2)));
|
|
|
|
rec1 = page_rec_get_next_low(rec1, FALSE);
|
|
rec2 = page_rec_get_next_low(rec2, FALSE);
|
|
}
|
|
|
|
/* Clear the bit in old_lock. */
|
|
if (old_heap_no < lock->un_member.rec_lock.n_bits
|
|
&& lock_rec_reset_nth_bit(lock, old_heap_no)) {
|
|
/* NOTE that the old lock bitmap could be too
|
|
small for the new heap number! */
|
|
|
|
lock_rec_add_to_queue(
|
|
lock->type_mode, block, new_heap_no,
|
|
lock->index, lock->trx, FALSE);
|
|
}
|
|
|
|
if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
|
|
ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
|
|
break;
|
|
}
|
|
}
|
|
|
|
ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
|
|
mem_heap_free(heap);
|
|
|
|
#ifdef UNIV_DEBUG_LOCK_VALIDATE
|
|
ut_ad(lock_rec_validate_page(block));
|
|
#endif
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Moves the explicit locks on user records to another page if a record
|
|
list end is moved to another page. */
|
|
void
|
|
lock_move_rec_list_end(
|
|
/*===================*/
|
|
const buf_block_t* new_block, /*!< in: index page to move to */
|
|
const buf_block_t* block, /*!< in: index page */
|
|
const rec_t* rec) /*!< in: record on page: this
|
|
is the first record moved */
|
|
{
|
|
lock_t* lock;
|
|
const ulint comp = page_rec_is_comp(rec);
|
|
|
|
ut_ad(buf_block_get_frame(block) == page_align(rec));
|
|
ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));
|
|
|
|
lock_mutex_enter();
|
|
|
|
/* Note: when we move locks from record to record, waiting locks
|
|
and possible granted gap type locks behind them are enqueued in
|
|
the original order, because new elements are inserted to a hash
|
|
table to the end of the hash chain, and lock_rec_add_to_queue
|
|
does not reuse locks if there are waiters in the queue. */
|
|
|
|
for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
|
|
lock = lock_rec_get_next_on_page(lock)) {
|
|
const rec_t* rec1 = rec;
|
|
const rec_t* rec2;
|
|
const ulint type_mode = lock->type_mode;
|
|
|
|
if (comp) {
|
|
if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
|
|
rec1 = page_rec_get_next_low(rec1, TRUE);
|
|
}
|
|
|
|
rec2 = page_rec_get_next_low(
|
|
buf_block_get_frame(new_block)
|
|
+ PAGE_NEW_INFIMUM, TRUE);
|
|
} else {
|
|
if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
|
|
rec1 = page_rec_get_next_low(rec1, FALSE);
|
|
}
|
|
|
|
rec2 = page_rec_get_next_low(
|
|
buf_block_get_frame(new_block)
|
|
+ PAGE_OLD_INFIMUM, FALSE);
|
|
}
|
|
|
|
/* Copy lock requests on user records to new page and
|
|
reset the lock bits on the old */
|
|
|
|
for (;;) {
|
|
ulint rec1_heap_no;
|
|
ulint rec2_heap_no;
|
|
|
|
if (comp) {
|
|
rec1_heap_no = rec_get_heap_no_new(rec1);
|
|
|
|
if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
|
|
break;
|
|
}
|
|
|
|
rec2_heap_no = rec_get_heap_no_new(rec2);
|
|
rec1 = page_rec_get_next_low(rec1, TRUE);
|
|
rec2 = page_rec_get_next_low(rec2, TRUE);
|
|
} else {
|
|
rec1_heap_no = rec_get_heap_no_old(rec1);
|
|
|
|
if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
|
|
break;
|
|
}
|
|
|
|
rec2_heap_no = rec_get_heap_no_old(rec2);
|
|
|
|
ut_ad(!memcmp(rec1, rec2,
|
|
rec_get_data_size_old(rec2)));
|
|
|
|
rec1 = page_rec_get_next_low(rec1, FALSE);
|
|
rec2 = page_rec_get_next_low(rec2, FALSE);
|
|
}
|
|
|
|
if (rec1_heap_no < lock->un_member.rec_lock.n_bits
|
|
&& lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
|
|
if (type_mode & LOCK_WAIT) {
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
}
|
|
|
|
lock_rec_add_to_queue(
|
|
type_mode, new_block, rec2_heap_no,
|
|
lock->index, lock->trx, FALSE);
|
|
}
|
|
}
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
|
|
#ifdef UNIV_DEBUG_LOCK_VALIDATE
|
|
ut_ad(lock_rec_validate_page(block));
|
|
ut_ad(lock_rec_validate_page(new_block));
|
|
#endif
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Moves the explicit locks on user records to another page if a record
|
|
list start is moved to another page. */
|
|
void
|
|
lock_move_rec_list_start(
|
|
/*=====================*/
|
|
const buf_block_t* new_block, /*!< in: index page to
|
|
move to */
|
|
const buf_block_t* block, /*!< in: index page */
|
|
const rec_t* rec, /*!< in: record on page:
|
|
this is the first
|
|
record NOT copied */
|
|
const rec_t* old_end) /*!< in: old
|
|
previous-to-last
|
|
record on new_page
|
|
before the records
|
|
were copied */
|
|
{
|
|
lock_t* lock;
|
|
const ulint comp = page_rec_is_comp(rec);
|
|
|
|
ut_ad(block->frame == page_align(rec));
|
|
ut_ad(new_block->frame == page_align(old_end));
|
|
ut_ad(comp == page_rec_is_comp(old_end));
|
|
|
|
lock_mutex_enter();
|
|
|
|
for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
|
|
lock = lock_rec_get_next_on_page(lock)) {
|
|
const rec_t* rec1;
|
|
const rec_t* rec2;
|
|
const ulint type_mode = lock->type_mode;
|
|
|
|
if (comp) {
|
|
rec1 = page_rec_get_next_low(
|
|
buf_block_get_frame(block)
|
|
+ PAGE_NEW_INFIMUM, TRUE);
|
|
rec2 = page_rec_get_next_low(old_end, TRUE);
|
|
} else {
|
|
rec1 = page_rec_get_next_low(
|
|
buf_block_get_frame(block)
|
|
+ PAGE_OLD_INFIMUM, FALSE);
|
|
rec2 = page_rec_get_next_low(old_end, FALSE);
|
|
}
|
|
|
|
/* Copy lock requests on user records to new page and
|
|
reset the lock bits on the old */
|
|
|
|
while (rec1 != rec) {
|
|
ulint rec1_heap_no;
|
|
ulint rec2_heap_no;
|
|
|
|
if (comp) {
|
|
rec1_heap_no = rec_get_heap_no_new(rec1);
|
|
rec2_heap_no = rec_get_heap_no_new(rec2);
|
|
|
|
rec1 = page_rec_get_next_low(rec1, TRUE);
|
|
rec2 = page_rec_get_next_low(rec2, TRUE);
|
|
} else {
|
|
rec1_heap_no = rec_get_heap_no_old(rec1);
|
|
rec2_heap_no = rec_get_heap_no_old(rec2);
|
|
|
|
ut_ad(!memcmp(rec1, rec2,
|
|
rec_get_data_size_old(rec2)));
|
|
|
|
rec1 = page_rec_get_next_low(rec1, FALSE);
|
|
rec2 = page_rec_get_next_low(rec2, FALSE);
|
|
}
|
|
|
|
if (rec1_heap_no < lock->un_member.rec_lock.n_bits
|
|
&& lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
|
|
if (type_mode & LOCK_WAIT) {
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
}
|
|
|
|
lock_rec_add_to_queue(
|
|
type_mode, new_block, rec2_heap_no,
|
|
lock->index, lock->trx, FALSE);
|
|
}
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
|
|
if (page_rec_is_supremum(rec)) {
|
|
ulint i;
|
|
|
|
for (i = PAGE_HEAP_NO_USER_LOW;
|
|
i < lock_rec_get_n_bits(lock); i++) {
|
|
if (lock_rec_get_nth_bit(lock, i)) {
|
|
ib::fatal()
|
|
<< "lock_move_rec_list_start():"
|
|
<< i << " not moved in "
|
|
<< (void*) lock;
|
|
}
|
|
}
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
|
|
#ifdef UNIV_DEBUG_LOCK_VALIDATE
|
|
ut_ad(lock_rec_validate_page(block));
|
|
#endif
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Moves the explicit locks on user records to another page if a record
|
|
list start is moved to another page. */
|
|
void
|
|
lock_rtr_move_rec_list(
|
|
/*===================*/
|
|
const buf_block_t* new_block, /*!< in: index page to
|
|
move to */
|
|
const buf_block_t* block, /*!< in: index page */
|
|
rtr_rec_move_t* rec_move, /*!< in: recording records
|
|
moved */
|
|
ulint num_move) /*!< in: num of rec to move */
|
|
{
|
|
lock_t* lock;
|
|
ulint comp;
|
|
|
|
if (!num_move) {
|
|
return;
|
|
}
|
|
|
|
comp = page_rec_is_comp(rec_move[0].old_rec);
|
|
|
|
ut_ad(block->frame == page_align(rec_move[0].old_rec));
|
|
ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
|
|
ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));
|
|
|
|
lock_mutex_enter();
|
|
|
|
for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
|
|
lock = lock_rec_get_next_on_page(lock)) {
|
|
ulint moved = 0;
|
|
const rec_t* rec1;
|
|
const rec_t* rec2;
|
|
const ulint type_mode = lock->type_mode;
|
|
|
|
/* Copy lock requests on user records to new page and
|
|
reset the lock bits on the old */
|
|
|
|
while (moved < num_move) {
|
|
ulint rec1_heap_no;
|
|
ulint rec2_heap_no;
|
|
|
|
rec1 = rec_move[moved].old_rec;
|
|
rec2 = rec_move[moved].new_rec;
|
|
|
|
if (comp) {
|
|
rec1_heap_no = rec_get_heap_no_new(rec1);
|
|
rec2_heap_no = rec_get_heap_no_new(rec2);
|
|
|
|
} else {
|
|
rec1_heap_no = rec_get_heap_no_old(rec1);
|
|
rec2_heap_no = rec_get_heap_no_old(rec2);
|
|
|
|
ut_ad(!memcmp(rec1, rec2,
|
|
rec_get_data_size_old(rec2)));
|
|
}
|
|
|
|
if (rec1_heap_no < lock->un_member.rec_lock.n_bits
|
|
&& lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
|
|
if (type_mode & LOCK_WAIT) {
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
}
|
|
|
|
lock_rec_add_to_queue(
|
|
type_mode, new_block, rec2_heap_no,
|
|
lock->index, lock->trx, FALSE);
|
|
|
|
rec_move[moved].moved = true;
|
|
}
|
|
|
|
moved++;
|
|
}
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
|
|
#ifdef UNIV_DEBUG_LOCK_VALIDATE
|
|
ut_ad(lock_rec_validate_page(block));
|
|
#endif
|
|
}
|
|
/*************************************************************//**
|
|
Updates the lock table when a page is split to the right. */
|
|
void
|
|
lock_update_split_right(
|
|
/*====================*/
|
|
const buf_block_t* right_block, /*!< in: right page */
|
|
const buf_block_t* left_block) /*!< in: left page */
|
|
{
|
|
ulint heap_no = lock_get_min_heap_no(right_block);
|
|
|
|
lock_mutex_enter();
|
|
|
|
/* Move the locks on the supremum of the left page to the supremum
|
|
of the right page */
|
|
|
|
lock_rec_move(right_block, left_block,
|
|
PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
|
|
|
|
/* Inherit the locks to the supremum of left page from the successor
|
|
of the infimum on right page */
|
|
|
|
lock_rec_inherit_to_gap(left_block, right_block,
|
|
PAGE_HEAP_NO_SUPREMUM, heap_no);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when a page is merged to the right. */
|
|
void
|
|
lock_update_merge_right(
|
|
/*====================*/
|
|
const buf_block_t* right_block, /*!< in: right page to
|
|
which merged */
|
|
const rec_t* orig_succ, /*!< in: original
|
|
successor of infimum
|
|
on the right page
|
|
before merge */
|
|
const buf_block_t* left_block) /*!< in: merged index
|
|
page which will be
|
|
discarded */
|
|
{
|
|
lock_mutex_enter();
|
|
|
|
/* Inherit the locks from the supremum of the left page to the
|
|
original successor of infimum on the right page, to which the left
|
|
page was merged */
|
|
|
|
lock_rec_inherit_to_gap(right_block, left_block,
|
|
page_rec_get_heap_no(orig_succ),
|
|
PAGE_HEAP_NO_SUPREMUM);
|
|
|
|
/* Reset the locks on the supremum of the left page, releasing
|
|
waiting transactions */
|
|
|
|
lock_rec_reset_and_release_wait_low(
|
|
lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
|
|
|
|
#ifdef UNIV_DEBUG
|
|
/* there should exist no page lock on the left page,
|
|
otherwise, it will be blocked from merge */
|
|
ulint space = left_block->page.id.space();
|
|
ulint page_no = left_block->page.id.page_no();
|
|
ut_ad(lock_rec_get_first_on_page_addr(
|
|
lock_sys->prdt_page_hash, space, page_no) == NULL);
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
lock_rec_free_all_from_discard_page(left_block);
|
|
|
|
lock_mutex_exit();
|
|
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when the root page is copied to another in
|
|
btr_root_raise_and_insert. Note that we leave lock structs on the
|
|
root page, even though they do not make sense on other than leaf
|
|
pages: the reason is that in a pessimistic update the infimum record
|
|
of the root page will act as a dummy carrier of the locks of the record
|
|
to be updated. */
|
|
void
|
|
lock_update_root_raise(
|
|
/*===================*/
|
|
const buf_block_t* block, /*!< in: index page to which copied */
|
|
const buf_block_t* root) /*!< in: root page */
|
|
{
|
|
lock_mutex_enter();
|
|
|
|
/* Move the locks on the supremum of the root to the supremum
|
|
of block */
|
|
|
|
lock_rec_move(block, root,
|
|
PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when a page is copied to another and the original page
|
|
is removed from the chain of leaf pages, except if page is the root! */
|
|
void
|
|
lock_update_copy_and_discard(
|
|
/*=========================*/
|
|
const buf_block_t* new_block, /*!< in: index page to
|
|
which copied */
|
|
const buf_block_t* block) /*!< in: index page;
|
|
NOT the root! */
|
|
{
|
|
lock_mutex_enter();
|
|
|
|
/* Move the locks on the supremum of the old page to the supremum
|
|
of new_page */
|
|
|
|
lock_rec_move(new_block, block,
|
|
PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
|
|
lock_rec_free_all_from_discard_page(block);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when a page is split to the left. */
|
|
void
|
|
lock_update_split_left(
|
|
/*===================*/
|
|
const buf_block_t* right_block, /*!< in: right page */
|
|
const buf_block_t* left_block) /*!< in: left page */
|
|
{
|
|
ulint heap_no = lock_get_min_heap_no(right_block);
|
|
|
|
lock_mutex_enter();
|
|
|
|
/* Inherit the locks to the supremum of the left page from the
|
|
successor of the infimum on the right page */
|
|
|
|
lock_rec_inherit_to_gap(left_block, right_block,
|
|
PAGE_HEAP_NO_SUPREMUM, heap_no);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when a page is merged to the left. */
|
|
void
|
|
lock_update_merge_left(
|
|
/*===================*/
|
|
const buf_block_t* left_block, /*!< in: left page to
|
|
which merged */
|
|
const rec_t* orig_pred, /*!< in: original predecessor
|
|
of supremum on the left page
|
|
before merge */
|
|
const buf_block_t* right_block) /*!< in: merged index page
|
|
which will be discarded */
|
|
{
|
|
const rec_t* left_next_rec;
|
|
|
|
ut_ad(left_block->frame == page_align(orig_pred));
|
|
|
|
lock_mutex_enter();
|
|
|
|
left_next_rec = page_rec_get_next_const(orig_pred);
|
|
|
|
if (!page_rec_is_supremum(left_next_rec)) {
|
|
|
|
/* Inherit the locks on the supremum of the left page to the
|
|
first record which was moved from the right page */
|
|
|
|
lock_rec_inherit_to_gap(left_block, left_block,
|
|
page_rec_get_heap_no(left_next_rec),
|
|
PAGE_HEAP_NO_SUPREMUM);
|
|
|
|
/* Reset the locks on the supremum of the left page,
|
|
releasing waiting transactions */
|
|
|
|
lock_rec_reset_and_release_wait_low(
|
|
lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
|
|
}
|
|
|
|
/* Move the locks from the supremum of right page to the supremum
|
|
of the left page */
|
|
|
|
lock_rec_move(left_block, right_block,
|
|
PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
|
|
|
|
#ifdef UNIV_DEBUG
|
|
/* there should exist no page lock on the right page,
|
|
otherwise, it will be blocked from merge */
|
|
ulint space = right_block->page.id.space();
|
|
ulint page_no = right_block->page.id.page_no();
|
|
lock_t* lock_test = lock_rec_get_first_on_page_addr(
|
|
lock_sys->prdt_page_hash, space, page_no);
|
|
ut_ad(!lock_test);
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
lock_rec_free_all_from_discard_page(right_block);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Resets the original locks on heir and replaces them with gap type locks
|
|
inherited from rec. */
|
|
void
|
|
lock_rec_reset_and_inherit_gap_locks(
|
|
/*=================================*/
|
|
const buf_block_t* heir_block, /*!< in: block containing the
|
|
record which inherits */
|
|
const buf_block_t* block, /*!< in: block containing the
|
|
record from which inherited;
|
|
does NOT reset the locks on
|
|
this record */
|
|
ulint heir_heap_no, /*!< in: heap_no of the
|
|
inheriting record */
|
|
ulint heap_no) /*!< in: heap_no of the
|
|
donating record */
|
|
{
|
|
lock_mutex_enter();
|
|
|
|
lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
|
|
|
|
lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when a page is discarded. */
|
|
void
|
|
lock_update_discard(
|
|
/*================*/
|
|
const buf_block_t* heir_block, /*!< in: index page
|
|
which will inherit the locks */
|
|
ulint heir_heap_no, /*!< in: heap_no of the record
|
|
which will inherit the locks */
|
|
const buf_block_t* block) /*!< in: index page
|
|
which will be discarded */
|
|
{
|
|
const rec_t* rec;
|
|
ulint heap_no;
|
|
const page_t* page = block->frame;
|
|
|
|
lock_mutex_enter();
|
|
|
|
if (!lock_rec_get_first_on_page(lock_sys->rec_hash, block)
|
|
&& (!lock_rec_get_first_on_page(lock_sys->prdt_hash, block))) {
|
|
/* No locks exist on page, nothing to do */
|
|
|
|
lock_mutex_exit();
|
|
|
|
return;
|
|
}
|
|
|
|
/* Inherit all the locks on the page to the record and reset all
|
|
the locks on the page */
|
|
|
|
if (page_is_comp(page)) {
|
|
rec = page + PAGE_NEW_INFIMUM;
|
|
|
|
do {
|
|
heap_no = rec_get_heap_no_new(rec);
|
|
|
|
lock_rec_inherit_to_gap(heir_block, block,
|
|
heir_heap_no, heap_no);
|
|
|
|
lock_rec_reset_and_release_wait(block, heap_no);
|
|
|
|
rec = page + rec_get_next_offs(rec, TRUE);
|
|
} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
|
|
} else {
|
|
rec = page + PAGE_OLD_INFIMUM;
|
|
|
|
do {
|
|
heap_no = rec_get_heap_no_old(rec);
|
|
|
|
lock_rec_inherit_to_gap(heir_block, block,
|
|
heir_heap_no, heap_no);
|
|
|
|
lock_rec_reset_and_release_wait(block, heap_no);
|
|
|
|
rec = page + rec_get_next_offs(rec, FALSE);
|
|
} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
|
|
}
|
|
|
|
lock_rec_free_all_from_discard_page(block);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when a new user record is inserted. */
|
|
void
|
|
lock_update_insert(
|
|
/*===============*/
|
|
const buf_block_t* block, /*!< in: buffer block containing rec */
|
|
const rec_t* rec) /*!< in: the inserted record */
|
|
{
|
|
ulint receiver_heap_no;
|
|
ulint donator_heap_no;
|
|
|
|
ut_ad(block->frame == page_align(rec));
|
|
|
|
/* Inherit the gap-locking locks for rec, in gap mode, from the next
|
|
record */
|
|
|
|
if (page_rec_is_comp(rec)) {
|
|
receiver_heap_no = rec_get_heap_no_new(rec);
|
|
donator_heap_no = rec_get_heap_no_new(
|
|
page_rec_get_next_low(rec, TRUE));
|
|
} else {
|
|
receiver_heap_no = rec_get_heap_no_old(rec);
|
|
donator_heap_no = rec_get_heap_no_old(
|
|
page_rec_get_next_low(rec, FALSE));
|
|
}
|
|
|
|
lock_rec_inherit_to_gap_if_gap_lock(
|
|
block, receiver_heap_no, donator_heap_no);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Updates the lock table when a record is removed. */
|
|
void
|
|
lock_update_delete(
|
|
/*===============*/
|
|
const buf_block_t* block, /*!< in: buffer block containing rec */
|
|
const rec_t* rec) /*!< in: the record to be removed */
|
|
{
|
|
const page_t* page = block->frame;
|
|
ulint heap_no;
|
|
ulint next_heap_no;
|
|
|
|
ut_ad(page == page_align(rec));
|
|
|
|
if (page_is_comp(page)) {
|
|
heap_no = rec_get_heap_no_new(rec);
|
|
next_heap_no = rec_get_heap_no_new(page
|
|
+ rec_get_next_offs(rec,
|
|
TRUE));
|
|
} else {
|
|
heap_no = rec_get_heap_no_old(rec);
|
|
next_heap_no = rec_get_heap_no_old(page
|
|
+ rec_get_next_offs(rec,
|
|
FALSE));
|
|
}
|
|
|
|
lock_mutex_enter();
|
|
|
|
/* Let the next record inherit the locks from rec, in gap mode */
|
|
|
|
lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
|
|
|
|
/* Reset the lock bits on rec and release waiting transactions */
|
|
|
|
lock_rec_reset_and_release_wait(block, heap_no);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Stores on the page infimum record the explicit locks of another record.
|
|
This function is used to store the lock state of a record when it is
|
|
updated and the size of the record changes in the update. The record
|
|
is moved in such an update, perhaps to another page. The infimum record
|
|
acts as a dummy carrier record, taking care of lock releases while the
|
|
actual record is being moved. */
|
|
void
|
|
lock_rec_store_on_page_infimum(
|
|
/*===========================*/
|
|
const buf_block_t* block, /*!< in: buffer block containing rec */
|
|
const rec_t* rec) /*!< in: record whose lock state
|
|
is stored on the infimum
|
|
record of the same page; lock
|
|
bits are reset on the
|
|
record */
|
|
{
|
|
ulint heap_no = page_rec_get_heap_no(rec);
|
|
|
|
ut_ad(block->frame == page_align(rec));
|
|
|
|
lock_mutex_enter();
|
|
|
|
lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Restores the state of explicit lock requests on a single record, where the
|
|
state was stored on the infimum of the page. */
|
|
void
|
|
lock_rec_restore_from_page_infimum(
|
|
/*===============================*/
|
|
const buf_block_t* block, /*!< in: buffer block containing rec */
|
|
const rec_t* rec, /*!< in: record whose lock state
|
|
is restored */
|
|
const buf_block_t* donator)/*!< in: page (rec is not
|
|
necessarily on this page)
|
|
whose infimum stored the lock
|
|
state; lock bits are reset on
|
|
the infimum */
|
|
{
|
|
ulint heap_no = page_rec_get_heap_no(rec);
|
|
|
|
lock_mutex_enter();
|
|
|
|
lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*========================= TABLE LOCKS ==============================*/
|
|
|
|
/** Functor for accessing the embedded node within a table lock. */
|
|
struct TableLockGetNode {
|
|
ut_list_node<lock_t>& operator() (lock_t& elem)
|
|
{
|
|
return(elem.un_member.tab_lock.locks);
|
|
}
|
|
};
|
|
|
|
/*********************************************************************//**
|
|
Creates a table lock object and adds it as the last in the lock queue
|
|
of the table. Does NOT check for deadlocks or lock compatibility.
|
|
@return own: new lock object */
|
|
UNIV_INLINE
|
|
lock_t*
|
|
lock_table_create(
|
|
/*==============*/
|
|
lock_t* c_lock, /*!< in: conflicting lock or NULL */
|
|
dict_table_t* table, /*!< in/out: database table
|
|
in dictionary cache */
|
|
ulint type_mode,/*!< in: lock mode possibly ORed with
|
|
LOCK_WAIT */
|
|
trx_t* trx) /*!< in: trx */
|
|
{
|
|
lock_t* lock;
|
|
|
|
ut_ad(table && trx);
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_mutex_own(trx));
|
|
|
|
check_trx_state(trx);
|
|
|
|
if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
|
|
++table->n_waiting_or_granted_auto_inc_locks;
|
|
}
|
|
|
|
/* For AUTOINC locking we reuse the lock instance only if
|
|
there is no wait involved else we allocate the waiting lock
|
|
from the transaction lock heap. */
|
|
if (type_mode == LOCK_AUTO_INC) {
|
|
|
|
lock = table->autoinc_lock;
|
|
|
|
table->autoinc_trx = trx;
|
|
|
|
ib_vector_push(trx->autoinc_locks, &lock);
|
|
|
|
} else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
|
|
lock = trx->lock.table_pool[trx->lock.table_cached++];
|
|
} else {
|
|
|
|
lock = static_cast<lock_t*>(
|
|
mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
|
|
|
|
}
|
|
|
|
lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE);
|
|
lock->trx = trx;
|
|
|
|
lock->un_member.tab_lock.table = table;
|
|
|
|
ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
|
|
|
|
UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
|
|
|
|
#ifdef WITH_WSREP
|
|
if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
|
|
ut_list_insert(table->locks, c_lock, lock, TableLockGetNode());
|
|
if (wsrep_debug) {
|
|
ib::info() << "table lock BF conflict for " <<
|
|
c_lock->trx->id;
|
|
ib::info() << " SQL: "
|
|
<< wsrep_thd_query(c_lock->trx->mysql_thd);
|
|
}
|
|
} else {
|
|
ut_list_append(table->locks, lock, TableLockGetNode());
|
|
}
|
|
if (c_lock) {
|
|
ut_ad(!trx_mutex_own(c_lock->trx));
|
|
trx_mutex_enter(c_lock->trx);
|
|
}
|
|
|
|
if (c_lock && c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
|
|
c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;
|
|
|
|
if (wsrep_debug) {
|
|
wsrep_print_wait_locks(c_lock);
|
|
}
|
|
|
|
/* have to release trx mutex for the duration of
|
|
victim lock release. This will eventually call
|
|
lock_grant, which wants to grant trx mutex again
|
|
*/
|
|
/* caller has trx_mutex, have to release for lock cancel */
|
|
trx_mutex_exit(trx);
|
|
lock_cancel_waiting_and_release(c_lock->trx->lock.wait_lock);
|
|
trx_mutex_enter(trx);
|
|
|
|
/* trx might not wait for c_lock, but some other lock
|
|
does not matter if wait_lock was released above
|
|
*/
|
|
if (c_lock->trx->lock.wait_lock == c_lock) {
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
}
|
|
|
|
if (wsrep_debug) {
|
|
ib::info() << "WSREP: c_lock canceled " << c_lock->trx->id;
|
|
ib::info() << " SQL: "
|
|
<< wsrep_thd_query(c_lock->trx->mysql_thd);
|
|
}
|
|
}
|
|
|
|
if (c_lock) {
|
|
trx_mutex_exit(c_lock->trx);
|
|
}
|
|
#else
|
|
ut_list_append(table->locks, lock, TableLockGetNode());
|
|
#endif /* WITH_WSREP */
|
|
|
|
if (type_mode & LOCK_WAIT) {
|
|
|
|
lock_set_lock_and_trx_wait(lock, trx);
|
|
}
|
|
|
|
lock->trx->lock.table_locks.push_back(lock);
|
|
|
|
MONITOR_INC(MONITOR_TABLELOCK_CREATED);
|
|
MONITOR_INC(MONITOR_NUM_TABLELOCK);
|
|
|
|
return(lock);
|
|
}
|
|
/** Create a table lock object when there is no conflicting lock.
Forwards to the four-argument lock_table_create() with c_lock = NULL.
@param[in,out]	table		database table in dictionary cache
@param[in]	type_mode	lock mode, possibly ORed with LOCK_WAIT
@param[in,out]	trx		transaction requesting the lock
@return own: new lock object */
UNIV_INLINE
lock_t*
lock_table_create(
	dict_table_t*	table,
	ulint		type_mode,
	trx_t*		trx)
{
	lock_t*	const	no_conflict = NULL;

	return lock_table_create(no_conflict, table, type_mode, trx);
}
|
|
|
|
/*************************************************************//**
|
|
Pops autoinc lock requests from the transaction's autoinc_locks. We
|
|
handle the case where there are gaps in the array and they need to
|
|
be popped off the stack. */
|
|
UNIV_INLINE
|
|
void
|
|
lock_table_pop_autoinc_locks(
|
|
/*=========================*/
|
|
trx_t* trx) /*!< in/out: transaction that owns the AUTOINC locks */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
|
|
|
|
/* Skip any gaps, gaps are NULL lock entries in the
|
|
trx->autoinc_locks vector. */
|
|
|
|
do {
|
|
ib_vector_pop(trx->autoinc_locks);
|
|
|
|
if (ib_vector_is_empty(trx->autoinc_locks)) {
|
|
return;
|
|
}
|
|
|
|
} while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes an autoinc lock request from the transaction's autoinc_locks. */
|
|
UNIV_INLINE
|
|
void
|
|
lock_table_remove_autoinc_lock(
|
|
/*===========================*/
|
|
lock_t* lock, /*!< in: table lock */
|
|
trx_t* trx) /*!< in/out: transaction that owns the lock */
|
|
{
|
|
lock_t* autoinc_lock;
|
|
lint i = ib_vector_size(trx->autoinc_locks) - 1;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
|
|
ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
|
|
ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
|
|
|
|
/* With stored functions and procedures the user may drop
|
|
a table within the same "statement". This special case has
|
|
to be handled by deleting only those AUTOINC locks that were
|
|
held by the table being dropped. */
|
|
|
|
autoinc_lock = *static_cast<lock_t**>(
|
|
ib_vector_get(trx->autoinc_locks, i));
|
|
|
|
/* This is the default fast case. */
|
|
|
|
if (autoinc_lock == lock) {
|
|
lock_table_pop_autoinc_locks(trx);
|
|
} else {
|
|
/* The last element should never be NULL */
|
|
ut_a(autoinc_lock != NULL);
|
|
|
|
/* Handle freeing the locks from within the stack. */
|
|
|
|
while (--i >= 0) {
|
|
autoinc_lock = *static_cast<lock_t**>(
|
|
ib_vector_get(trx->autoinc_locks, i));
|
|
|
|
if (autoinc_lock == lock) {
|
|
void* null_var = NULL;
|
|
ib_vector_set(trx->autoinc_locks, i, &null_var);
|
|
return;
|
|
}
|
|
}
|
|
|
|
/* Must find the autoinc lock. */
|
|
ut_error;
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes a table lock request from the queue and the trx list of locks;
|
|
this is a low-level function which does NOT check if waiting requests
|
|
can now be granted. */
|
|
UNIV_INLINE
|
|
void
|
|
lock_table_remove_low(
|
|
/*==================*/
|
|
lock_t* lock) /*!< in/out: table lock */
|
|
{
|
|
trx_t* trx;
|
|
dict_table_t* table;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
trx = lock->trx;
|
|
table = lock->un_member.tab_lock.table;
|
|
|
|
/* Remove the table from the transaction's AUTOINC vector, if
|
|
the lock that is being released is an AUTOINC lock. */
|
|
if (lock_get_mode(lock) == LOCK_AUTO_INC) {
|
|
|
|
/* The table's AUTOINC lock can get transferred to
|
|
another transaction before we get here. */
|
|
if (table->autoinc_trx == trx) {
|
|
table->autoinc_trx = NULL;
|
|
}
|
|
|
|
/* The locks must be freed in the reverse order from
|
|
the one in which they were acquired. This is to avoid
|
|
traversing the AUTOINC lock vector unnecessarily.
|
|
|
|
We only store locks that were granted in the
|
|
trx->autoinc_locks vector (see lock_table_create()
|
|
and lock_grant()). Therefore it can be empty and we
|
|
need to check for that. */
|
|
|
|
if (!lock_get_wait(lock)
|
|
&& !ib_vector_is_empty(trx->autoinc_locks)) {
|
|
|
|
lock_table_remove_autoinc_lock(lock, trx);
|
|
}
|
|
|
|
ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
|
|
table->n_waiting_or_granted_auto_inc_locks--;
|
|
}
|
|
|
|
UT_LIST_REMOVE(trx->lock.trx_locks, lock);
|
|
ut_list_remove(table->locks, lock, TableLockGetNode());
|
|
|
|
MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
|
|
MONITOR_DEC(MONITOR_NUM_TABLELOCK);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Enqueues a waiting request for a table lock which cannot be granted
|
|
immediately. Checks for deadlocks.
|
|
@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
|
|
DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
|
|
transaction was chosen as a victim, and we got the lock immediately:
|
|
no need to wait then */
|
|
static
|
|
dberr_t
|
|
lock_table_enqueue_waiting(
|
|
/*=======================*/
|
|
lock_t* c_lock, /*!< in: conflicting lock or NULL */
|
|
ulint mode, /*!< in: lock mode this transaction is
|
|
requesting */
|
|
dict_table_t* table, /*!< in/out: table */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
trx_t* trx;
|
|
lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(!srv_read_only_mode);
|
|
|
|
trx = thr_get_trx(thr);
|
|
ut_ad(trx_mutex_own(trx));
|
|
|
|
/* Test if there already is some other reason to suspend thread:
|
|
we do not enqueue a lock request if the query thread should be
|
|
stopped anyway */
|
|
|
|
if (que_thr_stop(thr)) {
|
|
ut_error;
|
|
|
|
return(DB_QUE_THR_SUSPENDED);
|
|
}
|
|
|
|
switch (trx_get_dict_operation(trx)) {
|
|
case TRX_DICT_OP_NONE:
|
|
break;
|
|
case TRX_DICT_OP_TABLE:
|
|
case TRX_DICT_OP_INDEX:
|
|
ib::error() << "A table lock wait happens in a dictionary"
|
|
" operation. Table " << table->name
|
|
<< ". " << BUG_REPORT_MSG;
|
|
ut_ad(0);
|
|
}
|
|
|
|
#ifdef WITH_WSREP
|
|
if (trx->lock.was_chosen_as_deadlock_victim) {
|
|
return(DB_DEADLOCK);
|
|
}
|
|
#endif /* WITH_WSREP */
|
|
|
|
/* Enqueue the lock request that will wait to be granted */
|
|
lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx);
|
|
|
|
const trx_t* victim_trx =
|
|
DeadlockChecker::check_and_resolve(lock, trx);
|
|
|
|
if (victim_trx != 0) {
|
|
ut_ad(victim_trx == trx);
|
|
|
|
/* The order here is important, we don't want to
|
|
lose the state of the lock before calling remove. */
|
|
lock_table_remove_low(lock);
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
|
|
return(DB_DEADLOCK);
|
|
|
|
} else if (trx->lock.wait_lock == NULL) {
|
|
/* Deadlock resolution chose another transaction as a victim,
|
|
and we accidentally got our lock granted! */
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
trx->lock.que_state = TRX_QUE_LOCK_WAIT;
|
|
|
|
trx->lock.wait_started = ut_time();
|
|
trx->lock.was_chosen_as_deadlock_victim = false;
|
|
|
|
ut_a(que_thr_stop(thr));
|
|
|
|
MONITOR_INC(MONITOR_TABLELOCK_WAIT);
|
|
|
|
return(DB_LOCK_WAIT);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if other transactions have an incompatible mode lock request in
|
|
the lock queue.
|
|
@return lock or NULL */
|
|
UNIV_INLINE
|
|
const lock_t*
|
|
lock_table_other_has_incompatible(
|
|
/*==============================*/
|
|
const trx_t* trx, /*!< in: transaction, or NULL if all
|
|
transactions should be included */
|
|
ulint wait, /*!< in: LOCK_WAIT if also
|
|
waiting locks are taken into
|
|
account, or 0 if not */
|
|
const dict_table_t* table, /*!< in: table */
|
|
lock_mode mode) /*!< in: lock mode */
|
|
{
|
|
const lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
for (lock = UT_LIST_GET_LAST(table->locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
|
|
|
|
if (lock->trx != trx
|
|
&& !lock_mode_compatible(lock_get_mode(lock), mode)
|
|
&& (wait || !lock_get_wait(lock))) {
|
|
|
|
#ifdef WITH_WSREP
|
|
if (wsrep_on(lock->trx->mysql_thd)) {
|
|
if (wsrep_debug) {
|
|
ib::info() << "WSREP: table lock abort for table:"
|
|
<< table->name.m_name;
|
|
ib::info() << " SQL: "
|
|
<< wsrep_thd_query(lock->trx->mysql_thd);
|
|
}
|
|
trx_mutex_enter(lock->trx);
|
|
wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
|
|
trx_mutex_exit(lock->trx);
|
|
}
|
|
#endif /* WITH_WSREP */
|
|
|
|
return(lock);
|
|
}
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Locks the specified database table in the mode given. If the lock cannot
|
|
be granted immediately, the query thread is put to wait.
|
|
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
|
|
dberr_t
|
|
lock_table(
|
|
/*=======*/
|
|
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
|
|
does nothing */
|
|
dict_table_t* table, /*!< in/out: database table
|
|
in dictionary cache */
|
|
lock_mode mode, /*!< in: lock mode */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
trx_t* trx;
|
|
dberr_t err;
|
|
const lock_t* wait_for;
|
|
|
|
ut_ad(table && thr);
|
|
|
|
/* Given limited visibility of temp-table we can avoid
|
|
locking overhead */
|
|
if ((flags & BTR_NO_LOCKING_FLAG)
|
|
|| srv_read_only_mode
|
|
|| dict_table_is_temporary(table)) {
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
ut_a(flags == 0);
|
|
|
|
trx = thr_get_trx(thr);
|
|
|
|
/* Look for equal or stronger locks the same trx already
|
|
has on the table. No need to acquire the lock mutex here
|
|
because only this transacton can add/access table locks
|
|
to/from trx_t::table_locks. */
|
|
|
|
if (lock_table_has(trx, table, mode)) {
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
/* Read only transactions can write to temp tables, we don't want
|
|
to promote them to RW transactions. Their updates cannot be visible
|
|
to other transactions. Therefore we can keep them out
|
|
of the read views. */
|
|
|
|
if ((mode == LOCK_IX || mode == LOCK_X)
|
|
&& !trx->read_only
|
|
&& trx->rsegs.m_redo.rseg == 0) {
|
|
|
|
trx_set_rw_mode(trx);
|
|
}
|
|
|
|
lock_mutex_enter();
|
|
|
|
DBUG_EXECUTE_IF("fatal-semaphore-timeout",
|
|
{ os_thread_sleep(3600000000LL); });
|
|
|
|
/* We have to check if the new lock is compatible with any locks
|
|
other transactions have in the table lock queue. */
|
|
|
|
wait_for = lock_table_other_has_incompatible(
|
|
trx, LOCK_WAIT, table, mode);
|
|
|
|
trx_mutex_enter(trx);
|
|
|
|
/* Another trx has a request on the table in an incompatible
|
|
mode: this trx may have to wait */
|
|
|
|
if (wait_for != NULL) {
|
|
err = lock_table_enqueue_waiting((lock_t*)wait_for, mode | flags, table, thr);
|
|
} else {
|
|
lock_table_create(table, mode | flags, trx);
|
|
|
|
ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
|
|
|
|
err = DB_SUCCESS;
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
|
|
trx_mutex_exit(trx);
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Creates a table IX lock object for a resurrected transaction. */
|
|
void
|
|
lock_table_ix_resurrect(
|
|
/*====================*/
|
|
dict_table_t* table, /*!< in/out: table */
|
|
trx_t* trx) /*!< in/out: transaction */
|
|
{
|
|
ut_ad(trx->is_recovered);
|
|
|
|
if (lock_table_has(trx, table, LOCK_IX)) {
|
|
return;
|
|
}
|
|
|
|
lock_mutex_enter();
|
|
|
|
/* We have to check if the new lock is compatible with any locks
|
|
other transactions have in the table lock queue. */
|
|
|
|
ut_ad(!lock_table_other_has_incompatible(
|
|
trx, LOCK_WAIT, table, LOCK_IX));
|
|
|
|
trx_mutex_enter(trx);
|
|
lock_table_create(table, LOCK_IX, trx);
|
|
lock_mutex_exit();
|
|
trx_mutex_exit(trx);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if a waiting table lock request still has to wait in a queue.
|
|
@return TRUE if still has to wait */
|
|
static
|
|
bool
|
|
lock_table_has_to_wait_in_queue(
|
|
/*============================*/
|
|
const lock_t* wait_lock) /*!< in: waiting table lock */
|
|
{
|
|
const dict_table_t* table;
|
|
const lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(lock_get_wait(wait_lock));
|
|
|
|
table = wait_lock->un_member.tab_lock.table;
|
|
|
|
for (lock = UT_LIST_GET_FIRST(table->locks);
|
|
lock != wait_lock;
|
|
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
|
|
|
|
if (lock_has_to_wait(wait_lock, lock)) {
|
|
|
|
return(true);
|
|
}
|
|
}
|
|
|
|
return(false);
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes a table lock request, waiting or granted, from the queue and grants
|
|
locks to other transactions in the queue, if they now are entitled to a
|
|
lock. */
|
|
static
|
|
void
|
|
lock_table_dequeue(
|
|
/*===============*/
|
|
lock_t* in_lock)/*!< in/out: table lock object; transactions waiting
|
|
behind will get their lock requests granted, if
|
|
they are now qualified to it */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
|
|
|
|
lock_t* lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
|
|
|
|
lock_table_remove_low(in_lock);
|
|
|
|
/* Check if waiting locks in the queue can now be granted: grant
|
|
locks if there are no conflicting locks ahead. */
|
|
|
|
for (/* No op */;
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
|
|
|
|
if (lock_get_wait(lock)
|
|
&& !lock_table_has_to_wait_in_queue(lock)) {
|
|
|
|
/* Grant the lock */
|
|
ut_ad(in_lock->trx != lock->trx);
|
|
lock_grant(lock, false);
|
|
}
|
|
}
|
|
}
|
|
|
|
/** Sets a lock on a table based on the given mode.
|
|
@param[in] table table to lock
|
|
@param[in,out] trx transaction
|
|
@param[in] mode LOCK_X or LOCK_S
|
|
@return error code or DB_SUCCESS. */
|
|
dberr_t
|
|
lock_table_for_trx(
|
|
dict_table_t* table,
|
|
trx_t* trx,
|
|
enum lock_mode mode)
|
|
{
|
|
mem_heap_t* heap;
|
|
que_thr_t* thr;
|
|
dberr_t err;
|
|
sel_node_t* node;
|
|
heap = mem_heap_create(512);
|
|
|
|
node = sel_node_create(heap);
|
|
thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
|
|
thr->graph->state = QUE_FORK_ACTIVE;
|
|
|
|
/* We use the select query graph as the dummy graph needed
|
|
in the lock module call */
|
|
|
|
thr = static_cast<que_thr_t*>(
|
|
que_fork_get_first_thr(
|
|
static_cast<que_fork_t*>(que_node_get_parent(thr))));
|
|
|
|
que_thr_move_to_run_state_for_mysql(thr, trx);
|
|
|
|
run_again:
|
|
thr->run_node = thr;
|
|
thr->prev_node = thr->common.parent;
|
|
|
|
err = lock_table(0, table, mode, thr);
|
|
|
|
trx->error_state = err;
|
|
|
|
if (UNIV_LIKELY(err == DB_SUCCESS)) {
|
|
que_thr_stop_for_mysql_no_error(thr, trx);
|
|
} else {
|
|
que_thr_stop_for_mysql(thr);
|
|
|
|
if (err != DB_QUE_THR_SUSPENDED) {
|
|
bool was_lock_wait;
|
|
|
|
was_lock_wait = row_mysql_handle_errors(
|
|
&err, trx, thr, NULL);
|
|
|
|
if (was_lock_wait) {
|
|
goto run_again;
|
|
}
|
|
} else {
|
|
que_thr_t* run_thr;
|
|
que_node_t* parent;
|
|
|
|
parent = que_node_get_parent(thr);
|
|
|
|
run_thr = que_fork_start_command(
|
|
static_cast<que_fork_t*>(parent));
|
|
|
|
ut_a(run_thr == thr);
|
|
|
|
/* There was a lock wait but the thread was not
|
|
in a ready to run or running state. */
|
|
trx->error_state = DB_LOCK_WAIT;
|
|
|
|
goto run_again;
|
|
|
|
}
|
|
}
|
|
|
|
que_graph_free(thr->graph);
|
|
trx->op_info = "";
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*=========================== LOCK RELEASE ==============================*/
|
|
static
|
|
void
|
|
lock_grant_and_move_on_rec(
|
|
hash_table_t* lock_hash,
|
|
lock_t* first_lock,
|
|
ulint heap_no)
|
|
{
|
|
lock_t* lock;
|
|
lock_t* previous;
|
|
ulint space;
|
|
ulint page_no;
|
|
ulint rec_fold;
|
|
|
|
space = first_lock->un_member.rec_lock.space;
|
|
page_no = first_lock->un_member.rec_lock.page_no;
|
|
rec_fold = lock_rec_fold(space, page_no);
|
|
|
|
previous = (lock_t *) hash_get_nth_cell(lock_hash,
|
|
hash_calc_hash(rec_fold, lock_hash))->node;
|
|
if (previous == NULL) {
|
|
return;
|
|
}
|
|
if (previous == first_lock) {
|
|
lock = previous;
|
|
} else {
|
|
while (previous->hash &&
|
|
previous->hash != first_lock) {
|
|
previous = previous->hash;
|
|
}
|
|
lock = previous->hash;
|
|
}
|
|
/* Grant locks if there are no conflicting locks ahead.
|
|
Move granted locks to the head of the list. */
|
|
for (;lock != NULL;) {
|
|
|
|
/* If the lock is a wait lock on this page, and it does not need to wait. */
|
|
if (lock->un_member.rec_lock.space == space
|
|
&& lock->un_member.rec_lock.page_no == page_no
|
|
&& lock_rec_get_nth_bit(lock, heap_no)
|
|
&& lock_get_wait(lock)
|
|
&& !lock_rec_has_to_wait_in_queue(lock)) {
|
|
|
|
lock_grant(lock, false);
|
|
|
|
if (previous != NULL) {
|
|
/* Move the lock to the head of the list. */
|
|
HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
|
|
lock_rec_insert_to_head(lock, rec_fold);
|
|
} else {
|
|
/* Already at the head of the list. */
|
|
previous = lock;
|
|
}
|
|
/* Move on to the next lock. */
|
|
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
|
|
} else {
|
|
previous = lock;
|
|
lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
|
|
}
|
|
}
|
|
}
|
|
|
|
/*************************************************************//**
|
|
Removes a granted record lock of a transaction from the queue and grants
|
|
locks to other transactions waiting in the queue if they now are entitled
|
|
to a lock. */
|
|
void
|
|
lock_rec_unlock(
|
|
/*============*/
|
|
trx_t* trx, /*!< in/out: transaction that has
|
|
set a record lock */
|
|
const buf_block_t* block, /*!< in: buffer block containing rec */
|
|
const rec_t* rec, /*!< in: record */
|
|
lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
|
|
{
|
|
lock_t* first_lock;
|
|
lock_t* lock;
|
|
ulint heap_no;
|
|
|
|
ut_ad(trx);
|
|
ut_ad(rec);
|
|
ut_ad(block->frame == page_align(rec));
|
|
ut_ad(!trx->lock.wait_lock);
|
|
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
|
|
|
|
heap_no = page_rec_get_heap_no(rec);
|
|
|
|
lock_mutex_enter();
|
|
trx_mutex_enter(trx);
|
|
|
|
first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
|
|
|
|
/* Find the last lock with the same lock_mode and transaction
|
|
on the record. */
|
|
|
|
for (lock = first_lock; lock != NULL;
|
|
lock = lock_rec_get_next(heap_no, lock)) {
|
|
if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
|
|
goto released;
|
|
}
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
trx_mutex_exit(trx);
|
|
|
|
{
|
|
ib::error err;
|
|
err << "Unlock row could not find a " << lock_mode
|
|
<< " mode lock on the record. Current statement: ";
|
|
size_t stmt_len;
|
|
if (const char* stmt = innobase_get_stmt_unsafe(
|
|
trx->mysql_thd, &stmt_len)) {
|
|
err.write(stmt, stmt_len);
|
|
}
|
|
}
|
|
|
|
return;
|
|
|
|
released:
|
|
ut_a(!lock_get_wait(lock));
|
|
lock_rec_reset_nth_bit(lock, heap_no);
|
|
|
|
if (innodb_lock_schedule_algorithm
|
|
== INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
|
|
thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
|
|
|
|
/* Check if we can now grant waiting lock requests */
|
|
|
|
for (lock = first_lock; lock != NULL;
|
|
lock = lock_rec_get_next(heap_no, lock)) {
|
|
if (lock_get_wait(lock)
|
|
&& !lock_rec_has_to_wait_in_queue(lock)) {
|
|
|
|
/* Grant the lock */
|
|
ut_ad(trx != lock->trx);
|
|
lock_grant(lock, false);
|
|
}
|
|
}
|
|
} else {
|
|
lock_grant_and_move_on_rec(lock_sys->rec_hash, first_lock, heap_no);
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
trx_mutex_exit(trx);
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
/** Debug check that a transaction holding an X or IX lock has set the
dict_op code correctly: a transaction that has an X record lock in a
table whose id is below DICT_HDR_FIRST_ID, or an X or IX table lock on
such a table, must have a dict_operation other than TRX_DICT_OP_NONE.
@param[in]	lock	lock to check */
static
void
lock_check_dict_lock(
	const lock_t*	lock)
{
	if (lock_get_type_low(lock) == LOCK_REC) {

		/* Check if the transaction locked a record in a system
		table in X mode. It should have set the dict_op code
		correctly if it did. (An assertion that the mode is not
		LOCK_IX used to follow the LOCK_X test below; it could
		never fail and was removed.) */
		if (lock->index->table->id < DICT_HDR_FIRST_ID
		    && lock_get_mode(lock) == LOCK_X) {

			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
		}
	} else {
		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);

		const dict_table_t*	table
			= lock->un_member.tab_lock.table;

		/* Check if the transaction locked a system table in X or
		IX mode. It should have set the dict_op code correctly if
		it did. */
		if (table->id < DICT_HDR_FIRST_ID
		    && (lock_get_mode(lock) == LOCK_X
			|| lock_get_mode(lock) == LOCK_IX)) {

			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
		}
	}
}
#endif /* UNIV_DEBUG */
|
|
|
|
/*********************************************************************//**
|
|
Releases transaction locks, and releases possible other transactions waiting
|
|
because of these locks. */
|
|
static
|
|
void
|
|
lock_release(
|
|
/*=========*/
|
|
trx_t* trx) /*!< in/out: transaction */
|
|
{
|
|
lock_t* lock;
|
|
ulint count = 0;
|
|
trx_id_t max_trx_id = trx_sys_get_max_trx_id();
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(!trx_mutex_own(trx));
|
|
|
|
for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
|
|
|
|
ut_d(lock_check_dict_lock(lock));
|
|
|
|
if (lock_get_type_low(lock) == LOCK_REC) {
|
|
|
|
lock_rec_dequeue_from_page(lock);
|
|
} else {
|
|
dict_table_t* table;
|
|
|
|
table = lock->un_member.tab_lock.table;
|
|
|
|
if (lock_get_mode(lock) != LOCK_IS
|
|
&& trx->undo_no != 0) {
|
|
|
|
/* The trx may have modified the table. We
|
|
block the use of the MySQL query cache for
|
|
all currently active transactions. */
|
|
|
|
table->query_cache_inv_id = max_trx_id;
|
|
}
|
|
|
|
lock_table_dequeue(lock);
|
|
}
|
|
|
|
if (count == LOCK_RELEASE_INTERVAL) {
|
|
/* Release the mutex for a while, so that we
|
|
do not monopolize it */
|
|
|
|
lock_mutex_exit();
|
|
|
|
lock_mutex_enter();
|
|
|
|
count = 0;
|
|
}
|
|
|
|
++count;
|
|
}
|
|
}
|
|
|
|
/** Evaluates to true if the mode of a lock is LOCK_S or LOCK_X.
The argument is expanded twice.
@param lock	lock object to test */
#define IS_LOCK_S_OR_X(lock)				\
	(lock_get_mode(lock) == LOCK_S			\
	 || lock_get_mode(lock) == LOCK_X)
|
|
|
|
/*********************************************************************//**
|
|
Removes table locks of the transaction on a table to be dropped. */
|
|
static
|
|
void
|
|
lock_trx_table_locks_remove(
|
|
/*========================*/
|
|
const lock_t* lock_to_remove) /*!< in: lock to remove */
|
|
{
|
|
trx_t* trx = lock_to_remove->trx;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
/* It is safe to read this because we are holding the lock mutex */
|
|
if (!trx->lock.cancel) {
|
|
trx_mutex_enter(trx);
|
|
} else {
|
|
ut_ad(trx_mutex_own(trx));
|
|
}
|
|
|
|
typedef lock_pool_t::reverse_iterator iterator;
|
|
|
|
iterator end = trx->lock.table_locks.rend();
|
|
|
|
for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
|
|
|
|
const lock_t* lock = *it;
|
|
|
|
if (lock == NULL) {
|
|
continue;
|
|
}
|
|
|
|
ut_a(trx == lock->trx);
|
|
ut_a(lock_get_type_low(lock) & LOCK_TABLE);
|
|
ut_a(lock->un_member.tab_lock.table != NULL);
|
|
|
|
if (lock == lock_to_remove) {
|
|
|
|
*it = NULL;
|
|
|
|
if (!trx->lock.cancel) {
|
|
trx_mutex_exit(trx);
|
|
}
|
|
|
|
return;
|
|
}
|
|
}
|
|
|
|
if (!trx->lock.cancel) {
|
|
trx_mutex_exit(trx);
|
|
}
|
|
|
|
/* Lock must exist in the vector. */
|
|
ut_error;
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Removes locks of a transaction on a table to be dropped.
|
|
If remove_also_table_sx_locks is TRUE then table-level S and X locks are
|
|
also removed in addition to other table-level and record-level locks.
|
|
No lock that is going to be removed is allowed to be a wait lock. */
|
|
static
|
|
void
|
|
lock_remove_all_on_table_for_trx(
|
|
/*=============================*/
|
|
dict_table_t* table, /*!< in: table to be dropped */
|
|
trx_t* trx, /*!< in: a transaction */
|
|
ibool remove_also_table_sx_locks)/*!< in: also removes
|
|
table S and X locks */
|
|
{
|
|
lock_t* lock;
|
|
lock_t* prev_lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
|
|
lock != NULL;
|
|
lock = prev_lock) {
|
|
|
|
prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
|
|
|
|
if (lock_get_type_low(lock) == LOCK_REC
|
|
&& lock->index->table == table) {
|
|
ut_a(!lock_get_wait(lock));
|
|
|
|
lock_rec_discard(lock);
|
|
} else if (lock_get_type_low(lock) & LOCK_TABLE
|
|
&& lock->un_member.tab_lock.table == table
|
|
&& (remove_also_table_sx_locks
|
|
|| !IS_LOCK_S_OR_X(lock))) {
|
|
|
|
ut_a(!lock_get_wait(lock));
|
|
|
|
lock_trx_table_locks_remove(lock);
|
|
lock_table_remove_low(lock);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Remove any explicit record locks held by recovering transactions on
|
|
the table.
|
|
@return number of recovered transactions examined */
|
|
static
|
|
ulint
|
|
lock_remove_recovered_trx_record_locks(
|
|
/*===================================*/
|
|
dict_table_t* table) /*!< in: check if there are any locks
|
|
held on records in this table or on the
|
|
table itself */
|
|
{
|
|
ut_a(table != NULL);
|
|
ut_ad(lock_mutex_own());
|
|
|
|
ulint n_recovered_trx = 0;
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
|
|
trx != NULL;
|
|
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
|
|
|
|
assert_trx_in_rw_list(trx);
|
|
|
|
if (!trx->is_recovered) {
|
|
continue;
|
|
}
|
|
|
|
/* Because we are holding the lock_sys->mutex,
|
|
implicit locks cannot be converted to explicit ones
|
|
while we are scanning the explicit locks. */
|
|
|
|
lock_t* next_lock;
|
|
|
|
for (lock_t* lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
|
|
lock != NULL;
|
|
lock = next_lock) {
|
|
|
|
ut_a(lock->trx == trx);
|
|
|
|
/* Recovered transactions can't wait on a lock. */
|
|
|
|
ut_a(!lock_get_wait(lock));
|
|
|
|
next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
|
|
|
|
switch (lock_get_type_low(lock)) {
|
|
default:
|
|
ut_error;
|
|
case LOCK_TABLE:
|
|
if (lock->un_member.tab_lock.table == table) {
|
|
lock_trx_table_locks_remove(lock);
|
|
lock_table_remove_low(lock);
|
|
}
|
|
break;
|
|
case LOCK_REC:
|
|
if (lock->index->table == table) {
|
|
lock_rec_discard(lock);
|
|
}
|
|
}
|
|
}
|
|
|
|
++n_recovered_trx;
|
|
}
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
return(n_recovered_trx);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Removes locks on a table to be dropped or truncated.
|
|
If remove_also_table_sx_locks is TRUE then table-level S and X locks are
|
|
also removed in addition to other table-level and record-level locks.
|
|
No lock, that is going to be removed, is allowed to be a wait lock. */
|
|
void
|
|
lock_remove_all_on_table(
|
|
/*=====================*/
|
|
dict_table_t* table, /*!< in: table to be dropped
|
|
or truncated */
|
|
ibool remove_also_table_sx_locks)/*!< in: also removes
|
|
table S and X locks */
|
|
{
|
|
lock_t* lock;
|
|
|
|
lock_mutex_enter();
|
|
|
|
for (lock = UT_LIST_GET_FIRST(table->locks);
|
|
lock != NULL;
|
|
/* No op */) {
|
|
|
|
lock_t* prev_lock;
|
|
|
|
prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
|
|
|
|
/* If we should remove all locks (remove_also_table_sx_locks
|
|
is TRUE), or if the lock is not table-level S or X lock,
|
|
then check we are not going to remove a wait lock. */
|
|
if (remove_also_table_sx_locks
|
|
|| !(lock_get_type(lock) == LOCK_TABLE
|
|
&& IS_LOCK_S_OR_X(lock))) {
|
|
|
|
ut_a(!lock_get_wait(lock));
|
|
}
|
|
|
|
lock_remove_all_on_table_for_trx(
|
|
table, lock->trx, remove_also_table_sx_locks);
|
|
|
|
if (prev_lock == NULL) {
|
|
if (lock == UT_LIST_GET_FIRST(table->locks)) {
|
|
/* lock was not removed, pick its successor */
|
|
lock = UT_LIST_GET_NEXT(
|
|
un_member.tab_lock.locks, lock);
|
|
} else {
|
|
/* lock was removed, pick the first one */
|
|
lock = UT_LIST_GET_FIRST(table->locks);
|
|
}
|
|
} else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
|
|
prev_lock) != lock) {
|
|
/* If lock was removed by
|
|
lock_remove_all_on_table_for_trx() then pick the
|
|
successor of prev_lock ... */
|
|
lock = UT_LIST_GET_NEXT(
|
|
un_member.tab_lock.locks, prev_lock);
|
|
} else {
|
|
/* ... otherwise pick the successor of lock. */
|
|
lock = UT_LIST_GET_NEXT(
|
|
un_member.tab_lock.locks, lock);
|
|
}
|
|
}
|
|
|
|
/* Note: Recovered transactions don't have table level IX or IS locks
|
|
but can have implicit record locks that have been converted to explicit
|
|
record locks. Such record locks cannot be freed by traversing the
|
|
transaction lock list in dict_table_t (as above). */
|
|
|
|
if (!lock_sys->rollback_complete
|
|
&& lock_remove_recovered_trx_record_locks(table) == 0) {
|
|
|
|
lock_sys->rollback_complete = TRUE;
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
/*===================== VALIDATION AND DEBUGGING ====================*/
|
|
|
|
/** Print info of a table lock.
|
|
@param[in,out] file output stream
|
|
@param[in] lock table lock */
|
|
static
|
|
void
|
|
lock_table_print(FILE* file, const lock_t* lock)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_a(lock_get_type_low(lock) == LOCK_TABLE);
|
|
|
|
fputs("TABLE LOCK table ", file);
|
|
ut_print_name(file, lock->trx,
|
|
lock->un_member.tab_lock.table->name.m_name);
|
|
fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
|
|
|
|
if (lock_get_mode(lock) == LOCK_S) {
|
|
fputs(" lock mode S", file);
|
|
} else if (lock_get_mode(lock) == LOCK_X) {
|
|
ut_ad(lock->trx->id != 0);
|
|
fputs(" lock mode X", file);
|
|
} else if (lock_get_mode(lock) == LOCK_IS) {
|
|
fputs(" lock mode IS", file);
|
|
} else if (lock_get_mode(lock) == LOCK_IX) {
|
|
ut_ad(lock->trx->id != 0);
|
|
fputs(" lock mode IX", file);
|
|
} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
|
|
fputs(" lock mode AUTO-INC", file);
|
|
} else {
|
|
fprintf(file, " unknown lock mode %lu",
|
|
(ulong) lock_get_mode(lock));
|
|
}
|
|
|
|
if (lock_get_wait(lock)) {
|
|
fputs(" waiting", file);
|
|
}
|
|
|
|
putc('\n', file);
|
|
}
|
|
|
|
/** Print info of a record lock.
|
|
@param[in,out] file output stream
|
|
@param[in] lock record lock */
|
|
static
|
|
void
|
|
lock_rec_print(FILE* file, const lock_t* lock)
|
|
{
|
|
ulint space;
|
|
ulint page_no;
|
|
mtr_t mtr;
|
|
mem_heap_t* heap = NULL;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint* offsets = offsets_;
|
|
rec_offs_init(offsets_);
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_a(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
space = lock->un_member.rec_lock.space;
|
|
page_no = lock->un_member.rec_lock.page_no;
|
|
|
|
fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu "
|
|
"index %s of table ",
|
|
(ulong) space, (ulong) page_no,
|
|
(ulong) lock_rec_get_n_bits(lock),
|
|
lock->index->name());
|
|
ut_print_name(file, lock->trx, lock->index->table_name);
|
|
fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
|
|
|
|
if (lock_get_mode(lock) == LOCK_S) {
|
|
fputs(" lock mode S", file);
|
|
} else if (lock_get_mode(lock) == LOCK_X) {
|
|
fputs(" lock_mode X", file);
|
|
} else {
|
|
ut_error;
|
|
}
|
|
|
|
if (lock_rec_get_gap(lock)) {
|
|
fputs(" locks gap before rec", file);
|
|
}
|
|
|
|
if (lock_rec_get_rec_not_gap(lock)) {
|
|
fputs(" locks rec but not gap", file);
|
|
}
|
|
|
|
if (lock_rec_get_insert_intention(lock)) {
|
|
fputs(" insert intention", file);
|
|
}
|
|
|
|
if (lock_get_wait(lock)) {
|
|
fputs(" waiting", file);
|
|
}
|
|
|
|
mtr_start(&mtr);
|
|
|
|
putc('\n', file);
|
|
|
|
const buf_block_t* block;
|
|
|
|
block = buf_page_try_get(page_id_t(space, page_no), &mtr);
|
|
|
|
for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
|
|
|
|
if (!lock_rec_get_nth_bit(lock, i)) {
|
|
continue;
|
|
}
|
|
|
|
fprintf(file, "Record lock, heap no %lu", (ulong) i);
|
|
|
|
if (block) {
|
|
const rec_t* rec;
|
|
|
|
rec = page_find_rec_with_heap_no(
|
|
buf_block_get_frame(block), i);
|
|
|
|
offsets = rec_get_offsets(
|
|
rec, lock->index, offsets,
|
|
ULINT_UNDEFINED, &heap);
|
|
|
|
putc(' ', file);
|
|
rec_print_new(file, rec, offsets);
|
|
}
|
|
|
|
putc('\n', file);
|
|
}
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
if (heap) {
|
|
mem_heap_free(heap);
|
|
}
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
|
|
/* Print the number of lock structs from lock_print_info_summary() only
|
|
in non-production builds for performance reasons, see
|
|
http://bugs.mysql.com/36942 */
|
|
#define PRINT_NUM_OF_LOCK_STRUCTS
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
#ifdef PRINT_NUM_OF_LOCK_STRUCTS
|
|
/*********************************************************************//**
|
|
Calculates the number of record lock structs in the record lock hash table.
|
|
@return number of record locks */
|
|
static
|
|
ulint
|
|
lock_get_n_rec_locks(void)
|
|
/*======================*/
|
|
{
|
|
ulint n_locks = 0;
|
|
ulint i;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
|
|
for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
|
|
const lock_t* lock;
|
|
|
|
for (lock = static_cast<const lock_t*>(
|
|
HASH_GET_FIRST(lock_sys->rec_hash, i));
|
|
lock != 0;
|
|
lock = static_cast<const lock_t*>(
|
|
HASH_GET_NEXT(hash, lock))) {
|
|
|
|
n_locks++;
|
|
}
|
|
}
|
|
|
|
return(n_locks);
|
|
}
|
|
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
|
|
|
|
/*********************************************************************//**
|
|
Prints info of locks for all transactions.
|
|
@return FALSE if not able to obtain lock mutex
|
|
and exits without printing info */
|
|
ibool
|
|
lock_print_info_summary(
|
|
/*====================*/
|
|
FILE* file, /*!< in: file where to print */
|
|
ibool nowait) /*!< in: whether to wait for the lock mutex */
|
|
{
|
|
/* if nowait is FALSE, wait on the lock mutex,
|
|
otherwise return immediately if fail to obtain the
|
|
mutex. */
|
|
if (!nowait) {
|
|
lock_mutex_enter();
|
|
} else if (lock_mutex_enter_nowait()) {
|
|
fputs("FAIL TO OBTAIN LOCK MUTEX,"
|
|
" SKIP LOCK INFO PRINTING\n", file);
|
|
return(FALSE);
|
|
}
|
|
|
|
if (lock_deadlock_found) {
|
|
fputs("------------------------\n"
|
|
"LATEST DETECTED DEADLOCK\n"
|
|
"------------------------\n", file);
|
|
|
|
if (!srv_read_only_mode) {
|
|
ut_copy_file(file, lock_latest_err_file);
|
|
}
|
|
}
|
|
|
|
fputs("------------\n"
|
|
"TRANSACTIONS\n"
|
|
"------------\n", file);
|
|
|
|
fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
|
|
trx_sys_get_max_trx_id());
|
|
|
|
fprintf(file,
|
|
"Purge done for trx's n:o < " TRX_ID_FMT
|
|
" undo n:o < " TRX_ID_FMT " state: ",
|
|
purge_sys->iter.trx_no,
|
|
purge_sys->iter.undo_no);
|
|
|
|
/* Note: We are reading the state without the latch. One because it
|
|
will violate the latching order and two because we are merely querying
|
|
the state of the variable for display. */
|
|
|
|
switch (purge_sys->state){
|
|
case PURGE_STATE_INIT:
|
|
/* Should never be in this state while the system is running. */
|
|
ut_error;
|
|
|
|
case PURGE_STATE_EXIT:
|
|
fprintf(file, "exited");
|
|
break;
|
|
|
|
case PURGE_STATE_DISABLED:
|
|
fprintf(file, "disabled");
|
|
break;
|
|
|
|
case PURGE_STATE_RUN:
|
|
fprintf(file, "running");
|
|
/* Check if it is waiting for more data to arrive. */
|
|
if (!purge_sys->running) {
|
|
fprintf(file, " but idle");
|
|
}
|
|
break;
|
|
|
|
case PURGE_STATE_STOP:
|
|
fprintf(file, "stopped");
|
|
break;
|
|
}
|
|
|
|
fprintf(file, "\n");
|
|
|
|
fprintf(file,
|
|
"History list length %lu\n",
|
|
(ulong) trx_sys->rseg_history_len);
|
|
|
|
#ifdef PRINT_NUM_OF_LOCK_STRUCTS
|
|
fprintf(file,
|
|
"Total number of lock structs in row lock hash table %lu\n",
|
|
(ulong) lock_get_n_rec_locks());
|
|
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
|
|
return(TRUE);
|
|
}
|
|
|
|
/** Functor to print not-started transaction from the mysql_trx_list. */
|
|
|
|
struct PrintNotStarted {
|
|
|
|
PrintNotStarted(FILE* file) : m_file(file) { }
|
|
|
|
void operator()(const trx_t* trx)
|
|
{
|
|
ut_ad(trx->in_mysql_trx_list);
|
|
ut_ad(mutex_own(&trx_sys->mutex));
|
|
|
|
/* See state transitions and locking rules in trx0trx.h */
|
|
|
|
if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
|
|
|
|
fputs("---", m_file);
|
|
trx_print_latched(m_file, trx, 600);
|
|
}
|
|
}
|
|
|
|
FILE* m_file;
|
|
};
|
|
|
|
/** Iterate over a transaction's locks. Keeping track of the
|
|
iterator using an ordinal value. */
|
|
|
|
class TrxLockIterator {
|
|
public:
|
|
TrxLockIterator() { rewind(); }
|
|
|
|
/** Get the m_index(th) lock of a transaction.
|
|
@return current lock or 0 */
|
|
const lock_t* current(const trx_t* trx) const
|
|
{
|
|
lock_t* lock;
|
|
ulint i = 0;
|
|
|
|
for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
|
|
lock != NULL && i < m_index;
|
|
lock = UT_LIST_GET_NEXT(trx_locks, lock), ++i) {
|
|
|
|
/* No op */
|
|
}
|
|
|
|
return(lock);
|
|
}
|
|
|
|
/** Set the ordinal value to 0 */
|
|
void rewind()
|
|
{
|
|
m_index = 0;
|
|
}
|
|
|
|
/** Increment the ordinal value.
|
|
@retun the current index value */
|
|
ulint next()
|
|
{
|
|
return(++m_index);
|
|
}
|
|
|
|
private:
|
|
/** Current iterator position */
|
|
ulint m_index;
|
|
};
|
|
|
|
/** This iterates over both the RW and RO trx_sys lists. We need to keep
|
|
track where the iterator was up to and we do that using an ordinal value. */
|
|
|
|
class TrxListIterator {
|
|
public:
|
|
TrxListIterator() : m_index()
|
|
{
|
|
/* We iterate over the RW trx list first. */
|
|
|
|
m_trx_list = &trx_sys->rw_trx_list;
|
|
}
|
|
|
|
/** Get the current transaction whose ordinality is m_index.
|
|
@return current transaction or 0 */
|
|
|
|
const trx_t* current()
|
|
{
|
|
return(reposition());
|
|
}
|
|
|
|
/** Advance the transaction current ordinal value and reset the
|
|
transaction lock ordinal value */
|
|
|
|
void next()
|
|
{
|
|
++m_index;
|
|
m_lock_iter.rewind();
|
|
}
|
|
|
|
TrxLockIterator& lock_iter()
|
|
{
|
|
return(m_lock_iter);
|
|
}
|
|
|
|
private:
|
|
/** Reposition the "cursor" on the current transaction. If it
|
|
is the first time then the "cursor" will be positioned on the
|
|
first transaction.
|
|
|
|
@return transaction instance or 0 */
|
|
const trx_t* reposition() const
|
|
{
|
|
ulint i;
|
|
trx_t* trx;
|
|
|
|
/* Make the transaction at the ordinal value of m_index
|
|
the current transaction. ie. reposition/restore */
|
|
|
|
for (i = 0, trx = UT_LIST_GET_FIRST(*m_trx_list);
|
|
trx != NULL && (i < m_index);
|
|
trx = UT_LIST_GET_NEXT(trx_list, trx), ++i) {
|
|
|
|
check_trx_state(trx);
|
|
}
|
|
|
|
return(trx);
|
|
}
|
|
|
|
/** Ordinal value of the transaction in the current transaction list */
|
|
ulint m_index;
|
|
|
|
/** Current transaction list */
|
|
trx_ut_list_t* m_trx_list;
|
|
|
|
/** For iterating over a transaction's locks */
|
|
TrxLockIterator m_lock_iter;
|
|
};
|
|
|
|
/** Prints transaction lock wait and MVCC state.
|
|
@param[in,out] file file where to print
|
|
@param[in] trx transaction */
|
|
void
|
|
lock_trx_print_wait_and_mvcc_state(
|
|
FILE* file,
|
|
const trx_t* trx)
|
|
{
|
|
fprintf(file, "---");
|
|
|
|
trx_print_latched(file, trx, 600);
|
|
|
|
const ReadView* read_view = trx_get_read_view(trx);
|
|
|
|
if (read_view != NULL) {
|
|
read_view->print_limits(file);
|
|
}
|
|
|
|
if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
|
|
|
|
fprintf(file,
|
|
"------- TRX HAS BEEN WAITING %lu SEC"
|
|
" FOR THIS LOCK TO BE GRANTED:\n",
|
|
(ulong) difftime(ut_time(), trx->lock.wait_started));
|
|
|
|
if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
|
|
lock_rec_print(file, trx->lock.wait_lock);
|
|
} else {
|
|
lock_table_print(file, trx->lock.wait_lock);
|
|
}
|
|
|
|
fprintf(file, "------------------\n");
|
|
}
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Prints info of locks for a transaction. This function will release the
|
|
lock mutex and the trx_sys_t::mutex if the page was read from disk.
|
|
@return true if page was read from the tablespace */
|
|
static
|
|
bool
|
|
lock_rec_fetch_page(
|
|
/*================*/
|
|
const lock_t* lock) /*!< in: record lock */
|
|
{
|
|
ut_ad(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
ulint space_id = lock->un_member.rec_lock.space;
|
|
fil_space_t* space;
|
|
bool found;
|
|
const page_size_t& page_size = fil_space_get_page_size(space_id,
|
|
&found);
|
|
ulint page_no = lock->un_member.rec_lock.page_no;
|
|
|
|
/* Check if the .ibd file exists. */
|
|
if (found) {
|
|
mtr_t mtr;
|
|
|
|
lock_mutex_exit();
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
|
|
|
|
/* Check if the space is exists or not. only
|
|
when the space is valid, try to get the page. */
|
|
space = fil_space_acquire(space_id);
|
|
if (space) {
|
|
dberr_t err = DB_SUCCESS;
|
|
mtr_start(&mtr);
|
|
buf_page_get_gen(
|
|
page_id_t(space_id, page_no), page_size,
|
|
RW_NO_LATCH, NULL,
|
|
BUF_GET_POSSIBLY_FREED,
|
|
__FILE__, __LINE__, &mtr, &err);
|
|
mtr_commit(&mtr);
|
|
fil_space_release(space);
|
|
}
|
|
|
|
lock_mutex_enter();
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
return(true);
|
|
}
|
|
|
|
return(false);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Prints info of locks for a transaction.
|
|
@return true if all printed, false if latches were released. */
|
|
static
|
|
bool
|
|
lock_trx_print_locks(
|
|
/*=================*/
|
|
FILE* file, /*!< in/out: File to write */
|
|
const trx_t* trx, /*!< in: current transaction */
|
|
TrxLockIterator&iter, /*!< in: transaction lock iterator */
|
|
bool load_block) /*!< in: if true then read block
|
|
from disk */
|
|
{
|
|
const lock_t* lock;
|
|
|
|
/* Iterate over the transaction's locks. */
|
|
while ((lock = iter.current(trx)) != 0) {
|
|
|
|
if (lock_get_type_low(lock) == LOCK_REC) {
|
|
|
|
if (load_block) {
|
|
|
|
/* Note: lock_rec_fetch_page() will
|
|
release both the lock mutex and the
|
|
trx_sys_t::mutex if it does a read
|
|
from disk. */
|
|
|
|
if (lock_rec_fetch_page(lock)) {
|
|
/* We need to resync the
|
|
current transaction. */
|
|
return(false);
|
|
}
|
|
|
|
/* It is a single table tablespace
|
|
and the .ibd file is missing
|
|
(TRUNCATE TABLE probably stole the
|
|
locks): just print the lock without
|
|
attempting to load the page in the
|
|
buffer pool. */
|
|
|
|
fprintf(file,
|
|
"RECORD LOCKS on non-existing"
|
|
" space %u\n",
|
|
lock->un_member.rec_lock.space);
|
|
}
|
|
|
|
/* Print all the record locks on the page from
|
|
the record lock bitmap */
|
|
|
|
lock_rec_print(file, lock);
|
|
|
|
load_block = true;
|
|
|
|
} else {
|
|
ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
|
|
|
|
lock_table_print(file, lock);
|
|
}
|
|
|
|
if (iter.next() >= 10) {
|
|
|
|
fprintf(file,
|
|
"10 LOCKS PRINTED FOR THIS TRX:"
|
|
" SUPPRESSING FURTHER PRINTS\n");
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
return(true);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Prints info of locks for each transaction. This function assumes that the
|
|
caller holds the lock mutex and more importantly it will release the lock
|
|
mutex on behalf of the caller. (This should be fixed in the future). */
|
|
void
|
|
lock_print_info_all_transactions(
|
|
/*=============================*/
|
|
FILE* file) /*!< in/out: file where to print */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
/* First print info on non-active transactions */
|
|
|
|
/* NOTE: information of auto-commit non-locking read-only
|
|
transactions will be omitted here. The information will be
|
|
available from INFORMATION_SCHEMA.INNODB_TRX. */
|
|
|
|
PrintNotStarted print_not_started(file);
|
|
ut_list_map(trx_sys->mysql_trx_list, print_not_started);
|
|
|
|
const trx_t* trx;
|
|
TrxListIterator trx_iter;
|
|
const trx_t* prev_trx = 0;
|
|
|
|
/* Control whether a block should be fetched from the buffer pool. */
|
|
bool load_block = true;
|
|
bool monitor = srv_print_innodb_lock_monitor;
|
|
|
|
while ((trx = trx_iter.current()) != 0) {
|
|
|
|
check_trx_state(trx);
|
|
|
|
if (trx != prev_trx) {
|
|
lock_trx_print_wait_and_mvcc_state(file, trx);
|
|
prev_trx = trx;
|
|
|
|
/* The transaction that read in the page is no
|
|
longer the one that read the page in. We need to
|
|
force a page read. */
|
|
load_block = true;
|
|
}
|
|
|
|
/* If we need to print the locked record contents then we
|
|
need to fetch the containing block from the buffer pool. */
|
|
if (monitor) {
|
|
|
|
/* Print the locks owned by the current transaction. */
|
|
TrxLockIterator& lock_iter = trx_iter.lock_iter();
|
|
|
|
if (!lock_trx_print_locks(
|
|
file, trx, lock_iter, load_block)) {
|
|
|
|
/* Resync trx_iter, the trx_sys->mutex and
|
|
the lock mutex were released. A page was
|
|
successfully read in. We need to print its
|
|
contents on the next call to
|
|
lock_trx_print_locks(). On the next call to
|
|
lock_trx_print_locks() we should simply print
|
|
the contents of the page just read in.*/
|
|
load_block = false;
|
|
|
|
continue;
|
|
}
|
|
}
|
|
|
|
load_block = true;
|
|
|
|
/* All record lock details were printed without fetching
|
|
a page from disk, or we didn't need to print the detail. */
|
|
trx_iter.next();
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
ut_ad(lock_validate());
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
|
|
/*********************************************************************//**
|
|
Find the the lock in the trx_t::trx_lock_t::table_locks vector.
|
|
@return true if found */
|
|
static
|
|
bool
|
|
lock_trx_table_locks_find(
|
|
/*======================*/
|
|
trx_t* trx, /*!< in: trx to validate */
|
|
const lock_t* find_lock) /*!< in: lock to find */
|
|
{
|
|
bool found = false;
|
|
|
|
trx_mutex_enter(trx);
|
|
|
|
typedef lock_pool_t::const_reverse_iterator iterator;
|
|
|
|
iterator end = trx->lock.table_locks.rend();
|
|
|
|
for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
|
|
|
|
const lock_t* lock = *it;
|
|
|
|
if (lock == NULL) {
|
|
|
|
continue;
|
|
|
|
} else if (lock == find_lock) {
|
|
|
|
/* Can't be duplicates. */
|
|
ut_a(!found);
|
|
found = true;
|
|
}
|
|
|
|
ut_a(trx == lock->trx);
|
|
ut_a(lock_get_type_low(lock) & LOCK_TABLE);
|
|
ut_a(lock->un_member.tab_lock.table != NULL);
|
|
}
|
|
|
|
trx_mutex_exit(trx);
|
|
|
|
return(found);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Validates the lock queue on a table.
|
|
@return TRUE if ok */
|
|
static
|
|
ibool
|
|
lock_table_queue_validate(
|
|
/*======================*/
|
|
const dict_table_t* table) /*!< in: table */
|
|
{
|
|
const lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_sys_mutex_own());
|
|
|
|
for (lock = UT_LIST_GET_FIRST(table->locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
|
|
|
|
/* lock->trx->state cannot change from or to NOT_STARTED
|
|
while we are holding the trx_sys->mutex. It may change
|
|
from ACTIVE to PREPARED, but it may not change to
|
|
COMMITTED, because we are holding the lock_sys->mutex. */
|
|
ut_ad(trx_assert_started(lock->trx));
|
|
|
|
if (!lock_get_wait(lock)) {
|
|
|
|
ut_a(!lock_table_other_has_incompatible(
|
|
lock->trx, 0, table,
|
|
lock_get_mode(lock)));
|
|
} else {
|
|
|
|
ut_a(lock_table_has_to_wait_in_queue(lock));
|
|
}
|
|
|
|
ut_a(lock_trx_table_locks_find(lock->trx, lock));
|
|
}
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Validates the lock queue on a single record.
|
|
@return TRUE if ok */
|
|
static
|
|
ibool
|
|
lock_rec_queue_validate(
|
|
/*====================*/
|
|
ibool locked_lock_trx_sys,
|
|
/*!< in: if the caller holds
|
|
both the lock mutex and
|
|
trx_sys_t->lock. */
|
|
const buf_block_t* block, /*!< in: buffer block containing rec */
|
|
const rec_t* rec, /*!< in: record to look at */
|
|
const dict_index_t* index, /*!< in: index, or NULL if not known */
|
|
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
|
|
{
|
|
const trx_t* impl_trx;
|
|
const lock_t* lock;
|
|
ulint heap_no;
|
|
|
|
ut_a(rec);
|
|
ut_a(block->frame == page_align(rec));
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
|
|
ut_ad(lock_mutex_own() == locked_lock_trx_sys);
|
|
ut_ad(!index || dict_index_is_clust(index)
|
|
|| !dict_index_is_online_ddl(index));
|
|
|
|
heap_no = page_rec_get_heap_no(rec);
|
|
|
|
if (!locked_lock_trx_sys) {
|
|
lock_mutex_enter();
|
|
mutex_enter(&trx_sys->mutex);
|
|
}
|
|
|
|
if (!page_rec_is_user_rec(rec)) {
|
|
|
|
for (lock = lock_rec_get_first(lock_sys->rec_hash,
|
|
block, heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next_const(heap_no, lock)) {
|
|
|
|
ut_ad(!trx_is_ac_nl_ro(lock->trx));
|
|
|
|
if (lock_get_wait(lock)) {
|
|
ut_a(lock_rec_has_to_wait_in_queue(lock));
|
|
}
|
|
|
|
if (index != NULL) {
|
|
ut_a(lock->index == index);
|
|
}
|
|
}
|
|
|
|
goto func_exit;
|
|
}
|
|
|
|
if (index == NULL) {
|
|
|
|
/* Nothing we can do */
|
|
|
|
} else if (dict_index_is_clust(index)) {
|
|
trx_id_t trx_id;
|
|
|
|
/* Unlike the non-debug code, this invariant can only succeed
|
|
if the check and assertion are covered by the lock mutex. */
|
|
|
|
trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
|
|
impl_trx = trx_rw_is_active_low(trx_id, NULL);
|
|
|
|
ut_ad(lock_mutex_own());
|
|
/* impl_trx cannot be committed until lock_mutex_exit()
|
|
because lock_trx_release_locks() acquires lock_sys->mutex */
|
|
|
|
if (impl_trx != NULL) {
|
|
const lock_t* other_lock
|
|
= lock_rec_other_has_expl_req(
|
|
LOCK_S, block, true, heap_no,
|
|
impl_trx);
|
|
|
|
/* The impl_trx is holding an implicit lock on the
|
|
given record 'rec'. So there cannot be another
|
|
explicit granted lock. Also, there can be another
|
|
explicit waiting lock only if the impl_trx has an
|
|
explicit granted lock. */
|
|
|
|
if (other_lock != NULL) {
|
|
#ifdef WITH_WSREP
|
|
if (wsrep_on(other_lock->trx->mysql_thd) && !lock_get_wait(other_lock) ) {
|
|
|
|
ib::info() << "WSREP impl BF lock conflict for my impl lock:\n BF:" <<
|
|
((wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
|
|
wsrep_thd_exec_mode(impl_trx->mysql_thd) << " conflict: " <<
|
|
wsrep_thd_conflict_state(impl_trx->mysql_thd, false) << " seqno: " <<
|
|
wsrep_thd_trx_seqno(impl_trx->mysql_thd) << " SQL: " <<
|
|
wsrep_thd_query(impl_trx->mysql_thd);
|
|
|
|
trx_t* otrx = other_lock->trx;
|
|
|
|
ib::info() << "WSREP other lock:\n BF:" <<
|
|
((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
|
|
wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
|
|
wsrep_thd_conflict_state(otrx->mysql_thd, false) << " seqno: " <<
|
|
wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
|
|
wsrep_thd_query(otrx->mysql_thd);
|
|
}
|
|
|
|
if (wsrep_on(other_lock->trx->mysql_thd) && !lock_rec_has_expl(
|
|
LOCK_X | LOCK_REC_NOT_GAP,
|
|
block, heap_no, impl_trx)) {
|
|
ib::info() << "WSREP impl BF lock conflict";
|
|
}
|
|
#else /* !WITH_WSREP */
|
|
ut_a(lock_get_wait(other_lock));
|
|
ut_a(lock_rec_has_expl(
|
|
LOCK_X | LOCK_REC_NOT_GAP,
|
|
block, heap_no, impl_trx));
|
|
#endif /* WITH_WSREP */
|
|
}
|
|
}
|
|
}
|
|
|
|
for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
|
|
lock != NULL;
|
|
lock = lock_rec_get_next_const(heap_no, lock)) {
|
|
|
|
ut_ad(!trx_is_ac_nl_ro(lock->trx));
|
|
|
|
if (index) {
|
|
ut_a(lock->index == index);
|
|
}
|
|
|
|
if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
|
|
|
|
lock_mode mode;
|
|
|
|
if (lock_get_mode(lock) == LOCK_S) {
|
|
mode = LOCK_X;
|
|
} else {
|
|
mode = LOCK_S;
|
|
}
|
|
|
|
const lock_t* other_lock
|
|
= lock_rec_other_has_expl_req(
|
|
mode, block, false, heap_no,
|
|
lock->trx);
|
|
#ifdef WITH_WSREP
|
|
ut_a(!other_lock
|
|
|| wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE)
|
|
|| wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE));
|
|
|
|
#else
|
|
ut_a(!other_lock);
|
|
#endif /* WITH_WSREP */
|
|
} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
|
|
|
|
ut_a(lock_rec_has_to_wait_in_queue(lock));
|
|
}
|
|
}
|
|
|
|
ut_ad(innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
|
|
lock_queue_validate(lock));
|
|
|
|
func_exit:
|
|
if (!locked_lock_trx_sys) {
|
|
lock_mutex_exit();
|
|
mutex_exit(&trx_sys->mutex);
|
|
}
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Validates the record lock queues on a page.
|
|
@return TRUE if ok */
|
|
static
|
|
ibool
|
|
lock_rec_validate_page(
|
|
/*===================*/
|
|
const buf_block_t* block) /*!< in: buffer block */
|
|
{
|
|
const lock_t* lock;
|
|
const rec_t* rec;
|
|
ulint nth_lock = 0;
|
|
ulint nth_bit = 0;
|
|
ulint i;
|
|
mem_heap_t* heap = NULL;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint* offsets = offsets_;
|
|
rec_offs_init(offsets_);
|
|
|
|
ut_ad(!lock_mutex_own());
|
|
|
|
lock_mutex_enter();
|
|
mutex_enter(&trx_sys->mutex);
|
|
loop:
|
|
lock = lock_rec_get_first_on_page_addr(
|
|
lock_sys->rec_hash,
|
|
block->page.id.space(), block->page.id.page_no());
|
|
|
|
if (!lock) {
|
|
goto function_exit;
|
|
}
|
|
|
|
ut_ad(!block->page.file_page_was_freed);
|
|
|
|
for (i = 0; i < nth_lock; i++) {
|
|
|
|
lock = lock_rec_get_next_on_page_const(lock);
|
|
|
|
if (!lock) {
|
|
goto function_exit;
|
|
}
|
|
}
|
|
|
|
ut_ad(!trx_is_ac_nl_ro(lock->trx));
|
|
|
|
# ifdef UNIV_DEBUG
|
|
/* Only validate the record queues when this thread is not
|
|
holding a space->latch. Deadlocks are possible due to
|
|
latching order violation when UNIV_DEBUG is defined while
|
|
UNIV_DEBUG is not. */
|
|
if (!sync_check_find(SYNC_FSP))
|
|
# endif /* UNIV_DEBUG */
|
|
for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
|
|
|
|
if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
|
|
|
|
rec = page_find_rec_with_heap_no(block->frame, i);
|
|
ut_a(rec);
|
|
offsets = rec_get_offsets(rec, lock->index, offsets,
|
|
ULINT_UNDEFINED, &heap);
|
|
|
|
/* If this thread is holding the file space
|
|
latch (fil_space_t::latch), the following
|
|
check WILL break the latching order and may
|
|
cause a deadlock of threads. */
|
|
|
|
lock_rec_queue_validate(
|
|
TRUE, block, rec, lock->index, offsets);
|
|
|
|
nth_bit = i + 1;
|
|
|
|
goto loop;
|
|
}
|
|
}
|
|
|
|
nth_bit = 0;
|
|
nth_lock++;
|
|
|
|
goto loop;
|
|
|
|
function_exit:
|
|
lock_mutex_exit();
|
|
mutex_exit(&trx_sys->mutex);
|
|
|
|
if (heap != NULL) {
|
|
mem_heap_free(heap);
|
|
}
|
|
return(TRUE);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Validates the table locks.
|
|
@return TRUE if ok */
|
|
static
|
|
ibool
|
|
lock_validate_table_locks(
|
|
/*======================*/
|
|
const trx_ut_list_t* trx_list) /*!< in: trx list */
|
|
{
|
|
const trx_t* trx;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_sys_mutex_own());
|
|
|
|
ut_ad(trx_list == &trx_sys->rw_trx_list);
|
|
|
|
for (trx = UT_LIST_GET_FIRST(*trx_list);
|
|
trx != NULL;
|
|
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
|
|
|
|
const lock_t* lock;
|
|
|
|
check_trx_state(trx);
|
|
|
|
for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
|
|
|
|
if (lock_get_type_low(lock) & LOCK_TABLE) {
|
|
|
|
lock_table_queue_validate(
|
|
lock->un_member.tab_lock.table);
|
|
}
|
|
}
|
|
}
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Validate record locks up to a limit.
|
|
@return lock at limit or NULL if no more locks in the hash bucket */
|
|
static MY_ATTRIBUTE((warn_unused_result))
|
|
const lock_t*
|
|
lock_rec_validate(
|
|
/*==============*/
|
|
ulint start, /*!< in: lock_sys->rec_hash
|
|
bucket */
|
|
ib_uint64_t* limit) /*!< in/out: upper limit of
|
|
(space, page_no) */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_sys_mutex_own());
|
|
|
|
for (const lock_t* lock = static_cast<const lock_t*>(
|
|
HASH_GET_FIRST(lock_sys->rec_hash, start));
|
|
lock != NULL;
|
|
lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) {
|
|
|
|
ib_uint64_t current;
|
|
|
|
ut_ad(!trx_is_ac_nl_ro(lock->trx));
|
|
ut_ad(lock_get_type(lock) == LOCK_REC);
|
|
|
|
current = ut_ull_create(
|
|
lock->un_member.rec_lock.space,
|
|
lock->un_member.rec_lock.page_no);
|
|
|
|
if (current > *limit) {
|
|
*limit = current + 1;
|
|
return(lock);
|
|
}
|
|
}
|
|
|
|
return(0);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Validate a record lock's block */
|
|
static
|
|
void
|
|
lock_rec_block_validate(
|
|
/*====================*/
|
|
ulint space_id,
|
|
ulint page_no)
|
|
{
|
|
/* The lock and the block that it is referring to may be freed at
|
|
this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
|
|
If the lock exists in lock_rec_validate_page() we assert
|
|
!block->page.file_page_was_freed. */
|
|
|
|
buf_block_t* block;
|
|
mtr_t mtr;
|
|
|
|
/* Make sure that the tablespace is not deleted while we are
|
|
trying to access the page. */
|
|
if (fil_space_t* space = fil_space_acquire(space_id)) {
|
|
dberr_t err = DB_SUCCESS;
|
|
mtr_start(&mtr);
|
|
|
|
block = buf_page_get_gen(
|
|
page_id_t(space_id, page_no),
|
|
page_size_t(space->flags),
|
|
RW_X_LATCH, NULL,
|
|
BUF_GET_POSSIBLY_FREED,
|
|
__FILE__, __LINE__, &mtr, &err);
|
|
|
|
if (err != DB_SUCCESS) {
|
|
ib::error() << "Lock rec block validate failed for tablespace "
|
|
<< space->name
|
|
<< " space_id " << space_id
|
|
<< " page_no " << page_no << " err " << err;
|
|
}
|
|
|
|
if (block) {
|
|
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
|
|
|
|
ut_ad(lock_rec_validate_page(block));
|
|
}
|
|
|
|
mtr_commit(&mtr);
|
|
|
|
fil_space_release(space);
|
|
}
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Validates the lock system.
|
|
@return TRUE if ok */
|
|
static
|
|
bool
|
|
lock_validate()
|
|
/*===========*/
|
|
{
|
|
typedef std::pair<ulint, ulint> page_addr_t;
|
|
typedef std::set<
|
|
page_addr_t,
|
|
std::less<page_addr_t>,
|
|
ut_allocator<page_addr_t> > page_addr_set;
|
|
|
|
page_addr_set pages;
|
|
|
|
lock_mutex_enter();
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
|
|
|
|
/* Iterate over all the record locks and validate the locks. We
|
|
don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
|
|
Release both mutexes during the validation check. */
|
|
|
|
for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
|
|
const lock_t* lock;
|
|
ib_uint64_t limit = 0;
|
|
|
|
while ((lock = lock_rec_validate(i, &limit)) != 0) {
|
|
|
|
ulint space = lock->un_member.rec_lock.space;
|
|
ulint page_no = lock->un_member.rec_lock.page_no;
|
|
|
|
pages.insert(std::make_pair(space, page_no));
|
|
}
|
|
}
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
lock_mutex_exit();
|
|
|
|
for (page_addr_set::const_iterator it = pages.begin();
|
|
it != pages.end();
|
|
++it) {
|
|
lock_rec_block_validate((*it).first, (*it).second);
|
|
}
|
|
|
|
return(true);
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
|
|
|
|
/*********************************************************************//**
|
|
Checks if locks of other transactions prevent an immediate insert of
|
|
a record. If they do, first tests if the query thread should anyway
|
|
be suspended for some reason; if not, then puts the transaction and
|
|
the query thread to the lock wait state and inserts a waiting request
|
|
for a gap x-lock to the lock queue.
|
|
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
|
|
dberr_t
|
|
lock_rec_insert_check_and_lock(
|
|
/*===========================*/
|
|
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
|
|
set, does nothing */
|
|
const rec_t* rec, /*!< in: record after which to insert */
|
|
buf_block_t* block, /*!< in/out: buffer block of rec */
|
|
dict_index_t* index, /*!< in: index */
|
|
que_thr_t* thr, /*!< in: query thread */
|
|
mtr_t* mtr, /*!< in/out: mini-transaction */
|
|
ibool* inherit)/*!< out: set to TRUE if the new
|
|
inserted record maybe should inherit
|
|
LOCK_GAP type locks from the successor
|
|
record */
|
|
{
|
|
ut_ad(block->frame == page_align(rec));
|
|
ut_ad(!dict_index_is_online_ddl(index)
|
|
|| dict_index_is_clust(index)
|
|
|| (flags & BTR_CREATE_FLAG));
|
|
ut_ad(mtr->is_named_space(index->space));
|
|
|
|
if (flags & BTR_NO_LOCKING_FLAG) {
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
ut_ad(!dict_table_is_temporary(index->table));
|
|
|
|
dberr_t err;
|
|
lock_t* lock;
|
|
ibool inherit_in = *inherit;
|
|
trx_t* trx = thr_get_trx(thr);
|
|
const rec_t* next_rec = page_rec_get_next_const(rec);
|
|
ulint heap_no = page_rec_get_heap_no(next_rec);
|
|
|
|
lock_mutex_enter();
|
|
/* Because this code is invoked for a running transaction by
|
|
the thread that is serving the transaction, it is not necessary
|
|
to hold trx->mutex here. */
|
|
|
|
/* When inserting a record into an index, the table must be at
|
|
least IX-locked. When we are building an index, we would pass
|
|
BTR_NO_LOCKING_FLAG and skip the locking altogether. */
|
|
ut_ad(lock_table_has(trx, index->table, LOCK_IX));
|
|
|
|
lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
|
|
|
|
if (lock == NULL) {
|
|
/* We optimize CPU time usage in the simplest case */
|
|
|
|
lock_mutex_exit();
|
|
|
|
if (inherit_in && !dict_index_is_clust(index)) {
|
|
/* Update the page max trx id field */
|
|
page_update_max_trx_id(block,
|
|
buf_block_get_page_zip(block),
|
|
trx->id, mtr);
|
|
}
|
|
|
|
*inherit = FALSE;
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
/* Spatial index does not use GAP lock protection. It uses
|
|
"predicate lock" to protect the "range" */
|
|
if (dict_index_is_spatial(index)) {
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
*inherit = TRUE;
|
|
|
|
/* If another transaction has an explicit lock request which locks
|
|
the gap, waiting or granted, on the successor, the insert has to wait.
|
|
|
|
An exception is the case where the lock by the another transaction
|
|
is a gap type lock which it placed to wait for its turn to insert. We
|
|
do not consider that kind of a lock conflicting with our insert. This
|
|
eliminates an unnecessary deadlock which resulted when 2 transactions
|
|
had to wait for their insert. Both had waiting gap type lock requests
|
|
on the successor, which produced an unnecessary deadlock. */
|
|
|
|
const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
|
|
|
|
const lock_t* wait_for = lock_rec_other_has_conflicting(
|
|
type_mode, block, heap_no, trx);
|
|
|
|
if (wait_for != NULL) {
|
|
|
|
RecLock rec_lock(thr, index, block, heap_no, type_mode);
|
|
|
|
trx_mutex_enter(trx);
|
|
|
|
err = rec_lock.add_to_waitq(wait_for);
|
|
|
|
trx_mutex_exit(trx);
|
|
|
|
} else {
|
|
err = DB_SUCCESS;
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
|
|
switch (err) {
|
|
case DB_SUCCESS_LOCKED_REC:
|
|
err = DB_SUCCESS;
|
|
/* fall through */
|
|
case DB_SUCCESS:
|
|
if (!inherit_in || dict_index_is_clust(index)) {
|
|
break;
|
|
}
|
|
|
|
/* Update the page max trx id field */
|
|
page_update_max_trx_id(
|
|
block, buf_block_get_page_zip(block), trx->id, mtr);
|
|
default:
|
|
/* We only care about the two return values. */
|
|
break;
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
|
|
{
|
|
mem_heap_t* heap = NULL;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
const ulint* offsets;
|
|
rec_offs_init(offsets_);
|
|
|
|
offsets = rec_get_offsets(next_rec, index, offsets_,
|
|
ULINT_UNDEFINED, &heap);
|
|
|
|
ut_ad(lock_rec_queue_validate(
|
|
FALSE, block, next_rec, index, offsets));
|
|
|
|
if (heap != NULL) {
|
|
mem_heap_free(heap);
|
|
}
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Creates an explicit record lock for a running transaction that currently only
|
|
has an implicit lock on the record. The transaction instance must have a
|
|
reference count > 0 so that it can't be committed and freed before this
|
|
function has completed. */
|
|
static
|
|
void
|
|
lock_rec_convert_impl_to_expl_for_trx(
|
|
/*==================================*/
|
|
const buf_block_t* block, /*!< in: buffer block of rec */
|
|
const rec_t* rec, /*!< in: user record on page */
|
|
dict_index_t* index, /*!< in: index of record */
|
|
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
|
|
trx_t* trx, /*!< in/out: active transaction */
|
|
ulint heap_no)/*!< in: rec heap number to lock */
|
|
{
|
|
ut_ad(trx_is_referenced(trx));
|
|
|
|
DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
|
|
|
|
lock_mutex_enter();
|
|
|
|
ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
|
|
|
|
if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
|
|
&& !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
|
|
block, heap_no, trx)) {
|
|
|
|
ulint type_mode;
|
|
|
|
type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
|
|
|
|
lock_rec_add_to_queue(
|
|
type_mode, block, heap_no, index, trx, FALSE);
|
|
}
|
|
|
|
lock_mutex_exit();
|
|
|
|
trx_release_reference(trx);
|
|
|
|
DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
If a transaction has an implicit x-lock on a record, but no explicit x-lock
|
|
set on the record, sets one for it. */
|
|
static
|
|
void
|
|
lock_rec_convert_impl_to_expl(
|
|
/*==========================*/
|
|
const buf_block_t* block, /*!< in: buffer block of rec */
|
|
const rec_t* rec, /*!< in: user record on page */
|
|
dict_index_t* index, /*!< in: index of record */
|
|
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
|
|
{
|
|
trx_t* trx;
|
|
|
|
ut_ad(!lock_mutex_own());
|
|
ut_ad(page_rec_is_user_rec(rec));
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
|
|
|
|
if (dict_index_is_clust(index)) {
|
|
trx_id_t trx_id;
|
|
|
|
trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
|
|
|
|
trx = trx_rw_is_active(trx_id, NULL, true);
|
|
} else {
|
|
ut_ad(!dict_index_is_online_ddl(index));
|
|
|
|
trx = lock_sec_rec_some_has_impl(rec, index, offsets);
|
|
|
|
ut_ad(!trx || !lock_rec_other_trx_holds_expl(
|
|
LOCK_S | LOCK_REC_NOT_GAP, trx, rec, block));
|
|
}
|
|
|
|
if (trx != 0) {
|
|
ulint heap_no = page_rec_get_heap_no(rec);
|
|
|
|
ut_ad(trx_is_referenced(trx));
|
|
|
|
/* If the transaction is still active and has no
|
|
explicit x-lock set on the record, set one for it.
|
|
trx cannot be committed until the ref count is zero. */
|
|
|
|
lock_rec_convert_impl_to_expl_for_trx(
|
|
block, rec, index, offsets, trx, heap_no);
|
|
}
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if locks of other transactions prevent an immediate modify (update,
|
|
delete mark, or delete unmark) of a clustered index record. If they do,
|
|
first tests if the query thread should anyway be suspended for some
|
|
reason; if not, then puts the transaction and the query thread to the
|
|
lock wait state and inserts a waiting request for a record x-lock to the
|
|
lock queue.
|
|
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
|
|
dberr_t
|
|
lock_clust_rec_modify_check_and_lock(
|
|
/*=================================*/
|
|
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
|
|
bit is set, does nothing */
|
|
const buf_block_t* block, /*!< in: buffer block of rec */
|
|
const rec_t* rec, /*!< in: record which should be
|
|
modified */
|
|
dict_index_t* index, /*!< in: clustered index */
|
|
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
dberr_t err;
|
|
ulint heap_no;
|
|
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
ut_ad(dict_index_is_clust(index));
|
|
ut_ad(block->frame == page_align(rec));
|
|
|
|
if (flags & BTR_NO_LOCKING_FLAG) {
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
ut_ad(!dict_table_is_temporary(index->table));
|
|
|
|
heap_no = rec_offs_comp(offsets)
|
|
? rec_get_heap_no_new(rec)
|
|
: rec_get_heap_no_old(rec);
|
|
|
|
/* If a transaction has no explicit x-lock set on the record, set one
|
|
for it */
|
|
|
|
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
|
|
|
|
lock_mutex_enter();
|
|
|
|
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|
|
|
|
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
|
|
block, heap_no, index, thr);
|
|
|
|
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
|
|
|
|
lock_mutex_exit();
|
|
|
|
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
|
|
|
|
if (err == DB_SUCCESS_LOCKED_REC) {
|
|
err = DB_SUCCESS;
|
|
}
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if locks of other transactions prevent an immediate modify (delete
|
|
mark or delete unmark) of a secondary index record.
|
|
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
|
|
dberr_t
|
|
lock_sec_rec_modify_check_and_lock(
|
|
/*===============================*/
|
|
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
|
|
bit is set, does nothing */
|
|
buf_block_t* block, /*!< in/out: buffer block of rec */
|
|
const rec_t* rec, /*!< in: record which should be
|
|
modified; NOTE: as this is a secondary
|
|
index, we always have to modify the
|
|
clustered index record first: see the
|
|
comment below */
|
|
dict_index_t* index, /*!< in: secondary index */
|
|
que_thr_t* thr, /*!< in: query thread
|
|
(can be NULL if BTR_NO_LOCKING_FLAG) */
|
|
mtr_t* mtr) /*!< in/out: mini-transaction */
|
|
{
|
|
dberr_t err;
|
|
ulint heap_no;
|
|
|
|
ut_ad(!dict_index_is_clust(index));
|
|
ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
|
|
ut_ad(block->frame == page_align(rec));
|
|
ut_ad(mtr->is_named_space(index->space));
|
|
|
|
if (flags & BTR_NO_LOCKING_FLAG) {
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
ut_ad(!dict_table_is_temporary(index->table));
|
|
|
|
heap_no = page_rec_get_heap_no(rec);
|
|
|
|
/* Another transaction cannot have an implicit lock on the record,
|
|
because when we come here, we already have modified the clustered
|
|
index record, and this would not have been possible if another active
|
|
transaction had modified this secondary index record. */
|
|
|
|
lock_mutex_enter();
|
|
|
|
ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|
|
|
|
err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
|
|
block, heap_no, index, thr);
|
|
|
|
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
|
|
|
|
lock_mutex_exit();
|
|
|
|
#ifdef UNIV_DEBUG
|
|
{
|
|
mem_heap_t* heap = NULL;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
const ulint* offsets;
|
|
rec_offs_init(offsets_);
|
|
|
|
offsets = rec_get_offsets(rec, index, offsets_,
|
|
ULINT_UNDEFINED, &heap);
|
|
|
|
ut_ad(lock_rec_queue_validate(
|
|
FALSE, block, rec, index, offsets));
|
|
|
|
if (heap != NULL) {
|
|
mem_heap_free(heap);
|
|
}
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
|
|
/* Update the page max trx id field */
|
|
/* It might not be necessary to do this if
|
|
err == DB_SUCCESS (no new lock created),
|
|
but it should not cost too much performance. */
|
|
page_update_max_trx_id(block,
|
|
buf_block_get_page_zip(block),
|
|
thr_get_trx(thr)->id, mtr);
|
|
err = DB_SUCCESS;
|
|
}
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Like lock_clust_rec_read_check_and_lock(), but reads a
|
|
secondary index record.
|
|
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
|
|
or DB_QUE_THR_SUSPENDED */
|
|
dberr_t
|
|
lock_sec_rec_read_check_and_lock(
|
|
/*=============================*/
|
|
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
|
|
bit is set, does nothing */
|
|
const buf_block_t* block, /*!< in: buffer block of rec */
|
|
const rec_t* rec, /*!< in: user record or page
|
|
supremum record which should
|
|
be read or passed over by a
|
|
read cursor */
|
|
dict_index_t* index, /*!< in: secondary index */
|
|
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
|
|
lock_mode mode, /*!< in: mode of the lock which
|
|
the read cursor should set on
|
|
records: LOCK_S or LOCK_X; the
|
|
latter is possible in
|
|
SELECT FOR UPDATE */
|
|
ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
|
|
LOCK_REC_NOT_GAP */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
dberr_t err;
|
|
ulint heap_no;
|
|
|
|
ut_ad(!dict_index_is_clust(index));
|
|
ut_ad(!dict_index_is_online_ddl(index));
|
|
ut_ad(block->frame == page_align(rec));
|
|
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
ut_ad(mode == LOCK_X || mode == LOCK_S);
|
|
|
|
if ((flags & BTR_NO_LOCKING_FLAG)
|
|
|| srv_read_only_mode
|
|
|| dict_table_is_temporary(index->table)) {
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
heap_no = page_rec_get_heap_no(rec);
|
|
|
|
/* Some transaction may have an implicit x-lock on the record only
|
|
if the max trx id for the page >= min trx id for the trx list or a
|
|
database recovery is running. */
|
|
|
|
if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id()
|
|
|| recv_recovery_is_on())
|
|
&& !page_rec_is_supremum(rec)) {
|
|
|
|
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
|
|
}
|
|
|
|
lock_mutex_enter();
|
|
|
|
ut_ad(mode != LOCK_X
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|
|
ut_ad(mode != LOCK_S
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|
|
|
|
err = lock_rec_lock(FALSE, mode | gap_mode,
|
|
block, heap_no, index, thr);
|
|
|
|
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
|
|
|
|
lock_mutex_exit();
|
|
|
|
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Checks if locks of other transactions prevent an immediate read, or passing
|
|
over by a read cursor, of a clustered index record. If they do, first tests
|
|
if the query thread should anyway be suspended for some reason; if not, then
|
|
puts the transaction and the query thread to the lock wait state and inserts a
|
|
waiting request for a record lock to the lock queue. Sets the requested mode
|
|
lock on the record.
|
|
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
|
|
or DB_QUE_THR_SUSPENDED */
|
|
dberr_t
|
|
lock_clust_rec_read_check_and_lock(
|
|
/*===============================*/
|
|
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
|
|
bit is set, does nothing */
|
|
const buf_block_t* block, /*!< in: buffer block of rec */
|
|
const rec_t* rec, /*!< in: user record or page
|
|
supremum record which should
|
|
be read or passed over by a
|
|
read cursor */
|
|
dict_index_t* index, /*!< in: clustered index */
|
|
const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
|
|
lock_mode mode, /*!< in: mode of the lock which
|
|
the read cursor should set on
|
|
records: LOCK_S or LOCK_X; the
|
|
latter is possible in
|
|
SELECT FOR UPDATE */
|
|
ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
|
|
LOCK_REC_NOT_GAP */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
dberr_t err;
|
|
ulint heap_no;
|
|
|
|
ut_ad(dict_index_is_clust(index));
|
|
ut_ad(block->frame == page_align(rec));
|
|
ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
|
|
ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
|
|
|| gap_mode == LOCK_REC_NOT_GAP);
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
|
|
if ((flags & BTR_NO_LOCKING_FLAG)
|
|
|| srv_read_only_mode
|
|
|| dict_table_is_temporary(index->table)) {
|
|
|
|
return(DB_SUCCESS);
|
|
}
|
|
|
|
heap_no = page_rec_get_heap_no(rec);
|
|
|
|
if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
|
|
|
|
lock_rec_convert_impl_to_expl(block, rec, index, offsets);
|
|
}
|
|
|
|
lock_mutex_enter();
|
|
|
|
ut_ad(mode != LOCK_X
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
|
|
ut_ad(mode != LOCK_S
|
|
|| lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
|
|
|
|
err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
|
|
|
|
MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
|
|
|
|
lock_mutex_exit();
|
|
|
|
ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
|
|
|
|
DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
|
|
|
|
return(err);
|
|
}
|
|
/*********************************************************************//**
|
|
Checks if locks of other transactions prevent an immediate read, or passing
|
|
over by a read cursor, of a clustered index record. If they do, first tests
|
|
if the query thread should anyway be suspended for some reason; if not, then
|
|
puts the transaction and the query thread to the lock wait state and inserts a
|
|
waiting request for a record lock to the lock queue. Sets the requested mode
|
|
lock on the record. This is an alternative version of
|
|
lock_clust_rec_read_check_and_lock() that does not require the parameter
|
|
"offsets".
|
|
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
|
|
dberr_t
|
|
lock_clust_rec_read_check_and_lock_alt(
|
|
/*===================================*/
|
|
ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
|
|
bit is set, does nothing */
|
|
const buf_block_t* block, /*!< in: buffer block of rec */
|
|
const rec_t* rec, /*!< in: user record or page
|
|
supremum record which should
|
|
be read or passed over by a
|
|
read cursor */
|
|
dict_index_t* index, /*!< in: clustered index */
|
|
lock_mode mode, /*!< in: mode of the lock which
|
|
the read cursor should set on
|
|
records: LOCK_S or LOCK_X; the
|
|
latter is possible in
|
|
SELECT FOR UPDATE */
|
|
ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
|
|
LOCK_REC_NOT_GAP */
|
|
que_thr_t* thr) /*!< in: query thread */
|
|
{
|
|
mem_heap_t* tmp_heap = NULL;
|
|
ulint offsets_[REC_OFFS_NORMAL_SIZE];
|
|
ulint* offsets = offsets_;
|
|
dberr_t err;
|
|
rec_offs_init(offsets_);
|
|
|
|
offsets = rec_get_offsets(rec, index, offsets,
|
|
ULINT_UNDEFINED, &tmp_heap);
|
|
err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
|
|
offsets, mode, gap_mode, thr);
|
|
if (tmp_heap) {
|
|
mem_heap_free(tmp_heap);
|
|
}
|
|
|
|
if (err == DB_SUCCESS_LOCKED_REC) {
|
|
err = DB_SUCCESS;
|
|
}
|
|
|
|
return(err);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Release the last lock from the transaction's autoinc locks. */
|
|
UNIV_INLINE
|
|
void
|
|
lock_release_autoinc_last_lock(
|
|
/*===========================*/
|
|
ib_vector_t* autoinc_locks) /*!< in/out: vector of AUTOINC locks */
|
|
{
|
|
ulint last;
|
|
lock_t* lock;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_a(!ib_vector_is_empty(autoinc_locks));
|
|
|
|
/* The lock to be release must be the last lock acquired. */
|
|
last = ib_vector_size(autoinc_locks) - 1;
|
|
lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
|
|
|
|
/* Should have only AUTOINC locks in the vector. */
|
|
ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
|
|
ut_a(lock_get_type(lock) == LOCK_TABLE);
|
|
|
|
ut_a(lock->un_member.tab_lock.table != NULL);
|
|
|
|
/* This will remove the lock from the trx autoinc_locks too. */
|
|
lock_table_dequeue(lock);
|
|
|
|
/* Remove from the table vector too. */
|
|
lock_trx_table_locks_remove(lock);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Check if a transaction holds any autoinc locks.
|
|
@return TRUE if the transaction holds any AUTOINC locks. */
|
|
static
|
|
ibool
|
|
lock_trx_holds_autoinc_locks(
|
|
/*=========================*/
|
|
const trx_t* trx) /*!< in: transaction */
|
|
{
|
|
ut_a(trx->autoinc_locks != NULL);
|
|
|
|
return(!ib_vector_is_empty(trx->autoinc_locks));
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Release all the transaction's autoinc locks. */
|
|
static
|
|
void
|
|
lock_release_autoinc_locks(
|
|
/*=======================*/
|
|
trx_t* trx) /*!< in/out: transaction */
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
/* If this is invoked for a running transaction by the thread
|
|
that is serving the transaction, then it is not necessary to
|
|
hold trx->mutex here. */
|
|
|
|
ut_a(trx->autoinc_locks != NULL);
|
|
|
|
/* We release the locks in the reverse order. This is to
|
|
avoid searching the vector for the element to delete at
|
|
the lower level. See (lock_table_remove_low()) for details. */
|
|
while (!ib_vector_is_empty(trx->autoinc_locks)) {
|
|
|
|
/* lock_table_remove_low() will also remove the lock from
|
|
the transaction's autoinc_locks vector. */
|
|
lock_release_autoinc_last_lock(trx->autoinc_locks);
|
|
}
|
|
|
|
/* Should release all locks. */
|
|
ut_a(ib_vector_is_empty(trx->autoinc_locks));
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Gets the type of a lock. Non-inline version for using outside of the
|
|
lock module.
|
|
@return LOCK_TABLE or LOCK_REC */
|
|
ulint
|
|
lock_get_type(
|
|
/*==========*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
return(lock_get_type_low(lock));
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Gets the id of the transaction owning a lock.
|
|
@return transaction id */
|
|
trx_id_t
|
|
lock_get_trx_id(
|
|
/*============*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
return(trx_get_id_for_print(lock->trx));
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Gets the mode of a lock in a human readable string.
|
|
The string should not be free()'d or modified.
|
|
@return lock mode */
|
|
const char*
|
|
lock_get_mode_str(
|
|
/*==============*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
ibool is_gap_lock;
|
|
|
|
is_gap_lock = lock_get_type_low(lock) == LOCK_REC
|
|
&& lock_rec_get_gap(lock);
|
|
|
|
switch (lock_get_mode(lock)) {
|
|
case LOCK_S:
|
|
if (is_gap_lock) {
|
|
return("S,GAP");
|
|
} else {
|
|
return("S");
|
|
}
|
|
case LOCK_X:
|
|
if (is_gap_lock) {
|
|
return("X,GAP");
|
|
} else {
|
|
return("X");
|
|
}
|
|
case LOCK_IS:
|
|
if (is_gap_lock) {
|
|
return("IS,GAP");
|
|
} else {
|
|
return("IS");
|
|
}
|
|
case LOCK_IX:
|
|
if (is_gap_lock) {
|
|
return("IX,GAP");
|
|
} else {
|
|
return("IX");
|
|
}
|
|
case LOCK_AUTO_INC:
|
|
return("AUTO_INC");
|
|
default:
|
|
return("UNKNOWN");
|
|
}
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Gets the type of a lock in a human readable string.
|
|
The string should not be free()'d or modified.
|
|
@return lock type */
|
|
const char*
|
|
lock_get_type_str(
|
|
/*==============*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
switch (lock_get_type_low(lock)) {
|
|
case LOCK_REC:
|
|
return("RECORD");
|
|
case LOCK_TABLE:
|
|
return("TABLE");
|
|
default:
|
|
return("UNKNOWN");
|
|
}
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Gets the table on which the lock is.
|
|
@return table */
|
|
UNIV_INLINE
|
|
dict_table_t*
|
|
lock_get_table(
|
|
/*===========*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
switch (lock_get_type_low(lock)) {
|
|
case LOCK_REC:
|
|
ut_ad(dict_index_is_clust(lock->index)
|
|
|| !dict_index_is_online_ddl(lock->index));
|
|
return(lock->index->table);
|
|
case LOCK_TABLE:
|
|
return(lock->un_member.tab_lock.table);
|
|
default:
|
|
ut_error;
|
|
return(NULL);
|
|
}
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Gets the id of the table on which the lock is.
|
|
@return id of the table */
|
|
table_id_t
|
|
lock_get_table_id(
|
|
/*==============*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
dict_table_t* table;
|
|
|
|
table = lock_get_table(lock);
|
|
|
|
return(table->id);
|
|
}
|
|
|
|
/** Determine which table a lock is associated with.
|
|
@param[in] lock the lock
|
|
@return name of the table */
|
|
const table_name_t&
|
|
lock_get_table_name(
|
|
const lock_t* lock)
|
|
{
|
|
return(lock_get_table(lock)->name);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
For a record lock, gets the index on which the lock is.
|
|
@return index */
|
|
const dict_index_t*
|
|
lock_rec_get_index(
|
|
/*===============*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
ut_a(lock_get_type_low(lock) == LOCK_REC);
|
|
ut_ad(dict_index_is_clust(lock->index)
|
|
|| !dict_index_is_online_ddl(lock->index));
|
|
|
|
return(lock->index);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
For a record lock, gets the name of the index on which the lock is.
|
|
The string should not be free()'d or modified.
|
|
@return name of the index */
|
|
const char*
|
|
lock_rec_get_index_name(
|
|
/*====================*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
ut_a(lock_get_type_low(lock) == LOCK_REC);
|
|
ut_ad(dict_index_is_clust(lock->index)
|
|
|| !dict_index_is_online_ddl(lock->index));
|
|
|
|
return(lock->index->name);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
For a record lock, gets the tablespace number on which the lock is.
|
|
@return tablespace number */
|
|
ulint
|
|
lock_rec_get_space_id(
|
|
/*==================*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
ut_a(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
return(lock->un_member.rec_lock.space);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
For a record lock, gets the page number on which the lock is.
|
|
@return page number */
|
|
ulint
|
|
lock_rec_get_page_no(
|
|
/*=================*/
|
|
const lock_t* lock) /*!< in: lock */
|
|
{
|
|
ut_a(lock_get_type_low(lock) == LOCK_REC);
|
|
|
|
return(lock->un_member.rec_lock.page_no);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Cancels a waiting lock request and releases possible other transactions
|
|
waiting behind it. */
|
|
void
|
|
lock_cancel_waiting_and_release(
|
|
/*============================*/
|
|
lock_t* lock) /*!< in/out: waiting lock request */
|
|
{
|
|
que_thr_t* thr;
|
|
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_mutex_own(lock->trx));
|
|
|
|
lock->trx->lock.cancel = true;
|
|
|
|
if (lock_get_type_low(lock) == LOCK_REC) {
|
|
|
|
lock_rec_dequeue_from_page(lock);
|
|
} else {
|
|
ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
|
|
|
|
if (lock->trx->autoinc_locks != NULL) {
|
|
/* Release the transaction's AUTOINC locks. */
|
|
lock_release_autoinc_locks(lock->trx);
|
|
}
|
|
|
|
lock_table_dequeue(lock);
|
|
}
|
|
|
|
/* Reset the wait flag and the back pointer to lock in trx. */
|
|
|
|
lock_reset_lock_and_trx_wait(lock);
|
|
|
|
/* The following function releases the trx from lock wait. */
|
|
|
|
thr = que_thr_end_lock_wait(lock->trx);
|
|
|
|
if (thr != NULL) {
|
|
lock_wait_release_thread_if_suspended(thr);
|
|
}
|
|
|
|
lock->trx->lock.cancel = false;
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
|
|
function should be called at the end of an SQL statement, by the
|
|
connection thread that owns the transaction (trx->mysql_thd). */
|
|
void
|
|
lock_unlock_table_autoinc(
|
|
/*======================*/
|
|
trx_t* trx) /*!< in/out: transaction */
|
|
{
|
|
ut_ad(!lock_mutex_own());
|
|
ut_ad(!trx_mutex_own(trx));
|
|
ut_ad(!trx->lock.wait_lock);
|
|
|
|
/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
|
|
but not COMMITTED transactions. */
|
|
|
|
ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
|
|
|| trx_state_eq(trx, TRX_STATE_FORCED_ROLLBACK)
|
|
|| !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
|
|
|
|
/* This function is invoked for a running transaction by the
|
|
thread that is serving the transaction. Therefore it is not
|
|
necessary to hold trx->mutex here. */
|
|
|
|
if (lock_trx_holds_autoinc_locks(trx)) {
|
|
lock_mutex_enter();
|
|
|
|
lock_release_autoinc_locks(trx);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Releases a transaction's locks, and releases possible other transactions
|
|
waiting because of these locks. Change the state of the transaction to
|
|
TRX_STATE_COMMITTED_IN_MEMORY. */
|
|
void
|
|
lock_trx_release_locks(
|
|
/*===================*/
|
|
trx_t* trx) /*!< in/out: transaction */
|
|
{
|
|
check_trx_state(trx);
|
|
|
|
if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
ut_a(trx_sys->n_prepared_trx > 0);
|
|
--trx_sys->n_prepared_trx;
|
|
|
|
if (trx->is_recovered) {
|
|
ut_a(trx_sys->n_prepared_recovered_trx > 0);
|
|
trx_sys->n_prepared_recovered_trx--;
|
|
}
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
} else {
|
|
ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
|
|
}
|
|
|
|
bool release_lock;
|
|
|
|
release_lock = (UT_LIST_GET_LEN(trx->lock.trx_locks) > 0);
|
|
|
|
/* Don't take lock_sys mutex if trx didn't acquire any lock. */
|
|
if (release_lock) {
|
|
|
|
/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
|
|
is protected by both the lock_sys->mutex and the trx->mutex. */
|
|
lock_mutex_enter();
|
|
}
|
|
|
|
trx_mutex_enter(trx);
|
|
|
|
/* The following assignment makes the transaction committed in memory
|
|
and makes its changes to data visible to other transactions.
|
|
NOTE that there is a small discrepancy from the strict formal
|
|
visibility rules here: a human user of the database can see
|
|
modifications made by another transaction T even before the necessary
|
|
log segment has been flushed to the disk. If the database happens to
|
|
crash before the flush, the user has seen modifications from T which
|
|
will never be a committed transaction. However, any transaction T2
|
|
which sees the modifications of the committing transaction T, and
|
|
which also itself makes modifications to the database, will get an lsn
|
|
larger than the committing transaction T. In the case where the log
|
|
flush fails, and T never gets committed, also T2 will never get
|
|
committed. */
|
|
|
|
/*--------------------------------------*/
|
|
trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
|
|
/*--------------------------------------*/
|
|
|
|
if (trx_is_referenced(trx)) {
|
|
|
|
ut_a(release_lock);
|
|
|
|
lock_mutex_exit();
|
|
|
|
while (trx_is_referenced(trx)) {
|
|
|
|
trx_mutex_exit(trx);
|
|
|
|
DEBUG_SYNC_C("waiting_trx_is_not_referenced");
|
|
|
|
/** Doing an implicit to explicit conversion
|
|
should not be expensive. */
|
|
ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
|
|
|
|
trx_mutex_enter(trx);
|
|
}
|
|
|
|
trx_mutex_exit(trx);
|
|
|
|
lock_mutex_enter();
|
|
|
|
trx_mutex_enter(trx);
|
|
}
|
|
|
|
ut_ad(!trx_is_referenced(trx));
|
|
|
|
/* If the background thread trx_rollback_or_clean_recovered()
|
|
is still active then there is a chance that the rollback
|
|
thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
|
|
to clean it up calling trx_cleanup_at_db_startup(). This can
|
|
happen in the case we are committing a trx here that is left
|
|
in PREPARED state during the crash. Note that commit of the
|
|
rollback of a PREPARED trx happens in the recovery thread
|
|
while the rollback of other transactions happen in the
|
|
background thread. To avoid this race we unconditionally unset
|
|
the is_recovered flag. */
|
|
|
|
trx->is_recovered = false;
|
|
|
|
trx_mutex_exit(trx);
|
|
|
|
if (release_lock) {
|
|
|
|
lock_release(trx);
|
|
|
|
lock_mutex_exit();
|
|
}
|
|
|
|
trx->lock.n_rec_locks = 0;
|
|
|
|
/* We don't remove the locks one by one from the vector for
|
|
efficiency reasons. We simply reset it because we would have
|
|
released all the locks anyway. */
|
|
|
|
trx->lock.table_locks.clear();
|
|
|
|
ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
|
|
ut_a(ib_vector_is_empty(trx->autoinc_locks));
|
|
ut_a(trx->lock.table_locks.empty());
|
|
|
|
mem_heap_empty(trx->lock.lock_heap);
|
|
}
|
|
|
|
/*********************************************************************//**
|
|
Check whether the transaction has already been rolled back because it
|
|
was selected as a deadlock victim, or if it has to wait then cancel
|
|
the wait lock.
|
|
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
|
|
dberr_t
lock_trx_handle_wait(
	trx_t*	trx,			/*!< in/out: trx lock state */
	bool	lock_mutex_taken,	/*!< in: true if the caller already
					holds the lock_sys mutex */
	bool	trx_mutex_taken)	/*!< in: true if the caller already
					holds trx->mutex */
{
	/* Whatever mutex the caller does not hold is acquired here
	and released again before returning. */
	const bool	release_lock_mutex = !lock_mutex_taken;
	const bool	release_trx_mutex = !trx_mutex_taken;
	dberr_t		err;

	if (release_lock_mutex) {
		ut_ad(!lock_mutex_own());
		lock_mutex_enter();
	}

	if (release_trx_mutex) {
		ut_ad(!trx_mutex_own(trx));
		trx_mutex_enter(trx);
	}

	if (trx->lock.was_chosen_as_deadlock_victim) {
		err = DB_DEADLOCK;
	} else if (trx->lock.wait_lock == NULL) {
		/* The lock was probably granted before we got here. */
		err = DB_SUCCESS;
	} else {
		trx_t*	wait_trx = trx->lock.wait_lock->trx;

		/* Decide whether the mutex of the transaction owning
		the wait lock still has to be taken. For the same
		transaction that is the case only if its mutex was
		neither taken above nor by the caller (with the flags
		derived above, this alternative never holds). For a
		different transaction it is taken when that
		transaction's abort_type is TRX_SERVER_ABORT. */
		const bool	need_wait_trx_mutex
			= wait_trx != NULL
			&& (wait_trx == trx
			    ? (!release_trx_mutex && !trx_mutex_taken)
			    : wait_trx->abort_type == TRX_SERVER_ABORT);

		if (need_wait_trx_mutex) {
			ut_ad(!trx_mutex_own(wait_trx));
			trx_mutex_enter(wait_trx);
		}

		ut_ad(trx_mutex_own(wait_trx));

		lock_cancel_waiting_and_release(trx->lock.wait_lock);

		if (need_wait_trx_mutex) {
			ut_ad(trx_mutex_own(wait_trx));
			trx_mutex_exit(wait_trx);
		}

		err = DB_LOCK_WAIT;
	}

	if (release_lock_mutex) {
		ut_ad(lock_mutex_own());
		lock_mutex_exit();
	}

	if (release_trx_mutex) {
		ut_ad(trx_mutex_own(trx));
		trx_mutex_exit(trx);
	}

	ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT
	      || err == DB_DEADLOCK);

	return(err);
}
|
|
|
|
/*********************************************************************//**
|
|
Get the number of locks on a table.
|
|
@return number of locks */
|
|
ulint
lock_table_get_n_locks(
	const dict_table_t*	table)	/*!< in: table whose lock list
					length is read */
{
	/* Read the length of table->locks while holding the
	lock_sys mutex. */
	lock_mutex_enter();

	const ulint	n_table_locks = UT_LIST_GET_LEN(table->locks);

	lock_mutex_exit();

	return(n_table_locks);
}
|
|
|
|
#ifdef UNIV_DEBUG
|
|
/*******************************************************************//**
|
|
Do an exhaustive check for any locks (table or rec) against the table.
|
|
@return lock if found */
|
|
static
|
|
const lock_t*
|
|
lock_table_locks_lookup(
|
|
/*====================*/
|
|
const dict_table_t* table, /*!< in: check if there are
|
|
any locks held on records in
|
|
this table or on the table
|
|
itself */
|
|
const trx_ut_list_t* trx_list) /*!< in: trx list to check */
|
|
{
|
|
trx_t* trx;
|
|
|
|
ut_a(table != NULL);
|
|
ut_ad(lock_mutex_own());
|
|
ut_ad(trx_sys_mutex_own());
|
|
|
|
for (trx = UT_LIST_GET_FIRST(*trx_list);
|
|
trx != NULL;
|
|
trx = UT_LIST_GET_NEXT(trx_list, trx)) {
|
|
|
|
const lock_t* lock;
|
|
|
|
check_trx_state(trx);
|
|
|
|
for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
|
|
lock != NULL;
|
|
lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
|
|
|
|
ut_a(lock->trx == trx);
|
|
|
|
if (lock_get_type_low(lock) == LOCK_REC) {
|
|
ut_ad(!dict_index_is_online_ddl(lock->index)
|
|
|| dict_index_is_clust(lock->index));
|
|
if (lock->index->table == table) {
|
|
return(lock);
|
|
}
|
|
} else if (lock->un_member.tab_lock.table == table) {
|
|
return(lock);
|
|
}
|
|
}
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
/*******************************************************************//**
|
|
Check if there are any locks (table or rec) against table.
|
|
@return true if table has either table or record locks. */
|
|
bool
|
|
lock_table_has_locks(
|
|
/*=================*/
|
|
const dict_table_t* table) /*!< in: check if there are any locks
|
|
held on records in this table or on the
|
|
table itself */
|
|
{
|
|
ibool has_locks;
|
|
|
|
lock_mutex_enter();
|
|
|
|
has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
|
|
|
|
#ifdef UNIV_DEBUG
|
|
if (!has_locks) {
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
lock_mutex_exit();
|
|
|
|
return(has_locks);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Initialise the table lock list. */
|
|
void
|
|
lock_table_lock_list_init(
|
|
/*======================*/
|
|
table_lock_list_t* lock_list) /*!< List to initialise */
|
|
{
|
|
UT_LIST_INIT(*lock_list, &lock_table_t::locks);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Initialise the trx lock list. */
|
|
void
|
|
lock_trx_lock_list_init(
|
|
/*====================*/
|
|
trx_lock_list_t* lock_list) /*!< List to initialise */
|
|
{
|
|
UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
|
|
}
|
|
|
|
/*******************************************************************//**
|
|
Set the lock system timeout event. */
|
|
void
|
|
lock_set_timeout_event()
|
|
/*====================*/
|
|
{
|
|
os_event_set(lock_sys->timeout_event);
|
|
}
|
|
|
|
#ifdef UNIV_DEBUG
|
|
/*******************************************************************//**
|
|
Check if the transaction holds any locks on the sys tables
|
|
or its records.
|
|
@return the strongest lock found on any sys table or 0 for none */
|
|
const lock_t*
lock_trx_has_sys_table_locks(
	const trx_t*	trx)	/*!< in: transaction to check */
{
	typedef lock_pool_t::const_reverse_iterator	iterator;

	const lock_t*	strongest_lock = 0;
	lock_mode	strongest = LOCK_NONE;

	lock_mutex_enter();

	iterator	end = trx->lock.table_locks.rend();
	iterator	it = trx->lock.table_locks.rbegin();

	/* Phase 1: scanning from the back of table_locks, find the
	first non-NULL lock on a system table to obtain a valid
	starting mode. Note: the container may be empty. */
	while (it != end) {
		const lock_t*	lock = *it;

		if (lock != NULL
		    && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {

			strongest = lock_get_mode(lock);
			ut_ad(strongest != LOCK_NONE);
			strongest_lock = lock;
			break;
		}

		++it;
	}

	if (strongest == LOCK_NONE) {
		/* No lock on any system table was found. */
		lock_mutex_exit();
		return(NULL);
	}

	/* Phase 2: continue from the position found above (that entry
	is examined again) and keep the strongest system table lock. */
	for (/* No op */; it != end; ++it) {
		const lock_t*	lock = *it;

		if (lock != NULL) {
			ut_ad(trx == lock->trx);
			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
			ut_ad(lock->un_member.tab_lock.table != NULL);

			lock_mode	mode = lock_get_mode(lock);

			if (dict_is_sys_table(
				    lock->un_member.tab_lock.table->id)
			    && lock_mode_stronger_or_eq(mode, strongest)) {

				strongest = mode;
				strongest_lock = lock;
			}
		}
	}

	lock_mutex_exit();

	return(strongest_lock);
}
|
|
|
|
/*******************************************************************//**
|
|
Check if the transaction holds an exclusive lock on a record.
|
|
@return whether the locks are held */
|
|
bool
|
|
lock_trx_has_rec_x_lock(
|
|
/*====================*/
|
|
const trx_t* trx, /*!< in: transaction to check */
|
|
const dict_table_t* table, /*!< in: table to check */
|
|
const buf_block_t* block, /*!< in: buffer block of the record */
|
|
ulint heap_no)/*!< in: record heap number */
|
|
{
|
|
ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
|
|
|
|
lock_mutex_enter();
|
|
ut_a(lock_table_has(trx, table, LOCK_IX)
|
|
|| dict_table_is_temporary(table));
|
|
ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
|
|
block, heap_no, trx)
|
|
|| dict_table_is_temporary(table));
|
|
lock_mutex_exit();
|
|
return(true);
|
|
}
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
/** rewind(3) the file used for storing the latest detected deadlock and
|
|
print a heading message to stderr if printing of all deadlocks to stderr
|
|
is enabled. */
|
|
void
|
|
DeadlockChecker::start_print()
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
rewind(lock_latest_err_file);
|
|
ut_print_timestamp(lock_latest_err_file);
|
|
|
|
if (srv_print_all_deadlocks) {
|
|
ib::info() << "Transactions deadlock detected, dumping"
|
|
<< " detailed information.";
|
|
}
|
|
}
|
|
|
|
/** Print a message to the deadlock file and possibly to stderr.
|
|
@param msg message to print */
|
|
void
|
|
DeadlockChecker::print(const char* msg)
|
|
{
|
|
fputs(msg, lock_latest_err_file);
|
|
|
|
if (srv_print_all_deadlocks) {
|
|
ib::info() << msg;
|
|
}
|
|
}
|
|
|
|
/** Print transaction data to the deadlock file and possibly to stderr.
|
|
@param trx transaction
|
|
@param max_query_len max query length to print */
|
|
void
|
|
DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
|
|
ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
|
|
ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
|
|
|
|
mutex_enter(&trx_sys->mutex);
|
|
|
|
trx_print_low(lock_latest_err_file, trx, max_query_len,
|
|
n_rec_locks, n_trx_locks, heap_size);
|
|
|
|
if (srv_print_all_deadlocks) {
|
|
trx_print_low(stderr, trx, max_query_len,
|
|
n_rec_locks, n_trx_locks, heap_size);
|
|
}
|
|
|
|
mutex_exit(&trx_sys->mutex);
|
|
}
|
|
|
|
/** Print lock data to the deadlock file and possibly to stderr.
|
|
@param lock record or table type lock */
|
|
void
|
|
DeadlockChecker::print(const lock_t* lock)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
if (lock_get_type_low(lock) == LOCK_REC) {
|
|
lock_rec_print(lock_latest_err_file, lock);
|
|
|
|
if (srv_print_all_deadlocks) {
|
|
lock_rec_print(stderr, lock);
|
|
}
|
|
} else {
|
|
lock_table_print(lock_latest_err_file, lock);
|
|
|
|
if (srv_print_all_deadlocks) {
|
|
lock_table_print(stderr, lock);
|
|
}
|
|
}
|
|
}
|
|
|
|
/** Get the next lock in the queue that is owned by a transaction whose
|
|
sub-tree has not already been searched.
|
|
Note: "next" here means PREV for table locks.
|
|
|
|
@param lock Lock in queue
|
|
@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
|
|
|
|
@return next lock or NULL if at end of queue */
|
|
const lock_t*
DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const
{
	ut_ad(lock_mutex_own());

	/* Advance at least once, then keep advancing while the lock
	reached is_visited(). */
	for (;;) {
		if (lock_get_type_low(lock) == LOCK_REC) {
			ut_ad(heap_no != ULINT_UNDEFINED);
			lock = lock_rec_get_next_const(heap_no, lock);
		} else {
			ut_ad(heap_no == ULINT_UNDEFINED);
			ut_ad(lock_get_type_low(lock) == LOCK_TABLE);

			lock = UT_LIST_GET_NEXT(
				un_member.tab_lock.locks, lock);
		}

		if (lock == NULL || !is_visited(lock)) {
			break;
		}
	}

	/* The lock type must not change while walking the queue. */
	ut_ad(lock == NULL
	      || lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}
|
|
|
|
/** Get the first lock to search. The search starts from the current
|
|
wait_lock. What we are really interested in is an edge from the
|
|
current wait_lock's owning transaction to another transaction that has
|
|
a lock ahead in the queue. We skip locks where the owning transaction's
|
|
sub-tree has already been searched.
|
|
|
|
Note: The record locks are traversed from the oldest lock to the
|
|
latest. For table locks we go from latest to oldest.
|
|
|
|
For record locks, we first position the "iterator" on the first lock on
|
|
the page and then reposition on the actual heap_no. This is required
|
|
due to the way the record lock hash is implemented.
|
|
|
|
@param[out] heap_no if rec lock, else ULINT_UNDEFINED.
|
|
@return first lock or NULL */
|
|
const lock_t*
DeadlockChecker::get_first_lock(ulint* heap_no) const
{
	ut_ad(lock_mutex_own());

	const lock_t*	lock = m_wait_lock;

	if (lock_get_type_low(lock) != LOCK_REC) {
		/* Table locks don't care about the heap_no. */
		*heap_no = ULINT_UNDEFINED;
		ut_ad(lock_get_type_low(lock) == LOCK_TABLE);

		dict_table_t*	table = lock->un_member.tab_lock.table;

		lock = UT_LIST_GET_FIRST(table->locks);
	} else {
		/* Predicate locks live in their own hash table. */
		hash_table_t*	lock_hash;

		if (lock->type_mode & LOCK_PREDICATE) {
			lock_hash = lock_sys->prdt_hash;
		} else {
			lock_hash = lock_sys->rec_hash;
		}

		/* We are only interested in records that match the
		heap_no. */
		*heap_no = lock_rec_find_set_bit(lock);

		ut_ad(*heap_no <= 0xffff);
		ut_ad(*heap_no != ULINT_UNDEFINED);

		/* Find the locks on the page. */
		lock = lock_rec_get_first_on_page_addr(
			lock_hash,
			lock->un_member.rec_lock.space,
			lock->un_member.rec_lock.page_no);

		/* Position on the first lock on the physical record. */
		if (!lock_rec_get_nth_bit(lock, *heap_no)) {
			lock = lock_rec_get_next_const(*heap_no, lock);
		}

		ut_a(!lock_get_wait(lock));
	}

	/* Must find at least two locks, otherwise there cannot be a
	waiting lock. Secondly, the first lock cannot be the wait_lock,
	except under the VATS scheduling algorithm when the owning
	thread is not a replication slave thread. */
	ut_a(lock != NULL);
	ut_a(lock != m_wait_lock ||
	     (innodb_lock_schedule_algorithm
	      == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
	      && !thd_is_replication_slave_thread(lock->trx->mysql_thd)));

	/* Check that the lock type doesn't change. */
	ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}
|
|
|
|
/** Notify that a deadlock has been detected and print the conflicting
|
|
transaction info.
|
|
@param lock lock causing deadlock */
|
|
void
|
|
DeadlockChecker::notify(const lock_t* lock) const
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
start_print();
|
|
|
|
print("\n*** (1) TRANSACTION:\n");
|
|
|
|
print(m_wait_lock->trx, 3000);
|
|
|
|
print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
|
|
|
|
print(m_wait_lock);
|
|
|
|
print("*** (2) TRANSACTION:\n");
|
|
|
|
print(lock->trx, 3000);
|
|
|
|
print("*** (2) HOLDS THE LOCK(S):\n");
|
|
|
|
print(lock);
|
|
|
|
/* It is possible that the joining transaction was granted its
|
|
lock when we rolled back some other waiting transaction. */
|
|
|
|
if (m_start->lock.wait_lock != 0) {
|
|
print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
|
|
|
|
print(m_start->lock.wait_lock);
|
|
}
|
|
|
|
DBUG_PRINT("ib_lock", ("deadlock detected"));
|
|
}
|
|
|
|
/** Select the victim transaction that should be rolled back.
|
|
@return victim transaction */
|
|
const trx_t*
DeadlockChecker::select_victim() const
{
	ut_ad(lock_mutex_own());
	ut_ad(m_start->lock.wait_lock != 0);
	ut_ad(m_wait_lock->trx != m_start);

	/* If either side has a positive priority, let trx_arbitrate()
	decide first; it may decline by returning NULL. */
	if (thd_trx_priority(m_start->mysql_thd) > 0
	    || thd_trx_priority(m_wait_lock->trx->mysql_thd) > 0) {

		const trx_t*	victim
			= trx_arbitrate(m_start, m_wait_lock->trx);

		if (victim != NULL) {
			return(victim);
		}
	}

	if (trx_weight_ge(m_wait_lock->trx, m_start)) {

		/* The joining transaction is 'smaller',
		choose it as the victim and roll it back ... */

#ifdef WITH_WSREP
		/* ... unless wsrep_thd_is_BF() reports true for it,
		in which case the other transaction is chosen. */
		if (wsrep_thd_is_BF(m_start->mysql_thd, TRUE)) {
			return(m_wait_lock->trx);
		}
#endif /* WITH_WSREP */

		return(m_start);
	}

#ifdef WITH_WSREP
	/* Likewise, if wsrep_thd_is_BF() reports true for the owner of
	the wait lock, the joining transaction is chosen instead. */
	if (wsrep_thd_is_BF(m_wait_lock->trx->mysql_thd, TRUE)) {
		return(m_start);
	}
#endif /* WITH_WSREP */

	return(m_wait_lock->trx);
}
|
|
|
|
/** Looks iteratively for a deadlock. Note: the joining transaction may
|
|
have been granted its lock by the deadlock checks.
|
|
@return 0 if no deadlock else the victim transaction instance.*/
|
|
const trx_t*
DeadlockChecker::search()
{
	ut_ad(lock_mutex_own());
	ut_ad(!trx_mutex_own(m_start));

	ut_ad(m_start != NULL);
	ut_ad(m_wait_lock != NULL);
	check_trx_state(m_wait_lock->trx);
	ut_ad(m_mark_start <= s_lock_mark_counter);

	/* Look at the locks ahead of wait_lock in the lock queue. */
	ulint		heap_no;
	const lock_t*	lock = get_first_lock(&heap_no);

	for (;;) {

		/* We should never visit the same sub-tree more than once. */
		ut_ad(lock == NULL || !is_visited(lock));

		/* The current queue is exhausted: restore previous
		search states until one yields a lock or none is left. */
		while (m_n_elems > 0 && lock == NULL) {
			pop(lock, heap_no);

			lock = get_next_lock(lock, heap_no);
		}

		if (lock == NULL) {
			/* Nothing left to examine. */
			break;
		}

		if (lock == m_wait_lock) {

			/* We can mark this subtree as searched */
			ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start);

			lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;

			/* We are not prepared for an overflow. This 64-bit
			counter should never wrap around. At 10^9 increments
			per second, it would take 10^3 years of uptime. */
			ut_ad(s_lock_mark_counter > 0);

			/* Backtrack */
			lock = NULL;
			continue;
		}

		if (!lock_has_to_wait(m_wait_lock, lock)) {

			/* No conflict, next lock */
			lock = get_next_lock(lock, heap_no);
			continue;
		}

		if (lock->trx == m_start) {

			/* Found a cycle. */
			notify(lock);

			return(select_victim());
		}

		if (is_too_deep()) {

			/* Search too deep to continue. */
			m_too_deep = true;
			return(m_start);
		}

		/* We do not need to report autoinc locks to the upper
		layer. These locks are released before commit, so they
		can not cause deadlocks with binlog-fixed commit
		order. */
		if (m_report_waiters &&
		    (lock_get_type_low(lock) != LOCK_TABLE ||
		     lock_get_mode(lock) != LOCK_AUTO_INC)) {
			thd_rpl_deadlock_check(m_start->mysql_thd,
					       lock->trx->mysql_thd);
		}

		if (lock->trx->lock.que_state != TRX_QUE_LOCK_WAIT) {
			/* The owner of the conflicting lock is not
			waiting itself; move on to the next lock. */
			lock = get_next_lock(lock, heap_no);
			continue;
		}

		/* Another trx ahead has requested a lock in an
		incompatible mode, and is itself waiting for a lock.
		Save the current position and descend into the queue
		of the lock that transaction is waiting for. */
		++m_cost;

		if (!push(lock, heap_no)) {
			m_too_deep = true;
			return(m_start);
		}

		m_wait_lock = lock->trx->lock.wait_lock;

		lock = get_first_lock(&heap_no);

		if (is_visited(lock)) {
			lock = get_next_lock(lock, heap_no);
		}
	}

	ut_a(lock == NULL && m_n_elems == 0);

	/* No deadlock found. */
	return(0);
}
|
|
|
|
/** Print info about transaction that was rolled back.
|
|
@param trx transaction rolled back
|
|
@param lock lock trx wants */
|
|
void
|
|
DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock)
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
/* If the lock search exceeds the max step
|
|
or the max depth, the current trx will be
|
|
the victim. Print its information. */
|
|
start_print();
|
|
|
|
print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
|
|
" WAITS-FOR GRAPH, WE WILL ROLL BACK"
|
|
" FOLLOWING TRANSACTION \n\n"
|
|
"*** TRANSACTION:\n");
|
|
|
|
print(trx, 3000);
|
|
|
|
print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
|
|
|
|
print(lock);
|
|
}
|
|
|
|
/** Rollback transaction selected as the victim. */
|
|
void
|
|
DeadlockChecker::trx_rollback()
|
|
{
|
|
ut_ad(lock_mutex_own());
|
|
|
|
trx_t* trx = m_wait_lock->trx;
|
|
|
|
print("*** WE ROLL BACK TRANSACTION (1)\n");
|
|
|
|
trx_mutex_enter(trx);
|
|
|
|
trx->lock.was_chosen_as_deadlock_victim = true;
|
|
|
|
lock_cancel_waiting_and_release(trx->lock.wait_lock);
|
|
|
|
trx_mutex_exit(trx);
|
|
}
|
|
|
|
/** Checks if a joining lock request results in a deadlock. If a deadlock is
|
|
found this function will resolve the deadlock by choosing a victim transaction
|
|
and rolling it back. It will attempt to resolve all deadlocks. The returned
|
|
transaction id will be the joining transaction instance or NULL if some other
|
|
transaction was chosen as a victim and rolled back or no deadlock found.
|
|
|
|
@param[in] lock lock the transaction is requesting
|
|
@param[in,out] trx transaction requesting the lock
|
|
|
|
@return transaction instance chosen as victim or 0 */
|
|
const trx_t*
DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(trx));
	check_trx_state(trx);
	ut_ad(!srv_read_only_mode);

	/* If transaction is marked for ASYNC rollback then we should
	not allow it to wait for another lock causing possible deadlock.
	We return current transaction as deadlock victim here. */
	if (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
		return(trx);
	}

	/* Deadlock detection is switched off: report no victim. */
	if (!innobase_deadlock_detect) {
		return(NULL);
	}

	/* Release the mutex to obey the latching order.
	This is safe, because DeadlockChecker::check_and_resolve()
	is invoked when a lock wait is enqueued for the currently
	running transaction. Because m_trx is a running transaction
	(it is not currently suspended because of a lock wait),
	its state can only be changed by this thread, which is
	currently associated with the transaction. */
	trx_mutex_exit(trx);

	THD*		start_mysql_thd = trx->mysql_thd;
	const bool	report_waits = start_mysql_thd
		&& thd_need_wait_reports(start_mysql_thd);
	const trx_t*	victim_trx;

	/* Try and resolve as many deadlocks as possible: repeat the
	search for as long as some other transaction gets rolled back. */
	for (;;) {
		DeadlockChecker	checker(trx, lock, s_lock_mark_counter,
					report_waits);

		victim_trx = checker.search();

		/* Search too deep, we rollback the joining transaction only
		if it is possible to rollback. Otherwise we rollback the
		transaction that is holding the lock that the joining
		transaction wants. */
		if (checker.is_too_deep()) {

			ut_ad(trx == checker.m_start);
			ut_ad(trx == victim_trx);

#ifdef WITH_WSREP
			/* For a BF processor nothing is printed. */
			if (!wsrep_thd_is_BF(victim_trx->mysql_thd, TRUE))
#endif /* WITH_WSREP */
			{
				rollback_print(victim_trx, lock);
			}

			MONITOR_INC(MONITOR_DEADLOCK);

			break;
		}

		if (victim_trx == NULL || victim_trx == trx) {
			/* Either no deadlock was found or the joining
			transaction itself is the victim. */
			break;
		}

		/* Another transaction was chosen: roll it back and
		search again. */
		ut_ad(victim_trx == checker.m_wait_lock->trx);

		checker.trx_rollback();

		lock_deadlock_found = true;

		MONITOR_INC(MONITOR_DEADLOCK);
	}

	/* If the joining transaction was selected as the victim. */
	if (victim_trx != NULL) {

		print("*** WE ROLL BACK TRANSACTION (2)\n");

		lock_deadlock_found = true;
	}

	/* Re-acquire the mutex that was released above. */
	trx_mutex_enter(trx);

	return(victim_trx);
}
|
|
|
|
/**
|
|
Allocate cached locks for the transaction.
|
|
@param trx allocate cached record locks for this transaction */
|
|
void
|
|
lock_trx_alloc_locks(trx_t* trx)
|
|
{
|
|
ulint sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
|
|
byte* ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
|
|
|
|
/* We allocate one big chunk and then distribute it among
|
|
the rest of the elements. The allocated chunk pointer is always
|
|
at index 0. */
|
|
|
|
for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
|
|
trx->lock.rec_pool.push_back(
|
|
reinterpret_cast<ib_lock_t*>(ptr));
|
|
}
|
|
|
|
sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
|
|
ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
|
|
|
|
for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
|
|
trx->lock.table_pool.push_back(
|
|
reinterpret_cast<ib_lock_t*>(ptr));
|
|
}
|
|
|
|
}
|
|
/*************************************************************//**
|
|
Updates the lock table when a page is split and merged to
|
|
two pages. */
|
|
UNIV_INTERN
void
lock_update_split_and_merge(
	const buf_block_t*	left_block,	/*!< in: left page to which
						merged */
	const rec_t*		orig_pred,	/*!< in: original predecessor
						of supremum on the left page
						before merge */
	const buf_block_t*	right_block)	/*!< in: right page from which
						merged */
{
	ut_a(left_block && right_block);
	ut_a(orig_pred);

	lock_mutex_enter();

	/* The record following the original predecessor is the first
	record which was moved from the right page. */
	const rec_t*	left_next_rec = page_rec_get_next_const(orig_pred);
	const ulint	moved_heap_no = page_rec_get_heap_no(left_next_rec);

	/* Inherit the locks on the supremum of the left page to the
	first record which was moved from the right page */
	lock_rec_inherit_to_gap(left_block, left_block,
				moved_heap_no, PAGE_HEAP_NO_SUPREMUM);

	/* Reset the locks on the supremum of the left page,
	releasing waiting transactions */
	lock_rec_reset_and_release_wait(left_block, PAGE_HEAP_NO_SUPREMUM);

	/* Inherit the locks to the supremum of the left page from the
	successor of the infimum on the right page */
	const ulint	right_min_heap_no = lock_get_min_heap_no(right_block);

	lock_rec_inherit_to_gap(left_block, right_block,
				PAGE_HEAP_NO_SUPREMUM, right_min_heap_no);

	lock_mutex_exit();
}
|