2014-02-26 19:11:54 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2016-04-22 10:50:45 +02:00
|
|
|
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
|
2019-05-03 16:47:07 +03:00
|
|
|
Copyright (c) 2017, 2019, MariaDB Corporation.
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
2019-05-11 19:25:02 +03:00
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/**************************************************//**
|
|
|
|
@file include/row0vers.h
|
|
|
|
Row versions
|
|
|
|
|
|
|
|
Created 2/6/1997 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
|
|
|
#ifndef row0vers_h
|
|
|
|
#define row0vers_h
|
|
|
|
|
|
|
|
#include "data0data.h"
|
|
|
|
#include "trx0types.h"
|
|
|
|
#include "que0types.h"
|
|
|
|
#include "rem0types.h"
|
|
|
|
#include "mtr0mtr.h"
|
2016-08-12 11:17:45 +03:00
|
|
|
#include "dict0mem.h"
|
2018-07-06 17:13:53 +03:00
|
|
|
#include "row0types.h"
|
2016-08-12 11:17:45 +03:00
|
|
|
|
|
|
|
// Forward declaration
|
|
|
|
class ReadView;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2017-12-11 12:37:19 +02:00
|
|
|
/** Determine if an active transaction has inserted or modified a secondary
|
2014-02-26 19:11:54 +01:00
|
|
|
index record.
|
2017-12-13 15:40:41 +04:00
|
|
|
@param[in,out] caller_trx trx of current thread
|
2017-12-11 12:37:19 +02:00
|
|
|
@param[in] rec secondary index record
|
|
|
|
@param[in] index secondary index
|
|
|
|
@param[in] offsets rec_get_offsets(rec, index)
|
MDEV-15326: InnoDB: Failing assertion: !other_lock
MySQL 5.7.9 (and MariaDB 10.2.2) introduced a race condition
between InnoDB transaction commit and the conversion of implicit
locks into explicit ones.
The assertion failure can be triggered with a test that runs
3 concurrent single-statement transactions in a loop on a simple
table:
CREATE TABLE t (a INT PRIMARY KEY) ENGINE=InnoDB;
thread1: INSERT INTO t SET a=1;
thread2: DELETE FROM t;
thread3: SELECT * FROM t FOR UPDATE; -- or DELETE FROM t;
The failure scenarios are like the following:
(1) The INSERT statement is being committed, waiting for lock_sys->mutex.
(2) At the time of the failure, both the DELETE and SELECT transactions
are active but have not logged any changes yet.
(3) The transaction where the !other_lock assertion fails started
lock_rec_convert_impl_to_expl().
(4) After this point, the commit of the INSERT removed the transaction from
trx_sys->rw_trx_set, in trx_erase_lists().
(5) The other transaction consulted trx_sys->rw_trx_set and determined
that there is no implicit lock. Hence, it grabbed the lock.
(6) The !other_lock assertion fails in lock_rec_add_to_queue()
for the lock_rec_convert_impl_to_expl(), because the lock was 'stolen'.
This assertion failure looks genuine, because the INSERT transaction
is still active (trx->state=TRX_STATE_ACTIVE).
The problematic step (4) was introduced in
mysql/mysql-server@e27e0e0bb75b4d35e87059816f1cc370c09890ad
which fixed something related to MVCC (covered by the test
innodb.innodb-read-view). Basically, it reintroduced an error
that had been mentioned in an earlier commit
mysql/mysql-server@a17be6963fc0d9210fa0642d3985b7219cdaf0c5:
"The active transaction was removed from trx_sys->rw_trx_set prematurely."
Our fix goes along the following lines:
(a) Implicit locks will be released by assigning
trx->state=TRX_STATE_COMMITTED_IN_MEMORY as the first step.
This transition will no longer be protected by lock_sys_t::mutex,
only by trx->mutex. This idea is by Sergey Vojtovich.
(b) We detach the transaction from trx_sys before starting to release
explicit locks.
(c) All callers of trx_rw_is_active() and trx_rw_is_active_low() must
recheck trx->state after acquiring trx->mutex.
(d) Before releasing any explicit locks, we will ensure that any activity
by other threads to convert implicit locks into explicit will have ceased,
by checking !trx_is_referenced(trx). There was a glitch
in this check when it was part of lock_trx_release_locks(); at the end
we would release trx->mutex and acquire lock_sys->mutex and trx->mutex,
and fail to recheck (trx_is_referenced() is protected by trx_t::mutex).
(e) Explicit locks can be released in batches (LOCK_RELEASE_INTERVAL=1000)
just like we did before.
trx_t::state: Document that the transition to COMMITTED is only
protected by trx_t::mutex, no longer by lock_sys_t::mutex.
trx_rw_is_active_low(), trx_rw_is_active(): Document that the transaction
state should be rechecked after acquiring trx_t::mutex.
trx_t::commit_state(): New function to change a transaction to committed
state, to release implicit locks.
trx_t::release_locks(): New function to release the explicit locks
after commit_state().
lock_trx_release_locks(): Move much of the logic to the caller
(which must invoke trx_t::commit_state() and trx_t::release_locks()
as needed), and assert that the transaction will have locks.
trx_get_trx_by_xid(): Make the parameter a pointer to const.
lock_rec_other_trx_holds_expl(): Recheck trx->state after acquiring
trx->mutex, and avoid a redundant lookup of the transaction.
lock_rec_queue_validate(): Recheck impl_trx->state while holding
impl_trx->mutex.
row_vers_impl_x_locked(), row_vers_impl_x_locked_low():
Document that the transaction state must be rechecked after
trx_mutex_enter().
trx_free_prepared(): Adjust for the changes to lock_trx_release_locks().
2019-09-03 12:31:37 +03:00
|
|
|
@return the active transaction; state must be rechecked after
|
2019-09-03 13:04:05 +03:00
|
|
|
trx_mutex_enter(), and trx->release_reference() must be invoked
|
2017-12-11 12:37:19 +02:00
|
|
|
@retval NULL if the record was committed */
|
2016-08-12 11:17:45 +03:00
|
|
|
trx_t*
|
2014-02-26 19:11:54 +01:00
|
|
|
row_vers_impl_x_locked(
|
2017-12-13 15:40:41 +04:00
|
|
|
trx_t* caller_trx,
|
2017-12-11 12:37:19 +02:00
|
|
|
const rec_t* rec,
|
|
|
|
dict_index_t* index,
|
2020-04-28 10:46:51 +10:00
|
|
|
const rec_offs* offsets);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2018-07-06 17:13:53 +03:00
|
|
|
/** Finds out if a version of the record, where the version >= the current
|
2014-02-26 19:11:54 +01:00
|
|
|
purge view, should have ientry as its secondary index entry. We check
|
|
|
|
if there is any not delete marked version of the record where the trx
|
|
|
|
id >= purge view, and the secondary index entry == ientry; exactly in
|
|
|
|
this case we return TRUE.
|
2018-07-06 17:13:53 +03:00
|
|
|
@param[in] also_curr TRUE if rec itself is also included in the versions
|
|
|
|
to search; otherwise only versions prior
|
|
|
|
to it are searched
|
|
|
|
@param[in] rec record in the clustered index; the caller
|
|
|
|
must have a latch on the page
|
|
|
|
@param[in] mtr mtr holding the latch on rec; it will
|
|
|
|
also hold the latch on purge_view
|
|
|
|
@param[in] index secondary index
|
|
|
|
@param[in] ientry secondary index entry
|
|
|
|
@param[in] roll_ptr roll_ptr for the purge record
|
|
|
|
@param[in] trx_id transaction ID on the purging record
|
2016-08-12 11:17:45 +03:00
|
|
|
@return TRUE if an earlier version should have ientry as its secondary index entry */
|
2018-07-06 17:13:53 +03:00
|
|
|
bool
|
2014-02-26 19:11:54 +01:00
|
|
|
row_vers_old_has_index_entry(
|
2018-07-06 17:13:53 +03:00
|
|
|
bool also_curr,
|
|
|
|
const rec_t* rec,
|
|
|
|
mtr_t* mtr,
|
|
|
|
dict_index_t* index,
|
|
|
|
const dtuple_t* ientry,
|
|
|
|
roll_ptr_t roll_ptr,
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 15:42:50 +02:00
|
|
|
trx_id_t trx_id);
|
2016-08-12 11:17:45 +03:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*****************************************************************//**
|
|
|
|
Constructs the version of a clustered index record which a consistent
|
|
|
|
read should see. We assume that the trx id stored in rec is such that
|
|
|
|
the consistent read should not see rec in its present version.
|
2016-08-12 11:17:45 +03:00
|
|
|
@return DB_SUCCESS or DB_MISSING_HISTORY */
|
2014-02-26 19:11:54 +01:00
|
|
|
dberr_t
|
|
|
|
row_vers_build_for_consistent_read(
|
|
|
|
/*===============================*/
|
|
|
|
const rec_t* rec, /*!< in: record in a clustered index; the
|
|
|
|
caller must have a latch on the page; this
|
|
|
|
latch locks the top of the stack of versions
|
|
|
|
of this record */
|
|
|
|
mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
|
|
|
|
also hold the latch on purge_view */
|
|
|
|
dict_index_t* index, /*!< in: the clustered index */
|
2020-04-28 10:46:51 +10:00
|
|
|
rec_offs** offsets,/*!< in/out: offsets returned by
|
2014-02-26 19:11:54 +01:00
|
|
|
rec_get_offsets(rec, index) */
|
2016-08-12 11:17:45 +03:00
|
|
|
ReadView* view, /*!< in: the consistent read view */
|
2014-02-26 19:11:54 +01:00
|
|
|
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
|
|
|
|
the offsets are allocated */
|
|
|
|
mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
|
|
|
|
*old_vers is allocated; memory for possible
|
|
|
|
intermediate versions is allocated and freed
|
|
|
|
locally within the function */
|
2016-08-12 11:17:45 +03:00
|
|
|
rec_t** old_vers,/*!< out, own: old version, or NULL
|
2014-02-26 19:11:54 +01:00
|
|
|
if the history is missing or the record
|
|
|
|
does not exist in the view, that is,
|
|
|
|
it was freshly inserted afterwards */
|
2019-05-03 16:47:07 +03:00
|
|
|
dtuple_t** vrow); /*!< out: reports virtual column info if any */
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/*****************************************************************//**
|
|
|
|
Constructs the last committed version of a clustered index record,
|
|
|
|
which should be seen by a semi-consistent read. */
|
|
|
|
void
|
|
|
|
row_vers_build_for_semi_consistent_read(
|
|
|
|
/*====================================*/
|
2017-12-13 15:40:41 +04:00
|
|
|
trx_t* caller_trx,/*!<in/out: trx of current thread */
|
2014-02-26 19:11:54 +01:00
|
|
|
const rec_t* rec, /*!< in: record in a clustered index; the
|
|
|
|
caller must have a latch on the page; this
|
|
|
|
latch locks the top of the stack of versions
|
|
|
|
of this record */
|
|
|
|
mtr_t* mtr, /*!< in: mtr holding the latch on rec */
|
|
|
|
dict_index_t* index, /*!< in: the clustered index */
|
2020-04-28 10:46:51 +10:00
|
|
|
rec_offs** offsets,/*!< in/out: offsets returned by
|
2014-02-26 19:11:54 +01:00
|
|
|
rec_get_offsets(rec, index) */
|
|
|
|
mem_heap_t** offset_heap,/*!< in/out: memory heap from which
|
|
|
|
the offsets are allocated */
|
|
|
|
mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
|
|
|
|
*old_vers is allocated; memory for possible
|
|
|
|
intermediate versions is allocated and freed
|
|
|
|
locally within the function */
|
2016-08-12 11:17:45 +03:00
|
|
|
const rec_t** old_vers,/*!< out: rec, old version, or NULL if the
|
2014-02-26 19:11:54 +01:00
|
|
|
record does not exist in the view, that is,
|
|
|
|
it was freshly inserted afterwards */
|
2019-05-03 16:47:07 +03:00
|
|
|
dtuple_t** vrow); /*!< out: holds virtual column info if any
|
2016-08-12 11:17:45 +03:00
|
|
|
is updated in the view */
|
2016-09-06 09:43:16 +03:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif
|