2014-02-26 19:11:54 +01:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2018-01-29 14:17:23 +01:00
|
|
|
Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved.
|
2014-02-26 19:11:54 +01:00
|
|
|
Copyright (c) 2012, Facebook Inc.
|
2022-04-11 13:41:24 +02:00
|
|
|
Copyright (c) 2013, 2022, MariaDB Corporation.
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
2019-05-11 17:08:32 +02:00
|
|
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/**************************************************//**
|
|
|
|
@file include/dict0dict.h
|
|
|
|
Data dictionary system
|
|
|
|
|
|
|
|
Created 1/8/1996 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
|
|
|
#ifndef dict0dict_h
|
|
|
|
#define dict0dict_h
|
|
|
|
|
|
|
|
#include "data0data.h"
|
2016-08-12 10:17:45 +02:00
|
|
|
#include "dict0mem.h"
|
|
|
|
#include "fsp0fsp.h"
|
2020-11-20 08:31:27 +01:00
|
|
|
#include "srw_lock.h"
|
2021-06-14 08:15:20 +02:00
|
|
|
#include <my_sys.h>
|
2016-08-12 10:17:45 +02:00
|
|
|
#include <deque>
|
2014-02-26 19:11:54 +01:00
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
class MDL_ticket;
|
2014-07-22 18:31:45 +02:00
|
|
|
|
2018-11-22 14:36:50 +01:00
|
|
|
/** the first table or index ID for other than hard-coded system tables */
|
2019-11-13 13:34:52 +01:00
|
|
|
constexpr uint8_t DICT_HDR_FIRST_ID= 10;
|
2018-11-22 14:36:50 +01:00
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
|
|
|
|
/** Get the database name length in a table name.
|
|
|
|
@param name filename-safe encoded table name "dbname/tablename"
|
2016-08-12 10:17:45 +02:00
|
|
|
@return database name length */
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
inline size_t dict_get_db_name_len(const char *name)
|
|
|
|
{
|
|
|
|
/* table_name_t::dblen() would assert that '/' is contained */
|
|
|
|
if (const char* s= strchr(name, '/'))
|
|
|
|
return size_t(s - name);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Open a table from its database and table name, this is currently used by
|
|
|
|
foreign constraint parser to get the referenced table.
|
|
|
|
@return complete table name with database and table name, allocated from
|
|
|
|
heap memory passed in */
|
|
|
|
char*
|
|
|
|
dict_get_referenced_table(
|
|
|
|
/*======================*/
|
|
|
|
const char* name, /*!< in: foreign key table name */
|
|
|
|
const char* database_name, /*!< in: table db name */
|
|
|
|
ulint database_name_len,/*!< in: db name length */
|
|
|
|
const char* table_name, /*!< in: table name */
|
|
|
|
ulint table_name_len, /*!< in: table name length */
|
|
|
|
dict_table_t** table, /*!< out: table object or NULL */
|
2019-11-20 11:18:31 +01:00
|
|
|
mem_heap_t* heap, /*!< in: heap memory */
|
|
|
|
CHARSET_INFO* from_cs); /*!< in: table name charset */
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Frees a foreign key struct. */
|
|
|
|
void
|
|
|
|
dict_foreign_free(
|
|
|
|
/*==============*/
|
|
|
|
dict_foreign_t* foreign); /*!< in, own: foreign key struct */
|
|
|
|
/*********************************************************************//**
|
|
|
|
Finds the highest [number] for foreign key constraints of the table. Looks
|
|
|
|
only at the >= 4.0.18-format id's, which are of the form
|
|
|
|
databasename/tablename_ibfk_[number].
|
|
|
|
@return highest number, 0 if table has no new format foreign key constraints */
|
|
|
|
ulint
|
|
|
|
dict_table_get_highest_foreign_id(
|
|
|
|
/*==============================*/
|
|
|
|
dict_table_t* table); /*!< in: table in the dictionary
|
|
|
|
memory cache */
|
2019-11-20 11:18:31 +01:00
|
|
|
/** Check whether the dict_table_t is a partition.
|
|
|
|
A partitioned table on the SQL level is composed of InnoDB tables,
|
|
|
|
where each InnoDB table is a [sub]partition including its secondary indexes
|
|
|
|
which belongs to the partition.
|
|
|
|
@param[in] table Table to check.
|
|
|
|
@return true if the dict_table_t is a partition else false. */
|
|
|
|
UNIV_INLINE
|
|
|
|
bool
|
|
|
|
dict_table_is_partition(const dict_table_t* table)
|
|
|
|
{
|
|
|
|
/* Check both P and p on all platforms in case it was moved to/from
|
|
|
|
WIN. */
|
|
|
|
return (strstr(table->name.m_name, "#p#")
|
|
|
|
|| strstr(table->name.m_name, "#P#"));
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Return the end of table name where we have removed dbname and '/'.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return table name */
|
2014-02-26 19:11:54 +01:00
|
|
|
const char*
|
|
|
|
dict_remove_db_name(
|
|
|
|
/*================*/
|
|
|
|
const char* name) /*!< in: table name in the form
|
|
|
|
dbname '/' tablename */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/** Operation to perform when opening a table */
|
|
|
|
enum dict_table_op_t {
|
|
|
|
/** Expect the tablespace to exist. */
|
|
|
|
DICT_TABLE_OP_NORMAL = 0,
|
|
|
|
/** Drop any orphan indexes after an aborted online index creation */
|
|
|
|
DICT_TABLE_OP_DROP_ORPHAN,
|
|
|
|
/** Silently load the tablespace if it does not exist,
|
|
|
|
and do not load the definitions of incomplete indexes. */
|
2014-08-06 14:28:58 +02:00
|
|
|
DICT_TABLE_OP_LOAD_TABLESPACE,
|
|
|
|
/** Open the table only if it's in table cache. */
|
|
|
|
DICT_TABLE_OP_OPEN_ONLY_IF_CACHED
|
2014-02-26 19:11:54 +01:00
|
|
|
};
|
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
/** Acquire MDL shared for the table name.
|
|
|
|
@tparam trylock whether to use non-blocking operation
|
|
|
|
@param[in,out] table table object
|
|
|
|
@param[in,out] thd background thread
|
|
|
|
@param[out] mdl mdl ticket
|
|
|
|
@param[in] table_op operation to perform when opening
|
|
|
|
@return table object after locking MDL shared
|
|
|
|
@retval NULL if the table is not readable, or if trylock && MDL blocked */
|
2022-08-05 21:46:21 +02:00
|
|
|
template<bool trylock, bool purge_thd= false>
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
dict_table_t*
|
|
|
|
dict_acquire_mdl_shared(dict_table_t *table,
|
|
|
|
THD *thd,
|
|
|
|
MDL_ticket **mdl,
|
|
|
|
dict_table_op_t table_op= DICT_TABLE_OP_NORMAL);
|
|
|
|
|
|
|
|
/** Look up a table by numeric identifier.
|
|
|
|
@param[in] table_id table identifier
|
|
|
|
@param[in] dict_locked data dictionary locked
|
|
|
|
@param[in] table_op operation to perform when opening
|
|
|
|
@param[in,out] thd background thread, or NULL to not acquire MDL
|
|
|
|
@param[out] mdl mdl ticket, or NULL
|
2016-08-12 10:17:45 +02:00
|
|
|
@return table, NULL if does not exist */
|
2022-04-04 09:30:03 +02:00
|
|
|
template<bool purge_thd= false>
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_table_t*
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
dict_table_open_on_id(table_id_t table_id, bool dict_locked,
|
|
|
|
dict_table_op_t table_op, THD *thd= nullptr,
|
|
|
|
MDL_ticket **mdl= nullptr)
|
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2014-12-22 15:53:17 +01:00
|
|
|
|
MDEV-25919 preparation: Various cleanup
que_eval_sql(): Remove the parameter lock_dict. The only caller
with lock_dict=true was dict_stats_exec_sql(), which will now
explicitly invoke dict_sys.lock() and dict_sys.unlock() by itself.
row_import_cleanup(): Do not unnecessarily lock the dictionary.
Concurrent access to the table during ALTER TABLE...IMPORT TABLESPACE
is prevented by MDL and the fact that there cannot exist any
undo log or change buffer records that would refer to the table
or tablespace.
row_import_for_mysql(): Do not unnecessarily lock the dictionary
while accessing fil_system. Thanks to MDL_EXCLUSIVE that was acquired
by the SQL layer, only one IMPORT may be in effect for the table name.
row_quiesce_set_state(): Do not unnecessarily lock the dictionary.
The dict_table_t::quiesce state is documented to be protected by
all index latches, which we are acquiring.
dict_table_close(): Introduce a simpler variant with fewer parameters.
dict_table_close(): Reduce the amount of calls.
We can simply invoke dict_table_t::release() on startup or
in DDL operations, or when the table is inaccessible.
In none of these cases, there is no need to invalidate the
InnoDB persistent statistics.
pars_info_t::graph_owns_us: Remove (unused).
pars_info_free(): Define inline.
fts_delete(), trx_t::evict_table(), row_prebuilt_free(),
row_rename_table_for_mysql(): Simplify.
row_mysql_lock_data_dictionary(): Remove some references;
use dict_sys.lock() and dict_sys.unlock() instead.
row_mysql_lock_table(): Remove. Use lock_table_for_trx() instead.
ha_innobase::check_if_supported_inplace_alter(),
row_create_table_for_mysql(): Simply assert dict_sys.sys_tables_exist().
In commit 49e2c8f0a6fefdeac50925f758090d6bd099768d and
commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 srv_start()
actually guarantees that the system tables will exist,
or the server is in read-only mode, or startup will fail.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:20 +02:00
|
|
|
/** Decrement the count of open handles */
|
|
|
|
void dict_table_close(dict_table_t *table);
|
|
|
|
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
/** Decrements the count of open handles of a table.
|
|
|
|
@param[in,out] table table
|
MDEV-25919: Lock tables before acquiring dict_sys.latch
In commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 (MDEV-25506 part 3)
we introduced a "fake instant timeout" when a transaction would wait
for a table or record lock while holding dict_sys.latch. This prevented
a deadlock of the server but could cause bogus errors for operations
on the InnoDB persistent statistics tables.
A better fix is to ensure that whenever a transaction is being
executed in the InnoDB internal SQL parser (which will for now
require dict_sys.latch to be held), it will already have acquired
all locks that could be required for the execution. So, we will
acquire the following locks upfront, before acquiring dict_sys.latch:
(1) MDL on the affected user table (acquired by the SQL layer)
(2) If applicable (not for RENAME TABLE): InnoDB table lock
(3) If persistent statistics are going to be modified:
(3.a) MDL_SHARED on mysql.innodb_table_stats, mysql.innodb_index_stats
(3.b) exclusive table locks on the statistics tables
(4) Exclusive table locks on the InnoDB data dictionary tables
(not needed in ANALYZE TABLE and the like)
Note: Acquiring exclusive locks on the statistics tables may cause
more locking conflicts between concurrent DDL operations.
Notably, RENAME TABLE will lock the statistics tables
even if no persistent statistics are enabled for the table.
DROP DATABASE will only acquire locks on statistics tables if
persistent statistics are enabled for the tables on which the
SQL layer is invoking ha_innobase::delete_table().
For any "garbage collection" in innodb_drop_database(), a timeout
while acquiring locks on the statistics tables will result in any
statistics not being deleted for any tables that the SQL layer
did not know about.
If innodb_defragment=ON, information may be written to the statistics
tables even for tables for which InnoDB persistent statistics are
disabled. But, DROP TABLE will no longer attempt to delete that
information if persistent statistics are not enabled for the table.
This change should also fix the hangs related to InnoDB persistent
statistics and STATS_AUTO_RECALC (MDEV-15020) as well as
a bug that running ALTER TABLE on the statistics tables
concurrently with running ALTER TABLE on InnoDB tables could
cause trouble.
lock_rec_enqueue_waiting(), lock_table_enqueue_waiting():
Do not issue a fake instant timeout error when the transaction
is holding dict_sys.latch. Instead, assert that the dict_sys.latch
is never being held here.
lock_sys_tables(): A new function to acquire exclusive locks on all
dictionary tables, in case DROP TABLE or similar operation is
being executed. Locking non-hard-coded tables is optional to avoid
a crash in row_merge_drop_temp_indexes(). The SYS_VIRTUAL table was
introduced in MySQL 5.7 and MariaDB Server 10.2. Normally, we require
all these dictionary tables to exist before executing any DDL, but
the function row_merge_drop_temp_indexes() is an exception.
When upgrading from MariaDB Server 10.1 or MySQL 5.6 or earlier,
the table SYS_VIRTUAL would not exist at this point.
ha_innobase::commit_inplace_alter_table(): Invoke
log_write_up_to() while not holding dict_sys.latch.
dict_sys_t::remove(), dict_table_close(): No longer try to
drop index stubs that were left behind by aborted online ADD INDEX.
Such indexes should be dropped from the InnoDB data dictionary by
row_merge_drop_indexes() as part of the failed DDL operation.
Stubs for aborted indexes may only be left behind in the
data dictionary cache.
dict_stats_fetch_from_ps(): Use a normal read-only transaction.
ha_innobase::delete_table(), ha_innobase::truncate(), fts_lock_table():
While waiting for purge to stop using the table,
do not hold dict_sys.latch.
ha_innobase::delete_table(): Implement a work-around for the rollback
of ALTER TABLE...ADD PARTITION. MDL_EXCLUSIVE would not be held if
ALTER TABLE hits lock_wait_timeout while trying to upgrade the MDL
due to a conflicting LOCK TABLES, such as in the first ALTER TABLE
in the test case of Bug#53676 in parts.partition_special_innodb.
Therefore, we must explicitly stop purge, because it would not be
stopped by MDL.
dict_stats_func(), btr_defragment_chunk(): Allocate a THD so that
we can acquire MDL on the InnoDB persistent statistics tables.
mysqltest_embedded: Invoke ha_pre_shutdown() before free_used_memory()
in order to avoid ASAN heap-use-after-free related to acquire_thd().
trx_t::dict_operation_lock_mode: Changed the type to bool.
row_mysql_lock_data_dictionary(), row_mysql_unlock_data_dictionary():
Implemented as macros.
rollback_inplace_alter_table(): Apply an infinite timeout to lock waits.
innodb_thd_increment_pending_ops(): Wrapper for
thd_increment_pending_ops(). Never attempt async operation for
InnoDB background threads, such as the trx_t::commit() in
dict_stats_process_entry_from_recalc_pool().
lock_sys_t::cancel(trx_t*): Make dictionary transactions immune to KILL.
lock_wait(): Make dictionary transactions immune to KILL, and to
lock wait timeout when waiting for locks on dictionary tables.
parts.partition_special_innodb: Use lock_wait_timeout=0 to instantly
get ER_LOCK_WAIT_TIMEOUT.
main.mdl: Filter out MDL on InnoDB persistent statistics tables
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:44 +02:00
|
|
|
@param[in] dict_locked whether dict_sys.latch is being held
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
@param[in] thd thread to release MDL
|
|
|
|
@param[in] mdl metadata lock or NULL if the thread is a
|
|
|
|
foreground one. */
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
|
|
|
dict_table_close(
|
MDEV-16678 Prefer MDL to dict_sys.latch for innodb background tasks
This is joint work with Thirunarayanan Balathandayuthapani.
The MDL interface between InnoDB and the rest of the server
(in storage/innobase/dict/dict0dict.cc and in include/)
is my work, while most everything else is Thiru's.
The collection of InnoDB persistent statistics and the
defragmentation were not refactored to use MDL. They will
keep relying on lower-level interlocking with
fil_check_pending_operations().
The purge of transaction history and the background operations on
fulltext indexes will use MDL. We will revert
commit 2c4844c9e76427525e8c39a2d72686085efe89c3
(MDEV-17813) because thanks to MDL, purge cannot conflict
with DDL operations anymore. For a similar reason, we will remove
the MDEV-16222 test case from gcol.innodb_virtual_debug_purge.
Purge is essentially replacing all use of the global dict_sys.latch
with MDL. Purge will skip the undo log records for tables whose names
start with #sql-ib or #sql2. Theoretically, such tables might
be renamed back to visible table names if TRUNCATE fails to
create a new table, or the final rename in ALTER TABLE...ALGORITHM=COPY
fails. In that case, purge could permanently leave some garbage
in the table. Such garbage will be tolerated; the table would not
be considered corrupted.
To avoid repeated MDL releases and acquisitions,
trx_purge_attach_undo_recs() will sort undo log records by table_id,
and purge_node_t will keep the MDL and table handle open for multiple
successive undo log records.
get_purge_table(): A new accessor, used during the purge of
history for indexed virtual columns. This interface should ideally
not exist at all.
thd_mdl_context(): Accessor of THD::mdl_context.
Wrapped in a new thd_mdl_service.
dict_get_db_name_len(): Define inline.
dict_acquire_mdl_shared(): Acquire explicit shared MDL on a table name
if needed.
dict_table_open_on_id(): Return MDL_ticket, if requested.
dict_table_close(): Release MDL ticket, if requested.
dict_fts_index_syncing(), dict_index_t::index_fts_syncing: Remove.
row_drop_table_for_mysql() no longer needs to check these, because
MDL guarantees that a fulltext index sync will not be in progress
while MDL_EXCLUSIVE is protecting a DDL operation.
dict_table_t::parse_name(): Parse the table name for acquiring MDL.
purge_node_t::undo_recs: Change the type to std::list<trx_purge_rec_t*>
(different container, and storing also roll_ptr).
purge_node_t: Add mdl_ticket, last_table_id, purge_thd, mdl_hold_recs
for acquiring MDL and for keeping the table open across multiple
undo log records.
purge_vcol_info_t, row_purge_store_vsec_cur(), row_purge_restore_vsec_cur():
Remove. We will acquire the MDL earlier.
purge_sys_t::heap: Added, for reading undo log records.
fts_sync_during_ddl(): Invoked during ALGORITHM=INPLACE operations
to ensure that fts_sync_table() will not conflict with MDL_EXCLUSIVE.
Uses fts_t::sync_message for bookkeeping.
2019-12-10 14:42:50 +01:00
|
|
|
dict_table_t* table,
|
|
|
|
bool dict_locked,
|
|
|
|
THD* thd = NULL,
|
|
|
|
MDL_ticket* mdl = NULL);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Gets the minimum number of bytes per character.
|
|
|
|
@return minimum multi-byte char size, in bytes */
|
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_col_get_mbminlen(
|
|
|
|
/*==================*/
|
|
|
|
const dict_col_t* col) /*!< in: column */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Gets the maximum number of bytes per character.
|
|
|
|
@return maximum multi-byte char size, in bytes */
|
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_col_get_mbmaxlen(
|
|
|
|
/*==================*/
|
|
|
|
const dict_col_t* col) /*!< in: column */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Gets the column data type. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
dict_col_copy_type(
|
|
|
|
/*===============*/
|
|
|
|
const dict_col_t* col, /*!< in: column */
|
2016-08-12 10:17:45 +02:00
|
|
|
dtype_t* type); /*!< out: data type */
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Determine bytes of column prefix to be stored in the undo log. Please
|
2017-06-01 12:03:55 +02:00
|
|
|
note that if !dict_table_has_atomic_blobs(table), no prefix
|
2014-02-26 19:11:54 +01:00
|
|
|
needs to be stored in the undo log.
|
|
|
|
@return bytes of column prefix to be stored in the undo log */
|
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_max_field_len_store_undo(
|
|
|
|
/*==========================*/
|
|
|
|
dict_table_t* table, /*!< in: table */
|
|
|
|
const dict_col_t* col) /*!< in: column which index prefix
|
|
|
|
is based on */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** Determine maximum bytes of a virtual column need to be stored
|
|
|
|
in the undo log.
|
|
|
|
@param[in] table dict_table_t for the table
|
|
|
|
@param[in] col_no virtual column number
|
|
|
|
@return maximum bytes of virtual column to be stored in the undo log */
|
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_max_v_field_len_store_undo(
|
|
|
|
dict_table_t* table,
|
|
|
|
ulint col_no);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/*********************************************************************//**
|
|
|
|
Assert that a column and a data type match.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
ibool
|
|
|
|
dict_col_type_assert_equal(
|
|
|
|
/*=======================*/
|
|
|
|
const dict_col_t* col, /*!< in: column */
|
|
|
|
const dtype_t* type) /*!< in: data type */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif /* UNIV_DEBUG */
|
2016-12-30 14:04:10 +01:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/***********************************************************************//**
|
|
|
|
Returns the minimum size of the column.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return minimum size */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_col_get_min_size(
|
|
|
|
/*==================*/
|
|
|
|
const dict_col_t* col) /*!< in: column */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/***********************************************************************//**
|
|
|
|
Returns the maximum size of the column.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return maximum size */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_col_get_max_size(
|
|
|
|
/*==================*/
|
|
|
|
const dict_col_t* col) /*!< in: column */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/***********************************************************************//**
|
|
|
|
Returns the size of a fixed size column, 0 if not a fixed size column.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return fixed size, or 0 */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_col_get_fixed_size(
|
|
|
|
/*====================*/
|
|
|
|
const dict_col_t* col, /*!< in: column */
|
|
|
|
ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/***********************************************************************//**
|
|
|
|
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
|
|
|
|
For fixed length types it is the fixed length of the type, otherwise 0.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return SQL null storage size in ROW_FORMAT=REDUNDANT */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_col_get_sql_null_size(
|
|
|
|
/*=======================*/
|
|
|
|
const dict_col_t* col, /*!< in: column */
|
|
|
|
ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Gets the column number.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return col->ind, table column position (starting from 0) */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_col_get_no(
|
|
|
|
/*============*/
|
|
|
|
const dict_col_t* col) /*!< in: column */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Gets the column position in the clustered index. */
|
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_col_get_clust_pos(
|
|
|
|
/*===================*/
|
|
|
|
const dict_col_t* col, /*!< in: table column */
|
|
|
|
const dict_index_t* clust_index) /*!< in: clustered index */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** Gets the column position in the given index.
|
|
|
|
@param[in] col table column
|
|
|
|
@param[in] index index to be searched for column
|
|
|
|
@return position of column in the given index. */
|
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_col_get_index_pos(
|
|
|
|
const dict_col_t* col,
|
|
|
|
const dict_index_t* index)
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/****************************************************************//**
|
|
|
|
If the given column name is reserved for InnoDB system columns, return
|
|
|
|
TRUE.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE if name is reserved */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool
|
|
|
|
dict_col_name_is_reserved(
|
|
|
|
/*======================*/
|
|
|
|
const char* name) /*!< in: column name */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB
This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with
the notable difference that the file format changes are limited to
repurposing a previously unused data field in B-tree pages.
For persistent InnoDB tables, write the last used AUTO_INCREMENT
value to the root page of the clustered index, in the previously
unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC.
Unlike some other previously unused InnoDB data fields, this one was
actually always zero-initialized, at least since MySQL 3.23.49.
The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the
root page. The SX latch will allow concurrent read access to the root
page. (The field PAGE_ROOT_AUTO_INC will only be read on the
first-time call to ha_innobase::open() from the SQL layer. The
PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so
read/write races are not possible.)
During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level
function btr_cur_search_to_nth_level(), adding no extra page
access. [Adaptive hash index lookup will be disabled during INSERT.]
If some rare UPDATE modifies an AUTO_INCREMENT column, the
PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in
ha_innobase::update_row().
When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC
field.
During ALTER TABLE, the initial AUTO_INCREMENT value will be copied
from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will
update PAGE_ROOT_AUTO_INC in real time.
innodb_col_no(): Determine the dict_table_t::cols[] element index
corresponding to a Field of a non-virtual column.
(The MySQL 5.7 implementation of virtual columns breaks the 1:1
relationship between Field::field_index and dict_table_t::cols[].
Virtual columns are omitted from dict_table_t::cols[]. Therefore,
we must translate the field_index of AUTO_INCREMENT columns into
an index of dict_table_t::cols[].)
Upgrade from old data files:
By default, the AUTO_INCREMENT sequence in old data files would appear
to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain
the value 0 in each clustered index page. In new data files,
PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain
any AUTO_INCREMENT column.
For backward compatibility, we use the old method of
SELECT MAX(auto_increment_column) for initializing the sequence.
btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format
data file.
btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc()
that will resort to reading MAX(auto_increment_column) for data files
that did not use AUTO_INCREMENT yet. It was manually tested that during
the execution of innodb.autoinc_persist the compatibility logic is
not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty
clustered index root pages).
initialize_auto_increment(): Replaces
ha_innobase::innobase_initialize_autoinc(). This initializes
the AUTO_INCREMENT metadata. Only called from ha_innobase::open().
ha_innobase::info_low(): Do not try to lazily initialize
dict_table_t::autoinc. It must already have been initialized by
ha_innobase::open() or ha_innobase::create().
Note: The adjustments to class ha_innopart were not tested, because
the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 18:56:39 +01:00
|
|
|
/** Unconditionally set the AUTO_INCREMENT counter.
|
|
|
|
@param[in,out] table table or partition
|
|
|
|
@param[in] value next available AUTO_INCREMENT value */
|
|
|
|
MY_ATTRIBUTE((nonnull))
|
|
|
|
UNIV_INLINE
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB
This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with
the notable difference that the file format changes are limited to
repurposing a previously unused data field in B-tree pages.
For persistent InnoDB tables, write the last used AUTO_INCREMENT
value to the root page of the clustered index, in the previously
unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC.
Unlike some other previously unused InnoDB data fields, this one was
actually always zero-initialized, at least since MySQL 3.23.49.
The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the
root page. The SX latch will allow concurrent read access to the root
page. (The field PAGE_ROOT_AUTO_INC will only be read on the
first-time call to ha_innobase::open() from the SQL layer. The
PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so
read/write races are not possible.)
During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level
function btr_cur_search_to_nth_level(), adding no extra page
access. [Adaptive hash index lookup will be disabled during INSERT.]
If some rare UPDATE modifies an AUTO_INCREMENT column, the
PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in
ha_innobase::update_row().
When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC
field.
During ALTER TABLE, the initial AUTO_INCREMENT value will be copied
from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will
update PAGE_ROOT_AUTO_INC in real time.
innodb_col_no(): Determine the dict_table_t::cols[] element index
corresponding to a Field of a non-virtual column.
(The MySQL 5.7 implementation of virtual columns breaks the 1:1
relationship between Field::field_index and dict_table_t::cols[].
Virtual columns are omitted from dict_table_t::cols[]. Therefore,
we must translate the field_index of AUTO_INCREMENT columns into
an index of dict_table_t::cols[].)
Upgrade from old data files:
By default, the AUTO_INCREMENT sequence in old data files would appear
to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain
the value 0 in each clustered index page. In new data files,
PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain
any AUTO_INCREMENT column.
For backward compatibility, we use the old method of
SELECT MAX(auto_increment_column) for initializing the sequence.
btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format
data file.
btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc()
that will resort to reading MAX(auto_increment_column) for data files
that did not use AUTO_INCREMENT yet. It was manually tested that during
the execution of innodb.autoinc_persist the compatibility logic is
not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty
clustered index root pages).
initialize_auto_increment(): Replaces
ha_innobase::innobase_initialize_autoinc(). This initializes
the AUTO_INCREMENT metadata. Only called from ha_innobase::open().
ha_innobase::info_low(): Do not try to lazily initialize
dict_table_t::autoinc. It must already have been initialized by
ha_innobase::open() or ha_innobase::create().
Note: The adjustments to class ha_innopart were not tested, because
the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 18:56:39 +01:00
|
|
|
dict_table_autoinc_initialize(dict_table_t* table, ib_uint64_t value)
|
|
|
|
{
|
|
|
|
table->autoinc = value;
|
|
|
|
}
|
2015-10-09 17:21:46 +02:00
|
|
|
|
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB
This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with
the notable difference that the file format changes are limited to
repurposing a previously unused data field in B-tree pages.
For persistent InnoDB tables, write the last used AUTO_INCREMENT
value to the root page of the clustered index, in the previously
unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC.
Unlike some other previously unused InnoDB data fields, this one was
actually always zero-initialized, at least since MySQL 3.23.49.
The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the
root page. The SX latch will allow concurrent read access to the root
page. (The field PAGE_ROOT_AUTO_INC will only be read on the
first-time call to ha_innobase::open() from the SQL layer. The
PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so
read/write races are not possible.)
During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level
function btr_cur_search_to_nth_level(), adding no extra page
access. [Adaptive hash index lookup will be disabled during INSERT.]
If some rare UPDATE modifies an AUTO_INCREMENT column, the
PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in
ha_innobase::update_row().
When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC
field.
During ALTER TABLE, the initial AUTO_INCREMENT value will be copied
from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will
update PAGE_ROOT_AUTO_INC in real time.
innodb_col_no(): Determine the dict_table_t::cols[] element index
corresponding to a Field of a non-virtual column.
(The MySQL 5.7 implementation of virtual columns breaks the 1:1
relationship between Field::field_index and dict_table_t::cols[].
Virtual columns are omitted from dict_table_t::cols[]. Therefore,
we must translate the field_index of AUTO_INCREMENT columns into
an index of dict_table_t::cols[].)
Upgrade from old data files:
By default, the AUTO_INCREMENT sequence in old data files would appear
to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain
the value 0 in each clustered index page. In new data files,
PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain
any AUTO_INCREMENT column.
For backward compatibility, we use the old method of
SELECT MAX(auto_increment_column) for initializing the sequence.
btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format
data file.
btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc()
that will resort to reading MAX(auto_increment_column) for data files
that did not use AUTO_INCREMENT yet. It was manually tested that during
the execution of innodb.autoinc_persist the compatibility logic is
not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty
clustered index root pages).
initialize_auto_increment(): Replaces
ha_innobase::innobase_initialize_autoinc(). This initializes
the AUTO_INCREMENT metadata. Only called from ha_innobase::open().
ha_innobase::info_low(): Do not try to lazily initialize
dict_table_t::autoinc. It must already have been initialized by
ha_innobase::open() or ha_innobase::create().
Note: The adjustments to class ha_innopart were not tested, because
the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 18:56:39 +01:00
|
|
|
/**
|
|
|
|
@param[in] table table or partition
|
|
|
|
@return the next AUTO_INCREMENT counter value
|
|
|
|
@retval 0 if AUTO_INCREMENT is not yet initialized */
|
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result))
|
|
|
|
UNIV_INLINE
|
|
|
|
ib_uint64_t
|
|
|
|
dict_table_autoinc_read(const dict_table_t* table)
|
|
|
|
{
|
|
|
|
return(table->autoinc);
|
|
|
|
}
|
2015-10-09 17:21:46 +02:00
|
|
|
|
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB
This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with
the notable difference that the file format changes are limited to
repurposing a previously unused data field in B-tree pages.
For persistent InnoDB tables, write the last used AUTO_INCREMENT
value to the root page of the clustered index, in the previously
unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC.
Unlike some other previously unused InnoDB data fields, this one was
actually always zero-initialized, at least since MySQL 3.23.49.
The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the
root page. The SX latch will allow concurrent read access to the root
page. (The field PAGE_ROOT_AUTO_INC will only be read on the
first-time call to ha_innobase::open() from the SQL layer. The
PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so
read/write races are not possible.)
During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level
function btr_cur_search_to_nth_level(), adding no extra page
access. [Adaptive hash index lookup will be disabled during INSERT.]
If some rare UPDATE modifies an AUTO_INCREMENT column, the
PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in
ha_innobase::update_row().
When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC
field.
During ALTER TABLE, the initial AUTO_INCREMENT value will be copied
from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will
update PAGE_ROOT_AUTO_INC in real time.
innodb_col_no(): Determine the dict_table_t::cols[] element index
corresponding to a Field of a non-virtual column.
(The MySQL 5.7 implementation of virtual columns breaks the 1:1
relationship between Field::field_index and dict_table_t::cols[].
Virtual columns are omitted from dict_table_t::cols[]. Therefore,
we must translate the field_index of AUTO_INCREMENT columns into
an index of dict_table_t::cols[].)
Upgrade from old data files:
By default, the AUTO_INCREMENT sequence in old data files would appear
to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain
the value 0 in each clustered index page. In new data files,
PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain
any AUTO_INCREMENT column.
For backward compatibility, we use the old method of
SELECT MAX(auto_increment_column) for initializing the sequence.
btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format
data file.
btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc()
that will resort to reading MAX(auto_increment_column) for data files
that did not use AUTO_INCREMENT yet. It was manually tested that during
the execution of innodb.autoinc_persist the compatibility logic is
not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty
clustered index root pages).
initialize_auto_increment(): Replaces
ha_innobase::innobase_initialize_autoinc(). This initializes
the AUTO_INCREMENT metadata. Only called from ha_innobase::open().
ha_innobase::info_low(): Do not try to lazily initialize
dict_table_t::autoinc. It must already have been initialized by
ha_innobase::open() or ha_innobase::create().
Note: The adjustments to class ha_innopart were not tested, because
the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 18:56:39 +01:00
|
|
|
/** Update the AUTO_INCREMENT sequence if the value supplied is greater
|
|
|
|
than the current value.
|
|
|
|
@param[in,out] table table or partition
|
|
|
|
@param[in] value AUTO_INCREMENT value that was assigned to a row
|
|
|
|
@return whether the AUTO_INCREMENT sequence was updated */
|
|
|
|
MY_ATTRIBUTE((nonnull))
|
|
|
|
UNIV_INLINE
|
|
|
|
bool
|
|
|
|
dict_table_autoinc_update_if_greater(dict_table_t* table, ib_uint64_t value)
|
|
|
|
{
|
|
|
|
if (value > table->autoinc) {
|
|
|
|
|
|
|
|
table->autoinc = value;
|
|
|
|
return(true);
|
|
|
|
}
|
|
|
|
|
|
|
|
return(false);
|
|
|
|
}
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/**********************************************************************//**
|
|
|
|
Adds system columns to a table object. */
|
|
|
|
void
|
|
|
|
dict_table_add_system_columns(
|
|
|
|
/*==========================*/
|
|
|
|
dict_table_t* table, /*!< in/out: table */
|
|
|
|
mem_heap_t* heap) /*!< in: temporary heap */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2015-10-09 17:21:46 +02:00
|
|
|
/**********************************************************************//**
|
2014-02-26 19:11:54 +01:00
|
|
|
Renames a table object.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE if success */
|
2014-02-26 19:11:54 +01:00
|
|
|
dberr_t
|
|
|
|
dict_table_rename_in_cache(
|
|
|
|
/*=======================*/
|
|
|
|
dict_table_t* table, /*!< in/out: table */
|
MDEV-13542: Crashing on corrupted page is unhelpful
The approach to handling corruption that was chosen by Oracle in
commit 177d8b0c125b841c0650d27d735e3b87509dc286
is not really useful. Not only did it actually fail to prevent InnoDB
from crashing, but it is making things worse by blocking attempts to
rescue data from or rebuild a partially readable table.
We will try to prevent crashes in a different way: by propagating
errors up the call stack. We will never mark the clustered index
persistently corrupted, so that data recovery may be attempted by
reading from the table, or by rebuilding the table.
This should also fix MDEV-13680 (crash on btr_page_alloc() failure);
it was extensively tested with innodb_file_per_table=0 and a
non-autoextend system tablespace.
We should now avoid crashes in many cases, such as when a page
cannot be read or allocated, or an inconsistency is detected when
attempting to update multiple pages. We will not crash on double-free,
such as on the recovery of DDL in system tablespace in case something
was corrupted.
Crashes on corrupted data are still possible. The fault injection mechanism
that is introduced in the subsequent commit may help catch more of them.
buf_page_import_corrupt_failure: Remove the fault injection, and instead
corrupt some pages using Perl code in the tests.
btr_cur_pessimistic_insert(): Always reserve extents (except for the
change buffer), in order to prevent a subsequent allocation failure.
btr_pcur_open_at_rnd_pos(): Merged to the only caller ibuf_merge_pages().
btr_assert_not_corrupted(), btr_corruption_report(): Remove.
Similar checks are already part of btr_block_get().
FSEG_MAGIC_N_BYTES: Replaces FSEG_MAGIC_N_VALUE.
dict_hdr_get(), trx_rsegf_get_new(), trx_undo_page_get(),
trx_undo_page_get_s_latched(): Replaced with error-checking calls.
trx_rseg_t::get(mtr_t*): Replaces trx_rsegf_get().
trx_rseg_header_create(): Let the caller update the TRX_SYS page if needed.
trx_sys_create_sys_pages(): Merged with trx_sysf_create().
dict_check_tablespaces_and_store_max_id(): Do not access
DICT_HDR_MAX_SPACE_ID, because it was already recovered in dict_boot().
Merge dict_check_sys_tables() with this function.
dir_pathname(): Replaces os_file_make_new_pathname().
row_undo_ins_remove_sec(): Do not modify the undo page by adding
a terminating NUL byte to the record.
btr_decryption_failed(): Report decryption failures
dict_set_corrupted_by_space(), dict_set_encrypted_by_space(),
dict_set_corrupted_index_cache_only(): Remove.
dict_set_corrupted(): Remove the constant parameter dict_locked=false.
Never flag the clustered index corrupted in SYS_INDEXES, because
that would deny further access to the table. It might be possible to
repair the table by executing ALTER TABLE or OPTIMIZE TABLE, in case
no B-tree leaf page is corrupted.
dict_table_skip_corrupt_index(), dict_table_next_uncorrupted_index(),
row_purge_skip_uncommitted_virtual_index(): Remove, and refactor
the callers to read dict_index_t::type only once.
dict_table_is_corrupted(): Remove.
dict_index_t::is_btree(): Determine if the index is a valid B-tree.
BUF_GET_NO_LATCH, BUF_EVICT_IF_IN_POOL: Remove.
UNIV_BTR_DEBUG: Remove. Any inconsistency will no longer trigger
assertion failures, but error codes being returned.
buf_corrupt_page_release(): Replaced with a direct call to
buf_pool.corrupted_evict().
fil_invalid_page_access_msg(): Never crash on an invalid read;
let the caller of buf_page_get_gen() decide.
btr_pcur_t::restore_position(): Propagate failure status to the caller
by returning CORRUPTED.
opt_search_plan_for_table(): Simplify the code.
row_purge_del_mark(), row_purge_upd_exist_or_extern_func(),
row_undo_ins_remove_sec_rec(), row_undo_mod_upd_del_sec(),
row_undo_mod_del_mark_sec(): Avoid mem_heap_create()/mem_heap_free()
when no secondary indexes exist.
row_undo_mod_upd_exist_sec(): Simplify the code.
row_upd_clust_step(), dict_load_table_one(): Return DB_TABLE_CORRUPT
if the clustered index (and therefore the table) is corrupted, similar
to what we do in row_insert_for_mysql().
fut_get_ptr(): Replace with buf_page_get_gen() calls.
buf_page_get_gen(): Return nullptr and *err=DB_CORRUPTION
if the page is marked as freed. For other modes than
BUF_GET_POSSIBLY_FREED or BUF_PEEK_IF_IN_POOL this will
trigger a debug assertion failure. For BUF_GET_POSSIBLY_FREED,
we will return nullptr for freed pages, so that the callers
can be simplified. The purge of transaction history will be
a new user of BUF_GET_POSSIBLY_FREED, to avoid crashes on
corrupted data.
buf_page_get_low(): Never crash on a corrupted page, but simply
return nullptr.
fseg_page_is_allocated(): Replaces fseg_page_is_free().
fts_drop_common_tables(): Return an error if the transaction
was rolled back.
fil_space_t::set_corrupted(): Report a tablespace as corrupted if
it was not reported already.
fil_space_t::io(): Invoke fil_space_t::set_corrupted() to report
out-of-bounds page access or other errors.
Clean up mtr_t::page_lock()
buf_page_get_low(): Validate the page identifier (to check for
recently read corrupted pages) after acquiring the page latch.
buf_page_t::read_complete(): Flag uninitialized (all-zero) pages
with DB_FAIL. Return DB_PAGE_CORRUPTED on page number mismatch.
mtr_t::defer_drop_ahi(): Renamed from mtr_defer_drop_ahi().
recv_sys_t::free_corrupted_page(): Only set_corrupt_fs()
if any log records exist for the page. We do not mind if read-ahead
produces corrupted (or all-zero) pages that were not actually needed
during recovery.
recv_recover_page(): Return whether the operation succeeded.
recv_sys_t::recover_low(): Simplify the logic. Check for recovery error.
Thanks to Matthias Leich for testing this extensively and to the
authors of https://rr-project.org for making it easy to diagnose
and fix any failures that were found during the testing.
2022-06-06 13:03:22 +02:00
|
|
|
span<const char> new_name, /*!< in: new name */
|
|
|
|
bool replace_new_file)
|
MDEV-17158 TRUNCATE is not atomic after MDEV-13564
It turned out that ha_innobase::truncate() would prematurely
commit the transaction already before the completion of the
ha_innobase::create(). All of this must be atomic.
innodb.truncate_crash: Use the correct DEBUG_SYNC point, and
tolerate non-truncation of the table, because the redo log
for the TRUNCATE transaction commit might be flushed due to
some InnoDB background activity.
dict_build_tablespace_for_table(): Merge to the function
dict_build_table_def_step().
dict_build_table_def_step(): If a table is being created during
an already started data dictionary transaction (such as TRUNCATE),
persistently write the table_id to the undo log header before
creating any file. In this way, the recovery of TRUNCATE will be
able to delete the new file before rolling back the rename of
the original table.
dict_table_rename_in_cache(): Add the parameter replace_new_file,
used as part of rolling back a TRUNCATE operation.
fil_rename_tablespace_check(): Add the parameter replace_new.
If the parameter is set and a file identified by new_path exists,
remove a possible tablespace and also the file.
create_table_info_t::create_table_def(): Remove some debug assertions
that no longer hold. During TRUNCATE, the transaction will already
have been started (and performed a rename operation) before the
table is created. Also, remove a call to dict_build_tablespace_for_table().
create_table_info_t::create_table(): Add the parameter create_fk=true.
During TRUNCATE TABLE, do not add FOREIGN KEY constraints to the
InnoDB data dictionary, because they will also not be removed.
row_table_add_foreign_constraints(): If trx=NULL, do not modify
the InnoDB data dictionary, but only load the FOREIGN KEY constraints
from the data dictionary.
ha_innobase::create(): Lock the InnoDB data dictionary cache only
if no transaction was passed by the caller. Unlock it in any case.
innobase_rename_table(): Add the parameter commit = true.
If !commit, do not lock or unlock the data dictionary cache.
ha_innobase::truncate(): Lock the data dictionary before invoking
rename or create, and let ha_innobase::create() unlock it and
also commit or roll back the transaction.
trx_undo_mark_as_dict(): Renamed from trx_undo_mark_as_dict_operation()
and declared global instead of static.
row_undo_ins_parse_undo_rec(): If table_id is set, this must
be rolling back the rename operation in TRUNCATE TABLE, and
therefore replace_new_file=true.
2018-09-10 13:59:58 +02:00
|
|
|
/*!< in: whether to replace the
|
|
|
|
file with the new name
|
|
|
|
(as part of rolling back TRUNCATE) */
|
2017-12-20 21:19:47 +01:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** Removes an index from the dictionary cache.
|
|
|
|
@param[in,out] table table whose index to remove
|
|
|
|
@param[in,out] index index to remove, this object is destroyed and must not
|
|
|
|
be accessed by the caller afterwards */
|
2014-02-26 19:11:54 +01:00
|
|
|
void
|
|
|
|
dict_index_remove_from_cache(
|
2016-08-12 10:17:45 +02:00
|
|
|
dict_table_t* table,
|
|
|
|
dict_index_t* index);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Change the id of a table object in the dictionary cache. This is used in
|
|
|
|
DISCARD TABLESPACE. */
|
|
|
|
void
|
|
|
|
dict_table_change_id_in_cache(
|
|
|
|
/*==========================*/
|
|
|
|
dict_table_t* table, /*!< in/out: table object already in cache */
|
|
|
|
table_id_t new_id) /*!< in: new id to set */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Removes a foreign constraint struct from the dictionary cache. */
|
|
|
|
void
|
|
|
|
dict_foreign_remove_from_cache(
|
|
|
|
/*===========================*/
|
|
|
|
dict_foreign_t* foreign) /*!< in, own: foreign constraint */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Adds a foreign key constraint object to the dictionary cache. May free
|
|
|
|
the object if there already is an object with the same identifier in.
|
|
|
|
At least one of foreign table or referenced table must already be in
|
|
|
|
the dictionary cache!
|
2016-08-12 10:17:45 +02:00
|
|
|
@return DB_SUCCESS or error code */
|
2014-02-26 19:11:54 +01:00
|
|
|
dberr_t
|
|
|
|
dict_foreign_add_to_cache(
|
|
|
|
/*======================*/
|
|
|
|
dict_foreign_t* foreign,
|
|
|
|
/*!< in, own: foreign key constraint */
|
|
|
|
const char** col_names,
|
|
|
|
/*!< in: column names, or NULL to use
|
|
|
|
foreign->foreign_table->col_names */
|
|
|
|
bool check_charsets,
|
|
|
|
/*!< in: whether to check charset
|
|
|
|
compatibility */
|
|
|
|
dict_err_ignore_t ignore_err)
|
|
|
|
/*!< in: error to be ignored */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Replace the index passed in with another equivalent index in the
|
|
|
|
foreign key lists of the table.
|
|
|
|
@return whether all replacements were found */
|
|
|
|
bool
|
|
|
|
dict_foreign_replace_index(
|
|
|
|
/*=======================*/
|
|
|
|
dict_table_t* table, /*!< in/out: table */
|
|
|
|
const char** col_names,
|
|
|
|
/*!< in: column names, or NULL
|
|
|
|
to use table->col_names */
|
|
|
|
const dict_index_t* index) /*!< in: index to be replaced */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
|
|
|
|
@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
|
|
|
|
constraint id does not match */
|
|
|
|
dberr_t
|
|
|
|
dict_foreign_parse_drop_constraints(
|
|
|
|
/*================================*/
|
|
|
|
mem_heap_t* heap, /*!< in: heap from which we can
|
|
|
|
allocate memory */
|
|
|
|
trx_t* trx, /*!< in: transaction */
|
|
|
|
dict_table_t* table, /*!< in: table */
|
|
|
|
ulint* n, /*!< out: number of constraints
|
|
|
|
to drop */
|
|
|
|
const char*** constraints_to_drop) /*!< out: id's of the
|
|
|
|
constraints to drop */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Returns a table object and increments its open handle count.
|
|
|
|
NOTE! This is a high-level function to be used mainly from outside the
|
|
|
|
'dict' directory. Inside this directory dict_table_get_low
|
|
|
|
is usually the appropriate function.
|
2016-08-12 10:17:45 +02:00
|
|
|
@param[in] table_name Table name
|
MDEV-25919: Lock tables before acquiring dict_sys.latch
In commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 (MDEV-25506 part 3)
we introduced a "fake instant timeout" when a transaction would wait
for a table or record lock while holding dict_sys.latch. This prevented
a deadlock of the server but could cause bogus errors for operations
on the InnoDB persistent statistics tables.
A better fix is to ensure that whenever a transaction is being
executed in the InnoDB internal SQL parser (which will for now
require dict_sys.latch to be held), it will already have acquired
all locks that could be required for the execution. So, we will
acquire the following locks upfront, before acquiring dict_sys.latch:
(1) MDL on the affected user table (acquired by the SQL layer)
(2) If applicable (not for RENAME TABLE): InnoDB table lock
(3) If persistent statistics are going to be modified:
(3.a) MDL_SHARED on mysql.innodb_table_stats, mysql.innodb_index_stats
(3.b) exclusive table locks on the statistics tables
(4) Exclusive table locks on the InnoDB data dictionary tables
(not needed in ANALYZE TABLE and the like)
Note: Acquiring exclusive locks on the statistics tables may cause
more locking conflicts between concurrent DDL operations.
Notably, RENAME TABLE will lock the statistics tables
even if no persistent statistics are enabled for the table.
DROP DATABASE will only acquire locks on statistics tables if
persistent statistics are enabled for the tables on which the
SQL layer is invoking ha_innobase::delete_table().
For any "garbage collection" in innodb_drop_database(), a timeout
while acquiring locks on the statistics tables will result in any
statistics not being deleted for any tables that the SQL layer
did not know about.
If innodb_defragment=ON, information may be written to the statistics
tables even for tables for which InnoDB persistent statistics are
disabled. But, DROP TABLE will no longer attempt to delete that
information if persistent statistics are not enabled for the table.
This change should also fix the hangs related to InnoDB persistent
statistics and STATS_AUTO_RECALC (MDEV-15020) as well as
a bug that running ALTER TABLE on the statistics tables
concurrently with running ALTER TABLE on InnoDB tables could
cause trouble.
lock_rec_enqueue_waiting(), lock_table_enqueue_waiting():
Do not issue a fake instant timeout error when the transaction
is holding dict_sys.latch. Instead, assert that the dict_sys.latch
is never being held here.
lock_sys_tables(): A new function to acquire exclusive locks on all
dictionary tables, in case DROP TABLE or similar operation is
being executed. Locking non-hard-coded tables is optional to avoid
a crash in row_merge_drop_temp_indexes(). The SYS_VIRTUAL table was
introduced in MySQL 5.7 and MariaDB Server 10.2. Normally, we require
all these dictionary tables to exist before executing any DDL, but
the function row_merge_drop_temp_indexes() is an exception.
When upgrading from MariaDB Server 10.1 or MySQL 5.6 or earlier,
the table SYS_VIRTUAL would not exist at this point.
ha_innobase::commit_inplace_alter_table(): Invoke
log_write_up_to() while not holding dict_sys.latch.
dict_sys_t::remove(), dict_table_close(): No longer try to
drop index stubs that were left behind by aborted online ADD INDEX.
Such indexes should be dropped from the InnoDB data dictionary by
row_merge_drop_indexes() as part of the failed DDL operation.
Stubs for aborted indexes may only be left behind in the
data dictionary cache.
dict_stats_fetch_from_ps(): Use a normal read-only transaction.
ha_innobase::delete_table(), ha_innobase::truncate(), fts_lock_table():
While waiting for purge to stop using the table,
do not hold dict_sys.latch.
ha_innobase::delete_table(): Implement a work-around for the rollback
of ALTER TABLE...ADD PARTITION. MDL_EXCLUSIVE would not be held if
ALTER TABLE hits lock_wait_timeout while trying to upgrade the MDL
due to a conflicting LOCK TABLES, such as in the first ALTER TABLE
in the test case of Bug#53676 in parts.partition_special_innodb.
Therefore, we must explicitly stop purge, because it would not be
stopped by MDL.
dict_stats_func(), btr_defragment_chunk(): Allocate a THD so that
we can acquire MDL on the InnoDB persistent statistics tables.
mysqltest_embedded: Invoke ha_pre_shutdown() before free_used_memory()
in order to avoid ASAN heap-use-after-free related to acquire_thd().
trx_t::dict_operation_lock_mode: Changed the type to bool.
row_mysql_lock_data_dictionary(), row_mysql_unlock_data_dictionary():
Implemented as macros.
rollback_inplace_alter_table(): Apply an infinite timeout to lock waits.
innodb_thd_increment_pending_ops(): Wrapper for
thd_increment_pending_ops(). Never attempt async operation for
InnoDB background threads, such as the trx_t::commit() in
dict_stats_process_entry_from_recalc_pool().
lock_sys_t::cancel(trx_t*): Make dictionary transactions immune to KILL.
lock_wait(): Make dictionary transactions immune to KILL, and to
lock wait timeout when waiting for locks on dictionary tables.
parts.partition_special_innodb: Use lock_wait_timeout=0 to instantly
get ER_LOCK_WAIT_TIMEOUT.
main.mdl: Filter out MDL on InnoDB persistent statistics tables
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:44 +02:00
|
|
|
@param[in] dict_locked whether dict_sys.latch is being held exclusively
|
2016-08-12 10:17:45 +02:00
|
|
|
@param[in] ignore_err error to be ignored when loading the table
|
MDEV-25919: Lock tables before acquiring dict_sys.latch
In commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 (MDEV-25506 part 3)
we introduced a "fake instant timeout" when a transaction would wait
for a table or record lock while holding dict_sys.latch. This prevented
a deadlock of the server but could cause bogus errors for operations
on the InnoDB persistent statistics tables.
A better fix is to ensure that whenever a transaction is being
executed in the InnoDB internal SQL parser (which will for now
require dict_sys.latch to be held), it will already have acquired
all locks that could be required for the execution. So, we will
acquire the following locks upfront, before acquiring dict_sys.latch:
(1) MDL on the affected user table (acquired by the SQL layer)
(2) If applicable (not for RENAME TABLE): InnoDB table lock
(3) If persistent statistics are going to be modified:
(3.a) MDL_SHARED on mysql.innodb_table_stats, mysql.innodb_index_stats
(3.b) exclusive table locks on the statistics tables
(4) Exclusive table locks on the InnoDB data dictionary tables
(not needed in ANALYZE TABLE and the like)
Note: Acquiring exclusive locks on the statistics tables may cause
more locking conflicts between concurrent DDL operations.
Notably, RENAME TABLE will lock the statistics tables
even if no persistent statistics are enabled for the table.
DROP DATABASE will only acquire locks on statistics tables if
persistent statistics are enabled for the tables on which the
SQL layer is invoking ha_innobase::delete_table().
For any "garbage collection" in innodb_drop_database(), a timeout
while acquiring locks on the statistics tables will result in any
statistics not being deleted for any tables that the SQL layer
did not know about.
If innodb_defragment=ON, information may be written to the statistics
tables even for tables for which InnoDB persistent statistics are
disabled. But, DROP TABLE will no longer attempt to delete that
information if persistent statistics are not enabled for the table.
This change should also fix the hangs related to InnoDB persistent
statistics and STATS_AUTO_RECALC (MDEV-15020) as well as
a bug that running ALTER TABLE on the statistics tables
concurrently with running ALTER TABLE on InnoDB tables could
cause trouble.
lock_rec_enqueue_waiting(), lock_table_enqueue_waiting():
Do not issue a fake instant timeout error when the transaction
is holding dict_sys.latch. Instead, assert that the dict_sys.latch
is never being held here.
lock_sys_tables(): A new function to acquire exclusive locks on all
dictionary tables, in case DROP TABLE or similar operation is
being executed. Locking non-hard-coded tables is optional to avoid
a crash in row_merge_drop_temp_indexes(). The SYS_VIRTUAL table was
introduced in MySQL 5.7 and MariaDB Server 10.2. Normally, we require
all these dictionary tables to exist before executing any DDL, but
the function row_merge_drop_temp_indexes() is an exception.
When upgrading from MariaDB Server 10.1 or MySQL 5.6 or earlier,
the table SYS_VIRTUAL would not exist at this point.
ha_innobase::commit_inplace_alter_table(): Invoke
log_write_up_to() while not holding dict_sys.latch.
dict_sys_t::remove(), dict_table_close(): No longer try to
drop index stubs that were left behind by aborted online ADD INDEX.
Such indexes should be dropped from the InnoDB data dictionary by
row_merge_drop_indexes() as part of the failed DDL operation.
Stubs for aborted indexes may only be left behind in the
data dictionary cache.
dict_stats_fetch_from_ps(): Use a normal read-only transaction.
ha_innobase::delete_table(), ha_innobase::truncate(), fts_lock_table():
While waiting for purge to stop using the table,
do not hold dict_sys.latch.
ha_innobase::delete_table(): Implement a work-around for the rollback
of ALTER TABLE...ADD PARTITION. MDL_EXCLUSIVE would not be held if
ALTER TABLE hits lock_wait_timeout while trying to upgrade the MDL
due to a conflicting LOCK TABLES, such as in the first ALTER TABLE
in the test case of Bug#53676 in parts.partition_special_innodb.
Therefore, we must explicitly stop purge, because it would not be
stopped by MDL.
dict_stats_func(), btr_defragment_chunk(): Allocate a THD so that
we can acquire MDL on the InnoDB persistent statistics tables.
mysqltest_embedded: Invoke ha_pre_shutdown() before free_used_memory()
in order to avoid ASAN heap-use-after-free related to acquire_thd().
trx_t::dict_operation_lock_mode: Changed the type to bool.
row_mysql_lock_data_dictionary(), row_mysql_unlock_data_dictionary():
Implemented as macros.
rollback_inplace_alter_table(): Apply an infinite timeout to lock waits.
innodb_thd_increment_pending_ops(): Wrapper for
thd_increment_pending_ops(). Never attempt async operation for
InnoDB background threads, such as the trx_t::commit() in
dict_stats_process_entry_from_recalc_pool().
lock_sys_t::cancel(trx_t*): Make dictionary transactions immune to KILL.
lock_wait(): Make dictionary transactions immune to KILL, and to
lock wait timeout when waiting for locks on dictionary tables.
parts.partition_special_innodb: Use lock_wait_timeout=0 to instantly
get ER_LOCK_WAIT_TIMEOUT.
main.mdl: Filter out MDL on InnoDB persistent statistics tables
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:44 +02:00
|
|
|
@return table
|
|
|
|
@retval nullptr if does not exist */
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_table_t*
|
|
|
|
dict_table_open_on_name(
|
2016-08-12 10:17:45 +02:00
|
|
|
const char* table_name,
|
MDEV-25919: Lock tables before acquiring dict_sys.latch
In commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 (MDEV-25506 part 3)
we introduced a "fake instant timeout" when a transaction would wait
for a table or record lock while holding dict_sys.latch. This prevented
a deadlock of the server but could cause bogus errors for operations
on the InnoDB persistent statistics tables.
A better fix is to ensure that whenever a transaction is being
executed in the InnoDB internal SQL parser (which will for now
require dict_sys.latch to be held), it will already have acquired
all locks that could be required for the execution. So, we will
acquire the following locks upfront, before acquiring dict_sys.latch:
(1) MDL on the affected user table (acquired by the SQL layer)
(2) If applicable (not for RENAME TABLE): InnoDB table lock
(3) If persistent statistics are going to be modified:
(3.a) MDL_SHARED on mysql.innodb_table_stats, mysql.innodb_index_stats
(3.b) exclusive table locks on the statistics tables
(4) Exclusive table locks on the InnoDB data dictionary tables
(not needed in ANALYZE TABLE and the like)
Note: Acquiring exclusive locks on the statistics tables may cause
more locking conflicts between concurrent DDL operations.
Notably, RENAME TABLE will lock the statistics tables
even if no persistent statistics are enabled for the table.
DROP DATABASE will only acquire locks on statistics tables if
persistent statistics are enabled for the tables on which the
SQL layer is invoking ha_innobase::delete_table().
For any "garbage collection" in innodb_drop_database(), a timeout
while acquiring locks on the statistics tables will result in any
statistics not being deleted for any tables that the SQL layer
did not know about.
If innodb_defragment=ON, information may be written to the statistics
tables even for tables for which InnoDB persistent statistics are
disabled. But, DROP TABLE will no longer attempt to delete that
information if persistent statistics are not enabled for the table.
This change should also fix the hangs related to InnoDB persistent
statistics and STATS_AUTO_RECALC (MDEV-15020) as well as
a bug that running ALTER TABLE on the statistics tables
concurrently with running ALTER TABLE on InnoDB tables could
cause trouble.
lock_rec_enqueue_waiting(), lock_table_enqueue_waiting():
Do not issue a fake instant timeout error when the transaction
is holding dict_sys.latch. Instead, assert that the dict_sys.latch
is never being held here.
lock_sys_tables(): A new function to acquire exclusive locks on all
dictionary tables, in case DROP TABLE or similar operation is
being executed. Locking non-hard-coded tables is optional to avoid
a crash in row_merge_drop_temp_indexes(). The SYS_VIRTUAL table was
introduced in MySQL 5.7 and MariaDB Server 10.2. Normally, we require
all these dictionary tables to exist before executing any DDL, but
the function row_merge_drop_temp_indexes() is an exception.
When upgrading from MariaDB Server 10.1 or MySQL 5.6 or earlier,
the table SYS_VIRTUAL would not exist at this point.
ha_innobase::commit_inplace_alter_table(): Invoke
log_write_up_to() while not holding dict_sys.latch.
dict_sys_t::remove(), dict_table_close(): No longer try to
drop index stubs that were left behind by aborted online ADD INDEX.
Such indexes should be dropped from the InnoDB data dictionary by
row_merge_drop_indexes() as part of the failed DDL operation.
Stubs for aborted indexes may only be left behind in the
data dictionary cache.
dict_stats_fetch_from_ps(): Use a normal read-only transaction.
ha_innobase::delete_table(), ha_innobase::truncate(), fts_lock_table():
While waiting for purge to stop using the table,
do not hold dict_sys.latch.
ha_innobase::delete_table(): Implement a work-around for the rollback
of ALTER TABLE...ADD PARTITION. MDL_EXCLUSIVE would not be held if
ALTER TABLE hits lock_wait_timeout while trying to upgrade the MDL
due to a conflicting LOCK TABLES, such as in the first ALTER TABLE
in the test case of Bug#53676 in parts.partition_special_innodb.
Therefore, we must explicitly stop purge, because it would not be
stopped by MDL.
dict_stats_func(), btr_defragment_chunk(): Allocate a THD so that
we can acquire MDL on the InnoDB persistent statistics tables.
mysqltest_embedded: Invoke ha_pre_shutdown() before free_used_memory()
in order to avoid ASAN heap-use-after-free related to acquire_thd().
trx_t::dict_operation_lock_mode: Changed the type to bool.
row_mysql_lock_data_dictionary(), row_mysql_unlock_data_dictionary():
Implemented as macros.
rollback_inplace_alter_table(): Apply an infinite timeout to lock waits.
innodb_thd_increment_pending_ops(): Wrapper for
thd_increment_pending_ops(). Never attempt async operation for
InnoDB background threads, such as the trx_t::commit() in
dict_stats_process_entry_from_recalc_pool().
lock_sys_t::cancel(trx_t*): Make dictionary transactions immune to KILL.
lock_wait(): Make dictionary transactions immune to KILL, and to
lock wait timeout when waiting for locks on dictionary tables.
parts.partition_special_innodb: Use lock_wait_timeout=0 to instantly
get ER_LOCK_WAIT_TIMEOUT.
main.mdl: Filter out MDL on InnoDB persistent statistics tables
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:44 +02:00
|
|
|
bool dict_locked,
|
2016-08-12 10:17:45 +02:00
|
|
|
dict_err_ignore_t ignore_err)
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-08-20 11:52:57 +02:00
|
|
|
/** Outcome of dict_foreign_find_index() or dict_foreign_qualify_index() */
|
|
|
|
enum fkerr_t
|
|
|
|
{
|
|
|
|
/** A backing index was found for a FOREIGN KEY constraint */
|
|
|
|
FK_SUCCESS = 0,
|
|
|
|
/** There is no index that covers the columns in the constraint. */
|
|
|
|
FK_INDEX_NOT_FOUND,
|
|
|
|
/** The index is for a prefix index, not a full column. */
|
|
|
|
FK_IS_PREFIX_INDEX,
|
|
|
|
/** A condition of SET NULL conflicts with a NOT NULL column. */
|
|
|
|
FK_COL_NOT_NULL,
|
|
|
|
/** The column types do not match */
|
|
|
|
FK_COLS_NOT_EQUAL
|
|
|
|
};
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Tries to find an index whose first fields are the columns in the array,
|
|
|
|
in the same order and is not marked for deletion and is not the same
|
|
|
|
as types_idx.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return matching index, NULL if not found */
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_t*
|
|
|
|
dict_foreign_find_index(
|
|
|
|
/*====================*/
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
|
|
|
const char** col_names,
|
|
|
|
/*!< in: column names, or NULL
|
|
|
|
to use table->col_names */
|
|
|
|
const char** columns,/*!< in: array of column names */
|
|
|
|
ulint n_cols, /*!< in: number of columns */
|
|
|
|
const dict_index_t* types_idx,
|
|
|
|
/*!< in: NULL or an index
|
|
|
|
whose types the column types
|
|
|
|
must match */
|
|
|
|
bool check_charsets,
|
|
|
|
/*!< in: whether to check
|
|
|
|
charsets. only has an effect
|
|
|
|
if types_idx != NULL */
|
2015-08-03 22:09:43 +02:00
|
|
|
ulint check_null,
|
2014-02-26 19:11:54 +01:00
|
|
|
/*!< in: nonzero if none of
|
|
|
|
the columns must be declared
|
|
|
|
NOT NULL */
|
2019-08-20 11:52:57 +02:00
|
|
|
fkerr_t* error = NULL, /*!< out: error code */
|
|
|
|
ulint* err_col_no = NULL,
|
2015-08-03 22:09:43 +02:00
|
|
|
/*!< out: column number where
|
|
|
|
error happened */
|
2019-08-20 11:52:57 +02:00
|
|
|
dict_index_t** err_index = NULL)
|
2016-09-06 08:43:16 +02:00
|
|
|
/*!< out: index where error
|
2015-08-03 22:09:43 +02:00
|
|
|
happened */
|
|
|
|
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
|
2016-09-06 08:43:16 +02:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Returns a virtual column's name.
|
|
|
|
@param[in] table table object
|
|
|
|
@param[in] col_nr virtual column number(nth virtual column)
|
|
|
|
@return column name. */
|
|
|
|
const char*
|
|
|
|
dict_table_get_v_col_name(
|
|
|
|
const dict_table_t* table,
|
|
|
|
ulint col_nr);
|
|
|
|
|
2016-09-06 08:43:16 +02:00
|
|
|
/** Check if the table has a given column.
|
|
|
|
@param[in] table table object
|
2016-08-12 10:17:45 +02:00
|
|
|
@param[in] col_name column name
|
|
|
|
@param[in] col_nr column number guessed, 0 as default
|
|
|
|
@return column number if the table has the specified column,
|
|
|
|
otherwise table->n_def */
|
|
|
|
ulint
|
|
|
|
dict_table_has_column(
|
|
|
|
const dict_table_t* table,
|
|
|
|
const char* col_name,
|
|
|
|
ulint col_nr = 0);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Outputs info on foreign keys of a table. */
|
2015-12-14 13:34:32 +01:00
|
|
|
std::string
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_print_info_on_foreign_keys(
|
|
|
|
/*============================*/
|
|
|
|
ibool create_table_format, /*!< in: if TRUE then print in
|
|
|
|
a format suitable to be inserted into
|
|
|
|
a CREATE TABLE, otherwise in the format
|
|
|
|
of SHOW TABLE STATUS */
|
|
|
|
trx_t* trx, /*!< in: transaction */
|
2015-12-14 13:34:32 +01:00
|
|
|
dict_table_t* table); /*!< in: table */
|
2016-09-06 08:43:16 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Outputs info on a foreign key of a table in a format suitable for
|
|
|
|
CREATE TABLE. */
|
2015-12-14 13:34:32 +01:00
|
|
|
std::string
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_print_info_on_foreign_key_in_create_format(
|
|
|
|
/*============================================*/
|
|
|
|
trx_t* trx, /*!< in: transaction */
|
|
|
|
dict_foreign_t* foreign, /*!< in: foreign key constraint */
|
2015-12-14 13:34:32 +01:00
|
|
|
ibool add_newline); /*!< in: whether to add a newline */
|
2016-09-06 08:43:16 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Tries to find an index whose first fields are the columns in the array,
|
|
|
|
in the same order and is not marked for deletion and is not the same
|
|
|
|
as types_idx.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return matching index, NULL if not found */
|
2014-02-26 19:11:54 +01:00
|
|
|
bool
|
|
|
|
dict_foreign_qualify_index(
|
|
|
|
/*====================*/
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
|
|
|
const char** col_names,
|
|
|
|
/*!< in: column names, or NULL
|
|
|
|
to use table->col_names */
|
|
|
|
const char** columns,/*!< in: array of column names */
|
|
|
|
ulint n_cols, /*!< in: number of columns */
|
|
|
|
const dict_index_t* index, /*!< in: index to check */
|
|
|
|
const dict_index_t* types_idx,
|
|
|
|
/*!< in: NULL or an index
|
|
|
|
whose types the column types
|
|
|
|
must match */
|
|
|
|
bool check_charsets,
|
|
|
|
/*!< in: whether to check
|
|
|
|
charsets. only has an effect
|
|
|
|
if types_idx != NULL */
|
2015-08-03 22:09:43 +02:00
|
|
|
ulint check_null,
|
2014-02-26 19:11:54 +01:00
|
|
|
/*!< in: nonzero if none of
|
|
|
|
the columns must be declared
|
|
|
|
NOT NULL */
|
2019-08-20 11:52:57 +02:00
|
|
|
fkerr_t* error, /*!< out: error code */
|
2015-08-03 22:09:43 +02:00
|
|
|
ulint* err_col_no,
|
|
|
|
/*!< out: column number where
|
|
|
|
error happened */
|
|
|
|
dict_index_t** err_index)
|
|
|
|
/*!< out: index where error
|
|
|
|
happened */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/********************************************************************//**
|
|
|
|
Gets the first index on the table (the clustered index).
|
2016-08-12 10:17:45 +02:00
|
|
|
@return index, NULL if none exists */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
dict_index_t*
|
|
|
|
dict_table_get_first_index(
|
|
|
|
/*=======================*/
|
|
|
|
const dict_table_t* table) /*!< in: table */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the last index on the table.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return index, NULL if none exists */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
dict_index_t*
|
|
|
|
dict_table_get_last_index(
|
|
|
|
/*=======================*/
|
|
|
|
const dict_table_t* table) /*!< in: table */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the next index on the table.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return index, NULL if none left */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
dict_index_t*
|
|
|
|
dict_table_get_next_index(
|
|
|
|
/*======================*/
|
|
|
|
const dict_index_t* index) /*!< in: index */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
#else /* UNIV_DEBUG */
|
|
|
|
# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
|
|
|
|
# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes)
|
|
|
|
# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
|
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
|
2018-11-16 19:10:33 +01:00
|
|
|
#define dict_index_is_clust(index) (index)->is_clust()
|
|
|
|
#define dict_index_is_auto_gen_clust(index) (index)->is_gen_clust()
|
|
|
|
#define dict_index_is_unique(index) (index)->is_unique()
|
|
|
|
#define dict_index_is_spatial(index) (index)->is_spatial()
|
|
|
|
#define dict_index_is_ibuf(index) (index)->is_ibuf()
|
|
|
|
#define dict_index_is_sec_or_ibuf(index) !(index)->is_primary()
|
2018-06-04 14:55:37 +02:00
|
|
|
#define dict_index_has_virtual(index) (index)->has_virtual()
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Get all the FTS indexes on a table.
|
|
|
|
@param[in] table table
|
|
|
|
@param[out] indexes all FTS indexes on this table
|
|
|
|
@return number of FTS indexes */
|
2014-02-26 19:11:54 +01:00
|
|
|
ulint
|
|
|
|
dict_table_get_all_fts_indexes(
|
2016-08-12 10:17:45 +02:00
|
|
|
const dict_table_t* table,
|
|
|
|
ib_vector_t* indexes);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
2016-08-12 10:17:45 +02:00
|
|
|
Gets the number of user-defined non-virtual columns in a table in the
|
|
|
|
dictionary cache.
|
|
|
|
@return number of user-defined (e.g., not ROW_ID) non-virtual
|
|
|
|
columns of a table */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_table_get_n_user_cols(
|
|
|
|
/*=======================*/
|
|
|
|
const dict_table_t* table) /*!< in: table */
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
2016-08-12 10:17:45 +02:00
|
|
|
Gets the number of all non-virtual columns (also system) in a table
|
|
|
|
in the dictionary cache.
|
|
|
|
@return number of columns of a table */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_table_get_n_cols(
|
|
|
|
/*==================*/
|
|
|
|
const dict_table_t* table) /*!< in: table */
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** Gets the number of virtual columns in a table in the dictionary cache.
|
|
|
|
@param[in] table the table to check
|
|
|
|
@return number of virtual columns of a table */
|
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2016-08-12 10:17:45 +02:00
|
|
|
dict_table_get_n_v_cols(
|
|
|
|
const dict_table_t* table);
|
|
|
|
|
|
|
|
/** Check if a table has indexed virtual columns
|
|
|
|
@param[in] table the table to check
|
|
|
|
@return true is the table has indexed virtual columns */
|
|
|
|
UNIV_INLINE
|
|
|
|
bool
|
|
|
|
dict_table_has_indexed_v_cols(
|
|
|
|
const dict_table_t* table);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the approximately estimated number of rows in the table.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return estimated number of rows */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
ib_uint64_t
|
|
|
|
dict_table_get_n_rows(
|
|
|
|
/*==================*/
|
|
|
|
const dict_table_t* table) /*!< in: table */
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Increment the number of rows in the table by one.
|
|
|
|
Notice that this operation is not protected by any latch, the number is
|
|
|
|
approximate. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
dict_table_n_rows_inc(
|
|
|
|
/*==================*/
|
|
|
|
dict_table_t* table) /*!< in/out: table */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Decrement the number of rows in the table by one.
|
|
|
|
Notice that this operation is not protected by any latch, the number is
|
|
|
|
approximate. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
dict_table_n_rows_dec(
|
|
|
|
/*==================*/
|
|
|
|
dict_table_t* table) /*!< in/out: table */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** Get nth virtual column
|
|
|
|
@param[in] table target table
|
|
|
|
@param[in] col_nr column number in MySQL Table definition
|
|
|
|
@return dict_v_col_t ptr */
|
|
|
|
dict_v_col_t*
|
|
|
|
dict_table_get_nth_v_col_mysql(
|
|
|
|
const dict_table_t* table,
|
|
|
|
ulint col_nr);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/********************************************************************//**
|
|
|
|
Gets the nth column of a table.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return pointer to column object */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
dict_col_t*
|
|
|
|
dict_table_get_nth_col(
|
|
|
|
/*===================*/
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
|
|
|
ulint pos) /*!< in: position of column */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Gets the nth virtual column of a table.
|
|
|
|
@param[in] table table
|
|
|
|
@param[in] pos position of virtual column
|
|
|
|
@return pointer to virtual column object */
|
|
|
|
UNIV_INLINE
|
|
|
|
dict_v_col_t*
|
|
|
|
dict_table_get_nth_v_col(
|
|
|
|
const dict_table_t* table,
|
|
|
|
ulint pos);
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the given system column of a table.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return pointer to column object */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
dict_col_t*
|
|
|
|
dict_table_get_sys_col(
|
|
|
|
/*===================*/
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned sys) /*!< in: DATA_ROW_ID, ... */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
#else /* UNIV_DEBUG */
|
2019-04-03 13:07:18 +02:00
|
|
|
#define dict_table_get_nth_col(table, pos) (&(table)->cols[pos])
|
2016-08-12 10:17:45 +02:00
|
|
|
#define dict_table_get_sys_col(table, sys) \
|
2019-04-03 09:50:43 +02:00
|
|
|
&(table)->cols[(table)->n_cols + (sys) - DATA_N_SYS_COLS]
|
2016-08-12 10:17:45 +02:00
|
|
|
/* Get nth virtual columns */
|
2019-04-03 13:07:18 +02:00
|
|
|
#define dict_table_get_nth_v_col(table, pos) (&(table)->v_cols[pos])
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif /* UNIV_DEBUG */
|
MDEV-11369 Instant ADD COLUMN for InnoDB
For InnoDB tables, adding, dropping and reordering columns has
required a rebuild of the table and all its indexes. Since MySQL 5.6
(and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing
concurrent modification of the tables.
This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT
and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously,
with only minor changes performed to the table structure. The counter
innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS
is incremented whenever a table rebuild operation is converted into
an instant ADD COLUMN operation.
ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN.
Some usability limitations will be addressed in subsequent work:
MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY
and ALGORITHM=INSTANT
MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE
The format of the clustered index (PRIMARY KEY) is changed as follows:
(1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT,
and a new field PAGE_INSTANT will contain the original number of fields
in the clustered index ('core' fields).
If instant ADD COLUMN has not been used or the table becomes empty,
or the very first instant ADD COLUMN operation is rolled back,
the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset
to 0 and FIL_PAGE_INDEX.
(2) A special 'default row' record is inserted into the leftmost leaf,
between the page infimum and the first user record. This record is
distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the
same format as records that contain values for the instantly added
columns. This 'default row' always has the same number of fields as
the clustered index according to the table definition. The values of
'core' fields are to be ignored. For other fields, the 'default row'
will contain the default values as they were during the ALTER TABLE
statement. (If the column default values are changed later, those
values will only be stored in the .frm file. The 'default row' will
contain the original evaluated values, which must be the same for
every row.) The 'default row' must be completely hidden from
higher-level access routines. Assertions have been added to ensure
that no 'default row' is ever present in the adaptive hash index
or in locked records. The 'default row' is never delete-marked.
(3) In clustered index leaf page records, the number of fields must
reside between the number of 'core' fields (dict_index_t::n_core_fields
introduced in this work) and dict_index_t::n_fields. If the number
of fields is less than dict_index_t::n_fields, the missing fields
are replaced with the column value of the 'default row'.
Note: The number of fields in the record may shrink if some of the
last instantly added columns are updated to the value that is
in the 'default row'. The function btr_cur_trim() implements this
'compression' on update and rollback; dtuple::trim() implements it
on insert.
(4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new
status value REC_STATUS_COLUMNS_ADDED will indicate the presence of
a new record header that will encode n_fields-n_core_fields-1 in
1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header
always explicitly encodes the number of fields.)
We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for
covering the insert of the 'default row' record when instant ADD COLUMN
is used for the first time. Subsequent instant ADD COLUMN can use
TRX_UNDO_UPD_EXIST_REC.
This is joint work with Vin Chen (陈福荣) from Tencent. The design
that was discussed in April 2017 would not have allowed import or
export of data files, because instead of the 'default row' it would
have introduced a data dictionary table. The test
rpl.rpl_alter_instant is exactly as contributed in pull request #408.
The test innodb.instant_alter is based on a contributed test.
The redo log record format changes for ROW_FORMAT=DYNAMIC and
ROW_FORMAT=COMPACT are as contributed. (With this change present,
crash recovery from MariaDB 10.3.1 will fail in spectacular ways!)
Also the semantics of higher-level redo log records that modify the
PAGE_INSTANT field is changed. The redo log format version identifier
was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1.
Everything else has been rewritten by me. Thanks to Elena Stepanova,
the code has been tested extensively.
When rolling back an instant ADD COLUMN operation, we must empty the
PAGE_FREE list after deleting or shortening the 'default row' record,
by calling either btr_page_empty() or btr_page_reorganize(). We must
know the size of each entry in the PAGE_FREE list. If rollback left a
freed copy of the 'default row' in the PAGE_FREE list, we would be
unable to determine its size (if it is in ROW_FORMAT=COMPACT or
ROW_FORMAT=DYNAMIC) because it would contain more fields than the
rolled-back definition of the clustered index.
UNIV_SQL_DEFAULT: A new special constant that designates an instantly
added column that is not present in the clustered index record.
len_is_stored(): Check if a length is an actual length. There are
two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL.
dict_col_t::def_val: The 'default row' value of the column. If the
column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT.
dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(),
instant_value().
dict_col_t::remove_instant(): Remove the 'instant ADD' status of
a column.
dict_col_t::name(const dict_table_t& table): Replaces
dict_table_get_col_name().
dict_index_t::n_core_fields: The original number of fields.
For secondary indexes and if instant ADD COLUMN has not been used,
this will be equal to dict_index_t::n_fields.
dict_index_t::n_core_null_bytes: Number of bytes needed to
represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable).
dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that
n_core_null_bytes was not initialized yet from the clustered index
root page.
dict_index_t: Add the accessors is_instant(), is_clust(),
get_n_nullable(), instant_field_value().
dict_index_t::instant_add_field(): Adjust clustered index metadata
for instant ADD COLUMN.
dict_index_t::remove_instant(): Remove the 'instant ADD' status
of a clustered index when the table becomes empty, or the very first
instant ADD COLUMN operation is rolled back.
dict_table_t: Add the accessors is_instant(), is_temporary(),
supports_instant().
dict_table_t::instant_add_column(): Adjust metadata for
instant ADD COLUMN.
dict_table_t::rollback_instant(): Adjust metadata on the rollback
of instant ADD COLUMN.
prepare_inplace_alter_table_dict(): First create the ctx->new_table,
and only then decide if the table really needs to be rebuilt.
We must split the creation of table or index metadata from the
creation of the dictionary table records and the creation of
the data. In this way, we can transform a table-rebuilding operation
into an instant ADD COLUMN operation. Dictionary objects will only
be added to cache when table rebuilding or index creation is needed.
The ctx->instant_table will never be added to cache.
dict_table_t::add_to_cache(): Modified and renamed from
dict_table_add_to_cache(). Do not modify the table metadata.
Let the callers invoke dict_table_add_system_columns() and if needed,
set can_be_evicted.
dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the
system columns (which will now exist in the dict_table_t object
already at this point).
dict_create_table_step(): Expect the callers to invoke
dict_table_add_system_columns().
pars_create_table(): Before creating the table creation execution
graph, invoke dict_table_add_system_columns().
row_create_table_for_mysql(): Expect all callers to invoke
dict_table_add_system_columns().
create_index_dict(): Replaces row_merge_create_index_graph().
innodb_update_n_cols(): Renamed from innobase_update_n_virtual().
Call my_error() if an error occurs.
btr_cur_instant_init(), btr_cur_instant_init_low(),
btr_cur_instant_root_init():
Load additional metadata from the clustered index and set
dict_index_t::n_core_null_bytes. This is invoked
when table metadata is first loaded into the data dictionary.
dict_boot(): Initialize n_core_null_bytes for the four hard-coded
dictionary tables.
dict_create_index_step(): Initialize n_core_null_bytes. This is
executed as part of CREATE TABLE.
dict_index_build_internal_clust(): Initialize n_core_null_bytes to
NO_CORE_NULL_BYTES if table->supports_instant().
row_create_index_for_mysql(): Initialize n_core_null_bytes for
CREATE TEMPORARY TABLE.
commit_cache_norebuild(): Call the code to rename or enlarge columns
in the cache only if instant ADD COLUMN is not being used.
(Instant ADD COLUMN would copy all column metadata from
instant_table to old_table, including the names and lengths.)
PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields.
This is repurposing the 16-bit field PAGE_DIRECTION, of which only the
least significant 3 bits were used. The original byte containing
PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B.
page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT.
page_ptr_get_direction(), page_get_direction(),
page_ptr_set_direction(): Accessors for PAGE_DIRECTION.
page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION.
page_direction_increment(): Increment PAGE_N_DIRECTION
and set PAGE_DIRECTION.
rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes,
and assume that heap_no is always set.
Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records,
even if the record contains fewer fields.
rec_offs_make_valid(): Add the parameter 'leaf'.
rec_copy_prefix_to_dtuple(): Assert that the tuple is only built
on the core fields. Instant ADD COLUMN only applies to the
clustered index, and we should never build a search key that has
more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR.
All these columns are always present.
dict_index_build_data_tuple(): Remove assertions that would be
duplicated in rec_copy_prefix_to_dtuple().
rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose
number of fields is between n_core_fields and n_fields.
cmp_rec_rec_with_match(): Implement the comparison between two
MIN_REC_FLAG records.
trx_t::in_rollback: Make the field available in non-debug builds.
trx_start_for_ddl_low(): Remove dangerous error-tolerance.
A dictionary transaction must be flagged as such before it has generated
any undo log records. This is because trx_undo_assign_undo() will mark
the transaction as a dictionary transaction in the undo log header
right before the very first undo log record is being written.
btr_index_rec_validate(): Account for instant ADD COLUMN
row_undo_ins_remove_clust_rec(): On the rollback of an insert into
SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the
last column from the table and the clustered index.
row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(),
trx_undo_update_rec_get_update(): Handle the 'default row'
as a special case.
dtuple_t::trim(index): Omit a redundant suffix of an index tuple right
before insert or update. After instant ADD COLUMN, if the last fields
of a clustered index tuple match the 'default row', there is no
need to store them. While trimming the entry, we must hold a page latch,
so that the table cannot be emptied and the 'default row' be deleted.
btr_cur_optimistic_update(), btr_cur_pessimistic_update(),
row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low():
Invoke dtuple_t::trim() if needed.
row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling
row_ins_clust_index_entry_low().
rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number
of fields to be between n_core_fields and n_fields. Do not support
infimum,supremum. They are never supposed to be stored in dtuple_t,
because page creation nowadays uses a lower-level method for initializing
them.
rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the
number of fields.
btr_cur_trim(): In an update, trim the index entry as needed. For the
'default row', handle rollback specially. For user records, omit
fields that match the 'default row'.
btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete():
Skip locking and adaptive hash index for the 'default row'.
row_log_table_apply_convert_mrec(): Replace 'default row' values if needed.
In the temporary file that is applied by row_log_table_apply(),
we must identify whether the records contain the extra header for
instantly added columns. For now, we will allocate an additional byte
for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table
has been subject to instant ADD COLUMN. The ROW_T_DELETE records are
fine, as they will be converted and will only contain 'core' columns
(PRIMARY KEY and some system columns) that are converted from dtuple_t.
rec_get_converted_size_temp(), rec_init_offsets_temp(),
rec_convert_dtuple_to_temp(): Add the parameter 'status'.
REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED:
An info_bits constant for distinguishing the 'default row' record.
rec_comp_status_t: An enum of the status bit values.
rec_leaf_format: An enum that replaces the bool parameter of
rec_init_offsets_comp_ordinary().
2017-10-06 06:00:05 +02:00
|
|
|
/** Wrapper function.
|
|
|
|
@see dict_col_t::name()
|
|
|
|
@param[in] table table
|
|
|
|
@param[in] col_nr column number in table
|
|
|
|
@return column name */
|
|
|
|
inline
|
|
|
|
const char*
|
|
|
|
dict_table_get_col_name(const dict_table_t* table, ulint col_nr)
|
|
|
|
{
|
|
|
|
return(dict_table_get_nth_col(table, col_nr)->name(*table));
|
|
|
|
}
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the given system column number of a table.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return column number */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_table_get_sys_col_no(
|
|
|
|
/*======================*/
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned sys) /*!< in: DATA_ROW_ID, ... */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-12-30 14:04:10 +01:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Returns the minimum data size of an index record.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return minimum data size in bytes */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_get_min_size(
|
|
|
|
/*====================*/
|
|
|
|
const dict_index_t* index) /*!< in: index */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2018-11-12 16:01:56 +01:00
|
|
|
|
|
|
|
#define dict_table_is_comp(table) (table)->not_redundant()
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2017-06-01 12:03:55 +02:00
|
|
|
/** Determine if a table uses atomic BLOBs (no locally stored prefix).
|
|
|
|
@param[in] table InnoDB table
|
|
|
|
@return whether BLOBs are atomic */
|
|
|
|
inline
|
|
|
|
bool
|
|
|
|
dict_table_has_atomic_blobs(const dict_table_t* table)
|
|
|
|
{
|
|
|
|
return(DICT_TF_HAS_ATOMIC_BLOBS(table->flags));
|
|
|
|
}
|
2016-09-06 08:43:16 +02:00
|
|
|
|
2019-07-25 17:42:06 +02:00
|
|
|
/** @return potential max length stored inline for externally stored fields */
|
|
|
|
inline size_t dict_table_t::get_overflow_field_local_len() const
|
|
|
|
{
|
|
|
|
if (dict_table_has_atomic_blobs(this)) {
|
|
|
|
/* ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED: do not
|
|
|
|
store any BLOB prefix locally */
|
|
|
|
return BTR_EXTERN_FIELD_REF_SIZE;
|
|
|
|
}
|
|
|
|
/* up to MySQL 5.1: store a 768-byte prefix locally */
|
|
|
|
return BTR_EXTERN_FIELD_REF_SIZE + DICT_ANTELOPE_MAX_INDEX_COL_LEN;
|
|
|
|
}
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Set the various values in a dict_table_t::flags pointer.
|
|
|
|
@param[in,out] flags, Pointer to a 4 byte Table Flags
|
|
|
|
@param[in] format, File Format
|
|
|
|
@param[in] zip_ssize Zip Shift Size
|
|
|
|
@param[in] use_data_dir Table uses DATA DIRECTORY
|
2017-01-16 15:02:42 +01:00
|
|
|
@param[in] page_compressed Table uses page compression
|
2018-05-01 00:10:37 +02:00
|
|
|
@param[in] page_compression_level Page compression level */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
dict_tf_set(
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint* flags,
|
|
|
|
rec_format_t format,
|
|
|
|
ulint zip_ssize,
|
|
|
|
bool use_data_dir,
|
|
|
|
bool page_compressed,
|
2018-05-01 00:10:37 +02:00
|
|
|
ulint page_compression_level);
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** Convert a 32 bit integer table flags to the 32 bit FSP Flags.
|
|
|
|
Fsp Flags are written into the tablespace header at the offset
|
|
|
|
FSP_SPACE_FLAGS and are also stored in the fil_space_t::flags field.
|
|
|
|
The following chart shows the translation of the low order bit.
|
|
|
|
Other bits are the same.
|
2014-02-26 19:11:54 +01:00
|
|
|
========================= Low order bit ==========================
|
|
|
|
| REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
|
|
|
|
dict_table_t::flags | 0 | 1 | 1 | 1
|
|
|
|
fil_space_t::flags | 0 | 0 | 1 | 1
|
|
|
|
==================================================================
|
2016-08-12 10:17:45 +02:00
|
|
|
@param[in] table_flags dict_table_t::flags
|
|
|
|
@return tablespace flags (fil_space_t::flags) */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
2017-01-17 10:37:49 +01:00
|
|
|
dict_tf_to_fsp_flags(ulint table_flags)
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((const));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2019-02-06 18:50:11 +01:00
|
|
|
|
|
|
|
/** Extract the ROW_FORMAT=COMPRESSED page size from table flags.
|
2016-08-12 10:17:45 +02:00
|
|
|
@param[in] flags flags
|
2019-02-06 18:50:11 +01:00
|
|
|
@return ROW_FORMAT=COMPRESSED page size
|
|
|
|
@retval 0 if not compressed */
|
|
|
|
inline ulint dict_tf_get_zip_size(ulint flags)
|
|
|
|
{
|
|
|
|
flags &= DICT_TF_MASK_ZIP_SSIZE;
|
|
|
|
return flags
|
|
|
|
? (UNIV_ZIP_SIZE_MIN >> 1)
|
|
|
|
<< (FSP_FLAGS_GET_ZIP_SSIZE(flags >> DICT_TF_POS_ZIP_SSIZE
|
|
|
|
<< FSP_FLAGS_POS_ZIP_SSIZE))
|
|
|
|
: 0;
|
|
|
|
}
|
2016-08-12 10:17:45 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Checks if a column is in the ordering columns of the clustered index of a
|
|
|
|
table. Column prefixes are treated like whole columns.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE if the column, or its prefix, is in the clustered key */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool
|
|
|
|
dict_table_col_in_clustered_key(
|
|
|
|
/*============================*/
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
|
|
|
ulint n) /*!< in: column number */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*******************************************************************//**
|
|
|
|
Check if the table has an FTS index.
|
|
|
|
@return TRUE if table has an FTS index */
|
|
|
|
UNIV_INLINE
|
|
|
|
ibool
|
|
|
|
dict_table_has_fts_index(
|
|
|
|
/*=====================*/
|
|
|
|
dict_table_t* table) /*!< in: table */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Copies types of virtual columns contained in table to tuple and sets all
|
|
|
|
fields of the tuple to the SQL NULL value. This function should
|
|
|
|
be called right after dtuple_create().
|
|
|
|
@param[in,out] tuple data tuple
|
|
|
|
@param[in] table table
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
dict_table_copy_v_types(
|
|
|
|
dtuple_t* tuple,
|
|
|
|
const dict_table_t* table);
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*******************************************************************//**
|
|
|
|
Copies types of columns contained in table to tuple and sets all
|
|
|
|
fields of the tuple to the SQL NULL value. This function should
|
|
|
|
be called right after dtuple_create(). */
|
|
|
|
void
|
|
|
|
dict_table_copy_types(
|
|
|
|
/*==================*/
|
|
|
|
dtuple_t* tuple, /*!< in/out: data tuple */
|
|
|
|
const dict_table_t* table) /*!< in: table */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Adds an index to the dictionary cache, with possible indexing newly
|
|
|
|
added column.
|
2019-11-06 12:01:34 +01:00
|
|
|
@param[in,out] index index; NOTE! The index memory
|
2016-08-12 10:17:45 +02:00
|
|
|
object is freed in this function!
|
|
|
|
@param[in] page_no root page number of the index
|
2019-11-06 12:01:34 +01:00
|
|
|
@param[in] add_v virtual columns being added along with ADD INDEX
|
2019-11-02 22:15:29 +01:00
|
|
|
@return DB_SUCCESS, or DB_CORRUPTION */
|
2016-08-12 10:17:45 +02:00
|
|
|
dberr_t
|
2018-03-23 16:25:56 +01:00
|
|
|
dict_index_add_to_cache(
|
2019-11-06 12:01:34 +01:00
|
|
|
dict_index_t*& index,
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint page_no,
|
2018-03-23 16:25:56 +01:00
|
|
|
const dict_add_v_col_t* add_v = NULL)
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the number of fields in the internal representation of an index,
|
|
|
|
including fields added by the dictionary system.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return number of fields */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 18:46:41 +01:00
|
|
|
uint16_t
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_get_n_fields(
|
|
|
|
/*====================*/
|
|
|
|
const dict_index_t* index) /*!< in: an internal
|
|
|
|
representation of index (in
|
|
|
|
the dictionary cache) */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
MDEV-11369 Instant ADD COLUMN for InnoDB
For InnoDB tables, adding, dropping and reordering columns has
required a rebuild of the table and all its indexes. Since MySQL 5.6
(and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing
concurrent modification of the tables.
This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT
and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously,
with only minor changes performed to the table structure. The counter
innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS
is incremented whenever a table rebuild operation is converted into
an instant ADD COLUMN operation.
ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN.
Some usability limitations will be addressed in subsequent work:
MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY
and ALGORITHM=INSTANT
MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE
The format of the clustered index (PRIMARY KEY) is changed as follows:
(1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT,
and a new field PAGE_INSTANT will contain the original number of fields
in the clustered index ('core' fields).
If instant ADD COLUMN has not been used or the table becomes empty,
or the very first instant ADD COLUMN operation is rolled back,
the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset
to 0 and FIL_PAGE_INDEX.
(2) A special 'default row' record is inserted into the leftmost leaf,
between the page infimum and the first user record. This record is
distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the
same format as records that contain values for the instantly added
columns. This 'default row' always has the same number of fields as
the clustered index according to the table definition. The values of
'core' fields are to be ignored. For other fields, the 'default row'
will contain the default values as they were during the ALTER TABLE
statement. (If the column default values are changed later, those
values will only be stored in the .frm file. The 'default row' will
contain the original evaluated values, which must be the same for
every row.) The 'default row' must be completely hidden from
higher-level access routines. Assertions have been added to ensure
that no 'default row' is ever present in the adaptive hash index
or in locked records. The 'default row' is never delete-marked.
(3) In clustered index leaf page records, the number of fields must
reside between the number of 'core' fields (dict_index_t::n_core_fields
introduced in this work) and dict_index_t::n_fields. If the number
of fields is less than dict_index_t::n_fields, the missing fields
are replaced with the column value of the 'default row'.
Note: The number of fields in the record may shrink if some of the
last instantly added columns are updated to the value that is
in the 'default row'. The function btr_cur_trim() implements this
'compression' on update and rollback; dtuple::trim() implements it
on insert.
(4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new
status value REC_STATUS_COLUMNS_ADDED will indicate the presence of
a new record header that will encode n_fields-n_core_fields-1 in
1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header
always explicitly encodes the number of fields.)
We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for
covering the insert of the 'default row' record when instant ADD COLUMN
is used for the first time. Subsequent instant ADD COLUMN can use
TRX_UNDO_UPD_EXIST_REC.
This is joint work with Vin Chen (陈福荣) from Tencent. The design
that was discussed in April 2017 would not have allowed import or
export of data files, because instead of the 'default row' it would
have introduced a data dictionary table. The test
rpl.rpl_alter_instant is exactly as contributed in pull request #408.
The test innodb.instant_alter is based on a contributed test.
The redo log record format changes for ROW_FORMAT=DYNAMIC and
ROW_FORMAT=COMPACT are as contributed. (With this change present,
crash recovery from MariaDB 10.3.1 will fail in spectacular ways!)
Also the semantics of higher-level redo log records that modify the
PAGE_INSTANT field is changed. The redo log format version identifier
was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1.
Everything else has been rewritten by me. Thanks to Elena Stepanova,
the code has been tested extensively.
When rolling back an instant ADD COLUMN operation, we must empty the
PAGE_FREE list after deleting or shortening the 'default row' record,
by calling either btr_page_empty() or btr_page_reorganize(). We must
know the size of each entry in the PAGE_FREE list. If rollback left a
freed copy of the 'default row' in the PAGE_FREE list, we would be
unable to determine its size (if it is in ROW_FORMAT=COMPACT or
ROW_FORMAT=DYNAMIC) because it would contain more fields than the
rolled-back definition of the clustered index.
UNIV_SQL_DEFAULT: A new special constant that designates an instantly
added column that is not present in the clustered index record.
len_is_stored(): Check if a length is an actual length. There are
two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL.
dict_col_t::def_val: The 'default row' value of the column. If the
column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT.
dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(),
instant_value().
dict_col_t::remove_instant(): Remove the 'instant ADD' status of
a column.
dict_col_t::name(const dict_table_t& table): Replaces
dict_table_get_col_name().
dict_index_t::n_core_fields: The original number of fields.
For secondary indexes and if instant ADD COLUMN has not been used,
this will be equal to dict_index_t::n_fields.
dict_index_t::n_core_null_bytes: Number of bytes needed to
represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable).
dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that
n_core_null_bytes was not initialized yet from the clustered index
root page.
dict_index_t: Add the accessors is_instant(), is_clust(),
get_n_nullable(), instant_field_value().
dict_index_t::instant_add_field(): Adjust clustered index metadata
for instant ADD COLUMN.
dict_index_t::remove_instant(): Remove the 'instant ADD' status
of a clustered index when the table becomes empty, or the very first
instant ADD COLUMN operation is rolled back.
dict_table_t: Add the accessors is_instant(), is_temporary(),
supports_instant().
dict_table_t::instant_add_column(): Adjust metadata for
instant ADD COLUMN.
dict_table_t::rollback_instant(): Adjust metadata on the rollback
of instant ADD COLUMN.
prepare_inplace_alter_table_dict(): First create the ctx->new_table,
and only then decide if the table really needs to be rebuilt.
We must split the creation of table or index metadata from the
creation of the dictionary table records and the creation of
the data. In this way, we can transform a table-rebuilding operation
into an instant ADD COLUMN operation. Dictionary objects will only
be added to cache when table rebuilding or index creation is needed.
The ctx->instant_table will never be added to cache.
dict_table_t::add_to_cache(): Modified and renamed from
dict_table_add_to_cache(). Do not modify the table metadata.
Let the callers invoke dict_table_add_system_columns() and if needed,
set can_be_evicted.
dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the
system columns (which will now exist in the dict_table_t object
already at this point).
dict_create_table_step(): Expect the callers to invoke
dict_table_add_system_columns().
pars_create_table(): Before creating the table creation execution
graph, invoke dict_table_add_system_columns().
row_create_table_for_mysql(): Expect all callers to invoke
dict_table_add_system_columns().
create_index_dict(): Replaces row_merge_create_index_graph().
innodb_update_n_cols(): Renamed from innobase_update_n_virtual().
Call my_error() if an error occurs.
btr_cur_instant_init(), btr_cur_instant_init_low(),
btr_cur_instant_root_init():
Load additional metadata from the clustered index and set
dict_index_t::n_core_null_bytes. This is invoked
when table metadata is first loaded into the data dictionary.
dict_boot(): Initialize n_core_null_bytes for the four hard-coded
dictionary tables.
dict_create_index_step(): Initialize n_core_null_bytes. This is
executed as part of CREATE TABLE.
dict_index_build_internal_clust(): Initialize n_core_null_bytes to
NO_CORE_NULL_BYTES if table->supports_instant().
row_create_index_for_mysql(): Initialize n_core_null_bytes for
CREATE TEMPORARY TABLE.
commit_cache_norebuild(): Call the code to rename or enlarge columns
in the cache only if instant ADD COLUMN is not being used.
(Instant ADD COLUMN would copy all column metadata from
instant_table to old_table, including the names and lengths.)
PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields.
This is repurposing the 16-bit field PAGE_DIRECTION, of which only the
least significant 3 bits were used. The original byte containing
PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B.
page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT.
page_ptr_get_direction(), page_get_direction(),
page_ptr_set_direction(): Accessors for PAGE_DIRECTION.
page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION.
page_direction_increment(): Increment PAGE_N_DIRECTION
and set PAGE_DIRECTION.
rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes,
and assume that heap_no is always set.
Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records,
even if the record contains fewer fields.
rec_offs_make_valid(): Add the parameter 'leaf'.
rec_copy_prefix_to_dtuple(): Assert that the tuple is only built
on the core fields. Instant ADD COLUMN only applies to the
clustered index, and we should never build a search key that has
more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR.
All these columns are always present.
dict_index_build_data_tuple(): Remove assertions that would be
duplicated in rec_copy_prefix_to_dtuple().
rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose
number of fields is between n_core_fields and n_fields.
cmp_rec_rec_with_match(): Implement the comparison between two
MIN_REC_FLAG records.
trx_t::in_rollback: Make the field available in non-debug builds.
trx_start_for_ddl_low(): Remove dangerous error-tolerance.
A dictionary transaction must be flagged as such before it has generated
any undo log records. This is because trx_undo_assign_undo() will mark
the transaction as a dictionary transaction in the undo log header
right before the very first undo log record is being written.
btr_index_rec_validate(): Account for instant ADD COLUMN
row_undo_ins_remove_clust_rec(): On the rollback of an insert into
SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the
last column from the table and the clustered index.
row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(),
trx_undo_update_rec_get_update(): Handle the 'default row'
as a special case.
dtuple_t::trim(index): Omit a redundant suffix of an index tuple right
before insert or update. After instant ADD COLUMN, if the last fields
of a clustered index tuple match the 'default row', there is no
need to store them. While trimming the entry, we must hold a page latch,
so that the table cannot be emptied and the 'default row' be deleted.
btr_cur_optimistic_update(), btr_cur_pessimistic_update(),
row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low():
Invoke dtuple_t::trim() if needed.
row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling
row_ins_clust_index_entry_low().
rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number
of fields to be between n_core_fields and n_fields. Do not support
infimum,supremum. They are never supposed to be stored in dtuple_t,
because page creation nowadays uses a lower-level method for initializing
them.
rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the
number of fields.
btr_cur_trim(): In an update, trim the index entry as needed. For the
'default row', handle rollback specially. For user records, omit
fields that match the 'default row'.
btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete():
Skip locking and adaptive hash index for the 'default row'.
row_log_table_apply_convert_mrec(): Replace 'default row' values if needed.
In the temporary file that is applied by row_log_table_apply(),
we must identify whether the records contain the extra header for
instantly added columns. For now, we will allocate an additional byte
for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table
has been subject to instant ADD COLUMN. The ROW_T_DELETE records are
fine, as they will be converted and will only contain 'core' columns
(PRIMARY KEY and some system columns) that are converted from dtuple_t.
rec_get_converted_size_temp(), rec_init_offsets_temp(),
rec_convert_dtuple_to_temp(): Add the parameter 'status'.
REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED:
An info_bits constant for distinguishing the 'default row' record.
rec_comp_status_t: An enum of the status bit values.
rec_leaf_format: An enum that replaces the bool parameter of
rec_init_offsets_comp_ordinary().
2017-10-06 06:00:05 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the number of fields in the internal representation of an index
|
|
|
|
that uniquely determine the position of an index entry in the index, if
|
|
|
|
we do not take multiversioning into account: in the B-tree use the value
|
|
|
|
returned by dict_index_get_n_unique_in_tree.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return number of fields */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 18:46:41 +01:00
|
|
|
uint16_t
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_get_n_unique(
|
|
|
|
/*====================*/
|
|
|
|
const dict_index_t* index) /*!< in: an internal representation
|
|
|
|
of index (in the dictionary cache) */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the number of fields in the internal representation of an index
|
|
|
|
which uniquely determine the position of an index entry in the index, if
|
|
|
|
we also take multiversioning into account.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return number of fields */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 18:46:41 +01:00
|
|
|
uint16_t
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_get_n_unique_in_tree(
|
|
|
|
/*============================*/
|
|
|
|
const dict_index_t* index) /*!< in: an internal representation
|
|
|
|
of index (in the dictionary cache) */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** The number of fields in the nonleaf page of spatial index, except
|
|
|
|
the page no field. */
|
|
|
|
#define DICT_INDEX_SPATIAL_NODEPTR_SIZE 1
|
|
|
|
/**
|
|
|
|
Gets the number of fields on nonleaf page level in the internal representation
|
|
|
|
of an index which uniquely determine the position of an index entry in the
|
|
|
|
index, if we also take multiversioning into account. Note, it doesn't
|
|
|
|
include page no field.
|
|
|
|
@param[in] index index
|
|
|
|
@return number of fields */
|
|
|
|
UNIV_INLINE
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 18:46:41 +01:00
|
|
|
uint16_t
|
2016-08-12 10:17:45 +02:00
|
|
|
dict_index_get_n_unique_in_tree_nonleaf(
|
|
|
|
const dict_index_t* index)
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the number of user-defined ordering fields in the index. In the internal
|
|
|
|
representation we add the row id to the ordering fields to make all indexes
|
|
|
|
unique, but this function returns the number of fields the user defined
|
|
|
|
in the index as ordering fields.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return number of fields */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
MDEV-21907: InnoDB: Enable -Wconversion on clang and GCC
The -Wconversion in GCC seems to be stricter than in clang.
GCC at least since version 4.4.7 issues truncation warnings for
assignments to bitfields, while clang 10 appears to only issue
warnings when the sizes in bytes rounded to the nearest integer
powers of 2 are different.
Before GCC 10.0.0, -Wconversion required more casts and would not
allow some operations, such as x<<=1 or x+=1 on a data type that
is narrower than int.
GCC 5 (but not GCC 4, GCC 6, or any later version) is complaining
about x|=y even when x and y are compatible types that are narrower
than int. Hence, we must rewrite some x|=y as
x=static_cast<byte>(x|y) or similar, or we must disable -Wconversion.
In GCC 6 and later, the warning for assigning wider to bitfields
that are narrower than 8, 16, or 32 bits can be suppressed by
applying a bitwise & with the exact bitmask of the bitfield.
For older GCC, we must disable -Wconversion for GCC 4 or 5 in such
cases.
The bitwise negation operator appears to promote short integers
to a wider type, and hence we must add explicit truncation casts
around them. Microsoft Visual C does not allow a static_cast to
truncate a constant, such as static_cast<byte>(1) truncating int.
Hence, we will use the constructor-style cast byte(~1) for such cases.
This has been tested at least with GCC 4.8.5, 5.4.0, 7.4.0, 9.2.1, 10.0.0,
clang 9.0.1, 10.0.0, and MSVC 14.22.27905 (Microsoft Visual Studio 2019)
on 64-bit and 32-bit targets (IA-32, AMD64, POWER 8, POWER 9, ARMv8).
2020-03-12 18:46:41 +01:00
|
|
|
uint16_t
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_get_n_ordering_defined_by_user(
|
|
|
|
/*======================================*/
|
|
|
|
const dict_index_t* index) /*!< in: an internal representation
|
|
|
|
of index (in the dictionary cache) */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/********************************************************************//**
|
|
|
|
Gets the nth field of an index.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return pointer to field object */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
dict_field_t*
|
|
|
|
dict_index_get_nth_field(
|
|
|
|
/*=====================*/
|
|
|
|
const dict_index_t* index, /*!< in: index */
|
|
|
|
ulint pos) /*!< in: position of field */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
#else /* UNIV_DEBUG */
|
|
|
|
# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
|
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
/********************************************************************//**
|
|
|
|
Gets pointer to the nth column in an index.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return column */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
const dict_col_t*
|
|
|
|
dict_index_get_nth_col(
|
|
|
|
/*===================*/
|
|
|
|
const dict_index_t* index, /*!< in: index */
|
|
|
|
ulint pos) /*!< in: position of the field */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Gets the column number of the nth field in an index.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return column number */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_index_get_nth_col_no(
|
|
|
|
/*======================*/
|
|
|
|
const dict_index_t* index, /*!< in: index */
|
|
|
|
ulint pos) /*!< in: position of the field */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Looks for column n in an index.
|
|
|
|
@return position in internal representation of the index;
|
|
|
|
ULINT_UNDEFINED if not contained */
|
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_index_get_nth_col_pos(
|
|
|
|
/*=======================*/
|
|
|
|
const dict_index_t* index, /*!< in: index */
|
2014-11-03 10:18:52 +01:00
|
|
|
ulint n, /*!< in: column number */
|
2016-09-06 08:43:16 +02:00
|
|
|
ulint* prefix_col_pos) /*!< out: col num if prefix */
|
|
|
|
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Looks for column n in an index.
|
|
|
|
@param[in] index index
|
|
|
|
@param[in] n column number
|
|
|
|
@param[in] inc_prefix true=consider column prefixes too
|
|
|
|
@param[in] is_virtual true==virtual column
|
2014-02-26 19:11:54 +01:00
|
|
|
@return position in internal representation of the index;
|
|
|
|
ULINT_UNDEFINED if not contained */
|
|
|
|
ulint
|
|
|
|
dict_index_get_nth_col_or_prefix_pos(
|
|
|
|
const dict_index_t* index, /*!< in: index */
|
|
|
|
ulint n, /*!< in: column number */
|
2016-08-12 10:17:45 +02:00
|
|
|
bool inc_prefix, /*!< in: TRUE=consider
|
2014-02-26 19:11:54 +01:00
|
|
|
column prefixes too */
|
2016-08-12 10:17:45 +02:00
|
|
|
bool is_virtual, /*!< in: is a virtual column
|
|
|
|
*/
|
|
|
|
ulint* prefix_col_pos) /*!< out: col num if prefix
|
|
|
|
*/
|
|
|
|
__attribute__((warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Looks for a matching field in an index. The column has to be the same. The
|
|
|
|
column in index must be complete, or must contain a prefix longer than the
|
|
|
|
column in index2. That is, we must be able to construct the prefix in index2
|
|
|
|
from the prefix in index.
|
|
|
|
@return position in internal representation of the index;
|
|
|
|
ULINT_UNDEFINED if not contained */
|
|
|
|
ulint
|
|
|
|
dict_index_get_nth_field_pos(
|
|
|
|
/*=========================*/
|
|
|
|
const dict_index_t* index, /*!< in: index from which to search */
|
|
|
|
const dict_index_t* index2, /*!< in: index */
|
|
|
|
ulint n) /*!< in: field number in index2 */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Looks for column n position in the clustered index.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return position in internal representation of the clustered index */
|
2020-03-10 19:05:17 +01:00
|
|
|
unsigned
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_table_get_nth_col_pos(
|
|
|
|
/*=======================*/
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint n, /*!< in: column number */
|
|
|
|
ulint* prefix_col_pos) /*!< out: col num if prefix */
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*******************************************************************//**
|
|
|
|
Adds a column to index. */
|
|
|
|
void
|
|
|
|
dict_index_add_col(
|
|
|
|
/*===============*/
|
|
|
|
dict_index_t* index, /*!< in/out: index */
|
|
|
|
const dict_table_t* table, /*!< in: table */
|
|
|
|
dict_col_t* col, /*!< in: column */
|
|
|
|
ulint prefix_len) /*!< in: column prefix length */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2016-12-30 14:04:10 +01:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*******************************************************************//**
|
|
|
|
Copies types of fields contained in index to tuple. */
|
|
|
|
void
|
|
|
|
dict_index_copy_types(
|
|
|
|
/*==================*/
|
|
|
|
dtuple_t* tuple, /*!< in/out: data tuple */
|
|
|
|
const dict_index_t* index, /*!< in: index */
|
|
|
|
ulint n_fields) /*!< in: number of
|
|
|
|
field types to copy */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Gets the field column.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return field->col, pointer to the table column */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
const dict_col_t*
|
|
|
|
dict_field_get_col(
|
|
|
|
/*===============*/
|
|
|
|
const dict_field_t* field) /*!< in: index field */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-12-30 14:04:10 +01:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Returns an index object if it is found in the dictionary cache.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return index, NULL if not found */
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_t*
|
|
|
|
dict_index_get_if_in_cache_low(
|
|
|
|
/*===========================*/
|
|
|
|
index_id_t index_id) /*!< in: index id */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
MDEV-15053 Reduce buf_pool_t::mutex contention
User-visible changes: The INFORMATION_SCHEMA views INNODB_BUFFER_PAGE
and INNODB_BUFFER_PAGE_LRU will report a dummy value FLUSH_TYPE=0
and will no longer report the PAGE_STATE value READY_FOR_USE.
We will remove some fields from buf_page_t and move much code to
member functions of buf_pool_t and buf_page_t, so that the access
rules of data members can be enforced consistently.
Evicting or adding pages in buf_pool.LRU will remain covered by
buf_pool.mutex.
Evicting or adding pages in buf_pool.page_hash will remain
covered by both buf_pool.mutex and the buf_pool.page_hash X-latch.
After this fix, buf_pool.page_hash lookups can entirely
avoid acquiring buf_pool.mutex, only relying on
buf_pool.hash_lock_get() S-latch.
Similarly, buf_flush_check_neighbors() can will rely solely on
buf_pool.mutex, no buf_pool.page_hash latch at all.
The buf_pool.mutex is rather contended in I/O heavy benchmarks,
especially when the workload does not fit in the buffer pool.
The first attempt to alleviate the contention was the
buf_pool_t::mutex split in
commit 4ed7082eefe56b3e97e0edefb3df76dd7ef5e858
which introduced buf_block_t::mutex, which we are now removing.
Later, multiple instances of buf_pool_t were introduced
in commit c18084f71b02ea707c6461353e6cfc15d7553bc6
and recently removed by us in
commit 1a6f708ec594ac0ae2dd30db926ab07b100fa24b (MDEV-15058).
UNIV_BUF_DEBUG: Remove. This option to enable some buffer pool
related debugging in otherwise non-debug builds has not been used
for years. Instead, we have been using UNIV_DEBUG, which is enabled
in CMAKE_BUILD_TYPE=Debug.
buf_block_t::mutex, buf_pool_t::zip_mutex: Remove. We can mainly rely on
std::atomic and the buf_pool.page_hash latches, and in some cases
depend on buf_pool.mutex or buf_pool.flush_list_mutex just like before.
We must always release buf_block_t::lock before invoking
unfix() or io_unfix(), to prevent a glitch where a block that was
added to the buf_pool.free list would apper X-latched. See
commit c5883debd6ef440a037011c11873b396923e93c5 how this glitch
was finally caught in a debug environment.
We move some buf_pool_t::page_hash specific code from the
ha and hash modules to buf_pool, for improved readability.
buf_pool_t::close(): Assert that all blocks are clean, except
on aborted startup or crash-like shutdown.
buf_pool_t::validate(): No longer attempt to validate
n_flush[] against the number of BUF_IO_WRITE fixed blocks,
because buf_page_t::flush_type no longer exists.
buf_pool_t::watch_set(): Replaces buf_pool_watch_set().
Reduce mutex contention by separating the buf_pool.watch[]
allocation and the insert into buf_pool.page_hash.
buf_pool_t::page_hash_lock<bool exclusive>(): Acquire a
buf_pool.page_hash latch.
Replaces and extends buf_page_hash_lock_s_confirm()
and buf_page_hash_lock_x_confirm().
buf_pool_t::READ_AHEAD_PAGES: Renamed from BUF_READ_AHEAD_PAGES.
buf_pool_t::curr_size, old_size, read_ahead_area, n_pend_reads:
Use Atomic_counter.
buf_pool_t::running_out(): Replaces buf_LRU_buf_pool_running_out().
buf_pool_t::LRU_remove(): Remove a block from the LRU list
and return its predecessor. Incorporates buf_LRU_adjust_hp(),
which was removed.
buf_page_get_gen(): Remove a redundant call of fsp_is_system_temporary(),
for mode == BUF_GET_IF_IN_POOL_OR_WATCH, which is only used by
BTR_DELETE_OP (purge), which is never invoked on temporary tables.
buf_free_from_unzip_LRU_list_batch(): Avoid redundant assignments.
buf_LRU_free_from_unzip_LRU_list(): Simplify the loop condition.
buf_LRU_free_page(): Clarify the function comment.
buf_flush_check_neighbor(), buf_flush_check_neighbors():
Rewrite the construction of the page hash range. We will hold
the buf_pool.mutex for up to buf_pool.read_ahead_area (at most 64)
consecutive lookups of buf_pool.page_hash.
buf_flush_page_and_try_neighbors(): Remove.
Merge to its only callers, and remove redundant operations in
buf_flush_LRU_list_batch().
buf_read_ahead_random(), buf_read_ahead_linear(): Rewrite.
Do not acquire buf_pool.mutex, and iterate directly with page_id_t.
ut_2_power_up(): Remove. my_round_up_to_next_power() is inlined
and avoids any loops.
fil_page_get_prev(), fil_page_get_next(), fil_addr_is_null(): Remove.
buf_flush_page(): Add a fil_space_t* parameter. Minimize the
buf_pool.mutex hold time. buf_pool.n_flush[] is no longer updated
atomically with the io_fix, and we will protect most buf_block_t
fields with buf_block_t::lock. The function
buf_flush_write_block_low() is removed and merged here.
buf_page_init_for_read(): Use static linkage. Initialize the newly
allocated block and acquire the exclusive buf_block_t::lock while not
holding any mutex.
IORequest::IORequest(): Remove the body. We only need to invoke
set_punch_hole() in buf_flush_page() and nowhere else.
buf_page_t::flush_type: Remove. Replaced by IORequest::flush_type.
This field is only used during a fil_io() call.
That function already takes IORequest as a parameter, so we had
better introduce for the rarely changing field.
buf_block_t::init(): Replaces buf_page_init().
buf_page_t::init(): Replaces buf_page_init_low().
buf_block_t::initialise(): Initialise many fields, but
keep the buf_page_t::state(). Both buf_pool_t::validate() and
buf_page_optimistic_get() requires that buf_page_t::in_file()
be protected atomically with buf_page_t::in_page_hash
and buf_page_t::in_LRU_list.
buf_page_optimistic_get(): Now that buf_block_t::mutex
no longer exists, we must check buf_page_t::io_fix()
after acquiring the buf_pool.page_hash lock, to detect
whether buf_page_init_for_read() has been initiated.
We will also check the io_fix() before acquiring hash_lock
in order to avoid unnecessary computation.
The field buf_block_t::modify_clock (protected by buf_block_t::lock)
allows buf_page_optimistic_get() to validate the block.
buf_page_t::real_size: Remove. It was only used while flushing
pages of page_compressed tables.
buf_page_encrypt(): Add an output parameter that allows us ot eliminate
buf_page_t::real_size. Replace a condition with debug assertion.
buf_page_should_punch_hole(): Remove.
buf_dblwr_t::add_to_batch(): Replaces buf_dblwr_add_to_batch().
Add the parameter size (to replace buf_page_t::real_size).
buf_dblwr_t::write_single_page(): Replaces buf_dblwr_write_single_page().
Add the parameter size (to replace buf_page_t::real_size).
fil_system_t::detach(): Replaces fil_space_detach().
Ensure that fil_validate() will not be violated even if
fil_system.mutex is released and reacquired.
fil_node_t::complete_io(): Renamed from fil_node_complete_io().
fil_node_t::close_to_free(): Replaces fil_node_close_to_free().
Avoid invoking fil_node_t::close() because fil_system.n_open
has already been decremented in fil_space_t::detach().
BUF_BLOCK_READY_FOR_USE: Remove. Directly use BUF_BLOCK_MEMORY.
BUF_BLOCK_ZIP_DIRTY: Remove. Directly use BUF_BLOCK_ZIP_PAGE,
and distinguish dirty pages by buf_page_t::oldest_modification().
BUF_BLOCK_POOL_WATCH: Remove. Use BUF_BLOCK_NOT_USED instead.
This state was only being used for buf_page_t that are in
buf_pool.watch.
buf_pool_t::watch[]: Remove pointer indirection.
buf_page_t::in_flush_list: Remove. It was set if and only if
buf_page_t::oldest_modification() is nonzero.
buf_page_decrypt_after_read(), buf_corrupt_page_release(),
buf_page_check_corrupt(): Change the const fil_space_t* parameter
to const fil_node_t& so that we can report the correct file name.
buf_page_monitor(): Declare as an ATTRIBUTE_COLD global function.
buf_page_io_complete(): Split to buf_page_read_complete() and
buf_page_write_complete().
buf_dblwr_t::in_use: Remove.
buf_dblwr_t::buf_block_array: Add IORequest::flush_t.
buf_dblwr_sync_datafiles(): Remove. It was a useless wrapper of
os_aio_wait_until_no_pending_writes().
buf_flush_write_complete(): Declare static, not global.
Add the parameter IORequest::flush_t.
buf_flush_freed_page(): Simplify the code.
recv_sys_t::flush_lru: Renamed from flush_type and changed to bool.
fil_read(), fil_write(): Replaced with direct use of fil_io().
fil_buffering_disabled(): Remove. Check srv_file_flush_method directly.
fil_mutex_enter_and_prepare_for_io(): Return the resolved
fil_space_t* to avoid a duplicated lookup in the caller.
fil_report_invalid_page_access(): Clean up the parameters.
fil_io(): Return fil_io_t, which comprises fil_node_t and error code.
Always invoke fil_space_t::acquire_for_io() and let either the
sync=true caller or fil_aio_callback() invoke
fil_space_t::release_for_io().
fil_aio_callback(): Rewrite to replace buf_page_io_complete().
fil_check_pending_operations(): Remove a parameter, and remove some
redundant lookups.
fil_node_close_to_free(): Wait for n_pending==0. Because we no longer
do an extra lookup of the tablespace between fil_io() and the
completion of the operation, we must give fil_node_t::complete_io() a
chance to decrement the counter.
fil_close_tablespace(): Remove unused parameter trx, and document
that this is only invoked during the error handling of IMPORT TABLESPACE.
row_import_discard_changes(): Merged with the only caller,
row_import_cleanup(). Do not lock up the data dictionary while
invoking fil_close_tablespace().
logs_empty_and_mark_files_at_shutdown(): Do not invoke
fil_close_all_files(), to avoid a !needs_flush assertion failure
on fil_node_t::close().
innodb_shutdown(): Invoke os_aio_free() before fil_close_all_files().
fil_close_all_files(): Invoke fil_flush_file_spaces()
to ensure proper durability.
thread_pool::unbind(): Fix a crash that would occur on Windows
after srv_thread_pool->disable_aio() and os_file_close().
This fix was submitted by Vladislav Vaintroub.
Thanks to Matthias Leich and Axel Schwenke for extensive testing,
Vladislav Vaintroub for helpful comments, and Eugene Kosov for a review.
2020-06-05 11:35:46 +02:00
|
|
|
#ifdef UNIV_DEBUG
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Returns an index object if it is found in the dictionary cache.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return index, NULL if not found */
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_t*
|
|
|
|
dict_index_get_if_in_cache(
|
|
|
|
/*=======================*/
|
|
|
|
index_id_t index_id) /*!< in: index id */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/**********************************************************************//**
|
|
|
|
Checks that a tuple has n_fields_cmp value in a sensible range, so that
|
|
|
|
no comparison can occur with the page number field in a node pointer.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE if ok */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool
|
|
|
|
dict_index_check_search_tuple(
|
|
|
|
/*==========================*/
|
|
|
|
const dict_index_t* index, /*!< in: index tree */
|
|
|
|
const dtuple_t* tuple) /*!< in: tuple used in a search */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/** Whether and when to allow temporary index names */
|
|
|
|
enum check_name {
|
|
|
|
/** Require all indexes to be complete. */
|
|
|
|
CHECK_ALL_COMPLETE,
|
|
|
|
/** Allow aborted online index creation. */
|
|
|
|
CHECK_ABORTED_OK,
|
|
|
|
/** Allow partial indexes to exist. */
|
|
|
|
CHECK_PARTIAL_OK
|
|
|
|
};
|
|
|
|
/**********************************************************************//**
|
|
|
|
Check for duplicate index entries in a table [using the index name] */
|
|
|
|
void
|
|
|
|
dict_table_check_for_dup_indexes(
|
|
|
|
/*=============================*/
|
|
|
|
const dict_table_t* table, /*!< in: Check for dup indexes
|
|
|
|
in this table */
|
|
|
|
enum check_name check) /*!< in: whether and when to allow
|
|
|
|
temporary index names */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
/**********************************************************************//**
|
|
|
|
Builds a node pointer out of a physical record and a page number.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return own: node pointer */
|
2014-02-26 19:11:54 +01:00
|
|
|
dtuple_t*
|
|
|
|
dict_index_build_node_ptr(
|
|
|
|
/*======================*/
|
|
|
|
const dict_index_t* index, /*!< in: index */
|
|
|
|
const rec_t* rec, /*!< in: record for which to build node
|
|
|
|
pointer */
|
|
|
|
ulint page_no,/*!< in: page number to put in node
|
|
|
|
pointer */
|
|
|
|
mem_heap_t* heap, /*!< in: memory heap where pointer
|
|
|
|
created */
|
|
|
|
ulint level) /*!< in: level of rec in tree:
|
|
|
|
0 means leaf level */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2017-09-20 21:34:20 +02:00
|
|
|
/** Convert a physical record into a search tuple.
|
|
|
|
@param[in] rec index record (not necessarily in an index page)
|
|
|
|
@param[in] index index
|
|
|
|
@param[in] leaf whether rec is in a leaf page
|
|
|
|
@param[in] n_fields number of data fields
|
|
|
|
@param[in,out] heap memory heap for allocation
|
2016-08-12 10:17:45 +02:00
|
|
|
@return own: data tuple */
|
2014-02-26 19:11:54 +01:00
|
|
|
dtuple_t*
|
MDEV-11369 Instant ADD COLUMN for InnoDB
For InnoDB tables, adding, dropping and reordering columns has
required a rebuild of the table and all its indexes. Since MySQL 5.6
(and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing
concurrent modification of the tables.
This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT
and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously,
with only minor changes performed to the table structure. The counter
innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS
is incremented whenever a table rebuild operation is converted into
an instant ADD COLUMN operation.
ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN.
Some usability limitations will be addressed in subsequent work:
MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY
and ALGORITHM=INSTANT
MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE
The format of the clustered index (PRIMARY KEY) is changed as follows:
(1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT,
and a new field PAGE_INSTANT will contain the original number of fields
in the clustered index ('core' fields).
If instant ADD COLUMN has not been used or the table becomes empty,
or the very first instant ADD COLUMN operation is rolled back,
the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset
to 0 and FIL_PAGE_INDEX.
(2) A special 'default row' record is inserted into the leftmost leaf,
between the page infimum and the first user record. This record is
distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the
same format as records that contain values for the instantly added
columns. This 'default row' always has the same number of fields as
the clustered index according to the table definition. The values of
'core' fields are to be ignored. For other fields, the 'default row'
will contain the default values as they were during the ALTER TABLE
statement. (If the column default values are changed later, those
values will only be stored in the .frm file. The 'default row' will
contain the original evaluated values, which must be the same for
every row.) The 'default row' must be completely hidden from
higher-level access routines. Assertions have been added to ensure
that no 'default row' is ever present in the adaptive hash index
or in locked records. The 'default row' is never delete-marked.
(3) In clustered index leaf page records, the number of fields must
reside between the number of 'core' fields (dict_index_t::n_core_fields
introduced in this work) and dict_index_t::n_fields. If the number
of fields is less than dict_index_t::n_fields, the missing fields
are replaced with the column value of the 'default row'.
Note: The number of fields in the record may shrink if some of the
last instantly added columns are updated to the value that is
in the 'default row'. The function btr_cur_trim() implements this
'compression' on update and rollback; dtuple::trim() implements it
on insert.
(4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new
status value REC_STATUS_COLUMNS_ADDED will indicate the presence of
a new record header that will encode n_fields-n_core_fields-1 in
1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header
always explicitly encodes the number of fields.)
We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for
covering the insert of the 'default row' record when instant ADD COLUMN
is used for the first time. Subsequent instant ADD COLUMN can use
TRX_UNDO_UPD_EXIST_REC.
This is joint work with Vin Chen (陈福荣) from Tencent. The design
that was discussed in April 2017 would not have allowed import or
export of data files, because instead of the 'default row' it would
have introduced a data dictionary table. The test
rpl.rpl_alter_instant is exactly as contributed in pull request #408.
The test innodb.instant_alter is based on a contributed test.
The redo log record format changes for ROW_FORMAT=DYNAMIC and
ROW_FORMAT=COMPACT are as contributed. (With this change present,
crash recovery from MariaDB 10.3.1 will fail in spectacular ways!)
Also the semantics of higher-level redo log records that modify the
PAGE_INSTANT field is changed. The redo log format version identifier
was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1.
Everything else has been rewritten by me. Thanks to Elena Stepanova,
the code has been tested extensively.
When rolling back an instant ADD COLUMN operation, we must empty the
PAGE_FREE list after deleting or shortening the 'default row' record,
by calling either btr_page_empty() or btr_page_reorganize(). We must
know the size of each entry in the PAGE_FREE list. If rollback left a
freed copy of the 'default row' in the PAGE_FREE list, we would be
unable to determine its size (if it is in ROW_FORMAT=COMPACT or
ROW_FORMAT=DYNAMIC) because it would contain more fields than the
rolled-back definition of the clustered index.
UNIV_SQL_DEFAULT: A new special constant that designates an instantly
added column that is not present in the clustered index record.
len_is_stored(): Check if a length is an actual length. There are
two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL.
dict_col_t::def_val: The 'default row' value of the column. If the
column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT.
dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(),
instant_value().
dict_col_t::remove_instant(): Remove the 'instant ADD' status of
a column.
dict_col_t::name(const dict_table_t& table): Replaces
dict_table_get_col_name().
dict_index_t::n_core_fields: The original number of fields.
For secondary indexes and if instant ADD COLUMN has not been used,
this will be equal to dict_index_t::n_fields.
dict_index_t::n_core_null_bytes: Number of bytes needed to
represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable).
dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that
n_core_null_bytes was not initialized yet from the clustered index
root page.
dict_index_t: Add the accessors is_instant(), is_clust(),
get_n_nullable(), instant_field_value().
dict_index_t::instant_add_field(): Adjust clustered index metadata
for instant ADD COLUMN.
dict_index_t::remove_instant(): Remove the 'instant ADD' status
of a clustered index when the table becomes empty, or the very first
instant ADD COLUMN operation is rolled back.
dict_table_t: Add the accessors is_instant(), is_temporary(),
supports_instant().
dict_table_t::instant_add_column(): Adjust metadata for
instant ADD COLUMN.
dict_table_t::rollback_instant(): Adjust metadata on the rollback
of instant ADD COLUMN.
prepare_inplace_alter_table_dict(): First create the ctx->new_table,
and only then decide if the table really needs to be rebuilt.
We must split the creation of table or index metadata from the
creation of the dictionary table records and the creation of
the data. In this way, we can transform a table-rebuilding operation
into an instant ADD COLUMN operation. Dictionary objects will only
be added to cache when table rebuilding or index creation is needed.
The ctx->instant_table will never be added to cache.
dict_table_t::add_to_cache(): Modified and renamed from
dict_table_add_to_cache(). Do not modify the table metadata.
Let the callers invoke dict_table_add_system_columns() and if needed,
set can_be_evicted.
dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the
system columns (which will now exist in the dict_table_t object
already at this point).
dict_create_table_step(): Expect the callers to invoke
dict_table_add_system_columns().
pars_create_table(): Before creating the table creation execution
graph, invoke dict_table_add_system_columns().
row_create_table_for_mysql(): Expect all callers to invoke
dict_table_add_system_columns().
create_index_dict(): Replaces row_merge_create_index_graph().
innodb_update_n_cols(): Renamed from innobase_update_n_virtual().
Call my_error() if an error occurs.
btr_cur_instant_init(), btr_cur_instant_init_low(),
btr_cur_instant_root_init():
Load additional metadata from the clustered index and set
dict_index_t::n_core_null_bytes. This is invoked
when table metadata is first loaded into the data dictionary.
dict_boot(): Initialize n_core_null_bytes for the four hard-coded
dictionary tables.
dict_create_index_step(): Initialize n_core_null_bytes. This is
executed as part of CREATE TABLE.
dict_index_build_internal_clust(): Initialize n_core_null_bytes to
NO_CORE_NULL_BYTES if table->supports_instant().
row_create_index_for_mysql(): Initialize n_core_null_bytes for
CREATE TEMPORARY TABLE.
commit_cache_norebuild(): Call the code to rename or enlarge columns
in the cache only if instant ADD COLUMN is not being used.
(Instant ADD COLUMN would copy all column metadata from
instant_table to old_table, including the names and lengths.)
PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields.
This is repurposing the 16-bit field PAGE_DIRECTION, of which only the
least significant 3 bits were used. The original byte containing
PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B.
page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT.
page_ptr_get_direction(), page_get_direction(),
page_ptr_set_direction(): Accessors for PAGE_DIRECTION.
page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION.
page_direction_increment(): Increment PAGE_N_DIRECTION
and set PAGE_DIRECTION.
rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes,
and assume that heap_no is always set.
Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records,
even if the record contains fewer fields.
rec_offs_make_valid(): Add the parameter 'leaf'.
rec_copy_prefix_to_dtuple(): Assert that the tuple is only built
on the core fields. Instant ADD COLUMN only applies to the
clustered index, and we should never build a search key that has
more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR.
All these columns are always present.
dict_index_build_data_tuple(): Remove assertions that would be
duplicated in rec_copy_prefix_to_dtuple().
rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose
number of fields is between n_core_fields and n_fields.
cmp_rec_rec_with_match(): Implement the comparison between two
MIN_REC_FLAG records.
trx_t::in_rollback: Make the field available in non-debug builds.
trx_start_for_ddl_low(): Remove dangerous error-tolerance.
A dictionary transaction must be flagged as such before it has generated
any undo log records. This is because trx_undo_assign_undo() will mark
the transaction as a dictionary transaction in the undo log header
right before the very first undo log record is being written.
btr_index_rec_validate(): Account for instant ADD COLUMN
row_undo_ins_remove_clust_rec(): On the rollback of an insert into
SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the
last column from the table and the clustered index.
row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(),
trx_undo_update_rec_get_update(): Handle the 'default row'
as a special case.
dtuple_t::trim(index): Omit a redundant suffix of an index tuple right
before insert or update. After instant ADD COLUMN, if the last fields
of a clustered index tuple match the 'default row', there is no
need to store them. While trimming the entry, we must hold a page latch,
so that the table cannot be emptied and the 'default row' be deleted.
btr_cur_optimistic_update(), btr_cur_pessimistic_update(),
row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low():
Invoke dtuple_t::trim() if needed.
row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling
row_ins_clust_index_entry_low().
rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number
of fields to be between n_core_fields and n_fields. Do not support
infimum,supremum. They are never supposed to be stored in dtuple_t,
because page creation nowadays uses a lower-level method for initializing
them.
rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the
number of fields.
btr_cur_trim(): In an update, trim the index entry as needed. For the
'default row', handle rollback specially. For user records, omit
fields that match the 'default row'.
btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete():
Skip locking and adaptive hash index for the 'default row'.
row_log_table_apply_convert_mrec(): Replace 'default row' values if needed.
In the temporary file that is applied by row_log_table_apply(),
we must identify whether the records contain the extra header for
instantly added columns. For now, we will allocate an additional byte
for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table
has been subject to instant ADD COLUMN. The ROW_T_DELETE records are
fine, as they will be converted and will only contain 'core' columns
(PRIMARY KEY and some system columns) that are converted from dtuple_t.
rec_get_converted_size_temp(), rec_init_offsets_temp(),
rec_convert_dtuple_to_temp(): Add the parameter 'status'.
REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED:
An info_bits constant for distinguishing the 'default row' record.
rec_comp_status_t: An enum of the status bit values.
rec_leaf_format: An enum that replaces the bool parameter of
rec_init_offsets_comp_ordinary().
2017-10-06 06:00:05 +02:00
|
|
|
dict_index_build_data_tuple(
|
2017-09-20 21:34:20 +02:00
|
|
|
const rec_t* rec,
|
|
|
|
const dict_index_t* index,
|
|
|
|
bool leaf,
|
|
|
|
ulint n_fields,
|
|
|
|
mem_heap_t* heap)
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2017-09-20 21:34:20 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Gets the page number of the root of the index tree.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return page number */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
2020-03-10 19:05:17 +01:00
|
|
|
uint32_t
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_get_page(
|
|
|
|
/*================*/
|
|
|
|
const dict_index_t* tree) /*!< in: index */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Returns free space reserved for future updates of records. This is
|
|
|
|
relevant only in the case of many consecutive inserts, as updates
|
|
|
|
which make the records bigger might fragment the index.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return number of free bytes on page, reserved for updates */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_index_get_space_reserve(void);
|
|
|
|
/*==============================*/
|
|
|
|
|
|
|
|
/* Online index creation @{ */
|
|
|
|
/********************************************************************//**
|
|
|
|
Gets the status of online index creation.
|
|
|
|
@return the status */
|
|
|
|
UNIV_INLINE
|
|
|
|
enum online_index_status
|
|
|
|
dict_index_get_online_status(
|
|
|
|
/*=========================*/
|
|
|
|
const dict_index_t* index) /*!< in: secondary index */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Sets the status of online index creation. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
dict_index_set_online_status(
|
|
|
|
/*=========================*/
|
|
|
|
dict_index_t* index, /*!< in/out: index */
|
|
|
|
enum online_index_status status) /*!< in: status */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Determines if a secondary index is being or has been created online,
|
|
|
|
or if the table is being rebuilt online, allowing concurrent modifications
|
|
|
|
to the table.
|
|
|
|
@retval true if the index is being or has been built online, or
|
|
|
|
if this is a clustered index and the table is being or has been rebuilt online
|
|
|
|
@retval false if the index has been created or the table has been
|
|
|
|
rebuilt completely */
|
|
|
|
UNIV_INLINE
|
|
|
|
bool
|
|
|
|
dict_index_is_online_ddl(
|
|
|
|
/*=====================*/
|
|
|
|
const dict_index_t* index) /*!< in: index */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Calculates the minimum record length in an index. */
|
|
|
|
ulint
|
|
|
|
dict_index_calc_min_rec_len(
|
|
|
|
/*========================*/
|
|
|
|
const dict_index_t* index) /*!< in: index */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-12-22 15:53:17 +01:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/********************************************************************//**
|
|
|
|
Checks if the database name in two table names is the same.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return TRUE if same db name */
|
2014-02-26 19:11:54 +01:00
|
|
|
ibool
|
|
|
|
dict_tables_have_same_db(
|
|
|
|
/*=====================*/
|
|
|
|
const char* name1, /*!< in: table name in the form
|
|
|
|
dbname '/' tablename */
|
|
|
|
const char* name2) /*!< in: table name in the form
|
|
|
|
dbname '/' tablename */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2016-09-06 08:43:16 +02:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Get an index by name.
|
|
|
|
@param[in] table the table where to look for the index
|
|
|
|
@param[in] name the index name to look for
|
|
|
|
@return index, NULL if does not exist */
|
2014-02-26 19:11:54 +01:00
|
|
|
dict_index_t*
|
2019-03-07 16:50:13 +01:00
|
|
|
dict_table_get_index_on_name(dict_table_t* table, const char* name)
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Get an index by name.
|
|
|
|
@param[in] table the table where to look for the index
|
|
|
|
@param[in] name the index name to look for
|
|
|
|
@return index, NULL if does not exist */
|
|
|
|
inline
|
|
|
|
const dict_index_t*
|
2019-03-07 16:50:13 +01:00
|
|
|
dict_table_get_index_on_name(const dict_table_t* table, const char* name)
|
2016-08-12 10:17:45 +02:00
|
|
|
{
|
2019-03-07 16:50:13 +01:00
|
|
|
return dict_table_get_index_on_name(const_cast<dict_table_t*>(table),
|
|
|
|
name);
|
2016-08-12 10:17:45 +02:00
|
|
|
}
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/***************************************************************
|
|
|
|
Check whether a column exists in an FTS index. */
|
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_table_is_fts_column(
|
|
|
|
/*=====================*/
|
|
|
|
/* out: ULINT_UNDEFINED if no match else
|
|
|
|
the offset within the vector */
|
|
|
|
ib_vector_t* indexes,/* in: vector containing only FTS indexes */
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint col_no, /* in: col number to search for */
|
|
|
|
bool is_virtual)/*!< in: whether it is a virtual column */
|
2016-09-06 08:43:16 +02:00
|
|
|
MY_ATTRIBUTE((warn_unused_result));
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Looks for an index with the given id given a table instance.
|
|
|
|
@param[in] table table instance
|
|
|
|
@param[in] id index id
|
|
|
|
@return index or NULL */
|
|
|
|
dict_index_t*
|
|
|
|
dict_table_find_index_on_id(
|
|
|
|
const dict_table_t* table,
|
|
|
|
index_id_t id)
|
|
|
|
MY_ATTRIBUTE((nonnull(1)));
|
2016-09-06 08:43:16 +02:00
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/** Maximum number of columns in a foreign key constraint. Please Note MySQL
|
|
|
|
has a much lower limit on the number of columns allowed in a foreign key
|
|
|
|
constraint */
|
|
|
|
#define MAX_NUM_FK_COLUMNS 500
|
|
|
|
|
|
|
|
/* Buffers for storing detailed information about the latest foreign key
|
|
|
|
and unique key errors */
|
2016-08-12 10:17:45 +02:00
|
|
|
extern FILE* dict_foreign_err_file;
|
2020-12-04 18:02:58 +01:00
|
|
|
extern mysql_mutex_t dict_foreign_err_mutex;
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2019-05-17 13:32:53 +02:00
|
|
|
/** InnoDB data dictionary cache */
|
|
|
|
class dict_sys_t
|
|
|
|
{
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** The my_hrtime_coarse().val of the oldest lock_wait() start, or 0 */
|
|
|
|
std::atomic<ulonglong> latch_ex_wait_start;
|
2020-12-04 18:02:58 +01:00
|
|
|
|
MDEV-25919: Lock tables before acquiring dict_sys.latch
In commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 (MDEV-25506 part 3)
we introduced a "fake instant timeout" when a transaction would wait
for a table or record lock while holding dict_sys.latch. This prevented
a deadlock of the server but could cause bogus errors for operations
on the InnoDB persistent statistics tables.
A better fix is to ensure that whenever a transaction is being
executed in the InnoDB internal SQL parser (which will for now
require dict_sys.latch to be held), it will already have acquired
all locks that could be required for the execution. So, we will
acquire the following locks upfront, before acquiring dict_sys.latch:
(1) MDL on the affected user table (acquired by the SQL layer)
(2) If applicable (not for RENAME TABLE): InnoDB table lock
(3) If persistent statistics are going to be modified:
(3.a) MDL_SHARED on mysql.innodb_table_stats, mysql.innodb_index_stats
(3.b) exclusive table locks on the statistics tables
(4) Exclusive table locks on the InnoDB data dictionary tables
(not needed in ANALYZE TABLE and the like)
Note: Acquiring exclusive locks on the statistics tables may cause
more locking conflicts between concurrent DDL operations.
Notably, RENAME TABLE will lock the statistics tables
even if no persistent statistics are enabled for the table.
DROP DATABASE will only acquire locks on statistics tables if
persistent statistics are enabled for the tables on which the
SQL layer is invoking ha_innobase::delete_table().
For any "garbage collection" in innodb_drop_database(), a timeout
while acquiring locks on the statistics tables will result in any
statistics not being deleted for any tables that the SQL layer
did not know about.
If innodb_defragment=ON, information may be written to the statistics
tables even for tables for which InnoDB persistent statistics are
disabled. But, DROP TABLE will no longer attempt to delete that
information if persistent statistics are not enabled for the table.
This change should also fix the hangs related to InnoDB persistent
statistics and STATS_AUTO_RECALC (MDEV-15020) as well as
a bug that running ALTER TABLE on the statistics tables
concurrently with running ALTER TABLE on InnoDB tables could
cause trouble.
lock_rec_enqueue_waiting(), lock_table_enqueue_waiting():
Do not issue a fake instant timeout error when the transaction
is holding dict_sys.latch. Instead, assert that the dict_sys.latch
is never being held here.
lock_sys_tables(): A new function to acquire exclusive locks on all
dictionary tables, in case DROP TABLE or similar operation is
being executed. Locking non-hard-coded tables is optional to avoid
a crash in row_merge_drop_temp_indexes(). The SYS_VIRTUAL table was
introduced in MySQL 5.7 and MariaDB Server 10.2. Normally, we require
all these dictionary tables to exist before executing any DDL, but
the function row_merge_drop_temp_indexes() is an exception.
When upgrading from MariaDB Server 10.1 or MySQL 5.6 or earlier,
the table SYS_VIRTUAL would not exist at this point.
ha_innobase::commit_inplace_alter_table(): Invoke
log_write_up_to() while not holding dict_sys.latch.
dict_sys_t::remove(), dict_table_close(): No longer try to
drop index stubs that were left behind by aborted online ADD INDEX.
Such indexes should be dropped from the InnoDB data dictionary by
row_merge_drop_indexes() as part of the failed DDL operation.
Stubs for aborted indexes may only be left behind in the
data dictionary cache.
dict_stats_fetch_from_ps(): Use a normal read-only transaction.
ha_innobase::delete_table(), ha_innobase::truncate(), fts_lock_table():
While waiting for purge to stop using the table,
do not hold dict_sys.latch.
ha_innobase::delete_table(): Implement a work-around for the rollback
of ALTER TABLE...ADD PARTITION. MDL_EXCLUSIVE would not be held if
ALTER TABLE hits lock_wait_timeout while trying to upgrade the MDL
due to a conflicting LOCK TABLES, such as in the first ALTER TABLE
in the test case of Bug#53676 in parts.partition_special_innodb.
Therefore, we must explicitly stop purge, because it would not be
stopped by MDL.
dict_stats_func(), btr_defragment_chunk(): Allocate a THD so that
we can acquire MDL on the InnoDB persistent statistics tables.
mysqltest_embedded: Invoke ha_pre_shutdown() before free_used_memory()
in order to avoid ASAN heap-use-after-free related to acquire_thd().
trx_t::dict_operation_lock_mode: Changed the type to bool.
row_mysql_lock_data_dictionary(), row_mysql_unlock_data_dictionary():
Implemented as macros.
rollback_inplace_alter_table(): Apply an infinite timeout to lock waits.
innodb_thd_increment_pending_ops(): Wrapper for
thd_increment_pending_ops(). Never attempt async operation for
InnoDB background threads, such as the trx_t::commit() in
dict_stats_process_entry_from_recalc_pool().
lock_sys_t::cancel(trx_t*): Make dictionary transactions immune to KILL.
lock_wait(): Make dictionary transactions immune to KILL, and to
lock wait timeout when waiting for locks on dictionary tables.
parts.partition_special_innodb: Use lock_wait_timeout=0 to instantly
get ER_LOCK_WAIT_TIMEOUT.
main.mdl: Filter out MDL on InnoDB persistent statistics tables
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:44 +02:00
|
|
|
/** the rw-latch protecting the data dictionary cache */
|
2022-04-14 09:40:26 +02:00
|
|
|
alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_lock latch;
|
2020-11-20 08:31:27 +01:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/** whether latch is being held in exclusive mode (by any thread) */
|
2022-11-08 16:34:34 +01:00
|
|
|
Atomic_relaxed<pthread_t> latch_ex;
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** number of S-latch holders */
|
|
|
|
Atomic_counter<uint32_t> latch_readers;
|
2020-11-20 08:31:27 +01:00
|
|
|
#endif
|
2019-05-17 13:32:53 +02:00
|
|
|
public:
|
2021-05-20 13:58:25 +02:00
|
|
|
/** Indexes of SYS_TABLE[] */
|
|
|
|
enum
|
|
|
|
{
|
|
|
|
SYS_TABLES= 0,
|
|
|
|
SYS_INDEXES,
|
|
|
|
SYS_COLUMNS,
|
|
|
|
SYS_FIELDS,
|
|
|
|
SYS_FOREIGN,
|
|
|
|
SYS_FOREIGN_COLS,
|
|
|
|
SYS_VIRTUAL
|
|
|
|
};
|
|
|
|
/** System table names */
|
|
|
|
static const span<const char> SYS_TABLE[];
|
|
|
|
|
|
|
|
/** all tables (persistent and temporary), hashed by name */
|
|
|
|
hash_table_t table_hash;
|
|
|
|
/** hash table of persistent table IDs */
|
|
|
|
hash_table_t table_id_hash;
|
|
|
|
|
|
|
|
/** the SYS_TABLES table */
|
|
|
|
dict_table_t *sys_tables;
|
|
|
|
/** the SYS_COLUMNS table */
|
|
|
|
dict_table_t *sys_columns;
|
|
|
|
/** the SYS_INDEXES table */
|
|
|
|
dict_table_t *sys_indexes;
|
|
|
|
/** the SYS_FIELDS table */
|
|
|
|
dict_table_t *sys_fields;
|
|
|
|
/** the SYS_FOREIGN table */
|
|
|
|
dict_table_t *sys_foreign;
|
|
|
|
/** the SYS_FOREIGN_COLS table */
|
|
|
|
dict_table_t *sys_foreign_cols;
|
|
|
|
/** the SYS_VIRTUAL table */
|
|
|
|
dict_table_t *sys_virtual;
|
|
|
|
|
|
|
|
/** @return whether all non-hard-coded system tables exist */
|
|
|
|
bool sys_tables_exist() const
|
|
|
|
{ return UNIV_LIKELY(sys_foreign && sys_foreign_cols && sys_virtual); }
|
|
|
|
|
|
|
|
/** list of persistent tables that can be evicted */
|
|
|
|
UT_LIST_BASE_NODE_T(dict_table_t) table_LRU;
|
|
|
|
/** list of persistent tables that cannot be evicted */
|
|
|
|
UT_LIST_BASE_NODE_T(dict_table_t) table_non_LRU;
|
2020-11-20 11:28:47 +01:00
|
|
|
|
2019-05-17 13:32:53 +02:00
|
|
|
private:
|
2020-11-20 08:31:27 +01:00
|
|
|
bool m_initialised= false;
|
|
|
|
/** the sequence of temporary table IDs */
|
|
|
|
std::atomic<table_id_t> temp_table_id{DICT_HDR_FIRST_ID};
|
2020-11-20 11:28:47 +01:00
|
|
|
/** hash table of temporary table IDs */
|
|
|
|
hash_table_t temp_id_hash;
|
|
|
|
/** the next value of DB_ROW_ID, backed by DICT_HDR_ROW_ID
|
|
|
|
(FIXME: remove this, and move to dict_table_t) */
|
|
|
|
Atomic_relaxed<row_id_t> row_id;
|
|
|
|
/** The synchronization interval of row_id */
|
|
|
|
static constexpr size_t ROW_ID_WRITE_MARGIN= 256;
|
2019-05-17 13:32:53 +02:00
|
|
|
public:
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** Diagnostic message for exceeding the lock_wait() timeout */
|
2020-12-04 18:02:58 +01:00
|
|
|
static const char fatal_msg[];
|
|
|
|
|
2020-11-20 11:28:47 +01:00
|
|
|
/** @return A new value for GEN_CLUST_INDEX(DB_ROW_ID) */
|
|
|
|
inline row_id_t get_new_row_id();
|
|
|
|
|
|
|
|
/** Ensure that row_id is not smaller than id, on IMPORT TABLESPACE */
|
|
|
|
inline void update_row_id(row_id_t id);
|
|
|
|
|
|
|
|
/** Recover the global DB_ROW_ID sequence on database startup */
|
|
|
|
void recover_row_id(row_id_t id)
|
|
|
|
{
|
|
|
|
row_id= ut_uint64_align_up(id, ROW_ID_WRITE_MARGIN) + ROW_ID_WRITE_MARGIN;
|
|
|
|
}
|
|
|
|
|
2021-05-20 13:58:25 +02:00
|
|
|
/** @return a new temporary table ID */
|
|
|
|
table_id_t acquire_temporary_table_id()
|
|
|
|
{
|
|
|
|
return temp_table_id.fetch_add(1, std::memory_order_relaxed);
|
|
|
|
}
|
2018-11-22 14:36:50 +01:00
|
|
|
|
2021-05-20 13:58:25 +02:00
|
|
|
/** Look up a temporary table.
|
|
|
|
@param id temporary table ID
|
|
|
|
@return temporary table
|
|
|
|
@retval nullptr if the table does not exist
|
|
|
|
(should only happen during the rollback of CREATE...SELECT) */
|
|
|
|
dict_table_t *acquire_temporary_table(table_id_t id)
|
|
|
|
{
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
ut_ad(frozen());
|
2021-05-20 13:58:25 +02:00
|
|
|
dict_table_t *table;
|
|
|
|
ulint fold = ut_fold_ull(id);
|
|
|
|
HASH_SEARCH(id_hash, &temp_id_hash, fold, dict_table_t*, table,
|
|
|
|
ut_ad(table->cached), table->id == id);
|
|
|
|
if (UNIV_LIKELY(table != nullptr))
|
|
|
|
{
|
|
|
|
DBUG_ASSERT(table->is_temporary());
|
|
|
|
DBUG_ASSERT(table->id >= DICT_HDR_FIRST_ID);
|
|
|
|
table->acquire();
|
|
|
|
}
|
|
|
|
return table;
|
|
|
|
}
|
2018-11-22 14:36:50 +01:00
|
|
|
|
2021-05-20 13:58:25 +02:00
|
|
|
/** Look up a persistent table.
|
|
|
|
@param id table ID
|
|
|
|
@return table
|
|
|
|
@retval nullptr if not cached */
|
|
|
|
dict_table_t *find_table(table_id_t id)
|
|
|
|
{
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
ut_ad(frozen());
|
2021-05-20 13:58:25 +02:00
|
|
|
dict_table_t *table;
|
2021-06-14 08:15:20 +02:00
|
|
|
ulint fold= ut_fold_ull(id);
|
2021-05-20 13:58:25 +02:00
|
|
|
HASH_SEARCH(id_hash, &table_id_hash, fold, dict_table_t*, table,
|
|
|
|
ut_ad(table->cached), table->id == id);
|
|
|
|
DBUG_ASSERT(!table || !table->is_temporary());
|
|
|
|
return table;
|
|
|
|
}
|
2018-11-22 14:36:50 +01:00
|
|
|
|
2019-05-17 13:32:53 +02:00
|
|
|
bool is_initialised() const { return m_initialised; }
|
|
|
|
|
|
|
|
/** Initialise the data dictionary cache. */
|
|
|
|
void create();
|
|
|
|
|
|
|
|
/** Close the data dictionary cache on shutdown. */
|
|
|
|
void close();
|
|
|
|
|
|
|
|
/** Resize the hash tables based on the current buffer pool size. */
|
|
|
|
void resize();
|
|
|
|
|
|
|
|
/** Add a table definition to the data dictionary cache */
|
|
|
|
inline void add(dict_table_t* table);
|
|
|
|
/** Remove a table definition from the data dictionary cache.
|
|
|
|
@param[in,out] table cached table definition to be evicted
|
|
|
|
@param[in] lru whether this is part of least-recently-used evictiono
|
|
|
|
@param[in] keep whether to keep (not free) the object */
|
|
|
|
void remove(dict_table_t* table, bool lru = false, bool keep = false);
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/** Find a table */
|
2021-05-20 13:58:25 +02:00
|
|
|
template <bool in_lru> bool find(const dict_table_t *table)
|
2019-05-17 13:32:53 +02:00
|
|
|
{
|
|
|
|
ut_ad(table);
|
|
|
|
ut_ad(table->can_be_evicted == in_lru);
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
ut_ad(frozen());
|
2021-05-20 13:58:25 +02:00
|
|
|
for (const dict_table_t* t= in_lru ? table_LRU.start : table_non_LRU.start;
|
|
|
|
t; t = UT_LIST_GET_NEXT(table_LRU, t))
|
2019-05-17 13:32:53 +02:00
|
|
|
{
|
|
|
|
if (t == table) return true;
|
|
|
|
ut_ad(t->can_be_evicted == in_lru);
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
/** Find a table */
|
2021-05-20 13:58:25 +02:00
|
|
|
bool find(const dict_table_t *table)
|
2019-05-17 13:32:53 +02:00
|
|
|
{
|
|
|
|
return table->can_be_evicted ? find<true>(table) : find<false>(table);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2022-06-27 15:51:53 +02:00
|
|
|
/** Move a table to the non-LRU list from the LRU list. */
|
|
|
|
void prevent_eviction(dict_table_t *table)
|
2019-05-17 13:32:53 +02:00
|
|
|
{
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
ut_d(locked());
|
2019-05-17 13:32:53 +02:00
|
|
|
ut_ad(find(table));
|
2021-08-31 12:48:10 +02:00
|
|
|
if (!table->can_be_evicted)
|
2022-06-27 15:51:53 +02:00
|
|
|
return;
|
2021-08-31 12:48:10 +02:00
|
|
|
table->can_be_evicted= false;
|
|
|
|
UT_LIST_REMOVE(table_LRU, table);
|
|
|
|
UT_LIST_ADD_LAST(table_non_LRU, table);
|
|
|
|
}
|
|
|
|
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/** @return whether any thread (not necessarily the current thread)
|
|
|
|
is holding the latch; that is, this check may return false
|
|
|
|
positives */
|
2022-11-08 16:34:34 +01:00
|
|
|
bool frozen() const { return latch_readers || latch_ex; }
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** @return whether any thread (not necessarily the current thread)
|
2022-11-08 16:34:34 +01:00
|
|
|
is holding a shared latch */
|
|
|
|
bool frozen_not_locked() const { return latch_readers; }
|
|
|
|
/** @return whether the current thread holds the exclusive latch */
|
|
|
|
bool locked() const { return latch_ex == pthread_self(); }
|
2019-05-17 14:17:37 +02:00
|
|
|
#endif
|
2020-12-04 18:02:58 +01:00
|
|
|
private:
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** Acquire the exclusive latch */
|
|
|
|
ATTRIBUTE_NOINLINE
|
|
|
|
void lock_wait(SRW_LOCK_ARGS(const char *file, unsigned line));
|
2020-12-04 18:02:58 +01:00
|
|
|
public:
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** @return the my_hrtime_coarse().val of the oldest lock_wait() start,
|
2020-12-04 18:02:58 +01:00
|
|
|
assuming that requests are served on a FIFO basis */
|
|
|
|
ulonglong oldest_wait() const
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
{ return latch_ex_wait_start.load(std::memory_order_relaxed); }
|
2020-12-04 18:02:58 +01:00
|
|
|
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** Exclusively lock the dictionary cache. */
|
|
|
|
void lock(SRW_LOCK_ARGS(const char *file, unsigned line))
|
|
|
|
{
|
|
|
|
if (latch.wr_lock_try())
|
|
|
|
{
|
|
|
|
ut_ad(!latch_readers);
|
|
|
|
ut_ad(!latch_ex);
|
2022-11-08 16:34:34 +01:00
|
|
|
ut_d(latch_ex= pthread_self());
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
}
|
|
|
|
else
|
|
|
|
lock_wait(SRW_LOCK_ARGS(file, line));
|
2022-04-11 13:41:24 +02:00
|
|
|
}
|
2019-05-17 14:17:37 +02:00
|
|
|
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
#ifdef UNIV_PFS_RWLOCK
|
|
|
|
/** Unlock the data dictionary cache. */
|
|
|
|
ATTRIBUTE_NOINLINE void unlock();
|
|
|
|
/** Acquire a shared lock on the dictionary cache. */
|
|
|
|
ATTRIBUTE_NOINLINE void freeze(const char *file, unsigned line);
|
|
|
|
/** Release a shared lock on the dictionary cache. */
|
|
|
|
ATTRIBUTE_NOINLINE void unfreeze();
|
|
|
|
#else
|
2019-05-17 14:17:37 +02:00
|
|
|
/** Unlock the data dictionary cache. */
|
|
|
|
void unlock()
|
|
|
|
{
|
2022-11-08 16:34:34 +01:00
|
|
|
ut_ad(latch_ex == pthread_self());
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
ut_ad(!latch_readers);
|
2022-11-08 16:34:34 +01:00
|
|
|
ut_d(latch_ex= 0);
|
2020-11-20 08:31:27 +01:00
|
|
|
latch.wr_unlock();
|
2019-05-17 14:17:37 +02:00
|
|
|
}
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
/** Acquire a shared lock on the dictionary cache. */
|
|
|
|
void freeze()
|
|
|
|
{
|
|
|
|
latch.rd_lock();
|
|
|
|
ut_ad(!latch_ex);
|
|
|
|
ut_d(latch_readers++);
|
|
|
|
}
|
|
|
|
/** Release a shared lock on the dictionary cache. */
|
|
|
|
void unfreeze()
|
|
|
|
{
|
|
|
|
ut_ad(!latch_ex);
|
|
|
|
ut_ad(latch_readers--);
|
|
|
|
latch.rd_unlock();
|
|
|
|
}
|
|
|
|
#endif
|
2019-07-03 16:31:20 +02:00
|
|
|
|
|
|
|
/** Estimate the used memory occupied by the data dictionary
|
|
|
|
table and index objects.
|
|
|
|
@return number of bytes occupied */
|
MDEV-25919: Lock tables before acquiring dict_sys.latch
In commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 (MDEV-25506 part 3)
we introduced a "fake instant timeout" when a transaction would wait
for a table or record lock while holding dict_sys.latch. This prevented
a deadlock of the server but could cause bogus errors for operations
on the InnoDB persistent statistics tables.
A better fix is to ensure that whenever a transaction is being
executed in the InnoDB internal SQL parser (which will for now
require dict_sys.latch to be held), it will already have acquired
all locks that could be required for the execution. So, we will
acquire the following locks upfront, before acquiring dict_sys.latch:
(1) MDL on the affected user table (acquired by the SQL layer)
(2) If applicable (not for RENAME TABLE): InnoDB table lock
(3) If persistent statistics are going to be modified:
(3.a) MDL_SHARED on mysql.innodb_table_stats, mysql.innodb_index_stats
(3.b) exclusive table locks on the statistics tables
(4) Exclusive table locks on the InnoDB data dictionary tables
(not needed in ANALYZE TABLE and the like)
Note: Acquiring exclusive locks on the statistics tables may cause
more locking conflicts between concurrent DDL operations.
Notably, RENAME TABLE will lock the statistics tables
even if no persistent statistics are enabled for the table.
DROP DATABASE will only acquire locks on statistics tables if
persistent statistics are enabled for the tables on which the
SQL layer is invoking ha_innobase::delete_table().
For any "garbage collection" in innodb_drop_database(), a timeout
while acquiring locks on the statistics tables will result in any
statistics not being deleted for any tables that the SQL layer
did not know about.
If innodb_defragment=ON, information may be written to the statistics
tables even for tables for which InnoDB persistent statistics are
disabled. But, DROP TABLE will no longer attempt to delete that
information if persistent statistics are not enabled for the table.
This change should also fix the hangs related to InnoDB persistent
statistics and STATS_AUTO_RECALC (MDEV-15020) as well as
a bug that running ALTER TABLE on the statistics tables
concurrently with running ALTER TABLE on InnoDB tables could
cause trouble.
lock_rec_enqueue_waiting(), lock_table_enqueue_waiting():
Do not issue a fake instant timeout error when the transaction
is holding dict_sys.latch. Instead, assert that the dict_sys.latch
is never being held here.
lock_sys_tables(): A new function to acquire exclusive locks on all
dictionary tables, in case DROP TABLE or similar operation is
being executed. Locking non-hard-coded tables is optional to avoid
a crash in row_merge_drop_temp_indexes(). The SYS_VIRTUAL table was
introduced in MySQL 5.7 and MariaDB Server 10.2. Normally, we require
all these dictionary tables to exist before executing any DDL, but
the function row_merge_drop_temp_indexes() is an exception.
When upgrading from MariaDB Server 10.1 or MySQL 5.6 or earlier,
the table SYS_VIRTUAL would not exist at this point.
ha_innobase::commit_inplace_alter_table(): Invoke
log_write_up_to() while not holding dict_sys.latch.
dict_sys_t::remove(), dict_table_close(): No longer try to
drop index stubs that were left behind by aborted online ADD INDEX.
Such indexes should be dropped from the InnoDB data dictionary by
row_merge_drop_indexes() as part of the failed DDL operation.
Stubs for aborted indexes may only be left behind in the
data dictionary cache.
dict_stats_fetch_from_ps(): Use a normal read-only transaction.
ha_innobase::delete_table(), ha_innobase::truncate(), fts_lock_table():
While waiting for purge to stop using the table,
do not hold dict_sys.latch.
ha_innobase::delete_table(): Implement a work-around for the rollback
of ALTER TABLE...ADD PARTITION. MDL_EXCLUSIVE would not be held if
ALTER TABLE hits lock_wait_timeout while trying to upgrade the MDL
due to a conflicting LOCK TABLES, such as in the first ALTER TABLE
in the test case of Bug#53676 in parts.partition_special_innodb.
Therefore, we must explicitly stop purge, because it would not be
stopped by MDL.
dict_stats_func(), btr_defragment_chunk(): Allocate a THD so that
we can acquire MDL on the InnoDB persistent statistics tables.
mysqltest_embedded: Invoke ha_pre_shutdown() before free_used_memory()
in order to avoid ASAN heap-use-after-free related to acquire_thd().
trx_t::dict_operation_lock_mode: Changed the type to bool.
row_mysql_lock_data_dictionary(), row_mysql_unlock_data_dictionary():
Implemented as macros.
rollback_inplace_alter_table(): Apply an infinite timeout to lock waits.
innodb_thd_increment_pending_ops(): Wrapper for
thd_increment_pending_ops(). Never attempt async operation for
InnoDB background threads, such as the trx_t::commit() in
dict_stats_process_entry_from_recalc_pool().
lock_sys_t::cancel(trx_t*): Make dictionary transactions immune to KILL.
lock_wait(): Make dictionary transactions immune to KILL, and to
lock wait timeout when waiting for locks on dictionary tables.
parts.partition_special_innodb: Use lock_wait_timeout=0 to instantly
get ER_LOCK_WAIT_TIMEOUT.
main.mdl: Filter out MDL on InnoDB persistent statistics tables
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:44 +02:00
|
|
|
TPOOL_SUPPRESS_TSAN ulint rough_size() const
|
2019-07-03 16:31:20 +02:00
|
|
|
{
|
MDEV-25919: Lock tables before acquiring dict_sys.latch
In commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 (MDEV-25506 part 3)
we introduced a "fake instant timeout" when a transaction would wait
for a table or record lock while holding dict_sys.latch. This prevented
a deadlock of the server but could cause bogus errors for operations
on the InnoDB persistent statistics tables.
A better fix is to ensure that whenever a transaction is being
executed in the InnoDB internal SQL parser (which will for now
require dict_sys.latch to be held), it will already have acquired
all locks that could be required for the execution. So, we will
acquire the following locks upfront, before acquiring dict_sys.latch:
(1) MDL on the affected user table (acquired by the SQL layer)
(2) If applicable (not for RENAME TABLE): InnoDB table lock
(3) If persistent statistics are going to be modified:
(3.a) MDL_SHARED on mysql.innodb_table_stats, mysql.innodb_index_stats
(3.b) exclusive table locks on the statistics tables
(4) Exclusive table locks on the InnoDB data dictionary tables
(not needed in ANALYZE TABLE and the like)
Note: Acquiring exclusive locks on the statistics tables may cause
more locking conflicts between concurrent DDL operations.
Notably, RENAME TABLE will lock the statistics tables
even if no persistent statistics are enabled for the table.
DROP DATABASE will only acquire locks on statistics tables if
persistent statistics are enabled for the tables on which the
SQL layer is invoking ha_innobase::delete_table().
For any "garbage collection" in innodb_drop_database(), a timeout
while acquiring locks on the statistics tables will result in any
statistics not being deleted for any tables that the SQL layer
did not know about.
If innodb_defragment=ON, information may be written to the statistics
tables even for tables for which InnoDB persistent statistics are
disabled. But, DROP TABLE will no longer attempt to delete that
information if persistent statistics are not enabled for the table.
This change should also fix the hangs related to InnoDB persistent
statistics and STATS_AUTO_RECALC (MDEV-15020) as well as
a bug that running ALTER TABLE on the statistics tables
concurrently with running ALTER TABLE on InnoDB tables could
cause trouble.
lock_rec_enqueue_waiting(), lock_table_enqueue_waiting():
Do not issue a fake instant timeout error when the transaction
is holding dict_sys.latch. Instead, assert that the dict_sys.latch
is never being held here.
lock_sys_tables(): A new function to acquire exclusive locks on all
dictionary tables, in case DROP TABLE or similar operation is
being executed. Locking non-hard-coded tables is optional to avoid
a crash in row_merge_drop_temp_indexes(). The SYS_VIRTUAL table was
introduced in MySQL 5.7 and MariaDB Server 10.2. Normally, we require
all these dictionary tables to exist before executing any DDL, but
the function row_merge_drop_temp_indexes() is an exception.
When upgrading from MariaDB Server 10.1 or MySQL 5.6 or earlier,
the table SYS_VIRTUAL would not exist at this point.
ha_innobase::commit_inplace_alter_table(): Invoke
log_write_up_to() while not holding dict_sys.latch.
dict_sys_t::remove(), dict_table_close(): No longer try to
drop index stubs that were left behind by aborted online ADD INDEX.
Such indexes should be dropped from the InnoDB data dictionary by
row_merge_drop_indexes() as part of the failed DDL operation.
Stubs for aborted indexes may only be left behind in the
data dictionary cache.
dict_stats_fetch_from_ps(): Use a normal read-only transaction.
ha_innobase::delete_table(), ha_innobase::truncate(), fts_lock_table():
While waiting for purge to stop using the table,
do not hold dict_sys.latch.
ha_innobase::delete_table(): Implement a work-around for the rollback
of ALTER TABLE...ADD PARTITION. MDL_EXCLUSIVE would not be held if
ALTER TABLE hits lock_wait_timeout while trying to upgrade the MDL
due to a conflicting LOCK TABLES, such as in the first ALTER TABLE
in the test case of Bug#53676 in parts.partition_special_innodb.
Therefore, we must explicitly stop purge, because it would not be
stopped by MDL.
dict_stats_func(), btr_defragment_chunk(): Allocate a THD so that
we can acquire MDL on the InnoDB persistent statistics tables.
mysqltest_embedded: Invoke ha_pre_shutdown() before free_used_memory()
in order to avoid ASAN heap-use-after-free related to acquire_thd().
trx_t::dict_operation_lock_mode: Changed the type to bool.
row_mysql_lock_data_dictionary(), row_mysql_unlock_data_dictionary():
Implemented as macros.
rollback_inplace_alter_table(): Apply an infinite timeout to lock waits.
innodb_thd_increment_pending_ops(): Wrapper for
thd_increment_pending_ops(). Never attempt async operation for
InnoDB background threads, such as the trx_t::commit() in
dict_stats_process_entry_from_recalc_pool().
lock_sys_t::cancel(trx_t*): Make dictionary transactions immune to KILL.
lock_wait(): Make dictionary transactions immune to KILL, and to
lock wait timeout when waiting for locks on dictionary tables.
parts.partition_special_innodb: Use lock_wait_timeout=0 to instantly
get ER_LOCK_WAIT_TIMEOUT.
main.mdl: Filter out MDL on InnoDB persistent statistics tables
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:44 +02:00
|
|
|
/* No latch; this is a very crude approximation anyway */
|
2019-07-03 16:31:20 +02:00
|
|
|
ulint size = UT_LIST_GET_LEN(table_LRU) + UT_LIST_GET_LEN(table_non_LRU);
|
|
|
|
size *= sizeof(dict_table_t)
|
|
|
|
+ sizeof(dict_index_t) * 2
|
|
|
|
+ (sizeof(dict_col_t) + sizeof(dict_field_t)) * 10
|
|
|
|
+ sizeof(dict_field_t) * 5 /* total number of key fields */
|
|
|
|
+ 200; /* arbitrary, covering names and overhead */
|
2021-05-20 13:58:25 +02:00
|
|
|
size += (table_hash.n_cells + table_id_hash.n_cells +
|
|
|
|
temp_id_hash.n_cells) * sizeof(hash_cell_t);
|
2019-07-03 16:31:20 +02:00
|
|
|
return size;
|
|
|
|
}
|
2021-05-20 08:26:23 +02:00
|
|
|
|
|
|
|
/** Evict unused, unlocked tables from table_LRU.
|
|
|
|
@param half whether to consider half the tables only (instead of all)
|
|
|
|
@return number of tables evicted */
|
|
|
|
ulint evict_table_LRU(bool half);
|
2021-05-20 13:58:25 +02:00
|
|
|
|
|
|
|
/** Look up a table in the dictionary cache.
|
|
|
|
@param name table name
|
|
|
|
@return table handle
|
|
|
|
@retval nullptr if not found */
|
|
|
|
dict_table_t *find_table(const span<const char> &name) const
|
|
|
|
{
|
MDEV-24258 Merge dict_sys.mutex into dict_sys.latch
In the parent commit, dict_sys.latch could theoretically have been
replaced with a mutex. But, we can do better and merge dict_sys.mutex
into dict_sys.latch. Generally, every occurrence of dict_sys.mutex_lock()
will be replaced with dict_sys.lock().
The PERFORMANCE_SCHEMA instrumentation for dict_sys_mutex
will be removed along with dict_sys.mutex. The dict_sys.latch
will remain instrumented as dict_operation_lock.
Some use of dict_sys.lock() will be replaced with dict_sys.freeze(),
which we will reintroduce for the new shared mode. Most notably,
concurrent table lookups are possible as long as the tables are present
in the dict_sys cache. In particular, this will allow more concurrency
among InnoDB purge workers.
Because dict_sys.mutex will no longer 'throttle' the threads that purge
InnoDB transaction history, a performance degradation may be observed
unless innodb_purge_threads=1.
The table cache eviction policy will become FIFO-like,
similar to what happened to fil_system.LRU
in commit 45ed9dd957eebc7fc84feb2509f4aa6baa908a95.
The name of the list dict_sys.table_LRU will become somewhat misleading;
that list contains tables that may be evicted, even though the
eviction policy no longer is least-recently-used but first-in-first-out.
(Note: Tables can never be evicted as long as locks exist on them or
the tables are in use by some thread.)
As demonstrated by the test perfschema.sxlock_func, there
will be less contention on dict_sys.latch, because some previous
use of exclusive latches will be replaced with shared latches.
fts_parse_sql_no_dict_lock(): Replaced with pars_sql().
fts_get_table_name_prefix(): Merged to fts_optimize_create().
dict_stats_update_transient_for_index(): Deduplicated some code.
ha_innobase::info_low(), dict_stats_stop_bg(): Use a combination
of dict_sys.latch and table->stats_mutex_lock() to cover the
changes of BG_STAT_SHOULD_QUIT, because the flag is being read
in dict_stats_update_persistent() while not holding dict_sys.latch.
row_discard_tablespace_for_mysql(): Protect stats_bg_flag by
exclusive dict_sys.latch, like most other code does.
row_quiesce_table_has_fts_index(): Remove unnecessary mutex
acquisition. FLUSH TABLES...FOR EXPORT is protected by MDL.
row_import::set_root_by_heuristic(): Remove unnecessary mutex
acquisition. ALTER TABLE...IMPORT TABLESPACE is protected by MDL.
row_ins_sec_index_entry_low(): Replace a call
to dict_set_corrupted_index_cache_only(). Reads of index->type
were not really protected by dict_sys.mutex, and writes
(flagging an index corrupted) should be extremely rare.
dict_stats_process_entry_from_defrag_pool(): Only freeze the dictionary,
do not lock it exclusively.
dict_stats_wait_bg_to_stop_using_table(), DICT_BG_YIELD: Remove trx.
We can simply invoke dict_sys.unlock() and dict_sys.lock() directly.
dict_acquire_mdl_shared()<trylock=false>: Assert that dict_sys.latch is
only held in shared more, not exclusive mode. Only acquire it in
exclusive mode if the table needs to be loaded to the cache.
dict_sys_t::acquire(): Remove. Relocating elements in dict_sys.table_LRU
would require holding an exclusive latch, which we want to avoid
for performance reasons.
dict_sys_t::allow_eviction(): Add the table first to dict_sys.table_LRU,
to compensate for the removal of dict_sys_t::acquire(). This function
is only invoked by INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS.
dict_table_open_on_id(), dict_table_open_on_name(): If dict_locked=false,
try to acquire dict_sys.latch in shared mode. Only acquire the latch in
exclusive mode if the table is not found in the cache.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:51:35 +02:00
|
|
|
ut_ad(frozen());
|
2021-05-20 13:58:25 +02:00
|
|
|
for (dict_table_t *table= static_cast<dict_table_t*>
|
|
|
|
(HASH_GET_FIRST(&table_hash, table_hash.calc_hash
|
2021-06-14 08:15:20 +02:00
|
|
|
(my_crc32c(0, name.data(), name.size()))));
|
2021-05-20 13:58:25 +02:00
|
|
|
table; table= table->name_hash)
|
|
|
|
if (strlen(table->name.m_name) == name.size() &&
|
|
|
|
!memcmp(table->name.m_name, name.data(), name.size()))
|
|
|
|
return table;
|
|
|
|
return nullptr;
|
|
|
|
}
|
|
|
|
|
|
|
|
/** Look up or load a table definition
|
|
|
|
@param name table name
|
|
|
|
@param ignore errors to ignore when loading the table definition
|
|
|
|
@return table handle
|
|
|
|
@retval nullptr if not found */
|
|
|
|
dict_table_t *load_table(const span<const char> &name,
|
|
|
|
dict_err_ignore_t ignore= DICT_ERR_IGNORE_NONE);
|
|
|
|
|
|
|
|
/** Attempt to load the system tables on startup
|
|
|
|
@return whether any discrepancy with the expected definition was found */
|
|
|
|
bool load_sys_tables();
|
|
|
|
/** Create or check system tables on startup */
|
|
|
|
dberr_t create_or_check_sys_tables();
|
2014-02-26 19:11:54 +01:00
|
|
|
};
|
|
|
|
|
2019-05-17 13:32:53 +02:00
|
|
|
/** the data dictionary cache */
|
|
|
|
extern dict_sys_t dict_sys;
|
|
|
|
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Converts a database and table name from filesystem encoding
|
|
|
|
(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
|
|
|
|
strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
|
|
|
|
at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
|
|
|
|
void
|
|
|
|
dict_fs2utf8(
|
|
|
|
/*=========*/
|
|
|
|
const char* db_and_table, /*!< in: database and table names,
|
|
|
|
e.g. d@i1b/a@q1b@1Kc */
|
|
|
|
char* db_utf8, /*!< out: database name, e.g. dцb */
|
|
|
|
size_t db_utf8_size, /*!< in: dbname_utf8 size */
|
|
|
|
char* table_utf8, /*!< out: table name, e.g. aюbØc */
|
|
|
|
size_t table_utf8_size)/*!< in: table_utf8 size */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
MDEV-13542: Crashing on corrupted page is unhelpful
The approach to handling corruption that was chosen by Oracle in
commit 177d8b0c125b841c0650d27d735e3b87509dc286
is not really useful. Not only did it actually fail to prevent InnoDB
from crashing, but it is making things worse by blocking attempts to
rescue data from or rebuild a partially readable table.
We will try to prevent crashes in a different way: by propagating
errors up the call stack. We will never mark the clustered index
persistently corrupted, so that data recovery may be attempted by
reading from the table, or by rebuilding the table.
This should also fix MDEV-13680 (crash on btr_page_alloc() failure);
it was extensively tested with innodb_file_per_table=0 and a
non-autoextend system tablespace.
We should now avoid crashes in many cases, such as when a page
cannot be read or allocated, or an inconsistency is detected when
attempting to update multiple pages. We will not crash on double-free,
such as on the recovery of DDL in system tablespace in case something
was corrupted.
Crashes on corrupted data are still possible. The fault injection mechanism
that is introduced in the subsequent commit may help catch more of them.
buf_page_import_corrupt_failure: Remove the fault injection, and instead
corrupt some pages using Perl code in the tests.
btr_cur_pessimistic_insert(): Always reserve extents (except for the
change buffer), in order to prevent a subsequent allocation failure.
btr_pcur_open_at_rnd_pos(): Merged to the only caller ibuf_merge_pages().
btr_assert_not_corrupted(), btr_corruption_report(): Remove.
Similar checks are already part of btr_block_get().
FSEG_MAGIC_N_BYTES: Replaces FSEG_MAGIC_N_VALUE.
dict_hdr_get(), trx_rsegf_get_new(), trx_undo_page_get(),
trx_undo_page_get_s_latched(): Replaced with error-checking calls.
trx_rseg_t::get(mtr_t*): Replaces trx_rsegf_get().
trx_rseg_header_create(): Let the caller update the TRX_SYS page if needed.
trx_sys_create_sys_pages(): Merged with trx_sysf_create().
dict_check_tablespaces_and_store_max_id(): Do not access
DICT_HDR_MAX_SPACE_ID, because it was already recovered in dict_boot().
Merge dict_check_sys_tables() with this function.
dir_pathname(): Replaces os_file_make_new_pathname().
row_undo_ins_remove_sec(): Do not modify the undo page by adding
a terminating NUL byte to the record.
btr_decryption_failed(): Report decryption failures
dict_set_corrupted_by_space(), dict_set_encrypted_by_space(),
dict_set_corrupted_index_cache_only(): Remove.
dict_set_corrupted(): Remove the constant parameter dict_locked=false.
Never flag the clustered index corrupted in SYS_INDEXES, because
that would deny further access to the table. It might be possible to
repair the table by executing ALTER TABLE or OPTIMIZE TABLE, in case
no B-tree leaf page is corrupted.
dict_table_skip_corrupt_index(), dict_table_next_uncorrupted_index(),
row_purge_skip_uncommitted_virtual_index(): Remove, and refactor
the callers to read dict_index_t::type only once.
dict_table_is_corrupted(): Remove.
dict_index_t::is_btree(): Determine if the index is a valid B-tree.
BUF_GET_NO_LATCH, BUF_EVICT_IF_IN_POOL: Remove.
UNIV_BTR_DEBUG: Remove. Any inconsistency will no longer trigger
assertion failures, but error codes being returned.
buf_corrupt_page_release(): Replaced with a direct call to
buf_pool.corrupted_evict().
fil_invalid_page_access_msg(): Never crash on an invalid read;
let the caller of buf_page_get_gen() decide.
btr_pcur_t::restore_position(): Propagate failure status to the caller
by returning CORRUPTED.
opt_search_plan_for_table(): Simplify the code.
row_purge_del_mark(), row_purge_upd_exist_or_extern_func(),
row_undo_ins_remove_sec_rec(), row_undo_mod_upd_del_sec(),
row_undo_mod_del_mark_sec(): Avoid mem_heap_create()/mem_heap_free()
when no secondary indexes exist.
row_undo_mod_upd_exist_sec(): Simplify the code.
row_upd_clust_step(), dict_load_table_one(): Return DB_TABLE_CORRUPT
if the clustered index (and therefore the table) is corrupted, similar
to what we do in row_insert_for_mysql().
fut_get_ptr(): Replace with buf_page_get_gen() calls.
buf_page_get_gen(): Return nullptr and *err=DB_CORRUPTION
if the page is marked as freed. For other modes than
BUF_GET_POSSIBLY_FREED or BUF_PEEK_IF_IN_POOL this will
trigger a debug assertion failure. For BUF_GET_POSSIBLY_FREED,
we will return nullptr for freed pages, so that the callers
can be simplified. The purge of transaction history will be
a new user of BUF_GET_POSSIBLY_FREED, to avoid crashes on
corrupted data.
buf_page_get_low(): Never crash on a corrupted page, but simply
return nullptr.
fseg_page_is_allocated(): Replaces fseg_page_is_free().
fts_drop_common_tables(): Return an error if the transaction
was rolled back.
fil_space_t::set_corrupted(): Report a tablespace as corrupted if
it was not reported already.
fil_space_t::io(): Invoke fil_space_t::set_corrupted() to report
out-of-bounds page access or other errors.
Clean up mtr_t::page_lock()
buf_page_get_low(): Validate the page identifier (to check for
recently read corrupted pages) after acquiring the page latch.
buf_page_t::read_complete(): Flag uninitialized (all-zero) pages
with DB_FAIL. Return DB_PAGE_CORRUPTED on page number mismatch.
mtr_t::defer_drop_ahi(): Renamed from mtr_defer_drop_ahi().
recv_sys_t::free_corrupted_page(): Only set_corrupt_fs()
if any log records exist for the page. We do not mind if read-ahead
produces corrupted (or all-zero) pages that were not actually needed
during recovery.
recv_recover_page(): Return whether the operation succeeded.
recv_sys_t::recover_low(): Simplify the logic. Check for recovery error.
Thanks to Matthias Leich for testing this extensively and to the
authors of https://rr-project.org for making it easy to diagnose
and fix any failures that were found during the testing.
2022-06-06 13:03:22 +02:00
|
|
|
/** Flag an index corrupted both in the data dictionary cache
|
MDEV-25919 preparation: Various cleanup
que_eval_sql(): Remove the parameter lock_dict. The only caller
with lock_dict=true was dict_stats_exec_sql(), which will now
explicitly invoke dict_sys.lock() and dict_sys.unlock() by itself.
row_import_cleanup(): Do not unnecessarily lock the dictionary.
Concurrent access to the table during ALTER TABLE...IMPORT TABLESPACE
is prevented by MDL and the fact that there cannot exist any
undo log or change buffer records that would refer to the table
or tablespace.
row_import_for_mysql(): Do not unnecessarily lock the dictionary
while accessing fil_system. Thanks to MDL_EXCLUSIVE that was acquired
by the SQL layer, only one IMPORT may be in effect for the table name.
row_quiesce_set_state(): Do not unnecessarily lock the dictionary.
The dict_table_t::quiesce state is documented to be protected by
all index latches, which we are acquiring.
dict_table_close(): Introduce a simpler variant with fewer parameters.
dict_table_close(): Reduce the amount of calls.
We can simply invoke dict_table_t::release() on startup or
in DDL operations, or when the table is inaccessible.
In none of these cases, there is no need to invalidate the
InnoDB persistent statistics.
pars_info_t::graph_owns_us: Remove (unused).
pars_info_free(): Define inline.
fts_delete(), trx_t::evict_table(), row_prebuilt_free(),
row_rename_table_for_mysql(): Simplify.
row_mysql_lock_data_dictionary(): Remove some references;
use dict_sys.lock() and dict_sys.unlock() instead.
row_mysql_lock_table(): Remove. Use lock_table_for_trx() instead.
ha_innobase::check_if_supported_inplace_alter(),
row_create_table_for_mysql(): Simply assert dict_sys.sys_tables_exist().
In commit 49e2c8f0a6fefdeac50925f758090d6bd099768d and
commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 srv_start()
actually guarantees that the system tables will exist,
or the server is in read-only mode, or startup will fail.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:20 +02:00
|
|
|
and in the system table SYS_INDEXES.
|
|
|
|
@param index index to be flagged as corrupted
|
MDEV-13542: Crashing on corrupted page is unhelpful
The approach to handling corruption that was chosen by Oracle in
commit 177d8b0c125b841c0650d27d735e3b87509dc286
is not really useful. Not only did it actually fail to prevent InnoDB
from crashing, but it is making things worse by blocking attempts to
rescue data from or rebuild a partially readable table.
We will try to prevent crashes in a different way: by propagating
errors up the call stack. We will never mark the clustered index
persistently corrupted, so that data recovery may be attempted by
reading from the table, or by rebuilding the table.
This should also fix MDEV-13680 (crash on btr_page_alloc() failure);
it was extensively tested with innodb_file_per_table=0 and a
non-autoextend system tablespace.
We should now avoid crashes in many cases, such as when a page
cannot be read or allocated, or an inconsistency is detected when
attempting to update multiple pages. We will not crash on double-free,
such as on the recovery of DDL in system tablespace in case something
was corrupted.
Crashes on corrupted data are still possible. The fault injection mechanism
that is introduced in the subsequent commit may help catch more of them.
buf_page_import_corrupt_failure: Remove the fault injection, and instead
corrupt some pages using Perl code in the tests.
btr_cur_pessimistic_insert(): Always reserve extents (except for the
change buffer), in order to prevent a subsequent allocation failure.
btr_pcur_open_at_rnd_pos(): Merged to the only caller ibuf_merge_pages().
btr_assert_not_corrupted(), btr_corruption_report(): Remove.
Similar checks are already part of btr_block_get().
FSEG_MAGIC_N_BYTES: Replaces FSEG_MAGIC_N_VALUE.
dict_hdr_get(), trx_rsegf_get_new(), trx_undo_page_get(),
trx_undo_page_get_s_latched(): Replaced with error-checking calls.
trx_rseg_t::get(mtr_t*): Replaces trx_rsegf_get().
trx_rseg_header_create(): Let the caller update the TRX_SYS page if needed.
trx_sys_create_sys_pages(): Merged with trx_sysf_create().
dict_check_tablespaces_and_store_max_id(): Do not access
DICT_HDR_MAX_SPACE_ID, because it was already recovered in dict_boot().
Merge dict_check_sys_tables() with this function.
dir_pathname(): Replaces os_file_make_new_pathname().
row_undo_ins_remove_sec(): Do not modify the undo page by adding
a terminating NUL byte to the record.
btr_decryption_failed(): Report decryption failures
dict_set_corrupted_by_space(), dict_set_encrypted_by_space(),
dict_set_corrupted_index_cache_only(): Remove.
dict_set_corrupted(): Remove the constant parameter dict_locked=false.
Never flag the clustered index corrupted in SYS_INDEXES, because
that would deny further access to the table. It might be possible to
repair the table by executing ALTER TABLE or OPTIMIZE TABLE, in case
no B-tree leaf page is corrupted.
dict_table_skip_corrupt_index(), dict_table_next_uncorrupted_index(),
row_purge_skip_uncommitted_virtual_index(): Remove, and refactor
the callers to read dict_index_t::type only once.
dict_table_is_corrupted(): Remove.
dict_index_t::is_btree(): Determine if the index is a valid B-tree.
BUF_GET_NO_LATCH, BUF_EVICT_IF_IN_POOL: Remove.
UNIV_BTR_DEBUG: Remove. Any inconsistency will no longer trigger
assertion failures, but error codes being returned.
buf_corrupt_page_release(): Replaced with a direct call to
buf_pool.corrupted_evict().
fil_invalid_page_access_msg(): Never crash on an invalid read;
let the caller of buf_page_get_gen() decide.
btr_pcur_t::restore_position(): Propagate failure status to the caller
by returning CORRUPTED.
opt_search_plan_for_table(): Simplify the code.
row_purge_del_mark(), row_purge_upd_exist_or_extern_func(),
row_undo_ins_remove_sec_rec(), row_undo_mod_upd_del_sec(),
row_undo_mod_del_mark_sec(): Avoid mem_heap_create()/mem_heap_free()
when no secondary indexes exist.
row_undo_mod_upd_exist_sec(): Simplify the code.
row_upd_clust_step(), dict_load_table_one(): Return DB_TABLE_CORRUPT
if the clustered index (and therefore the table) is corrupted, similar
to what we do in row_insert_for_mysql().
fut_get_ptr(): Replace with buf_page_get_gen() calls.
buf_page_get_gen(): Return nullptr and *err=DB_CORRUPTION
if the page is marked as freed. For other modes than
BUF_GET_POSSIBLY_FREED or BUF_PEEK_IF_IN_POOL this will
trigger a debug assertion failure. For BUF_GET_POSSIBLY_FREED,
we will return nullptr for freed pages, so that the callers
can be simplified. The purge of transaction history will be
a new user of BUF_GET_POSSIBLY_FREED, to avoid crashes on
corrupted data.
buf_page_get_low(): Never crash on a corrupted page, but simply
return nullptr.
fseg_page_is_allocated(): Replaces fseg_page_is_free().
fts_drop_common_tables(): Return an error if the transaction
was rolled back.
fil_space_t::set_corrupted(): Report a tablespace as corrupted if
it was not reported already.
fil_space_t::io(): Invoke fil_space_t::set_corrupted() to report
out-of-bounds page access or other errors.
Clean up mtr_t::page_lock()
buf_page_get_low(): Validate the page identifier (to check for
recently read corrupted pages) after acquiring the page latch.
buf_page_t::read_complete(): Flag uninitialized (all-zero) pages
with DB_FAIL. Return DB_PAGE_CORRUPTED on page number mismatch.
mtr_t::defer_drop_ahi(): Renamed from mtr_defer_drop_ahi().
recv_sys_t::free_corrupted_page(): Only set_corrupt_fs()
if any log records exist for the page. We do not mind if read-ahead
produces corrupted (or all-zero) pages that were not actually needed
during recovery.
recv_recover_page(): Return whether the operation succeeded.
recv_sys_t::recover_low(): Simplify the logic. Check for recovery error.
Thanks to Matthias Leich for testing this extensively and to the
authors of https://rr-project.org for making it easy to diagnose
and fix any failures that were found during the testing.
2022-06-06 13:03:22 +02:00
|
|
|
@param ctx context (for error log reporting) */
|
|
|
|
void dict_set_corrupted(dict_index_t *index, const char *ctx)
|
MDEV-25919 preparation: Various cleanup
que_eval_sql(): Remove the parameter lock_dict. The only caller
with lock_dict=true was dict_stats_exec_sql(), which will now
explicitly invoke dict_sys.lock() and dict_sys.unlock() by itself.
row_import_cleanup(): Do not unnecessarily lock the dictionary.
Concurrent access to the table during ALTER TABLE...IMPORT TABLESPACE
is prevented by MDL and the fact that there cannot exist any
undo log or change buffer records that would refer to the table
or tablespace.
row_import_for_mysql(): Do not unnecessarily lock the dictionary
while accessing fil_system. Thanks to MDL_EXCLUSIVE that was acquired
by the SQL layer, only one IMPORT may be in effect for the table name.
row_quiesce_set_state(): Do not unnecessarily lock the dictionary.
The dict_table_t::quiesce state is documented to be protected by
all index latches, which we are acquiring.
dict_table_close(): Introduce a simpler variant with fewer parameters.
dict_table_close(): Reduce the amount of calls.
We can simply invoke dict_table_t::release() on startup or
in DDL operations, or when the table is inaccessible.
In none of these cases, there is no need to invalidate the
InnoDB persistent statistics.
pars_info_t::graph_owns_us: Remove (unused).
pars_info_free(): Define inline.
fts_delete(), trx_t::evict_table(), row_prebuilt_free(),
row_rename_table_for_mysql(): Simplify.
row_mysql_lock_data_dictionary(): Remove some references;
use dict_sys.lock() and dict_sys.unlock() instead.
row_mysql_lock_table(): Remove. Use lock_table_for_trx() instead.
ha_innobase::check_if_supported_inplace_alter(),
row_create_table_for_mysql(): Simply assert dict_sys.sys_tables_exist().
In commit 49e2c8f0a6fefdeac50925f758090d6bd099768d and
commit 1bd681c8b3c5213ce1f7976940a7dc38b48a0d39 srv_start()
actually guarantees that the system tables will exist,
or the server is in read-only mode, or startup will fail.
Reviewed by: Thirunarayanan Balathandayuthapani
2021-08-31 12:54:20 +02:00
|
|
|
ATTRIBUTE_COLD __attribute__((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
/** Sets merge_threshold in the SYS_INDEXES
|
|
|
|
@param[in,out] index index
|
|
|
|
@param[in] merge_threshold value to set */
|
|
|
|
void
|
|
|
|
dict_index_set_merge_threshold(
|
|
|
|
dict_index_t* index,
|
|
|
|
ulint merge_threshold);
|
|
|
|
|
|
|
|
#ifdef UNIV_DEBUG
|
|
|
|
/** Sets merge_threshold for all indexes in dictionary cache for debug.
|
|
|
|
@param[in] merge_threshold_all value to set for all indexes */
|
|
|
|
void
|
|
|
|
dict_set_merge_threshold_all_debug(
|
|
|
|
uint merge_threshold_all);
|
|
|
|
#endif /* UNIV_DEBUG */
|
|
|
|
|
|
|
|
/** Validate the table flags.
|
|
|
|
@param[in] flags Table flags
|
|
|
|
@return true if valid. */
|
2014-02-26 19:11:54 +01:00
|
|
|
UNIV_INLINE
|
|
|
|
bool
|
|
|
|
dict_tf_is_valid(
|
2016-08-12 10:17:45 +02:00
|
|
|
ulint flags);
|
|
|
|
|
|
|
|
/** Validate both table flags and table flags2 and make sure they
|
|
|
|
are compatible.
|
|
|
|
@param[in] flags Table flags
|
|
|
|
@param[in] flags2 Table flags2
|
|
|
|
@return true if valid. */
|
|
|
|
UNIV_INLINE
|
|
|
|
bool
|
|
|
|
dict_tf2_is_valid(
|
|
|
|
ulint flags,
|
|
|
|
ulint flags2);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
/*********************************************************************//**
|
|
|
|
This function should be called whenever a page is successfully
|
|
|
|
compressed. Updates the compression padding information. */
|
|
|
|
void
|
|
|
|
dict_index_zip_success(
|
|
|
|
/*===================*/
|
|
|
|
dict_index_t* index) /*!< in/out: index to be updated. */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
This function should be called whenever a page compression attempt
|
|
|
|
fails. Updates the compression padding information. */
|
|
|
|
void
|
|
|
|
dict_index_zip_failure(
|
|
|
|
/*===================*/
|
|
|
|
dict_index_t* index) /*!< in/out: index to be updated. */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*********************************************************************//**
|
|
|
|
Return the optimal page size, for which page will likely compress.
|
|
|
|
@return page size beyond which page may not compress*/
|
|
|
|
ulint
|
|
|
|
dict_index_zip_pad_optimal_page_size(
|
|
|
|
/*=================================*/
|
|
|
|
dict_index_t* index) /*!< in: index for which page size
|
|
|
|
is requested */
|
2016-06-21 14:21:03 +02:00
|
|
|
MY_ATTRIBUTE((nonnull, warn_unused_result));
|
2014-02-26 19:11:54 +01:00
|
|
|
/*************************************************************//**
|
|
|
|
Convert table flag to row format string.
|
|
|
|
@return row format name */
|
|
|
|
const char*
|
|
|
|
dict_tf_to_row_format_string(
|
|
|
|
/*=========================*/
|
|
|
|
ulint table_flag); /*!< in: row format setting */
|
2016-08-12 10:17:45 +02:00
|
|
|
|
|
|
|
/** encode number of columns and number of virtual columns in one
|
|
|
|
4 bytes value. We could do this because the number of columns in
|
|
|
|
InnoDB is limited to 1017
|
|
|
|
@param[in] n_col number of non-virtual column
|
|
|
|
@param[in] n_v_col number of virtual column
|
|
|
|
@return encoded value */
|
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
|
|
|
dict_table_encode_n_col(
|
|
|
|
ulint n_col,
|
|
|
|
ulint n_v_col);
|
|
|
|
|
|
|
|
/** Decode number of virtual and non-virtual columns in one 4 bytes value.
|
|
|
|
@param[in] encoded encoded value
|
|
|
|
@param[in,out] n_col number of non-virtual column
|
|
|
|
@param[in,out] n_v_col number of virtual column */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
dict_table_decode_n_col(
|
|
|
|
ulint encoded,
|
|
|
|
ulint* n_col,
|
|
|
|
ulint* n_v_col);
|
|
|
|
|
2016-09-06 08:43:16 +02:00
|
|
|
/** Free the virtual column template
|
|
|
|
@param[in,out] vc_templ virtual column template */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
dict_free_vc_templ(
|
|
|
|
dict_vcol_templ_t* vc_templ);
|
|
|
|
|
|
|
|
/** Check whether the table have virtual index.
|
|
|
|
@param[in] table InnoDB table
|
|
|
|
@return true if the table have virtual index, false otherwise. */
|
|
|
|
UNIV_INLINE
|
|
|
|
bool
|
|
|
|
dict_table_have_virtual_index(
|
|
|
|
dict_table_t* table);
|
|
|
|
|
2022-01-13 17:27:28 +01:00
|
|
|
#include "dict0dict.inl"
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
#endif
|