/*****************************************************************************

Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2019, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file dict/dict0crea.cc
Database object creation

Created 1/8/1996 Heikki Tuuri
*******************************************************/

#include "dict0crea.h"
#include "btr0pcur.h"
#include "btr0btr.h"
#include "page0page.h"
#include "mach0data.h"
#include "dict0boot.h"
#include "dict0dict.h"
#include "que0que.h"
#include "row0ins.h"
#include "row0mysql.h"
#include "pars0pars.h"
#include "trx0roll.h"
#include "trx0rseg.h"
#include "trx0undo.h"
#include "ut0vec.h"
#include "dict0priv.h"
#include "fts0priv.h"
#include "srv0start.h"

/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_TABLES system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_tables_tuple(
/*=========================*/
	const dict_table_t*	table,	/*!< in: table */
	mem_heap_t*		heap)	/*!< in: memory heap from
					which the memory for the built
					tuple is allocated */
{
	dict_table_t*	sys_tables;
	dtuple_t*	entry;
	dfield_t*	dfield;
	byte*		ptr;
	ulint		type;

	ut_ad(table);
	ut_ad(!table->space || table->space->id == table->space_id);
	ut_ad(heap);
	ut_ad(table->n_cols >= DATA_N_SYS_COLS);

	sys_tables = dict_sys->sys_tables;

	entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);

	dict_table_copy_types(entry, sys_tables);

	/* 0: NAME -----------------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__NAME);

	dfield_set_data(dfield,
			table->name.m_name, strlen(table->name.m_name));

	/* 1: DB_TRX_ID added later */
	/* 2: DB_ROLL_PTR added later */
	/* 3: ID -------------------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__ID);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
	mach_write_to_8(ptr, table->id);

	dfield_set_data(dfield, ptr, 8);

	/* 4: N_COLS ---------------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__N_COLS);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));

	/* If there is any virtual column, encode it in N_COLS */
	mach_write_to_4(ptr, dict_table_encode_n_col(
				ulint(table->n_cols - DATA_N_SYS_COLS),
				ulint(table->n_v_def))
			| (ulint(table->flags & DICT_TF_COMPACT) << 31));
	dfield_set_data(dfield, ptr, 4);
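
	/* Note on the value written above (a reading of
	dict_table_encode_n_col(); verify against its definition): the low
	16 bits of SYS_TABLES.N_COLS hold the number of non-virtual user
	columns, the virtual column count is shifted into the upper half,
	and the most significant bit records DICT_TF_COMPACT, i.e. that
	the row format is not REDUNDANT. */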

	/* 5: TYPE (table flags) -----------------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__TYPE);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));

	/* Validate the table flags and convert them to what is saved in
	SYS_TABLES.TYPE. Table flag values 0 and 1 are both written to
	SYS_TABLES.TYPE as 1. */
	type = dict_tf_to_sys_tables_type(table->flags);
	mach_write_to_4(ptr, type);

	dfield_set_data(dfield, ptr, 4);

	/* 6: MIX_ID (obsolete) ---------------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__MIX_ID);

	ptr = static_cast<byte*>(mem_heap_zalloc(heap, 8));

	dfield_set_data(dfield, ptr, 8);

	/* 7: MIX_LEN (additional flags) --------------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__MIX_LEN);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	/* Be sure all non-used bits are zero. */
	ut_a(!(table->flags2 & DICT_TF2_UNUSED_BIT_MASK));
	mach_write_to_4(ptr, table->flags2);

	dfield_set_data(dfield, ptr, 4);

	/* 8: CLUSTER_NAME ---------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__CLUSTER_ID);
	dfield_set_null(dfield); /* not supported */

	/* 9: SPACE ----------------------------*/
	dfield = dtuple_get_nth_field(
		entry, DICT_COL__SYS_TABLES__SPACE);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	mach_write_to_4(ptr, table->space_id);

	dfield_set_data(dfield, ptr, 4);
	/*----------------------------------*/

	return(entry);
}
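
/* A usage note (hedged; the caller sits later in this file, outside this
excerpt): dict_build_table_def_step() attaches the tuple built above to the
SYS_TABLES insert node, roughly

	ins_node_set_new_row(node->tab_def,
			     dict_create_sys_tables_tuple(table, node->heap));

so that the generic row-insert machinery performs the actual write. */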

/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_COLUMNS system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_columns_tuple(
/*==========================*/
	const dict_table_t*	table,	/*!< in: table */
	ulint			i,	/*!< in: column number */
	mem_heap_t*		heap)	/*!< in: memory heap from
					which the memory for the built
					tuple is allocated */
{
	dict_table_t*		sys_columns;
	dtuple_t*		entry;
	const dict_col_t*	column;
	dfield_t*		dfield;
	byte*			ptr;
	const char*		col_name;
	ulint			num_base = 0;
	ulint			v_col_no = ULINT_UNDEFINED;

	ut_ad(table);
	ut_ad(heap);

	/* Any column beyond table->n_def would be virtual columns */
	if (i >= table->n_def) {
		dict_v_col_t*	v_col = dict_table_get_nth_v_col(
					table, i - table->n_def);
		column = &v_col->m_col;
		num_base = v_col->num_base;
		v_col_no = column->ind;
	} else {
		column = dict_table_get_nth_col(table, i);
		ut_ad(!column->is_virtual());
	}

	sys_columns = dict_sys->sys_columns;

	entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);

	dict_table_copy_types(entry, sys_columns);

	/* 0: TABLE_ID -----------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__TABLE_ID);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
	mach_write_to_8(ptr, table->id);

	dfield_set_data(dfield, ptr, 8);

	/* 1: POS ----------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__POS);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));

	if (v_col_no != ULINT_UNDEFINED) {
		/* encode virtual column's position in MySQL table and InnoDB
		table in "POS" */
		mach_write_to_4(ptr, dict_create_v_col_pos(
					i - table->n_def, v_col_no));
	} else {
		mach_write_to_4(ptr, i);
	}

	dfield_set_data(dfield, ptr, 4);

	/* 2: DB_TRX_ID added later */
	/* 3: DB_ROLL_PTR added later */
	/* 4: NAME ---------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__NAME);

	if (i >= table->n_def) {
		col_name = dict_table_get_v_col_name(table, i - table->n_def);
	} else {
		col_name = dict_table_get_col_name(table, i);
	}

	dfield_set_data(dfield, col_name, ut_strlen(col_name));

	/* 5: MTYPE --------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__MTYPE);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	mach_write_to_4(ptr, column->mtype);

	dfield_set_data(dfield, ptr, 4);

	/* 6: PRTYPE -------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PRTYPE);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	mach_write_to_4(ptr, column->prtype);

	dfield_set_data(dfield, ptr, 4);

	/* 7: LEN ----------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__LEN);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	mach_write_to_4(ptr, column->len);

	dfield_set_data(dfield, ptr, 4);

	/* 8: PREC ---------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PREC);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	mach_write_to_4(ptr, num_base);
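	/* For a virtual column, SYS_COLUMNS.PREC is written as the number
	of base columns it depends on (num_base, set above); for ordinary
	columns this code writes 0. */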

	dfield_set_data(dfield, ptr, 4);
	/*---------------------------------*/

	return(entry);
}

/** Based on a table object, this function builds the entry to be inserted
in the SYS_VIRTUAL system table. Each row maps a virtual column to one of
its base columns.
@param[in]	table	table
@param[in]	v_col_n	virtual column number
@param[in]	b_col_n	base column sequence num
@param[in]	heap	memory heap
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_virtual_tuple(
	const dict_table_t*	table,
	ulint			v_col_n,
	ulint			b_col_n,
	mem_heap_t*		heap)
{
	dict_table_t*		sys_virtual;
	dtuple_t*		entry;
	const dict_col_t*	base_column;
	dfield_t*		dfield;
	byte*			ptr;

	ut_ad(table);
	ut_ad(heap);

	ut_ad(v_col_n < table->n_v_def);
	dict_v_col_t*	v_col = dict_table_get_nth_v_col(table, v_col_n);
	base_column = v_col->base_col[b_col_n];

	sys_virtual = dict_sys->sys_virtual;

	entry = dtuple_create(heap, DICT_NUM_COLS__SYS_VIRTUAL
			      + DATA_N_SYS_COLS);

	dict_table_copy_types(entry, sys_virtual);

	/* 0: TABLE_ID -----------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__TABLE_ID);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
	mach_write_to_8(ptr, table->id);

	dfield_set_data(dfield, ptr, 8);

	/* 1: POS ---------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__POS);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	ulint	v_col_no = dict_create_v_col_pos(v_col_n, v_col->m_col.ind);
	mach_write_to_4(ptr, v_col_no);

	dfield_set_data(dfield, ptr, 4);

	/* 2: BASE_POS ----------------------------*/
	dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__BASE_POS);

	ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
	mach_write_to_4(ptr, base_column->ind);

	dfield_set_data(dfield, ptr, 4);

	/* 3: DB_TRX_ID added later */
	/* 4: DB_ROLL_PTR added later */

	/*---------------------------------*/
	return(entry);
}
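/* A usage note (hedged; the callers are outside this excerpt): one
SYS_VIRTUAL row is built per (virtual column, base column) pair, so a
virtual column depending on N base columns contributes N rows, identified
by (TABLE_ID, POS, BASE_POS). */
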
/***************************************************************//**
Builds a table definition to insert.
@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_build_table_def_step(
/*======================*/
	que_thr_t*	thr,	/*!< in: query thread */
	tab_node_t*	node)	/*!< in: table create node */
{
	ut_ad(mutex_own(&dict_sys->mutex));
	dict_table_t*	table = node->table;
	ut_ad(!table->is_temporary());
	ut_ad(!table->space);
	ut_ad(table->space_id == ULINT_UNDEFINED);
	dict_table_assign_new_id(table, thr_get_trx(thr));

	/* Always set this bit for all new created tables */
	DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
			DICT_TF2_FLAG_UNSET(table,
					    DICT_TF2_FTS_AUX_HEX_NAME););
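	/* A note, not asserted by this file: DICT_TF2_FTS_AUX_HEX_NAME
	makes the names of any FTS auxiliary tables of this table use
	hex-encoded ids; the debug injection above clears the flag to
	exercise the legacy (non-hex) naming. */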

	if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)) {
		/* This table will need a new tablespace. */

		ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0
		      || dict_table_has_atomic_blobs(table));
		trx_t* trx = thr_get_trx(thr);
		ut_ad(trx->table_id);
		mtr_t mtr;
		trx_undo_t* undo = trx->rsegs.m_redo.undo;

		if (undo && !undo->table_id
		    && trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
			/* This must be a TRUNCATE operation where
			the empty table is created after the old table
			was renamed. Be sure to mark the transaction
			associated with the new empty table, so that
			we can remove it on recovery. */
			mtr.start();
			undo->table_id = trx->table_id;
			undo->dict_operation = TRUE;
			page_t* page = trx_undo_page_get(
				page_id_t(trx->rsegs.m_redo.rseg->space->id,
					  undo->hdr_page_no),
				&mtr);
			mlog_write_ulint(page + undo->hdr_offset
					 + TRX_UNDO_DICT_TRANS,
					 TRUE, MLOG_1BYTE, &mtr);
			mlog_write_ull(page + undo->hdr_offset
				       + TRX_UNDO_TABLE_ID,
				       trx->table_id, &mtr);
			mtr.commit();
			log_write_up_to(mtr.commit_lsn(), true);
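			/* The undo log header now durably records the id
			of the new, empty table. Flushing the redo log
			before any data file is created lets crash recovery
			of an interrupted TRUNCATE delete the new file
			before rolling back the rename of the original
			table (see MDEV-17158). */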
		}

		/* Get a new tablespace ID */
		ulint space_id;
		dict_hdr_get_new_id(NULL, NULL, &space_id, table, false);

		DBUG_EXECUTE_IF(
			"ib_create_table_fail_out_of_space_ids",
			space_id = ULINT_UNDEFINED;
		);

		if (space_id == ULINT_UNDEFINED) {
			return DB_ERROR;
		}

		/* Determine the tablespace flags. */
		bool	has_data_dir = DICT_TF_HAS_DATA_DIR(table->flags);
		ulint	fsp_flags = dict_tf_to_fsp_flags(table->flags);
		ut_ad(!has_data_dir || table->data_dir_path);
		char*	filepath = has_data_dir
			? fil_make_filepath(table->data_dir_path,
					    table->name.m_name, IBD, true)
			: fil_make_filepath(NULL,
					    table->name.m_name, IBD, false);

		/* We create a new single-table tablespace for the table.
		We initially let it be 4 pages:
		- page 0 is the fsp header and an extent descriptor page,
		- page 1 is an ibuf bitmap page,
		- page 2 is the first inode page,
		- page 3 will contain the root of the clustered index of
		the table we create here. */

		dberr_t err;
		table->space = fil_ibd_create(
			space_id, table->name.m_name, filepath, fsp_flags,
			FIL_IBD_FILE_INITIAL_SIZE,
			node->mode, node->key_id, &err);

		ut_free(filepath);

		if (!table->space) {
			ut_ad(err != DB_SUCCESS);
			return err;
		}

table->space_id = space_id;
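
                /* Initialize the newly created tablespace in its own
                mini-transaction: set_named_space() associates the mtr
                with the tablespace so that its file name is logged for
                crash recovery, and fsp_header_init() formats page 0,
                the file space header, of the new .ibd file. */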
                mtr.start();
                mtr.set_named_space(table->space);
                fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
                mtr.commit();
        } else {
                ut_ad(dict_tf_get_rec_format(table->flags)
                      != REC_FORMAT_COMPRESSED);
                table->space = fil_system.sys_space;
                table->space_id = TRX_SYS_SPACE;
        }

        ins_node_set_new_row(node->tab_def,
                             dict_create_sys_tables_tuple(table, node->heap));

        return DB_SUCCESS;
}

/** Builds a SYS_VIRTUAL row definition to insert.
@param[in]      node    table create node */
static
void
dict_build_v_col_def_step(
        tab_node_t*     node)
{
        dtuple_t*       row;

        row = dict_create_sys_virtual_tuple(node->table, node->col_no,
                                            node->base_col_no,
                                            node->heap);
        ins_node_set_new_row(node->v_col_def, row);
}

/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
in the SYS_INDEXES system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_indexes_tuple(
/*==========================*/
        const dict_index_t*     index,  /*!< in: index */
        mem_heap_t*             heap)   /*!< in: memory heap from
                                        which the memory for the built
                                        tuple is allocated */
{
        dict_table_t*   sys_indexes;
        dtuple_t*       entry;
        dfield_t*       dfield;
        byte*           ptr;

        ut_ad(mutex_own(&dict_sys->mutex));
        ut_ad(index);
        ut_ad(index->table->space || index->table->file_unreadable);
        ut_ad(!index->table->space
              || index->table->space->id == index->table->space_id);
        ut_ad(heap);

        sys_indexes = dict_sys->sys_indexes;

        entry = dtuple_create(
                heap, DICT_NUM_COLS__SYS_INDEXES + DATA_N_SYS_COLS);

        dict_table_copy_types(entry, sys_indexes);
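
        /* The SYS_INDEXES columns are filled in below in their physical
        order: TABLE_ID, ID, NAME, N_FIELDS, TYPE, SPACE, PAGE_NO and
        MERGE_THRESHOLD. DB_TRX_ID and DB_ROLL_PTR are system columns
        that the insert will add later. */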

        /* 0: TABLE_ID -----------------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__TABLE_ID);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
        mach_write_to_8(ptr, index->table->id);

        dfield_set_data(dfield, ptr, 8);

        /* 1: ID ----------------------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__ID);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
        mach_write_to_8(ptr, index->id);

        dfield_set_data(dfield, ptr, 8);

        /* 2: DB_TRX_ID added later */
        /* 3: DB_ROLL_PTR added later */
        /* 4: NAME --------------------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__NAME);
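
        /* An index that has not been committed yet is stored with a
        TEMP_INDEX_PREFIX byte prepended to its name, so that it can be
        told apart from committed indexes in SYS_INDEXES. */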

        if (!index->is_committed()) {
                ulint   len     = strlen(index->name) + 1;
                char*   name    = static_cast<char*>(
                        mem_heap_alloc(heap, len));
                *name = *TEMP_INDEX_PREFIX_STR;
                memcpy(name + 1, index->name, len - 1);
                dfield_set_data(dfield, name, len);
        } else {
                dfield_set_data(dfield, index->name, strlen(index->name));
        }

        /* 5: N_FIELDS ----------------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__N_FIELDS);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
        mach_write_to_4(ptr, index->n_fields);

        dfield_set_data(dfield, ptr, 4);

        /* 6: TYPE --------------------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__TYPE);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
        mach_write_to_4(ptr, index->type);

        dfield_set_data(dfield, ptr, 4);

        /* 7: SPACE --------------------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__SPACE);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
        mach_write_to_4(ptr, index->table->space_id);

        dfield_set_data(dfield, ptr, 4);

        /* 8: PAGE_NO --------------------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__PAGE_NO);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
        mach_write_to_4(ptr, FIL_NULL);

        dfield_set_data(dfield, ptr, 4);

        /* 9: MERGE_THRESHOLD ----------------*/
        dfield = dtuple_get_nth_field(
                entry, DICT_COL__SYS_INDEXES__MERGE_THRESHOLD);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
        mach_write_to_4(ptr, DICT_INDEX_MERGE_THRESHOLD_DEFAULT);

        dfield_set_data(dfield, ptr, 4);

        /*--------------------------------*/

        return(entry);
}

/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
in the SYS_FIELDS system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_fields_tuple(
/*=========================*/
        const dict_index_t*     index,  /*!< in: index */
        ulint                   fld_no, /*!< in: field number */
        mem_heap_t*             heap)   /*!< in: memory heap from
                                        which the memory for the built
                                        tuple is allocated */
{
        dict_table_t*   sys_fields;
        dtuple_t*       entry;
        dict_field_t*   field;
        dfield_t*       dfield;
        byte*           ptr;
        ibool           index_contains_column_prefix_field      = FALSE;
        ulint           j;

        ut_ad(index);
        ut_ad(heap);

        for (j = 0; j < index->n_fields; j++) {
                if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
                        index_contains_column_prefix_field = TRUE;
                        break;
                }
        }

        field = dict_index_get_nth_field(index, fld_no);

        sys_fields = dict_sys->sys_fields;

        entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);

        dict_table_copy_types(entry, sys_fields);

        /* 0: INDEX_ID -----------------------*/
        dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__INDEX_ID);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
        mach_write_to_8(ptr, index->id);

        dfield_set_data(dfield, ptr, 8);

        /* 1: POS; FIELD NUMBER & PREFIX LENGTH -----------------------*/

        dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__POS);

        ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));

        if (index_contains_column_prefix_field) {
                /* If there are column prefix fields in the index, then
                we store the number of the field to the 2 HIGH bytes
                and the prefix length to the 2 low bytes. */
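
                /* For example, with fld_no == 1 and field->prefix_len == 100
                the stored POS value is (1 << 16) + 100 = 0x00010064. */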

                mach_write_to_4(ptr, (fld_no << 16) + field->prefix_len);
        } else {
                /* Else we store the number of the field to the 2 LOW bytes.
                This is to keep the storage format compatible with
                InnoDB versions < 4.0.14. */

                mach_write_to_4(ptr, fld_no);
        }

        dfield_set_data(dfield, ptr, 4);

        /* 2: DB_TRX_ID added later */
        /* 3: DB_ROLL_PTR added later */
        /* 4: COL_NAME -------------------------*/
        dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__COL_NAME);

        dfield_set_data(dfield, field->name,
                        ut_strlen(field->name));

        /*---------------------------------*/

        return(entry);
}

/*****************************************************************//**
Creates the tuple with which the index entry is searched for writing the index
tree root page number, if such a tree is created.
@return the tuple for search */
static
dtuple_t*
dict_create_search_tuple(
/*=====================*/
        const dtuple_t* tuple,  /*!< in: the tuple inserted in the SYS_INDEXES
                                table */
        mem_heap_t*     heap)   /*!< in: memory heap from which the memory for
                                the built tuple is allocated */
{
        dtuple_t*       search_tuple;
        const dfield_t* field1;
        dfield_t*       field2;

        ut_ad(tuple && heap);

        search_tuple = dtuple_create(heap, 2);
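
        /* The search tuple consists of the first two fields of the
        SYS_INDEXES entry (TABLE_ID and ID), which form the key of the
        clustered index of SYS_INDEXES. */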

        field1 = dtuple_get_nth_field(tuple, 0);
        field2 = dtuple_get_nth_field(search_tuple, 0);

        dfield_copy(field2, field1);

        field1 = dtuple_get_nth_field(tuple, 1);
        field2 = dtuple_get_nth_field(search_tuple, 1);

        dfield_copy(field2, field1);

        ut_ad(dtuple_validate(search_tuple));

        return(search_tuple);
}

/***************************************************************//**
Builds an index definition row to insert.
@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_build_index_def_step(
/*======================*/
        que_thr_t*      thr,    /*!< in: query thread */
        ind_node_t*     node)   /*!< in: index create node */
{
        dict_table_t*   table;
        dict_index_t*   index;
        dtuple_t*       row;
        trx_t*          trx;

        ut_ad(mutex_own(&dict_sys->mutex));

        trx = thr_get_trx(thr);

        index = node->index;

        table = index->table = node->table = dict_table_open_on_name(
                node->table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);

        if (table == NULL) {
                return(DB_TABLE_NOT_FOUND);
        }

        if (!trx->table_id) {
                /* Record only the first table id. */
                trx->table_id = table->id;
        }

        ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
              || dict_index_is_clust(index));

        dict_hdr_get_new_id(NULL, &index->id, NULL, table, false);

        /* Inherit the space id from the table; we store all indexes of a
        table in the same tablespace */

        node->page_no = FIL_NULL;
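        /* The root page number is not known yet; FIL_NULL is written to
        SYS_INDEXES.PAGE_NO here and dict_create_index_tree_step() will
        update it once the B-tree root has been allocated. */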
        row = dict_create_sys_indexes_tuple(index, node->heap);
        node->ind_row = row;

        ins_node_set_new_row(node->ind_def, row);

        /* Note that the index was created by this transaction. */
        index->trx_id = trx->id;
        ut_ad(table->def_trx_id <= trx->id);
        table->def_trx_id = trx->id;
        dict_table_close(table, true, false);

        return(DB_SUCCESS);
}

/***************************************************************//**
Builds an index definition without updating SYSTEM TABLES. */
void
dict_build_index_def(
/*=================*/
        const dict_table_t*     table,  /*!< in: table */
        dict_index_t*           index,  /*!< in/out: index */
        trx_t*                  trx)    /*!< in/out: InnoDB transaction handle */
{
        ut_ad(mutex_own(&dict_sys->mutex));

        if (trx->table_id == 0) {
                /* Record only the first table id. */
                trx->table_id = table->id;
        }

        ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
              || dict_index_is_clust(index));

        dict_hdr_get_new_id(NULL, &index->id, NULL, table, false);

        /* Note that the index was created by this transaction. */
        index->trx_id = trx->id;
}

/***************************************************************//**
Builds a field definition row to insert. */
static
void
dict_build_field_def_step(
/*======================*/
        ind_node_t*     node)   /*!< in: index create node */
{
        dict_index_t*   index;
        dtuple_t*       row;

        index = node->index;

        row = dict_create_sys_fields_tuple(index, node->field_no, node->heap);

        ins_node_set_new_row(node->field_def, row);
}

/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_create_index_tree_step(
/*========================*/
        ind_node_t*     node)   /*!< in: index create node */
{
        mtr_t           mtr;
        btr_pcur_t      pcur;
        dict_index_t*   index;
        dict_table_t*   sys_indexes;
        dtuple_t*       search_tuple;

        ut_ad(mutex_own(&dict_sys->mutex));

        index = node->index;

        sys_indexes = dict_sys->sys_indexes;

        if (index->type == DICT_FTS) {
                /* FTS index does not need an index tree */
                return(DB_SUCCESS);
        }

        /* Run a mini-transaction in which the index tree is allocated for
        the index and its root address is written to the index entry in
        sys_indexes */

        mtr.start();

        search_tuple = dict_create_search_tuple(node->ind_row, node->heap);

        btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
                      search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
                      &pcur, &mtr);

        btr_pcur_move_to_next_user_rec(&pcur, &mtr);
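
        /* The cursor is now positioned on the SYS_INDEXES record that was
        inserted for this index; its PAGE_NO column will be updated below
        with the page number of the newly created B-tree root. */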

        dberr_t         err = DB_SUCCESS;

        if (!index->is_readable()) {
                node->page_no = FIL_NULL;
        } else {
                index->set_modified(mtr);

                node->page_no = btr_create(
                        index->type, index->table->space,
                        index->id, index, NULL, &mtr);

                if (node->page_no == FIL_NULL) {
                        err = DB_OUT_OF_FILE_SPACE;
                }

                DBUG_EXECUTE_IF("ib_import_create_index_failure_1",
                                node->page_no = FIL_NULL;
                                err = DB_OUT_OF_FILE_SPACE; );
        }

        ulint   len;
        byte*   data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
                                             DICT_FLD__SYS_INDEXES__PAGE_NO,
                                             &len);
        ut_ad(len == 4);
        if (mach_read_from_4(data) != node->page_no) {
                mlog_write_ulint(data, node->page_no, MLOG_4BYTES, &mtr);
        }

        mtr.commit();

        return(err);
}

/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
Does not update SYSTEM TABLES.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
dberr_t
dict_create_index_tree_in_mem(
/*==========================*/
        dict_index_t*   index,  /*!< in/out: index */
        const trx_t*    trx)    /*!< in: InnoDB transaction handle */
{
        mtr_t   mtr;

        ut_ad(mutex_own(&dict_sys->mutex));
        ut_ad(!(index->type & DICT_FTS));

        mtr_start(&mtr);
        mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);

        /* Currently this function is used by temporary tables only.
        IMPORT/DISCARD of temporary tables is blocked, hence these asserts. */
        ut_ad(index->is_readable());
        ut_ad(!(index->table->flags2 & DICT_TF2_DISCARDED));

        index->page = btr_create(index->type, index->table->space,
                                 index->id, index, NULL, &mtr);
        mtr_commit(&mtr);

        index->trx_id = trx->id;

        return index->page == FIL_NULL ? DB_OUT_OF_FILE_SPACE : DB_SUCCESS;
}

/** Drop the index tree associated with a row in SYS_INDEXES table.
@param[in,out]  rec     SYS_INDEXES record
@param[in,out]  pcur    persistent cursor on rec
@param[in,out]  mtr     mini-transaction
@return whether freeing the B-tree was attempted */
bool
dict_drop_index_tree(
        rec_t*          rec,
        btr_pcur_t*     pcur,
        mtr_t*          mtr)
{
        const byte*     ptr;
        ulint           len;
        ulint           space;
        ulint           root_page_no;

        ut_ad(mutex_own(&dict_sys->mutex));
        ut_a(!dict_table_is_comp(dict_sys->sys_indexes));

        ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);

        ut_ad(len == 4);

        btr_pcur_store_position(pcur, mtr);

        root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

        if (root_page_no == FIL_NULL) {
                /* The tree has already been freed */

                return(false);
        }

        mlog_write_ulint(const_cast<byte*>(ptr), FIL_NULL, MLOG_4BYTES, mtr);
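
        /* The SPACE and ID columns are read next; together with the root
        page number they identify the B-tree that btr_free_if_exists()
        will attempt to free below. */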

        ptr = rec_get_nth_field_old(
                rec, DICT_FLD__SYS_INDEXES__SPACE, &len);

        ut_ad(len == 4);

        space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);

        ptr = rec_get_nth_field_old(
                rec, DICT_FLD__SYS_INDEXES__ID, &len);

        ut_ad(len == 8);

        bool                    found;
        const page_size_t       page_size(fil_space_get_page_size(space,
                                                                  &found));

        if (!found) {
                /* It is a single table tablespace and the .ibd file is
                missing: do nothing */

                return(false);
        }

        /* If tablespace is scheduled for truncate, do not try to drop
        the indexes in that tablespace. There is a truncate fixup action
        which will take care of it. */
        if (srv_is_tablespace_truncated(space)) {
                return(false);
        }

        btr_free_if_exists(page_id_t(space, root_page_no), page_size,
                           mach_read_from_8(ptr), mtr);

        return(true);
}

/*******************************************************************//**
Recreate the index tree associated with a row in SYS_INDEXES table.
@return new root page number, or FIL_NULL on failure */
ulint
dict_recreate_index_tree(
/*=====================*/
        const dict_table_t*
                        table,  /*!< in/out: the table the index belongs to */
        btr_pcur_t*     pcur,   /*!< in/out: persistent cursor pointing to
                                record in the clustered index of
                                SYS_INDEXES table. The cursor may be
                                repositioned in this call. */
        mtr_t*          mtr)    /*!< in/out: mtr having the latch
                                on the record page. */
{
        ut_ad(mutex_own(&dict_sys->mutex));
        ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
        ut_ad(!table->space || table->space->id == table->space_id);

        ulint           len;
        const rec_t*    rec = btr_pcur_get_rec(pcur);

        const byte*     ptr = rec_get_nth_field_old(
                rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);

        ut_ad(len == 4);

        ut_ad(table->space_id == mach_read_from_4(
                      rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__SPACE,
                                            &len)));
        ut_ad(len == 4);

        if (!table->space) {
                /* It is a single table tablespace and the .ibd file is
                missing: do nothing. */

                ib::warn()
                        << "Trying to TRUNCATE a missing .ibd file of table "
                        << table->name << "!";

                return(FIL_NULL);
        }

        ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
        ut_ad(len == 4);
        ulint   type = mach_read_from_4(ptr);

        ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
        ut_ad(len == 8);
        index_id_t      index_id = mach_read_from_8(ptr);

        /* We will need to commit the mini-transaction in order to avoid
        deadlocks in the btr_create() call, because otherwise we would
        be freeing and allocating pages in the same mini-transaction. */
        btr_pcur_store_position(pcur, mtr);
        mtr_commit(mtr);

        mtr_start(mtr);
        mtr->set_named_space(table->space);
        btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
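
        /* The mini-transaction was restarted above, so the persistent
        cursor has to be repositioned on the SYS_INDEXES record before
        the matching in-memory index is looked up. */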

        /* Find the index corresponding to this SYS_INDEXES record. */
        for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
             index != NULL;
             index = UT_LIST_GET_NEXT(indexes, index)) {
                if (index->id == index_id) {
                        ulint root_page_no = (index->type & DICT_FTS)
                                ? FIL_NULL
                                : btr_create(type, table->space,
                                             index_id, index, NULL, mtr);
                        index->page = unsigned(root_page_no);
                        return root_page_no;
                }
        }

        ib::error() << "Failed to create index with index id " << index_id
                << " of table " << table->name;

        return(FIL_NULL);
}

/*********************************************************************//**
Creates a table create graph.
@return own: table create node */
tab_node_t*
tab_create_graph_create(
/*====================*/
        dict_table_t*   table,  /*!< in: table to create, built as a memory data
                                structure */
        mem_heap_t*     heap,   /*!< in: heap where created */
        fil_encryption_t mode,  /*!< in: encryption mode */
        uint32_t        key_id) /*!< in: encryption key_id */
{
        tab_node_t*     node;

        node = static_cast<tab_node_t*>(
                mem_heap_alloc(heap, sizeof(tab_node_t)));

        node->common.type = QUE_NODE_CREATE_TABLE;

        node->table = table;

        node->state = TABLE_BUILD_TABLE_DEF;
        node->heap = mem_heap_create(256);
        node->mode = mode;
        node->key_id = key_id;

        node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables,
                                        heap);
        node->tab_def->common.parent = node;

        node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns,
                                        heap);
        node->col_def->common.parent = node;

        node->v_col_def = ins_node_create(INS_DIRECT, dict_sys->sys_virtual,
                                          heap);
        node->v_col_def->common.parent = node;
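
        /* The three insert nodes produce the SYS_TABLES, SYS_COLUMNS and
        SYS_VIRTUAL rows for the new table; dict_create_table_step()
        below executes them one state at a time. */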
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
return(node);
|
|
|
|
}
|
|
|
|
|
2016-08-12 11:17:45 +03:00
|
|
|
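
/* The create graph returned above is not executed here.  A rough sketch of
how a caller (row_create_table_for_mysql() in row0mysql.cc) typically drives
it -- signatures vary between versions, so treat this only as an
illustration, not as the exact caller code:

	node = tab_create_graph_create(table, heap, mode, key_id);
	thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
	ut_a(thr == que_fork_start_command(
		     static_cast<que_fork_t*>(que_node_get_parent(thr))));
	que_run_threads(thr);
	err = trx->error_state;

Each que_run_threads() pass re-enters dict_create_table_step() below until
the node reaches TABLE_ADD_TO_CACHE or an error is raised. */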

/** Creates an index create graph.
@param[in]	index	index to create, built as a memory data structure
@param[in]	table	table name
@param[in,out]	heap	heap where created
@param[in]	add_v	new virtual columns added in the same clause with
			add index
@return own: index create node */
ind_node_t*
ind_create_graph_create(
	dict_index_t*		index,
	const char*		table,
	mem_heap_t*		heap,
	const dict_add_v_col_t*	add_v)
{
	ind_node_t*	node;

	node = static_cast<ind_node_t*>(
		mem_heap_alloc(heap, sizeof(ind_node_t)));

	node->common.type = QUE_NODE_CREATE_INDEX;

	node->index = index;

	node->table_name = table;

	node->add_v = add_v;

	node->state = INDEX_BUILD_INDEX_DEF;
	node->page_no = FIL_NULL;
	node->heap = mem_heap_create(256);

	node->ind_def = ins_node_create(INS_DIRECT,
					dict_sys->sys_indexes, heap);
	node->ind_def->common.parent = node;

	node->field_def = ins_node_create(INS_DIRECT,
					  dict_sys->sys_fields, heap);
	node->field_def->common.parent = node;

	return(node);
}
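
/* dict_create_table_step() below is a state machine that the query graph
re-enters once for every row it inserts into the data dictionary:
TABLE_BUILD_TABLE_DEF writes the SYS_TABLES row, TABLE_BUILD_COL_DEF writes
one SYS_COLUMNS row per column, TABLE_BUILD_V_COL_DEF writes the SYS_VIRTUAL
rows for the base columns of virtual columns, and TABLE_ADD_TO_CACHE finally
makes the table visible in the dictionary cache. */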

/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
@return query thread to run next or NULL */
que_thr_t*
dict_create_table_step(
/*===================*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	tab_node_t*	node;
	dberr_t		err	= DB_ERROR;
	trx_t*		trx;

	ut_ad(thr);
	ut_ad(mutex_own(&dict_sys->mutex));

	trx = thr_get_trx(thr);

	node = static_cast<tab_node_t*>(thr->run_node);

	ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE);

	if (thr->prev_node == que_node_get_parent(node)) {
		node->state = TABLE_BUILD_TABLE_DEF;
	}

	if (node->state == TABLE_BUILD_TABLE_DEF) {

		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */

		err = dict_build_table_def_step(thr, node);

		if (err != DB_SUCCESS) {

			goto function_exit;
		}

		node->state = TABLE_BUILD_COL_DEF;
		node->col_no = 0;

		thr->run_node = node->tab_def;

		return(thr);
	}

	if (node->state == TABLE_BUILD_COL_DEF) {
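
		/* Insert one SYS_COLUMNS row per user-defined column
		(virtual columns included).  The DATA_N_SYS_COLS system
		columns (DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR) already exist
		in node->table but get no SYS_COLUMNS rows of their own. */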
		if (node->col_no + DATA_N_SYS_COLS
		    < (static_cast<ulint>(node->table->n_def)
		       + static_cast<ulint>(node->table->n_v_def))) {
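
			/* Map the running counter node->col_no to a position
			in the column array: once the counter is past the
			user columns it is shifted by DATA_N_SYS_COLS so that
			the tuple builder addresses the virtual columns. */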
			ulint i = node->col_no++;

			if (i + DATA_N_SYS_COLS >= node->table->n_def) {
				i += DATA_N_SYS_COLS;
			}
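
			/* Queue the SYS_COLUMNS insert for this column; the
			graph runs node->col_def next and then re-enters this
			function for the following column. */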
			ins_node_set_new_row(
				node->col_def,
				dict_create_sys_columns_tuple(node->table, i,
							      node->heap));

			thr->run_node = node->col_def;

			return(thr);
		} else {
			/* Move on to SYS_VIRTUAL table */
			node->col_no = 0;
			node->base_col_no = 0;
			node->state = TABLE_BUILD_V_COL_DEF;
		}
	}

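	/* SYS_VIRTUAL maps each virtual column to its base columns: one row
	is inserted per (virtual column, base column) pair, so virtual
	columns without any base column are skipped entirely. */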
	if (node->state == TABLE_BUILD_V_COL_DEF) {

		if (node->col_no < static_cast<ulint>(node->table->n_v_def)) {
			dict_v_col_t*	v_col = dict_table_get_nth_v_col(
						node->table, node->col_no);

			/* If no base column */
			while (v_col->num_base == 0) {
				node->col_no++;
				if (node->col_no == static_cast<ulint>(
					(node->table)->n_v_def)) {
					node->state = TABLE_ADD_TO_CACHE;
					break;
				}

				v_col = dict_table_get_nth_v_col(
					node->table, node->col_no);
				node->base_col_no = 0;
			}

			if (node->state != TABLE_ADD_TO_CACHE) {
				ut_ad(node->col_no == v_col->v_pos);
				dict_build_v_col_def_step(node);

				if (node->base_col_no < v_col->num_base - 1) {
					/* move on to next base column */
					node->base_col_no++;
				} else {
					/* move on to next virtual column */
					node->col_no++;
					node->base_col_no = 0;
				}

				thr->run_node = node->v_col_def;

				return(thr);
			}
		} else {
			node->state = TABLE_ADD_TO_CACHE;
		}
	}

	if (node->state == TABLE_ADD_TO_CACHE) {
		DBUG_EXECUTE_IF("ib_ddl_crash_during_create", DBUG_SUICIDE(););
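
		/* The in-memory object is complete: make it visible to other
		threads.  dict_table_t::add_to_cache() (the MDEV-11369 rename
		of dict_table_add_to_cache()) no longer adds system columns;
		the caller supplied them before building this graph. */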
MDEV-11369 Instant ADD COLUMN for InnoDB
For InnoDB tables, adding, dropping and reordering columns has
required a rebuild of the table and all its indexes. Since MySQL 5.6
(and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing
concurrent modification of the tables.
This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT
and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously,
with only minor changes performed to the table structure. The counter
innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS
is incremented whenever a table rebuild operation is converted into
an instant ADD COLUMN operation.
ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN.
Some usability limitations will be addressed in subsequent work:
MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY
and ALGORITHM=INSTANT
MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE
The format of the clustered index (PRIMARY KEY) is changed as follows:
(1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT,
and a new field PAGE_INSTANT will contain the original number of fields
in the clustered index ('core' fields).
If instant ADD COLUMN has not been used or the table becomes empty,
or the very first instant ADD COLUMN operation is rolled back,
the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset
to 0 and FIL_PAGE_INDEX.
(2) A special 'default row' record is inserted into the leftmost leaf,
between the page infimum and the first user record. This record is
distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the
same format as records that contain values for the instantly added
columns. This 'default row' always has the same number of fields as
the clustered index according to the table definition. The values of
'core' fields are to be ignored. For other fields, the 'default row'
will contain the default values as they were during the ALTER TABLE
statement. (If the column default values are changed later, those
values will only be stored in the .frm file. The 'default row' will
contain the original evaluated values, which must be the same for
every row.) The 'default row' must be completely hidden from
higher-level access routines. Assertions have been added to ensure
that no 'default row' is ever present in the adaptive hash index
or in locked records. The 'default row' is never delete-marked.
(3) In clustered index leaf page records, the number of fields must
reside between the number of 'core' fields (dict_index_t::n_core_fields
introduced in this work) and dict_index_t::n_fields. If the number
of fields is less than dict_index_t::n_fields, the missing fields
are replaced with the column value of the 'default row'.
Note: The number of fields in the record may shrink if some of the
last instantly added columns are updated to the value that is
in the 'default row'. The function btr_cur_trim() implements this
'compression' on update and rollback; dtuple::trim() implements it
on insert.
(4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new
status value REC_STATUS_COLUMNS_ADDED will indicate the presence of
a new record header that will encode n_fields-n_core_fields-1 in
1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header
always explicitly encodes the number of fields.)
We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for
covering the insert of the 'default row' record when instant ADD COLUMN
is used for the first time. Subsequent instant ADD COLUMN can use
TRX_UNDO_UPD_EXIST_REC.
This is joint work with Vin Chen (陈福荣) from Tencent. The design
that was discussed in April 2017 would not have allowed import or
export of data files, because instead of the 'default row' it would
have introduced a data dictionary table. The test
rpl.rpl_alter_instant is exactly as contributed in pull request #408.
The test innodb.instant_alter is based on a contributed test.
The redo log record format changes for ROW_FORMAT=DYNAMIC and
ROW_FORMAT=COMPACT are as contributed. (With this change present,
crash recovery from MariaDB 10.3.1 will fail in spectacular ways!)
Also the semantics of higher-level redo log records that modify the
PAGE_INSTANT field is changed. The redo log format version identifier
was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1.
Everything else has been rewritten by me. Thanks to Elena Stepanova,
the code has been tested extensively.
When rolling back an instant ADD COLUMN operation, we must empty the
PAGE_FREE list after deleting or shortening the 'default row' record,
by calling either btr_page_empty() or btr_page_reorganize(). We must
know the size of each entry in the PAGE_FREE list. If rollback left a
freed copy of the 'default row' in the PAGE_FREE list, we would be
unable to determine its size (if it is in ROW_FORMAT=COMPACT or
ROW_FORMAT=DYNAMIC) because it would contain more fields than the
rolled-back definition of the clustered index.
UNIV_SQL_DEFAULT: A new special constant that designates an instantly
added column that is not present in the clustered index record.
len_is_stored(): Check if a length is an actual length. There are
two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL.
dict_col_t::def_val: The 'default row' value of the column. If the
column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT.
dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(),
instant_value().
dict_col_t::remove_instant(): Remove the 'instant ADD' status of
a column.
dict_col_t::name(const dict_table_t& table): Replaces
dict_table_get_col_name().
dict_index_t::n_core_fields: The original number of fields.
For secondary indexes and if instant ADD COLUMN has not been used,
this will be equal to dict_index_t::n_fields.
dict_index_t::n_core_null_bytes: Number of bytes needed to
represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable).
dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that
n_core_null_bytes was not initialized yet from the clustered index
root page.
dict_index_t: Add the accessors is_instant(), is_clust(),
get_n_nullable(), instant_field_value().
dict_index_t::instant_add_field(): Adjust clustered index metadata
for instant ADD COLUMN.
dict_index_t::remove_instant(): Remove the 'instant ADD' status
of a clustered index when the table becomes empty, or the very first
instant ADD COLUMN operation is rolled back.
dict_table_t: Add the accessors is_instant(), is_temporary(),
supports_instant().
dict_table_t::instant_add_column(): Adjust metadata for
instant ADD COLUMN.
dict_table_t::rollback_instant(): Adjust metadata on the rollback
of instant ADD COLUMN.
prepare_inplace_alter_table_dict(): First create the ctx->new_table,
and only then decide if the table really needs to be rebuilt.
We must split the creation of table or index metadata from the
creation of the dictionary table records and the creation of
the data. In this way, we can transform a table-rebuilding operation
into an instant ADD COLUMN operation. Dictionary objects will only
be added to cache when table rebuilding or index creation is needed.
The ctx->instant_table will never be added to cache.
dict_table_t::add_to_cache(): Modified and renamed from
dict_table_add_to_cache(). Do not modify the table metadata.
Let the callers invoke dict_table_add_system_columns() and if needed,
set can_be_evicted.
dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the
system columns (which will now exist in the dict_table_t object
already at this point).
dict_create_table_step(): Expect the callers to invoke
dict_table_add_system_columns().
pars_create_table(): Before creating the table creation execution
graph, invoke dict_table_add_system_columns().
row_create_table_for_mysql(): Expect all callers to invoke
dict_table_add_system_columns().
create_index_dict(): Replaces row_merge_create_index_graph().
innodb_update_n_cols(): Renamed from innobase_update_n_virtual().
Call my_error() if an error occurs.
btr_cur_instant_init(), btr_cur_instant_init_low(),
btr_cur_instant_root_init():
Load additional metadata from the clustered index and set
dict_index_t::n_core_null_bytes. This is invoked
when table metadata is first loaded into the data dictionary.
dict_boot(): Initialize n_core_null_bytes for the four hard-coded
dictionary tables.
dict_create_index_step(): Initialize n_core_null_bytes. This is
executed as part of CREATE TABLE.
dict_index_build_internal_clust(): Initialize n_core_null_bytes to
NO_CORE_NULL_BYTES if table->supports_instant().
row_create_index_for_mysql(): Initialize n_core_null_bytes for
CREATE TEMPORARY TABLE.
commit_cache_norebuild(): Call the code to rename or enlarge columns
in the cache only if instant ADD COLUMN is not being used.
(Instant ADD COLUMN would copy all column metadata from
instant_table to old_table, including the names and lengths.)
PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields.
This is repurposing the 16-bit field PAGE_DIRECTION, of which only the
least significant 3 bits were used. The original byte containing
PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B.
page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT.
page_ptr_get_direction(), page_get_direction(),
page_ptr_set_direction(): Accessors for PAGE_DIRECTION.
page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION.
page_direction_increment(): Increment PAGE_N_DIRECTION
and set PAGE_DIRECTION.
rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes,
and assume that heap_no is always set.
Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records,
even if the record contains fewer fields.
rec_offs_make_valid(): Add the parameter 'leaf'.
rec_copy_prefix_to_dtuple(): Assert that the tuple is only built
on the core fields. Instant ADD COLUMN only applies to the
clustered index, and we should never build a search key that has
more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR.
All these columns are always present.
dict_index_build_data_tuple(): Remove assertions that would be
duplicated in rec_copy_prefix_to_dtuple().
rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose
number of fields is between n_core_fields and n_fields.
cmp_rec_rec_with_match(): Implement the comparison between two
MIN_REC_FLAG records.
trx_t::in_rollback: Make the field available in non-debug builds.
trx_start_for_ddl_low(): Remove dangerous error-tolerance.
A dictionary transaction must be flagged as such before it has generated
any undo log records. This is because trx_undo_assign_undo() will mark
the transaction as a dictionary transaction in the undo log header
right before the very first undo log record is being written.
btr_index_rec_validate(): Account for instant ADD COLUMN
row_undo_ins_remove_clust_rec(): On the rollback of an insert into
SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the
last column from the table and the clustered index.
row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(),
trx_undo_update_rec_get_update(): Handle the 'default row'
as a special case.
dtuple_t::trim(index): Omit a redundant suffix of an index tuple right
before insert or update. After instant ADD COLUMN, if the last fields
of a clustered index tuple match the 'default row', there is no
need to store them. While trimming the entry, we must hold a page latch,
so that the table cannot be emptied and the 'default row' be deleted.
btr_cur_optimistic_update(), btr_cur_pessimistic_update(),
row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low():
Invoke dtuple_t::trim() if needed.
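A simplified, self-contained sketch of the trimming idea (the tuple and field types here are stand-ins, not the InnoDB dtuple_t/dfield_t structures): trailing fields beyond the core fields that equal the 'default row' value are simply dropped from the logical field count before the entry is stored:

#include <cstddef>
#include <string>
#include <vector>

struct field_value { std::string data; };	/* stand-in for dfield_t */
struct tuple {					/* stand-in for dtuple_t */
	std::vector<field_value>	fields;
	size_t				n_fields;	/* logical field count */
};

/* Trim trailing fields that match the instant-ADD default values.
core_fields plays the role of dict_index_t::n_core_fields; defaults[i]
is the 'default row' value of field i (only used for i >= core_fields). */
static void trim_entry(tuple& entry, size_t core_fields,
		       const std::vector<field_value>& defaults)
{
	size_t n = entry.n_fields;
	while (n > core_fields
	       && entry.fields[n - 1].data == defaults[n - 1].data) {
		n--;	/* the stored record may omit this field */
	}
	entry.n_fields = n;
}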
row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling
row_ins_clust_index_entry_low().
rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number
of fields to be between n_core_fields and n_fields. Do not support
infimum,supremum. They are never supposed to be stored in dtuple_t,
because page creation nowadays uses a lower-level method for initializing
them.
rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the
number of fields.
btr_cur_trim(): In an update, trim the index entry as needed. For the
'default row', handle rollback specially. For user records, omit
fields that match the 'default row'.
btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete():
Skip locking and adaptive hash index for the 'default row'.
row_log_table_apply_convert_mrec(): Replace 'default row' values if needed.
In the temporary file that is applied by row_log_table_apply(),
we must identify whether the records contain the extra header for
instantly added columns. For now, we will allocate an additional byte
for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table
has been subject to instant ADD COLUMN. The ROW_T_DELETE records are
fine, as they will be converted and will only contain 'core' columns
(PRIMARY KEY and some system columns) that are converted from dtuple_t.
rec_get_converted_size_temp(), rec_init_offsets_temp(),
rec_convert_dtuple_to_temp(): Add the parameter 'status'.
REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED:
An info_bits constant for distinguishing the 'default row' record.
rec_comp_status_t: An enum of the status bit values.
rec_leaf_format: An enum that replaces the bool parameter of
rec_init_offsets_comp_ordinary().
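To make the flag arithmetic concrete, a hedged sketch of how such an info-bits constant can be composed from a flag and a status value; the numeric values below are assumptions chosen for illustration and need not match the real record header layout:

#include <cstdint>

/* Illustrative status values (the low bits of the COMPACT/DYNAMIC
record header). */
enum rec_status : uint8_t {
	STATUS_ORDINARY		= 0,
	STATUS_NODE_PTR		= 1,
	STATUS_INFIMUM		= 2,
	STATUS_SUPREMUM		= 3,
	STATUS_COLUMNS_ADDED	= 4	/* carries the added-columns header */
};

/* Illustrative info bit, kept outside the 3-bit status mask. */
static const uint8_t INFO_MIN_REC_FLAG = 0x10;

/* The 'default row' is the one record that combines both. */
static const uint8_t INFO_DEFAULT_ROW = INFO_MIN_REC_FLAG | STATUS_COLUMNS_ADDED;

static bool is_default_row(uint8_t info_and_status)
{
	return (info_and_status & INFO_MIN_REC_FLAG)
		&& (info_and_status & 7) == STATUS_COLUMNS_ADDED;
}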
		node->table->can_be_evicted = true;
		node->table->add_to_cache();

		err = DB_SUCCESS;
	}

function_exit:
	trx->error_state = err;

	if (err == DB_SUCCESS) {
		/* Ok: do nothing */

	} else if (err == DB_LOCK_WAIT) {

		return(NULL);
	} else {
		/* SQL error detected */

		return(NULL);
	}

	thr->run_node = que_node_get_parent(node);

	return(thr);
}

/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs.
@return query thread to run next or NULL */
que_thr_t*
dict_create_index_step(
/*===================*/
	que_thr_t*	thr)	/*!< in: query thread */
{
	ind_node_t*	node;
	dberr_t		err	= DB_ERROR;
	trx_t*		trx;

	ut_ad(thr);
	ut_ad(mutex_own(&dict_sys->mutex));

	trx = thr_get_trx(thr);

	node = static_cast<ind_node_t*>(thr->run_node);

	ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);

	if (thr->prev_node == que_node_get_parent(node)) {
		node->state = INDEX_BUILD_INDEX_DEF;
	}

	if (node->state == INDEX_BUILD_INDEX_DEF) {
		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
		err = dict_build_index_def_step(thr, node);

		if (err != DB_SUCCESS) {
			goto function_exit;
		}

		node->state = INDEX_BUILD_FIELD_DEF;
		node->field_no = 0;

		thr->run_node = node->ind_def;

		return(thr);
	}

	if (node->state == INDEX_BUILD_FIELD_DEF) {

		if (node->field_no < (node->index)->n_fields) {

			dict_build_field_def_step(node);

			node->field_no++;

			thr->run_node = node->field_def;

			return(thr);
		} else {
			node->state = INDEX_ADD_TO_CACHE;
		}
	}

	if (node->state == INDEX_ADD_TO_CACHE) {
		ut_ad(node->index->table == node->table);
		err = dict_index_add_to_cache(
			node->index, FIL_NULL, trx_is_strict(trx),
			node->add_v);

		ut_ad((node->index == NULL) == (err != DB_SUCCESS));

		if (!node->index) {
			goto function_exit;
		}

		ut_ad(!node->index->is_instant());
		ut_ad(node->index->n_core_null_bytes
		      == ((dict_index_is_clust(node->index)
			   && node->table->supports_instant())
			  ? dict_index_t::NO_CORE_NULL_BYTES
			  : UT_BITS_IN_BYTES(
				  unsigned(node->index->n_nullable))));
		node->index->n_core_null_bytes = UT_BITS_IN_BYTES(
			unsigned(node->index->n_nullable));
		node->state = INDEX_CREATE_INDEX_TREE;
	}

	if (node->state == INDEX_CREATE_INDEX_TREE) {

		err = dict_create_index_tree_step(node);

		DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail",
				err = DB_OUT_OF_MEMORY;);

		if (err != DB_SUCCESS) {
			/* If this is a FTS index, we will need to remove
			it from fts->cache->indexes list as well */
			if ((node->index->type & DICT_FTS)
			    && node->table->fts) {
				fts_index_cache_t*	index_cache;

				rw_lock_x_lock(
					&node->table->fts->cache->init_lock);

				index_cache = (fts_index_cache_t*)
					fts_find_index_cache(
						node->table->fts->cache,
						node->index);

				if (index_cache->words) {
					rbt_free(index_cache->words);
					index_cache->words = 0;
				}

				ib_vector_remove(
					node->table->fts->cache->indexes,
					*reinterpret_cast<void**>(index_cache));

				rw_lock_x_unlock(
					&node->table->fts->cache->init_lock);
			}

			dict_index_remove_from_cache(node->table, node->index);
			node->index = NULL;

			goto function_exit;
		}

		node->index->page = node->page_no;
		/* These should have been set in
		dict_build_index_def_step() and
		dict_index_add_to_cache(). */
		ut_ad(node->index->trx_id == trx->id);
		ut_ad(node->index->table->def_trx_id == trx->id);
	}

function_exit:
	trx->error_state = err;

	if (err == DB_SUCCESS) {
		/* Ok: do nothing */

	} else if (err == DB_LOCK_WAIT) {

		return(NULL);
	} else {
		/* SQL error detected */

		return(NULL);
	}

	thr->run_node = que_node_get_parent(node);

	return(thr);
}
/****************************************************************//**
Check whether a system table exists. Additionally, if it exists,
move it to the non-LRU end of the table LRU list. This is only used
for system tables that can be upgraded or added to an older database,
which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and
SYS_DATAFILES.
@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists
but is not current, DB_TABLE_NOT_FOUND if it does not exist */
static
dberr_t
dict_check_if_system_table_exists(
/*==============================*/
	const char*	tablename,	/*!< in: name of table */
	ulint		num_fields,	/*!< in: number of fields */
	ulint		num_indexes)	/*!< in: number of indexes */
{
	dict_table_t*	sys_table;
	dberr_t		error = DB_SUCCESS;

	ut_a(srv_get_active_thread_type() == SRV_NONE);

	mutex_enter(&dict_sys->mutex);

	sys_table = dict_table_get_low(tablename);

	if (sys_table == NULL) {
		error = DB_TABLE_NOT_FOUND;

	} else if (UT_LIST_GET_LEN(sys_table->indexes) != num_indexes
		   || sys_table->n_cols != num_fields) {
		error = DB_CORRUPTION;

	} else {
		/* This table has already been created, and it is OK.
		Ensure that it can't be evicted from the table LRU cache. */

		dict_table_prevent_eviction(sys_table);
	}

	mutex_exit(&dict_sys->mutex);

	return(error);
}
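For reference, callers of the check above branch on its three return codes; a minimal sketch, assuming the SYS_TABLESPACES table and its DICT_NUM_FIELDS__SYS_TABLESPACES field count as the probed table (the real callers, such as the SYS_FOREIGN flow below, follow the same pattern):

	/* Example (illustrative only): probe an upgradable system table
	and decide whether it must be (re)created. */
	dberr_t	status = dict_check_if_system_table_exists(
		"SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);

	if (status == DB_SUCCESS) {
		/* Present and of the expected shape: nothing to do. */
	} else {
		/* DB_TABLE_NOT_FOUND: create the table from scratch.
		DB_CORRUPTION: drop the incomplete definition first,
		then recreate it, as done for SYS_FOREIGN below. */
	}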
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_foreign_constraint_tables(void)
/*================================================*/
{
	trx_t*		trx;
	my_bool		srv_file_per_table_backup;
	dberr_t		err;
	dberr_t		sys_foreign_err;
	dberr_t		sys_foreign_cols_err;

	ut_a(srv_get_active_thread_type() == SRV_NONE);

	/* Note: The master thread has not been started at this point. */

	sys_foreign_err = dict_check_if_system_table_exists(
		"SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);

	sys_foreign_cols_err = dict_check_if_system_table_exists(
		"SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);

	if (sys_foreign_err == DB_SUCCESS
	    && sys_foreign_cols_err == DB_SUCCESS) {
		return(DB_SUCCESS);
	}

	if (srv_read_only_mode
	    || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
		return(DB_READ_ONLY);
	}

	trx = trx_create();

	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);

	trx->op_info = "creating foreign key sys tables";

	row_mysql_lock_data_dictionary(trx);

	DBUG_EXECUTE_IF(
		"create_and_drop_garbage",
		err = que_eval_sql(
			NULL,
			"PROCEDURE CREATE_GARBAGE_TABLE_PROC () IS\n"
			"BEGIN\n"
			"CREATE TABLE\n"
			"\"test/#sql-ib-garbage\"(ID CHAR);\n"
			"CREATE UNIQUE CLUSTERED INDEX PRIMARY"
			" ON \"test/#sql-ib-garbage\"(ID);\n"
			"END;\n", FALSE, trx);
		ut_ad(err == DB_SUCCESS);
		row_drop_table_for_mysql("test/#sql-ib-garbage", trx,
					 SQLCOM_DROP_DB, true););

	/* Check which incomplete table definition to drop. */

	if (sys_foreign_err == DB_CORRUPTION) {
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
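A high-level sketch of that TRUNCATE sequence; the helper names below (rename_to_temp_name, create_empty_table, drop_table, rename_back) are placeholders for illustration, not the handler methods themselves:

#include <string>

enum truncate_status { TRUNCATE_OK, TRUNCATE_FAILED };

/* Placeholder steps; the real code goes through the handler and InnoDB DDL. */
static bool rename_to_temp_name(const std::string&, std::string& tmp_name)
{ tmp_name = "#sql-ib-tmp"; return true; }
static bool create_empty_table(const std::string&) { return true; }
static void drop_table(const std::string&) {}
static bool rename_back(const std::string&, const std::string&) { return true; }

/* TRUNCATE t == RENAME t TO #sql-ib-tmp; CREATE t; DROP #sql-ib-tmp.
If CREATE fails, try to rename the old table back, because the RENAME
cannot yet be rolled back by the transaction itself. */
static truncate_status truncate_table(const std::string& name)
{
	std::string tmp_name;

	if (!rename_to_temp_name(name, tmp_name)) {
		return TRUNCATE_FAILED;
	}

	if (!create_empty_table(name)) {
		rename_back(tmp_name, name);	/* explicit rename-back */
		return TRUNCATE_FAILED;
	}

	drop_table(tmp_name);			/* old data goes away here */
	return TRUNCATE_OK;
}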
ha_innobase::delete_table(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition-like scenarios. The test innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similar to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
		row_drop_table_after_create_fail("SYS_FOREIGN", trx);
	}

	if (sys_foreign_cols_err == DB_CORRUPTION) {
		row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
	}

	ib::info() << "Creating foreign key constraint system tables.";

	/* NOTE: in dict_load_foreigns we use the fact that
	there are 2 secondary indexes on SYS_FOREIGN, and they
	are defined just like below */

	/* NOTE: when designing InnoDB's foreign key support in 2001, we made
	an error and made the table names and the foreign key id of type
	'CHAR' (internally, really a VARCHAR). We should have made the type
	VARBINARY, like in other InnoDB system tables, to get a clean
	design. */

	srv_file_per_table_backup = srv_file_per_table;

	/* We always want SYSTEM tables to be created inside the system
	tablespace. */

	srv_file_per_table = 0;

	err = que_eval_sql(
		NULL,
		"PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
		"BEGIN\n"
		"CREATE TABLE\n"
		"SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
		" REF_NAME CHAR, N_COLS INT);\n"
		"CREATE UNIQUE CLUSTERED INDEX ID_IND"
		" ON SYS_FOREIGN (ID);\n"
		"CREATE INDEX FOR_IND"
		" ON SYS_FOREIGN (FOR_NAME);\n"
		"CREATE INDEX REF_IND"
		" ON SYS_FOREIGN (REF_NAME);\n"
		"CREATE TABLE\n"
		"SYS_FOREIGN_COLS(ID CHAR, POS INT,"
		" FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
		"CREATE UNIQUE CLUSTERED INDEX ID_IND"
		" ON SYS_FOREIGN_COLS (ID, POS);\n"
		"END;\n",
		FALSE, trx);

	if (err != DB_SUCCESS) {

		ib::error() << "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS"
			" failed: " << ut_strerr(err) << ". Tablespace is"
			" full. Dropping incompletely created tables.";

		ut_ad(err == DB_OUT_OF_FILE_SPACE
		      || err == DB_TOO_MANY_CONCURRENT_TRXS);
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
2018-08-28 13:43:06 +03:00
|
|
|
row_drop_table_after_create_fail("SYS_FOREIGN", trx);
|
|
|
|
row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
if (err == DB_OUT_OF_FILE_SPACE) {
|
|
|
|
err = DB_MUST_GET_MORE_FILE_SPACE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
trx_commit_for_mysql(trx);
|
|
|
|
|
|
|
|
row_mysql_unlock_data_dictionary(trx);
|
|
|
|
|
2018-03-30 15:10:40 +04:00
|
|
|
trx_free(trx);
|
2014-02-26 19:11:54 +01:00
|
|
|
|
|
|
|
srv_file_per_table = srv_file_per_table_backup;
|
|
|
|
|
|
|
|
/* Note: The master thread has not been started at this point. */
|
|
|
|
/* Confirm and move to the non-LRU part of the table LRU list. */
|
|
|
|
sys_foreign_err = dict_check_if_system_table_exists(
|
|
|
|
"SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
|
|
|
|
ut_a(sys_foreign_err == DB_SUCCESS);
|
|
|
|
|
|
|
|
sys_foreign_cols_err = dict_check_if_system_table_exists(
|
|
|
|
"SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
|
|
|
|
ut_a(sys_foreign_cols_err == DB_SUCCESS);
|
|
|
|
|
|
|
|
return(err);
|
|
|
|
}
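
/* Summary for readers, inferred from the CREATE TABLE statements above and
the INSERT statements later in this file (illustrative, not an authoritative
schema description):

	SYS_FOREIGN(ID, FOR_NAME, REF_NAME, N_COLS)
		one row per FOREIGN KEY constraint; N_COLS also carries the
		referential action flags in its high bits, see
		dict_create_add_foreign_to_dictionary()

	SYS_FOREIGN_COLS(ID CHAR, POS INT, FOR_COL_NAME CHAR, REF_COL_NAME CHAR)
		one row per referencing/referenced column pair, keyed by the
		unique clustered index on (ID, POS) */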

/** Creates the virtual column system table (SYS_VIRTUAL) inside InnoDB
at server bootstrap or server start if the table is not found or is
not of the right form.
@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_sys_virtual()
{
	trx_t*		trx;
	my_bool		srv_file_per_table_backup;
	dberr_t		err;

	ut_a(srv_get_active_thread_type() == SRV_NONE);

	/* Note: The master thread has not been started at this point. */
	err = dict_check_if_system_table_exists(
		"SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);

	if (err == DB_SUCCESS) {
		mutex_enter(&dict_sys->mutex);
		dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL");
		mutex_exit(&dict_sys->mutex);
		return(DB_SUCCESS);
	}

	if (srv_read_only_mode
	    || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
		return(DB_READ_ONLY);
	}

	trx = trx_create();

	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);

	trx->op_info = "creating sys_virtual tables";

	row_mysql_lock_data_dictionary(trx);

	/* Check which incomplete table definition to drop. */

	if (err == DB_CORRUPTION) {
		row_drop_table_after_create_fail("SYS_VIRTUAL", trx);
	}

	ib::info() << "Creating sys_virtual system tables.";

	srv_file_per_table_backup = srv_file_per_table;

	/* We always want SYSTEM tables to be created inside the system
	tablespace. */

	srv_file_per_table = 0;

	err = que_eval_sql(
		NULL,
		"PROCEDURE CREATE_SYS_VIRTUAL_TABLES_PROC () IS\n"
		"BEGIN\n"
		"CREATE TABLE\n"
		"SYS_VIRTUAL(TABLE_ID BIGINT, POS INT,"
		" BASE_POS INT);\n"
		"CREATE UNIQUE CLUSTERED INDEX BASE_IDX"
		" ON SYS_VIRTUAL(TABLE_ID, POS, BASE_POS);\n"
		"END;\n",
		FALSE, trx);

	if (err != DB_SUCCESS) {

		ib::error() << "Creation of SYS_VIRTUAL"
			" failed: " << ut_strerr(err) << ". Tablespace is"
			" full or too many transactions."
			" Dropping incompletely created tables.";

		ut_ad(err == DB_OUT_OF_FILE_SPACE
		      || err == DB_TOO_MANY_CONCURRENT_TRXS);

		row_drop_table_after_create_fail("SYS_VIRTUAL", trx);

		if (err == DB_OUT_OF_FILE_SPACE) {
			err = DB_MUST_GET_MORE_FILE_SPACE;
		}
	}

	trx_commit_for_mysql(trx);

	row_mysql_unlock_data_dictionary(trx);

	trx_free(trx);

	srv_file_per_table = srv_file_per_table_backup;

	/* Note: The master thread has not been started at this point. */

	/* Confirm and move to the non-LRU part of the table LRU list. */
	dberr_t	sys_virtual_err = dict_check_if_system_table_exists(
		"SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);
	ut_a(sys_virtual_err == DB_SUCCESS);

	mutex_enter(&dict_sys->mutex);
	dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL");
	mutex_exit(&dict_sys->mutex);

	return(err);
}
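
/* Reader's note: SYS_VIRTUAL, created above, describes one
(virtual column, base column) pair per row, keyed by the unique clustered
index on (TABLE_ID, POS, BASE_POS).  For example, in a hypothetical table
whose virtual column sits at position 3 and is computed from the columns at
positions 0 and 1, the pairs would be recorded as (table_id, 3, 0) and
(table_id, 3, 1). */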

/****************************************************************//**
Evaluate the given foreign key SQL statement.
@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_foreign_eval_sql(
/*==================*/
	pars_info_t*	info,	/*!< in: info struct */
	const char*	sql,	/*!< in: SQL string to evaluate */
	const char*	name,	/*!< in: table name (for diagnostics) */
	const char*	id,	/*!< in: foreign key id */
	trx_t*		trx)	/*!< in/out: transaction */
{
	dberr_t	error;
	FILE*	ef = dict_foreign_err_file;

	error = que_eval_sql(info, sql, FALSE, trx);

	if (error == DB_DUPLICATE_KEY) {
		mutex_enter(&dict_foreign_err_mutex);
		rewind(ef);
		ut_print_timestamp(ef);
		fputs(" Error in foreign key constraint creation for table ",
		      ef);
		ut_print_name(ef, trx, name);
		fputs(".\nA foreign key constraint of name ", ef);
		ut_print_name(ef, trx, id);
		fputs("\nalready exists."
		      " (Note that internally InnoDB adds 'databasename'\n"
		      "in front of the user-defined constraint name.)\n"
		      "Note that InnoDB's FOREIGN KEY system tables store\n"
		      "constraint names as case-insensitive, with the\n"
		      "MySQL standard latin1_swedish_ci collation. If you\n"
		      "create tables or databases whose names differ only in\n"
		      "the character case, then collisions in constraint\n"
		      "names can occur. Workaround: name your constraints\n"
		      "explicitly with unique names.\n",
		      ef);

		mutex_exit(&dict_foreign_err_mutex);

		return(error);
	}

	if (error != DB_SUCCESS) {
		ib::error() << "Foreign key constraint creation failed: "
			<< ut_strerr(error);

		mutex_enter(&dict_foreign_err_mutex);
		ut_print_timestamp(ef);
		fputs(" Internal error in foreign key constraint creation"
		      " for table ", ef);
		ut_print_name(ef, trx, name);
		fputs(".\n"
		      "See the MySQL .err log in the datadir"
		      " for more information.\n", ef);
		mutex_exit(&dict_foreign_err_mutex);

		return(error);
	}

	return(DB_SUCCESS);
}
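
/* Usage sketch (hypothetical identifiers): the callers below bind all
literals through pars_info before handing the statement to
dict_foreign_eval_sql(), which executes it in the caller's dictionary
transaction and reports DB_DUPLICATE_KEY in detail to dict_foreign_err_file:

	pars_info_t*	info = pars_info_create();
	pars_info_add_str_literal(info, "id", foreign->id);
	error = dict_foreign_eval_sql(info, sql, table_name, foreign->id, trx);
*/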

/********************************************************************//**
Add a single foreign key field definition to the data dictionary tables in
the database.
@return error code or DB_SUCCESS */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_create_add_foreign_field_to_dictionary(
/*========================================*/
	ulint			field_nr,	/*!< in: field number */
	const char*		table_name,	/*!< in: table name */
	const dict_foreign_t*	foreign,	/*!< in: foreign */
	trx_t*			trx)		/*!< in/out: transaction */
{
	DBUG_ENTER("dict_create_add_foreign_field_to_dictionary");

	pars_info_t*	info = pars_info_create();

	pars_info_add_str_literal(info, "id", foreign->id);

	pars_info_add_int4_literal(info, "pos", field_nr);

	pars_info_add_str_literal(info, "for_col_name",
				  foreign->foreign_col_names[field_nr]);

	pars_info_add_str_literal(info, "ref_col_name",
				  foreign->referenced_col_names[field_nr]);

	DBUG_RETURN(dict_foreign_eval_sql(
			info,
			"PROCEDURE P () IS\n"
			"BEGIN\n"
			"INSERT INTO SYS_FOREIGN_COLS VALUES"
			"(:id, :pos, :for_col_name, :ref_col_name);\n"
			"END;\n",
			table_name, foreign->id, trx));
}
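
/* Illustrative example (hypothetical names): for a constraint whose id is
"test/fk_child" and whose first column pair maps child_id to id, calling
dict_create_add_foreign_field_to_dictionary(0, ...) effectively executes

	INSERT INTO SYS_FOREIGN_COLS VALUES('test/fk_child', 0, 'child_id', 'id');

with the values supplied as bound literals rather than spliced into the SQL
text. */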

/********************************************************************//**
Construct foreign key constraint definition from data dictionary information.
*/
UNIV_INTERN
char*
dict_foreign_def_get(
/*=================*/
	dict_foreign_t*	foreign,/*!< in: foreign */
	trx_t*		trx)	/*!< in: trx */
{
	char*		fk_def = (char *)mem_heap_alloc(foreign->heap, 4*1024);
	const char*	tbname;
	char		tablebuf[MAX_TABLE_NAME_LEN + 1] = "";
	unsigned	i;
	char*		bufend;

	tbname = dict_remove_db_name(foreign->id);
	bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
				       tbname, strlen(tbname), trx->mysql_thd);
	tablebuf[bufend - tablebuf] = '\0';

	sprintf(fk_def,
		(char *)"CONSTRAINT %s FOREIGN KEY (", (char *)tablebuf);

	for(i = 0; i < foreign->n_fields; i++) {
		char	buf[MAX_TABLE_NAME_LEN + 1] = "";
		innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
				      foreign->foreign_col_names[i],
				      strlen(foreign->foreign_col_names[i]),
				      trx->mysql_thd);
		strcat(fk_def, buf);
		if (i < static_cast<unsigned>(foreign->n_fields-1)) {
			strcat(fk_def, (char *)",");
		}
	}

	strcat(fk_def,(char *)") REFERENCES ");

	bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
				       foreign->referenced_table_name,
				       strlen(foreign->referenced_table_name),
				       trx->mysql_thd);
	tablebuf[bufend - tablebuf] = '\0';

	strcat(fk_def, tablebuf);
	strcat(fk_def, " (");

	for(i = 0; i < foreign->n_fields; i++) {
		char	buf[MAX_TABLE_NAME_LEN + 1] = "";
		bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
					       foreign->referenced_col_names[i],
					       strlen(foreign->referenced_col_names[i]),
					       trx->mysql_thd);
		buf[bufend - buf] = '\0';
		strcat(fk_def, buf);
		if (i < (uint)foreign->n_fields-1) {
			strcat(fk_def, (char *)",");
		}
	}
	strcat(fk_def, (char *)")");

	return fk_def;
}
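
/* Example of a generated definition string (hypothetical names; the exact
quoting depends on innobase_convert_name() and the session settings):

	CONSTRAINT `fk_child` FOREIGN KEY (`parent_id`) REFERENCES `test`.`parent` (`id`)

The buffer is allocated from foreign->heap, so the callers in this file do
not free it separately. */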

/********************************************************************//**
Convert foreign key column names from data dictionary to SQL-layer.
*/
static
void
dict_foreign_def_get_fields(
/*========================*/
	dict_foreign_t*	foreign,/*!< in: foreign */
	trx_t*		trx,	/*!< in: trx */
	char**		field,	/*!< out: foreign column */
	char**		field2,	/*!< out: referenced column */
	ulint		col_no)	/*!< in: column number */
{
	char*	bufend;
	char*	fieldbuf = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1);
	char*	fieldbuf2 = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1);

	bufend = innobase_convert_name(fieldbuf, MAX_TABLE_NAME_LEN,
				       foreign->foreign_col_names[col_no],
				       strlen(foreign->foreign_col_names[col_no]),
				       trx->mysql_thd);

	fieldbuf[bufend - fieldbuf] = '\0';

	bufend = innobase_convert_name(fieldbuf2, MAX_TABLE_NAME_LEN,
				       foreign->referenced_col_names[col_no],
				       strlen(foreign->referenced_col_names[col_no]),
				       trx->mysql_thd);

	fieldbuf2[bufend - fieldbuf2] = '\0';
	*field = fieldbuf;
	*field2 = fieldbuf2;
}

/********************************************************************//**
Add a foreign key definition to the data dictionary tables.
@return error code or DB_SUCCESS */
dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
	const char*		name,	/*!< in: table name */
	const dict_foreign_t*	foreign,/*!< in: foreign key */
	trx_t*			trx)	/*!< in/out: dictionary transaction */
{
	dberr_t		error;

	DBUG_ENTER("dict_create_add_foreign_to_dictionary");

	pars_info_t*	info = pars_info_create();

	pars_info_add_str_literal(info, "id", foreign->id);

	pars_info_add_str_literal(info, "for_name", name);

	pars_info_add_str_literal(info, "ref_name",
				  foreign->referenced_table_name);

	pars_info_add_int4_literal(info, "n_cols",
				   ulint(foreign->n_fields)
				   | (ulint(foreign->type) << 24));

	DBUG_PRINT("dict_create_add_foreign_to_dictionary",
		   ("'%s', '%s', '%s', %d", foreign->id, name,
		    foreign->referenced_table_name,
		    foreign->n_fields + (foreign->type << 24)));

	error = dict_foreign_eval_sql(info,
				      "PROCEDURE P () IS\n"
				      "BEGIN\n"
				      "INSERT INTO SYS_FOREIGN VALUES"
				      "(:id, :for_name, :ref_name, :n_cols);\n"
				      "END;\n"
				      , name, foreign->id, trx);

	if (error != DB_SUCCESS) {

		if (error == DB_DUPLICATE_KEY) {
			char	buf[MAX_TABLE_NAME_LEN + 1] = "";
			char	tablename[MAX_TABLE_NAME_LEN + 1] = "";
			char*	fk_def;

			innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
					      name, strlen(name),
					      trx->mysql_thd);

			innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
					      foreign->id, strlen(foreign->id),
					      trx->mysql_thd);

			fk_def = dict_foreign_def_get(
				(dict_foreign_t*)foreign, trx);

			ib_push_warning(trx, error,
				"Create or Alter table %s with foreign key constraint"
				" failed. Foreign key constraint %s"
				" already exists on data dictionary."
				" Foreign key constraint names need to be unique in database."
				" Error in foreign key definition: %s.",
				tablename, buf, fk_def);
		}

		DBUG_RETURN(error);
	}

	for (ulint i = 0; i < foreign->n_fields; i++) {
		error = dict_create_add_foreign_field_to_dictionary(
			i, name, foreign, trx);

		if (error != DB_SUCCESS) {
			char	buf[MAX_TABLE_NAME_LEN + 1] = "";
			char	tablename[MAX_TABLE_NAME_LEN + 1] = "";
			char*	field = NULL;
			char*	field2 = NULL;
			char*	fk_def;

			innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
					      name, strlen(name),
					      trx->mysql_thd);
			innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
					      foreign->id, strlen(foreign->id),
					      trx->mysql_thd);
			fk_def = dict_foreign_def_get(
				(dict_foreign_t*)foreign, trx);
			dict_foreign_def_get_fields(
				(dict_foreign_t*)foreign, trx,
				&field, &field2, i);

			ib_push_warning(trx, error,
				"Create or Alter table %s with foreign key constraint"
				" failed. Error adding foreign key constraint name %s"
				" fields %s or %s to the dictionary."
				" Error in foreign key definition: %s.",
				tablename, buf, field, field2, fk_def);

			DBUG_RETURN(error);
		}
	}

	DBUG_RETURN(error);
}
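
/* Note on the N_COLS value stored above: the low 24 bits hold the number of
column pairs and the bits from 24 upwards hold the foreign->type action
flags.  A reader of SYS_FOREIGN can unpack the stored value like this
(illustrative only):

	ulint	n_fields = n_cols_value & 0xFFFFFFUL;
	ulint	type	 = n_cols_value >> 24;
*/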

/** Check if a foreign constraint is on the given column name.
@param[in]	col_name	column name to be searched for fk constraint
@param[in]	table		table to which foreign key constraint belongs
@return true if fk constraint is present on the table, false otherwise. */
static
bool
dict_foreign_base_for_stored(
	const char*		col_name,
	const dict_table_t*	table)
{
	/* Loop through each stored column and check if its base column has
	the same name as the column name being checked */
	dict_s_col_list::const_iterator	it;
	for (it = table->s_cols->begin();
	     it != table->s_cols->end(); ++it) {
		dict_s_col_t	s_col = *it;

		for (ulint j = 0; j < s_col.num_base; j++) {
			if (strcmp(col_name, dict_table_get_col_name(
					table,
					s_col.base_col[j]->ind)) == 0) {
				return(true);
			}
		}
	}

	return(false);
}

/** Check if a foreign constraint is on columns served as base columns
of any stored column. This is to prevent creating SET NULL or CASCADE
constraints on such columns.
@param[in]	local_fk_set	set of foreign key objects, to be added to
				the dictionary tables
@param[in]	table		table to which the foreign key objects in
				local_fk_set belong
@return true if yes, otherwise, false */
bool
dict_foreigns_has_s_base_col(
	const dict_foreign_set&	local_fk_set,
	const dict_table_t*	table)
{
	dict_foreign_t*	foreign;

	if (table->s_cols == NULL) {
		return (false);
	}

	for (dict_foreign_set::const_iterator it = local_fk_set.begin();
	     it != local_fk_set.end(); ++it) {

		foreign = *it;
		ulint	type = foreign->type;

		type &= ~(DICT_FOREIGN_ON_DELETE_NO_ACTION
			  | DICT_FOREIGN_ON_UPDATE_NO_ACTION);

		if (type == 0) {
			continue;
		}

		for (ulint i = 0; i < foreign->n_fields; i++) {
			/* Check if the constraint is on a column that
			is a base column of any stored column */
			if (dict_foreign_base_for_stored(
				    foreign->foreign_col_names[i], table)) {
				return(true);
			}
		}
	}

	return(false);
}
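
/* Illustrative example (hypothetical table): given
	CREATE TABLE t (a INT, b INT AS (a + 1) STORED);
a constraint such as FOREIGN KEY (a) REFERENCES p(id) ON DELETE CASCADE makes
dict_foreigns_has_s_base_col() return true, because column a is a base column
of the stored column b.  Constraints that carry only NO ACTION flags are
skipped by the type mask above. */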

/** Adds the given set of foreign key objects to the dictionary tables
in the database. This function does not modify the dictionary cache. The
caller must ensure that all foreign key objects contain a valid constraint
name in foreign->id.
@param[in]	local_fk_set	set of foreign key objects, to be added to
				the dictionary tables
@param[in]	table		table to which the foreign key objects in
				local_fk_set belong
@param[in,out]	trx		transaction
@return error code or DB_SUCCESS */
dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
	const dict_foreign_set&	local_fk_set,
	const dict_table_t*	table,
	trx_t*			trx)
{
	dict_foreign_t*	foreign;
	dberr_t		error;

	ut_ad(mutex_own(&dict_sys->mutex));

	if (NULL == dict_table_get_low("SYS_FOREIGN")) {

		ib::error() << "Table SYS_FOREIGN not found"
			" in internal data dictionary";

		return(DB_ERROR);
	}

	for (dict_foreign_set::const_iterator it = local_fk_set.begin();
	     it != local_fk_set.end();
	     ++it) {

		foreign = *it;
		ut_ad(foreign->id != NULL);

		error = dict_create_add_foreign_to_dictionary(
			table->name.m_name, foreign, trx);

		if (error != DB_SUCCESS) {

			return(error);
		}
	}

	return(DB_SUCCESS);
}
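
/* Usage sketch (hypothetical caller): the dictionary mutex must already be
held and every foreign->id must be assigned before this is called, e.g.

	row_mysql_lock_data_dictionary(trx);
	err = dict_create_add_foreigns_to_dictionary(local_fk_set, table, trx);
	row_mysql_unlock_data_dictionary(trx);

The loop stops at the first constraint that fails to be added. */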

/****************************************************************//**
Creates the tablespaces and datafiles system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_sys_tablespace(void)
/*=====================================*/
{
	trx_t*		trx;
	my_bool		srv_file_per_table_backup;
	dberr_t		err;
	dberr_t		sys_tablespaces_err;
	dberr_t		sys_datafiles_err;

	ut_a(srv_get_active_thread_type() == SRV_NONE);

	/* Note: The master thread has not been started at this point. */

	sys_tablespaces_err = dict_check_if_system_table_exists(
		"SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
	sys_datafiles_err = dict_check_if_system_table_exists(
		"SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);

	if (sys_tablespaces_err == DB_SUCCESS
	    && sys_datafiles_err == DB_SUCCESS) {
		srv_sys_tablespaces_open = true;
		return(DB_SUCCESS);
	}

	if (srv_read_only_mode
	    || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
		return(DB_READ_ONLY);
	}

	trx = trx_create();

	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);

	trx->op_info = "creating tablespace and datafile sys tables";

	row_mysql_lock_data_dictionary(trx);

	/* Check which incomplete table definition to drop. */

	if (sys_tablespaces_err == DB_CORRUPTION) {
		row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
	}

	if (sys_datafiles_err == DB_CORRUPTION) {
		row_drop_table_after_create_fail("SYS_DATAFILES", trx);
	}

	ib::info() << "Creating tablespace and datafile system tables.";

	/* We always want SYSTEM tables to be created inside the system
	tablespace. */
	srv_file_per_table_backup = srv_file_per_table;
	srv_file_per_table = 0;

	err = que_eval_sql(
		NULL,
		"PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n"
		"BEGIN\n"
		"CREATE TABLE SYS_TABLESPACES(\n"
		" SPACE INT, NAME CHAR, FLAGS INT);\n"
		"CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE"
		" ON SYS_TABLESPACES (SPACE);\n"
		"CREATE TABLE SYS_DATAFILES(\n"
		" SPACE INT, PATH CHAR);\n"
		"CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE"
		" ON SYS_DATAFILES (SPACE);\n"
		"END;\n",
		FALSE, trx);

	if (err != DB_SUCCESS) {

		ib::error() << "Creation of SYS_TABLESPACES and SYS_DATAFILES"
			" has failed with error " << ut_strerr(err)
			<< ". Dropping incompletely created tables.";

		ut_a(err == DB_OUT_OF_FILE_SPACE
		     || err == DB_DUPLICATE_KEY
		     || err == DB_TOO_MANY_CONCURRENT_TRXS);
|
|
|
|
|
MDEV-13564 Mariabackup does not work with TRUNCATE
Implement undo tablespace truncation via normal redo logging.
Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name,
CREATE, and DROP.
Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2
is killed before the DROP operation is committed. If MariaDB Server 10.2
is killed during TRUNCATE, it is also possible that the old table
was renamed to #sql-ib*.ibd but the data dictionary will refer to the
table using the original name.
In MariaDB Server 10.3, RENAME inside InnoDB is transactional,
and #sql-* tables will be dropped on startup. So, this new TRUNCATE
will be fully crash-safe in 10.3.
ha_mroonga::wrapper_truncate(): Pass table options to the underlying
storage engine, now that ha_innobase::truncate() will need them.
rpl_slave_state::truncate_state_table(): Before truncating
mysql.gtid_slave_pos, evict any cached table handles from
the table definition cache, so that there will be no stale
references to the old table after truncating.
== TRUNCATE TABLE ==
WL#6501 in MySQL 5.7 introduced separate log files for implementing
atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB
undo and redo log. Some convoluted logic was added to the InnoDB
crash recovery, and some extra synchronization (including a redo log
checkpoint) was introduced to make this work. This synchronization
has caused performance problems and race conditions, and the extra
log files cannot be copied or applied by external backup programs.
In order to support crash-upgrade from MariaDB 10.2, we will keep
the logic for parsing and applying the extra log files, but we will
no longer generate those files in TRUNCATE TABLE.
A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE
(with full redo and undo logging and proper rollback). This will
be implemented in MDEV-14717.
ha_innobase::truncate(): Invoke RENAME, create(), delete_table().
Because RENAME cannot be fully rolled back before MariaDB 10.3
due to missing undo logging, add some explicit rename-back in
case the operation fails.
ha_innobase::delete_table(): Introduce a variant that takes sqlcom as
a parameter. In TRUNCATE TABLE, we do not want to touch any
FOREIGN KEY constraints.
ha_innobase::create(): Add the parameters file_per_table, trx.
In TRUNCATE, the new table must be created in the same transaction
that renames the old table.
create_table_info_t::create_table_info_t(): Add the parameters
file_per_table, trx.
row_drop_table_for_mysql(): Replace a bool parameter with sqlcom.
row_drop_table_after_create_fail(): New function, wrapping
row_drop_table_for_mysql().
dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(),
fil_prepare_for_truncate(), fil_reinit_space_header_for_table(),
row_truncate_table_for_mysql(), TruncateLogger,
row_truncate_prepare(), row_truncate_rollback(),
row_truncate_complete(), row_truncate_fts(),
row_truncate_update_system_tables(),
row_truncate_foreign_key_checks(), row_truncate_sanity_checks():
Remove.
row_upd_check_references_constraints(): Remove a check for
TRUNCATE, now that the table is no longer truncated in place.
The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some
race-condition like scenarios. The test innodb-innodb.truncate does
not use any synchronization.
We add a redo log subformat to indicate backup-friendly format.
MariaDB 10.4 will remove support for the old TRUNCATE logging,
so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve
limitations.
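As an illustration of the flow just described, here is a minimal standalone
sketch (not code from this patch): it models TRUNCATE as a RENAME to a
#sql-ib name, a CREATE of an empty replacement, and a DROP of the renamed
table, with an explicit rename-back when a later step fails. All helper
names below are invented for the example.

// Sketch only: hypothetical stand-ins for the real dictionary operations.
#include <iostream>
#include <string>

enum dberr { DB_SUCCESS, DB_ERROR };

static dberr rename_table(const std::string& from, const std::string& to)
{
	std::cout << "RENAME " << from << " TO " << to << '\n';
	return DB_SUCCESS;
}

static dberr create_empty_table(const std::string& name)
{
	std::cout << "CREATE " << name << '\n';
	return DB_SUCCESS;
}

static dberr drop_table(const std::string& name)
{
	std::cout << "DROP " << name << '\n';
	return DB_SUCCESS;
}

// TRUNCATE t: rename t away, create a new empty t, drop the old data.
// If CREATE fails, rename the old table back by hand, because before
// MariaDB 10.3 the RENAME step cannot be rolled back automatically.
static dberr truncate_table(const std::string& name)
{
	const std::string tmp = "#sql-ib-" + name;

	if (dberr err = rename_table(name, tmp)) {
		return err;
	}
	if (dberr err = create_empty_table(name)) {
		rename_table(tmp, name);	/* explicit rename-back */
		return err;
	}
	return drop_table(tmp);
}

int main()
{
	return truncate_table("t1") != DB_SUCCESS;
}

The manual rename-back corresponds to the explicit recovery path mentioned
for ha_innobase::truncate() above.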
== Undo tablespace truncation ==
MySQL 5.7 implements undo tablespace truncation. It is only
possible when innodb_undo_tablespaces is set to at least 2.
The logging is implemented similarly to the WL#6501 TRUNCATE,
that is, using separate log files and a redo log checkpoint.
We can simply implement undo tablespace truncation within
a single mini-transaction that reinitializes the undo log
tablespace file. Unfortunately, due to the redo log format
of some operations, currently, the total redo log written by
undo tablespace truncation will be more than the combined size
of the truncated undo tablespace. It should be acceptable
to have a little more than 1 megabyte of log in a single
mini-transaction. This will be fixed in MDEV-17138 in
MariaDB Server 10.4.
recv_sys_t: Add truncated_undo_spaces[] to remember for which undo
tablespaces a MLOG_FILE_CREATE2 record was seen.
namespace undo: Remove some unnecessary declarations.
fil_space_t::is_being_truncated: Document that this flag now
only applies to undo tablespaces. Remove some references.
fil_space_t::is_stopping(): Do not refer to is_being_truncated.
This check is for tablespaces of tables. Potentially used
tablespaces are never truncated any more.
buf_dblwr_process(): Suppress the out-of-bounds warning
for undo tablespaces.
fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero
page number (new size of the tablespace in pages) to inform
crash recovery that the undo tablespace size has been reduced.
fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2
can be written for undo tablespaces (without .ibd file suffix)
for a nonzero page number.
os_file_truncate(): Add the parameter allow_shrink=false
so that undo tablespaces can actually be shrunk using this function.
fil_name_parse(): For undo tablespace truncation,
buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[].
recv_read_in_area(): Avoid reading pages for which no redo log
records remain buffered, after recv_addr_trim() removed them.
trx_rseg_header_create(): Add a FIXME comment that we could write
much less redo log.
trx_undo_truncate_tablespace(): Reinitialize the undo tablespace
in a single mini-transaction, which will be flushed to the redo log
before the file size is trimmed.
recv_addr_trim(): Discard any redo logs for pages that were
logged after the new end of a file, before the truncation LSN.
If the rec_list becomes empty, reduce n_addrs. After removing
any affected records, actually truncate the file.
recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before
applying any log records. The undo tablespace files must be open
at this point.
buf_flush_or_remove_pages(), buf_flush_dirty_pages(),
buf_LRU_flush_or_remove_pages(): Add a parameter for specifying
the number of the first page to flush or remove (default 0).
trx_purge_initiate_truncate(): Remove the log checkpoints, the
extra logging, and some unnecessary crash points. Merge the code
from trx_undo_truncate_tablespace(). First, flush all to-be-discarded
pages (beyond the new end of the file), then trim the space->size
to make the page allocation deterministic. At the only remaining
crash injection point, flush the redo log, so that the recovery
can be tested.
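The recovery-side bookkeeping listed above (fil_truncate_log() writing
MLOG_FILE_CREATE2 with the new size, truncated_undo_spaces[], and
recv_addr_trim()) can be pictured with the following standalone toy model.
None of the types or functions below are the real InnoDB ones; they merely
mimic discarding buffered redo records for pages beyond the new end of a
shrunk undo tablespace.

#include <iostream>
#include <map>
#include <string>
#include <vector>

// page_no -> buffered redo records for one undo tablespace
// (a stand-in for the per-space part of the recovery hash).
using toy_record_buffer = std::map<unsigned, std::vector<std::string>>;

// New size remembered from MLOG_FILE_CREATE2, as in truncated_undo_spaces[].
static unsigned truncated_to_pages = 0;

static void note_truncate_marker(unsigned new_size_in_pages)
{
	truncated_to_pages = new_size_in_pages;
}

// Like recv_addr_trim(): forget records for pages at or beyond the new
// end of the file, so they are neither read in nor applied.
static void trim(toy_record_buffer& buf)
{
	if (truncated_to_pages) {
		buf.erase(buf.lower_bound(truncated_to_pages), buf.end());
	}
}

int main()
{
	toy_record_buffer buf;
	buf[100].push_back("change within the new size");
	buf[800].push_back("change beyond the new size");

	note_truncate_marker(768);	/* undo space shrunk to 768 pages */
	trim(buf);			/* done before applying the log */

	std::cout << buf.size() << " page(s) left to apply\n";	/* prints 1 */
}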
2018-08-28 13:43:06 +03:00
		row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
		row_drop_table_after_create_fail("SYS_DATAFILES", trx);
2014-02-26 19:11:54 +01:00

		if (err == DB_OUT_OF_FILE_SPACE) {
			err = DB_MUST_GET_MORE_FILE_SPACE;
		}
	}

	trx_commit_for_mysql(trx);

	row_mysql_unlock_data_dictionary(trx);

2018-03-30 15:10:40 +04:00
	trx_free(trx);
2014-02-26 19:11:54 +01:00

	srv_file_per_table = srv_file_per_table_backup;

	if (err == DB_SUCCESS) {
2017-02-01 15:47:33 +02:00
		srv_sys_tablespaces_open = true;
2014-02-26 19:11:54 +01:00
	}

	/* Note: The master thread has not been started at this point. */
	/* Confirm and move to the non-LRU part of the table LRU list. */

	sys_tablespaces_err = dict_check_if_system_table_exists(
		"SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
2017-06-21 16:37:48 +03:00
	ut_a(sys_tablespaces_err == DB_SUCCESS || err != DB_SUCCESS);
2014-02-26 19:11:54 +01:00

	sys_datafiles_err = dict_check_if_system_table_exists(
		"SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
2017-06-21 16:37:48 +03:00
	ut_a(sys_datafiles_err == DB_SUCCESS || err != DB_SUCCESS);
2014-02-26 19:11:54 +01:00

	return(err);
}
2016-08-12 11:17:45 +03:00
/** Put a tablespace definition into the data dictionary,
replacing what was there previously.
@param[in]	space_id	Tablespace id
@param[in]	name		Tablespace name
@param[in]	flags		Tablespace flags
@param[in]	path		Tablespace path
@param[in]	trx		Transaction
@return error code or DB_SUCCESS */
2014-02-26 19:11:54 +01:00
dberr_t
2016-08-12 11:17:45 +03:00
dict_replace_tablespace_in_dictionary(
	ulint		space_id,
	const char*	name,
	ulint		flags,
	const char*	path,
2017-10-02 11:27:53 +03:00
	trx_t*		trx)
2014-02-26 19:11:54 +01:00
{
2016-08-12 11:17:45 +03:00
	if (!srv_sys_tablespaces_open) {
		/* Startup procedure is not yet ready for updates. */
		return(DB_SUCCESS);
	}

2014-02-26 19:11:54 +01:00
	dberr_t		error;

	pars_info_t*	info = pars_info_create();

2016-08-12 11:17:45 +03:00
	pars_info_add_int4_literal(info, "space", space_id);
2014-02-26 19:11:54 +01:00

	pars_info_add_str_literal(info, "name", name);

	pars_info_add_int4_literal(info, "flags", flags);

	pars_info_add_str_literal(info, "path", path);

	error = que_eval_sql(info,
			     "PROCEDURE P () IS\n"
2016-08-12 11:17:45 +03:00
			     "p CHAR;\n"
			     "DECLARE CURSOR c IS\n"
			     " SELECT PATH FROM SYS_DATAFILES\n"
			     " WHERE SPACE=:space FOR UPDATE;\n"
2014-02-26 19:11:54 +01:00
			     "BEGIN\n"
2016-08-12 11:17:45 +03:00
			     "OPEN c;\n"
			     "FETCH c INTO p;\n"
			     "IF (SQL % NOTFOUND) THEN"
			     " DELETE FROM SYS_TABLESPACES "
			     "WHERE SPACE=:space;\n"
			     " INSERT INTO SYS_TABLESPACES VALUES"
2014-02-26 19:11:54 +01:00
			     "(:space, :name, :flags);\n"
2016-08-12 11:17:45 +03:00
			     " INSERT INTO SYS_DATAFILES VALUES"
2014-02-26 19:11:54 +01:00
			     "(:space, :path);\n"
2016-08-12 11:17:45 +03:00
			     "ELSIF p <> :path THEN\n"
			     " UPDATE SYS_DATAFILES SET PATH=:path"
			     " WHERE CURRENT OF c;\n"
			     "END IF;\n"
2014-02-26 19:11:54 +01:00
			     "END;\n",
			     FALSE, trx);

	if (error != DB_SUCCESS) {
		return(error);
	}

	trx->op_info = "";

	return(error);
}
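For readers unfamiliar with the embedded InnoDB SQL dialect, the effect of
the procedure above can be restated as a small standalone C++ model. The
maps and names below are invented for illustration and are not part of
dict0crea.cc: if SYS_DATAFILES has no row for the space, any stale
SYS_TABLESPACES row is removed and fresh rows are inserted into both
tables; if a row exists but its path differs, only SYS_DATAFILES.PATH is
updated.

#include <map>
#include <string>

struct toy_tablespace_row { std::string name; unsigned flags; };

static std::map<unsigned, toy_tablespace_row>	sys_tablespaces;
static std::map<unsigned, std::string>		sys_datafiles;	/* SPACE -> PATH */

static void replace_tablespace(unsigned space, const std::string& name,
			       unsigned flags, const std::string& path)
{
	auto	it = sys_datafiles.find(space);

	if (it == sys_datafiles.end()) {
		/* IF (SQL % NOTFOUND): delete + insert into both tables */
		sys_tablespaces.erase(space);
		sys_tablespaces[space] = toy_tablespace_row{name, flags};
		sys_datafiles[space] = path;
	} else if (it->second != path) {
		/* ELSIF p <> :path: update the datafile path only */
		it->second = path;
	}
}

int main()
{
	replace_tablespace(17, "db/t1", 33, "./db/t1.ibd");	/* insert */
	replace_tablespace(17, "db/t1", 33, "/new/t1.ibd");	/* path update */
	return sys_datafiles[17] == "/new/t1.ibd" ? 0 : 1;	/* 0 = OK */
}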
2016-08-12 11:17:45 +03:00
/** Delete records from SYS_TABLESPACES and SYS_DATAFILES associated
with a particular tablespace ID.
@param[in]	space	Tablespace ID
@param[in,out]	trx	Current transaction
@return DB_SUCCESS if OK, dberr_t if the operation failed */
dberr_t
dict_delete_tablespace_and_datafiles(
	ulint	space,
	trx_t*	trx)
{
	dberr_t	err = DB_SUCCESS;

2019-05-13 18:26:59 +03:00
	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_X));
2016-08-12 11:17:45 +03:00
	ut_ad(mutex_own(&dict_sys->mutex));
	ut_ad(srv_sys_tablespaces_open);

	trx->op_info = "delete tablespace and datafiles from dictionary";

	pars_info_t*	info = pars_info_create();
	ut_a(!is_system_tablespace(space));
	pars_info_add_int4_literal(info, "space", space);

	err = que_eval_sql(info,
			   "PROCEDURE P () IS\n"
			   "BEGIN\n"
			   "DELETE FROM SYS_TABLESPACES\n"
			   "WHERE SPACE = :space;\n"
			   "DELETE FROM SYS_DATAFILES\n"
			   "WHERE SPACE = :space;\n"
			   "END;\n",
			   FALSE, trx);

	if (err != DB_SUCCESS) {
		ib::warn() << "Could not delete space_id "
			<< space << " from data dictionary";
	}

	trx->op_info = "";

	return(err);
}

/** Assign a new table ID and put it into the table cache and the transaction.
@param[in,out]	table	Table that needs an ID
@param[in,out]	trx	Transaction */
void
dict_table_assign_new_id(
	dict_table_t*	table,
	trx_t*		trx)
{
2016-12-05 21:04:30 +02:00
	dict_hdr_get_new_id(&table->id, NULL, NULL, table, false);
2016-08-12 11:17:45 +03:00
	trx->table_id = table->id;
}