mariadb/storage/innobase/dict/dict0crea.cc

2364 lines
63 KiB
C++
Raw Normal View History

/*****************************************************************************
2016-04-26 23:05:26 +02:00
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
MDEV-22456 Dropping the adaptive hash index may cause DDL to lock up InnoDB If the InnoDB buffer pool contains many pages for a table or index that is being dropped or rebuilt, and if many of such pages are pointed to by the adaptive hash index, dropping the adaptive hash index may consume a lot of time. The time-consuming operation of dropping the adaptive hash index entries is being executed while the InnoDB data dictionary cache dict_sys is exclusively locked. It is not actually necessary to drop all adaptive hash index entries at the time a table or index is being dropped or rebuilt. We can let the LRU replacement policy of the buffer pool take care of this gradually. For this to work, we must detach the dict_table_t and dict_index_t objects from the main dict_sys cache, and once the last adaptive hash index entry for the detached table is removed (when the garbage page is evicted from the buffer pool) we can free the dict_table_t and dict_index_t object. Related to this, in MDEV-16283, we made ALTER TABLE...DISCARD TABLESPACE skip both the buffer pool eviction and the drop of the adaptive hash index. We shifted the burden to ALTER TABLE...IMPORT TABLESPACE or DROP TABLE. We can remove the eviction from DROP TABLE. We must retain the eviction in the ALTER TABLE...IMPORT TABLESPACE code path, so that in case the discarded table is being re-imported with the same tablespace identifier, the fresh data from the imported tablespace will replace any stale pages in the buffer pool. rpl.rpl_failed_drop_tbl_binlog: Remove the test. DROP TABLE can no longer be interrupted inside InnoDB. fseg_free_page(), fseg_free_step(), fseg_free_step_not_header(), fseg_free_page_low(), fseg_free_extent(): Remove the parameter that specifies whether the adaptive hash index should be dropped. btr_search_lazy_free(): Lazily free an index when the last reference to it is dropped from the adaptive hash index. buf_pool_clear_hash_index(): Declare static, and move to the same compilation unit with the bulk of the adaptive hash index code. dict_index_t::clone(), dict_index_t::clone_if_needed(): Clone an index that is being rebuilt while adaptive hash index entries exist. The original index will be inserted into dict_table_t::freed_indexes and dict_index_t::set_freed() will be called. dict_index_t::set_freed(), dict_index_t::freed(): Note that or check whether the index has been freed. We will use the impossible page number 1 to denote this condition. dict_index_t::n_ahi_pages(): Replaces btr_search_info_get_ref_count(). dict_index_t::detach_columns(): Move the assignment n_fields=0 to ha_innobase_inplace_ctx::clear_added_indexes(). We must have access to the columns when freeing the adaptive hash index. Note: dict_table_t::v_cols[] will remain valid. If virtual columns are dropped or added, the table definition will be reloaded in ha_innobase::commit_inplace_alter_table(). buf_page_mtr_lock(): Drop a stale adaptive hash index if needed. We will also reduce the number of btr_get_search_latch() calls and enclose some more code inside #ifdef BTR_CUR_HASH_ADAPT in order to benefit cmake -DWITH_INNODB_AHI=OFF.
2020-05-15 17:10:59 +03:00
Copyright (c) 2017, 2020, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
2019-05-11 19:25:02 +03:00
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file dict/dict0crea.cc
Database object creation
Created 1/8/1996 Heikki Tuuri
*******************************************************/
#include "dict0crea.h"
#include "btr0pcur.h"
MDEV-22456 Dropping the adaptive hash index may cause DDL to lock up InnoDB If the InnoDB buffer pool contains many pages for a table or index that is being dropped or rebuilt, and if many of such pages are pointed to by the adaptive hash index, dropping the adaptive hash index may consume a lot of time. The time-consuming operation of dropping the adaptive hash index entries is being executed while the InnoDB data dictionary cache dict_sys is exclusively locked. It is not actually necessary to drop all adaptive hash index entries at the time a table or index is being dropped or rebuilt. We can let the LRU replacement policy of the buffer pool take care of this gradually. For this to work, we must detach the dict_table_t and dict_index_t objects from the main dict_sys cache, and once the last adaptive hash index entry for the detached table is removed (when the garbage page is evicted from the buffer pool) we can free the dict_table_t and dict_index_t object. Related to this, in MDEV-16283, we made ALTER TABLE...DISCARD TABLESPACE skip both the buffer pool eviction and the drop of the adaptive hash index. We shifted the burden to ALTER TABLE...IMPORT TABLESPACE or DROP TABLE. We can remove the eviction from DROP TABLE. We must retain the eviction in the ALTER TABLE...IMPORT TABLESPACE code path, so that in case the discarded table is being re-imported with the same tablespace identifier, the fresh data from the imported tablespace will replace any stale pages in the buffer pool. rpl.rpl_failed_drop_tbl_binlog: Remove the test. DROP TABLE can no longer be interrupted inside InnoDB. fseg_free_page(), fseg_free_step(), fseg_free_step_not_header(), fseg_free_page_low(), fseg_free_extent(): Remove the parameter that specifies whether the adaptive hash index should be dropped. btr_search_lazy_free(): Lazily free an index when the last reference to it is dropped from the adaptive hash index. buf_pool_clear_hash_index(): Declare static, and move to the same compilation unit with the bulk of the adaptive hash index code. dict_index_t::clone(), dict_index_t::clone_if_needed(): Clone an index that is being rebuilt while adaptive hash index entries exist. The original index will be inserted into dict_table_t::freed_indexes and dict_index_t::set_freed() will be called. dict_index_t::set_freed(), dict_index_t::freed(): Note that or check whether the index has been freed. We will use the impossible page number 1 to denote this condition. dict_index_t::n_ahi_pages(): Replaces btr_search_info_get_ref_count(). dict_index_t::detach_columns(): Move the assignment n_fields=0 to ha_innobase_inplace_ctx::clear_added_indexes(). We must have access to the columns when freeing the adaptive hash index. Note: dict_table_t::v_cols[] will remain valid. If virtual columns are dropped or added, the table definition will be reloaded in ha_innobase::commit_inplace_alter_table(). buf_page_mtr_lock(): Drop a stale adaptive hash index if needed. We will also reduce the number of btr_get_search_latch() calls and enclose some more code inside #ifdef BTR_CUR_HASH_ADAPT in order to benefit cmake -DWITH_INNODB_AHI=OFF.
2020-05-15 17:10:59 +03:00
#ifdef BTR_CUR_HASH_ADAPT
# include "btr0sea.h"
#endif /* BTR_CUR_HASH_ADAPT */
#include "page0page.h"
#include "mach0data.h"
#include "dict0boot.h"
#include "dict0dict.h"
#include "que0que.h"
#include "row0ins.h"
#include "row0mysql.h"
#include "pars0pars.h"
#include "trx0roll.h"
#include "trx0rseg.h"
MDEV-17158 TRUNCATE is not atomic after MDEV-13564 It turned out that ha_innobase::truncate() would prematurely commit the transaction already before the completion of the ha_innobase::create(). All of this must be atomic. innodb.truncate_crash: Use the correct DEBUG_SYNC point, and tolerate non-truncation of the table, because the redo log for the TRUNCATE transaction commit might be flushed due to some InnoDB background activity. dict_build_tablespace_for_table(): Merge to the function dict_build_table_def_step(). dict_build_table_def_step(): If a table is being created during an already started data dictionary transaction (such as TRUNCATE), persistently write the table_id to the undo log header before creating any file. In this way, the recovery of TRUNCATE will be able to delete the new file before rolling back the rename of the original table. dict_table_rename_in_cache(): Add the parameter replace_new_file, used as part of rolling back a TRUNCATE operation. fil_rename_tablespace_check(): Add the parameter replace_new. If the parameter is set and a file identified by new_path exists, remove a possible tablespace and also the file. create_table_info_t::create_table_def(): Remove some debug assertions that no longer hold. During TRUNCATE, the transaction will already have been started (and performed a rename operation) before the table is created. Also, remove a call to dict_build_tablespace_for_table(). create_table_info_t::create_table(): Add the parameter create_fk=true. During TRUNCATE TABLE, do not add FOREIGN KEY constraints to the InnoDB data dictionary, because they will also not be removed. row_table_add_foreign_constraints(): If trx=NULL, do not modify the InnoDB data dictionary, but only load the FOREIGN KEY constraints from the data dictionary. ha_innobase::create(): Lock the InnoDB data dictionary cache only if no transaction was passed by the caller. Unlock it in any case. innobase_rename_table(): Add the parameter commit = true. If !commit, do not lock or unlock the data dictionary cache. ha_innobase::truncate(): Lock the data dictionary before invoking rename or create, and let ha_innobase::create() unlock it and also commit or roll back the transaction. trx_undo_mark_as_dict(): Renamed from trx_undo_mark_as_dict_operation() and declared global instead of static. row_undo_ins_parse_undo_rec(): If table_id is set, this must be rolling back the rename operation in TRUNCATE TABLE, and therefore replace_new_file=true.
2018-09-10 14:59:58 +03:00
#include "trx0undo.h"
#include "ut0vec.h"
#include "dict0priv.h"
#include "fts0priv.h"
#include "srv0start.h"
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_TABLES system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_tables_tuple(
/*=========================*/
const dict_table_t* table, /*!< in: table */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_tables;
dtuple_t* entry;
dfield_t* dfield;
byte* ptr;
ulint type;
ut_ad(table);
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
ut_ad(!table->space || table->space->id == table->space_id);
ut_ad(heap);
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
ut_ad(table->n_cols >= DATA_N_SYS_COLS);
sys_tables = dict_sys->sys_tables;
entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
dict_table_copy_types(entry, sys_tables);
/* 0: NAME -----------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__NAME);
dfield_set_data(dfield,
table->name.m_name, strlen(table->name.m_name));
/* 1: DB_TRX_ID added later */
/* 2: DB_ROLL_PTR added later */
/* 3: ID -------------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__ID);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 4: N_COLS ---------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__N_COLS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
/* If there is any virtual column, encode it in N_COLS */
mach_write_to_4(ptr, dict_table_encode_n_col(
ulint(table->n_cols - DATA_N_SYS_COLS),
ulint(table->n_v_def))
| (ulint(table->flags & DICT_TF_COMPACT) << 31));
dfield_set_data(dfield, ptr, 4);
/* 5: TYPE (table flags) -----------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__TYPE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
/* Validate the table flags and convert them to what is saved in
SYS_TABLES.TYPE. Table flag values 0 and 1 are both written to
SYS_TABLES.TYPE as 1. */
type = dict_tf_to_sys_tables_type(table->flags);
mach_write_to_4(ptr, type);
dfield_set_data(dfield, ptr, 4);
/* 6: MIX_ID (obsolete) ---------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__MIX_ID);
ptr = static_cast<byte*>(mem_heap_zalloc(heap, 8));
dfield_set_data(dfield, ptr, 8);
/* 7: MIX_LEN (additional flags) --------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__MIX_LEN);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
/* Be sure all non-used bits are zero. */
ut_a(!(table->flags2 & DICT_TF2_UNUSED_BIT_MASK));
mach_write_to_4(ptr, table->flags2);
dfield_set_data(dfield, ptr, 4);
/* 8: CLUSTER_NAME ---------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__CLUSTER_ID);
dfield_set_null(dfield); /* not supported */
/* 9: SPACE ----------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_TABLES__SPACE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
mach_write_to_4(ptr, table->space_id);
dfield_set_data(dfield, ptr, 4);
/*----------------------------------*/
return(entry);
}
/*****************************************************************//**
Based on a table object, this function builds the entry to be inserted
in the SYS_COLUMNS system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_columns_tuple(
/*==========================*/
const dict_table_t* table, /*!< in: table */
ulint i, /*!< in: column number */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_columns;
dtuple_t* entry;
const dict_col_t* column;
dfield_t* dfield;
byte* ptr;
const char* col_name;
ulint num_base = 0;
ulint v_col_no = ULINT_UNDEFINED;
ut_ad(table);
ut_ad(heap);
/* Any column beyond table->n_def would be virtual columns */
if (i >= table->n_def) {
dict_v_col_t* v_col = dict_table_get_nth_v_col(
table, i - table->n_def);
column = &v_col->m_col;
num_base = v_col->num_base;
v_col_no = column->ind;
} else {
column = dict_table_get_nth_col(table, i);
ut_ad(!column->is_virtual());
}
sys_columns = dict_sys->sys_columns;
entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
dict_table_copy_types(entry, sys_columns);
/* 0: TABLE_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__TABLE_ID);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 1: POS ----------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__POS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
if (v_col_no != ULINT_UNDEFINED) {
/* encode virtual column's position in MySQL table and InnoDB
table in "POS" */
mach_write_to_4(ptr, dict_create_v_col_pos(
i - table->n_def, v_col_no));
} else {
mach_write_to_4(ptr, i);
}
dfield_set_data(dfield, ptr, 4);
/* 2: DB_TRX_ID added later */
/* 3: DB_ROLL_PTR added later */
/* 4: NAME ---------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__NAME);
if (i >= table->n_def) {
col_name = dict_table_get_v_col_name(table, i - table->n_def);
} else {
col_name = dict_table_get_col_name(table, i);
}
dfield_set_data(dfield, col_name, ut_strlen(col_name));
/* 5: MTYPE --------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__MTYPE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, column->mtype);
dfield_set_data(dfield, ptr, 4);
/* 6: PRTYPE -------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PRTYPE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, column->prtype);
dfield_set_data(dfield, ptr, 4);
/* 7: LEN ----------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__LEN);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, column->len);
dfield_set_data(dfield, ptr, 4);
/* 8: PREC ---------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PREC);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, num_base);
dfield_set_data(dfield, ptr, 4);
/*---------------------------------*/
return(entry);
}
/** Based on a table object, this function builds the entry to be inserted
in the SYS_VIRTUAL system table. Each row maps a virtual column to one of
its base column.
@param[in] table table
@param[in] v_col_n virtual column number
@param[in] b_col_n base column sequence num
@param[in] heap memory heap
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_virtual_tuple(
const dict_table_t* table,
ulint v_col_n,
ulint b_col_n,
mem_heap_t* heap)
{
dict_table_t* sys_virtual;
dtuple_t* entry;
const dict_col_t* base_column;
dfield_t* dfield;
byte* ptr;
ut_ad(table);
ut_ad(heap);
ut_ad(v_col_n < table->n_v_def);
dict_v_col_t* v_col = dict_table_get_nth_v_col(table, v_col_n);
base_column = v_col->base_col[b_col_n];
sys_virtual = dict_sys->sys_virtual;
entry = dtuple_create(heap, DICT_NUM_COLS__SYS_VIRTUAL
+ DATA_N_SYS_COLS);
dict_table_copy_types(entry, sys_virtual);
/* 0: TABLE_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__TABLE_ID);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
mach_write_to_8(ptr, table->id);
dfield_set_data(dfield, ptr, 8);
/* 1: POS ---------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__POS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
ulint v_col_no = dict_create_v_col_pos(v_col_n, v_col->m_col.ind);
mach_write_to_4(ptr, v_col_no);
dfield_set_data(dfield, ptr, 4);
/* 2: BASE_POS ----------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_VIRTUAL__BASE_POS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, base_column->ind);
dfield_set_data(dfield, ptr, 4);
/* 3: DB_TRX_ID added later */
/* 4: DB_ROLL_PTR added later */
/*---------------------------------*/
return(entry);
}
/***************************************************************//**
Builds a table definition to insert.
@return DB_SUCCESS or error code */
2016-06-21 14:21:03 +02:00
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_build_table_def_step(
/*======================*/
que_thr_t* thr, /*!< in: query thread */
tab_node_t* node) /*!< in: table create node */
{
ut_ad(mutex_own(&dict_sys->mutex));
dict_table_t* table = node->table;
ut_ad(!table->is_temporary());
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
ut_ad(!table->space);
ut_ad(table->space_id == ULINT_UNDEFINED);
dict_table_assign_new_id(table, thr_get_trx(thr));
/* Always set this bit for all new created tables */
2014-05-05 18:20:28 +02:00
DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
DICT_TF2_FLAG_UNSET(table,
DICT_TF2_FTS_AUX_HEX_NAME););
if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)) {
/* This table will need a new tablespace. */
ut_ad(DICT_TF_GET_ZIP_SSIZE(table->flags) == 0
|| dict_table_has_atomic_blobs(table));
trx_t* trx = thr_get_trx(thr);
MDEV-17158 TRUNCATE is not atomic after MDEV-13564 It turned out that ha_innobase::truncate() would prematurely commit the transaction already before the completion of the ha_innobase::create(). All of this must be atomic. innodb.truncate_crash: Use the correct DEBUG_SYNC point, and tolerate non-truncation of the table, because the redo log for the TRUNCATE transaction commit might be flushed due to some InnoDB background activity. dict_build_tablespace_for_table(): Merge to the function dict_build_table_def_step(). dict_build_table_def_step(): If a table is being created during an already started data dictionary transaction (such as TRUNCATE), persistently write the table_id to the undo log header before creating any file. In this way, the recovery of TRUNCATE will be able to delete the new file before rolling back the rename of the original table. dict_table_rename_in_cache(): Add the parameter replace_new_file, used as part of rolling back a TRUNCATE operation. fil_rename_tablespace_check(): Add the parameter replace_new. If the parameter is set and a file identified by new_path exists, remove a possible tablespace and also the file. create_table_info_t::create_table_def(): Remove some debug assertions that no longer hold. During TRUNCATE, the transaction will already have been started (and performed a rename operation) before the table is created. Also, remove a call to dict_build_tablespace_for_table(). create_table_info_t::create_table(): Add the parameter create_fk=true. During TRUNCATE TABLE, do not add FOREIGN KEY constraints to the InnoDB data dictionary, because they will also not be removed. row_table_add_foreign_constraints(): If trx=NULL, do not modify the InnoDB data dictionary, but only load the FOREIGN KEY constraints from the data dictionary. ha_innobase::create(): Lock the InnoDB data dictionary cache only if no transaction was passed by the caller. Unlock it in any case. innobase_rename_table(): Add the parameter commit = true. If !commit, do not lock or unlock the data dictionary cache. ha_innobase::truncate(): Lock the data dictionary before invoking rename or create, and let ha_innobase::create() unlock it and also commit or roll back the transaction. trx_undo_mark_as_dict(): Renamed from trx_undo_mark_as_dict_operation() and declared global instead of static. row_undo_ins_parse_undo_rec(): If table_id is set, this must be rolling back the rename operation in TRUNCATE TABLE, and therefore replace_new_file=true.
2018-09-10 14:59:58 +03:00
ut_ad(trx->table_id);
mtr_t mtr;
trx_undo_t* undo = trx->rsegs.m_redo.undo;
MDEV-17158 TRUNCATE is not atomic after MDEV-13564 It turned out that ha_innobase::truncate() would prematurely commit the transaction already before the completion of the ha_innobase::create(). All of this must be atomic. innodb.truncate_crash: Use the correct DEBUG_SYNC point, and tolerate non-truncation of the table, because the redo log for the TRUNCATE transaction commit might be flushed due to some InnoDB background activity. dict_build_tablespace_for_table(): Merge to the function dict_build_table_def_step(). dict_build_table_def_step(): If a table is being created during an already started data dictionary transaction (such as TRUNCATE), persistently write the table_id to the undo log header before creating any file. In this way, the recovery of TRUNCATE will be able to delete the new file before rolling back the rename of the original table. dict_table_rename_in_cache(): Add the parameter replace_new_file, used as part of rolling back a TRUNCATE operation. fil_rename_tablespace_check(): Add the parameter replace_new. If the parameter is set and a file identified by new_path exists, remove a possible tablespace and also the file. create_table_info_t::create_table_def(): Remove some debug assertions that no longer hold. During TRUNCATE, the transaction will already have been started (and performed a rename operation) before the table is created. Also, remove a call to dict_build_tablespace_for_table(). create_table_info_t::create_table(): Add the parameter create_fk=true. During TRUNCATE TABLE, do not add FOREIGN KEY constraints to the InnoDB data dictionary, because they will also not be removed. row_table_add_foreign_constraints(): If trx=NULL, do not modify the InnoDB data dictionary, but only load the FOREIGN KEY constraints from the data dictionary. ha_innobase::create(): Lock the InnoDB data dictionary cache only if no transaction was passed by the caller. Unlock it in any case. innobase_rename_table(): Add the parameter commit = true. If !commit, do not lock or unlock the data dictionary cache. ha_innobase::truncate(): Lock the data dictionary before invoking rename or create, and let ha_innobase::create() unlock it and also commit or roll back the transaction. trx_undo_mark_as_dict(): Renamed from trx_undo_mark_as_dict_operation() and declared global instead of static. row_undo_ins_parse_undo_rec(): If table_id is set, this must be rolling back the rename operation in TRUNCATE TABLE, and therefore replace_new_file=true.
2018-09-10 14:59:58 +03:00
if (undo && !undo->table_id
&& trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
/* This must be a TRUNCATE operation where
the empty table is created after the old table
was renamed. Be sure to mark the transaction
associated with the new empty table, so that
we can remove it on recovery. */
mtr.start();
undo->table_id = trx->table_id;
undo->dict_operation = TRUE;
page_t* page = trx_undo_page_get(
page_id_t(trx->rsegs.m_redo.rseg->space->id,
undo->hdr_page_no),
&mtr);
mlog_write_ulint(page + undo->hdr_offset
+ TRX_UNDO_DICT_TRANS,
TRUE, MLOG_1BYTE, &mtr);
mlog_write_ull(page + undo->hdr_offset
+ TRX_UNDO_TABLE_ID,
trx->table_id, &mtr);
MDEV-17158 TRUNCATE is not atomic after MDEV-13564 It turned out that ha_innobase::truncate() would prematurely commit the transaction already before the completion of the ha_innobase::create(). All of this must be atomic. innodb.truncate_crash: Use the correct DEBUG_SYNC point, and tolerate non-truncation of the table, because the redo log for the TRUNCATE transaction commit might be flushed due to some InnoDB background activity. dict_build_tablespace_for_table(): Merge to the function dict_build_table_def_step(). dict_build_table_def_step(): If a table is being created during an already started data dictionary transaction (such as TRUNCATE), persistently write the table_id to the undo log header before creating any file. In this way, the recovery of TRUNCATE will be able to delete the new file before rolling back the rename of the original table. dict_table_rename_in_cache(): Add the parameter replace_new_file, used as part of rolling back a TRUNCATE operation. fil_rename_tablespace_check(): Add the parameter replace_new. If the parameter is set and a file identified by new_path exists, remove a possible tablespace and also the file. create_table_info_t::create_table_def(): Remove some debug assertions that no longer hold. During TRUNCATE, the transaction will already have been started (and performed a rename operation) before the table is created. Also, remove a call to dict_build_tablespace_for_table(). create_table_info_t::create_table(): Add the parameter create_fk=true. During TRUNCATE TABLE, do not add FOREIGN KEY constraints to the InnoDB data dictionary, because they will also not be removed. row_table_add_foreign_constraints(): If trx=NULL, do not modify the InnoDB data dictionary, but only load the FOREIGN KEY constraints from the data dictionary. ha_innobase::create(): Lock the InnoDB data dictionary cache only if no transaction was passed by the caller. Unlock it in any case. innobase_rename_table(): Add the parameter commit = true. If !commit, do not lock or unlock the data dictionary cache. ha_innobase::truncate(): Lock the data dictionary before invoking rename or create, and let ha_innobase::create() unlock it and also commit or roll back the transaction. trx_undo_mark_as_dict(): Renamed from trx_undo_mark_as_dict_operation() and declared global instead of static. row_undo_ins_parse_undo_rec(): If table_id is set, this must be rolling back the rename operation in TRUNCATE TABLE, and therefore replace_new_file=true.
2018-09-10 14:59:58 +03:00
mtr.commit();
log_write_up_to(mtr.commit_lsn(), true);
}
/* Get a new tablespace ID */
ulint space_id;
dict_hdr_get_new_id(NULL, NULL, &space_id, table, false);
DBUG_EXECUTE_IF(
"ib_create_table_fail_out_of_space_ids",
space_id = ULINT_UNDEFINED;
);
if (space_id == ULINT_UNDEFINED) {
MDEV-17158 TRUNCATE is not atomic after MDEV-13564 It turned out that ha_innobase::truncate() would prematurely commit the transaction already before the completion of the ha_innobase::create(). All of this must be atomic. innodb.truncate_crash: Use the correct DEBUG_SYNC point, and tolerate non-truncation of the table, because the redo log for the TRUNCATE transaction commit might be flushed due to some InnoDB background activity. dict_build_tablespace_for_table(): Merge to the function dict_build_table_def_step(). dict_build_table_def_step(): If a table is being created during an already started data dictionary transaction (such as TRUNCATE), persistently write the table_id to the undo log header before creating any file. In this way, the recovery of TRUNCATE will be able to delete the new file before rolling back the rename of the original table. dict_table_rename_in_cache(): Add the parameter replace_new_file, used as part of rolling back a TRUNCATE operation. fil_rename_tablespace_check(): Add the parameter replace_new. If the parameter is set and a file identified by new_path exists, remove a possible tablespace and also the file. create_table_info_t::create_table_def(): Remove some debug assertions that no longer hold. During TRUNCATE, the transaction will already have been started (and performed a rename operation) before the table is created. Also, remove a call to dict_build_tablespace_for_table(). create_table_info_t::create_table(): Add the parameter create_fk=true. During TRUNCATE TABLE, do not add FOREIGN KEY constraints to the InnoDB data dictionary, because they will also not be removed. row_table_add_foreign_constraints(): If trx=NULL, do not modify the InnoDB data dictionary, but only load the FOREIGN KEY constraints from the data dictionary. ha_innobase::create(): Lock the InnoDB data dictionary cache only if no transaction was passed by the caller. Unlock it in any case. innobase_rename_table(): Add the parameter commit = true. If !commit, do not lock or unlock the data dictionary cache. ha_innobase::truncate(): Lock the data dictionary before invoking rename or create, and let ha_innobase::create() unlock it and also commit or roll back the transaction. trx_undo_mark_as_dict(): Renamed from trx_undo_mark_as_dict_operation() and declared global instead of static. row_undo_ins_parse_undo_rec(): If table_id is set, this must be rolling back the rename operation in TRUNCATE TABLE, and therefore replace_new_file=true.
2018-09-10 14:59:58 +03:00
return DB_ERROR;
}
/* Determine the tablespace flags. */
bool has_data_dir = DICT_TF_HAS_DATA_DIR(table->flags);
ulint fsp_flags = dict_tf_to_fsp_flags(table->flags);
ut_ad(!has_data_dir || table->data_dir_path);
char* filepath = has_data_dir
? fil_make_filepath(table->data_dir_path,
table->name.m_name, IBD, true)
: fil_make_filepath(NULL,
table->name.m_name, IBD, false);
/* We create a new single-table tablespace for the table.
We initially let it be 4 pages:
- page 0 is the fsp header and an extent descriptor page,
- page 1 is an ibuf bitmap page,
- page 2 is the first inode page,
- page 3 will contain the root of the clustered index of
the table we create here. */
dberr_t err;
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
table->space = fil_ibd_create(
space_id, table->name.m_name, filepath, fsp_flags,
FIL_IBD_FILE_INITIAL_SIZE,
node->mode, node->key_id, &err);
ut_free(filepath);
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
if (!table->space) {
ut_ad(err != DB_SUCCESS);
return err;
}
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
table->space_id = space_id;
mtr.start();
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
mtr.set_named_space(table->space);
fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
mtr.commit();
} else {
ut_ad(dict_tf_get_rec_format(table->flags)
!= REC_FORMAT_COMPRESSED);
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
table->space = fil_system.sys_space;
table->space_id = TRX_SYS_SPACE;
}
ins_node_set_new_row(node->tab_def,
dict_create_sys_tables_tuple(table, node->heap));
return DB_SUCCESS;
}
/** Builds a SYS_VIRTUAL row definition to insert.
@param[in] node table create node */
static
void
dict_build_v_col_def_step(
tab_node_t* node)
{
dtuple_t* row;
row = dict_create_sys_virtual_tuple(node->table, node->col_no,
node->base_col_no,
node->heap);
ins_node_set_new_row(node->v_col_def, row);
}
/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
in the SYS_INDEXES system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_indexes_tuple(
/*==========================*/
const dict_index_t* index, /*!< in: index */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_indexes;
dtuple_t* entry;
dfield_t* dfield;
byte* ptr;
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(index);
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
ut_ad(index->table->space || index->table->file_unreadable);
ut_ad(!index->table->space
|| index->table->space->id == index->table->space_id);
ut_ad(heap);
sys_indexes = dict_sys->sys_indexes;
entry = dtuple_create(
heap, DICT_NUM_COLS__SYS_INDEXES + DATA_N_SYS_COLS);
dict_table_copy_types(entry, sys_indexes);
/* 0: TABLE_ID -----------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__TABLE_ID);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
mach_write_to_8(ptr, index->table->id);
dfield_set_data(dfield, ptr, 8);
/* 1: ID ----------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__ID);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
mach_write_to_8(ptr, index->id);
dfield_set_data(dfield, ptr, 8);
/* 2: DB_TRX_ID added later */
/* 3: DB_ROLL_PTR added later */
/* 4: NAME --------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__NAME);
if (!index->is_committed()) {
ulint len = strlen(index->name) + 1;
char* name = static_cast<char*>(
mem_heap_alloc(heap, len));
*name = *TEMP_INDEX_PREFIX_STR;
memcpy(name + 1, index->name, len - 1);
dfield_set_data(dfield, name, len);
} else {
dfield_set_data(dfield, index->name, strlen(index->name));
}
/* 5: N_FIELDS ----------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__N_FIELDS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, index->n_fields);
dfield_set_data(dfield, ptr, 4);
/* 6: TYPE --------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__TYPE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, index->type);
dfield_set_data(dfield, ptr, 4);
/* 7: SPACE --------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__SPACE);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
mach_write_to_4(ptr, index->table->space_id);
dfield_set_data(dfield, ptr, 4);
/* 8: PAGE_NO --------------------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__PAGE_NO);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, FIL_NULL);
dfield_set_data(dfield, ptr, 4);
/* 9: MERGE_THRESHOLD ----------------*/
dfield = dtuple_get_nth_field(
entry, DICT_COL__SYS_INDEXES__MERGE_THRESHOLD);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
mach_write_to_4(ptr, DICT_INDEX_MERGE_THRESHOLD_DEFAULT);
dfield_set_data(dfield, ptr, 4);
/*--------------------------------*/
return(entry);
}
/*****************************************************************//**
Based on an index object, this function builds the entry to be inserted
in the SYS_FIELDS system table.
@return the tuple which should be inserted */
static
dtuple_t*
dict_create_sys_fields_tuple(
/*=========================*/
const dict_index_t* index, /*!< in: index */
ulint fld_no, /*!< in: field number */
mem_heap_t* heap) /*!< in: memory heap from
which the memory for the built
tuple is allocated */
{
dict_table_t* sys_fields;
dtuple_t* entry;
dict_field_t* field;
dfield_t* dfield;
byte* ptr;
ibool index_contains_column_prefix_field = FALSE;
ulint j;
ut_ad(index);
ut_ad(heap);
for (j = 0; j < index->n_fields; j++) {
if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
index_contains_column_prefix_field = TRUE;
break;
}
}
field = dict_index_get_nth_field(index, fld_no);
sys_fields = dict_sys->sys_fields;
entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS);
dict_table_copy_types(entry, sys_fields);
/* 0: INDEX_ID -----------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__INDEX_ID);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
mach_write_to_8(ptr, index->id);
dfield_set_data(dfield, ptr, 8);
/* 1: POS; FIELD NUMBER & PREFIX LENGTH -----------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__POS);
ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
if (index_contains_column_prefix_field) {
/* If there are column prefix fields in the index, then
we store the number of the field to the 2 HIGH bytes
and the prefix length to the 2 low bytes, */
mach_write_to_4(ptr, (fld_no << 16) + field->prefix_len);
} else {
/* Else we store the number of the field to the 2 LOW bytes.
This is to keep the storage format compatible with
InnoDB versions < 4.0.14. */
mach_write_to_4(ptr, fld_no);
}
dfield_set_data(dfield, ptr, 4);
/* 2: DB_TRX_ID added later */
/* 3: DB_ROLL_PTR added later */
/* 4: COL_NAME -------------------------*/
dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_FIELDS__COL_NAME);
dfield_set_data(dfield, field->name,
ut_strlen(field->name));
/*---------------------------------*/
return(entry);
}
/*****************************************************************//**
Creates the tuple with which the index entry is searched for writing the index
tree root page number, if such a tree is created.
@return the tuple for search */
static
dtuple_t*
dict_create_search_tuple(
/*=====================*/
const dtuple_t* tuple, /*!< in: the tuple inserted in the SYS_INDEXES
table */
mem_heap_t* heap) /*!< in: memory heap from which the memory for
the built tuple is allocated */
{
dtuple_t* search_tuple;
const dfield_t* field1;
dfield_t* field2;
ut_ad(tuple && heap);
search_tuple = dtuple_create(heap, 2);
field1 = dtuple_get_nth_field(tuple, 0);
field2 = dtuple_get_nth_field(search_tuple, 0);
dfield_copy(field2, field1);
field1 = dtuple_get_nth_field(tuple, 1);
field2 = dtuple_get_nth_field(search_tuple, 1);
dfield_copy(field2, field1);
ut_ad(dtuple_validate(search_tuple));
return(search_tuple);
}
/***************************************************************//**
Builds an index definition row to insert.
@return DB_SUCCESS or error code */
2016-06-21 14:21:03 +02:00
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_build_index_def_step(
/*======================*/
que_thr_t* thr, /*!< in: query thread */
ind_node_t* node) /*!< in: index create node */
{
dict_table_t* table;
dict_index_t* index;
dtuple_t* row;
trx_t* trx;
ut_ad(mutex_own(&dict_sys->mutex));
trx = thr_get_trx(thr);
index = node->index;
table = index->table = node->table = dict_table_open_on_name(
node->table_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
if (table == NULL) {
return(DB_TABLE_NOT_FOUND);
}
if (!trx->table_id) {
/* Record only the first table id. */
trx->table_id = table->id;
}
ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
|| dict_index_is_clust(index));
dict_hdr_get_new_id(NULL, &index->id, NULL, table, false);
/* Inherit the space id from the table; we store all indexes of a
table in the same tablespace */
node->page_no = FIL_NULL;
row = dict_create_sys_indexes_tuple(index, node->heap);
node->ind_row = row;
ins_node_set_new_row(node->ind_def, row);
/* Note that the index was created by this transaction. */
index->trx_id = trx->id;
ut_ad(table->def_trx_id <= trx->id);
table->def_trx_id = trx->id;
dict_table_close(table, true, false);
return(DB_SUCCESS);
}
/***************************************************************//**
Builds an index definition without updating SYSTEM TABLES.
@return DB_SUCCESS or error code */
void
dict_build_index_def(
/*=================*/
const dict_table_t* table, /*!< in: table */
dict_index_t* index, /*!< in/out: index */
trx_t* trx) /*!< in/out: InnoDB transaction handle */
{
ut_ad(mutex_own(&dict_sys->mutex));
if (trx->table_id == 0) {
/* Record only the first table id. */
trx->table_id = table->id;
}
ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
|| dict_index_is_clust(index));
dict_hdr_get_new_id(NULL, &index->id, NULL, table, false);
/* Note that the index was created by this transaction. */
index->trx_id = trx->id;
}
/***************************************************************//**
Builds a field definition row to insert. */
static
void
dict_build_field_def_step(
/*======================*/
ind_node_t* node) /*!< in: index create node */
{
dict_index_t* index;
dtuple_t* row;
index = node->index;
row = dict_create_sys_fields_tuple(index, node->field_no, node->heap);
ins_node_set_new_row(node->field_def, row);
}
/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
2016-06-21 14:21:03 +02:00
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_create_index_tree_step(
/*========================*/
ind_node_t* node) /*!< in: index create node */
{
mtr_t mtr;
btr_pcur_t pcur;
dict_index_t* index;
dict_table_t* sys_indexes;
dtuple_t* search_tuple;
ut_ad(mutex_own(&dict_sys->mutex));
index = node->index;
sys_indexes = dict_sys->sys_indexes;
if (index->type == DICT_FTS) {
/* FTS index does not need an index tree */
return(DB_SUCCESS);
}
/* Run a mini-transaction in which the index tree is allocated for
the index and its root address is written to the index entry in
sys_indexes */
mtr.start();
search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
&pcur, &mtr);
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
dberr_t err = DB_SUCCESS;
if (!index->is_readable()) {
node->page_no = FIL_NULL;
} else {
index->set_modified(mtr);
node->page_no = btr_create(
index->type, index->table->space,
index->id, index, NULL, &mtr);
if (node->page_no == FIL_NULL) {
err = DB_OUT_OF_FILE_SPACE;
}
DBUG_EXECUTE_IF("ib_import_create_index_failure_1",
node->page_no = FIL_NULL;
err = DB_OUT_OF_FILE_SPACE; );
}
ulint len;
byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur),
DICT_FLD__SYS_INDEXES__PAGE_NO,
&len);
ut_ad(len == 4);
if (mach_read_from_4(data) != node->page_no) {
mlog_write_ulint(data, node->page_no, MLOG_4BYTES, &mtr);
}
mtr.commit();
return(err);
}
/***************************************************************//**
Creates an index tree for the index if it is not a member of a cluster.
Don't update SYSTEM TABLES.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
dberr_t
dict_create_index_tree_in_mem(
/*==========================*/
dict_index_t* index, /*!< in/out: index */
const trx_t* trx) /*!< in: InnoDB transaction handle */
{
mtr_t mtr;
ut_ad(mutex_own(&dict_sys->mutex));
ut_ad(!(index->type & DICT_FTS));
mtr_start(&mtr);
mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
/* Currently this function is being used by temp-tables only.
Import/Discard of temp-table is blocked and so this assert. */
ut_ad(index->is_readable());
ut_ad(!(index->table->flags2 & DICT_TF2_DISCARDED));
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
index->page = btr_create(index->type, index->table->space,
index->id, index, NULL, &mtr);
mtr_commit(&mtr);
index->trx_id = trx->id;
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
return index->page == FIL_NULL ? DB_OUT_OF_FILE_SPACE : DB_SUCCESS;
}
/** Drop the index tree associated with a row in SYS_INDEXES table.
@param[in,out] rec SYS_INDEXES record
@param[in,out] pcur persistent cursor on rec
@param[in,out] trx dictionary transaction
@param[in,out] mtr mini-transaction
@return whether freeing the B-tree was attempted */
bool dict_drop_index_tree(rec_t* rec, btr_pcur_t* pcur, trx_t* trx, mtr_t* mtr)
{
const byte* ptr;
ulint len;
ulint root_page_no;
ut_ad(mutex_own(&dict_sys->mutex));
ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
ut_ad(len == 4);
btr_pcur_store_position(pcur, mtr);
root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
if (root_page_no == FIL_NULL) {
/* The tree has already been freed */
return(false);
}
mlog_write_ulint(const_cast<byte*>(ptr), FIL_NULL, MLOG_4BYTES, mtr);
ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
ut_ad(len == 4);
const uint32_t space_id = mach_read_from_4(ptr);
ut_ad(space_id < SRV_TMP_SPACE_ID);
if (space_id != TRX_SYS_SPACE
&& trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE) {
/* We are about to delete the entire .ibd file;
do not bother to free pages inside it. */
return false;
}
ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__ID, &len);
ut_ad(len == 8);
bool found;
const page_size_t page_size(fil_space_get_page_size(space_id,
&found));
if (!found) {
/* It is a single table tablespace and the .ibd file is
missing: do nothing */
return(false);
}
/* If tablespace is scheduled for truncate, do not try to drop
the indexes in that tablespace. There is a truncate fixup action
which will take care of it. */
if (srv_is_tablespace_truncated(space_id)) {
return(false);
}
btr_free_if_exists(page_id_t(space_id, root_page_no), page_size,
mach_read_from_8(ptr), mtr);
return(true);
}
/*******************************************************************//**
Recreate the index tree associated with a row in SYS_INDEXES table.
@return new root page number, or FIL_NULL on failure */
ulint
dict_recreate_index_tree(
/*=====================*/
const dict_table_t*
table, /*!< in/out: the table the index belongs to */
btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
record in the clustered index of
SYS_INDEXES table. The cursor may be
repositioned in this call. */
mtr_t* mtr) /*!< in/out: mtr having the latch
on the record page. */
{
ut_ad(mutex_own(&dict_sys->mutex));
ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
ut_ad(!table->space || table->space->id == table->space_id);
ulint len;
const rec_t* rec = btr_pcur_get_rec(pcur);
const byte* ptr = rec_get_nth_field_old(
rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
ut_ad(len == 4);
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
ut_ad(table->space_id == mach_read_from_4(
rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__SPACE,
&len)));
ut_ad(len == 4);
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
if (!table->space) {
/* It is a single table tablespae and the .ibd file is
missing: do nothing. */
ib::warn()
<< "Trying to TRUNCATE a missing .ibd file of table "
<< table->name << "!";
return(FIL_NULL);
}
ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
ut_ad(len == 4);
ulint type = mach_read_from_4(ptr);
ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
ut_ad(len == 8);
index_id_t index_id = mach_read_from_8(ptr);
/* We will need to commit the mini-transaction in order to avoid
deadlocks in the btr_create() call, because otherwise we would
be freeing and allocating pages in the same mini-transaction. */
btr_pcur_store_position(pcur, mtr);
mtr_commit(mtr);
mtr_start(mtr);
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
mtr->set_named_space(table->space);
btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
/* Find the index corresponding to this SYS_INDEXES record. */
for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
index != NULL;
index = UT_LIST_GET_NEXT(indexes, index)) {
if (index->id == index_id) {
ulint root_page_no = (index->type & DICT_FTS)
? FIL_NULL
MDEV-12266: Change dict_table_t::space to fil_space_t* InnoDB always keeps all tablespaces in the fil_system cache. The fil_system.LRU is only for closing file handles; the fil_space_t and fil_node_t for all data files will remain in main memory. Between startup to shutdown, they can only be created and removed by DDL statements. Therefore, we can let dict_table_t::space point directly to the fil_space_t. dict_table_t::space_id: A numeric tablespace ID for the corner cases where we do not have a tablespace. The most prominent examples are ALTER TABLE...DISCARD TABLESPACE or a missing or corrupted file. There are a few functional differences; most notably: (1) DROP TABLE will delete matching .ibd and .cfg files, even if they were not attached to the data dictionary. (2) Some error messages will report file names instead of numeric IDs. There still are many functions that use numeric tablespace IDs instead of fil_space_t*, and many functions could be converted to fil_space_t member functions. Also, Tablespace and Datafile should be merged with fil_space_t and fil_node_t. page_id_t and buf_page_get_gen() could use fil_space_t& instead of a numeric ID, and after moving to a single buffer pool (MDEV-15058), buf_pool_t::page_hash could be moved to fil_space_t::page_hash. FilSpace: Remove. Only few calls to fil_space_acquire() will remain, and gradually they should be removed. mtr_t::set_named_space_id(ulint): Renamed from set_named_space(), to prevent accidental calls to this slower function. Very few callers remain. fseg_create(), fsp_reserve_free_extents(): Take fil_space_t* as a parameter instead of a space_id. fil_space_t::rename(): Wrapper for fil_rename_tablespace_check(), fil_name_write_rename(), fil_rename_tablespace(). Mariabackup passes the parameter log=false; InnoDB passes log=true. dict_mem_table_create(): Take fil_space_t* instead of space_id as parameter. dict_process_sys_tables_rec_and_mtr_commit(): Replace the parameter 'status' with 'bool cached'. dict_get_and_save_data_dir_path(): Avoid copying the fil_node_t::name. fil_ibd_open(): Return the tablespace. fil_space_t::set_imported(): Replaces fil_space_set_imported(). truncate_t: Change many member function parameters to fil_space_t*, and remove page_size parameters. row_truncate_prepare(): Merge to its only caller. row_drop_table_from_cache(): Assert that the table is persistent. dict_create_sys_indexes_tuple(): Write SYS_INDEXES.SPACE=FIL_NULL if the tablespace has been discarded. row_import_update_discarded_flag(): Remove a constant parameter.
2018-03-27 16:31:10 +03:00
: btr_create(type, table->space,
index_id, index, NULL, mtr);
index->page = unsigned(root_page_no);
return root_page_no;
}
}
ib::error() << "Failed to create index with index id " << index_id
<< " of table " << table->name;
return(FIL_NULL);
}
/*********************************************************************//**
Creates a table create graph.
@return own: table create node */
tab_node_t*
tab_create_graph_create(
/*====================*/
dict_table_t* table, /*!< in: table to create, built as a memory data
structure */
mem_heap_t* heap, /*!< in: heap where created */
fil_encryption_t mode, /*!< in: encryption mode */
uint32_t key_id) /*!< in: encryption key_id */
{
tab_node_t* node;
node = static_cast<tab_node_t*>(
mem_heap_alloc(heap, sizeof(tab_node_t)));
node->common.type = QUE_NODE_CREATE_TABLE;
node->table = table;
node->state = TABLE_BUILD_TABLE_DEF;
node->heap = mem_heap_create(256);
node->mode = mode;
node->key_id = key_id;
node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables,
heap);
node->tab_def->common.parent = node;
node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns,
heap);
node->col_def->common.parent = node;
node->v_col_def = ins_node_create(INS_DIRECT, dict_sys->sys_virtual,
heap);
node->v_col_def->common.parent = node;
return(node);
}
/** Creates an index create graph.
@param[in] index index to create, built as a memory data structure
@param[in] table table name
@param[in,out] heap heap where created
@param[in] add_v new virtual columns added in the same clause with
add index
@return own: index create node */
ind_node_t*
ind_create_graph_create(
dict_index_t* index,
const char* table,
mem_heap_t* heap,
const dict_add_v_col_t* add_v)
{
ind_node_t* node;
node = static_cast<ind_node_t*>(
mem_heap_alloc(heap, sizeof(ind_node_t)));
node->common.type = QUE_NODE_CREATE_INDEX;
node->index = index;
node->table_name = table;
node->add_v = add_v;
node->state = INDEX_BUILD_INDEX_DEF;
node->page_no = FIL_NULL;
node->heap = mem_heap_create(256);
node->ind_def = ins_node_create(INS_DIRECT,
dict_sys->sys_indexes, heap);
node->ind_def->common.parent = node;
node->field_def = ins_node_create(INS_DIRECT,
dict_sys->sys_fields, heap);
node->field_def->common.parent = node;
return(node);
}
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
@return query thread to run next or NULL */
que_thr_t*
dict_create_table_step(
/*===================*/
que_thr_t* thr) /*!< in: query thread */
{
tab_node_t* node;
dberr_t err = DB_ERROR;
trx_t* trx;
ut_ad(thr);
ut_ad(mutex_own(&dict_sys->mutex));
trx = thr_get_trx(thr);
node = static_cast<tab_node_t*>(thr->run_node);
ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE);
if (thr->prev_node == que_node_get_parent(node)) {
node->state = TABLE_BUILD_TABLE_DEF;
}
if (node->state == TABLE_BUILD_TABLE_DEF) {
/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
err = dict_build_table_def_step(thr, node);
if (err != DB_SUCCESS) {
goto function_exit;
}
node->state = TABLE_BUILD_COL_DEF;
node->col_no = 0;
thr->run_node = node->tab_def;
return(thr);
}
if (node->state == TABLE_BUILD_COL_DEF) {
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
if (node->col_no + DATA_N_SYS_COLS
< (static_cast<ulint>(node->table->n_def)
+ static_cast<ulint>(node->table->n_v_def))) {
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
ulint i = node->col_no++;
if (i + DATA_N_SYS_COLS >= node->table->n_def) {
i += DATA_N_SYS_COLS;
}
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
ins_node_set_new_row(
node->col_def,
dict_create_sys_columns_tuple(node->table, i,
node->heap));
thr->run_node = node->col_def;
return(thr);
} else {
/* Move on to SYS_VIRTUAL table */
node->col_no = 0;
node->base_col_no = 0;
node->state = TABLE_BUILD_V_COL_DEF;
}
}
if (node->state == TABLE_BUILD_V_COL_DEF) {
if (node->col_no < static_cast<ulint>(node->table->n_v_def)) {
dict_v_col_t* v_col = dict_table_get_nth_v_col(
node->table, node->col_no);
/* If no base column */
while (v_col->num_base == 0) {
node->col_no++;
if (node->col_no == static_cast<ulint>(
(node->table)->n_v_def)) {
node->state = TABLE_ADD_TO_CACHE;
break;
}
v_col = dict_table_get_nth_v_col(
node->table, node->col_no);
node->base_col_no = 0;
}
if (node->state != TABLE_ADD_TO_CACHE) {
ut_ad(node->col_no == v_col->v_pos);
dict_build_v_col_def_step(node);
if (node->base_col_no < v_col->num_base - 1) {
/* move on to next base column */
node->base_col_no++;
} else {
/* move on to next virtual column */
node->col_no++;
node->base_col_no = 0;
}
thr->run_node = node->v_col_def;
return(thr);
}
} else {
node->state = TABLE_ADD_TO_CACHE;
}
}
if (node->state == TABLE_ADD_TO_CACHE) {
DBUG_EXECUTE_IF("ib_ddl_crash_during_create", DBUG_SUICIDE(););
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
node->table->can_be_evicted = true;
node->table->add_to_cache();
err = DB_SUCCESS;
}
function_exit:
trx->error_state = err;
if (err == DB_SUCCESS) {
/* Ok: do nothing */
} else if (err == DB_LOCK_WAIT) {
return(NULL);
} else {
/* SQL error detected */
return(NULL);
}
thr->run_node = que_node_get_parent(node);
return(thr);
}
/***********************************************************//**
Creates an index. This is a high-level function used in SQL execution
graphs.
@return query thread to run next or NULL */
que_thr_t*
dict_create_index_step(
/*===================*/
que_thr_t* thr) /*!< in: query thread */
{
ind_node_t* node;
dberr_t err = DB_ERROR;
trx_t* trx;
ut_ad(thr);
ut_ad(mutex_own(&dict_sys->mutex));
trx = thr_get_trx(thr);
node = static_cast<ind_node_t*>(thr->run_node);
ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);
if (thr->prev_node == que_node_get_parent(node)) {
node->state = INDEX_BUILD_INDEX_DEF;
}
if (node->state == INDEX_BUILD_INDEX_DEF) {
/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
err = dict_build_index_def_step(thr, node);
if (err != DB_SUCCESS) {
goto function_exit;
}
node->state = INDEX_BUILD_FIELD_DEF;
node->field_no = 0;
thr->run_node = node->ind_def;
return(thr);
}
if (node->state == INDEX_BUILD_FIELD_DEF) {
if (node->field_no < (node->index)->n_fields) {
dict_build_field_def_step(node);
node->field_no++;
thr->run_node = node->field_def;
return(thr);
} else {
node->state = INDEX_ADD_TO_CACHE;
}
}
if (node->state == INDEX_ADD_TO_CACHE) {
ut_ad(node->index->table == node->table);
err = dict_index_add_to_cache(node->index, FIL_NULL,
node->add_v);
ut_ad((node->index == NULL) == (err != DB_SUCCESS));
if (!node->index) {
goto function_exit;
}
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
ut_ad(!node->index->is_instant());
ut_ad(node->index->n_core_null_bytes
== ((dict_index_is_clust(node->index)
&& node->table->supports_instant())
? dict_index_t::NO_CORE_NULL_BYTES
: UT_BITS_IN_BYTES(
unsigned(node->index->n_nullable))));
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
node->index->n_core_null_bytes = UT_BITS_IN_BYTES(
unsigned(node->index->n_nullable));
node->state = INDEX_CREATE_INDEX_TREE;
}
if (node->state == INDEX_CREATE_INDEX_TREE) {
err = dict_create_index_tree_step(node);
DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail",
err = DB_OUT_OF_MEMORY;);
if (err != DB_SUCCESS) {
/* If this is a FTS index, we will need to remove
it from fts->cache->indexes list as well */
if ((node->index->type & DICT_FTS)
&& node->table->fts) {
fts_index_cache_t* index_cache;
rw_lock_x_lock(
&node->table->fts->cache->init_lock);
index_cache = (fts_index_cache_t*)
fts_find_index_cache(
node->table->fts->cache,
node->index);
if (index_cache->words) {
rbt_free(index_cache->words);
index_cache->words = 0;
}
ib_vector_remove(
node->table->fts->cache->indexes,
*reinterpret_cast<void**>(index_cache));
rw_lock_x_unlock(
&node->table->fts->cache->init_lock);
}
MDEV-22456 Dropping the adaptive hash index may cause DDL to lock up InnoDB If the InnoDB buffer pool contains many pages for a table or index that is being dropped or rebuilt, and if many of such pages are pointed to by the adaptive hash index, dropping the adaptive hash index may consume a lot of time. The time-consuming operation of dropping the adaptive hash index entries is being executed while the InnoDB data dictionary cache dict_sys is exclusively locked. It is not actually necessary to drop all adaptive hash index entries at the time a table or index is being dropped or rebuilt. We can let the LRU replacement policy of the buffer pool take care of this gradually. For this to work, we must detach the dict_table_t and dict_index_t objects from the main dict_sys cache, and once the last adaptive hash index entry for the detached table is removed (when the garbage page is evicted from the buffer pool) we can free the dict_table_t and dict_index_t object. Related to this, in MDEV-16283, we made ALTER TABLE...DISCARD TABLESPACE skip both the buffer pool eviction and the drop of the adaptive hash index. We shifted the burden to ALTER TABLE...IMPORT TABLESPACE or DROP TABLE. We can remove the eviction from DROP TABLE. We must retain the eviction in the ALTER TABLE...IMPORT TABLESPACE code path, so that in case the discarded table is being re-imported with the same tablespace identifier, the fresh data from the imported tablespace will replace any stale pages in the buffer pool. rpl.rpl_failed_drop_tbl_binlog: Remove the test. DROP TABLE can no longer be interrupted inside InnoDB. fseg_free_page(), fseg_free_step(), fseg_free_step_not_header(), fseg_free_page_low(), fseg_free_extent(): Remove the parameter that specifies whether the adaptive hash index should be dropped. btr_search_lazy_free(): Lazily free an index when the last reference to it is dropped from the adaptive hash index. buf_pool_clear_hash_index(): Declare static, and move to the same compilation unit with the bulk of the adaptive hash index code. dict_index_t::clone(), dict_index_t::clone_if_needed(): Clone an index that is being rebuilt while adaptive hash index entries exist. The original index will be inserted into dict_table_t::freed_indexes and dict_index_t::set_freed() will be called. dict_index_t::set_freed(), dict_index_t::freed(): Note that or check whether the index has been freed. We will use the impossible page number 1 to denote this condition. dict_index_t::n_ahi_pages(): Replaces btr_search_info_get_ref_count(). dict_index_t::detach_columns(): Move the assignment n_fields=0 to ha_innobase_inplace_ctx::clear_added_indexes(). We must have access to the columns when freeing the adaptive hash index. Note: dict_table_t::v_cols[] will remain valid. If virtual columns are dropped or added, the table definition will be reloaded in ha_innobase::commit_inplace_alter_table(). buf_page_mtr_lock(): Drop a stale adaptive hash index if needed. We will also reduce the number of btr_get_search_latch() calls and enclose some more code inside #ifdef BTR_CUR_HASH_ADAPT in order to benefit cmake -DWITH_INNODB_AHI=OFF.
2020-05-15 17:10:59 +03:00
#ifdef BTR_CUR_HASH_ADAPT
ut_ad(!node->index->search_info->ref_count);
#endif /* BTR_CUR_HASH_ADAPT */
dict_index_remove_from_cache(node->table, node->index);
node->index = NULL;
goto function_exit;
}
node->index->page = node->page_no;
/* These should have been set in
dict_build_index_def_step() and
dict_index_add_to_cache(). */
ut_ad(node->index->trx_id == trx->id);
ut_ad(node->index->table->def_trx_id == trx->id);
}
function_exit:
trx->error_state = err;
if (err == DB_SUCCESS) {
/* Ok: do nothing */
} else if (err == DB_LOCK_WAIT) {
return(NULL);
} else {
/* SQL error detected */
return(NULL);
}
thr->run_node = que_node_get_parent(node);
return(thr);
}
/****************************************************************//**
Check whether a system table exists. Additionally, if it exists,
move it to the non-LRU end of the table LRU list. This is oly used
for system tables that can be upgraded or added to an older database,
which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and
SYS_DATAFILES.
@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists
but is not current, DB_TABLE_NOT_FOUND if it does not exist*/
static
dberr_t
dict_check_if_system_table_exists(
/*==============================*/
const char* tablename, /*!< in: name of table */
ulint num_fields, /*!< in: number of fields */
ulint num_indexes) /*!< in: number of indexes */
{
dict_table_t* sys_table;
dberr_t error = DB_SUCCESS;
ut_a(srv_get_active_thread_type() == SRV_NONE);
mutex_enter(&dict_sys->mutex);
sys_table = dict_table_get_low(tablename);
if (sys_table == NULL) {
error = DB_TABLE_NOT_FOUND;
} else if (UT_LIST_GET_LEN(sys_table->indexes) != num_indexes
|| sys_table->n_cols != num_fields) {
error = DB_CORRUPTION;
} else {
/* This table has already been created, and it is OK.
Ensure that it can't be evicted from the table LRU cache. */
dict_table_prevent_eviction(sys_table);
}
mutex_exit(&dict_sys->mutex);
return(error);
}
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_foreign_constraint_tables(void)
/*================================================*/
{
trx_t* trx;
my_bool srv_file_per_table_backup;
dberr_t err;
dberr_t sys_foreign_err;
dberr_t sys_foreign_cols_err;
ut_a(srv_get_active_thread_type() == SRV_NONE);
/* Note: The master thread has not been started at this point. */
sys_foreign_err = dict_check_if_system_table_exists(
"SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
sys_foreign_cols_err = dict_check_if_system_table_exists(
"SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
if (sys_foreign_err == DB_SUCCESS
&& sys_foreign_cols_err == DB_SUCCESS) {
return(DB_SUCCESS);
}
if (srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
return(DB_READ_ONLY);
}
trx = trx_create();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
trx->op_info = "creating foreign key sys tables";
row_mysql_lock_data_dictionary(trx);
DBUG_EXECUTE_IF(
"create_and_drop_garbage",
err = que_eval_sql(
NULL,
"PROCEDURE CREATE_GARBAGE_TABLE_PROC () IS\n"
"BEGIN\n"
"CREATE TABLE\n"
"\"test/#sql-ib-garbage\"(ID CHAR);\n"
"CREATE UNIQUE CLUSTERED INDEX PRIMARY"
" ON \"test/#sql-ib-garbage\"(ID);\n"
"END;\n", FALSE, trx);
ut_ad(err == DB_SUCCESS);
row_drop_table_for_mysql("test/#sql-ib-garbage", trx,
SQLCOM_DROP_DB, true););
/* Check which incomplete table definition to drop. */
if (sys_foreign_err == DB_CORRUPTION) {
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_FOREIGN", trx);
}
if (sys_foreign_cols_err == DB_CORRUPTION) {
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
}
ib::info() << "Creating foreign key constraint system tables.";
/* NOTE: in dict_load_foreigns we use the fact that
there are 2 secondary indexes on SYS_FOREIGN, and they
are defined just like below */
/* NOTE: when designing InnoDB's foreign key support in 2001, we made
an error and made the table names and the foreign key id of type
'CHAR' (internally, really a VARCHAR). We should have made the type
VARBINARY, like in other InnoDB system tables, to get a clean
design. */
srv_file_per_table_backup = srv_file_per_table;
/* We always want SYSTEM tables to be created inside the system
tablespace. */
srv_file_per_table = 0;
err = que_eval_sql(
NULL,
"PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
"BEGIN\n"
"CREATE TABLE\n"
"SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
" REF_NAME CHAR, N_COLS INT);\n"
"CREATE UNIQUE CLUSTERED INDEX ID_IND"
" ON SYS_FOREIGN (ID);\n"
"CREATE INDEX FOR_IND"
" ON SYS_FOREIGN (FOR_NAME);\n"
"CREATE INDEX REF_IND"
" ON SYS_FOREIGN (REF_NAME);\n"
"CREATE TABLE\n"
"SYS_FOREIGN_COLS(ID CHAR, POS INT,"
" FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
"CREATE UNIQUE CLUSTERED INDEX ID_IND"
" ON SYS_FOREIGN_COLS (ID, POS);\n"
"END;\n",
FALSE, trx);
if (err != DB_SUCCESS) {
ib::error() << "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS"
" failed: " << ut_strerr(err) << ". Tablespace is"
" full. Dropping incompletely created tables.";
ut_ad(err == DB_OUT_OF_FILE_SPACE
|| err == DB_TOO_MANY_CONCURRENT_TRXS);
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_FOREIGN", trx);
row_drop_table_after_create_fail("SYS_FOREIGN_COLS", trx);
if (err == DB_OUT_OF_FILE_SPACE) {
err = DB_MUST_GET_MORE_FILE_SPACE;
}
}
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
trx_free(trx);
srv_file_per_table = srv_file_per_table_backup;
/* Note: The master thread has not been started at this point. */
/* Confirm and move to the non-LRU part of the table LRU list. */
sys_foreign_err = dict_check_if_system_table_exists(
"SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
ut_a(sys_foreign_err == DB_SUCCESS);
sys_foreign_cols_err = dict_check_if_system_table_exists(
"SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
ut_a(sys_foreign_cols_err == DB_SUCCESS);
return(err);
}
/** Creates the virtual column system table (SYS_VIRTUAL) inside InnoDB
at server bootstrap or server start if the table is not found or is
not of the right form.
@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_sys_virtual()
{
trx_t* trx;
my_bool srv_file_per_table_backup;
dberr_t err;
ut_a(srv_get_active_thread_type() == SRV_NONE);
/* Note: The master thread has not been started at this point. */
err = dict_check_if_system_table_exists(
"SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);
if (err == DB_SUCCESS) {
mutex_enter(&dict_sys->mutex);
dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL");
mutex_exit(&dict_sys->mutex);
return(DB_SUCCESS);
}
if (srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
return(DB_READ_ONLY);
}
trx = trx_create();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
trx->op_info = "creating sys_virtual tables";
row_mysql_lock_data_dictionary(trx);
/* Check which incomplete table definition to drop. */
if (err == DB_CORRUPTION) {
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_VIRTUAL", trx);
}
ib::info() << "Creating sys_virtual system tables.";
srv_file_per_table_backup = srv_file_per_table;
/* We always want SYSTEM tables to be created inside the system
tablespace. */
srv_file_per_table = 0;
err = que_eval_sql(
NULL,
"PROCEDURE CREATE_SYS_VIRTUAL_TABLES_PROC () IS\n"
"BEGIN\n"
"CREATE TABLE\n"
"SYS_VIRTUAL(TABLE_ID BIGINT, POS INT,"
" BASE_POS INT);\n"
"CREATE UNIQUE CLUSTERED INDEX BASE_IDX"
" ON SYS_VIRTUAL(TABLE_ID, POS, BASE_POS);\n"
"END;\n",
FALSE, trx);
if (err != DB_SUCCESS) {
ib::error() << "Creation of SYS_VIRTUAL"
" failed: " << ut_strerr(err) << ". Tablespace is"
" full or too many transactions."
" Dropping incompletely created tables.";
ut_ad(err == DB_OUT_OF_FILE_SPACE
|| err == DB_TOO_MANY_CONCURRENT_TRXS);
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_VIRTUAL", trx);
if (err == DB_OUT_OF_FILE_SPACE) {
err = DB_MUST_GET_MORE_FILE_SPACE;
}
}
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
trx_free(trx);
srv_file_per_table = srv_file_per_table_backup;
/* Note: The master thread has not been started at this point. */
/* Confirm and move to the non-LRU part of the table LRU list. */
dberr_t sys_virtual_err = dict_check_if_system_table_exists(
"SYS_VIRTUAL", DICT_NUM_FIELDS__SYS_VIRTUAL + 1, 1);
ut_a(sys_virtual_err == DB_SUCCESS);
mutex_enter(&dict_sys->mutex);
dict_sys->sys_virtual = dict_table_get_low("SYS_VIRTUAL");
mutex_exit(&dict_sys->mutex);
return(err);
}
/****************************************************************//**
Evaluate the given foreign key SQL statement.
@return error code or DB_SUCCESS */
2016-06-21 14:21:03 +02:00
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_foreign_eval_sql(
/*==================*/
pars_info_t* info, /*!< in: info struct */
const char* sql, /*!< in: SQL string to evaluate */
const char* name, /*!< in: table name (for diagnostics) */
const char* id, /*!< in: foreign key id */
trx_t* trx) /*!< in/out: transaction */
{
dberr_t error;
FILE* ef = dict_foreign_err_file;
error = que_eval_sql(info, sql, FALSE, trx);
if (error == DB_DUPLICATE_KEY) {
mutex_enter(&dict_foreign_err_mutex);
rewind(ef);
ut_print_timestamp(ef);
fputs(" Error in foreign key constraint creation for table ",
ef);
ut_print_name(ef, trx, name);
fputs(".\nA foreign key constraint of name ", ef);
ut_print_name(ef, trx, id);
fputs("\nalready exists."
" (Note that internally InnoDB adds 'databasename'\n"
"in front of the user-defined constraint name.)\n"
"Note that InnoDB's FOREIGN KEY system tables store\n"
"constraint names as case-insensitive, with the\n"
"MySQL standard latin1_swedish_ci collation. If you\n"
"create tables or databases whose names differ only in\n"
"the character case, then collisions in constraint\n"
"names can occur. Workaround: name your constraints\n"
"explicitly with unique names.\n",
ef);
mutex_exit(&dict_foreign_err_mutex);
return(error);
}
if (error != DB_SUCCESS) {
ib::error() << "Foreign key constraint creation failed: "
<< ut_strerr(error);
mutex_enter(&dict_foreign_err_mutex);
ut_print_timestamp(ef);
fputs(" Internal error in foreign key constraint creation"
" for table ", ef);
ut_print_name(ef, trx, name);
fputs(".\n"
"See the MySQL .err log in the datadir"
" for more information.\n", ef);
mutex_exit(&dict_foreign_err_mutex);
return(error);
}
return(DB_SUCCESS);
}
/********************************************************************//**
Add a single foreign key field definition to the data dictionary tables in
the database.
@return error code or DB_SUCCESS */
2016-06-21 14:21:03 +02:00
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
dict_create_add_foreign_field_to_dictionary(
/*========================================*/
ulint field_nr, /*!< in: field number */
const char* table_name, /*!< in: table name */
const dict_foreign_t* foreign, /*!< in: foreign */
trx_t* trx) /*!< in/out: transaction */
{
DBUG_ENTER("dict_create_add_foreign_field_to_dictionary");
pars_info_t* info = pars_info_create();
pars_info_add_str_literal(info, "id", foreign->id);
pars_info_add_int4_literal(info, "pos", field_nr);
pars_info_add_str_literal(info, "for_col_name",
foreign->foreign_col_names[field_nr]);
pars_info_add_str_literal(info, "ref_col_name",
foreign->referenced_col_names[field_nr]);
DBUG_RETURN(dict_foreign_eval_sql(
info,
"PROCEDURE P () IS\n"
"BEGIN\n"
"INSERT INTO SYS_FOREIGN_COLS VALUES"
"(:id, :pos, :for_col_name, :ref_col_name);\n"
"END;\n",
table_name, foreign->id, trx));
}
/********************************************************************//**
Construct foreign key constraint defintion from data dictionary information.
*/
UNIV_INTERN
char*
dict_foreign_def_get(
/*=================*/
dict_foreign_t* foreign,/*!< in: foreign */
trx_t* trx) /*!< in: trx */
{
char* fk_def = (char *)mem_heap_alloc(foreign->heap, 4*1024);
const char* tbname;
char tablebuf[MAX_TABLE_NAME_LEN + 1] = "";
unsigned i;
char* bufend;
tbname = dict_remove_db_name(foreign->id);
bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
tbname, strlen(tbname), trx->mysql_thd);
tablebuf[bufend - tablebuf] = '\0';
sprintf(fk_def,
(char *)"CONSTRAINT %s FOREIGN KEY (", (char *)tablebuf);
for(i = 0; i < foreign->n_fields; i++) {
char buf[MAX_TABLE_NAME_LEN + 1] = "";
innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
foreign->foreign_col_names[i],
strlen(foreign->foreign_col_names[i]),
trx->mysql_thd);
strcat(fk_def, buf);
if (i < static_cast<unsigned>(foreign->n_fields-1)) {
strcat(fk_def, (char *)",");
}
}
strcat(fk_def,(char *)") REFERENCES ");
bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
foreign->referenced_table_name,
strlen(foreign->referenced_table_name),
trx->mysql_thd);
tablebuf[bufend - tablebuf] = '\0';
strcat(fk_def, tablebuf);
strcat(fk_def, " (");
for(i = 0; i < foreign->n_fields; i++) {
char buf[MAX_TABLE_NAME_LEN + 1] = "";
bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
foreign->referenced_col_names[i],
strlen(foreign->referenced_col_names[i]),
trx->mysql_thd);
buf[bufend - buf] = '\0';
strcat(fk_def, buf);
if (i < (uint)foreign->n_fields-1) {
strcat(fk_def, (char *)",");
}
}
strcat(fk_def, (char *)")");
return fk_def;
}
/********************************************************************//**
Convert foreign key column names from data dictionary to SQL-layer.
*/
static
void
dict_foreign_def_get_fields(
/*========================*/
dict_foreign_t* foreign,/*!< in: foreign */
trx_t* trx, /*!< in: trx */
char** field, /*!< out: foreign column */
char** field2, /*!< out: referenced column */
ulint col_no) /*!< in: column number */
{
char* bufend;
char* fieldbuf = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1);
char* fieldbuf2 = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1);
bufend = innobase_convert_name(fieldbuf, MAX_TABLE_NAME_LEN,
foreign->foreign_col_names[col_no],
strlen(foreign->foreign_col_names[col_no]),
trx->mysql_thd);
fieldbuf[bufend - fieldbuf] = '\0';
bufend = innobase_convert_name(fieldbuf2, MAX_TABLE_NAME_LEN,
foreign->referenced_col_names[col_no],
strlen(foreign->referenced_col_names[col_no]),
trx->mysql_thd);
fieldbuf2[bufend - fieldbuf2] = '\0';
*field = fieldbuf;
*field2 = fieldbuf2;
}
/********************************************************************//**
Add a foreign key definition to the data dictionary tables.
@return error code or DB_SUCCESS */
dberr_t
dict_create_add_foreign_to_dictionary(
/*==================================*/
const char* name, /*!< in: table name */
const dict_foreign_t* foreign,/*!< in: foreign key */
trx_t* trx) /*!< in/out: dictionary transaction */
{
dberr_t error;
DBUG_ENTER("dict_create_add_foreign_to_dictionary");
pars_info_t* info = pars_info_create();
pars_info_add_str_literal(info, "id", foreign->id);
pars_info_add_str_literal(info, "for_name", name);
pars_info_add_str_literal(info, "ref_name",
foreign->referenced_table_name);
pars_info_add_int4_literal(info, "n_cols",
ulint(foreign->n_fields)
| (ulint(foreign->type) << 24));
DBUG_PRINT("dict_create_add_foreign_to_dictionary",
("'%s', '%s', '%s', %d", foreign->id, name,
foreign->referenced_table_name,
foreign->n_fields + (foreign->type << 24)));
error = dict_foreign_eval_sql(info,
"PROCEDURE P () IS\n"
"BEGIN\n"
"INSERT INTO SYS_FOREIGN VALUES"
"(:id, :for_name, :ref_name, :n_cols);\n"
"END;\n"
, name, foreign->id, trx);
if (error != DB_SUCCESS) {
if (error == DB_DUPLICATE_KEY) {
char buf[MAX_TABLE_NAME_LEN + 1] = "";
char tablename[MAX_TABLE_NAME_LEN + 1] = "";
char* fk_def;
innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
name, strlen(name), trx->mysql_thd);
innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
foreign->id, strlen(foreign->id), trx->mysql_thd);
fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx);
ib_push_warning(trx, error,
"Create or Alter table %s with foreign key constraint"
" failed. Foreign key constraint %s"
" already exists on data dictionary."
" Foreign key constraint names need to be unique in database."
" Error in foreign key definition: %s.",
tablename, buf, fk_def);
}
DBUG_RETURN(error);
}
for (ulint i = 0; i < foreign->n_fields; i++) {
error = dict_create_add_foreign_field_to_dictionary(
i, name, foreign, trx);
if (error != DB_SUCCESS) {
char buf[MAX_TABLE_NAME_LEN + 1] = "";
char tablename[MAX_TABLE_NAME_LEN + 1] = "";
char* field=NULL;
char* field2=NULL;
char* fk_def;
innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
name, strlen(name), trx->mysql_thd);
innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
foreign->id, strlen(foreign->id), trx->mysql_thd);
fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx);
dict_foreign_def_get_fields((dict_foreign_t*)foreign, trx, &field, &field2, i);
ib_push_warning(trx, error,
"Create or Alter table %s with foreign key constraint"
" failed. Error adding foreign key constraint name %s"
" fields %s or %s to the dictionary."
" Error in foreign key definition: %s.",
tablename, buf, i+1, fk_def);
DBUG_RETURN(error);
}
}
DBUG_RETURN(error);
}
/** Check if a foreign constraint is on the given column name.
@param[in] col_name column name to be searched for fk constraint
@param[in] table table to which foreign key constraint belongs
@return true if fk constraint is present on the table, false otherwise. */
static
bool
dict_foreign_base_for_stored(
const char* col_name,
const dict_table_t* table)
{
/* Loop through each stored column and check if its base column has
the same name as the column name being checked */
dict_s_col_list::const_iterator it;
for (it = table->s_cols->begin();
it != table->s_cols->end(); ++it) {
dict_s_col_t s_col = *it;
for (ulint j = 0; j < s_col.num_base; j++) {
if (strcmp(col_name, dict_table_get_col_name(
table,
s_col.base_col[j]->ind)) == 0) {
return(true);
}
}
}
return(false);
}
/** Check if a foreign constraint is on columns served as base columns
of any stored column. This is to prevent creating SET NULL or CASCADE
constraint on such columns
@param[in] local_fk_set set of foreign key objects, to be added to
the dictionary tables
@param[in] table table to which the foreign key objects in
local_fk_set belong to
@return true if yes, otherwise, false */
bool
dict_foreigns_has_s_base_col(
const dict_foreign_set& local_fk_set,
const dict_table_t* table)
{
dict_foreign_t* foreign;
if (table->s_cols == NULL) {
return (false);
}
for (dict_foreign_set::const_iterator it = local_fk_set.begin();
it != local_fk_set.end(); ++it) {
foreign = *it;
ulint type = foreign->type;
type &= ~(DICT_FOREIGN_ON_DELETE_NO_ACTION
| DICT_FOREIGN_ON_UPDATE_NO_ACTION);
if (type == 0) {
continue;
}
for (ulint i = 0; i < foreign->n_fields; i++) {
/* Check if the constraint is on a column that
is a base column of any stored column */
if (dict_foreign_base_for_stored(
foreign->foreign_col_names[i], table)) {
return(true);
}
}
}
return(false);
}
2014-09-11 10:13:35 +02:00
/** Adds the given set of foreign key objects to the dictionary tables
in the database. This function does not modify the dictionary cache. The
caller must ensure that all foreign key objects contain a valid constraint
name in foreign->id.
@param[in] local_fk_set set of foreign key objects, to be added to
the dictionary tables
@param[in] table table to which the foreign key objects in
local_fk_set belong to
@param[in,out] trx transaction
@return error code or DB_SUCCESS */
dberr_t
dict_create_add_foreigns_to_dictionary(
/*===================================*/
2014-09-11 10:13:35 +02:00
const dict_foreign_set& local_fk_set,
const dict_table_t* table,
trx_t* trx)
{
dict_foreign_t* foreign;
dberr_t error;
ut_ad(mutex_own(&dict_sys->mutex));
if (NULL == dict_table_get_low("SYS_FOREIGN")) {
ib::error() << "Table SYS_FOREIGN not found"
" in internal data dictionary";
return(DB_ERROR);
}
2014-09-11 10:13:35 +02:00
for (dict_foreign_set::const_iterator it = local_fk_set.begin();
it != local_fk_set.end();
++it) {
2014-09-11 10:13:35 +02:00
foreign = *it;
ut_ad(foreign->id != NULL);
error = dict_create_add_foreign_to_dictionary(
table->name.m_name, foreign, trx);
if (error != DB_SUCCESS) {
return(error);
}
}
return(DB_SUCCESS);
}
/****************************************************************//**
Creates the tablespaces and datafiles system tables inside InnoDB
at server bootstrap or server start if they are not found or are
not of the right form.
@return DB_SUCCESS or error code */
dberr_t
dict_create_or_check_sys_tablespace(void)
/*=====================================*/
{
trx_t* trx;
my_bool srv_file_per_table_backup;
dberr_t err;
dberr_t sys_tablespaces_err;
dberr_t sys_datafiles_err;
ut_a(srv_get_active_thread_type() == SRV_NONE);
/* Note: The master thread has not been started at this point. */
sys_tablespaces_err = dict_check_if_system_table_exists(
"SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
sys_datafiles_err = dict_check_if_system_table_exists(
"SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
if (sys_tablespaces_err == DB_SUCCESS
&& sys_datafiles_err == DB_SUCCESS) {
srv_sys_tablespaces_open = true;
return(DB_SUCCESS);
}
if (srv_read_only_mode
|| srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
return(DB_READ_ONLY);
}
trx = trx_create();
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
trx->op_info = "creating tablepace and datafile sys tables";
row_mysql_lock_data_dictionary(trx);
/* Check which incomplete table definition to drop. */
if (sys_tablespaces_err == DB_CORRUPTION) {
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
}
if (sys_datafiles_err == DB_CORRUPTION) {
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_DATAFILES", trx);
}
ib::info() << "Creating tablespace and datafile system tables.";
/* We always want SYSTEM tables to be created inside the system
tablespace. */
srv_file_per_table_backup = srv_file_per_table;
srv_file_per_table = 0;
err = que_eval_sql(
NULL,
"PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n"
"BEGIN\n"
"CREATE TABLE SYS_TABLESPACES(\n"
" SPACE INT, NAME CHAR, FLAGS INT);\n"
"CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE"
" ON SYS_TABLESPACES (SPACE);\n"
"CREATE TABLE SYS_DATAFILES(\n"
" SPACE INT, PATH CHAR);\n"
"CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE"
" ON SYS_DATAFILES (SPACE);\n"
"END;\n",
FALSE, trx);
if (err != DB_SUCCESS) {
ib::error() << "Creation of SYS_TABLESPACES and SYS_DATAFILES"
" has failed with error " << ut_strerr(err)
<< ". Dropping incompletely created tables.";
ut_a(err == DB_OUT_OF_FILE_SPACE
|| err == DB_DUPLICATE_KEY
|| err == DB_TOO_MANY_CONCURRENT_TRXS);
MDEV-13564 Mariabackup does not work with TRUNCATE Implement undo tablespace truncation via normal redo logging. Implement TRUNCATE TABLE as a combination of RENAME to #sql-ib name, CREATE, and DROP. Note: Orphan #sql-ib*.ibd may be left behind if MariaDB Server 10.2 is killed before the DROP operation is committed. If MariaDB Server 10.2 is killed during TRUNCATE, it is also possible that the old table was renamed to #sql-ib*.ibd but the data dictionary will refer to the table using the original name. In MariaDB Server 10.3, RENAME inside InnoDB is transactional, and #sql-* tables will be dropped on startup. So, this new TRUNCATE will be fully crash-safe in 10.3. ha_mroonga::wrapper_truncate(): Pass table options to the underlying storage engine, now that ha_innobase::truncate() will need them. rpl_slave_state::truncate_state_table(): Before truncating mysql.gtid_slave_pos, evict any cached table handles from the table definition cache, so that there will be no stale references to the old table after truncating. == TRUNCATE TABLE == WL#6501 in MySQL 5.7 introduced separate log files for implementing atomic and crash-safe TRUNCATE TABLE, instead of using the InnoDB undo and redo log. Some convoluted logic was added to the InnoDB crash recovery, and some extra synchronization (including a redo log checkpoint) was introduced to make this work. This synchronization has caused performance problems and race conditions, and the extra log files cannot be copied or applied by external backup programs. In order to support crash-upgrade from MariaDB 10.2, we will keep the logic for parsing and applying the extra log files, but we will no longer generate those files in TRUNCATE TABLE. A prerequisite for crash-safe TRUNCATE is a crash-safe RENAME TABLE (with full redo and undo logging and proper rollback). This will be implemented in MDEV-14717. ha_innobase::truncate(): Invoke RENAME, create(), delete_table(). Because RENAME cannot be fully rolled back before MariaDB 10.3 due to missing undo logging, add some explicit rename-back in case the operation fails. ha_innobase::delete(): Introduce a variant that takes sqlcom as a parameter. In TRUNCATE TABLE, we do not want to touch any FOREIGN KEY constraints. ha_innobase::create(): Add the parameters file_per_table, trx. In TRUNCATE, the new table must be created in the same transaction that renames the old table. create_table_info_t::create_table_info_t(): Add the parameters file_per_table, trx. row_drop_table_for_mysql(): Replace a bool parameter with sqlcom. row_drop_table_after_create_fail(): New function, wrapping row_drop_table_for_mysql(). dict_truncate_index_tree_in_mem(), fil_truncate_tablespace(), fil_prepare_for_truncate(), fil_reinit_space_header_for_table(), row_truncate_table_for_mysql(), TruncateLogger, row_truncate_prepare(), row_truncate_rollback(), row_truncate_complete(), row_truncate_fts(), row_truncate_update_system_tables(), row_truncate_foreign_key_checks(), row_truncate_sanity_checks(): Remove. row_upd_check_references_constraints(): Remove a check for TRUNCATE, now that the table is no longer truncated in place. The new test innodb.truncate_foreign uses DEBUG_SYNC to cover some race-condition like scenarios. The test innodb-innodb.truncate does not use any synchronization. We add a redo log subformat to indicate backup-friendly format. MariaDB 10.4 will remove support for the old TRUNCATE logging, so crash-upgrade from old 10.2 or 10.3 to 10.4 will involve limitations. == Undo tablespace truncation == MySQL 5.7 implements undo tablespace truncation. It is only possible when innodb_undo_tablespaces is set to at least 2. The logging is implemented similar to the WL#6501 TRUNCATE, that is, using separate log files and a redo log checkpoint. We can simply implement undo tablespace truncation within a single mini-transaction that reinitializes the undo log tablespace file. Unfortunately, due to the redo log format of some operations, currently, the total redo log written by undo tablespace truncation will be more than the combined size of the truncated undo tablespace. It should be acceptable to have a little more than 1 megabyte of log in a single mini-transaction. This will be fixed in MDEV-17138 in MariaDB Server 10.4. recv_sys_t: Add truncated_undo_spaces[] to remember for which undo tablespaces a MLOG_FILE_CREATE2 record was seen. namespace undo: Remove some unnecessary declarations. fil_space_t::is_being_truncated: Document that this flag now only applies to undo tablespaces. Remove some references. fil_space_t::is_stopping(): Do not refer to is_being_truncated. This check is for tablespaces of tables. Potentially used tablespaces are never truncated any more. buf_dblwr_process(): Suppress the out-of-bounds warning for undo tablespaces. fil_truncate_log(): Write a MLOG_FILE_CREATE2 with a nonzero page number (new size of the tablespace in pages) to inform crash recovery that the undo tablespace size has been reduced. fil_op_write_log(): Relax assertions, so that MLOG_FILE_CREATE2 can be written for undo tablespaces (without .ibd file suffix) for a nonzero page number. os_file_truncate(): Add the parameter allow_shrink=false so that undo tablespaces can actually be shrunk using this function. fil_name_parse(): For undo tablespace truncation, buffer MLOG_FILE_CREATE2 in truncated_undo_spaces[]. recv_read_in_area(): Avoid reading pages for which no redo log records remain buffered, after recv_addr_trim() removed them. trx_rseg_header_create(): Add a FIXME comment that we could write much less redo log. trx_undo_truncate_tablespace(): Reinitialize the undo tablespace in a single mini-transaction, which will be flushed to the redo log before the file size is trimmed. recv_addr_trim(): Discard any redo logs for pages that were logged after the new end of a file, before the truncation LSN. If the rec_list becomes empty, reduce n_addrs. After removing any affected records, actually truncate the file. recv_apply_hashed_log_recs(): Invoke recv_addr_trim() right before applying any log records. The undo tablespace files must be open at this point. buf_flush_or_remove_pages(), buf_flush_dirty_pages(), buf_LRU_flush_or_remove_pages(): Add a parameter for specifying the number of the first page to flush or remove (default 0). trx_purge_initiate_truncate(): Remove the log checkpoints, the extra logging, and some unnecessary crash points. Merge the code from trx_undo_truncate_tablespace(). First, flush all to-be-discarded pages (beyond the new end of the file), then trim the space->size to make the page allocation deterministic. At the only remaining crash injection point, flush the redo log, so that the recovery can be tested.
2018-08-28 13:43:06 +03:00
row_drop_table_after_create_fail("SYS_TABLESPACES", trx);
row_drop_table_after_create_fail("SYS_DATAFILES", trx);
if (err == DB_OUT_OF_FILE_SPACE) {
err = DB_MUST_GET_MORE_FILE_SPACE;
}
}
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
trx_free(trx);
srv_file_per_table = srv_file_per_table_backup;
if (err == DB_SUCCESS) {
srv_sys_tablespaces_open = true;
}
/* Note: The master thread has not been started at this point. */
/* Confirm and move to the non-LRU part of the table LRU list. */
sys_tablespaces_err = dict_check_if_system_table_exists(
"SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
ut_a(sys_tablespaces_err == DB_SUCCESS || err != DB_SUCCESS);
sys_datafiles_err = dict_check_if_system_table_exists(
"SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
ut_a(sys_datafiles_err == DB_SUCCESS || err != DB_SUCCESS);
return(err);
}
/** Put a tablespace definition into the data dictionary,
replacing what was there previously.
@param[in] space Tablespace id
@param[in] name Tablespace name
@param[in] flags Tablespace flags
@param[in] path Tablespace path
@param[in] trx Transaction
@return error code or DB_SUCCESS */
dberr_t
dict_replace_tablespace_in_dictionary(
ulint space_id,
const char* name,
ulint flags,
const char* path,
trx_t* trx)
{
if (!srv_sys_tablespaces_open) {
/* Startup procedure is not yet ready for updates. */
return(DB_SUCCESS);
}
dberr_t error;
pars_info_t* info = pars_info_create();
pars_info_add_int4_literal(info, "space", space_id);
pars_info_add_str_literal(info, "name", name);
pars_info_add_int4_literal(info, "flags", flags);
pars_info_add_str_literal(info, "path", path);
error = que_eval_sql(info,
"PROCEDURE P () IS\n"
"p CHAR;\n"
"DECLARE CURSOR c IS\n"
" SELECT PATH FROM SYS_DATAFILES\n"
" WHERE SPACE=:space FOR UPDATE;\n"
"BEGIN\n"
"OPEN c;\n"
"FETCH c INTO p;\n"
"IF (SQL % NOTFOUND) THEN"
" DELETE FROM SYS_TABLESPACES "
"WHERE SPACE=:space;\n"
" INSERT INTO SYS_TABLESPACES VALUES"
"(:space, :name, :flags);\n"
" INSERT INTO SYS_DATAFILES VALUES"
"(:space, :path);\n"
"ELSIF p <> :path THEN\n"
" UPDATE SYS_DATAFILES SET PATH=:path"
" WHERE CURRENT OF c;\n"
"END IF;\n"
"END;\n",
FALSE, trx);
if (error != DB_SUCCESS) {
return(error);
}
trx->op_info = "";
return(error);
}
/** Assign a new table ID and put it into the table cache and the transaction.
@param[in,out] table Table that needs an ID
@param[in,out] trx Transaction */
void
dict_table_assign_new_id(
dict_table_t* table,
trx_t* trx)
{
dict_hdr_get_new_id(&table->id, NULL, NULL, table, false);
trx->table_id = table->id;
}