mariadb/storage/innobase/include/btr0cur.h

1033 lines
40 KiB
C
Raw Normal View History

/*****************************************************************************
2016-06-21 14:21:03 +02:00
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
*****************************************************************************/
/**************************************************//**
@file include/btr0cur.h
The index tree cursor
Created 10/16/1994 Heikki Tuuri
*******************************************************/
#ifndef btr0cur_h
#define btr0cur_h
#include "univ.i"
#include "dict0dict.h"
#include "page0cur.h"
#include "btr0types.h"
#include "gis0type.h"
/** Mode flags for btr_cur operations; these can be ORed */
enum {
/** do no undo logging */
BTR_NO_UNDO_LOG_FLAG = 1,
/** do no record lock checking */
BTR_NO_LOCKING_FLAG = 2,
/** sys fields will be found in the update vector or inserted
entry */
BTR_KEEP_SYS_FLAG = 4,
MDEV-10139 Support for InnoDB SEQUENCE objects We introduce a NO_ROLLBACK flag for InnoDB tables. This flag only works for tables that have a single index. Apart from undo logging, this flag will also prevent locking and the assignment of DB_ROW_ID or DB_TRX_ID, and imply READ UNCOMMITTED isolation. It is assumed that the SQL layer is guaranteeing mutual exclusion. After the initial insert of the single record during CREATE SEQUENCE, InnoDB will be updating the single record in-place. This is crash-safe thanks to the redo log. (That is, after a crash after CREATE SEQUENCE was committed, the effect of sequence operations will be observable fully or not at all.) When it comes to the durability of the updates of SEQUENCE in InnoDB, there is a clear analogy to MDEV-6076 Persistent AUTO_INCREMENT. The updates would be made persistent by the InnoDB redo log flush at transaction commit or rollback (or XA PREPARE), provided that innodb_log_flush_at_trx_commit=1. Similar to AUTO_INCREMENT, it is possible that the update of a SEQUENCE in a middle of transaction becomes durable before the COMMIT/ROLLBACK of the transaction, in case the InnoDB redo log is being flushed as a result of the a commit or rollback of some other transaction, or as a result of a redo log checkpoint that can be initiated at any time by operations that are writing redo log. dict_table_t::no_rollback(): Check if the table does not support rollback. BTR_NO_ROLLBACK: Logging and locking flags for no_rollback() tables. DICT_TF_BITS: Add the NO_ROLLBACK flag. row_ins_step(): Assign 0 to DB_ROW_ID and DB_TRX_ID, and skip any locking for no-rollback tables. There will be only a single row in no-rollback tables (or there must be a proper PRIMARY KEY). row_search_mvcc(): Execute the READ UNCOMMITTED code path for no-rollback tables. ha_innobase::external_lock(), ha_innobase::store_lock(): Block CREATE/DROP SEQUENCE in innodb_read_only mode. This probably has no effect for CREATE SEQUENCE, because already ha_innobase::create() should have been called (and refused) before external_lock() or store_lock() is called. ha_innobase::store_lock(): For CREATE SEQUENCE, do not acquire any InnoDB locks, even though TL_WRITE is being requested. (This is just a performance optimization.) innobase_copy_frm_flags_from_create_info(), row_drop_table_for_mysql(): Disable persistent statistics for no_rollback tables.
2017-03-27 18:58:16 +03:00
/** no rollback */
BTR_NO_ROLLBACK = BTR_NO_UNDO_LOG_FLAG
| BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG,
/** btr_cur_pessimistic_update() must keep cursor position
when moving columns to big_rec */
BTR_KEEP_POS_FLAG = 8,
/** the caller is creating the index or wants to bypass the
index->info.online creation log */
BTR_CREATE_FLAG = 16,
/** the caller of btr_cur_optimistic_update() or
btr_cur_update_in_place() will take care of
updating IBUF_BITMAP_FREE */
BTR_KEEP_IBUF_BITMAP = 32
};
/* btr_cur_latch_leaves() returns latched blocks and savepoints. */
struct btr_latch_leaves_t {
/* left block, target block and right block */
buf_block_t* blocks[3];
ulint savepoints[3];
};
#include "que0types.h"
#include "row0types.h"
#include "ha0ha.h"
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
@return pointer to page cursor component */
UNIV_INLINE
page_cur_t*
btr_cur_get_page_cur(
/*=================*/
const btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the buffer block on which the tree cursor is positioned.
@return pointer to buffer block */
UNIV_INLINE
buf_block_t*
btr_cur_get_block(
/*==============*/
const btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the record pointer of a tree cursor.
@return pointer to record */
UNIV_INLINE
rec_t*
btr_cur_get_rec(
/*============*/
const btr_cur_t* cursor);/*!< in: tree cursor */
#else /* UNIV_DEBUG */
# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
# define btr_cur_get_block(cursor) ((cursor)->page_cur.block)
# define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec)
#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the compressed page on which the tree cursor is positioned.
@return pointer to compressed page, or NULL if the page is not compressed */
UNIV_INLINE
page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the page of a tree cursor.
@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the index of a cursor.
@param cursor b-tree cursor
@return index */
#define btr_cur_get_index(cursor) ((cursor)->index)
/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE
void
btr_cur_position(
/*=============*/
dict_index_t* index, /*!< in: index */
rec_t* rec, /*!< in: record in tree */
buf_block_t* block, /*!< in: buffer block of rec */
btr_cur_t* cursor);/*!< in: cursor */
MDEV-11369 Instant ADD COLUMN for InnoDB For InnoDB tables, adding, dropping and reordering columns has required a rebuild of the table and all its indexes. Since MySQL 5.6 (and MariaDB 10.0) this has been supported online (LOCK=NONE), allowing concurrent modification of the tables. This work revises the InnoDB ROW_FORMAT=REDUNDANT, ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC so that columns can be appended instantaneously, with only minor changes performed to the table structure. The counter innodb_instant_alter_column in INFORMATION_SCHEMA.GLOBAL_STATUS is incremented whenever a table rebuild operation is converted into an instant ADD COLUMN operation. ROW_FORMAT=COMPRESSED tables will not support instant ADD COLUMN. Some usability limitations will be addressed in subsequent work: MDEV-13134 Introduce ALTER TABLE attributes ALGORITHM=NOCOPY and ALGORITHM=INSTANT MDEV-14016 Allow instant ADD COLUMN, ADD INDEX, LOCK=NONE The format of the clustered index (PRIMARY KEY) is changed as follows: (1) The FIL_PAGE_TYPE of the root page will be FIL_PAGE_TYPE_INSTANT, and a new field PAGE_INSTANT will contain the original number of fields in the clustered index ('core' fields). If instant ADD COLUMN has not been used or the table becomes empty, or the very first instant ADD COLUMN operation is rolled back, the fields PAGE_INSTANT and FIL_PAGE_TYPE will be reset to 0 and FIL_PAGE_INDEX. (2) A special 'default row' record is inserted into the leftmost leaf, between the page infimum and the first user record. This record is distinguished by the REC_INFO_MIN_REC_FLAG, and it is otherwise in the same format as records that contain values for the instantly added columns. This 'default row' always has the same number of fields as the clustered index according to the table definition. The values of 'core' fields are to be ignored. For other fields, the 'default row' will contain the default values as they were during the ALTER TABLE statement. (If the column default values are changed later, those values will only be stored in the .frm file. The 'default row' will contain the original evaluated values, which must be the same for every row.) The 'default row' must be completely hidden from higher-level access routines. Assertions have been added to ensure that no 'default row' is ever present in the adaptive hash index or in locked records. The 'default row' is never delete-marked. (3) In clustered index leaf page records, the number of fields must reside between the number of 'core' fields (dict_index_t::n_core_fields introduced in this work) and dict_index_t::n_fields. If the number of fields is less than dict_index_t::n_fields, the missing fields are replaced with the column value of the 'default row'. Note: The number of fields in the record may shrink if some of the last instantly added columns are updated to the value that is in the 'default row'. The function btr_cur_trim() implements this 'compression' on update and rollback; dtuple::trim() implements it on insert. (4) In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC records, the new status value REC_STATUS_COLUMNS_ADDED will indicate the presence of a new record header that will encode n_fields-n_core_fields-1 in 1 or 2 bytes. (In ROW_FORMAT=REDUNDANT records, the record header always explicitly encodes the number of fields.) We introduce the undo log record type TRX_UNDO_INSERT_DEFAULT for covering the insert of the 'default row' record when instant ADD COLUMN is used for the first time. Subsequent instant ADD COLUMN can use TRX_UNDO_UPD_EXIST_REC. This is joint work with Vin Chen (陈福荣) from Tencent. The design that was discussed in April 2017 would not have allowed import or export of data files, because instead of the 'default row' it would have introduced a data dictionary table. The test rpl.rpl_alter_instant is exactly as contributed in pull request #408. The test innodb.instant_alter is based on a contributed test. The redo log record format changes for ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPACT are as contributed. (With this change present, crash recovery from MariaDB 10.3.1 will fail in spectacular ways!) Also the semantics of higher-level redo log records that modify the PAGE_INSTANT field is changed. The redo log format version identifier was already changed to LOG_HEADER_FORMAT_CURRENT=103 in MariaDB 10.3.1. Everything else has been rewritten by me. Thanks to Elena Stepanova, the code has been tested extensively. When rolling back an instant ADD COLUMN operation, we must empty the PAGE_FREE list after deleting or shortening the 'default row' record, by calling either btr_page_empty() or btr_page_reorganize(). We must know the size of each entry in the PAGE_FREE list. If rollback left a freed copy of the 'default row' in the PAGE_FREE list, we would be unable to determine its size (if it is in ROW_FORMAT=COMPACT or ROW_FORMAT=DYNAMIC) because it would contain more fields than the rolled-back definition of the clustered index. UNIV_SQL_DEFAULT: A new special constant that designates an instantly added column that is not present in the clustered index record. len_is_stored(): Check if a length is an actual length. There are two magic length values: UNIV_SQL_DEFAULT, UNIV_SQL_NULL. dict_col_t::def_val: The 'default row' value of the column. If the column is not added instantly, def_val.len will be UNIV_SQL_DEFAULT. dict_col_t: Add the accessors is_virtual(), is_nullable(), is_instant(), instant_value(). dict_col_t::remove_instant(): Remove the 'instant ADD' status of a column. dict_col_t::name(const dict_table_t& table): Replaces dict_table_get_col_name(). dict_index_t::n_core_fields: The original number of fields. For secondary indexes and if instant ADD COLUMN has not been used, this will be equal to dict_index_t::n_fields. dict_index_t::n_core_null_bytes: Number of bytes needed to represent the null flags; usually equal to UT_BITS_IN_BYTES(n_nullable). dict_index_t::NO_CORE_NULL_BYTES: Magic value signalling that n_core_null_bytes was not initialized yet from the clustered index root page. dict_index_t: Add the accessors is_instant(), is_clust(), get_n_nullable(), instant_field_value(). dict_index_t::instant_add_field(): Adjust clustered index metadata for instant ADD COLUMN. dict_index_t::remove_instant(): Remove the 'instant ADD' status of a clustered index when the table becomes empty, or the very first instant ADD COLUMN operation is rolled back. dict_table_t: Add the accessors is_instant(), is_temporary(), supports_instant(). dict_table_t::instant_add_column(): Adjust metadata for instant ADD COLUMN. dict_table_t::rollback_instant(): Adjust metadata on the rollback of instant ADD COLUMN. prepare_inplace_alter_table_dict(): First create the ctx->new_table, and only then decide if the table really needs to be rebuilt. We must split the creation of table or index metadata from the creation of the dictionary table records and the creation of the data. In this way, we can transform a table-rebuilding operation into an instant ADD COLUMN operation. Dictionary objects will only be added to cache when table rebuilding or index creation is needed. The ctx->instant_table will never be added to cache. dict_table_t::add_to_cache(): Modified and renamed from dict_table_add_to_cache(). Do not modify the table metadata. Let the callers invoke dict_table_add_system_columns() and if needed, set can_be_evicted. dict_create_sys_tables_tuple(), dict_create_table_step(): Omit the system columns (which will now exist in the dict_table_t object already at this point). dict_create_table_step(): Expect the callers to invoke dict_table_add_system_columns(). pars_create_table(): Before creating the table creation execution graph, invoke dict_table_add_system_columns(). row_create_table_for_mysql(): Expect all callers to invoke dict_table_add_system_columns(). create_index_dict(): Replaces row_merge_create_index_graph(). innodb_update_n_cols(): Renamed from innobase_update_n_virtual(). Call my_error() if an error occurs. btr_cur_instant_init(), btr_cur_instant_init_low(), btr_cur_instant_root_init(): Load additional metadata from the clustered index and set dict_index_t::n_core_null_bytes. This is invoked when table metadata is first loaded into the data dictionary. dict_boot(): Initialize n_core_null_bytes for the four hard-coded dictionary tables. dict_create_index_step(): Initialize n_core_null_bytes. This is executed as part of CREATE TABLE. dict_index_build_internal_clust(): Initialize n_core_null_bytes to NO_CORE_NULL_BYTES if table->supports_instant(). row_create_index_for_mysql(): Initialize n_core_null_bytes for CREATE TEMPORARY TABLE. commit_cache_norebuild(): Call the code to rename or enlarge columns in the cache only if instant ADD COLUMN is not being used. (Instant ADD COLUMN would copy all column metadata from instant_table to old_table, including the names and lengths.) PAGE_INSTANT: A new 13-bit field for storing dict_index_t::n_core_fields. This is repurposing the 16-bit field PAGE_DIRECTION, of which only the least significant 3 bits were used. The original byte containing PAGE_DIRECTION will be accessible via the new constant PAGE_DIRECTION_B. page_get_instant(), page_set_instant(): Accessors for the PAGE_INSTANT. page_ptr_get_direction(), page_get_direction(), page_ptr_set_direction(): Accessors for PAGE_DIRECTION. page_direction_reset(): Reset PAGE_DIRECTION, PAGE_N_DIRECTION. page_direction_increment(): Increment PAGE_N_DIRECTION and set PAGE_DIRECTION. rec_get_offsets(): Use the 'leaf' parameter for non-debug purposes, and assume that heap_no is always set. Initialize all dict_index_t::n_fields for ROW_FORMAT=REDUNDANT records, even if the record contains fewer fields. rec_offs_make_valid(): Add the parameter 'leaf'. rec_copy_prefix_to_dtuple(): Assert that the tuple is only built on the core fields. Instant ADD COLUMN only applies to the clustered index, and we should never build a search key that has more than the PRIMARY KEY and possibly DB_TRX_ID,DB_ROLL_PTR. All these columns are always present. dict_index_build_data_tuple(): Remove assertions that would be duplicated in rec_copy_prefix_to_dtuple(). rec_init_offsets(): Support ROW_FORMAT=REDUNDANT records whose number of fields is between n_core_fields and n_fields. cmp_rec_rec_with_match(): Implement the comparison between two MIN_REC_FLAG records. trx_t::in_rollback: Make the field available in non-debug builds. trx_start_for_ddl_low(): Remove dangerous error-tolerance. A dictionary transaction must be flagged as such before it has generated any undo log records. This is because trx_undo_assign_undo() will mark the transaction as a dictionary transaction in the undo log header right before the very first undo log record is being written. btr_index_rec_validate(): Account for instant ADD COLUMN row_undo_ins_remove_clust_rec(): On the rollback of an insert into SYS_COLUMNS, revert instant ADD COLUMN in the cache by removing the last column from the table and the clustered index. row_search_on_row_ref(), row_undo_mod_parse_undo_rec(), row_undo_mod(), trx_undo_update_rec_get_update(): Handle the 'default row' as a special case. dtuple_t::trim(index): Omit a redundant suffix of an index tuple right before insert or update. After instant ADD COLUMN, if the last fields of a clustered index tuple match the 'default row', there is no need to store them. While trimming the entry, we must hold a page latch, so that the table cannot be emptied and the 'default row' be deleted. btr_cur_optimistic_update(), btr_cur_pessimistic_update(), row_upd_clust_rec_by_insert(), row_ins_clust_index_entry_low(): Invoke dtuple_t::trim() if needed. row_ins_clust_index_entry(): Restore dtuple_t::n_fields after calling row_ins_clust_index_entry_low(). rec_get_converted_size(), rec_get_converted_size_comp(): Allow the number of fields to be between n_core_fields and n_fields. Do not support infimum,supremum. They are never supposed to be stored in dtuple_t, because page creation nowadays uses a lower-level method for initializing them. rec_convert_dtuple_to_rec_comp(): Assign the status bits based on the number of fields. btr_cur_trim(): In an update, trim the index entry as needed. For the 'default row', handle rollback specially. For user records, omit fields that match the 'default row'. btr_cur_optimistic_delete_func(), btr_cur_pessimistic_delete(): Skip locking and adaptive hash index for the 'default row'. row_log_table_apply_convert_mrec(): Replace 'default row' values if needed. In the temporary file that is applied by row_log_table_apply(), we must identify whether the records contain the extra header for instantly added columns. For now, we will allocate an additional byte for this for ROW_T_INSERT and ROW_T_UPDATE records when the source table has been subject to instant ADD COLUMN. The ROW_T_DELETE records are fine, as they will be converted and will only contain 'core' columns (PRIMARY KEY and some system columns) that are converted from dtuple_t. rec_get_converted_size_temp(), rec_init_offsets_temp(), rec_convert_dtuple_to_temp(): Add the parameter 'status'. REC_INFO_DEFAULT_ROW = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED: An info_bits constant for distinguishing the 'default row' record. rec_comp_status_t: An enum of the status bit values. rec_leaf_format: An enum that replaces the bool parameter of rec_init_offsets_comp_ordinary().
2017-10-06 07:00:05 +03:00
/** Load the instant ALTER TABLE metadata from the clustered index
when loading a table definition.
@param[in,out] table table definition from the data dictionary
@return error code
@retval DB_SUCCESS if no error occurred */
dberr_t
btr_cur_instant_init(dict_table_t* table)
ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
/** Initialize the n_core_null_bytes on first access to a clustered
index root page.
@param[in] index clustered index that is on its first access
@param[in] page clustered index root page
@return whether the page is corrupted */
bool
btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
/** Optimistically latches the leaf page or pages requested.
@param[in] block guessed buffer block
@param[in] modify_clock modify clock value
@param[in,out] latch_mode BTR_SEARCH_LEAF, ...
@param[in,out] cursor cursor
@param[in] file file name
@param[in] line line where called
@param[in] mtr mini-transaction
@return true if success */
bool
btr_cur_optimistic_latch_leaves(
buf_block_t* block,
ib_uint64_t modify_clock,
ulint* latch_mode,
btr_cur_t* cursor,
const char* file,
unsigned line,
mtr_t* mtr);
/********************************************************************//**
Searches an index tree and positions a tree cursor on a given level.
NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
to node pointer page number fields on the upper levels of the tree!
Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
dberr_t
btr_cur_search_to_nth_level_func(
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the tree level of search */
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
tuple must be set so that it cannot get
compared to the node ptr page number field! */
page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...;
NOTE that if the search is made using a unique
prefix of a record, mode should be PAGE_CUR_LE,
not PAGE_CUR_GE, as the latter may end up on
the previous page of the record! Inserts
should always be made using PAGE_CUR_LE to
search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
at most one of BTR_INSERT, BTR_DELETE_MARK,
BTR_DELETE, or BTR_ESTIMATE;
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
NOTE that if ahi_latch, we might not have a
cursor page latch, we assume that ahi_latch
protects the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
#ifdef BTR_CUR_HASH_ADAPT
rw_lock_t* ahi_latch,
/*!< in: currently held btr_search_latch
(in RW_S_LATCH mode), or NULL */
#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with the notable difference that the file format changes are limited to repurposing a previously unused data field in B-tree pages. For persistent InnoDB tables, write the last used AUTO_INCREMENT value to the root page of the clustered index, in the previously unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC. Unlike some other previously unused InnoDB data fields, this one was actually always zero-initialized, at least since MySQL 3.23.49. The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the root page. The SX latch will allow concurrent read access to the root page. (The field PAGE_ROOT_AUTO_INC will only be read on the first-time call to ha_innobase::open() from the SQL layer. The PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so read/write races are not possible.) During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level function btr_cur_search_to_nth_level(), adding no extra page access. [Adaptive hash index lookup will be disabled during INSERT.] If some rare UPDATE modifies an AUTO_INCREMENT column, the PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in ha_innobase::update_row(). When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC field. During ALTER TABLE, the initial AUTO_INCREMENT value will be copied from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will update PAGE_ROOT_AUTO_INC in real time. innodb_col_no(): Determine the dict_table_t::cols[] element index corresponding to a Field of a non-virtual column. (The MySQL 5.7 implementation of virtual columns breaks the 1:1 relationship between Field::field_index and dict_table_t::cols[]. Virtual columns are omitted from dict_table_t::cols[]. Therefore, we must translate the field_index of AUTO_INCREMENT columns into an index of dict_table_t::cols[].) Upgrade from old data files: By default, the AUTO_INCREMENT sequence in old data files would appear to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain the value 0 in each clustered index page. In new data files, PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain any AUTO_INCREMENT column. For backward compatibility, we use the old method of SELECT MAX(auto_increment_column) for initializing the sequence. btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format data file. btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc() that will resort to reading MAX(auto_increment_column) for data files that did not use AUTO_INCREMENT yet. It was manually tested that during the execution of innodb.autoinc_persist the compatibility logic is not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty clustered index root pages). initialize_auto_increment(): Replaces ha_innobase::innobase_initialize_autoinc(). This initializes the AUTO_INCREMENT metadata. Only called from ha_innobase::open(). ha_innobase::info_low(): Do not try to lazily initialize dict_table_t::autoinc. It must already have been initialized by ha_innobase::open() or ha_innobase::create(). Note: The adjustments to class ha_innopart were not tested, because the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 19:56:39 +02:00
mtr_t* mtr, /*!< in/out: mini-transaction */
ib_uint64_t autoinc = 0);
/*!< in: PAGE_ROOT_AUTO_INC to be written
(0 if none) */
#ifdef BTR_CUR_HASH_ADAPT
# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,a,fi,li,mtr)
#else /* BTR_CUR_HASH_ADAPT */
# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,fi,li,mtr)
#endif /* BTR_CUR_HASH_ADAPT */
/*****************************************************************//**
Opens a cursor at either end of an index.
@return DB_SUCCESS or error code */
dberr_t
btr_cur_open_at_index_side_func(
/*============================*/
bool from_left, /*!< in: true if open to the low end,
false if to the high end */
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: latch mode */
btr_cur_t* cursor, /*!< in/out: cursor */
ulint level, /*!< in: level to search for
(0=leaf) */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in/out: mini-transaction */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull));
#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \
btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
/**********************************************************************//**
Positions a cursor at a randomly chosen position within a B-tree.
@return true if the index is available and we have put the cursor, false
if the index is unavailable */
bool
btr_cur_open_at_rnd_pos_func(
/*=========================*/
dict_index_t* index, /*!< in: index */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
btr_cur_t* cursor, /*!< in/out: B-tree cursor */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
#define btr_cur_open_at_rnd_pos(i,l,c,m) \
btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
/*************************************************************//**
Tries to perform an insert to a page in an index tree, next to cursor.
It is assumed that mtr holds an x-latch on the page. The operation does
not succeed if there is too little space on the page. If there is just
one record on the page, the insert will always succeed; this is to
prevent trying to split a page with just one record.
@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
dberr_t
btr_cur_optimistic_insert(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
cursor stays valid */
ulint** offsets,/*!< out: offsets on *rec */
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
mem_heap_t** heap, /*!< in/out: pointer to memory heap */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
be stored externally by the caller */
ulint n_ext, /*!< in: number of externally stored columns */
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
que_thr_t* thr, /*!< in/out: query thread; can be NULL if
!(~flags
& (BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG)) */
mtr_t* mtr) /*!< in/out: mini-transaction;
if this function returns DB_SUCCESS on
a leaf page of a secondary index in a
compressed tablespace, the caller must
mtr_commit(mtr) before latching
any further pages */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
Performs an insert on a page of an index tree. It is assumed that mtr
holds an x-latch on the tree and on the cursor page. If the insert is
made on the leaf level, to avoid deadlocks, mtr must also own x-latches
to brothers of page, if those brothers exist.
@return DB_SUCCESS or error number */
dberr_t
btr_cur_pessimistic_insert(
/*=======================*/
ulint flags, /*!< in: undo logging and locking flags: if not
zero, the parameter thr should be
specified; if no undo logging is specified,
then the caller must have reserved enough
free extents in the file space so that the
insertion will certainly succeed */
btr_cur_t* cursor, /*!< in: cursor after which to insert;
cursor stays valid */
ulint** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
that can be emptied */
dtuple_t* entry, /*!< in/out: entry to insert */
rec_t** rec, /*!< out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
be stored externally by the caller */
ulint n_ext, /*!< in: number of externally stored columns */
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
que_thr_t* thr, /*!< in/out: query thread; can be NULL if
!(~flags
& (BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG)) */
mtr_t* mtr) /*!< in/out: mini-transaction */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result));
/*************************************************************//**
See if there is enough place in the page modification log to log
an update-in-place.
@retval false if out of space; IBUF_BITMAP_FREE will be reset
outside mtr if the page was recompressed
@retval true if enough place;
IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
a secondary index leaf page. This has to be done either within the
same mini-transaction, or by invoking ibuf_reset_free_bits() before
mtr_commit(mtr). */
bool
btr_cur_update_alloc_zip_func(
/*==========================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page */
page_cur_t* cursor, /*!< in/out: B-tree page cursor */
dict_index_t* index, /*!< in: the index corresponding to cursor */
#ifdef UNIV_DEBUG
ulint* offsets,/*!< in/out: offsets of the cursor record */
#endif /* UNIV_DEBUG */
ulint length, /*!< in: size needed */
bool create, /*!< in: true=delete-and-insert,
false=update-in-place */
mtr_t* mtr) /*!< in/out: mini-transaction */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
btr_cur_update_alloc_zip_func(page_zip,cursor,index,offsets,len,cr,mtr)
#else /* UNIV_DEBUG */
# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr) \
btr_cur_update_alloc_zip_func(page_zip,cursor,index,len,cr,mtr)
#endif /* UNIV_DEBUG */
/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
@return locking or undo log related error code, or
@retval DB_SUCCESS on success
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
dberr_t
btr_cur_update_in_place(
/*====================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
const upd_t* update, /*!< in: update vector */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in/out: mini-transaction; if this
is a secondary index, the caller must
mtr_commit(mtr) before latching any
further pages */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((warn_unused_result, nonnull));
2014-02-26 19:23:04 +01:00
/***********************************************************//**
Writes a redo log record of updating a record in-place. */
void
btr_cur_update_in_place_log(
/*========================*/
ulint flags, /*!< in: flags */
const rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: index of the record */
const upd_t* update, /*!< in: update vector */
trx_id_t trx_id, /*!< in: transaction id */
roll_ptr_t roll_ptr, /*!< in: roll ptr */
mtr_t* mtr) /*!< in: mtr */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull));
/*************************************************************//**
Tries to update a record on a page in an index tree. It is assumed that mtr
holds an x-latch on the page. The operation does not succeed if there is too
little space on the page or if the update would result in too empty a page,
so that tree compression is recommended.
@return error code, including
@retval DB_SUCCESS on success
@retval DB_OVERFLOW if the updated record does not fit
@retval DB_UNDERFLOW if the page would become too empty
@retval DB_ZIP_OVERFLOW if there is not enough space left
on the compressed page */
dberr_t
btr_cur_optimistic_update(
/*======================*/
ulint flags, /*!< in: undo logging and locking flags */
btr_cur_t* cursor, /*!< in: cursor on the record to update;
cursor stays valid and positioned on the
same record */
ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
const upd_t* update, /*!< in: update vector; this must also
contain trx id and roll ptr fields */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in/out: mini-transaction; if this
is a secondary index, the caller must
mtr_commit(mtr) before latching any
further pages */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((warn_unused_result, nonnull));
/*************************************************************//**
Performs an update of a record on a page of a tree. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. If the
update is made on the leaf level, to avoid deadlocks, mtr must also
own x-latches to brothers of page, if those brothers exist.
@return DB_SUCCESS or error code */
dberr_t
btr_cur_pessimistic_update(
/*=======================*/
ulint flags, /*!< in: undo logging, locking, and rollback
flags */
btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
cursor may become invalid if *big_rec == NULL
|| !(flags & BTR_KEEP_POS_FLAG) */
ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
mem_heap_t** offsets_heap,
/*!< in/out: pointer to memory heap
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
that can be emptied */
mem_heap_t* entry_heap,
/*!< in/out: memory heap for allocating
big_rec and the index tuple */
big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
MDEV-12358 Work around what looks like a bug in GCC 7.1.0 The parameter thr of the function btr_cur_optimistic_insert() is not declared as nonnull, but GCC 7.1.0 with -O3 is wrongly optimizing away the first part of the condition UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes) when the function is being called by row_merge_insert_index_tuples() with thr==NULL. The fake_changes is an XtraDB addition. This GCC bug only appears to have an impact on XtraDB, not InnoDB. We work around the problem by not attempting to dereference thr when both BTR_NO_LOCKING_FLAG and BTR_NO_UNDO_LOG_FLAG are set in the flags. Probably BTR_NO_LOCKING_FLAG alone should suffice. btr_cur_optimistic_insert(), btr_cur_pessimistic_insert(), btr_cur_pessimistic_update(): Correct comments that disagree with usage and with nonnull attributes. No other parameter than thr can actually be NULL. row_ins_duplicate_error_in_clust(): Remove an unused parameter. innobase_is_fake_change(): Unused function; remove. ibuf_insert_low(), row_log_table_apply(), row_log_apply(), row_undo_mod_clust_low(): Because we will be passing BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG in the flags, the trx->fake_changes flag will be treated as false, which is the right thing to do at these low-level operations (change buffer merge, ALTER TABLE…LOCK=NONE, or ROLLBACK). This might be fixing actual XtraDB bugs. Other callers that pass these two flags are also passing thr=NULL, implying fake_changes=false. (Some callers in ROLLBACK are passing BTR_NO_LOCKING_FLAG and a nonnull thr. In these callers, fake_changes better be false, to avoid corruption.)
2017-05-17 14:08:08 +03:00
be stored externally by the caller */
upd_t* update, /*!< in/out: update vector; this is allowed to
also contain trx id and roll ptr fields.
Non-updated columns that are moved offpage will
be appended to this. */
ulint cmpl_info,/*!< in: compiler info on secondary index
updates */
que_thr_t* thr, /*!< in: query thread */
trx_id_t trx_id, /*!< in: transaction id */
mtr_t* mtr) /*!< in/out: mini-transaction; must be committed
before latching any further pages */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((warn_unused_result, nonnull));
/***********************************************************//**
Marks a clustered index record deleted. Writes an undo log record to
undo log on this delete marking. Writes in the trx id field the id
of the deleting transaction, and in the roll ptr field pointer to the
undo log record created.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
dberr_t
btr_cur_del_mark_set_clust_rec(
/*===========================*/
buf_block_t* block, /*!< in/out: buffer block of the record */
rec_t* rec, /*!< in/out: record */
dict_index_t* index, /*!< in: clustered index of the record */
const ulint* offsets,/*!< in: rec_get_offsets(rec) */
que_thr_t* thr, /*!< in: query thread */
const dtuple_t* entry, /*!< in: dtuple for the deleting record */
mtr_t* mtr) /*!< in/out: mini-transaction */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull, warn_unused_result));
/***********************************************************//**
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
dberr_t
btr_cur_del_mark_set_sec_rec(
/*=========================*/
ulint flags, /*!< in: locking flag */
btr_cur_t* cursor, /*!< in: cursor */
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr) /*!< in/out: mini-transaction */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
deadlocks, mtr must also own x-latches to brothers of page, if those
brothers exist. NOTE: it is assumed that the caller has reserved enough
free extents so that the compression will always succeed if done!
@return TRUE if compression occurred */
ibool
btr_cur_compress_if_useful(
/*=======================*/
btr_cur_t* cursor, /*!< in/out: cursor on the page to compress;
cursor does not stay valid if compression
occurs */
ibool adjust, /*!< in: TRUE if should adjust the
cursor position even if compression occurs */
mtr_t* mtr) /*!< in/out: mini-transaction */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull));
/*******************************************************//**
Removes the record on which the tree cursor is positioned. It is assumed
that the mtr has an x-latch on the page where the cursor is positioned,
but no latch on the whole tree.
@return TRUE if success, i.e., the page did not become too empty */
ibool
btr_cur_optimistic_delete_func(
/*===========================*/
btr_cur_t* cursor, /*!< in: cursor on the record to delete;
cursor stays valid: if deletion succeeds,
on function exit it points to the successor
of the deleted record */
# ifdef UNIV_DEBUG
ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
# endif /* UNIV_DEBUG */
mtr_t* mtr) /*!< in: mtr; if this function returns
TRUE on a leaf page of a secondary
index, the mtr must be committed
before latching any further pages */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull, warn_unused_result));
# ifdef UNIV_DEBUG
# define btr_cur_optimistic_delete(cursor, flags, mtr) \
btr_cur_optimistic_delete_func(cursor, flags, mtr)
# else /* UNIV_DEBUG */
# define btr_cur_optimistic_delete(cursor, flags, mtr) \
btr_cur_optimistic_delete_func(cursor, mtr)
# endif /* UNIV_DEBUG */
/*************************************************************//**
Removes the record on which the tree cursor is positioned. Tries
to compress the page if its fillfactor drops below a threshold
or if it is the only page on the level. It is assumed that mtr holds
an x-latch on the tree and on the cursor page. To avoid deadlocks,
mtr must also own x-latches to brothers of page, if those brothers
exist.
@return TRUE if compression occurred */
ibool
btr_cur_pessimistic_delete(
/*=======================*/
dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
the latter may occur because we may have
to update node pointers on upper levels,
and in the case of variable length keys
these may actually grow in size */
ibool has_reserved_extents, /*!< in: TRUE if the
caller has already reserved enough free
extents so that he knows that the operation
will succeed */
btr_cur_t* cursor, /*!< in: cursor on the record to delete;
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
bool rollback,/*!< in: performing rollback? */
mtr_t* mtr) /*!< in: mtr */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Parses a redo log record of updating a record in-place.
@return end of log record or NULL */
byte*
btr_cur_parse_update_in_place(
/*==========================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
page_t* page, /*!< in/out: page or NULL */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
dict_index_t* index); /*!< in: index corresponding to page */
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a clustered
index record.
@return end of log record or NULL */
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
page_t* page, /*!< in/out: page or NULL */
page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
dict_index_t* index); /*!< in: index corresponding to page */
/****************************************************************//**
Parses the redo log record for delete marking or unmarking of a secondary
index record.
@return end of log record or NULL */
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
byte* ptr, /*!< in: buffer */
byte* end_ptr,/*!< in: buffer end */
page_t* page, /*!< in/out: page or NULL */
page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */
/** Estimates the number of rows in a given index range.
@param[in] index index
@param[in] tuple1 range start, may also be empty tuple
@param[in] mode1 search mode for range start
@param[in] tuple2 range end, may also be empty tuple
@param[in] mode2 search mode for range end
@return estimated number of rows */
int64_t
btr_estimate_n_rows_in_range(
dict_index_t* index,
const dtuple_t* tuple1,
page_cur_mode_t mode1,
const dtuple_t* tuple2,
page_cur_mode_t mode2);
/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
0..n_uniq-1) and the number of pages that were sampled is saved in
index->stat_n_sample_sizes[].
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals.
@return true if the index is available and we get the estimated numbers,
false if the index is unavailable. */
bool
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index); /*!< in: index */
2014-06-09 18:16:00 +02:00
/** Gets the externally stored size of a record, in units of a database page.
@param[in] rec record
@param[in] offsets array returned by rec_get_offsets()
@return externally stored part, in units of a database page */
ulint
btr_rec_get_externally_stored_len(
const rec_t* rec,
const ulint* offsets);
/*******************************************************************//**
Marks non-updated off-page fields as disowned by this record. The ownership
must be transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
to free the field. */
void
btr_cur_disown_inherited_fields(
/*============================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /*!< in/out: record in a clustered index */
dict_index_t* index, /*!< in: index of the page */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
const upd_t* update, /*!< in: update vector */
mtr_t* mtr) /*!< in/out: mini-transaction */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull(2,3,4,5,6)));
/** Operation code for btr_store_big_rec_extern_fields(). */
enum blob_op {
/** Store off-page columns for a freshly inserted record */
BTR_STORE_INSERT = 0,
/** Store off-page columns for an insert by update */
BTR_STORE_INSERT_UPDATE,
/** Store off-page columns for an update */
BTR_STORE_UPDATE,
/** Store off-page columns for a freshly inserted record by bulk */
BTR_STORE_INSERT_BULK
};
/*******************************************************************//**
Determine if an operation on off-page columns is an update.
@return TRUE if op != BTR_STORE_INSERT */
UNIV_INLINE
ibool
btr_blob_op_is_update(
/*==================*/
enum blob_op op) /*!< in: operation */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree.
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
dberr_t
btr_store_big_rec_extern_fields(
/*============================*/
btr_pcur_t* pcur, /*!< in/out: a persistent cursor. if
btr_mtr is restarted, then this can
be repositioned. */
ulint* offsets, /*!< in/out: rec_get_offsets() on
pcur. the "external storage" flags
in offsets will correctly correspond
to rec when this function returns */
const big_rec_t*big_rec_vec, /*!< in: vector containing fields
to be stored externally */
mtr_t* btr_mtr, /*!< in/out: mtr containing the
latches to the clustered index. can be
committed and restarted. */
enum blob_op op) /*! in: operation code */
MY_ATTRIBUTE((warn_unused_result));
/*******************************************************************//**
Frees the space in an externally stored field to the file space
management if the field in data is owned the externally stored field,
in a rollback we may have the additional condition that the field must
not be inherited. */
void
btr_free_externally_stored_field(
/*=============================*/
dict_index_t* index, /*!< in: index of the data, the index
tree MUST be X-latched; if the tree
height is 1, then also the root page
must be X-latched! (this is relevant
in the case this function is called
from purge where 'data' is located on
an undo log page, not an index
page) */
byte* field_ref, /*!< in/out: field reference */
const rec_t* rec, /*!< in: record containing field_ref, for
page_zip_write_blob_ptr(), or NULL */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
or NULL */
page_zip_des_t* page_zip, /*!< in: compressed page corresponding
to rec, or NULL if rec == NULL */
ulint i, /*!< in: field number of field_ref;
ignored if rec == NULL */
bool rollback, /*!< in: performing rollback? */
mtr_t* local_mtr); /*!< in: mtr containing the latch */
/** Copies the prefix of an externally stored field of a record.
The clustered index record must be protected by a lock or a page latch.
@param[out] buf the field, or a prefix of it
@param[in] len length of buf, in bytes
@param[in] page_size BLOB page size
@param[in] data 'internally' stored part of the field
containing also the reference to the external part; must be protected by
a lock or a page latch
@param[in] local_len length of data, in bytes
@return the length of the copied field, or 0 if the column was being
or has been deleted */
ulint
btr_copy_externally_stored_field_prefix(
byte* buf,
ulint len,
const page_size_t& page_size,
const byte* data,
ulint local_len);
/** Copies an externally stored field of a record to mem heap.
The clustered index record must be protected by a lock or a page latch.
@param[out] len length of the whole field
@param[in] data 'internally' stored part of the field
containing also the reference to the external part; must be protected by
a lock or a page latch
@param[in] page_size BLOB page size
@param[in] local_len length of data
@param[in,out] heap mem heap
@return the whole field copied to heap */
byte*
btr_copy_externally_stored_field(
ulint* len,
const byte* data,
const page_size_t& page_size,
ulint local_len,
mem_heap_t* heap);
/** Copies an externally stored field of a record to mem heap.
@param[in] rec record in a clustered index; must be
protected by a lock or a page latch
@param[in] offset array returned by rec_get_offsets()
@param[in] page_size BLOB page size
@param[in] no field number
@param[out] len length of the field
@param[in,out] heap mem heap
@return the field copied to heap, or NULL if the field is incomplete */
byte*
btr_rec_copy_externally_stored_field(
const rec_t* rec,
const ulint* offsets,
const page_size_t& page_size,
ulint no,
ulint* len,
mem_heap_t* heap);
/*******************************************************************//**
Flags the data tuple fields that are marked as extern storage in the
update vector. We use this function to remember which fields we must
mark as extern storage in a record inserted for an update.
@return number of flagged external columns */
ulint
btr_push_update_extern_fields(
/*==========================*/
dtuple_t* tuple, /*!< in/out: data tuple */
const upd_t* update, /*!< in: update vector */
mem_heap_t* heap) /*!< in: memory heap */
2016-06-21 14:21:03 +02:00
MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
void
btr_cur_set_deleted_flag_for_ibuf(
/*==============================*/
rec_t* rec, /*!< in/out: record */
page_zip_des_t* page_zip, /*!< in/out: compressed page
corresponding to rec, or NULL
when the tablespace is uncompressed */
ibool val, /*!< in: value to set */
mtr_t* mtr); /*!< in/out: mini-transaction */
/******************************************************//**
The following function is used to set the deleted bit of a record. */
UNIV_INLINE
void
btr_rec_set_deleted_flag(
/*=====================*/
rec_t* rec, /*!< in/out: physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
ulint flag); /*!< in: nonzero if delete marked */
/** Latches the leaf page or pages requested.
@param[in] block leaf page where the search converged
@param[in] page_id page id of the leaf
@param[in] latch_mode BTR_SEARCH_LEAF, ...
@param[in] cursor cursor
@param[in] mtr mini-transaction
@return blocks and savepoints which actually latched. */
btr_latch_leaves_t
btr_cur_latch_leaves(
buf_block_t* block,
const page_id_t& page_id,
const page_size_t& page_size,
ulint latch_mode,
btr_cur_t* cursor,
mtr_t* mtr);
/*######################################################################*/
/** In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
#define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \
((UNIV_PAGE_SIZE * (ulint)((index)->merge_threshold)) / 100)
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
struct btr_path_t {
/* Assume a page like:
records: (inf, a, b, c, d, sup)
index of the record: 0, 1, 2, 3, 4, 5
*/
/** Index of the record where the page cursor stopped on this level
(index in alphabetical order). Value ULINT_UNDEFINED denotes array
end. In the above example, if the search stopped on record 'c', then
nth_rec will be 3. */
ulint nth_rec;
/** Number of the records on the page, not counting inf and sup.
In the above example n_recs will be 4. */
ulint n_recs;
/** Number of the page containing the record. */
ulint page_no;
/** Level of the page. If later we fetch the page under page_no
and it is no different level then we know that the tree has been
reorganized. */
ulint page_level;
};
#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
/** Values for the flag documenting the used search method */
enum btr_cur_method {
BTR_CUR_HASH = 1, /*!< successful shortcut using
the hash index */
BTR_CUR_HASH_FAIL, /*!< failure using hash, success using
binary search: the misleading hash
reference is stored in the field
hash_node, and might be necessary to
update */
BTR_CUR_BINARY, /*!< success using the binary search */
BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
the insert buffer */
BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
mark in the insert/delete buffer */
BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
the insert/delete buffer */
BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
};
/** The tree cursor: the definition appears here only for the compiler
to know struct size! */
struct btr_cur_t {
dict_index_t* index; /*!< index where positioned */
page_cur_t page_cur; /*!< page cursor */
purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
buf_block_t* left_block; /*!< this field is used to store
a pointer to the left neighbor
page, in the cases
BTR_SEARCH_PREV and
BTR_MODIFY_PREV */
/*------------------------------*/
que_thr_t* thr; /*!< this field is only used
when btr_cur_search_to_nth_level
is called for an index entry
insertion: the calling query
thread is passed here to be
used in the insert buffer */
/*------------------------------*/
/** The following fields are used in
btr_cur_search_to_nth_level to pass information: */
/* @{ */
enum btr_cur_method flag; /*!< Search method used */
ulint tree_height; /*!< Tree height if the search is done
for a pessimistic insert or update
operation */
ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
the number of matched fields to the
the first user record to the right of
the cursor record after
btr_cur_search_to_nth_level;
for the mode PAGE_CUR_GE, the matched
fields to the first user record AT THE
CURSOR or to the right of it;
NOTE that the up_match and low_match
values may exceed the correct values
for comparison to the adjacent user
record if that record is on a
different leaf page! (See the note in
row_ins_duplicate_error_in_clust.) */
ulint up_bytes; /*!< number of matched bytes to the
right at the time cursor positioned;
only used internally in searches: not
defined after the search */
ulint low_match; /*!< if search mode was PAGE_CUR_LE,
the number of matched fields to the
first user record AT THE CURSOR or
to the left of it after
btr_cur_search_to_nth_level;
NOT defined for PAGE_CUR_GE or any
other search modes; see also the NOTE
in up_match! */
ulint low_bytes; /*!< number of matched bytes to the
left at the time cursor positioned;
only used internally in searches: not
defined after the search */
ulint n_fields; /*!< prefix length used in a hash
search if hash_node != NULL */
ulint n_bytes; /*!< hash prefix bytes if hash_node !=
NULL */
ulint fold; /*!< fold value used in the search if
flag is BTR_CUR_HASH */
/* @} */
btr_path_t* path_arr; /*!< in estimating the number of
rows in range, we store in this array
information of the path through
the tree */
rtr_info_t* rtr_info; /*!< rtree search info */
btr_cur_t():thr(NULL), rtr_info(NULL) {}
/* default values */
};
/******************************************************//**
The following function is used to set the deleted bit of a record. */
UNIV_INLINE
void
btr_rec_set_deleted_flag(
/*=====================*/
rec_t* rec, /*!< in/out: physical record */
page_zip_des_t* page_zip,/*!< in/out: compressed page (or NULL) */
ulint flag); /*!< in: nonzero if delete marked */
/** If pessimistic delete fails because of lack of file space, there
is still a good change of success a little later. Try this many
times. */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
/** If pessimistic delete fails because of lack of file space, there
is still a good change of success a little later. Sleep this many
microseconds between retries. */
#define BTR_CUR_RETRY_SLEEP_TIME 50000
/** The reference in a field for which data is stored on a different page.
The reference is at the end of the 'locally' stored part of the field.
'Locally' means storage in the index record.
We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
/*-------------------------------------- @{ */
#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */
#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */
#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header
on that page */
#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the
length of the externally
stored part of the BLOB.
The 2 highest bits are
reserved to the flags below. */
/*-------------------------------------- @} */
/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
significant bit of the byte at smallest address) is set to 1 if this
field does not 'own' the externally stored field; only the owner field
is allowed to free the field in purge! */
#define BTR_EXTERN_OWNER_FLAG 128U
/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
second most significant bit of the byte at smallest address) is 1 then
it means that the externally stored field was inherited from an
earlier version of the row. In rollback we are not allowed to free an
inherited external field. */
#define BTR_EXTERN_INHERITED_FLAG 64U
/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
extern ulint btr_cur_n_non_sea;
/** Old value of btr_cur_n_non_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_non_sea_old;
2017-02-23 23:05:12 +02:00
#ifdef BTR_CUR_HASH_ADAPT
/** Number of successful adaptive hash index lookups in
btr_cur_search_to_nth_level(). */
extern ulint btr_cur_n_sea;
/** Old value of btr_cur_n_sea. Copied by
srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
2017-02-23 23:05:12 +02:00
#endif /* BTR_CUR_HASH_ADAPT */
#ifdef UNIV_DEBUG
/* Flag to limit optimistic insert records */
extern uint btr_cur_limit_optimistic_insert_debug;
#endif /* UNIV_DEBUG */
#include "btr0cur.ic"
#endif