2009-05-27 11:45:59 +02:00
|
|
|
/*****************************************************************************
|
|
|
|
|
2016-08-12 10:17:45 +02:00
|
|
|
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
|
2009-05-27 11:45:59 +02:00
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
|
|
Foundation; version 2 of the License.
|
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
2012-08-01 16:27:34 +02:00
|
|
|
this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
|
2009-05-27 11:45:59 +02:00
|
|
|
|
|
|
|
*****************************************************************************/
|
|
|
|
|
|
|
|
/**************************************************//**
|
|
|
|
@file include/row0row.ic
|
|
|
|
General row routines
|
|
|
|
|
|
|
|
Created 4/20/1996 Heikki Tuuri
|
|
|
|
*******************************************************/
|
|
|
|
|
|
|
|
#include "dict0dict.h"
|
|
|
|
#include "rem0rec.h"
|
|
|
|
#include "trx0undo.h"
|
|
|
|
|
2011-05-24 13:11:21 +02:00
|
|
|
/*********************************************************************//**
|
2011-06-29 09:04:00 +02:00
|
|
|
Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
|
2011-05-24 13:11:21 +02:00
|
|
|
a clustered index record.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return offset of DATA_TRX_ID */
|
2011-05-24 13:11:21 +02:00
|
|
|
UNIV_INLINE
|
|
|
|
ulint
|
2011-06-29 09:04:00 +02:00
|
|
|
row_get_trx_id_offset(
|
|
|
|
/*==================*/
|
2011-05-24 13:11:21 +02:00
|
|
|
const dict_index_t* index, /*!< in: clustered index */
|
2011-06-29 09:04:00 +02:00
|
|
|
const ulint* offsets)/*!< in: record offsets */
|
2011-05-24 13:11:21 +02:00
|
|
|
{
|
|
|
|
ulint pos;
|
|
|
|
ulint offset;
|
|
|
|
ulint len;
|
|
|
|
|
|
|
|
ut_ad(dict_index_is_clust(index));
|
2011-06-29 09:04:00 +02:00
|
|
|
ut_ad(rec_offs_validate(NULL, index, offsets));
|
2011-05-24 13:11:21 +02:00
|
|
|
|
|
|
|
pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
|
|
|
|
|
|
|
|
offset = rec_get_nth_field_offs(offsets, pos, &len);
|
|
|
|
|
|
|
|
ut_ad(len == DATA_TRX_ID_LEN);
|
|
|
|
|
|
|
|
return(offset);
|
|
|
|
}
|
|
|
|
|
2009-05-27 11:45:59 +02:00
|
|
|
/*********************************************************************//**
|
|
|
|
Reads the trx id field from a clustered index record.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return value of the field */
|
2009-05-27 11:45:59 +02:00
|
|
|
UNIV_INLINE
|
|
|
|
trx_id_t
|
|
|
|
row_get_rec_trx_id(
|
|
|
|
/*===============*/
|
2011-05-24 13:11:21 +02:00
|
|
|
const rec_t* rec, /*!< in: record */
|
|
|
|
const dict_index_t* index, /*!< in: clustered index */
|
|
|
|
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
|
2009-05-27 11:45:59 +02:00
|
|
|
{
|
|
|
|
ulint offset;
|
|
|
|
|
|
|
|
ut_ad(dict_index_is_clust(index));
|
|
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
|
|
|
|
|
|
offset = index->trx_id_offset;
|
|
|
|
|
|
|
|
if (!offset) {
|
2011-06-29 09:04:00 +02:00
|
|
|
offset = row_get_trx_id_offset(index, offsets);
|
2009-05-27 11:45:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return(trx_read_trx_id(rec + offset));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*********************************************************************//**
|
|
|
|
Reads the roll pointer field from a clustered index record.
|
2016-08-12 10:17:45 +02:00
|
|
|
@return value of the field */
|
2009-05-27 11:45:59 +02:00
|
|
|
UNIV_INLINE
|
|
|
|
roll_ptr_t
|
|
|
|
row_get_rec_roll_ptr(
|
|
|
|
/*=================*/
|
2011-06-29 09:04:00 +02:00
|
|
|
const rec_t* rec, /*!< in: record */
|
|
|
|
const dict_index_t* index, /*!< in: clustered index */
|
|
|
|
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
|
2009-05-27 11:45:59 +02:00
|
|
|
{
|
|
|
|
ulint offset;
|
|
|
|
|
|
|
|
ut_ad(dict_index_is_clust(index));
|
|
|
|
ut_ad(rec_offs_validate(rec, index, offsets));
|
|
|
|
|
|
|
|
offset = index->trx_id_offset;
|
|
|
|
|
|
|
|
if (!offset) {
|
2011-06-29 09:04:00 +02:00
|
|
|
offset = row_get_trx_id_offset(index, offsets);
|
2009-05-27 11:45:59 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
|
|
|
|
}
|
|
|
|
|
2013-03-25 23:03:13 +01:00
|
|
|
/*****************************************************************//**
|
|
|
|
When an insert or purge to a table is performed, this function builds
|
|
|
|
the entry to be inserted into or purged from an index on the table.
|
|
|
|
@return index entry which should be inserted or purged, or NULL if the
|
|
|
|
externally stored columns in the clustered index record are
|
|
|
|
unavailable and ext != NULL */
|
|
|
|
UNIV_INLINE
|
|
|
|
dtuple_t*
|
|
|
|
row_build_index_entry(
|
|
|
|
/*==================*/
|
|
|
|
const dtuple_t* row, /*!< in: row which should be
|
|
|
|
inserted or purged */
|
|
|
|
const row_ext_t* ext, /*!< in: externally stored column
|
|
|
|
prefixes, or NULL */
|
|
|
|
dict_index_t* index, /*!< in: index on the table */
|
|
|
|
mem_heap_t* heap) /*!< in: memory heap from which
|
|
|
|
the memory for the index entry
|
|
|
|
is allocated */
|
|
|
|
{
|
|
|
|
dtuple_t* entry;
|
|
|
|
|
|
|
|
ut_ad(dtuple_check_typed(row));
|
2016-08-12 10:17:45 +02:00
|
|
|
entry = row_build_index_entry_low(row, ext, index, heap,
|
|
|
|
ROW_BUILD_NORMAL);
|
2013-03-25 23:03:13 +01:00
|
|
|
ut_ad(!entry || dtuple_check_typed(entry));
|
|
|
|
return(entry);
|
|
|
|
}
|
|
|
|
|
2009-05-27 11:45:59 +02:00
|
|
|
/*******************************************************************//**
|
|
|
|
Builds from a secondary index record a row reference with which we can
|
|
|
|
search the clustered index record. */
|
|
|
|
UNIV_INLINE
|
|
|
|
void
|
|
|
|
row_build_row_ref_fast(
|
|
|
|
/*===================*/
|
|
|
|
dtuple_t* ref, /*!< in/out: typed data tuple where the
|
|
|
|
reference is built */
|
|
|
|
const ulint* map, /*!< in: array of field numbers in rec
|
|
|
|
telling how ref should be built from
|
|
|
|
the fields of rec */
|
|
|
|
const rec_t* rec, /*!< in: record in the index; must be
|
|
|
|
preserved while ref is used, as we do
|
|
|
|
not copy field values to heap */
|
|
|
|
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
|
|
|
|
{
|
|
|
|
dfield_t* dfield;
|
|
|
|
const byte* field;
|
|
|
|
ulint len;
|
|
|
|
ulint ref_len;
|
|
|
|
ulint field_no;
|
|
|
|
ulint i;
|
|
|
|
|
|
|
|
ut_ad(rec_offs_validate(rec, NULL, offsets));
|
|
|
|
ut_ad(!rec_offs_any_extern(offsets));
|
|
|
|
ref_len = dtuple_get_n_fields(ref);
|
|
|
|
|
|
|
|
for (i = 0; i < ref_len; i++) {
|
|
|
|
dfield = dtuple_get_nth_field(ref, i);
|
|
|
|
|
|
|
|
field_no = *(map + i);
|
|
|
|
|
|
|
|
if (field_no != ULINT_UNDEFINED) {
|
|
|
|
|
|
|
|
field = rec_get_nth_field(rec, offsets,
|
|
|
|
field_no, &len);
|
|
|
|
dfield_set_data(dfield, field, len);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB
This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with
the notable difference that the file format changes are limited to
repurposing a previously unused data field in B-tree pages.
For persistent InnoDB tables, write the last used AUTO_INCREMENT
value to the root page of the clustered index, in the previously
unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC.
Unlike some other previously unused InnoDB data fields, this one was
actually always zero-initialized, at least since MySQL 3.23.49.
The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the
root page. The SX latch will allow concurrent read access to the root
page. (The field PAGE_ROOT_AUTO_INC will only be read on the
first-time call to ha_innobase::open() from the SQL layer. The
PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so
read/write races are not possible.)
During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level
function btr_cur_search_to_nth_level(), adding no extra page
access. [Adaptive hash index lookup will be disabled during INSERT.]
If some rare UPDATE modifies an AUTO_INCREMENT column, the
PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in
ha_innobase::update_row().
When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC
field.
During ALTER TABLE, the initial AUTO_INCREMENT value will be copied
from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will
update PAGE_ROOT_AUTO_INC in real time.
innodb_col_no(): Determine the dict_table_t::cols[] element index
corresponding to a Field of a non-virtual column.
(The MySQL 5.7 implementation of virtual columns breaks the 1:1
relationship between Field::field_index and dict_table_t::cols[].
Virtual columns are omitted from dict_table_t::cols[]. Therefore,
we must translate the field_index of AUTO_INCREMENT columns into
an index of dict_table_t::cols[].)
Upgrade from old data files:
By default, the AUTO_INCREMENT sequence in old data files would appear
to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain
the value 0 in each clustered index page. In new data files,
PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain
any AUTO_INCREMENT column.
For backward compatibility, we use the old method of
SELECT MAX(auto_increment_column) for initializing the sequence.
btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format
data file.
btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc()
that will resort to reading MAX(auto_increment_column) for data files
that did not use AUTO_INCREMENT yet. It was manually tested that during
the execution of innodb.autoinc_persist the compatibility logic is
not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty
clustered index root pages).
initialize_auto_increment(): Replaces
ha_innobase::innobase_initialize_autoinc(). This initializes
the AUTO_INCREMENT metadata. Only called from ha_innobase::open().
ha_innobase::info_low(): Do not try to lazily initialize
dict_table_t::autoinc. It must already have been initialized by
ha_innobase::open() or ha_innobase::create().
Note: The adjustments to class ha_innopart were not tested, because
the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 18:56:39 +01:00
|
|
|
|
|
|
|
/** Parse the integer data from specified data, which could be
|
|
|
|
DATA_INT, DATA_FLOAT or DATA_DOUBLE. If the value is less than 0
|
|
|
|
and the type is not unsigned then we reset the value to 0
|
|
|
|
@param[in] data data to read
|
|
|
|
@param[in] len length of data
|
|
|
|
@param[in] mtype mtype of data
|
|
|
|
@param[in] unsigned_type if the data is unsigned
|
|
|
|
@return the integer value from the data */
|
|
|
|
ib_uint64_t
|
|
|
|
row_parse_int(
|
|
|
|
const byte* data,
|
|
|
|
ulint len,
|
|
|
|
ulint mtype,
|
|
|
|
bool unsigned_type)
|
|
|
|
{
|
|
|
|
ib_uint64_t value = 0;
|
|
|
|
|
|
|
|
switch (mtype) {
|
|
|
|
case DATA_INT:
|
|
|
|
|
|
|
|
ut_a(len <= sizeof value);
|
|
|
|
value = mach_read_int_type(data, len, unsigned_type);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case DATA_FLOAT:
|
|
|
|
|
|
|
|
ut_a(len == sizeof(float));
|
2017-01-25 09:11:37 +01:00
|
|
|
value = static_cast<ib_uint64_t>(mach_float_read(data));
|
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB
This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with
the notable difference that the file format changes are limited to
repurposing a previously unused data field in B-tree pages.
For persistent InnoDB tables, write the last used AUTO_INCREMENT
value to the root page of the clustered index, in the previously
unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC.
Unlike some other previously unused InnoDB data fields, this one was
actually always zero-initialized, at least since MySQL 3.23.49.
The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the
root page. The SX latch will allow concurrent read access to the root
page. (The field PAGE_ROOT_AUTO_INC will only be read on the
first-time call to ha_innobase::open() from the SQL layer. The
PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so
read/write races are not possible.)
During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level
function btr_cur_search_to_nth_level(), adding no extra page
access. [Adaptive hash index lookup will be disabled during INSERT.]
If some rare UPDATE modifies an AUTO_INCREMENT column, the
PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in
ha_innobase::update_row().
When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC
field.
During ALTER TABLE, the initial AUTO_INCREMENT value will be copied
from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will
update PAGE_ROOT_AUTO_INC in real time.
innodb_col_no(): Determine the dict_table_t::cols[] element index
corresponding to a Field of a non-virtual column.
(The MySQL 5.7 implementation of virtual columns breaks the 1:1
relationship between Field::field_index and dict_table_t::cols[].
Virtual columns are omitted from dict_table_t::cols[]. Therefore,
we must translate the field_index of AUTO_INCREMENT columns into
an index of dict_table_t::cols[].)
Upgrade from old data files:
By default, the AUTO_INCREMENT sequence in old data files would appear
to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain
the value 0 in each clustered index page. In new data files,
PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain
any AUTO_INCREMENT column.
For backward compatibility, we use the old method of
SELECT MAX(auto_increment_column) for initializing the sequence.
btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format
data file.
btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc()
that will resort to reading MAX(auto_increment_column) for data files
that did not use AUTO_INCREMENT yet. It was manually tested that during
the execution of innodb.autoinc_persist the compatibility logic is
not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty
clustered index root pages).
initialize_auto_increment(): Replaces
ha_innobase::innobase_initialize_autoinc(). This initializes
the AUTO_INCREMENT metadata. Only called from ha_innobase::open().
ha_innobase::info_low(): Do not try to lazily initialize
dict_table_t::autoinc. It must already have been initialized by
ha_innobase::open() or ha_innobase::create().
Note: The adjustments to class ha_innopart were not tested, because
the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 18:56:39 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case DATA_DOUBLE:
|
|
|
|
|
|
|
|
ut_a(len == sizeof(double));
|
2017-01-25 09:11:37 +01:00
|
|
|
value = static_cast<ib_uint64_t>(mach_double_read(data));
|
MDEV-6076 Persistent AUTO_INCREMENT for InnoDB
This should be functionally equivalent to WL#6204 in MySQL 8.0.0, with
the notable difference that the file format changes are limited to
repurposing a previously unused data field in B-tree pages.
For persistent InnoDB tables, write the last used AUTO_INCREMENT
value to the root page of the clustered index, in the previously
unused (0) PAGE_MAX_TRX_ID field, now aliased as PAGE_ROOT_AUTO_INC.
Unlike some other previously unused InnoDB data fields, this one was
actually always zero-initialized, at least since MySQL 3.23.49.
The writes to PAGE_ROOT_AUTO_INC are protected by SX or X latch on the
root page. The SX latch will allow concurrent read access to the root
page. (The field PAGE_ROOT_AUTO_INC will only be read on the
first-time call to ha_innobase::open() from the SQL layer. The
PAGE_ROOT_AUTO_INC can only be updated when executing SQL, so
read/write races are not possible.)
During INSERT, the PAGE_ROOT_AUTO_INC is updated by the low-level
function btr_cur_search_to_nth_level(), adding no extra page
access. [Adaptive hash index lookup will be disabled during INSERT.]
If some rare UPDATE modifies an AUTO_INCREMENT column, the
PAGE_ROOT_AUTO_INC will be adjusted in a separate mini-transaction in
ha_innobase::update_row().
When a page is reorganized, we have to preserve the PAGE_ROOT_AUTO_INC
field.
During ALTER TABLE, the initial AUTO_INCREMENT value will be copied
from the table. ALGORITHM=COPY and online log apply in LOCK=NONE will
update PAGE_ROOT_AUTO_INC in real time.
innodb_col_no(): Determine the dict_table_t::cols[] element index
corresponding to a Field of a non-virtual column.
(The MySQL 5.7 implementation of virtual columns breaks the 1:1
relationship between Field::field_index and dict_table_t::cols[].
Virtual columns are omitted from dict_table_t::cols[]. Therefore,
we must translate the field_index of AUTO_INCREMENT columns into
an index of dict_table_t::cols[].)
Upgrade from old data files:
By default, the AUTO_INCREMENT sequence in old data files would appear
to be reset, because PAGE_MAX_TRX_ID or PAGE_ROOT_AUTO_INC would contain
the value 0 in each clustered index page. In new data files,
PAGE_ROOT_AUTO_INC can only be 0 if the table is empty or does not contain
any AUTO_INCREMENT column.
For backward compatibility, we use the old method of
SELECT MAX(auto_increment_column) for initializing the sequence.
btr_read_autoinc(): Read the AUTO_INCREMENT sequence from a new-format
data file.
btr_read_autoinc_with_fallback(): A variant of btr_read_autoinc()
that will resort to reading MAX(auto_increment_column) for data files
that did not use AUTO_INCREMENT yet. It was manually tested that during
the execution of innodb.autoinc_persist the compatibility logic is
not activated (for new files, PAGE_ROOT_AUTO_INC is never 0 in nonempty
clustered index root pages).
initialize_auto_increment(): Replaces
ha_innobase::innobase_initialize_autoinc(). This initializes
the AUTO_INCREMENT metadata. Only called from ha_innobase::open().
ha_innobase::info_low(): Do not try to lazily initialize
dict_table_t::autoinc. It must already have been initialized by
ha_innobase::open() or ha_innobase::create().
Note: The adjustments to class ha_innopart were not tested, because
the source code (native InnoDB partitioning) is not being compiled.
2016-12-14 18:56:39 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
ut_error;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!unsigned_type && static_cast<int64_t>(value) < 0) {
|
|
|
|
value = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return(value);
|
|
|
|
}
|
|
|
|
|