mirror of https://github.com/MariaDB/server.git
synced 2025-01-22 14:54:20 +01:00
commit 50faeda4d6

When the btr_search_latch was split into an array of latches in MySQL 5.7.8
as part of the Oracle Bug#20985298 fix, the "caching" of the latch across
storage engine API calls was removed, and the field trx->has_search_latch
would only be set during a short time frame in the execution of
row_search_mvcc(), which was formerly called row_search_for_mysql(). This
means that the column INFORMATION_SCHEMA.INNODB_TRX.TRX_ADAPTIVE_HASH_LATCHED
will always report 0. That column cannot be removed in MariaDB 10.2, but it
can be removed in future releases.

trx_t::has_search_latch: Remove.

trx_assert_no_search_latch(): Remove.

row_sel_try_search_shortcut_for_mysql(): Remove a redundant condition on
trx->has_search_latch (it was always true).

sync_check_iterate(): Make the parameter const.

sync_check_functor_t: Make the operator() const, and remove result() and
the virtual destructor. There is no need to have mutable state in the
functors.

sync_checker<bool>: Replaces dict_sync_check and btrsea_sync_check.

sync_check: Replaces btrsea_sync_check.

dict_sync_check: Instantiated from sync_checker.

sync_allowed_latches: Use std::find() directly on the array. Remove the
std::vector.

TrxInInnoDB::enter(), TrxInInnoDB::exit(): Remove obviously redundant debug
assertions on trx->in_depth, and use equality comparison against 0 because
it could be more efficient on some architectures.
4264 lines
114 KiB
C++
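/* Editor's note: a minimal standalone sketch of the functor shape the
commit message above describes -- a stateless const operator(), a single
sync_checker<bool> template replacing the two hand-written checker
classes, and std::find() over a plain array instead of a std::vector.
The latch levels and the body of operator() here are illustrative
assumptions, not the real InnoDB definitions. */
#if 0
#include <algorithm>

enum latch_level_t { SYNC_DICT, SYNC_SEARCH_SYS, SYNC_FSP };

/* Base functor: const call operator, no mutable state, no result(). */
struct sync_check_functor_t {
	virtual bool operator()(const latch_level_t level) const = 0;
};

/* One template instead of dict_sync_check and btrsea_sync_check. */
template<bool some_allowed>
struct sync_checker : public sync_check_functor_t {
	/* @return true if the latch is forbidden in this context */
	bool operator()(const latch_level_t level) const override
	{
		if (some_allowed) {
			return(level != SYNC_DICT);
		}
		return(true);
	}
};

typedef sync_checker<true>	dict_sync_check;
typedef sync_checker<false>	sync_check;

/* sync_allowed_latches: std::find() directly on an array. */
struct sync_allowed_latches : public sync_check_functor_t {
	const latch_level_t*	m_begin;
	const latch_level_t*	m_end;

	sync_allowed_latches(const latch_level_t* b, const latch_level_t* e)
		: m_begin(b), m_end(e) {}

	bool operator()(const latch_level_t level) const override
	{
		return(std::find(m_begin, m_end, level) == m_end);
	}
};
#endif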
/*****************************************************************************

Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2016, 2017, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/** @file ha_innopart.cc
Code for native partitioning in InnoDB.

Created Nov 22, 2013 Mattias Jonsson */

#include "univ.i"

/* Include necessary SQL headers */
#include <debug_sync.h>
#include <log.h>
#include <strfunc.h>
#include <sql_acl.h>
#include <sql_class.h>
#include <sql_show.h>
#include <sql_table.h>
#include <my_check_opt.h>

/* Include necessary InnoDB headers */
#include "btr0sea.h"
#include "dict0dict.h"
#include "dict0stats.h"
#include "lock0lock.h"
#include "row0import.h"
#include "row0merge.h"
#include "row0mysql.h"
#include "row0quiesce.h"
#include "row0sel.h"
#include "row0ins.h"
#include "row0upd.h"
#include "fsp0sysspace.h"
#include "ut0ut.h"

#include "ha_innodb.h"
#include "ha_innopart.h"
#include "partition_info.h"
#include "key.h"

#define INSIDE_HA_INNOPART_CC

/* To be backwards compatible we also fold the partition separator on
Windows. */
#ifdef _WIN32
static const char*	part_sep = "#p#";
static const char*	sub_sep = "#sp#";
#else
static const char*	part_sep = "#P#";
static const char*	sub_sep = "#SP#";
#endif /* _WIN32 */

/* Partition separator for *nix platforms */
const char*	part_sep_nix = "#P#";
const char*	sub_sep_nix = "#SP#";

extern char*	innobase_file_format_max;

Ha_innopart_share::Ha_innopart_share(
	TABLE_SHARE*	table_share)
	:
	Partition_share(),
	m_table_parts(),
	m_index_mapping(),
	m_tot_parts(),
	m_index_count(),
	m_ref_count(),
	m_table_share(table_share)
{}

Ha_innopart_share::~Ha_innopart_share()
{
	ut_ad(m_ref_count == 0);
	if (m_table_parts != NULL) {
		ut_free(m_table_parts);
		m_table_parts = NULL;
	}
	if (m_index_mapping != NULL) {
		ut_free(m_index_mapping);
		m_index_mapping = NULL;
	}
}

/** Fold to lower case on Windows or if lower_case_table_names == 1.
@param[in,out]	s	String to fold.*/
void
Ha_innopart_share::partition_name_casedn_str(
	char*	s)
{
#ifdef _WIN32
	innobase_casedn_str(s);
#endif
}

/** Translate and append partition name.
@param[out]	to	String to write in filesystem charset
@param[in]	from	Name in system charset
@param[in]	sep	Separator
@param[in]	len	Max length of to buffer
@return length of written string. */
size_t
Ha_innopart_share::append_sep_and_name(
	char*		to,
	const char*	from,
	const char*	sep,
	size_t		len)
{
	size_t	ret;
	size_t	sep_len = strlen(sep);

	ut_ad(len > sep_len + strlen(from));
	ut_ad(to != NULL);
	ut_ad(from != NULL);
	ut_ad(from[0] != '\0');
	memcpy(to, sep, sep_len);

	ret = tablename_to_filename(from, to + sep_len,
		len - sep_len);

	/* Don't convert to lower case for nix style name. */
	if (strcmp(sep, part_sep_nix) != 0
	    && strcmp(sep, sub_sep_nix) != 0) {

		partition_name_casedn_str(to);
	}

	return(ret + sep_len);
}
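/* Editor's note: an illustrative sketch of how the code above composes
partition names. open_table_parts() below appends "#P#<partition>" and,
for subpartitions, "#SP#<subpartition>" to the "db/table" name, so a
subpartitioned table ends up as e.g. "test/t1#P#p0#SP#sp0". The
simplified tablename_to_filename() here is a stand-in for the real
filename encoding, which also escapes special characters. */
#if 0
#include <cstdio>
#include <cstring>

/* Stand-in for the real encoding function. */
static size_t tablename_to_filename(const char* from, char* to, size_t len)
{
	size_t	n = strlen(from);
	if (n >= len) {
		n = len - 1;
	}
	memcpy(to, from, n);
	to[n] = '\0';
	return(n);
}

static size_t append_sep_and_name(char* to, const char* from,
				  const char* sep, size_t len)
{
	size_t	sep_len = strlen(sep);
	memcpy(to, sep, sep_len);
	return(sep_len + tablename_to_filename(from, to + sep_len,
					       len - sep_len));
}

int main()
{
	char	name[512] = "test/t1";
	size_t	pos = strlen(name);

	pos += append_sep_and_name(name + pos, "p0", "#P#",
				   sizeof(name) - pos);
	append_sep_and_name(name + pos, "sp0", "#SP#",
			    sizeof(name) - pos);
	printf("%s\n", name);	/* prints: test/t1#P#p0#SP#sp0 */
	return(0);
}
#endif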

/** Copy a cached MySQL row.
If requested, also avoids overwriting non-read columns.
@param[out]	buf		Row in MySQL format.
@param[in]	cached_row	Which row to copy. */
inline
void
ha_innopart::copy_cached_row(
	uchar*		buf,
	const uchar*	cached_row)
{
	if (m_prebuilt->keep_other_fields_on_keyread) {
		row_sel_copy_cached_fields_for_mysql(buf, cached_row,
			m_prebuilt);
	} else {
		memcpy(buf, cached_row, m_rec_length);
	}
}

/** Open one partition.
@param[in]	part_id		Partition id to open.
@param[in]	partition_name	Name of internal innodb table to open.
@return false on success else true. */
bool
Ha_innopart_share::open_one_table_part(
	uint		part_id,
	const char*	partition_name)
{
	char	norm_name[FN_REFLEN];

	normalize_table_name(norm_name, partition_name);
	m_table_parts[part_id] =
		ha_innobase::open_dict_table(partition_name, norm_name,
			TRUE, DICT_ERR_IGNORE_NONE);

	if (m_table_parts[part_id] == NULL) {
		return(true);
	}

	dict_table_t *ib_table = m_table_parts[part_id];
	if ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
	     && m_table_share->fields
		!= (dict_table_get_n_user_cols(ib_table)
		    + dict_table_get_n_v_cols(ib_table)))
	    || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
		&& (m_table_share->fields
		    != dict_table_get_n_user_cols(ib_table)
		       + dict_table_get_n_v_cols(ib_table) - 1))) {
		ib::warn() << "Partition `" << get_partition_name(part_id)
			<< "` contains " << dict_table_get_n_user_cols(ib_table)
			<< " user defined columns in InnoDB, but "
			<< m_table_share->fields
			<< " columns in MySQL. Please check"
			" INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and " REFMAN
			"innodb-troubleshooting.html for how to resolve the"
			" issue.";

		/* Mark this partition as corrupted, so that drop table
		or force recovery can still use it, but nothing else.
		TODO: persist table->corrupted so it will be retained on
		restart and out-of-bounds operations will see it. */

		ib_table->corrupted = true;
		dict_table_close(ib_table, FALSE, FALSE);
	}

	/* TODO: To save memory, compare with the first partition and reuse
	the column names etc. in the internal InnoDB meta-data cache. */

	return(false);
}

/** Set up the virtual column template for a partitioned table, and point
all m_table_parts[]->vc_templ to it.
@param[in]	table	MySQL TABLE object
@param[in]	ib_table	InnoDB dict_table_t
@param[in]	name	Table name (db/table_name) */
void
Ha_innopart_share::set_v_templ(
	TABLE*		table,
	dict_table_t*	ib_table,
	const char*	name)
{
	ut_ad(mutex_own(&dict_sys->mutex));

	if (ib_table->n_v_cols > 0) {
		for (ulint i = 0; i < m_tot_parts; i++) {
			if (m_table_parts[i]->vc_templ == NULL) {
				m_table_parts[i]->vc_templ
					= UT_NEW_NOKEY(dict_vcol_templ_t());
				m_table_parts[i]->vc_templ->vtempl = NULL;
			} else if (m_table_parts[i]->get_ref_count() == 1) {
				/* Clean and refresh the template */
				dict_free_vc_templ(m_table_parts[i]->vc_templ);
				m_table_parts[i]->vc_templ->vtempl = NULL;
			}

			if (m_table_parts[i]->vc_templ->vtempl == NULL) {
				innobase_build_v_templ(
					table, ib_table,
					m_table_parts[i]->vc_templ,
					NULL, true, name);
			}
		}
	}
}

/** Initialize the share with table and indexes per partition.
@param[in]	part_info	Partition info (partition names to use).
@param[in]	table_name	Table name (db/table_name).
@return false on success else true. */
bool
Ha_innopart_share::open_table_parts(
	partition_info*	part_info,
	const char*	table_name)
{
	size_t	table_name_len;
	size_t	len;
	uint	ib_num_index;
	uint	mysql_num_index;
	char	partition_name[FN_REFLEN];
	bool	index_loaded = true;

#ifndef DBUG_OFF
	if (m_table_share->tmp_table == NO_TMP_TABLE) {
		mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data);
	}
#endif /* DBUG_OFF */
	m_ref_count++;
	if (m_table_parts != NULL) {
		ut_ad(m_ref_count > 1);
		ut_ad(m_tot_parts > 0);

		/* Increment dict_table_t reference count for all partitions */
		mutex_enter(&dict_sys->mutex);
		for (uint i = 0; i < m_tot_parts; i++) {
			dict_table_t*	table = m_table_parts[i];
			table->acquire();
			ut_ad(table->get_ref_count() >= m_ref_count);
		}
		mutex_exit(&dict_sys->mutex);

		return(false);
	}
	ut_ad(m_ref_count == 1);
	m_tot_parts = part_info->get_tot_partitions();
	size_t	table_parts_size = sizeof(dict_table_t*) * m_tot_parts;
	m_table_parts = static_cast<dict_table_t**>(
		ut_zalloc(table_parts_size, mem_key_partitioning));
	if (m_table_parts == NULL) {
		m_ref_count--;
		return(true);
	}

	/* Set up the array over all table partitions. */
	table_name_len = strlen(table_name);
	memcpy(partition_name, table_name, table_name_len);
	List_iterator<partition_element>
		part_it(part_info->partitions);
	partition_element*	part_elem;
	uint	i = 0;

	while ((part_elem = part_it++)) {
		len = append_sep_and_name(
			partition_name + table_name_len,
			part_elem->partition_name,
			part_sep_nix,
			FN_REFLEN - table_name_len);
		if (part_info->is_sub_partitioned()) {
			List_iterator<partition_element>
				sub_it(part_elem->subpartitions);
			partition_element*	sub_elem;
			while ((sub_elem = sub_it++)) {
				append_sep_and_name(
					partition_name
					+ table_name_len + len,
					sub_elem->partition_name,
					sub_sep_nix,
					FN_REFLEN - table_name_len - len);
				if (open_one_table_part(i, partition_name)) {
					goto err;
				}
				i++;
			}
		} else {
			if (open_one_table_part(i, partition_name)) {
				goto err;
			}
			i++;
		}
	}
	ut_ad(i == m_tot_parts);

	/* Create the mapping of mysql index number to innodb indexes. */

	ib_num_index = (uint) UT_LIST_GET_LEN(m_table_parts[0]->indexes);
	mysql_num_index = part_info->table->s->keys;

	/* If there exists an inconsistency between the MySQL and InnoDB
	dictionary (metadata) information, the number of indexes defined
	in MySQL could exceed that in InnoDB; do not build the index
	translation table in such a case. */

	if (ib_num_index < mysql_num_index) {
		ut_ad(0);
		goto err;
	}

	if (mysql_num_index != 0) {
		size_t	alloc_size = mysql_num_index * m_tot_parts
			* sizeof(*m_index_mapping);
		m_index_mapping = static_cast<dict_index_t**>(
			ut_zalloc(alloc_size, mem_key_partitioning));
		if (m_index_mapping == NULL) {

			/* Report an error if index_mapping continues to be
			NULL and mysql_num_index is a non-zero value. */

			ib::error() << "Failed to allocate memory for"
				" index translation table. Number of"
				" Index:" << mysql_num_index;
			goto err;
		}
	}

	/* For each index in the mysql key_info array, fetch its
	corresponding InnoDB index pointer into the index_mapping
	array. */

	for (ulint idx = 0; idx < mysql_num_index; idx++) {
		for (ulint part = 0; part < m_tot_parts; part++) {
			ulint	count = part * mysql_num_index + idx;

			/* Fetch index pointers into index_mapping according
			to mysql index sequence. */

			m_index_mapping[count] = dict_table_get_index_on_name(
				m_table_parts[part],
				part_info->table->key_info[idx].name);

			if (m_index_mapping[count] == NULL) {
				ib::error() << "Cannot find index `"
					<< part_info->table->key_info[idx].name
					<< "` in InnoDB index dictionary"
					" partition `"
					<< get_partition_name(part) << "`.";
				index_loaded = false;
				break;
			}

			/* Double check that the fetched index has the same
			column info as that in mysql key_info. */

			if (!innobase_match_index_columns(
					&part_info->table->key_info[idx],
					m_index_mapping[count])) {
				ib::error() << "Found index `"
					<< part_info->table->key_info[idx].name
					<< "` whose column info does not match"
					" that of MySQL.";
				index_loaded = false;
				break;
			}
		}
	}
	if (!index_loaded && m_index_mapping != NULL) {
		ut_free(m_index_mapping);
		m_index_mapping = NULL;
	}

	/* Successfully built the translation table. */
	m_index_count = mysql_num_index;

	return(false);
err:
	close_table_parts();

	return(true);
}

/** Close all partitions. */
void
Ha_innopart_share::close_table_parts()
{
#ifndef DBUG_OFF
	if (m_table_share->tmp_table == NO_TMP_TABLE) {
		mysql_mutex_assert_owner(&m_table_share->LOCK_ha_data);
	}
#endif /* DBUG_OFF */
	m_ref_count--;
	if (m_ref_count != 0) {

		/* Decrement dict_table_t reference count for all partitions */
		mutex_enter(&dict_sys->mutex);
		for (uint i = 0; i < m_tot_parts; i++) {
			dict_table_t*	table = m_table_parts[i];
			table->release();
			ut_ad(table->get_ref_count() >= m_ref_count);
		}
		mutex_exit(&dict_sys->mutex);

		return;
	}

	/* Last instance closed; close all table partitions and
	free the memory. */

	mutex_enter(&dict_sys->mutex);
	if (m_table_parts != NULL) {
		for (uint i = 0; i < m_tot_parts; i++) {
			if (m_table_parts[i] != NULL) {
				dict_table_close(m_table_parts[i], TRUE, TRUE);
			}
		}
		ut_free(m_table_parts);
		m_table_parts = NULL;
	}
	mutex_exit(&dict_sys->mutex);
	if (m_index_mapping != NULL) {
		ut_free(m_index_mapping);
		m_index_mapping = NULL;
	}

	m_tot_parts = 0;
	m_index_count = 0;
}

/** Get index.
Find the index of the specified partition and key number.
@param[in]	part_id	Partition number.
@param[in]	keynr	Key number.
@return Index pointer or NULL. */
inline
dict_index_t*
Ha_innopart_share::get_index(
	uint	part_id,
	uint	keynr)
{
	ut_a(part_id < m_tot_parts);
	ut_ad(keynr < m_index_count || keynr == MAX_KEY);
	if (m_index_mapping == NULL
	    || keynr >= m_index_count) {

		if (keynr == MAX_KEY) {
			return(dict_table_get_first_index(
				get_table_part(part_id)));
		}
		return(NULL);
	}
	return(m_index_mapping[m_index_count * part_id + keynr]);
}
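/* Editor's note: illustrative only. m_index_mapping is a single flat
array laid out row-major: one row of m_index_count entries per
partition, so (part_id, keynr) maps to m_index_count * part_id + keynr,
exactly as in get_index() above. The sketch below shows the same
addressing with plain ints. */
#if 0
#include <cassert>

int main()
{
	const unsigned	tot_parts = 3;
	const unsigned	index_count = 2;
	int		mapping[tot_parts * index_count];

	for (unsigned part = 0; part < tot_parts; part++) {
		for (unsigned key = 0; key < index_count; key++) {
			/* Same formula as Ha_innopart_share::get_index(). */
			mapping[index_count * part + key]
				= 100 * part + key;
		}
	}
	/* Partition 2, key 1 lands in slot 2*2 + 1 = 5. */
	assert(mapping[index_count * 2 + 1] == 201);
	return(0);
}
#endif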

/** Get MySQL key number corresponding to InnoDB index.
Calculates the key number used inside MySQL for an Innobase index. We will
first check the "index translation table" for a match of the index to get
the index number. If there does not exist an "index translation table",
or we are not able to find the index in the translation table, then we will
fall back to the traditional way of looping through the dict_index_t list
to find a match. In this case, we have to take into account whether we
generated a default clustered index for the table.
@param[in]	part_id	Partition the index belongs to.
@param[in]	index	Index to return MySQL key number for.
@return the key number used inside MySQL or UINT_MAX if key is not found. */
inline
uint
Ha_innopart_share::get_mysql_key(
	uint			part_id,
	const dict_index_t*	index)
{
	ut_ad(index != NULL);
	ut_ad(m_index_mapping != NULL);
	ut_ad(m_tot_parts);

	if (index != NULL && m_index_mapping != NULL) {
		uint	start;
		uint	end;

		if (part_id < m_tot_parts) {
			start = part_id * m_index_count;
			end = start + m_index_count;
		} else {
			start = 0;
			end = m_tot_parts * m_index_count;
		}
		for (uint i = start; i < end; i++) {
			if (m_index_mapping[i] == index) {
				return(i % m_index_count);
			}
		}

		/* Print an error message if we cannot find the index
		in the "index translation table". */

		if (index->is_committed()) {
			ib::error() << "Cannot find index "
				<< index->name
				<< " in InnoDB index translation table.";
		}
	}

	return(UINT_MAX);
}

/** Helper function for setting a bit in a bitmap.
@param[in,out]	buf	Bitmap buffer to update bit in.
@param[in]	pos	Bit number (index starts at 0). */
static
inline
void
set_bit(
	byte*	buf,
	size_t	pos)
{
	buf[pos/8] |= (0x1 << (pos & 0x7));
}

/** Helper function for clearing a bit in a bitmap.
@param[in,out]	buf	Bitmap buffer to update bit in.
@param[in]	pos	Bit number (index starts at 0). */
static
inline
void
clear_bit(
	byte*	buf,
	size_t	pos)
{
	buf[pos/8] &= ~(0x1 << (pos & 0x7));
}

/** Helper function for getting a bit in a bitmap.
@param[in,out]	buf	Bitmap buffer.
@param[in]	pos	Bit number (index starts at 0).
@return byte set to 0x0 or 0x1.
@retval 0x0 bit not set.
@retval 0x1 bit set. */
static
inline
byte
get_bit(
	byte*	buf,
	size_t	pos)
{
	return((buf[pos/8] >> (pos & 0x7)) & 0x1);
}
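/* Editor's note: an illustrative check of the bitmap helpers above.
pos / 8 selects the byte and pos & 0x7 the bit within it, so bit 10
lives in byte 1, bit 2. The snippet uses this file's own set_bit(),
clear_bit() and get_bit() and would compile within this translation
unit. */
#if 0
#include <cassert>
#include <cstring>

int main()
{
	byte	buf[2];
	memset(buf, 0, sizeof(buf));

	set_bit(buf, 10);
	assert(buf[1] == 0x04);		/* 0x1 << (10 & 0x7) */
	assert(get_bit(buf, 10) == 0x1);

	clear_bit(buf, 10);
	assert(get_bit(buf, 10) == 0x0);
	return(0);
}
#endif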

/** Helper class for encapsulating new/altered partitions during
ADD/REORG/... PARTITION. */
class Altered_partitions
{
private:
	/** New partitions during ADD/REORG/... PARTITION. */
	dict_table_t**	m_new_table_parts;

	/** Insert nodes per partition. */
	ins_node_t**	m_ins_nodes;

	/** sql_stat_start per partition. */
	byte*		m_sql_stat_start;

	/** Trx id per partition. */
	trx_id_t*	m_trx_ids;

	/** Number of new partitions. */
	size_t		m_num_new_parts;

	/** Only need to create the partitions (no open/lock). */
	bool		m_only_create;

public:
	Altered_partitions(
		uint n_partitions,
		bool only_create);

	~Altered_partitions();

	bool
	initialize();

	bool
	only_create() const
	{
		return(m_only_create);
	}

	/** Set currently used partition.
	@param[in]	new_part_id	Partition id to set.
	@param[in]	part	InnoDB table to use. */
	inline
	void
	set_part(
		ulint		new_part_id,
		dict_table_t*	part)
	{
		ut_ad(m_new_table_parts[new_part_id] == NULL);
		m_new_table_parts[new_part_id] = part;
		set_bit(m_sql_stat_start, new_part_id);
	}

	/** Get lower level InnoDB table for partition.
	@param[in]	part_id	Partition id.
	@return Lower level InnoDB table for the partition id. */
	inline
	dict_table_t*
	part(
		uint	part_id) const
	{
		ut_ad(part_id < m_num_new_parts);
		return(m_new_table_parts[part_id]);
	}

	/** Set up prebuilt for using a specified partition.
	@param[in]	prebuilt	Prebuilt to update.
	@param[in]	new_part_id	Partition to use. */
	inline
	void
	get_prebuilt(
		row_prebuilt_t*	prebuilt,
		uint		new_part_id) const
	{
		ut_ad(m_new_table_parts[new_part_id]);
		prebuilt->table = m_new_table_parts[new_part_id];
		prebuilt->ins_node = m_ins_nodes[new_part_id];
		prebuilt->trx_id = m_trx_ids[new_part_id];
		prebuilt->sql_stat_start = get_bit(m_sql_stat_start,
			new_part_id);
	}

	/** Update cached values for a partition from prebuilt.
	@param[in]	prebuilt	Prebuilt to copy from.
	@param[in]	new_part_id	Partition id to copy. */
	inline
	void
	set_from_prebuilt(
		row_prebuilt_t*	prebuilt,
		uint		new_part_id)
	{
		ut_ad(m_new_table_parts[new_part_id] == prebuilt->table);
		m_ins_nodes[new_part_id] = prebuilt->ins_node;
		m_trx_ids[new_part_id] = prebuilt->trx_id;
		if (prebuilt->sql_stat_start == 0) {
			clear_bit(m_sql_stat_start, new_part_id);
		}
	}
};

Altered_partitions::Altered_partitions(
	uint n_partitions,
	bool only_create)
	:
	m_new_table_parts(),
	m_ins_nodes(),
	m_sql_stat_start(),
	m_trx_ids(),
	m_num_new_parts(n_partitions),
	m_only_create(only_create)
{}

Altered_partitions::~Altered_partitions()
{
	if (m_new_table_parts != NULL) {
		for (ulint i = 0; i < m_num_new_parts; i++) {
			if (m_new_table_parts[i] != NULL) {
				dict_table_close(m_new_table_parts[i],
					false, true);
			}
		}
		ut_free(m_new_table_parts);
		m_new_table_parts = NULL;
	}
	if (m_ins_nodes != NULL) {
		for (ulint i = 0; i < m_num_new_parts; i++) {
			if (m_ins_nodes[i] != NULL) {
				ins_node_t*	ins = m_ins_nodes[i];
				ut_ad(ins->select == NULL);
				que_graph_free_recursive(ins->select);
				ins->select = NULL;
				if (ins->entry_sys_heap != NULL) {
					mem_heap_free(ins->entry_sys_heap);
					ins->entry_sys_heap = NULL;
				}
			}
		}
		ut_free(m_ins_nodes);
		m_ins_nodes = NULL;
	}
	if (m_sql_stat_start != NULL) {
		ut_free(m_sql_stat_start);
		m_sql_stat_start = NULL;
	}
	if (m_trx_ids != NULL) {
		ut_free(m_trx_ids);
		m_trx_ids = NULL;
	}
}

/** Initialize the object.
@return false on success else true. */
bool
Altered_partitions::initialize()
{
	size_t	alloc_size = sizeof(*m_new_table_parts) * m_num_new_parts;
	m_new_table_parts = static_cast<dict_table_t**>(
		ut_zalloc(alloc_size, mem_key_partitioning));
	if (m_new_table_parts == NULL) {
		return(true);
	}

	alloc_size = sizeof(*m_ins_nodes) * m_num_new_parts;
	m_ins_nodes = static_cast<ins_node_t**>(
		ut_zalloc(alloc_size, mem_key_partitioning));
	if (m_ins_nodes == NULL) {
		ut_free(m_new_table_parts);
		m_new_table_parts = NULL;
		return(true);
	}

	alloc_size = sizeof(*m_sql_stat_start)
		* UT_BITS_IN_BYTES(m_num_new_parts);
	m_sql_stat_start = static_cast<byte*>(
		ut_zalloc(alloc_size, mem_key_partitioning));
	if (m_sql_stat_start == NULL) {
		ut_free(m_new_table_parts);
		m_new_table_parts = NULL;
		ut_free(m_ins_nodes);
		m_ins_nodes = NULL;
		return(true);
	}

	alloc_size = sizeof(*m_trx_ids) * m_num_new_parts;
	m_trx_ids = static_cast<trx_id_t*>(
		ut_zalloc(alloc_size, mem_key_partitioning));
	if (m_trx_ids == NULL) {
		ut_free(m_new_table_parts);
		m_new_table_parts = NULL;
		ut_free(m_ins_nodes);
		m_ins_nodes = NULL;
		ut_free(m_sql_stat_start);
		m_sql_stat_start = NULL;
		return(true);
	}

	return(false);
}

/** Construct ha_innopart handler.
@param[in]	hton	Handlerton.
@param[in]	table_arg	MySQL Table.
@return a new ha_innopart handler. */
ha_innopart::ha_innopart(
	handlerton*	hton,
	TABLE_SHARE*	table_arg)
	:
	ha_innobase(hton, table_arg),
	Partition_helper(this),
	m_ins_node_parts(),
	m_upd_node_parts(),
	m_blob_heap_parts(),
	m_trx_id_parts(),
	m_row_read_type_parts(),
	m_sql_stat_start_parts(),
	m_pcur(),
	m_clust_pcur(),
	m_new_partitions()
{
	m_int_table_flags &= ~(HA_INNOPART_DISABLED_TABLE_FLAGS);

	/* INNOBASE_SHARE is not used in ha_innopart.
	This also flags for ha_innobase that it is a partitioned table,
	and makes it impossible to use the legacy share functionality. */

	m_share = NULL;
}

/** Destruct ha_innopart handler. */
ha_innopart::~ha_innopart()
{}

/** Return the supported alter table flags.
@param[in]	flags	Flags to support.
@return Supported flags. */
uint
ha_innopart::alter_table_flags(
	uint	flags)
{
	return(HA_PARTITION_FUNCTION_SUPPORTED | HA_FAST_CHANGE_PARTITION);
}

/** Set the autoinc column max value.
This should only be called once from ha_innobase::open().
Therefore there's no need for a covering lock.
@param[in]	no_lock	Ignored!
@return 0 for success or error code. */
inline
int
ha_innopart::initialize_auto_increment(
	bool	/* no_lock */)
{
	int		error = 0;
	ulonglong	auto_inc = 0;
	const Field*	field = table->found_next_number_field;

#ifndef DBUG_OFF
	if (table_share->tmp_table == NO_TMP_TABLE)
	{
		mysql_mutex_assert_owner(m_part_share->auto_inc_mutex);
	}
#endif

	/* Since a table can already be "open" in InnoDB's internal
	data dictionary, we only init the autoinc counter once, the
	first time the table is loaded. We can safely reuse the
	autoinc value from a previous MySQL open. */

	if (m_part_share->auto_inc_initialized) {
		/* Already initialized, nothing to do. */
		return(0);
	}

	if (field == NULL) {
		ib::info() << "Unable to determine the AUTOINC column name";
	}

	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
		/* If the recovery level is set so high that writes
		are disabled we force the AUTOINC counter to 0
		value effectively disabling writes to the table.
		Secondly, we avoid reading the table in case the read
		results in failure due to a corrupted table/index.

		We will not return an error to the client, so that the
		tables can be dumped with minimal hassle. If an error
		were returned in this case, the first attempt to read
		the table would fail and subsequent SELECTs would succeed. */

	} else if (field == NULL) {
		/* This is a far more serious error, best to avoid
		opening the table and return failure. */

		my_error(ER_AUTOINC_READ_FAILED, MYF(0));
		error = HA_ERR_AUTOINC_READ_FAILED;
	} else {
		ib_uint64_t	col_max_value = field->get_max_int_value();

		update_thd(ha_thd());

		for (uint part = 0; part < m_tot_parts; part++) {
			dict_table_t*	ib_table
				= m_part_share->get_table_part(part);
			dict_table_autoinc_lock(ib_table);
			ut_ad(ib_table->persistent_autoinc);
			ib_uint64_t	read_auto_inc
				= dict_table_autoinc_read(ib_table);
			if (read_auto_inc == 0) {
				read_auto_inc = btr_read_autoinc(
					dict_table_get_first_index(ib_table));

				/* At this stage we do not know the
				increment nor the offset,
				so use a default increment of 1. */

				read_auto_inc = innobase_next_autoinc(
					read_auto_inc, 1, 1, 0, col_max_value);
				dict_table_autoinc_initialize(ib_table,
					read_auto_inc);
			}
			set_if_bigger(auto_inc, read_auto_inc);
			dict_table_autoinc_unlock(ib_table);
		}
	}

done:
	m_part_share->next_auto_inc_val = auto_inc;
	m_part_share->auto_inc_initialized = true;
	return(error);
}
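/* Editor's note: illustrative only. The loop above initializes the
shared counter to the maximum of the per-partition counters, so the
next generated value is valid for every partition. set_if_bigger(a, b)
is simply a = max(a, b), as this standalone sketch shows: */
#if 0
#include <algorithm>
#include <cassert>

int main()
{
	unsigned long long	part_autoinc[] = { 17, 42, 5 };
	unsigned long long	auto_inc = 0;

	for (unsigned long long v : part_autoinc) {
		/* Equivalent of set_if_bigger(auto_inc, v). */
		auto_inc = std::max(auto_inc, v);
	}
	assert(auto_inc == 42);
	return(0);
}
#endif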

/** Opens a partitioned InnoDB table.
Initializes needed data and opens the table which already exists
in an InnoDB database.
@param[in]	name	Table name (db/tablename)
@param[in]	mode	Not used
@param[in]	test_if_locked	Not used
@return 0 or error number. */
int
ha_innopart::open(
	const char*	name,
	int		/*mode*/,
	uint		/*test_if_locked*/)
{
	dict_table_t*	ib_table;
	char		norm_name[FN_REFLEN];

	DBUG_ENTER("ha_innopart::open");

	ut_ad(table);
	if (m_part_info == NULL) {
		/* Must be during ::clone()! */
		ut_ad(table->part_info != NULL);
		m_part_info = table->part_info;
	}

	/* In some cases MySQL seems to call this function while
	holding search latch(es). This breaks the latching order as
	we acquire dict_sys->mutex below and leads to a deadlock. */

	normalize_table_name(norm_name, name);

	m_user_thd = NULL;

	/* Get the Ha_innopart_share from the TABLE_SHARE. */
	lock_shared_ha_data();
	m_part_share = static_cast<Ha_innopart_share*>(get_ha_share_ptr());
	if (m_part_share == NULL) {
		m_part_share = new (std::nothrow)
			Ha_innopart_share(table_share);
		if (m_part_share == NULL) {
share_error:
			unlock_shared_ha_data();
			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
		}
		set_ha_share_ptr(static_cast<Handler_share*>(m_part_share));
	}
	if (m_part_share->open_table_parts(m_part_info, name)
	    || m_part_share->populate_partition_name_hash(m_part_info)) {
		goto share_error;
	}
	if (m_part_share->auto_inc_mutex == NULL
	    && table->found_next_number_field != NULL) {
		if (m_part_share->init_auto_inc_mutex(table_share)) {
			goto share_error;
		}
	}
	unlock_shared_ha_data();

	/* Will be allocated if it is needed in ::update_row(). */
	m_upd_buf = NULL;
	m_upd_buf_size = 0;

	/* Get pointer to a table object in InnoDB dictionary cache. */
	ib_table = m_part_share->get_table_part(0);

	m_pcur_parts = NULL;
	m_clust_pcur_parts = NULL;
	m_pcur_map = NULL;

	/* TODO: Handle mismatching #P# vs #p# in upgrading to new DD instead!
	See bug#58406. The problem exists when moving partitioned tables
	between Windows and Unix-like platforms. InnoDB always folds the name
	on Windows; partitioning never folds the partition name (or the #P#
	separator). I.e. none of it follows lower_case_table_names
	correctly :( */

	if (open_partitioning(m_part_share))
	{
		close();
		DBUG_RETURN(HA_ERR_INITIALIZATION);
	}

	/* Currently we track statistics for all partitions, but for
	the secondary indexes we only use the biggest partition. */

	for (uint part_id = 0; part_id < m_tot_parts; part_id++) {
		innobase_copy_frm_flags_from_table_share(
			m_part_share->get_table_part(part_id),
			table->s);
		dict_stats_init(m_part_share->get_table_part(part_id));
	}

	MONITOR_INC(MONITOR_TABLE_OPEN);

	bool	no_tablespace;
	THD*	thd = ha_thd();

	/* TODO: Should we do this check for every partition during ::open()? */
	/* TODO: refactor this in ha_innobase so it can increase code reuse. */
	if (dict_table_is_discarded(ib_table)) {

		ib_senderrf(thd,
			IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
			table->s->table_name.str);

		/* Allow an open because a proper DISCARD should have set
		all the flags and index root page numbers to FIL_NULL,
		which should prevent any DML from running but should still
		allow DDL operations. */

		no_tablespace = false;

	} else if (ib_table->ibd_file_missing) {

		ib_senderrf(
			thd, IB_LOG_LEVEL_WARN,
			ER_TABLESPACE_MISSING, norm_name);

		/* This means we have no idea what happened to the tablespace
		file, best to play it safe. */

		no_tablespace = true;
	} else {
		no_tablespace = false;
	}

	if (!thd_tablespace_op(thd) && no_tablespace) {
		set_my_errno(ENOENT);

		lock_shared_ha_data();
		m_part_share->close_table_parts();
		unlock_shared_ha_data();
		m_part_share = NULL;

		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
	}

	m_prebuilt = row_create_prebuilt(ib_table, table->s->reclength);

	m_prebuilt->default_rec = table->s->default_values;
	ut_ad(m_prebuilt->default_rec);

	DBUG_ASSERT(table != NULL);
	m_prebuilt->m_mysql_table = table;

	if (ib_table->n_v_cols > 0) {
		mutex_enter(&dict_sys->mutex);
		m_part_share->set_v_templ(table, ib_table, name);
		mutex_exit(&dict_sys->mutex);
	}

	/* Looks like MySQL-3.23 sometimes has primary key number != 0. */
	m_primary_key = table->s->primary_key;
	key_used_on_scan = m_primary_key;

	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
	a row in our table. Note that MySQL may also compare two row
	references for equality by doing a simple memcmp on the strings
	of length ref_length! */

	if (!row_table_got_default_clust_index(ib_table)) {

		m_prebuilt->clust_index_was_generated = FALSE;

		if (UNIV_UNLIKELY(m_primary_key >= MAX_KEY)) {
			table_name_t table_name;
			table_name.m_name = const_cast<char*>(name);
			ib::error() << "Table " << table_name
				<< " has a primary key in InnoDB data"
				" dictionary, but not in MySQL!";

			/* This mismatch could cause further problems
			if not attended to; bring it to the user's attention
			by printing a warning in addition to logging a
			message in the error log. */

			push_warning_printf(thd, Sql_condition::SL_WARNING,
					    ER_NO_SUCH_INDEX,
					    "Table %s has a"
					    " primary key in InnoDB data"
					    " dictionary, but not in"
					    " MySQL!", name);

			/* If m_primary_key >= MAX_KEY, its (m_primary_key)
			value could be out of bounds if we continue to index
			into the key_info[] array. Find the InnoDB primary
			index, and assign its key_length to ref_length.
			In addition, since MySQL indexes are sorted starting
			with the primary index, unique indexes etc.,
			initialize ref_length to the first index key length
			in case we fail to find the InnoDB clustered index.

			Please note, this will not resolve the primary
			index mismatch problem; other side effects are
			possible if users continue to use the table.
			However, we allow this table to be opened so
			that the user can adopt necessary measures for the
			mismatch while the table data is still accessible. */

			if (table->key_info == NULL) {
				ut_ad(table->s->keys == 0);
				ref_length = 0;
			} else {
				ref_length = table->key_info[0].key_length;
			}

			/* Find the corresponding clustered index
			key length in MySQL's key_info[] array. */

			for (uint i = 0; i < table->s->keys; i++) {
				dict_index_t*	index;
				index = innopart_get_index(0, i);
				if (dict_index_is_clust(index)) {
					ref_length =
						table->key_info[i].key_length;
				}
			}
			ut_a(ref_length);
			ref_length += PARTITION_BYTES_IN_POS;
		} else {
			/* MySQL allocates the buffer for ref.
			key_info->key_length includes space for all key
			columns + one byte for each column that may be
			NULL. ref_length must be as exact as possible to
			save space, because all row reference buffers are
			allocated based on ref_length. */

			ref_length = table->key_info[m_primary_key].key_length;
			ref_length += PARTITION_BYTES_IN_POS;
		}
	} else {
		if (m_primary_key != MAX_KEY) {
			table_name_t table_name;
			table_name.m_name = const_cast<char*>(name);
			ib::error() << "Table " << table_name
				<< " has no primary key in InnoDB data"
				" dictionary, but has one in MySQL! If you"
				" created the table with a MySQL version <"
				" 3.23.54 and did not define a primary key,"
				" but defined a unique key with all non-NULL"
				" columns, then MySQL internally treats that"
				" key as the primary key. You can fix this"
				" error by dump + DROP + CREATE + reimport"
				" of the table.";

			/* This mismatch could cause further problems
			if not attended to; bring it to the user's attention
			by printing a warning in addition to logging a
			message in the error log. */

			push_warning_printf(thd, Sql_condition::SL_WARNING,
					    ER_NO_SUCH_INDEX,
					    "InnoDB: Table %s has no"
					    " primary key in InnoDB data"
					    " dictionary, but has one in"
					    " MySQL!", name);
		}

		m_prebuilt->clust_index_was_generated = TRUE;

		ref_length = DATA_ROW_ID_LEN;
		ref_length += PARTITION_BYTES_IN_POS;

		/* If we automatically created the clustered index, then
		MySQL does not know about it, and MySQL must NOT be aware
		of the index used on scan, to make it avoid checking if we
		update the column of the index. That is why we assert below
		that key_used_on_scan is the undefined value MAX_KEY.
		The column is the row id in the automatic generation case,
		and it will never be updated anyway. */

		if (key_used_on_scan != MAX_KEY) {
			table_name_t table_name;
			table_name.m_name = const_cast<char*>(name);
			ib::warn() << "Table " << table_name
				<< " key_used_on_scan is "
				<< key_used_on_scan << " even though there is"
				" no primary key inside InnoDB.";
		}
	}

	/* Index block size in InnoDB: used by MySQL in query optimization. */
	stats.block_size = UNIV_PAGE_SIZE;

	if (m_prebuilt->table != NULL) {
		/* We update the highest file format in the system table
		space, if this table has a higher file format setting. */

		trx_sys_file_format_max_upgrade(
			(const char**) &innobase_file_format_max,
			dict_table_get_format(m_prebuilt->table));
	}

	/* Only if the table has an AUTOINC column. */
	if (m_prebuilt->table != NULL
	    && !m_prebuilt->table->ibd_file_missing
	    && table->found_next_number_field != NULL) {
		int	error;

		/* Since a table can already be "open" in InnoDB's internal
		data dictionary, we only init the autoinc counter once, the
		first time the table is loaded,
		see ha_innopart::initialize_auto_increment.
		We can safely reuse the autoinc value from a previous MySQL
		open. */

		lock_auto_increment();
		error = initialize_auto_increment(false);
		unlock_auto_increment();
		if (error != 0) {
			close();
			DBUG_RETURN(error);
		}
	}

#ifdef HA_INNOPART_SUPPORTS_FULLTEXT
	/* Set plugin parser for fulltext index. */
	for (uint i = 0; i < table->s->keys; i++) {
		if (table->key_info[i].flags & HA_USES_PARSER) {
			dict_index_t*	index = innobase_get_index(i);
			plugin_ref	parser = table->key_info[i].parser;

			ut_ad(index->type & DICT_FTS);
			index->parser =
				static_cast<st_mysql_ftparser *>(
					plugin_decl(parser)->info);

			DBUG_EXECUTE_IF("fts_instrument_use_default_parser",
				index->parser = &fts_default_parser;);
		}
	}
#endif /* HA_INNOPART_SUPPORTS_FULLTEXT */

	size_t	alloc_size = sizeof(*m_ins_node_parts) * m_tot_parts;
	m_ins_node_parts = static_cast<ins_node_t**>(
		ut_zalloc(alloc_size, mem_key_partitioning));

	alloc_size = sizeof(*m_upd_node_parts) * m_tot_parts;
	m_upd_node_parts = static_cast<upd_node_t**>(
		ut_zalloc(alloc_size, mem_key_partitioning));

	alloc_blob_heap_array();

	alloc_size = sizeof(*m_trx_id_parts) * m_tot_parts;
	m_trx_id_parts = static_cast<trx_id_t*>(
		ut_zalloc(alloc_size, mem_key_partitioning));

	alloc_size = sizeof(*m_row_read_type_parts) * m_tot_parts;
	m_row_read_type_parts = static_cast<ulint*>(
		ut_zalloc(alloc_size, mem_key_partitioning));

	alloc_size = UT_BITS_IN_BYTES(m_tot_parts);
	m_sql_stat_start_parts = static_cast<uchar*>(
		ut_zalloc(alloc_size, mem_key_partitioning));
	if (m_ins_node_parts == NULL
	    || m_upd_node_parts == NULL
	    || m_blob_heap_parts == NULL
	    || m_trx_id_parts == NULL
	    || m_row_read_type_parts == NULL
	    || m_sql_stat_start_parts == NULL) {
		close();	// Frees all the above.
		DBUG_RETURN(HA_ERR_OUT_OF_MEM);
	}
	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);

	DBUG_RETURN(0);
}
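/* Editor's note: illustrative only. Every code path in open() above
ends with ref_length += PARTITION_BYTES_IN_POS: a partitioned row
reference is the ordinary InnoDB row reference plus extra bytes that
carry the partition id, so MySQL's plain memcmp() over ref_length bytes
still distinguishes rows. A minimal sketch, assuming a 2-byte partition
id and simplifying where in the buffer the id is stored: */
#if 0
#include <cstring>

enum { PK_REF_LEN = 4, PARTITION_BYTES_IN_POS = 2 };

int main()
{
	unsigned char	ref_a[PK_REF_LEN + PARTITION_BYTES_IN_POS];
	unsigned char	ref_b[PK_REF_LEN + PARTITION_BYTES_IN_POS];

	/* Same primary key value... */
	memset(ref_a, 0xAB, PK_REF_LEN);
	memset(ref_b, 0xAB, PK_REF_LEN);
	/* ...but stored in different partitions. */
	ref_a[PK_REF_LEN] = 0; ref_a[PK_REF_LEN + 1] = 1;
	ref_b[PK_REF_LEN] = 0; ref_b[PK_REF_LEN + 1] = 2;

	/* The references compare as different rows: exit code 0. */
	return(memcmp(ref_a, ref_b, sizeof(ref_a)) == 0);
}
#endif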

/** Get a cloned ha_innopart handler.
@param[in]	name	Table name.
@param[in]	mem_root	MySQL mem_root to use.
@return new ha_innopart handler. */
handler*
ha_innopart::clone(
	const char*	name,
	MEM_ROOT*	mem_root)
{
	ha_innopart*	new_handler;

	DBUG_ENTER("ha_innopart::clone");

	new_handler = dynamic_cast<ha_innopart*>(handler::clone(name,
		mem_root));
	if (new_handler != NULL) {
		ut_ad(new_handler->m_prebuilt != NULL);

		new_handler->m_prebuilt->select_lock_type =
			m_prebuilt->select_lock_type;
	}

	DBUG_RETURN(new_handler);
}

/** Clear used ins_nodes and upd_nodes. */
void ha_innopart::clear_ins_upd_nodes()
{
	/* Free memory from insert nodes. */
	if (m_ins_node_parts != NULL) {
		for (uint i = 0; i < m_tot_parts; i++) {
			if (m_ins_node_parts[i] != NULL) {
				ins_node_t*	ins = m_ins_node_parts[i];
				if (ins->select != NULL) {
					que_graph_free_recursive(ins->select);
					ins->select = NULL;
				}

				if (ins->entry_sys_heap != NULL) {
					mem_heap_free(ins->entry_sys_heap);
					ins->entry_sys_heap = NULL;
				}
				m_ins_node_parts[i] = NULL;
			}
		}
	}

	/* Free memory from update nodes. */
	if (m_upd_node_parts != NULL) {
		for (uint i = 0; i < m_tot_parts; i++) {
			if (m_upd_node_parts[i] != NULL) {
				upd_node_t*	upd = m_upd_node_parts[i];
				if (upd->cascade_top) {
					mem_heap_free(upd->cascade_heap);
					upd->cascade_top = false;
					upd->cascade_heap = NULL;
				}
				if (upd->in_mysql_interface) {
					btr_pcur_free_for_mysql(upd->pcur);
					upd->in_mysql_interface = FALSE;
				}

				if (upd->select != NULL) {
					que_graph_free_recursive(upd->select);
					upd->select = NULL;
				}
				if (upd->heap != NULL) {
					mem_heap_free(upd->heap);
					upd->heap = NULL;
				}
				m_upd_node_parts[i] = NULL;
			}
		}
	}
}

/** Closes a handle to an InnoDB table.
@return 0 */
int
ha_innopart::close()
{
	DBUG_ENTER("ha_innopart::close");

	ut_ad(m_pcur_parts == NULL);
	ut_ad(m_clust_pcur_parts == NULL);
	close_partitioning();

	ut_ad(m_part_share != NULL);
	if (m_part_share != NULL) {
		lock_shared_ha_data();
		m_part_share->close_table_parts();
		unlock_shared_ha_data();
		m_part_share = NULL;
	}
	clear_ins_upd_nodes();
	free_blob_heap_array();

	/* Prevent double close of m_prebuilt->table. The real one was
	done in m_part_share->close_table_parts(). */
	m_prebuilt->table = NULL;
	row_prebuilt_free(m_prebuilt, FALSE);

	if (m_upd_buf != NULL) {
		ut_ad(m_upd_buf_size != 0);
		/* Allocated with my_malloc! */
		my_free(m_upd_buf);
		m_upd_buf = NULL;
		m_upd_buf_size = 0;
	}

	if (m_ins_node_parts != NULL) {
		ut_free(m_ins_node_parts);
		m_ins_node_parts = NULL;
	}
	if (m_upd_node_parts != NULL) {
		ut_free(m_upd_node_parts);
		m_upd_node_parts = NULL;
	}
	if (m_trx_id_parts != NULL) {
		ut_free(m_trx_id_parts);
		m_trx_id_parts = NULL;
	}
	if (m_row_read_type_parts != NULL) {
		ut_free(m_row_read_type_parts);
		m_row_read_type_parts = NULL;
	}
	if (m_sql_stat_start_parts != NULL) {
		ut_free(m_sql_stat_start_parts);
		m_sql_stat_start_parts = NULL;
	}

	MONITOR_INC(MONITOR_TABLE_CLOSE);

	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

	DBUG_RETURN(0);
}

/** Change active partition.
Copies needed info into m_prebuilt from the partition specific memory.
@param[in]	part_id	Partition to set as active. */
void
ha_innopart::set_partition(
	uint	part_id)
{
	DBUG_ENTER("ha_innopart::set_partition");

	DBUG_PRINT("ha_innopart", ("partition id: %u", part_id));

	if (part_id >= m_tot_parts) {
		ut_ad(0);
		DBUG_VOID_RETURN;
	}
	if (m_pcur_parts != NULL) {
		m_prebuilt->pcur = &m_pcur_parts[m_pcur_map[part_id]];
	}
	if (m_clust_pcur_parts != NULL) {
		m_prebuilt->clust_pcur =
			&m_clust_pcur_parts[m_pcur_map[part_id]];
	}
	m_prebuilt->ins_node = m_ins_node_parts[part_id];
	m_prebuilt->upd_node = m_upd_node_parts[part_id];

	/* For unordered scan and table scan, use blob_heap from the first
	partition as we need exactly one blob. */
	m_prebuilt->blob_heap = m_blob_heap_parts[m_ordered ? part_id : 0];

#ifdef UNIV_DEBUG
	if (m_prebuilt->blob_heap != NULL) {
		DBUG_PRINT("ha_innopart", ("validating blob_heap: %p",
					   m_prebuilt->blob_heap));
		mem_heap_validate(m_prebuilt->blob_heap);
	}
#endif

	m_prebuilt->trx_id = m_trx_id_parts[part_id];
	m_prebuilt->row_read_type = m_row_read_type_parts[part_id];
	m_prebuilt->sql_stat_start = get_bit(m_sql_stat_start_parts, part_id);
	m_prebuilt->table = m_part_share->get_table_part(part_id);
	m_prebuilt->index = innopart_get_index(part_id, active_index);

	DBUG_VOID_RETURN;
}

/** Update active partition.
Copies needed info from m_prebuilt into the partition specific memory.
@param[in]	part_id	Partition id to store state for. */
void
ha_innopart::update_partition(
	uint	part_id)
{
	DBUG_ENTER("ha_innopart::update_partition");
	DBUG_PRINT("ha_innopart", ("partition id: %u", part_id));

	if (part_id >= m_tot_parts) {
		ut_ad(0);
		DBUG_VOID_RETURN;
	}
	m_ins_node_parts[part_id] = m_prebuilt->ins_node;
	m_upd_node_parts[part_id] = m_prebuilt->upd_node;

#ifdef UNIV_DEBUG
	if (m_prebuilt->blob_heap != NULL) {
		DBUG_PRINT("ha_innopart", ("validating blob_heap: %p",
					   m_prebuilt->blob_heap));
		mem_heap_validate(m_prebuilt->blob_heap);
	}
#endif

	/* For unordered scan and table scan, use blob_heap from the first
	partition as we need exactly one blob at any time. */
	m_blob_heap_parts[m_ordered ? part_id : 0] = m_prebuilt->blob_heap;

	m_trx_id_parts[part_id] = m_prebuilt->trx_id;
	m_row_read_type_parts[part_id] = m_prebuilt->row_read_type;
	if (m_prebuilt->sql_stat_start == 0) {
		clear_bit(m_sql_stat_start_parts, part_id);
	}
	m_last_part = part_id;
	DBUG_VOID_RETURN;
}
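/* Editor's note: illustrative only. The per-partition handler methods
below all follow the same bracket: set_partition(i) swaps partition i's
cursors and nodes into the single shared m_prebuilt, the inherited
ha_innobase method runs, and update_partition(i) swaps any state it
changed back out. A minimal standalone sketch of that swap-in/swap-out
pattern, with hypothetical names: */
#if 0
#include <cassert>

struct prebuilt_t { int state; };

struct partitioned {
	prebuilt_t	prebuilt;	/* one shared "active" context */
	int		part_state[4];	/* per-partition saved state */

	void set_partition(int i) { prebuilt.state = part_state[i]; }
	void update_partition(int i) { part_state[i] = prebuilt.state; }

	void op_in_part(int i)
	{
		set_partition(i);
		prebuilt.state++;	/* stand-in for ha_innobase::write_row() etc. */
		update_partition(i);
	}
};

int main()
{
	partitioned	h = { {0}, {10, 20, 30, 40} };

	h.op_in_part(2);
	assert(h.part_state[2] == 31);
	assert(h.part_state[1] == 20);	/* other partitions untouched */
	return(0);
}
#endif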

/** Was the last returned row a semi consistent read.
In an UPDATE or DELETE, if the row under the cursor was locked by
another transaction, and the engine used an optimistic read of the last
committed row value under the cursor, then the engine returns 1 from
this function. MySQL must NOT try to update this optimistic value. If
the optimistic value does not match the WHERE condition, MySQL can
decide to skip over this row. This can be used to avoid unnecessary
lock waits.

If this method returns true, it will also signal the storage
engine that the next read will be a locking re-read of the row.
@see handler.h and row0mysql.h
@return true if last read was semi consistent else false. */
bool
ha_innopart::was_semi_consistent_read()
{
	return(m_row_read_type_parts[m_last_part]
	       == ROW_READ_DID_SEMI_CONSISTENT);
}

/** Try semi consistent read.
Tell the engine whether it should avoid unnecessary lock waits.
If yes, in an UPDATE or DELETE, if the row under the cursor was locked
by another transaction, the engine may try an optimistic read of
the last committed row value under the cursor.
@see handler.h and row0mysql.h
@param[in]	yes	Should semi-consistent read be used. */
void
ha_innopart::try_semi_consistent_read(
	bool	yes)
{
	ha_innobase::try_semi_consistent_read(yes);
	for (uint i = m_part_info->get_first_used_partition();
	     i < m_tot_parts;
	     i = m_part_info->get_next_used_partition(i)) {

		m_row_read_type_parts[i] = m_prebuilt->row_read_type;
	}
}

/** Removes a lock on a row.
Removes a new lock set on a row, if it was not read optimistically.
This can be called after a row has been read in the processing of
an UPDATE or a DELETE query. @see ha_innobase::unlock_row(). */
void
ha_innopart::unlock_row()
{
	ut_ad(m_last_part < m_tot_parts);
	set_partition(m_last_part);
	ha_innobase::unlock_row();
	update_partition(m_last_part);
}

/** Write a row in partition.
Stores a row in an InnoDB database, to the table specified in this
handle.
@param[in]	part_id	Partition to write to.
@param[in]	record	A row in MySQL format.
@return 0 or error code. */
int
ha_innopart::write_row_in_part(
	uint	part_id,
	uchar*	record)
{
	int	error;
	Field*	saved_next_number_field = table->next_number_field;
	DBUG_ENTER("ha_innopart::write_row_in_part");
	set_partition(part_id);

	/* Prevent update_auto_increment from being called
	again in ha_innobase::write_row(). */

	table->next_number_field = NULL;

	/* TODO: try to avoid creating a new dtuple
	(in row_get_prebuilt_insert_row()) for each partition.
	Might be needed due to the ins_node implementation. */

	error = ha_innobase::write_row(record);
	update_partition(part_id);
	table->next_number_field = saved_next_number_field;
	DBUG_RETURN(error);
}

/** Update a row in partition.
Updates a row given as a parameter to a new value.
@param[in]	part_id	Partition to update row in.
@param[in]	old_row	Old row in MySQL format.
@param[in]	new_row	New row in MySQL format.
@return 0 or error number. */
int
ha_innopart::update_row_in_part(
	uint		part_id,
	const uchar*	old_row,
	uchar*		new_row)
{
	int	error;
	DBUG_ENTER("ha_innopart::update_row_in_part");

	set_partition(part_id);
	error = ha_innobase::update_row(old_row, new_row);
	update_partition(part_id);
	DBUG_RETURN(error);
}

/** Deletes a row in partition.
@param[in]	part_id	Partition to delete from.
@param[in]	record	Row to delete in MySQL format.
@return 0 or error number. */
int
ha_innopart::delete_row_in_part(
	uint		part_id,
	const uchar*	record)
{
	int	error;
	DBUG_ENTER("ha_innopart::delete_row_in_part");
	m_err_rec = NULL;

	m_last_part = part_id;
	set_partition(part_id);
	error = ha_innobase::delete_row(record);
	update_partition(part_id);
	DBUG_RETURN(error);
}

/** Initializes a handle to use an index.
@param[in]	keynr	Key (index) number.
@param[in]	sorted	True if result MUST be sorted according to index.
@return 0 or error number. */
int
ha_innopart::index_init(
	uint	keynr,
	bool	sorted)
{
	int	error;
	uint	part_id = m_part_info->get_first_used_partition();
	DBUG_ENTER("ha_innopart::index_init");

	active_index = keynr;
	if (part_id == MY_BIT_NONE) {
		DBUG_RETURN(0);
	}

	error = ph_index_init_setup(keynr, sorted);
	if (error != 0) {
		DBUG_RETURN(error);
	}

	if (sorted) {
		error = init_record_priority_queue();
		if (error != 0) {
			/* Needs cleanup in case it returns error. */
			destroy_record_priority_queue();
			DBUG_RETURN(error);
		}
		/* Disable prefetch.
		The prefetch buffer is not partitioning aware, so it may
		return rows from a different partition if either the
		prefetch buffer is full, or it is non-empty and the
		partition is exhausted. */
		m_prebuilt->m_no_prefetch = true;
	}

	/* For a scan across partitions, the keys need to be materialized */
	m_prebuilt->m_read_virtual_key = true;

	error = change_active_index(part_id, keynr);
	if (error != 0) {
		destroy_record_priority_queue();
		DBUG_RETURN(error);
	}

	DBUG_EXECUTE_IF("partition_fail_index_init", {
		destroy_record_priority_queue();
		DBUG_RETURN(HA_ERR_NO_PARTITION_FOUND);
	});

	DBUG_RETURN(0);
}

/** End index cursor.
@return 0 or error code. */
int
ha_innopart::index_end()
{
	uint	part_id = m_part_info->get_first_used_partition();
	DBUG_ENTER("ha_innopart::index_end");

	if (part_id == MY_BIT_NONE) {
		/* Never initialized any index. */
		active_index = MAX_KEY;
		DBUG_RETURN(0);
	}
	if (m_ordered) {
		destroy_record_priority_queue();
		m_prebuilt->m_no_prefetch = false;
	}
	m_prebuilt->m_read_virtual_key = false;

	DBUG_RETURN(ha_innobase::index_end());
}

/* Partitioning support functions. */

/** Set up the ordered record buffer and the priority queue.
@param[in]	used_parts	Number of used partitions in query.
@return false for success else true. */
int
ha_innopart::init_record_priority_queue_for_parts(
	uint	used_parts)
{
	size_t	alloc_size;
	void*	buf;

	DBUG_ENTER("ha_innopart::init_record_priority_queue_for_parts");
	ut_ad(used_parts >= 1);
	/* TODO: Don't use this if only one partition is used! */
	//ut_ad(used_parts > 1);

	/* We could reuse the current m_prebuilt->pcur/clust_pcur for the
	first used partition, but it would complicate the code and affect
	performance, so we trade some extra memory instead. */

	m_pcur = m_prebuilt->pcur;
	m_clust_pcur = m_prebuilt->clust_pcur;

	/* If we are searching for a secondary key or doing a write/update
	we will need two pcur, one for the active (secondary) index and
	one for the clustered index. */

	bool	need_clust_index =
		m_curr_key_info[1] != NULL
		|| get_lock_type() != F_RDLCK;

	/* pcur and clust_pcur per partition.
	By using zalloc, we do not need to initialize the pcur's! */

	alloc_size = used_parts * sizeof(btr_pcur_t);
	if (need_clust_index) {
		alloc_size *= 2;
	}
	buf = ut_zalloc(alloc_size, mem_key_partitioning);
	if (buf == NULL) {
		DBUG_RETURN(true);
	}
	m_pcur_parts = static_cast<btr_pcur_t*>(buf);
	if (need_clust_index) {
		m_clust_pcur_parts = &m_pcur_parts[used_parts];
	}
	/* mapping from part_id to pcur. */
	alloc_size = m_tot_parts * sizeof(*m_pcur_map);
	buf = ut_zalloc(alloc_size, mem_key_partitioning);
	if (buf == NULL) {
		DBUG_RETURN(true);
	}
	m_pcur_map = static_cast<uint16_t*>(buf);
	{
		uint16_t pcur_count = 0;
		for (uint i = m_part_info->get_first_used_partition();
		     i < m_tot_parts;
		     i = m_part_info->get_next_used_partition(i)) {
			m_pcur_map[i] = pcur_count++;
		}
	}

	DBUG_RETURN(false);
}
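/* Editor's note: illustrative only. m_pcur_parts holds one cursor per
*used* partition rather than per partition, so m_pcur_map compacts
sparse partition ids into dense array slots, mirroring the loop at the
end of init_record_priority_queue_for_parts() above. E.g. if only
partitions 1 and 3 of 4 are read: */
#if 0
#include <cassert>
#include <cstdint>

int main()
{
	const unsigned	tot_parts = 4;
	bool		used[tot_parts] = { false, true, false, true };
	uint16_t	pcur_map[tot_parts] = { 0 };

	uint16_t	pcur_count = 0;
	for (unsigned i = 0; i < tot_parts; i++) {
		if (used[i]) {
			pcur_map[i] = pcur_count++;
		}
	}
	assert(pcur_count == 2);	/* only two cursors are needed */
	assert(pcur_map[1] == 0 && pcur_map[3] == 1);
	return(0);
}
#endif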
|
|
|
|
/** Destroy the ordered record buffer and the priority queue. */
|
|
inline
|
|
void
|
|
ha_innopart::destroy_record_priority_queue_for_parts()
|
|
{
|
|
DBUG_ENTER("ha_innopart::destroy_record_priority_queue");
|
|
if (m_pcur_parts != NULL) {
|
|
uint used_parts;
|
|
used_parts = bitmap_bits_set(&m_part_info->read_partitions);
|
|
for (uint i = 0; i < used_parts; i++) {
|
|
btr_pcur_free(&m_pcur_parts[i]);
|
|
if (m_clust_pcur_parts != NULL) {
|
|
btr_pcur_free(&m_clust_pcur_parts[i]);
|
|
}
|
|
}
|
|
ut_free(m_pcur_parts);
|
|
m_clust_pcur_parts = NULL;
|
|
m_pcur_parts = NULL;
|
|
/* Reset the original m_prebuilt->pcur. */
|
|
m_prebuilt->pcur = m_pcur;
|
|
m_prebuilt->clust_pcur = m_clust_pcur;
|
|
}
|
|
if (m_pcur_map != NULL) {
|
|
ut_free(m_pcur_map);
|
|
m_pcur_map = NULL;
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/** Print error information.
|
|
@param[in] error Error code (MySQL).
|
|
@param[in] errflag Flags. */
|
|
void
|
|
ha_innopart::print_error(
|
|
int error,
|
|
myf errflag)
|
|
{
|
|
DBUG_ENTER("ha_innopart::print_error");
|
|
if (print_partition_error(error, errflag)) {
|
|
ha_innobase::print_error(error, errflag);
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/** Can error be ignored.
|
|
@param[in] error Error code to check.
|
|
@return true if ignorable else false. */
|
|
bool
|
|
ha_innopart::is_ignorable_error(
|
|
int error)
|
|
{
|
|
if (ha_innobase::is_ignorable_error(error)
|
|
|| error == HA_ERR_NO_PARTITION_FOUND
|
|
|| error == HA_ERR_NOT_IN_LOCK_PARTITIONS) {
|
|
|
|
return(true);
|
|
}
|
|
return(false);
|
|
}

/** Get the index for the current partition
@param[in]	keynr	MySQL index number.
@return InnoDB index or NULL. */
inline
dict_index_t*
ha_innopart::innobase_get_index(
	uint	keynr)
{
	uint	part_id = m_last_part;
	if (part_id >= m_tot_parts) {
		ut_ad(0);
		part_id = 0;
	}
	return(innopart_get_index(part_id, keynr));
}

/** Get the index for a handle.
Does not change active index.
@param[in]	keynr	Use this index; MAX_KEY means always clustered index,
even if it was internally generated by InnoDB.
@param[in]	part_id	From this partition.
@return NULL or index instance. */
inline
dict_index_t*
ha_innopart::innopart_get_index(
	uint	part_id,
	uint	keynr)
{
	KEY*		key = NULL;
	dict_index_t*	index = NULL;

	DBUG_ENTER("innopart_get_index");

	if (keynr != MAX_KEY && table->s->keys > 0) {
		key = table->key_info + keynr;

		index = m_part_share->get_index(part_id, keynr);

		if (index != NULL) {
			ut_a(ut_strcmp(index->name, key->name) == 0);
		} else {
			/* Can't find index with keynr in the translation
			table. Only print message if the index translation
			table exists. */

			ib::warn() << "InnoDB could not find index "
				<< (key ? key->name : "NULL")
				<< " key no " << keynr << " for table "
				<< m_prebuilt->table->name
				<< " through its index translation table";

			index = dict_table_get_index_on_name(m_prebuilt->table,
							     key->name);
		}
	} else {
		/* Get the generated index. */
		ut_ad(keynr == MAX_KEY);
		index = dict_table_get_first_index(
			m_part_share->get_table_part(part_id));
	}

	if (index == NULL) {
		ib::error() << "InnoDB could not find key n:o "
			<< keynr << " with name " << (key ? key->name : "NULL")
			<< " from dict cache for table "
			<< m_prebuilt->table->name << " partition n:o "
			<< part_id;
	}

	DBUG_RETURN(index);
}

/** Changes the active index of a handle.
@param[in]	part_id	Use this partition.
@param[in]	keynr	Use this index; MAX_KEY means always clustered index,
even if it was internally generated by InnoDB.
@return 0 or error number. */
int
ha_innopart::change_active_index(
	uint	part_id,
	uint	keynr)
{
	DBUG_ENTER("ha_innopart::change_active_index");

	ut_ad(m_user_thd == ha_thd());
	ut_a(m_prebuilt->trx == thd_to_trx(m_user_thd));

	active_index = keynr;
	set_partition(part_id);

	if (UNIV_UNLIKELY(m_prebuilt->index == NULL)) {
		ib::warn() << "change_active_index(" << part_id
			<< "," << keynr << ") failed";
		m_prebuilt->index_usable = FALSE;
		DBUG_RETURN(1);
	}

	m_prebuilt->index_usable = row_merge_is_index_usable(m_prebuilt->trx,
							     m_prebuilt->index);
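	/* Besides corruption, row_merge_is_index_usable() also rejects
	an index that was created by online DDL but is not yet visible
	to this transaction's read view. */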

	if (UNIV_UNLIKELY(!m_prebuilt->index_usable)) {
		if (dict_index_is_corrupted(m_prebuilt->index)) {
			char	table_name[MAX_FULL_NAME_LEN + 1];

			innobase_format_name(
				table_name, sizeof table_name,
				m_prebuilt->index->table->name.m_name);

			push_warning_printf(
				m_user_thd, Sql_condition::SL_WARNING,
				HA_ERR_INDEX_CORRUPT,
				"InnoDB: Index %s for table %s is"
				" marked as corrupted"
				" (partition %u)",
				m_prebuilt->index->name(), table_name, part_id);
			DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
		} else {
			push_warning_printf(
				m_user_thd, Sql_condition::SL_WARNING,
				HA_ERR_TABLE_DEF_CHANGED,
				"InnoDB: insufficient history for index %u",
				keynr);
		}

		/* The caller seems to ignore this. Thus, we must check
		this again in row_search_for_mysql(). */

		DBUG_RETURN(HA_ERR_TABLE_DEF_CHANGED);
	}

	ut_a(m_prebuilt->search_tuple != NULL);

	/* If too expensive, cache the keynr and only update search_tuple when
	keynr changes. Remember that the clustered index is also used for
	MAX_KEY. */
	dtuple_set_n_fields(m_prebuilt->search_tuple,
			    m_prebuilt->index->n_fields);

	dict_index_copy_types(m_prebuilt->search_tuple, m_prebuilt->index,
			      m_prebuilt->index->n_fields);

	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the
	MAX() and then calculates the sum. Previously we played safe
	and used the flag ROW_MYSQL_WHOLE_ROW below, but that caused
	unnecessary copying. Starting from MySQL-4.1 we use a more
	efficient flag here. */

	/* TODO: Is this really needed?
	Will it not be built in index_read? */

	build_template(false);

	DBUG_RETURN(0);
}

/** Return first record in index from a partition.
@param[in]	part	Partition to read from.
@param[out]	record	First record in index in the partition.
@return error number or 0. */
int
ha_innopart::index_first_in_part(
	uint	part,
	uchar*	record)
{
	int	error;
	DBUG_ENTER("ha_innopart::index_first_in_part");

	set_partition(part);
	error = ha_innobase::index_first(record);
	update_partition(part);

	DBUG_RETURN(error);
}

/** Return next record in index from a partition.
@param[in]	part	Partition to read from.
@param[out]	record	Next record in index in the partition.
@return error number or 0. */
int
ha_innopart::index_next_in_part(
	uint	part,
	uchar*	record)
{
	DBUG_ENTER("ha_innopart::index_next_in_part");

	int	error;

	set_partition(part);
	error = ha_innobase::index_next(record);
	update_partition(part);

	ut_ad(m_ordered_scan_ongoing
	      || m_ordered_rec_buffer == NULL
	      || m_prebuilt->used_in_HANDLER
	      || m_part_spec.start_part >= m_part_spec.end_part);

	DBUG_RETURN(error);
}

/** Return next same record in index from a partition.
This routine is used to read the next record, but only if the key is
the same as supplied in the call.
@param[in]	part	Partition to read from.
@param[out]	record	Next matching record in index in the partition.
@param[in]	key	Key to match.
@param[in]	length	Length of key.
@return error number or 0. */
int
ha_innopart::index_next_same_in_part(
	uint		part,
	uchar*		record,
	const uchar*	key,
	uint		length)
{
	int	error;

	set_partition(part);
	error = ha_innobase::index_next_same(record, key, length);
	update_partition(part);
	return(error);
}

/** Return last record in index from a partition.
@param[in]	part	Partition to read from.
@param[out]	record	Last record in index in the partition.
@return error number or 0. */
int
ha_innopart::index_last_in_part(
	uint	part,
	uchar*	record)
{
	int	error;

	set_partition(part);
	error = ha_innobase::index_last(record);
	update_partition(part);
	return(error);
}

/** Return previous record in index from a partition.
@param[in]	part	Partition to read from.
@param[out]	record	Previous record in index in the partition.
@return error number or 0. */
int
ha_innopart::index_prev_in_part(
	uint	part,
	uchar*	record)
{
	int	error;

	set_partition(part);
	error = ha_innobase::index_prev(record);
	update_partition(part);

	ut_ad(m_ordered_scan_ongoing
	      || m_ordered_rec_buffer == NULL
	      || m_prebuilt->used_in_HANDLER
	      || m_part_spec.start_part >= m_part_spec.end_part);

	return(error);
}

/** Start index scan and return first record from a partition.
This routine starts an index scan using a start key. The calling
function will check the end key on its own.
@param[in]	part	Partition to read from.
@param[out]	record	First matching record in index in the partition.
@param[in]	key	Key to match.
@param[in]	keypart_map	Which part of the key to use.
@param[in]	find_flag	Key condition/direction to use.
@return error number or 0. */
int
ha_innopart::index_read_map_in_part(
	uint			part,
	uchar*			record,
	const uchar*		key,
	key_part_map		keypart_map,
	enum ha_rkey_function	find_flag)
{
	int	error;

	set_partition(part);
	error = ha_innobase::index_read_map(
		record,
		key,
		keypart_map,
		find_flag);
	update_partition(part);
	return(error);
}

/** Start index scan and return first record from a partition.
This routine starts an index scan using a start key. The calling
function will check the end key on its own.
@param[in]	part	Partition to read from.
@param[out]	record	First matching record in index in the partition.
@param[in]	index	Index to read from.
@param[in]	key	Key to match.
@param[in]	keypart_map	Which part of the key to use.
@param[in]	find_flag	Key condition/direction to use.
@return error number or 0. */
int
ha_innopart::index_read_idx_map_in_part(
	uint			part,
	uchar*			record,
	uint			index,
	const uchar*		key,
	key_part_map		keypart_map,
	enum ha_rkey_function	find_flag)
{
	int	error;

	set_partition(part);
	error = ha_innobase::index_read_idx_map(
		record,
		index,
		key,
		keypart_map,
		find_flag);
	update_partition(part);
	return(error);
}

/** Return last matching record in index from a partition.
@param[in]	part	Partition to read from.
@param[out]	record	Last matching record in index in the partition.
@param[in]	key	Key to match.
@param[in]	keypart_map	Which part of the key to use.
@return error number or 0. */
int
ha_innopart::index_read_last_map_in_part(
	uint		part,
	uchar*		record,
	const uchar*	key,
	key_part_map	keypart_map)
{
	int	error;
	set_partition(part);
	error = ha_innobase::index_read_last_map(record, key, keypart_map);
	update_partition(part);
	return(error);
}

/** Start index scan and return first record from a partition.
This routine starts an index scan using a start and end key.
@param[in]	part	Partition to read from.
@param[in,out]	record	First matching record in index in the partition,
if NULL use table->record[0] as return buffer.
@param[in]	start_key	Start key to match.
@param[in]	end_key	End key to match.
@param[in]	eq_range	Is equal range, start_key == end_key.
@param[in]	sorted	Return rows in sorted order.
@return error number or 0. */
int
ha_innopart::read_range_first_in_part(
	uint			part,
	uchar*			record,
	const key_range*	start_key,
	const key_range*	end_key,
	bool			eq_range,
	bool			sorted)
{
	int	error;
	uchar*	read_record = record;
	set_partition(part);
	if (read_record == NULL) {
		read_record = table->record[0];
	}
	if (m_start_key.key != NULL) {
		error = ha_innobase::index_read(
			read_record,
			m_start_key.key,
			m_start_key.length,
			m_start_key.flag);
	} else {
		error = ha_innobase::index_first(read_record);
	}
	if (error == HA_ERR_KEY_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
	} else if (error == 0 && !in_range_check_pushed_down) {
		/* compare_key uses table->record[0], so we
		need to copy the data if not already there. */

		if (record != NULL) {
			copy_cached_row(table->record[0], read_record);
		}
		if (compare_key(end_range) > 0) {
			/* Must call ha_innobase::unlock_row() directly:
			set/update_partition could otherwise overwrite
			the state used by ha_innopart::unlock_row(). */
			ha_innobase::unlock_row();
			error = HA_ERR_END_OF_FILE;
		}
	}
	update_partition(part);
	return(error);
}

/** Return next record in index range scan from a partition.
@param[in]	part	Partition to read from.
@param[in,out]	record	First matching record in index in the partition,
if NULL use table->record[0] as return buffer.
@return error number or 0. */
int
ha_innopart::read_range_next_in_part(
	uint	part,
	uchar*	record)
{
	int	error;
	uchar*	read_record = record;

	set_partition(part);
	if (read_record == NULL) {
		read_record = table->record[0];
	}

	/* TODO: Implement ha_innobase::read_range*?
	So it will return HA_ERR_END_OF_FILE or
	HA_ERR_KEY_NOT_FOUND when passing end_range. */

	error = ha_innobase::index_next(read_record);
	if (error == 0 && !in_range_check_pushed_down) {
		/* compare_key uses table->record[0], so we
		need to copy the data if not already there. */

		if (record != NULL) {
			copy_cached_row(table->record[0], read_record);
		}
		if (compare_key(end_range) > 0) {
			/* Must call ha_innobase::unlock_row() directly:
			set/update_partition could otherwise overwrite
			the state used by ha_innopart::unlock_row(). */
			ha_innobase::unlock_row();
			error = HA_ERR_END_OF_FILE;
		}
	}
	update_partition(part);

	return(error);
}

/** Initialize a table scan in a specific partition.
@param[in]	part_id	Partition to initialize.
@param[in]	scan	True if table/index scan, false otherwise (for rnd_pos).
@return 0 or error number. */
int
ha_innopart::rnd_init_in_part(
	uint	part_id,
	bool	scan)
{
	int	err;

	if (m_prebuilt->clust_index_was_generated) {
		err = change_active_index(part_id, MAX_KEY);
	} else {
		err = change_active_index(part_id, m_primary_key);
	}

	m_start_of_scan = 1;

	/* Don't use semi-consistent read in random row reads (by position).
	This means we must disable semi_consistent_read if scan is false. */

	if (!scan) {
		try_semi_consistent_read(false);
	}

	return(err);
}

/** Ends a table scan.
@param[in]	part_id	Partition to end table scan in.
@param[in]	scan	True for scan else random access.
@return 0 or error number. */
int
ha_innopart::rnd_end_in_part(
	uint	part_id,
	bool	scan)
{
	return(index_end());
}

/** Read next row in partition.
Reads the next row in a table scan (also used to read the FIRST row
in a table scan).
@param[in]	part_id	Partition to read from.
@param[out]	buf	Returns the row in this buffer, in MySQL format.
@return 0, HA_ERR_END_OF_FILE or error number. */
int
ha_innopart::rnd_next_in_part(
	uint	part_id,
	uchar*	buf)
{
	int	error;

	DBUG_ENTER("ha_innopart::rnd_next_in_part");

	set_partition(part_id);
	if (m_start_of_scan) {
		error = ha_innobase::index_first(buf);

		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
		m_start_of_scan = 0;
	} else {
		ha_statistic_increment(&SSV::ha_read_rnd_next_count);
		error = ha_innobase::general_fetch(buf, ROW_SEL_NEXT, 0);
	}

	update_partition(part_id);
	DBUG_RETURN(error);
}

/** Get a row from a position.
Fetches a row from the table based on a row reference.
@param[out]	buf	Returns the row in this buffer, in MySQL format.
@param[in]	pos	Position, given as primary key value or DB_ROW_ID
(if no primary key) of the row in MySQL format. The length of data in pos has
to be ref_length.
@return 0, HA_ERR_KEY_NOT_FOUND or error code. */
int
ha_innopart::rnd_pos(
	uchar*	buf,
	uchar*	pos)
{
	int	error;
	uint	part_id;
	DBUG_ENTER("ha_innopart::rnd_pos");
	ut_ad(PARTITION_BYTES_IN_POS == 2);
	DBUG_DUMP("pos", pos, ref_length);

	ha_statistic_increment(&SSV::ha_read_rnd_count);

	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));
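
	/* The row reference is a 2-byte (little-endian) partition id,
	followed by the ordinary InnoDB position (primary key value or
	DB_ROW_ID) within that partition. */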

	/* Restore used partition. */
	part_id = uint2korr(pos);

	set_partition(part_id);

	/* Note that we assume the length of the row reference is fixed
	for the table, and it is == ref_length. */

	error = ha_innobase::index_read(buf, pos + PARTITION_BYTES_IN_POS,
					ref_length - PARTITION_BYTES_IN_POS,
					HA_READ_KEY_EXACT);
	DBUG_PRINT("info", ("part %u index_read returned %d", part_id, error));
	DBUG_DUMP("buf", buf, table_share->reclength);

	update_partition(part_id);

	DBUG_RETURN(error);
}

/** Return position for cursor in last used partition.
Stores a reference to the current row to 'ref' field of the handle. Note
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
is the current 'position' of the handle, because if row ref is actually
the row id internally generated in InnoDB, then 'record' does not contain
it. We just guess that the row id must be for the record where the handle
was positioned the last time.
@param[out]	ref_arg	Pointer to buffer where to write the position.
@param[in]	record	Record to position for. */
void
ha_innopart::position_in_last_part(
	uchar*		ref_arg,
	const uchar*	record)
{
	if (m_prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
		that MySQL knows of. */

		memcpy(ref_arg, m_prebuilt->row_id, DATA_ROW_ID_LEN);
	} else {

		/* Copy primary key as the row reference */
		KEY*	key_info = table->key_info + m_primary_key;
		key_copy(ref_arg, (uchar*)record, key_info,
			 key_info->key_length);
	}
}

/** Fill in data_dir_path and tablespace name from internal data
dictionary.
@param	part_elem	Partition element to fill.
@param	ib_table	InnoDB table to copy from. */
void
ha_innopart::update_part_elem(
	partition_element*	part_elem,
	dict_table_t*		ib_table)
{
	dict_get_and_save_data_dir_path(ib_table, false);
	if (ib_table->data_dir_path != NULL) {
		if (part_elem->data_file_name == NULL
		    || strcmp(ib_table->data_dir_path,
			      part_elem->data_file_name) != 0) {

			/* Play safe and allocate memory from TABLE and copy
			instead of exposing the internal data dictionary. */
			part_elem->data_file_name =
				strdup_root(&table->mem_root,
					    ib_table->data_dir_path);
		}
	} else {
		part_elem->data_file_name = NULL;
	}

	part_elem->index_file_name = NULL;
}

/** Update create_info.
Used in SHOW CREATE TABLE et al.
@param[in,out]	create_info	Create info to update. */
void
ha_innopart::update_create_info(
	HA_CREATE_INFO*	create_info)
{
	uint		num_subparts = m_part_info->num_subparts;
	uint		num_parts;
	uint		part;
	dict_table_t*	table;
	List_iterator<partition_element>
			part_it(m_part_info->partitions);
	partition_element*	part_elem;
	partition_element*	sub_elem;
	DBUG_ENTER("ha_innopart::update_create_info");
	if ((create_info->used_fields & HA_CREATE_USED_AUTO) == 0) {
		info(HA_STATUS_AUTO);
		create_info->auto_increment_value = stats.auto_increment_value;
	}

	num_parts = (num_subparts != 0) ? m_tot_parts / num_subparts : m_tot_parts;

	/* DATA/INDEX DIRECTORY are never applied to the whole partitioned
	table, only to its parts. */

	create_info->data_file_name = NULL;
	create_info->index_file_name = NULL;

	/* Since update_create_info() can be called from
	mysql_prepare_alter_table() when not all partitions are set up,
	we look for that condition first.
	If not all partitions are available, simply return,
	since nothing needs any updated partitioning info. */

	if (!m_part_info->temp_partitions.is_empty()) {
		DBUG_VOID_RETURN;
	}
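
	/* First pass: verify that every partition and sub-partition
	element is present before updating anything below. */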
	part = 0;
	while ((part_elem = part_it++)) {
		if (part >= num_parts) {
			DBUG_VOID_RETURN;
		}
		if (m_part_info->is_sub_partitioned()) {
			List_iterator<partition_element>
				subpart_it(part_elem->subpartitions);
			uint	subpart = 0;
			while ((sub_elem = subpart_it++)) {
				if (subpart >= num_subparts) {
					DBUG_VOID_RETURN;
				}
				subpart++;
			}
			if (subpart != num_subparts) {
				DBUG_VOID_RETURN;
			}
		}
		part++;
	}
	if (part != num_parts) {
		DBUG_VOID_RETURN;
	}

	/* part_elem->data_file_name should be correct from
	the .frm, but may have been changed, so update from SYS_DATAFILES.
	index_file_name is ignored, so remove it. */

	part = 0;
	part_it.rewind();
	while ((part_elem = part_it++)) {
		if (m_part_info->is_sub_partitioned()) {
			List_iterator<partition_element>
				subpart_it(part_elem->subpartitions);
			while ((sub_elem = subpart_it++)) {
				table = m_part_share->get_table_part(part++);
				update_part_elem(sub_elem, table);
			}
		} else {
			table = m_part_share->get_table_part(part++);
			update_part_elem(part_elem, table);
		}
	}
	DBUG_VOID_RETURN;
}

/** Set create_info->data_file_name.
@param[in]	part_elem	Partition to copy from.
@param[in,out]	info	Create info to set. */
static
void
set_create_info_dir(
	partition_element*	part_elem,
	HA_CREATE_INFO*		info)
{
	if (part_elem->data_file_name != NULL
	    && part_elem->data_file_name[0] != '\0') {
		info->data_file_name = part_elem->data_file_name;
	}
	if (part_elem->index_file_name != NULL
	    && part_elem->index_file_name[0] != '\0') {
		info->index_file_name = part_elem->index_file_name;
	}
}

/** Set flags and append '/' to remote path if necessary. */
void
create_table_info_t::set_remote_path_flags()
{
	if (m_remote_path[0] != '\0') {
		ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) != 0);

		/* os_file_make_remote_pathname will truncate
		everything after the last '/', so append '/'
		if it is not the last character. */

		size_t len = strlen(m_remote_path);
		if (m_remote_path[len - 1] != OS_PATH_SEPARATOR) {
			m_remote_path[len] = OS_PATH_SEPARATOR;
			m_remote_path[len + 1] = '\0';
		}
	} else {
		ut_ad(DICT_TF_HAS_DATA_DIR(m_flags) == 0);
	}
}

/** Creates a new table to an InnoDB database.
@param[in]	name	Table name (in filesystem charset).
@param[in]	form	MySQL Table containing information of
partitions, columns and indexes etc.
@param[in]	create_info	Additional create information, like
create statement string.
@return 0 or error number. */
int
ha_innopart::create(
	const char*	name,
	TABLE*		form,
	HA_CREATE_INFO*	create_info)
{
	int		error;
	/** {database}/{tablename} */
	char		table_name[FN_REFLEN];
	/** absolute path of table */
	char		remote_path[FN_REFLEN];
	char		partition_name[FN_REFLEN];
	char*		table_name_end;
	size_t		table_name_len;
	char*		partition_name_start;
	char		table_data_file_name[FN_REFLEN];
	const char*	index_file_name;
	size_t		len;

	create_table_info_t	info(ha_thd(),
				     form,
				     create_info,
				     table_name,
				     remote_path);

	DBUG_ENTER("ha_innopart::create");
	ut_ad(create_info != NULL);
	ut_ad(m_part_info == form->part_info);
	ut_ad(table_share != NULL);

	/* Not allowed to create temporary partitioned tables. */
	if (create_info != NULL
	    && (create_info->options & HA_LEX_CREATE_TMP_TABLE) != 0) {
		my_error(ER_PARTITION_NO_TEMPORARY, MYF(0));
		ut_ad(0); // Can we support partitioned temporary tables?
		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
	}

	error = info.initialize();
	if (error != 0) {
		DBUG_RETURN(error);
	}

	/* Setup and check table level options. */
	error = info.prepare_create_table(name);
	if (error != 0) {
		DBUG_RETURN(error);
	}
	strcpy(partition_name, table_name);
	partition_name_start = partition_name + strlen(partition_name);
	table_name_len = strlen(table_name);
	table_name_end = table_name + table_name_len;
	if (create_info->data_file_name != NULL) {
		/* Strip the tablename from the path. */
		strncpy(table_data_file_name, create_info->data_file_name,
			FN_REFLEN - 1);
		table_data_file_name[FN_REFLEN - 1] = '\0';
		char* ptr = strrchr(table_data_file_name, OS_PATH_SEPARATOR);
		ut_ad(ptr != NULL);
		if (ptr != NULL) {
			ptr++;
			*ptr = '\0';
			create_info->data_file_name = table_data_file_name;
		}
	} else {
		table_data_file_name[0] = '\0';
	}
	index_file_name = create_info->index_file_name;

	info.allocate_trx();

	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
	or lock waits can happen in it during a table create operation.
	Drop table etc. do this latching in row0mysql.cc. */

	row_mysql_lock_data_dictionary(info.trx());

	/* TODO: use the new DD tables instead to decrease duplicate info. */
	List_iterator_fast <partition_element>
		part_it(form->part_info->partitions);
	partition_element*	part_elem;
	while ((part_elem = part_it++)) {
		/* Append the partition name to the table name. */
		len = Ha_innopart_share::append_sep_and_name(
			partition_name_start,
			part_elem->partition_name,
			part_sep,
			FN_REFLEN - table_name_len);
		if ((table_name_len + len) >= FN_REFLEN) {
			ut_ad(0);
			goto cleanup;
		}

		/* Override table level DATA/INDEX DIRECTORY. */
		set_create_info_dir(part_elem, create_info);

		if (!form->part_info->is_sub_partitioned()) {
			error = info.prepare_create_table(partition_name);
			if (error != 0) {
				goto cleanup;
			}
			info.set_remote_path_flags();
			error = info.create_table();
			if (error != 0) {
				goto cleanup;
			}
		} else {
			size_t	part_name_len = strlen(partition_name_start)
						+ table_name_len;
			char*	part_name_end = partition_name + part_name_len;
			List_iterator_fast <partition_element>
				sub_it(part_elem->subpartitions);
			partition_element*	sub_elem;

			while ((sub_elem = sub_it++)) {
				ut_ad(sub_elem->partition_name != NULL);

				/* 'table' will be
				<name>#P#<part_name>#SP#<subpart_name>.
				Append the sub-partition name to
				the partition name. */

				len = Ha_innopart_share::append_sep_and_name(
					part_name_end,
					sub_elem->partition_name,
					sub_sep,
					FN_REFLEN - part_name_len);
				if ((len + part_name_len) >= FN_REFLEN) {
					ut_ad(0);
					goto cleanup;
				}
				/* Override part level DATA/INDEX DIRECTORY. */
				set_create_info_dir(sub_elem, create_info);
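
				/* part_name_end points at the 4-character
				sub-partition separator that
				append_sep_and_name() wrote above; skip it
				so that only the sub-partition name itself
				is lower-cased. */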
				Ha_innopart_share::partition_name_casedn_str(
					part_name_end + 4);
				error = info.prepare_create_table(partition_name);
				if (error != 0) {
					goto cleanup;
				}
				info.set_remote_path_flags();
				error = info.create_table();
				if (error != 0) {
					goto cleanup;
				}

				/* Reset partition level
				DATA/INDEX DIRECTORY. */

				create_info->data_file_name =
					table_data_file_name;
				create_info->index_file_name =
					index_file_name;
				set_create_info_dir(part_elem, create_info);
			}
		}
		/* Reset table level DATA/INDEX DIRECTORY. */
		create_info->data_file_name = table_data_file_name;
		create_info->index_file_name = index_file_name;
	}

	innobase_commit_low(info.trx());

	row_mysql_unlock_data_dictionary(info.trx());

	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0. */

	log_buffer_flush_to_disk();

	part_it.rewind();
	/* No need to use these now, only table_name will be used. */
	create_info->data_file_name = NULL;
	create_info->index_file_name = NULL;
	while ((part_elem = part_it++)) {
		Ha_innopart_share::append_sep_and_name(
			table_name_end,
			part_elem->partition_name,
			part_sep,
			FN_REFLEN - table_name_len);
		if (!form->part_info->is_sub_partitioned()) {
			error = info.create_table_update_dict();
			if (error != 0) {
				ut_ad(0);
				goto end;
			}
		} else {
			size_t	part_name_len = strlen(table_name_end);
			char*	part_name_end = table_name_end + part_name_len;
			List_iterator_fast <partition_element>
				sub_it(part_elem->subpartitions);
			partition_element*	sub_elem;
			while ((sub_elem = sub_it++)) {
				Ha_innopart_share::append_sep_and_name(
					part_name_end,
					sub_elem->partition_name,
					sub_sep,
					FN_REFLEN - table_name_len
					- part_name_len);
				error = info.create_table_update_dict();
				if (error != 0) {
					ut_ad(0);
					goto end;
				}
			}
		}
	}

end:
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

	trx_free_for_mysql(info.trx());

	DBUG_RETURN(error);

cleanup:
	trx_rollback_for_mysql(info.trx());

	row_mysql_unlock_data_dictionary(info.trx());

	trx_free_for_mysql(info.trx());

	DBUG_RETURN(error);
}

/** Discards or imports an InnoDB tablespace.
@param[in]	discard	True if discard, else import.
@return 0 or error number. */
int
ha_innopart::discard_or_import_tablespace(
	my_bool	discard)
{
	int	error = 0;
	uint	i;
	DBUG_ENTER("ha_innopart::discard_or_import_tablespace");

	for (i = m_part_info->get_first_used_partition();
	     i < m_tot_parts;
	     i = m_part_info->get_next_used_partition(i)) {

		m_prebuilt->table = m_part_share->get_table_part(i);
		error = ha_innobase::discard_or_import_tablespace(discard);
		if (error != 0) {
			break;
		}
	}
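	/* Reset the table handle to the first partition; the generic,
	non-partition aware code paths expect m_prebuilt->table to
	point there by default. */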
	m_prebuilt->table = m_part_share->get_table_part(0);

	/* IMPORT/DISCARD also means resetting auto_increment. Make sure
	that auto_increment initialization is done after all partitions
	are imported. */
	if (table->found_next_number_field != NULL) {
		lock_auto_increment();
		m_part_share->next_auto_inc_val = 0;
		m_part_share->auto_inc_initialized = false;
		unlock_auto_increment();
	}

	DBUG_RETURN(error);
}

/** Compare key and rowid.
Helper function for sorting records in the priority queue.
a/b point to table->record[0] rows which must have the
key fields set. The bytes before a and b store the rowid.
This is used for comparing/sorting rows first according to
KEY and if same KEY, by rowid (ref).
@param[in]	key_info	Null terminated array of index information.
@param[in]	a	Pointer to record+ref in first record.
@param[in]	b	Pointer to record+ref in second record.
@return Return value is SIGN(first_rec - second_rec)
@retval	0	Keys are equal.
@retval	-1	second_rec is greater than first_rec.
@retval	+1	first_rec is greater than second_rec. */
int
ha_innopart::key_and_rowid_cmp(
	KEY**	key_info,
	uchar*	a,
	uchar*	b)
{
	int	cmp = key_rec_cmp(key_info, a, b);
	if (cmp != 0) {
		return(cmp);
	}

	/* We must compare by rowid, which is added before the record,
	in the priority queue. */

	return(memcmp(a - DATA_ROW_ID_LEN, b - DATA_ROW_ID_LEN,
		      DATA_ROW_ID_LEN));
}

/** Extra hints from MySQL.
@param[in]	operation	Operation hint.
@return 0 or error number. */
int
ha_innopart::extra(
	enum ha_extra_function	operation)
{
	if (operation == HA_EXTRA_SECONDARY_SORT_ROWID) {
		/* index_init(sorted=true) must have been called! */
		ut_ad(m_ordered);
		ut_ad(m_ordered_rec_buffer != NULL);
		/* No index_read call must have been done! */
		ut_ad(m_queue->empty());

		/* If the PK is not set as secondary sort, do secondary
		sort by rowid/ref. */

		ut_ad(m_curr_key_info[1] != NULL
		      || m_prebuilt->clust_index_was_generated != 0
		      || m_curr_key_info[0]
			 == table->key_info + table->s->primary_key);

		if (m_curr_key_info[1] == NULL
		    && m_prebuilt->clust_index_was_generated) {
			m_ref_usage = Partition_helper::REF_USED_FOR_SORT;
			m_queue->m_fun = key_and_rowid_cmp;
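
			/* From here on the priority queue orders cached
			rows by key value, breaking ties with the
			DB_ROW_ID stored just before each record, as
			implemented by key_and_rowid_cmp(). */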
		}
		return(0);
	}
	return(ha_innobase::extra(operation));
}

/** Delete all rows in a partition.
@return 0 or error number. */
int
ha_innopart::truncate_partition_low()
{
	return(truncate());
}

/** Deletes all rows of a partitioned InnoDB table.
@return 0 or error number. */
int
ha_innopart::truncate()
{
	dberr_t	err = DB_SUCCESS;
	int	error;

	DBUG_ENTER("ha_innopart::truncate");

	if (high_level_read_only) {
		DBUG_RETURN(HA_ERR_TABLE_READONLY);
	}

	/* TRUNCATE also means resetting auto_increment. Hence, reset
	it so that it will be initialized again at the next use. */

	if (table->found_next_number_field != NULL) {
		lock_auto_increment();
		m_part_share->next_auto_inc_val = 0;
		m_part_share->auto_inc_initialized = false;
		unlock_auto_increment();
	}

	/* Get the transaction associated with the current thd, or create one
	if not yet created, and update m_prebuilt->trx. */

	update_thd(ha_thd());

	if (!trx_is_started(m_prebuilt->trx)) {
		++m_prebuilt->trx->will_lock;
	}
	/* Truncate the table in InnoDB. */

	for (uint i = m_part_info->get_first_used_partition();
	     i < m_tot_parts;
	     i = m_part_info->get_next_used_partition(i)) {

		set_partition(i);
		err = row_truncate_table_for_mysql(m_prebuilt->table,
						   m_prebuilt->trx);
		update_partition(i);
		if (err != DB_SUCCESS) {
			break;
		}
	}

	switch (err) {

	case DB_TABLESPACE_DELETED:
	case DB_TABLESPACE_NOT_FOUND:
		ib_senderrf(
			m_prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
			(err == DB_TABLESPACE_DELETED ?
			 ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING),
			table->s->table_name.str);
		table->status = STATUS_NOT_FOUND;
		error = HA_ERR_NO_SUCH_TABLE;
		break;

	default:
		error = convert_error_code_to_mysql(
			err, m_prebuilt->table->flags,
			m_prebuilt->trx->mysql_thd);
		table->status = STATUS_NOT_FOUND;
		break;
	}
	DBUG_RETURN(error);
}

/** Estimates the number of index records in a range.
@param[in]	keynr	Index number.
@param[in]	min_key	Start key value (or NULL).
@param[in]	max_key	End key value (or NULL).
@return estimated number of rows. */
ha_rows
ha_innopart::records_in_range(
	uint		keynr,
	key_range*	min_key,
	key_range*	max_key)
{
	KEY*		key;
	dict_index_t*	index;
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	int64_t		n_rows = 0;
	page_cur_mode_t	mode1;
	page_cur_mode_t	mode2;
	mem_heap_t*	heap;
	uint		part_id;

	DBUG_ENTER("ha_innopart::records_in_range");
	DBUG_PRINT("info", ("keynr %u min %p max %p", keynr, min_key, max_key));

	ut_a(m_prebuilt->trx == thd_to_trx(ha_thd()));

	m_prebuilt->trx->op_info = (char*)"estimating records in index range";

	active_index = keynr;

	key = table->key_info + active_index;

	part_id = m_part_info->get_first_used_partition();
	if (part_id == MY_BIT_NONE) {
		DBUG_RETURN(0);
	}
	/* This also sets m_prebuilt->index! */
	set_partition(part_id);
	index = m_prebuilt->index;

	/* Only validate the first partition, to avoid too much overhead. */

	/* There exists a possibility of not being able to find the requested
	index due to inconsistency between the MySQL and InnoDB dictionary
	info. A necessary message should have been printed in
	innopart_get_index(). */
	if (index == NULL
	    || dict_table_is_discarded(m_prebuilt->table)
	    || dict_index_is_corrupted(index)
	    || !row_merge_is_index_usable(m_prebuilt->trx, index)) {

		n_rows = HA_POS_ERROR;
		goto func_exit;
	}

	heap = mem_heap_create(2 * (key->actual_key_parts * sizeof(dfield_t)
				    + sizeof(dtuple_t)));

	range_start = dtuple_create(heap, key->actual_key_parts);
	dict_index_copy_types(range_start, index, key->actual_key_parts);

	range_end = dtuple_create(heap, key->actual_key_parts);
	dict_index_copy_types(range_end, index, key->actual_key_parts);

	row_sel_convert_mysql_key_to_innobase(
		range_start,
		m_prebuilt->srch_key_val1,
		m_prebuilt->srch_key_val_len,
		index,
		(byte*) (min_key ? min_key->key : (const uchar*) 0),
		(ulint) (min_key ? min_key->length : 0),
		m_prebuilt->trx);

	ut_ad(min_key != NULL
	      ? range_start->n_fields > 0
	      : range_start->n_fields == 0);

	row_sel_convert_mysql_key_to_innobase(
		range_end,
		m_prebuilt->srch_key_val2,
		m_prebuilt->srch_key_val_len,
		index,
		(byte*) (max_key != NULL ? max_key->key : (const uchar*) 0),
		(ulint) (max_key != NULL ? max_key->length : 0),
		m_prebuilt->trx);

	ut_ad(max_key != NULL
	      ? range_end->n_fields > 0
	      : range_end->n_fields == 0);

	mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
						HA_READ_KEY_EXACT);
	mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
						HA_READ_KEY_EXACT);

	if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {

		n_rows = btr_estimate_n_rows_in_range(index, range_start,
						      mode1, range_end,
						      mode2);
		DBUG_PRINT("info", ("part_id %u rows %ld", part_id,
				    (long int) n_rows));
		for (part_id = m_part_info->get_next_used_partition(part_id);
		     part_id < m_tot_parts;
		     part_id = m_part_info->get_next_used_partition(part_id)) {

			index = m_part_share->get_index(part_id, keynr);
			int64_t	n = btr_estimate_n_rows_in_range(index,
								 range_start,
								 mode1,
								 range_end,
								 mode2);
			n_rows += n;
			DBUG_PRINT("info", ("part_id %u rows %ld (%ld)",
					    part_id,
					    (long int) n,
					    (long int) n_rows));
		}
	} else {

		n_rows = HA_POS_ERROR;
	}

	mem_heap_free(heap);

func_exit:

	m_prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
		n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

/** Gives an UPPER BOUND to the number of rows in a table.
This is used in filesort.cc.
@return upper bound of rows. */
ha_rows
ha_innopart::estimate_rows_upper_bound()
{
	const dict_index_t*	index;
	ulonglong		estimate = 0;
	ulonglong		local_data_file_length;
	ulint			stat_n_leaf_pages;

	DBUG_ENTER("ha_innopart::estimate_rows_upper_bound");

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(ha_thd());

	m_prebuilt->trx->op_info = "calculating upper bound for table rows";

	for (uint i = m_part_info->get_first_used_partition();
	     i < m_tot_parts;
	     i = m_part_info->get_next_used_partition(i)) {

		m_prebuilt->table = m_part_share->get_table_part(i);
		index = dict_table_get_first_index(m_prebuilt->table);

		stat_n_leaf_pages = index->stat_n_leaf_pages;

		ut_a(stat_n_leaf_pages > 0);

		local_data_file_length =
			((ulonglong) stat_n_leaf_pages) * UNIV_PAGE_SIZE;

		/* Calculate a minimum length for a clustered index record
		and from that an upper bound for the number of rows.
		Since we only calculate new statistics in row0mysql.cc when a
		table has grown by a threshold factor,
		we must add a safety factor 2 in front of the formula below. */

		estimate += 2 * local_data_file_length
			/ dict_index_calc_min_rec_len(index);
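
		/* For example, with the default 16 KiB page size, a
		partition with 1000 leaf pages and a minimum clustered
		record length of 20 bytes contributes
		2 * 1000 * 16384 / 20 = 1638400 rows to the bound. */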
	}

	m_prebuilt->trx->op_info = "";

	DBUG_RETURN((ha_rows) estimate);
}

/** Time estimate for full table scan.
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys.
@return estimated time measured in disk seeks. */
double
ha_innopart::scan_time()
{
	double	scan_time = 0.0;
	DBUG_ENTER("ha_innopart::scan_time");

	for (uint i = m_part_info->get_first_used_partition();
	     i < m_tot_parts;
	     i = m_part_info->get_next_used_partition(i)) {
		m_prebuilt->table = m_part_share->get_table_part(i);
		scan_time += ha_innobase::scan_time();
	}
	DBUG_RETURN(scan_time);
}

/** Updates the statistics for one partition (table).
@param[in]	table	Table to update the statistics for.
@param[in]	is_analyze	True if called from ::analyze().
@return error code. */
static
int
update_table_stats(
	dict_table_t*	table,
	bool		is_analyze)
{
	dict_stats_upd_option_t	opt;
	dberr_t			ret;

	if (dict_stats_is_persistent_enabled(table)) {
		if (is_analyze) {
			opt = DICT_STATS_RECALC_PERSISTENT;
		} else {
			/* This is e.g. 'SHOW INDEXES',
			fetch the persistent stats from disk. */
			opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
		}
	} else {
		opt = DICT_STATS_RECALC_TRANSIENT;
	}

	ut_ad(!mutex_own(&dict_sys->mutex));
	ret = dict_stats_update(table, opt);

	if (ret != DB_SUCCESS) {
		return(HA_ERR_GENERIC);
	}
	return(0);
}

/** Updates and returns statistics.
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object.
@param[in]	flag	Flags for what to update and return.
@param[in]	is_analyze	True if called from ::analyze().
@return HA_ERR_* error code or 0. */
int
ha_innopart::info_low(
	uint	flag,
	bool	is_analyze)
{
	dict_table_t*	ib_table;
	ib_uint64_t	max_rows = 0;
	uint		biggest_partition = 0;
	int		error = 0;

	DBUG_ENTER("ha_innopart::info_low");

	/* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(ha_thd());

	m_prebuilt->trx->op_info = "returning various info to MySQL";

	ut_ad(m_part_share->get_table_part(0)->n_ref_count > 0);

	if ((flag & HA_STATUS_TIME) != 0) {
		stats.update_time = 0;

		if (is_analyze) {
			/* Only analyze the given partitions. */
			int	error = set_altered_partitions();
			if (error != 0) {
				/* Already checked in mysql_admin_table! */
				ut_ad(0);
				DBUG_RETURN(error);
			}
		}
		if (is_analyze || innobase_stats_on_metadata) {
			m_prebuilt->trx->op_info = "updating table statistics";
		}

		/* TODO: Only analyze the PK for all partitions,
		then the secondary indexes only for the largest partition! */
		for (uint i = m_part_info->get_first_used_partition();
		     i < m_tot_parts;
		     i = m_part_info->get_next_used_partition(i)) {

			ib_table = m_part_share->get_table_part(i);
			if (is_analyze || innobase_stats_on_metadata) {
				error = update_table_stats(ib_table, is_analyze);
				if (error != 0) {
					m_prebuilt->trx->op_info = "";
					DBUG_RETURN(error);
				}
			}
			set_if_bigger(stats.update_time,
				      (ulong) ib_table->update_time);
		}

		if (is_analyze || innobase_stats_on_metadata) {
			m_prebuilt->trx->op_info =
				"returning various info to MySQL";
		}
	}

	if ((flag & HA_STATUS_VARIABLE) != 0) {

		/* TODO: If this is called after pruning, then we could
		also update the statistics according to the non-pruned
		partitions, by allocating new rec_per_key on the TABLE,
		instead of using the info from the TABLE_SHARE. */
		ulint	stat_clustered_index_size = 0;
		ulint	stat_sum_of_other_index_sizes = 0;
		ib_uint64_t	n_rows = 0;
		ulint	avail_space = 0;
		bool	checked_sys_tablespace = false;

		if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0) {
			stats.delete_length = 0;
		}

		for (uint i = m_part_info->get_first_used_partition();
		     i < m_tot_parts;
		     i = m_part_info->get_next_used_partition(i)) {

			ib_table = m_part_share->get_table_part(i);
			if ((flag & HA_STATUS_NO_LOCK) == 0) {
				dict_table_stats_lock(ib_table, RW_S_LATCH);
			}

			ut_a(ib_table->stat_initialized);

			n_rows += ib_table->stat_n_rows;
			if (ib_table->stat_n_rows > max_rows) {
				max_rows = ib_table->stat_n_rows;
				biggest_partition = i;
			}

			stat_clustered_index_size +=
				ib_table->stat_clustered_index_size;

			stat_sum_of_other_index_sizes +=
				ib_table->stat_sum_of_other_index_sizes;

			if ((flag & HA_STATUS_NO_LOCK) == 0) {
				dict_table_stats_unlock(ib_table, RW_S_LATCH);
			}

			if ((flag & HA_STATUS_VARIABLE_EXTRA) != 0
			    && (flag & HA_STATUS_NO_LOCK) == 0
			    && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE
			    && avail_space != ULINT_UNDEFINED) {

				/* Only count the system tablespace once! */
				if (is_system_tablespace(ib_table->space)) {
					if (checked_sys_tablespace) {
						continue;
					}
					checked_sys_tablespace = true;
				}

				uintmax_t	space =
					fsp_get_available_space_in_free_extents(
						ib_table->space);
				if (space == UINTMAX_MAX) {
					THD*	thd = ha_thd();
					const char*	table_name
						= ib_table->name.m_name;

					push_warning_printf(
						thd,
						Sql_condition::SL_WARNING,
						ER_CANT_GET_STAT,
						"InnoDB: Trying to get the"
						" free space for partition %s"
						" but its tablespace has been"
						" discarded or the .ibd file"
						" is missing. Setting the free"
						" space of the partition to"
						" zero.",
						ut_get_name(
							m_prebuilt->trx,
							table_name).c_str());
				} else {
					avail_space +=
						static_cast<ulint>(space);
				}
			}
		}

		/*
		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
		HA_STATUS_TIME flag set, while the left join optimizer does not
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows == 0 && (flag & HA_STATUS_TIME) == 0) {
			n_rows++;
		}

		/* Fix bug#40386: Not flushing query cache after truncate.
		n_rows can not be 0 unless the table is empty, set to 1
		instead. The original problem of bug#29507 is actually
		fixed in the server code. */
		if (thd_sql_command(m_user_thd) == SQLCOM_TRUNCATE) {

			n_rows = 1;

			/* We need to reset the m_prebuilt value too, otherwise
			checks for values greater than the last value written
			to the table will fail and the autoinc counter will
			not be updated. This will force write_row() into
			attempting an update of the table's AUTOINC counter. */

			m_prebuilt->autoinc_last_value = 0;
		}

		/* Take page_size from the first partition. */
		ib_table = m_part_share->get_table_part(0);
		const page_size_t&	page_size =
			dict_table_page_size(ib_table);

		stats.records = (ha_rows) n_rows;
		stats.deleted = 0;
		stats.data_file_length =
			((ulonglong) stat_clustered_index_size)
			* page_size.physical();
		stats.index_file_length =
			((ulonglong) stat_sum_of_other_index_sizes)
			* page_size.physical();

		/* See ha_innobase::info_low() for comments! */
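		/* avail_space is in KiB, as returned by
		fsp_get_available_space_in_free_extents(); hence the
		multiplication by 1024 below. */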
|
|
if ((flag & HA_STATUS_NO_LOCK) == 0
|
|
&& (flag & HA_STATUS_VARIABLE_EXTRA) != 0
|
|
&& srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
|
|
stats.delete_length = avail_space * 1024;
|
|
}
|
|
|
|
stats.check_time = 0;
|
|
stats.mrr_length_per_rec = ref_length + sizeof(void*)
|
|
- PARTITION_BYTES_IN_POS;
|
|
|
|
if (stats.records == 0) {
|
|
stats.mean_rec_length = 0;
|
|
} else {
|
|
stats.mean_rec_length = (ulong)
|
|
(stats.data_file_length / stats.records);
|
|
}
|
|
}
|
|
|
|
if ((flag & HA_STATUS_CONST) != 0) {
|
|
/* Find max rows and biggest partition. */
|
|
for (uint i = 0; i < m_tot_parts; i++) {
|
|
/* Skip partitions from above. */
|
|
if ((flag & HA_STATUS_VARIABLE) == 0
|
|
|| !bitmap_is_set(&(m_part_info->read_partitions),
|
|
i)) {
|
|
|
|
ib_table = m_part_share->get_table_part(i);
|
|
if (ib_table->stat_n_rows > max_rows) {
|
|
max_rows = ib_table->stat_n_rows;
|
|
biggest_partition = i;
|
|
}
|
|
}
|
|
}
|
|
ib_table = m_part_share->get_table_part(biggest_partition);
|
|
/* Verify the number of index in InnoDB and MySQL
|
|
matches up. If m_prebuilt->clust_index_was_generated
|
|
holds, InnoDB defines GEN_CLUST_INDEX internally. */
|
|
ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
|
|
- m_prebuilt->clust_index_was_generated;
|
|
if (table->s->keys < num_innodb_index) {
|
|
/* If there are too many indexes defined
|
|
inside InnoDB, ignore those that are being
|
|
created, because MySQL will only consider
|
|
the fully built indexes here. */
|
|
|
|
for (const dict_index_t* index =
|
|
UT_LIST_GET_FIRST(ib_table->indexes);
|
|
index != NULL;
|
|
index = UT_LIST_GET_NEXT(indexes, index)) {
|
|
|
|
/* First, online index creation is
|
|
completed inside InnoDB, and then
|
|
MySQL attempts to upgrade the
|
|
meta-data lock so that it can rebuild
|
|
the .frm file. If we get here in that
|
|
time frame, dict_index_is_online_ddl()
|
|
would not hold and the index would
|
|
still not be included in TABLE_SHARE. */
|
|
if (!index->is_committed()) {
|
|
num_innodb_index--;
|
|
}
|
|
}
|
|
|
|
if (table->s->keys < num_innodb_index
|
|
&& (innobase_fts_check_doc_id_index(ib_table,
|
|
NULL, NULL)
|
|
== FTS_EXIST_DOC_ID_INDEX)) {
|
|
num_innodb_index--;
|
|
}
|
|
}
|
|
|
|
if (table->s->keys != num_innodb_index) {
|
|
ib::error() << "Table "
|
|
<< ib_table->name << " contains "
|
|
<< num_innodb_index
|
|
<< " indexes inside InnoDB, which"
|
|
" is different from the number of"
|
|
" indexes " << table->s->keys
|
|
<< " defined in the MySQL";
|
|
}
|
|
|
|
if ((flag & HA_STATUS_NO_LOCK) == 0) {
|
|
dict_table_stats_lock(ib_table, RW_S_LATCH);
|
|
}
|
|
|
|
ut_a(ib_table->stat_initialized);
|
|
|
|
for (ulong i = 0; i < table->s->keys; i++) {
|
|
ulong j;
|
|
/* We could get index quickly through internal
|
|
index mapping with the index translation table.
|
|
The identity of index (match up index name with
|
|
that of table->key_info[i]) is already verified in
|
|
innopart_get_index(). */
|
|
dict_index_t* index = innopart_get_index(
|
|
biggest_partition, i);
|
|
|
|
if (index == NULL) {
|
|
ib::error() << "Table "
|
|
<< ib_table->name << " contains fewer"
|
|
" indexes inside InnoDB than"
|
|
" are defined in the MySQL"
|
|
" .frm file. Have you mixed up"
|
|
" .frm files from different"
|
|
" installations? "
|
|
<< TROUBLESHOOTING_MSG;
|
|
break;
|
|
}
|
|
|
|
KEY* key = &table->key_info[i];
|
|
for (j = 0;
|
|
j < key->actual_key_parts;
|
|
j++) {
|
|
|
|
if ((key->flags & HA_FULLTEXT) != 0) {
|
|
/* The whole concept has no validity
|
|
for FTS indexes. */
|
|
key->rec_per_key[j] = 1;
|
|
continue;
|
|
}
|
|
|
|
if ((j + 1) > index->n_uniq) {
|
|
ib::error() << "Index " << index->name
|
|
<< " of " << ib_table->name
|
|
<< " has " << index->n_uniq
|
|
<< " columns unique inside"
|
|
" InnoDB, but MySQL is"
|
|
" asking statistics for "
|
|
<< j + 1 << " columns. Have"
|
|
" you mixed up .frm files"
|
|
" from different"
|
|
" installations? "
|
|
<< TROUBLESHOOTING_MSG;
|
|
break;
|
|
}
|
|
|
|
/* innodb_rec_per_key() will use
|
|
index->stat_n_diff_key_vals[] and the value we
|
|
pass index->table->stat_n_rows. Both are
|
|
calculated by ANALYZE and by the background
|
|
stats gathering thread (which kicks in when too
|
|
much of the table has been changed). In
|
|
addition table->stat_n_rows is adjusted with
|
|
each DML (e.g. ++ on row insert). Those
|
|
adjustments are not MVCC'ed and not even
|
|
reversed on rollback. So,
|
|
index->stat_n_diff_key_vals[] and
|
|
index->table->stat_n_rows could have been
|
|
calculated at different time. This is
|
|
acceptable. */
|
|
const rec_per_key_t rec_per_key =
|
|
innodb_rec_per_key(
|
|
index, j,
|
|
max_rows);
|
|
|
|
key->set_records_per_key(j, rec_per_key);
|
|
|
|
/* The code below is legacy and should be
|
|
removed together with this comment once we
|
|
are sure the new floating point rec_per_key,
|
|
set via set_records_per_key(), works fine. */
|
|
|
|
ulong rec_per_key_int = static_cast<ulong>(
|
|
innodb_rec_per_key(index, j,
|
|
max_rows));
|
|
|
|
/* Since MySQL seems to favor table scans
|
|
too much over index searches, we pretend
|
|
index selectivity is 2 times better than
|
|
our estimate: */
|
|
|
|
rec_per_key_int = rec_per_key_int / 2;
|
|
|
|
if (rec_per_key_int == 0) {
|
|
rec_per_key_int = 1;
|
|
}
|
|
|
|
key->rec_per_key[j] = rec_per_key_int;
|
|
}
|
|
}
|
|
|
|
if ((flag & HA_STATUS_NO_LOCK) == 0) {
|
|
dict_table_stats_unlock(ib_table, RW_S_LATCH);
|
|
}
|
|
|
|
char path[FN_REFLEN];
|
|
os_file_stat_t stat_info;
|
|
/* Use the first partition for create time until new DD. */
|
|
ib_table = m_part_share->get_table_part(0);
|
|
my_snprintf(path, sizeof(path), "%s/%s%s",
|
|
mysql_data_home,
|
|
table->s->normalized_path.str,
|
|
reg_ext);
|
|
|
|
unpack_filename(path,path);
|
|
|
|
if (os_file_get_status(path, &stat_info, false, true) == DB_SUCCESS) {
|
|
stats.create_time = (ulong) stat_info.ctime;
|
|
}
|
|
}
|
|
|
|
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
|
|
|
|
goto func_exit;
|
|
}
|
|
|
|
if ((flag & HA_STATUS_ERRKEY) != 0) {
|
|
const dict_index_t* err_index;
|
|
|
|
ut_a(m_prebuilt->trx);
|
|
ut_a(m_prebuilt->trx->magic_n == TRX_MAGIC_N);
|
|
|
|
err_index = trx_get_error_info(m_prebuilt->trx);
|
|
|
|
if (err_index != NULL) {
|
|
errkey = m_part_share->get_mysql_key(m_last_part,
|
|
err_index);
|
|
} else {
|
|
errkey = (unsigned int) (
|
|
(m_prebuilt->trx->error_key_num
|
|
== ULINT_UNDEFINED)
|
|
? UINT_MAX
|
|
: m_prebuilt->trx->error_key_num);
|
|
}
|
|
}
|
|
|
|
if ((flag & HA_STATUS_AUTO) != 0) {
|
|
/* auto_inc is only supported in first key for InnoDB! */
|
|
ut_ad(table_share->next_number_keypart == 0);
|
|
DBUG_PRINT("info", ("HA_STATUS_AUTO"));
|
|
if (table->found_next_number_field == NULL) {
|
|
stats.auto_increment_value = 0;
|
|
} else {
|
|
/* Lock to avoid two concurrent initializations. */
|
|
lock_auto_increment();
|
|
if (m_part_share->auto_inc_initialized) {
|
|
stats.auto_increment_value =
|
|
m_part_share->next_auto_inc_val;
|
|
} else {
|
|
/* The auto-inc mutex in the table_share is
|
|
locked, so we do not need to have the handlers
|
|
locked. */
|
|
|
|
error = initialize_auto_increment(
|
|
(flag & HA_STATUS_NO_LOCK) != 0);
|
|
stats.auto_increment_value =
|
|
m_part_share->next_auto_inc_val;
|
|
}
|
|
unlock_auto_increment();
|
|
}
|
|
}
|
|
|
|
func_exit:
|
|
m_prebuilt->trx->op_info = (char*)"";
|
|
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
/** Optimize table.
|
|
This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
|
|
the table in MySQL.
|
|
@param[in] thd Connection thread handle.
|
|
@param[in] check_opt Currently ignored.
|
|
@return 0 for success else error code. */
|
|
int
|
|
ha_innopart::optimize(
|
|
THD* thd,
|
|
HA_CHECK_OPT* check_opt)
|
|
{
|
|
return(HA_ADMIN_TRY_ALTER);
|
|
}
|
|
|
|
/** Checks a partitioned table.
|
|
Tries to check that an InnoDB table is not corrupted. If corruption is
|
|
noticed, prints to stderr information about it. In case of corruption
|
|
may also assert a failure and crash the server. Also checks for records
|
|
in wrong partition.
|
|
@param[in] thd MySQL THD object/thread handle.
|
|
@param[in] check_opt Check options.
|
|
@return HA_ADMIN_CORRUPT or HA_ADMIN_OK. */
|
|
int
|
|
ha_innopart::check(
|
|
THD* thd,
|
|
HA_CHECK_OPT* check_opt)
|
|
{
|
|
uint error = HA_ADMIN_OK;
|
|
uint i;
|
|
|
|
DBUG_ENTER("ha_innopart::check");
|
|
/* TODO: Enhance this to:
|
|
- Every partition has the same structure.
|
|
- The names are correct (partition names checked in ::open()?)
|
|
Currently it only does normal InnoDB check of each partition. */
|
|
|
|
if (set_altered_partitions()) {
|
|
ut_ad(0); // Already checked by set_part_state()!
|
|
DBUG_RETURN(HA_ADMIN_INVALID);
|
|
}
|
|
for (i = m_part_info->get_first_used_partition();
|
|
i < m_tot_parts;
|
|
i = m_part_info->get_next_used_partition(i)) {
|
|
|
|
m_prebuilt->table = m_part_share->get_table_part(i);
|
|
error = ha_innobase::check(thd, check_opt);
|
|
if (error != 0) {
|
|
break;
|
|
}
|
|
if ((check_opt->flags & (T_MEDIUM | T_EXTEND)) != 0) {
|
|
error = Partition_helper::check_misplaced_rows(i, false);
|
|
if (error != 0) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (error != 0) {
|
|
print_admin_msg(
|
|
thd,
|
|
256,
|
|
"error",
|
|
table_share->db.str,
|
|
table->alias,
|
|
"check",
|
|
m_is_sub_partitioned ?
|
|
"Subpartition %s returned error"
|
|
: "Partition %s returned error",
|
|
m_part_share->get_partition_name(i));
|
|
}
|
|
|
|
DBUG_RETURN(error);
|
|
}

/** Repair a partitioned table.
Only repairs records in wrong partitions (moves them to the correct
partition or deletes them if not in any partition).
@param[in]	thd		MySQL THD object/thread handle.
@param[in]	repair_opt	Repair options.
@return	0 or error code. */
int
ha_innopart::repair(
	THD*		thd,
	HA_CHECK_OPT*	repair_opt)
{
	uint	error = HA_ADMIN_OK;

	DBUG_ENTER("ha_innopart::repair");

	/* TODO: enable this warning to be clear about what is repaired.
	Currently disabled to generate smaller test diffs. */
#ifdef ADD_WARNING_FOR_REPAIR_ONLY_PARTITION
	push_warning_printf(thd, Sql_condition::SL_WARNING,
			    ER_ILLEGAL_HA,
			    "Only moving rows from wrong partition to correct"
			    " partition is supported,"
			    " repairing InnoDB indexes is not yet supported!");
#endif

	/* Only repair partitions for MEDIUM or EXTENDED options. */
	if ((repair_opt->flags & (T_MEDIUM | T_EXTEND)) == 0) {
		DBUG_RETURN(HA_ADMIN_OK);
	}
	if (set_altered_partitions()) {
		ut_ad(0);	// Already checked by set_part_state()!
		DBUG_RETURN(HA_ADMIN_INVALID);
	}
	for (uint i = m_part_info->get_first_used_partition();
	     i < m_tot_parts;
	     i = m_part_info->get_next_used_partition(i)) {

		/* TODO: Implement and use ha_innobase::repair()! */
		error = Partition_helper::check_misplaced_rows(i, true);
		if (error != 0) {
			print_admin_msg(
				thd,
				256,
				"error",
				table_share->db.str,
				table->alias,
				"repair",
				m_is_sub_partitioned ?
					"Subpartition %s returned error"
					: "Partition %s returned error",
				m_part_share->get_partition_name(i));
			break;
		}
	}

	DBUG_RETURN(error);
}
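
/* Example (added for illustration): only misplaced rows are repaired,
and only for a MEDIUM or EXTENDED repair, e.g.

	REPAIR TABLE t1 EXTENDED;

Rows found in the wrong partition are moved to the correct one; rows
matching no partition are deleted (see check_misplaced_rows(i, true)
above). Corrupted indexes are not repaired. */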

/** Check if possible to switch engine (no foreign keys).
Checks if ALTER TABLE may change the storage engine of the table.
Changing storage engines is not allowed for tables for which there
are foreign key constraints (parent or child tables).
@return	true if can switch engines. */
bool
ha_innopart::can_switch_engines()
{
	bool	can_switch;

	DBUG_ENTER("ha_innopart::can_switch_engines");
	can_switch = ha_innobase::can_switch_engines();
	ut_ad(can_switch);

	DBUG_RETURN(can_switch);
}

/** Checks if a table is referenced by a foreign key.
The MySQL manual states that a REPLACE is either equivalent to an INSERT,
or DELETE(s) + INSERT. Only a delete is then allowed internally to resolve
a duplicate key conflict in REPLACE, not an update.
@return	> 0 if referenced by a FOREIGN KEY. */
uint
ha_innopart::referenced_by_foreign_key()
{
	if (dict_table_is_referenced_by_foreign_key(m_prebuilt->table)) {

#ifndef HA_INNOPART_SUPPORTS_FOREIGN_KEYS
		ut_ad(0);
#endif /* HA_INNOPART_SUPPORTS_FOREIGN_KEYS */
		return(1);
	}

	return(0);
}
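
/* Note (added for illustration): reporting nonzero here makes

	REPLACE INTO t1 VALUES (...);

resolve a duplicate-key conflict with DELETE + INSERT rather than an
internal UPDATE, so referential actions such as ON DELETE CASCADE are
not bypassed. Partitioned InnoDB tables cannot currently be referenced
by foreign keys, hence the debug assertion in the branch above. */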

/** Start statement.
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
on that table.
MySQL-5.0 also calls this before each statement in an execution of a stored
procedure. To make the execution more deterministic for binlogging, MySQL-5.0
locks all tables involved in a stored procedure with full explicit table
locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
procedure.
@param[in]	thd		Handle to the user thread.
@param[in]	lock_type	Lock type.
@return	0 or error code. */
int
ha_innopart::start_stmt(
	THD*		thd,
	thr_lock_type	lock_type)
{
	int	error = 0;

	if (m_part_info->get_first_used_partition() == MY_BIT_NONE) {
		/* All partitions pruned away, do nothing! */
		return(error);
	}

	error = ha_innobase::start_stmt(thd, lock_type);
	if (m_prebuilt->sql_stat_start) {
		memset(m_sql_stat_start_parts, 0xff,
		       UT_BITS_IN_BYTES(m_tot_parts));
	} else {
		memset(m_sql_stat_start_parts, 0,
		       UT_BITS_IN_BYTES(m_tot_parts));
	}
	return(error);
}
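
/* Note (added for illustration): m_sql_stat_start_parts is a bitmap
holding one bit per partition. When the statement is just starting
(m_prebuilt->sql_stat_start), every bit is set, so each partition
performs its own statement-start processing (e.g. storing a savepoint)
on first use; otherwise every bit is cleared. A sketch of reading one
bit, with a hypothetical helper name:

	bool	stat_start_pending(uint part_id) const
	{
		return((m_sql_stat_start_parts[part_id / 8]
			& (1 << (part_id % 8))) != 0);
	}
*/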

/** Store lock for all partitions of a native partitioned table.
See ha_innobase::store_lock() for more details.
@param[in]	thd		user thread handle
@param[in]	to		pointer to the current element in an array of
				pointers to lock structs
@param[in]	lock_type	lock type to store in 'lock'; this may also be
				TL_IGNORE
@return	pointer to the current element in the 'to' array. */
THR_LOCK_DATA**
ha_innopart::store_lock(
	THD*			thd,
	THR_LOCK_DATA**		to,
	thr_lock_type		lock_type)
{
	trx_t*		trx = m_prebuilt->trx;
	const uint	sql_command = thd_sql_command(thd);

	ha_innobase::store_lock(thd, to, lock_type);

	if (sql_command == SQLCOM_FLUSH
	    && lock_type == TL_READ_NO_INSERT) {
		/* Mark the remaining partitions for quiesce;
		m_prebuilt->table (partition 0) is assumed to have been
		handled by the ha_innobase::store_lock() call above. */
		for (uint i = 1; i < m_tot_parts; i++) {
			dict_table_t*	table = m_part_share->get_table_part(i);

			dberr_t	err = row_quiesce_set_state(
				table, QUIESCE_START, trx);
			ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
		}
	}

	return to;
}
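
/* Example (added for illustration): the quiesce marking above is
triggered by FLUSH TABLES ... FOR EXPORT, which requests a
TL_READ_NO_INSERT lock:

	FLUSH TABLES t1 FOR EXPORT;
	-- copy the .ibd/.cfg files of every partition
	UNLOCK TABLES;

Every partition is its own InnoDB table, so all of them must be set to
QUIESCE_START, not only the one pointed to by m_prebuilt->table. */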

/** Lock/prepare to lock table.
As MySQL will execute an external lock for every new table it uses when it
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error.
@param[in]	thd		Handle to the user thread.
@param[in]	lock_type	Lock type.
@return	0 or error number. */
int
ha_innopart::external_lock(
	THD*	thd,
	int	lock_type)
{
	int	error = 0;

	if (m_part_info->get_first_used_partition() == MY_BIT_NONE
	    && !(m_mysql_has_locked
		 && lock_type == F_UNLCK)) {

		/* All partitions pruned away, do nothing! */
		ut_ad(!m_mysql_has_locked);
		return(error);
	}
	ut_ad(m_mysql_has_locked || lock_type != F_UNLCK);

	m_prebuilt->table = m_part_share->get_table_part(0);
	error = ha_innobase::external_lock(thd, lock_type);

	for (uint i = 0; i < m_tot_parts; i++) {
		dict_table_t*	table = m_part_share->get_table_part(i);

		switch (table->quiesce) {
		case QUIESCE_START:
			/* Check for FLUSH TABLE t WITH READ LOCK */
			if (!srv_read_only_mode
			    && thd_sql_command(thd) == SQLCOM_FLUSH
			    && lock_type == F_RDLCK) {

				ut_ad(table->quiesce == QUIESCE_START);

				row_quiesce_table_start(table,
							m_prebuilt->trx);

				/* Use the transaction instance to track
				UNLOCK TABLES; note that UNLOCK TABLES can
				also happen implicitly, e.g. via
				START TRANSACTION. */

				++m_prebuilt->trx->flush_tables;
			}
			break;

		case QUIESCE_COMPLETE:
			/* Check for UNLOCK TABLES; implicit or explicit
			or trx interruption. */
			if (m_prebuilt->trx->flush_tables > 0
			    && (lock_type == F_UNLCK
				|| trx_is_interrupted(m_prebuilt->trx))) {

				ut_ad(table->quiesce == QUIESCE_COMPLETE);
				row_quiesce_table_complete(table,
							   m_prebuilt->trx);

				ut_a(m_prebuilt->trx->flush_tables > 0);
				--m_prebuilt->trx->flush_tables;
			}
			break;

		case QUIESCE_NONE:
			break;

		default:
			ut_ad(0);
		}
	}

	ut_ad(!m_auto_increment_lock);
	ut_ad(!m_auto_increment_safe_stmt_log_lock);

	if (m_prebuilt->sql_stat_start) {
		memset(m_sql_stat_start_parts, 0xff,
		       UT_BITS_IN_BYTES(m_tot_parts));
	} else {
		memset(m_sql_stat_start_parts, 0,
		       UT_BITS_IN_BYTES(m_tot_parts));
	}
	return(error);
}
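
/* Lifecycle sketch (added for illustration) for one partition table:

	FLUSH TABLES t1 FOR EXPORT;
		-- store_lock(): quiesce = QUIESCE_START
		-- external_lock(F_RDLCK): row_quiesce_table_start()
		--   flushes the partition, raises trx->flush_tables,
		--   and the state ends up as QUIESCE_COMPLETE
	-- the partition files can now be copied
	UNLOCK TABLES;
		-- external_lock(F_UNLCK): row_quiesce_table_complete()
		--   releases the quiesced state, flush_tables drops back
*/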

/** Get the current auto_increment value.
@param[in]	offset			Table auto-inc offset.
@param[in]	increment		Table auto-inc increment.
@param[in]	nb_desired_values	Number of required values.
@param[out]	first_value		The auto increment value; set to
					ULLONG_MAX on failure, since the
					function itself returns void.
@param[out]	nb_reserved_values	Number of reserved values. */
void
ha_innopart::get_auto_increment(
	ulonglong	offset,
	ulonglong	increment,
	ulonglong	nb_desired_values,
	ulonglong*	first_value,
	ulonglong*	nb_reserved_values)
{
	DBUG_ENTER("ha_innopart::get_auto_increment");
	if (table_share->next_number_keypart != 0) {
		/* Only first key part allowed as autoinc for InnoDB tables! */
		ut_ad(0);
		*first_value = ULLONG_MAX;
		DBUG_VOID_RETURN;
	}
	get_auto_increment_first_field(
		increment,
		nb_desired_values,
		first_value,
		nb_reserved_values);
	DBUG_VOID_RETURN;
}

/** Compares two 'refs'.
A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
InnoDB internally uses the row id as the primary key.
The partition id is used as a secondary sort criterion.
@param[in]	ref1	An (internal) primary key value in the MySQL key value
			format.
@param[in]	ref2	Reference to compare with (same type as ref1).
@return	< 0 if ref1 < ref2, 0 if equal, else > 0. */
int
ha_innopart::cmp_ref(
	const uchar*	ref1,
	const uchar*	ref2)
{
	int	cmp;

	cmp = ha_innobase::cmp_ref(ref1 + PARTITION_BYTES_IN_POS,
				   ref2 + PARTITION_BYTES_IN_POS);

	if (cmp != 0) {
		return(cmp);
	}

	cmp = static_cast<int>(uint2korr(ref1))
	      - static_cast<int>(uint2korr(ref2));

	return(cmp);
}
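
/* Layout sketch (added for illustration): a partitioned 'ref' prefixes
the ordinary InnoDB ref with PARTITION_BYTES_IN_POS (2) bytes holding
the partition id in uint2korr (little-endian) format:

	ref: [partition id, 2 bytes][PK value in MySQL key format ...]

With hypothetical values, pk=7 in partition 3 vs pk=7 in partition 5:
the primary key parts compare equal, so the result is 3 - 5 < 0 and the
row in partition 3 orders first. */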

/** Prepare for creating new partitions during ALTER TABLE ... PARTITION.
@param[in]	num_partitions	Number of new partitions to be created.
@param[in]	only_create	True if only creating the partition
				(no open/lock is needed).
@return	0 for success else error code. */
int
ha_innopart::prepare_for_new_partitions(
	uint	num_partitions,
	bool	only_create)
{
	m_new_partitions = UT_NEW(Altered_partitions(num_partitions,
						     only_create),
				  mem_key_partitioning);
	if (m_new_partitions == NULL) {
		return(HA_ERR_OUT_OF_MEM);
	}
	if (m_new_partitions->initialize()) {
		UT_DELETE(m_new_partitions);
		m_new_partitions = NULL;
		return(HA_ERR_OUT_OF_MEM);
	}
	return(0);
}

/** Create a new partition to be filled during ALTER TABLE ... PARTITION.
@param[in]	table		Table to create the partition in.
@param[in]	create_info	Table/partition specific create info.
@param[in]	part_name	Partition name.
@param[in]	new_part_id	Partition id in new table.
@param[in]	part_elem	Partition element.
@return	0 for success else error code. */
int
ha_innopart::create_new_partition(
	TABLE*			table,
	HA_CREATE_INFO*		create_info,
	const char*		part_name,
	uint			new_part_id,
	partition_element*	part_elem)
{
	int		error;
	char		norm_name[FN_REFLEN];
	const char*	data_file_name_backup = create_info->data_file_name;
	DBUG_ENTER("ha_innopart::create_new_partition");
	/* Delete by ddl_log on failure. */
	normalize_table_name(norm_name, part_name);
	set_create_info_dir(part_elem, create_info);

	error = ha_innobase::create(norm_name, table, create_info);
	create_info->data_file_name = data_file_name_backup;
	if (error == HA_ERR_FOUND_DUPP_KEY) {
		DBUG_RETURN(HA_ERR_TABLE_EXIST);
	}
	if (error != 0) {
		DBUG_RETURN(error);
	}
	if (!m_new_partitions->only_create()) {
		dict_table_t*	part;
		part = dict_table_open_on_name(norm_name,
					       false,
					       true,
					       DICT_ERR_IGNORE_NONE);
		if (part == NULL) {
			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
		}
		m_new_partitions->set_part(new_part_id, part);
	}
	DBUG_RETURN(0);
}

/** Close and finalize new partitions. */
void
ha_innopart::close_new_partitions()
{
	if (m_new_partitions != NULL) {
		UT_DELETE(m_new_partitions);
		m_new_partitions = NULL;
	}
}

/** Write a row to a new partition.
@param[in]	new_part	New partition to write to.
@return	0 for success else error code. */
int
ha_innopart::write_row_in_new_part(
	uint	new_part)
{
	int	result;
	DBUG_ENTER("ha_innopart::write_row_in_new_part");

	m_last_part = new_part;
	if (m_new_partitions->part(new_part) == NULL) {
		/* Altered partition contains misplaced row. */
		m_err_rec = table->record[0];
		DBUG_RETURN(HA_ERR_ROW_IN_WRONG_PARTITION);
	}
	m_new_partitions->get_prebuilt(m_prebuilt, new_part);
	result = ha_innobase::write_row(table->record[0]);
	m_new_partitions->set_from_prebuilt(m_prebuilt, new_part);
	DBUG_RETURN(result);
}

/** Allocate the array to hold blob heaps for all partitions.
@return	the allocated array, or NULL on failure. */
mem_heap_t**
ha_innopart::alloc_blob_heap_array()
{
	DBUG_ENTER("ha_innopart::alloc_blob_heap_array");

	const ulint	len = sizeof(mem_heap_t*) * m_tot_parts;
	m_blob_heap_parts = static_cast<mem_heap_t**>(
		ut_zalloc(len, mem_key_partitioning));
	if (m_blob_heap_parts == NULL) {
		DBUG_RETURN(NULL);
	}

	DBUG_RETURN(m_blob_heap_parts);
}

/** Free the array that holds blob heaps for all partitions. */
void
ha_innopart::free_blob_heap_array()
{
	DBUG_ENTER("ha_innopart::free_blob_heap_array");

	if (m_blob_heap_parts != NULL) {
		clear_blob_heaps();
		ut_free(m_blob_heap_parts);
		m_blob_heap_parts = NULL;
	}

	DBUG_VOID_RETURN;
}

/** Free the blob heaps of the individual partitions and reset
m_prebuilt->blob_heap, which may point into one of them. */
void
ha_innopart::clear_blob_heaps()
{
	DBUG_ENTER("ha_innopart::clear_blob_heaps");

	if (m_blob_heap_parts == NULL) {
		DBUG_VOID_RETURN;
	}

	for (uint i = 0; i < m_tot_parts; i++) {
		if (m_blob_heap_parts[i] != NULL) {
			DBUG_PRINT("ha_innopart", ("freeing blob_heap: %p",
						   m_blob_heap_parts[i]));
			mem_heap_free(m_blob_heap_parts[i]);
			m_blob_heap_parts[i] = NULL;
		}
	}

	/* Reset blob_heap in m_prebuilt after freeing all heaps. It is set in
	ha_innopart::set_partition to the blob heap of current partition. */
	m_prebuilt->blob_heap = NULL;

	DBUG_VOID_RETURN;
}

/** Reset state of file to after 'open'. This function is called
after every statement for all tables used by that statement. */
int
ha_innopart::reset()
{
	DBUG_ENTER("ha_innopart::reset");

	clear_blob_heaps();

	DBUG_RETURN(ha_innobase::reset());
}

/****************************************************************************
 * DS-MRR implementation
 ***************************************************************************/

/* TODO: move the default implementations into the base handler class! */
/* TODO: See if it could be optimized for partitioned tables? */
/* Use default ha_innobase implementation for now... */