mariadb/storage/tokudb/ha_tokudb_alter_56.cc

1684 lines
58 KiB
C++
Raw Normal View History

/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
2015-10-26 12:48:26 +01:00
/*======
This file is part of TokuDB
2015-10-26 12:48:26 +01:00
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
2015-10-26 12:48:26 +01:00
TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
2015-10-26 12:48:26 +01:00
TokuDB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
2015-10-26 12:48:26 +01:00
You should have received a copy of the GNU General Public License
along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
2015-10-26 12:48:26 +01:00
======= */
2015-10-26 12:48:26 +01:00
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#if TOKU_INCLUDE_ALTER_56
2014-02-20 09:28:18 +02:00
#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099
#define TOKU_ALTER_RENAME ALTER_RENAME
2013-11-10 16:45:11 -05:00
#define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t
#elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
#define TOKU_ALTER_RENAME ALTER_RENAME
2013-11-10 16:45:11 -05:00
#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
#define TOKU_ALTER_RENAME ALTER_RENAME_56
2013-11-10 16:45:11 -05:00
#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
#else
#error
#endif
#include "ha_tokudb_alter_common.cc"
#include <sql_array.h>
#include <sql_base.h>
// The tokudb alter context contains the alter state that is set in the check if supported method and used
// later when the alter operation is executed.
class tokudb_alter_ctx : public inplace_alter_handler_ctx {
public:
tokudb_alter_ctx() :
handler_flags(0),
alter_txn(NULL),
add_index_changed(false),
drop_index_changed(false),
2013-05-24 10:59:07 -04:00
reset_card(false),
compression_changed(false),
expand_varchar_update_needed(false),
expand_fixed_update_needed(false),
expand_blob_update_needed(false),
optimize_needed(false),
table_kc_info(NULL),
altered_table_kc_info(NULL) {
}
~tokudb_alter_ctx() {
if (altered_table_kc_info)
free_key_and_col_info(altered_table_kc_info);
}
public:
ulong handler_flags;
2015-12-15 17:23:58 +01:00
DB_TXN* alter_txn;
bool add_index_changed;
bool incremented_num_DBs, modified_DBs;
bool drop_index_changed;
2013-05-24 10:59:07 -04:00
bool reset_card;
bool compression_changed;
enum toku_compression_method orig_compression_method;
bool expand_varchar_update_needed;
bool expand_fixed_update_needed;
bool expand_blob_update_needed;
bool optimize_needed;
Dynamic_array<uint> changed_fields;
2015-12-15 17:23:58 +01:00
KEY_AND_COL_INFO* table_kc_info;
KEY_AND_COL_INFO* altered_table_kc_info;
KEY_AND_COL_INFO altered_table_kc_info_base;
};
// Debug function to print out an alter table operation
2015-12-15 17:23:58 +01:00
void ha_tokudb::print_alter_info(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
TOKUDB_TRACE(
"***are keys of two tables same? %d",
tables_have_same_keys(table, altered_table, false, false));
if (ha_alter_info->handler_flags) {
2015-12-15 17:23:58 +01:00
TOKUDB_TRACE("***alter flags set ***");
for (int i = 0; i < 32; i++) {
if (ha_alter_info->handler_flags & (1 << i))
2015-12-15 17:23:58 +01:00
TOKUDB_TRACE("%d", i);
}
}
2015-12-15 17:23:58 +01:00
// everyone calculates data by doing some default_values - record[0], but
// I do not see why that is necessary
TOKUDB_TRACE("******");
TOKUDB_TRACE("***orig table***");
for (uint i = 0; i < table->s->fields; i++) {
//
// make sure to use table->field, and NOT table->s->field
//
Field* curr_field = table->field[i];
uint null_offset = get_null_offset(table, curr_field);
2015-12-15 17:23:58 +01:00
TOKUDB_TRACE(
"name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: "
"%d, is_null %d, pack_length %u",
curr_field->field_name,
curr_field->real_type(),
mysql_to_toku_type(curr_field),
curr_field->null_bit,
null_offset,
curr_field->real_maybe_null(),
curr_field->real_maybe_null() ?
table->s->default_values[null_offset] & curr_field->null_bit :
0xffffffff,
curr_field->pack_length());
}
2015-12-15 17:23:58 +01:00
TOKUDB_TRACE("******");
TOKUDB_TRACE("***altered table***");
for (uint i = 0; i < altered_table->s->fields; i++) {
Field* curr_field = altered_table->field[i];
uint null_offset = get_null_offset(altered_table, curr_field);
2015-12-15 17:23:58 +01:00
TOKUDB_TRACE(
"name: %s, types: %u %u, nullable: %d, null_offset: %d, "
"is_null_field: %d, is_null %d, pack_length %u",
curr_field->field_name,
curr_field->real_type(),
mysql_to_toku_type(curr_field),
curr_field->null_bit,
null_offset,
curr_field->real_maybe_null(),
curr_field->real_maybe_null() ?
altered_table->s->default_values[null_offset] &
curr_field->null_bit : 0xffffffff,
curr_field->pack_length());
}
2015-12-15 17:23:58 +01:00
TOKUDB_TRACE("******");
}
2015-12-15 17:23:58 +01:00
// Compare two tables that have the same number of fields, position by
// position, and append the index of every position whose field types differ
// to changed_fields. Field names are not compared. Returns the number of
// elements held by changed_fields afterwards.
static int find_changed_fields(
    TABLE* table_a,
    TABLE* table_b,
    Dynamic_array<uint>& changed_fields) {

    const uint num_fields = table_a->s->fields;
    for (uint pos = 0; pos < num_fields; pos++) {
        if (fields_are_same_type(table_a->field[pos], table_b->field[pos]))
            continue;
        changed_fields.append(pos);
    }
    return changed_fields.elements();
}
2015-12-15 17:23:58 +01:00
// Forward declarations of the two predicates that
// check_if_supported_inplace_alter consults for column length changes and
// column type changes respectively.
static bool change_length_is_supported(
    TABLE* table, TABLE* altered_table,
    Alter_inplace_info* ha_alter_info, tokudb_alter_ctx* ctx);

static bool change_type_is_supported(
    TABLE* table, TABLE* altered_table,
    Alter_inplace_info* ha_alter_info, tokudb_alter_ctx* ctx);
// The ha_alter_info->handler_flags can not be trusted.
// Returns a cleaned-up copy of those flags; ha_alter_info itself is left
// untouched.
static ulong fix_handler_flags(
    THD* thd,
    TABLE* table,
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    ulong flags = ha_alter_info->handler_flags;

#if 100000 <= MYSQL_VERSION_ID
    // This is automatically supported, hide the flag from later checks
    flags &= ~Alter_inplace_info::ALTER_PARTITIONED;
#endif

    // workaround for fill_alter_inplace_info bug (#5193):
    // ADD_INDEX and DROP_INDEX are erroneously set for a column addition that
    // does not change the keys. When a column is added or dropped and the
    // keys turn out to be the same, turn both index flags off so that the hot
    // column operation can be done later.
    if ((flags &
         (Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::DROP_COLUMN)) &&
        (flags &
         (Alter_inplace_info::ADD_INDEX + Alter_inplace_info::DROP_INDEX)) &&
        tables_have_same_keys(
            table,
            altered_table,
            tokudb::sysvars::alter_print_error(thd) != 0, false)) {
        flags &=
            ~(Alter_inplace_info::ADD_INDEX +
              Alter_inplace_info::DROP_INDEX);
    }

    // rename table is always allowed together with any other operation, so
    // the rename flag is simply dropped
    flags &= ~Alter_inplace_info::TOKU_ALTER_RENAME;

    // ALTER_COLUMN_TYPE may be set although no column changed its type;
    // drop the flag in that case
    if ((flags & Alter_inplace_info::ALTER_COLUMN_TYPE) &&
        all_fields_are_same_type(table, altered_table)) {
        flags &= ~Alter_inplace_info::ALTER_COLUMN_TYPE;
    }

    return flags;
}
// Returns true when no index that is being dropped shares its name with an
// index that is being added, false as soon as one common name is found.
static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
    const uint num_dropped = ha_alter_info->index_drop_count;
    const uint num_added = ha_alter_info->index_add_count;

    for (uint di = 0; di < num_dropped; di++) {
        KEY* dropped = ha_alter_info->index_drop_buffer[di];
        for (uint ai = 0; ai < num_added; ai++) {
            // index_add_buffer holds positions into key_info_buffer
            KEY* added =
                &ha_alter_info->key_info_buffer[
                    ha_alter_info->index_add_buffer[ai]];
            if (strcmp(dropped->name, added->name) == 0)
                return false;
        }
    }
    return true;
}
2015-12-15 17:23:58 +01:00
// True exactly when bits has at least one bit in common with mask and has no
// bit outside of mask; false otherwise (in particular when bits is zero).
static bool only_flags(ulong bits, ulong mask) {
    const bool has_wanted = (bits & mask) != 0;
    const bool has_other = (bits & ~mask) != 0;
    return has_wanted && !has_other;
}
2015-12-15 17:23:58 +01:00
// Check if an alter table operation on this table and described by the alter
// table parameters is supported inplace and if so, what type of locking is
// needed to execute it. return values:
// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an
// inplace operation, a table copy is required
// HA_ALTER_ERROR: the alter table operation should fail
// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X
2015-12-15 17:23:58 +01:00
// HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE: prepare runs with MDL X,
// alter runs with MDL SNW
// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW,
// concurrent reads, no writes
// HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE: prepare runs with MDL X,
// alter runs with MDL SW
// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW,
// concurrent reads, writes.
// must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid
// deadlocks with the MDL lock and the table lock
enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
TOKUDB_HANDLER_DBUG_ENTER("");
2015-12-15 17:23:58 +01:00
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
print_alter_info(altered_table, ha_alter_info);
}
2015-12-15 17:23:58 +01:00
// default is NOT inplace
enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED;
THD* thd = ha_thd();
// setup context
2015-12-15 17:23:58 +01:00
tokudb_alter_ctx* ctx = new tokudb_alter_ctx;
ha_alter_info->handler_ctx = ctx;
2015-12-15 17:23:58 +01:00
ctx->handler_flags =
fix_handler_flags(thd, table, altered_table, ha_alter_info);
ctx->table_kc_info = &share->kc_info;
ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));
2015-12-15 17:23:58 +01:00
if (tokudb::sysvars::disable_hot_alter(thd)) {
; // do nothing
2015-12-15 17:23:58 +01:00
} else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::DROP_INDEX +
Alter_inplace_info::DROP_UNIQUE_INDEX +
Alter_inplace_info::ADD_INDEX +
Alter_inplace_info::ADD_UNIQUE_INDEX)) {
// add or drop index
if (table->s->null_bytes == altered_table->s->null_bytes &&
2015-12-15 17:23:58 +01:00
(ha_alter_info->index_add_count > 0 ||
ha_alter_info->index_drop_count > 0) &&
!tables_have_same_keys(
table,
altered_table,
tokudb::sysvars::alter_print_error(thd) != 0, false) &&
is_disjoint_add_drop(ha_alter_info)) {
2015-12-15 17:23:58 +01:00
if (ctx->handler_flags &
(Alter_inplace_info::DROP_INDEX +
Alter_inplace_info::DROP_UNIQUE_INDEX)) {
// the fractal tree can not handle dropping an index concurrent
// with querying with the index.
// we grab an exclusive MDL for the drop index.
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
} else {
result = HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE;
2015-12-15 17:23:58 +01:00
// someday, allow multiple hot indexes via alter table add key.
// don't forget to change the store_lock function.
// for now, hot indexing is only supported via session variable
// with the create index sql command
if (ha_alter_info->index_add_count == 1 &&
// only one add or drop
ha_alter_info->index_drop_count == 0 &&
// must be add index not add unique index
ctx->handler_flags == Alter_inplace_info::ADD_INDEX &&
// must be a create index command
thd_sql_command(thd) == SQLCOM_CREATE_INDEX &&
// must be enabled
tokudb::sysvars::create_index_online(thd)) {
// external_lock set WRITE_ALLOW_WRITE which allows writes
// concurrent with the index creation
result = HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE;
}
}
}
2015-12-15 17:23:58 +01:00
} else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
// column default
if (table->s->null_bytes == altered_table->s->null_bytes)
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
2015-12-15 17:23:58 +01:00
} else if (ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_COLUMN_NAME +
Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
// column rename
// we have identified a possible column rename,
// but let's do some more checks
2015-12-15 17:23:58 +01:00
// we will only allow an hcr if there are no changes
// in column positions (ALTER_COLUMN_ORDER is not set)
2015-12-15 17:23:58 +01:00
// now need to verify that one and only one column
// has changed only its name. If we find anything to
// the contrary, we don't allow it, also check indexes
if (table->s->null_bytes == altered_table->s->null_bytes) {
2015-12-15 17:23:58 +01:00
bool cr_supported =
column_rename_supported(
table,
altered_table,
(ctx->handler_flags &
Alter_inplace_info::ALTER_COLUMN_ORDER) != 0);
if (cr_supported)
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
2015-12-15 17:23:58 +01:00
} else if (ctx->handler_flags & Alter_inplace_info::ADD_COLUMN &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ADD_COLUMN +
Alter_inplace_info::ALTER_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// add column
uint32_t added_columns[altered_table->s->fields];
uint32_t num_added_columns = 0;
2015-12-15 17:23:58 +01:00
int r =
find_changed_columns(
added_columns,
&num_added_columns,
table,
altered_table);
if (r == 0) {
2015-12-15 17:23:58 +01:00
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
for (uint32_t i = 0; i < num_added_columns; i++) {
uint32_t curr_added_index = added_columns[i];
2015-12-15 17:23:58 +01:00
Field* curr_added_field =
altered_table->field[curr_added_index];
TOKUDB_TRACE(
"Added column: index %d, name %s",
curr_added_index,
curr_added_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
2015-12-15 17:23:58 +01:00
} else if (ctx->handler_flags & Alter_inplace_info::DROP_COLUMN &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::DROP_COLUMN +
Alter_inplace_info::ALTER_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// drop column
uint32_t dropped_columns[table->s->fields];
uint32_t num_dropped_columns = 0;
2015-12-15 17:23:58 +01:00
int r =
find_changed_columns(
dropped_columns,
&num_dropped_columns,
altered_table,
table);
if (r == 0) {
2015-12-15 17:23:58 +01:00
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
for (uint32_t i = 0; i < num_dropped_columns; i++) {
uint32_t curr_dropped_index = dropped_columns[i];
Field* curr_dropped_field = table->field[curr_dropped_index];
2015-12-15 17:23:58 +01:00
TOKUDB_TRACE(
"Dropped column: index %d, name %s",
curr_dropped_index,
curr_dropped_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
2015-12-15 17:23:58 +01:00
} else if ((ctx->handler_flags &
Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH +
Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
table->s->fields == altered_table->s->fields &&
find_changed_fields(
table,
altered_table,
ctx->changed_fields) > 0 &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// change column length
if (change_length_is_supported(
table,
altered_table,
ha_alter_info, ctx)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
2015-12-15 17:23:58 +01:00
} else if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_COLUMN_TYPE +
Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
table->s->fields == altered_table->s->fields &&
find_changed_fields(
table,
altered_table,
ctx->changed_fields) > 0 &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// change column type
if (change_type_is_supported(
table,
altered_table,
ha_alter_info, ctx)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
2015-12-15 17:23:58 +01:00
} else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::CHANGE_CREATE_OPTION)) {
HA_CREATE_INFO* create_info = ha_alter_info->create_info;
#if TOKU_INCLUDE_OPTION_STRUCTS
// set the USED_ROW_FORMAT flag for use later in this file for changes in the table's
// compression
2015-12-15 17:23:58 +01:00
if (create_info->option_struct->row_format !=
table_share->option_struct->row_format)
create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
#endif
// alter auto_increment
if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
// do a sanity check that the table is what we think it is
2015-12-15 17:23:58 +01:00
if (tables_have_same_keys_and_columns(
table,
altered_table,
tokudb::sysvars::alter_print_error(thd) != 0)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
2015-12-15 17:23:58 +01:00
} else if (only_flags(
create_info->used_fields,
HA_CREATE_USED_ROW_FORMAT)) {
// alter row_format
// do a sanity check that the table is what we think it is
2015-12-15 17:23:58 +01:00
if (tables_have_same_keys_and_columns(
table,
altered_table,
tokudb::sysvars::alter_print_error(thd) != 0)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
}
2015-12-15 17:23:58 +01:00
}
#if TOKU_OPTIMIZE_WITH_RECREATE
2015-12-15 17:23:58 +01:00
else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::RECREATE_TABLE +
Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
ctx->optimize_needed = true;
result = HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE;
}
#endif
2015-12-15 17:23:58 +01:00
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) &&
result != HA_ALTER_INPLACE_NOT_SUPPORTED &&
table->s->null_bytes != altered_table->s->null_bytes) {
TOKUDB_HANDLER_TRACE("q %s", thd->query());
2015-12-15 17:23:58 +01:00
TOKUDB_HANDLER_TRACE(
"null bytes %u -> %u",
table->s->null_bytes,
altered_table->s->null_bytes);
}
2015-12-15 17:23:58 +01:00
// turn a not supported result into an error if the slow alter table
// (copy) is disabled
if (result == HA_ALTER_INPLACE_NOT_SUPPORTED &&
tokudb::sysvars::disable_slow_alter(thd)) {
print_error(HA_ERR_UNSUPPORTED, MYF(0));
result = HA_ALTER_ERROR;
}
DBUG_RETURN(result);
}
// Prepare for the alter operations
2015-12-15 17:23:58 +01:00
bool ha_tokudb::prepare_inplace_alter_table(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
TOKUDB_HANDLER_DBUG_ENTER("");
2015-12-15 17:23:58 +01:00
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
assert_always(transaction); // transaction must exist after table is locked
ctx->alter_txn = transaction;
bool result = false; // success
DBUG_RETURN(result);
}
// Execute the alter operations.
2015-12-15 17:23:58 +01:00
bool ha_tokudb::inplace_alter_table(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
TOKUDB_HANDLER_DBUG_ENTER("");
int error = 0;
2015-12-15 17:23:58 +01:00
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
HA_CREATE_INFO* create_info = ha_alter_info->create_info;
// this should be enough to handle locking as the higher level MDL
// on this table should prevent any new analyze tasks.
share->cancel_background_jobs();
if (error == 0 &&
(ctx->handler_flags &
(Alter_inplace_info::DROP_INDEX +
Alter_inplace_info::DROP_UNIQUE_INDEX))) {
error = alter_table_drop_index(altered_table, ha_alter_info);
}
2015-12-15 17:23:58 +01:00
if (error == 0 &&
(ctx->handler_flags &
(Alter_inplace_info::ADD_INDEX +
Alter_inplace_info::ADD_UNIQUE_INDEX))) {
error = alter_table_add_index(altered_table, ha_alter_info);
}
2015-12-15 17:23:58 +01:00
if (error == 0 &&
(ctx->handler_flags &
(Alter_inplace_info::ADD_COLUMN +
Alter_inplace_info::DROP_COLUMN))) {
error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
}
2015-12-15 17:23:58 +01:00
if (error == 0 &&
(ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) &&
(create_info->used_fields & HA_CREATE_USED_AUTO)) {
error = write_auto_inc_create(
share->status_block,
create_info->auto_increment_value,
ctx->alter_txn);
}
2015-12-15 17:23:58 +01:00
if (error == 0 &&
(ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) &&
(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
// Get the current compression
DB *db = share->key_file[0];
error = db->get_compression_method(db, &ctx->orig_compression_method);
2015-12-15 17:23:58 +01:00
assert_always(error == 0);
// Set the new compression
2014-03-18 19:18:40 -04:00
#if TOKU_INCLUDE_OPTION_STRUCTS
2015-12-15 17:23:58 +01:00
toku_compression_method method =
row_format_to_toku_compression_method(
(tokudb::sysvars::row_format_t)create_info->option_struct->row_format);
2014-03-18 19:18:40 -04:00
#else
2015-12-15 17:23:58 +01:00
toku_compression_method method =
row_type_to_toku_compression_method(create_info->row_type);
2014-03-18 19:18:40 -04:00
#endif
2013-12-11 15:47:25 -05:00
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
db = share->key_file[i];
error = db->change_compression_method(db, method);
if (error)
break;
ctx->compression_changed = true;
}
}
// note: only one column expansion is allowed
if (error == 0 && ctx->expand_fixed_update_needed)
error = alter_table_expand_columns(altered_table, ha_alter_info);
if (error == 0 && ctx->expand_varchar_update_needed)
2015-12-15 17:23:58 +01:00
error = alter_table_expand_varchar_offsets(
altered_table,
ha_alter_info);
if (error == 0 && ctx->expand_blob_update_needed)
error = alter_table_expand_blobs(altered_table, ha_alter_info);
if (error == 0 && ctx->reset_card) {
2015-12-15 17:23:58 +01:00
error = tokudb::alter_card(
share->status_block,
ctx->alter_txn,
table->s,
altered_table->s);
}
if (error == 0 && ctx->optimize_needed) {
error = do_optimize(ha_thd());
}
2013-05-24 10:59:07 -04:00
2013-11-09 16:55:12 -05:00
#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
#if WITH_PARTITION_STORAGE_ENGINE
2015-12-15 17:23:58 +01:00
if (error == 0 &&
(TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) {
#else
if (error == 0) {
#endif
2015-12-15 17:23:58 +01:00
error = write_frm_data(
share->status_block,
ctx->alter_txn,
altered_table->s->path.str);
}
#endif
bool result = false; // success
if (error) {
print_error(error, MYF(0));
result = true; // failure
}
DBUG_RETURN(result);
}
2015-12-15 17:23:58 +01:00
// Add the indexes listed in ha_alter_info (index_add_buffer /
// index_add_count) to the table under the alter transaction.
// Returns 0 on success, ENOMEM when the temporary key array can not be
// allocated, otherwise the error returned by tokudb_add_index.
// On success a cardinality reset is requested through ctx->reset_card.
int ha_tokudb::alter_table_add_index(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    const uint add_count = ha_alter_info->index_add_count;

    // sort keys in add index order
    KEY* key_info = (KEY*)tokudb::memory::malloc(
        sizeof(KEY) * add_count,
        MYF(MY_WME));
    // the array is written to right below, so a failed allocation must be
    // reported instead of dereferenced. Nothing has been changed yet, hence
    // add_index_changed stays false and no rollback work is triggered.
    if (key_info == NULL && add_count > 0)
        return ENOMEM;

    for (uint i = 0; i < add_count; i++) {
        KEY *key = &key_info[i];
        *key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
        // point each key part at the corresponding field of this handler's
        // table
        for (KEY_PART_INFO* key_part = key->key_part;
             key_part < key->key_part + key->user_defined_key_parts;
             key_part++) {
            key_part->field = table->field[key_part->fieldnr];
        }
    }

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    // raised before the attempt so that a rollback also undoes a partially
    // completed add
    ctx->add_index_changed = true;
    int error = tokudb_add_index(
        table,
        key_info,
        add_count,
        ctx->alter_txn,
        &ctx->incremented_num_DBs,
        &ctx->modified_DBs);
    if (error == HA_ERR_FOUND_DUPP_KEY) {
        // hack for now, in case of duplicate key error,
        // because at the moment we cannot display the right key
        // information to the user, so that he knows potentially what went
        // wrong.
        last_dup_key = MAX_KEY;
    }

    tokudb::memory::free(key_info);

    if (error == 0)
        ctx->reset_card = true;
    return error;
}
2015-12-15 17:23:58 +01:00
static bool find_index_of_key(
const char* key_name,
TABLE* table,
uint* index_offset_ptr) {
for (uint i = 0; i < table->s->keys; i++) {
if (strcmp(key_name, table->key_info[i].name) == 0) {
*index_offset_ptr = i;
return true;
}
}
return false;
}
2015-12-15 17:23:58 +01:00
static bool find_index_of_key(
const char* key_name,
KEY* key_info,
uint key_count,
uint* index_offset_ptr) {
for (uint i = 0; i < key_count; i++) {
if (strcmp(key_name, key_info[i].name) == 0) {
*index_offset_ptr = i;
return true;
}
}
return false;
}
2015-12-15 17:23:58 +01:00
// Drop the indexes listed in ha_alter_info->index_drop_buffer under the alter
// transaction. Returns the result of drop_indexes; on success a cardinality
// reset is requested through ctx->reset_card.
int ha_tokudb::alter_table_drop_index(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    const uint drop_count = ha_alter_info->index_drop_count;
    KEY *key_info = table->key_info;

    // translate key names to indexes into the key_info array
    uint index_drop_offsets[drop_count];
    for (uint n = 0; n < drop_count; n++) {
        const char* drop_name = ha_alter_info->index_drop_buffer[n]->name;
        if (find_index_of_key(drop_name, table, &index_drop_offsets[n]))
            continue;

        // undo of add key in partition engine: the key is not known to the
        // table, so it has to be in the alter info's key buffer, which then
        // also becomes the key array handed to drop_indexes
        bool found = find_index_of_key(
            drop_name,
            ha_alter_info->key_info_buffer,
            ha_alter_info->key_count,
            &index_drop_offsets[n]);
        assert_always(found);
        key_info = ha_alter_info->key_info_buffer;
    }

    // drop indexes
    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    ctx->drop_index_changed = true;

    int error = drop_indexes(
        table,
        index_drop_offsets,
        drop_count,
        key_info,
        ctx->alter_txn);

    if (error == 0)
        ctx->reset_card = true;
    return error;
}
2015-12-15 17:23:58 +01:00
// Execute a column addition or a column drop: every dictionary of the table
// gets a new row descriptor, and the primary dictionary as well as every
// clustering key additionally gets an update broadcast carrying the row
// mutator built by fill_row_mutator.
// Returns 0 on success, ENOMEM when the mutator buffer can not be allocated,
// otherwise the first error returned by one of the steps.
int ha_tokudb::alter_table_add_or_drop_column(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
    int error = 0;
    uchar *column_extra = NULL;
    uint32_t max_column_extra_size;
    uint32_t num_column_extra;
    uint32_t num_columns = 0;
    uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);

    // set size such that we know it is big enough for both cases
    uint32_t columns[table->s->fields + altered_table->s->fields];
    memset(columns, 0, sizeof(columns));

    // generate the array of columns; for a drop the two tables are passed in
    // swapped order
    if (ha_alter_info->handler_flags & Alter_inplace_info::DROP_COLUMN) {
        find_changed_columns(
            columns,
            &num_columns,
            altered_table,
            table);
    } else if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) {
        find_changed_columns(
            columns,
            &num_columns,
            table,
            altered_table);
    } else {
        assert_unreachable();
    }

    max_column_extra_size =
        // max static row_mutator
        STATIC_ROW_MUTATOR_SIZE +
        // max dynamic row_mutator
        4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength +
        // max static blob size
        (4 + share->kc_info.num_blobs) +
        // max dynamic blob size
        (num_columns*(1+4+1+4));

    column_extra = (uchar*)tokudb::memory::malloc(
        max_column_extra_size,
        MYF(MY_WME));

    if (column_extra == NULL) {
        error = ENOMEM;
    } else {
        for (uint32_t db_idx = 0; db_idx < curr_num_DBs; db_idx++) {
            // change to a new descriptor
            DBT row_descriptor;
            memset(&row_descriptor, 0, sizeof row_descriptor);
            error = new_row_descriptor(
                table,
                altered_table,
                ha_alter_info,
                db_idx,
                &row_descriptor);
            if (error)
                break;

            error = share->key_file[db_idx]->change_descriptor(
                share->key_file[db_idx],
                ctx->alter_txn,
                &row_descriptor,
                0);
            // the descriptor data is released whether or not the change
            // succeeded
            tokudb::memory::free(row_descriptor.data);
            if (error)
                break;

            // only the primary dictionary and clustering keys get the
            // broadcast
            if (db_idx == primary_key ||
                key_is_clustering(&table_share->key_info[db_idx])) {
                num_column_extra = fill_row_mutator(
                    column_extra,
                    columns,
                    num_columns,
                    altered_table,
                    ctx->altered_table_kc_info,
                    db_idx,
                    // true if adding columns, otherwise is a drop
                    (ha_alter_info->handler_flags &
                     Alter_inplace_info::ADD_COLUMN) != 0);

                DBT column_dbt;
                memset(&column_dbt, 0, sizeof column_dbt);
                column_dbt.data = column_extra;
                column_dbt.size = num_column_extra;
                DBUG_ASSERT(num_column_extra <= max_column_extra_size);
                error = share->key_file[db_idx]->update_broadcast(
                    share->key_file[db_idx],
                    ctx->alter_txn,
                    &column_dbt,
                    DB_IS_RESETTING_OP);
                if (error)
                    break;
            }
        }
    }

    // single exit: the mutator buffer is released on every path
    tokudb::memory::free(column_extra);
    return error;
}
// Commit or abort the alter operations.
2015-12-15 17:23:58 +01:00
// If commit then write the new frm data to the status using the alter
// transaction.
// If abort then abort the alter transaction and try to rollback the
// non-transactional changes.
bool ha_tokudb::commit_inplace_alter_table(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
bool commit) {
TOKUDB_HANDLER_DBUG_ENTER("");
2015-12-15 17:23:58 +01:00
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
bool result = false; // success
THD *thd = ha_thd();
if (commit) {
2013-11-09 16:55:12 -05:00
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
2015-11-20 11:14:21 +01:00
(100000 <= MYSQL_VERSION_ID)
if (ha_alter_info->group_commit_ctx) {
ha_alter_info->group_commit_ctx = NULL;
}
#endif
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
2015-11-20 11:14:21 +01:00
(100000 <= MYSQL_VERSION_ID)
#if WITH_PARTITION_STORAGE_ENGINE
if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
if (true) {
#endif
2015-12-15 17:23:58 +01:00
int error = write_frm_data(
share->status_block,
ctx->alter_txn,
altered_table->s->path.str);
if (error) {
commit = false;
result = true;
print_error(error, MYF(0));
}
}
#endif
}
if (!commit) {
if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
2015-12-15 17:23:58 +01:00
(ctx->add_index_changed || ctx->drop_index_changed ||
ctx->compression_changed)) {
// get exclusive lock no matter what
#if defined(MARIADB_BASE_VERSION)
killed_state saved_killed_state = thd->killed;
thd->killed = NOT_KILLED;
2015-12-15 17:23:58 +01:00
for (volatile uint i = 0;
wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
i++) {
if (thd->killed != NOT_KILLED)
thd->killed = NOT_KILLED;
sleep(1);
}
2015-12-15 17:23:58 +01:00
assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
if (thd->killed == NOT_KILLED)
thd->killed = saved_killed_state;
#else
THD::killed_state saved_killed_state = thd->killed;
thd->killed = THD::NOT_KILLED;
2015-12-15 17:23:58 +01:00
// MySQL does not handle HA_EXTRA_NOT_USED so we use
// HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
// the partition storage engine and is treated as a NOP by tokudb
2015-12-15 17:23:58 +01:00
for (volatile uint i = 0;
wait_while_table_is_used(
thd,
table,
HA_EXTRA_PREPARE_FOR_RENAME);
i++) {
if (thd->killed != THD::NOT_KILLED)
thd->killed = THD::NOT_KILLED;
sleep(1);
}
2015-12-15 17:23:58 +01:00
assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
if (thd->killed == THD::NOT_KILLED)
thd->killed = saved_killed_state;
#endif
}
2015-12-15 17:23:58 +01:00
// abort the alter transaction NOW so that any alters are rolled back.
// this allows the following restores to work.
tokudb_trx_data* trx =
(tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
assert_always(ctx->alter_txn == trx->stmt);
assert_always(trx->tokudb_lock_count > 0);
// for partitioned tables, we use a single transaction to do all of the
// partition changes. the tokudb_lock_count is a reference count for
// each of the handlers to the same transaction. obviously, we want
// to only abort once.
if (trx->tokudb_lock_count > 0) {
if (--trx->tokudb_lock_count <= trx->create_lock_count) {
trx->create_lock_count = 0;
abort_txn(ctx->alter_txn);
ctx->alter_txn = NULL;
trx->stmt = NULL;
trx->sub_sp_level = NULL;
}
transaction = NULL;
}
if (ctx->add_index_changed) {
2015-12-15 17:23:58 +01:00
restore_add_index(
table,
ha_alter_info->index_add_count,
ctx->incremented_num_DBs,
ctx->modified_DBs);
}
if (ctx->drop_index_changed) {
// translate key names to indexes into the key_info array
uint index_drop_offsets[ha_alter_info->index_drop_count];
for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
2015-12-15 17:23:58 +01:00
bool found = find_index_of_key(
ha_alter_info->index_drop_buffer[i]->name,
table,
&index_drop_offsets[i]);
assert_always(found);
}
2015-12-15 17:23:58 +01:00
restore_drop_indexes(
table,
index_drop_offsets,
ha_alter_info->index_drop_count);
}
if (ctx->compression_changed) {
2015-12-15 17:23:58 +01:00
uint32_t curr_num_DBs =
table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
DB *db = share->key_file[i];
2015-12-15 17:23:58 +01:00
int error = db->change_compression_method(
db,
ctx->orig_compression_method);
assert_always(error == 0);
}
}
}
DBUG_RETURN(result);
}
// Setup the altered table's key and col info.
2015-12-15 17:23:58 +01:00
int ha_tokudb::setup_kc_info(
TABLE* altered_table,
KEY_AND_COL_INFO* altered_kc_info) {
int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
if (error == 0)
2015-12-15 17:23:58 +01:00
error = initialize_key_and_col_info(
altered_table->s,
altered_table,
altered_kc_info,
hidden_primary_key,
primary_key);
return error;
}
// Expand the variable length fields offsets from 1 to 2 bytes.
2015-12-15 17:23:58 +01:00
int ha_tokudb::alter_table_expand_varchar_offsets(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
int error = 0;
2015-12-15 17:23:58 +01:00
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
2013-12-11 15:47:25 -05:00
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
2015-12-15 17:23:58 +01:00
error = new_row_descriptor(
table,
altered_table,
ha_alter_info,
i,
&row_descriptor);
if (error)
break;
2015-12-15 17:23:58 +01:00
error = share->key_file[i]->change_descriptor(
share->key_file[i],
ctx->alter_txn,
&row_descriptor,
0);
tokudb::memory::free(row_descriptor.data);
if (error)
break;
2015-12-15 17:23:58 +01:00
// for all trees that have values, make an update variable offsets
// message and broadcast it into the tree
2013-09-23 14:32:48 -04:00
if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
2015-12-15 17:23:58 +01:00
uint32_t offset_start =
table_share->null_bytes +
share->kc_info.mcp_info[i].fixed_field_size;
uint32_t offset_end =
offset_start +
share->kc_info.mcp_info[i].len_of_offsets;
uint32_t number_of_offsets = offset_end - offset_start;
// make the expand variable offsets message
DBT expand; memset(&expand, 0, sizeof expand);
2015-12-15 17:23:58 +01:00
expand.size =
sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end);
expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
if (!expand.data) {
error = ENOMEM;
break;
}
2015-12-15 17:23:58 +01:00
uchar* expand_ptr = (uchar*)expand.data;
expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
2015-12-15 17:23:58 +01:00
expand_ptr += sizeof(uchar);
2015-12-15 17:23:58 +01:00
memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets));
expand_ptr += sizeof(number_of_offsets);
2015-12-15 17:23:58 +01:00
memcpy(expand_ptr, &offset_start, sizeof(offset_start));
expand_ptr += sizeof(offset_start);
// and broadcast it into the tree
2015-12-15 17:23:58 +01:00
error = share->key_file[i]->update_broadcast(
share->key_file[i],
ctx->alter_txn,
&expand,
DB_IS_RESETTING_OP);
tokudb::memory::free(expand.data);
if (error)
break;
}
}
return error;
}
// Return true if a field is part of a key.
// The field is matched by name against the fields of the key's user defined
// key parts.
static bool field_in_key(KEY *key, Field *field) {
    const KEY_PART_INFO *part = key->key_part;
    const KEY_PART_INFO *const parts_end =
        part + key->user_defined_key_parts;
    for (; part != parts_end; ++part) {
        if (strcmp(part->field->field_name, field->field_name) == 0)
            return true;
    }
    return false;
}
// Return true if a field is part of any key of the table.
static bool field_in_key_of_table(TABLE *table, Field *field) {
    uint key_num = 0;
    while (key_num < table->s->keys) {
        KEY *candidate = &table->key_info[key_num];
        if (field_in_key(candidate, field))
            return true;
        key_num++;
    }
    return false;
}
2015-12-15 17:23:58 +01:00
// Return true if the varchar/varbinary field length change from old_field to
// new_field can be done inplace, otherwise return false.
// Both fields must be varchars with the same binary-ness and charset, and
// neither the field length nor the number of offset bytes may shrink.
// When the number of offset bytes grows, ctx->expand_varchar_update_needed is
// set.
static bool change_varchar_length_is_supported(
    Field* old_field,
    Field* new_field,
    TABLE* table,
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info,
    tokudb_alter_ctx* ctx) {

    if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
        return false;
    if (new_field->real_type() != MYSQL_TYPE_VARCHAR)
        return false;
    if (old_field->binary() != new_field->binary())
        return false;
    if (old_field->charset()->number != new_field->charset()->number)
        return false;
    if (old_field->field_length > new_field->field_length)
        return false;

    // shrink is not supported
    if (ctx->altered_table_kc_info->num_offset_bytes <
        ctx->table_kc_info->num_offset_bytes)
        return false;

    // not smaller and not equal: the sum of varchar lengths changed from
    // 1 to 2 offset bytes
    if (ctx->altered_table_kc_info->num_offset_bytes !=
        ctx->table_kc_info->num_offset_bytes)
        ctx->expand_varchar_update_needed = true;

    return true;
}
2015-12-15 17:23:58 +01:00
// Return true if all changed field lengths can be changed inplace, otherwise
// return false.
// The table and the altered table must agree on the number of fields and null
// bytes, at most one field may have changed, and that field must be a varchar
// that is not part of any key in either table.
static bool change_length_is_supported(
    TABLE* table,
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info,
    tokudb_alter_ctx* ctx) {

    if (table->s->fields != altered_table->s->fields ||
        table->s->null_bytes != altered_table->s->null_bytes)
        return false;

    // only support one field change
    if (ctx->changed_fields.elements() > 1)
        return false;

    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
         ai < ctx->changed_fields.elements();
         ai++) {
        const uint field_num = ctx->changed_fields.at(ai);
        Field* old_field = table->field[field_num];
        Field* new_field = altered_table->field[field_num];

        // checks are evaluated left to right and stop at the first failure:
        // no type conversions, only varchar, not in any key, and finally the
        // varchar specific checks
        const bool supported =
            old_field->real_type() == new_field->real_type() &&
            old_field->real_type() == MYSQL_TYPE_VARCHAR &&
            !field_in_key_of_table(table, old_field) &&
            !field_in_key_of_table(altered_table, new_field) &&
            change_varchar_length_is_supported(
                old_field,
                new_field,
                table,
                altered_table,
                ha_alter_info,
                ctx);
        if (!supported)
            return false;
    }
    return true;
}
// Debug function that ensures that the array is sorted in non-decreasing
// order.  An empty or single element array is trivially sorted.
static bool is_sorted(Dynamic_array<uint> &a) {
    for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++) {
        // every element must be compared with its immediate predecessor;
        // comparing with element 0 only would accept e.g. {1, 5, 3}
        if (a.at(i - 1) > a.at(i))
            return false;
    }
    return true;
}
2015-12-15 17:23:58 +01:00
// Expand every field listed in the alter context's changed_fields array, one
// column at a time, stopping at the first failure.
// Returns 0 on success, otherwise the error of the column that failed.
int ha_tokudb::alter_table_expand_columns(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);

    // since we build the changed_fields array in field order, it must be sorted
    assert_always(is_sorted(ctx->changed_fields));

    for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
         ai < ctx->changed_fields.elements();
         ai++) {
        uint expand_field_num = ctx->changed_fields.at(ai);
        int error = alter_table_expand_one_column(
            altered_table,
            ha_alter_info,
            expand_field_num);
        if (error != 0)
            return error;
    }
    return 0;
}
// Return true if the field is an unsigned int, i.e. UNSIGNED_FLAG is set in
// the field's flags.
static bool is_unsigned(Field *f) {
    const bool unsigned_flag_set = (f->flags & UNSIGNED_FLAG) != 0;
    return unsigned_flag_set;
}
2015-12-15 17:23:58 +01:00
// Return the starting offset in the value for a particular index (selected by
// idx) of a particular field (selected by expand_field_num)
// TODO: replace this?
2015-12-15 17:23:58 +01:00
static uint32_t alter_table_field_offset(
uint32_t null_bytes,
KEY_AND_COL_INFO* kc_info,
int idx,
int expand_field_num) {
uint32_t offset = null_bytes;
for (int i = 0; i < expand_field_num; i++) {
if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
continue;
offset += kc_info->field_lengths[i];
}
return offset;
}
// Send an expand message into all clustered indexes including the primary
2015-12-15 17:23:58 +01:00
int ha_tokudb::alter_table_expand_one_column(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
int expand_field_num) {
int error = 0;
2015-12-15 17:23:58 +01:00
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
Field *old_field = table->field[expand_field_num];
TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
Field *new_field = altered_table->field[expand_field_num];
TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
2015-12-15 17:23:58 +01:00
assert_always(old_field_type == new_field_type);
uchar operation;
uchar pad_char;
switch (old_field_type) {
case toku_type_int:
2015-12-15 17:23:58 +01:00
assert_always(is_unsigned(old_field) == is_unsigned(new_field));
if (is_unsigned(old_field))
operation = UPDATE_OP_EXPAND_UINT;
else
operation = UPDATE_OP_EXPAND_INT;
pad_char = 0;
break;
case toku_type_fixstring:
operation = UPDATE_OP_EXPAND_CHAR;
pad_char = old_field->charset()->pad_char;
break;
case toku_type_fixbinary:
operation = UPDATE_OP_EXPAND_BINARY;
pad_char = 0;
break;
default:
2015-12-15 17:23:58 +01:00
assert_unreachable();
}
2013-12-11 15:47:25 -05:00
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
2015-12-15 17:23:58 +01:00
error = new_row_descriptor(
table,
altered_table,
ha_alter_info,
i,
&row_descriptor);
if (error)
break;
2015-12-15 17:23:58 +01:00
error = share->key_file[i]->change_descriptor(
share->key_file[i],
ctx->alter_txn,
&row_descriptor,
0);
tokudb::memory::free(row_descriptor.data);
if (error)
break;
2015-12-15 17:23:58 +01:00
// for all trees that have values, make an expand update message and
// broadcast it into the tree
2013-09-23 14:32:48 -04:00
if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
2015-12-15 17:23:58 +01:00
uint32_t old_offset = alter_table_field_offset(
table_share->null_bytes,
ctx->table_kc_info,
i,
expand_field_num);
uint32_t new_offset = alter_table_field_offset(
table_share->null_bytes,
ctx->altered_table_kc_info,
i,
expand_field_num);
assert_always(old_offset <= new_offset);
uint32_t old_length =
ctx->table_kc_info->field_lengths[expand_field_num];
assert_always(old_length == old_field->pack_length());
uint32_t new_length =
ctx->altered_table_kc_info->field_lengths[expand_field_num];
assert_always(new_length == new_field->pack_length());
DBT expand; memset(&expand, 0, sizeof(expand));
expand.size =
sizeof(operation) + sizeof(new_offset) +
sizeof(old_length) + sizeof(new_length);
if (operation == UPDATE_OP_EXPAND_CHAR ||
operation == UPDATE_OP_EXPAND_BINARY)
expand.size += sizeof(pad_char);
expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
if (!expand.data) {
error = ENOMEM;
break;
}
uchar *expand_ptr = (uchar *)expand.data;
expand_ptr[0] = operation;
expand_ptr += sizeof operation;
2015-12-15 17:23:58 +01:00
// for the first altered field, old_offset == new_offset.
// for the subsequent altered fields, the new_offset
// should be used as it includes the length changes from the
// previous altered fields.
memcpy(expand_ptr, &new_offset, sizeof(new_offset));
expand_ptr += sizeof(new_offset);
2015-12-15 17:23:58 +01:00
memcpy(expand_ptr, &old_length, sizeof(old_length));
expand_ptr += sizeof(old_length);
2015-12-15 17:23:58 +01:00
memcpy(expand_ptr, &new_length, sizeof(new_length));
expand_ptr += sizeof(new_length);
2015-12-15 17:23:58 +01:00
if (operation == UPDATE_OP_EXPAND_CHAR ||
operation == UPDATE_OP_EXPAND_BINARY) {
memcpy(expand_ptr, &pad_char, sizeof(pad_char));
expand_ptr += sizeof(pad_char);
}
2015-12-15 17:23:58 +01:00
assert_always(expand_ptr == (uchar*)expand.data + expand.size);
// and broadcast it into the tree
2015-12-15 17:23:58 +01:00
error = share->key_file[i]->update_broadcast(
share->key_file[i],
ctx->alter_txn,
&expand,
DB_IS_RESETTING_OP);
tokudb::memory::free(expand.data);
if (error)
break;
}
}
return error;
}
2015-12-15 17:23:58 +01:00
// Append to the buffer, for each of the first n blob fields recorded in
// kc_info, one byte holding the row_pack_length() of that field in the given
// table.
static void marshall_blob_lengths(
    tokudb::buffer& b,
    uint32_t n,
    TABLE* table,
    KEY_AND_COL_INFO* kc_info) {

    for (uint blob_num = 0; blob_num < n; blob_num++) {
        const uint field_index = kc_info->blob_fields[blob_num];
        assert_always(field_index < table->s->fields);

        Field* blob_field = table->s->field[field_index];
        uint8_t length_bytes = blob_field->row_pack_length();
        b.append(&length_bytes, sizeof(length_bytes));
    }
}
2015-12-15 17:23:58 +01:00
// Expand the blob fields of the table.
//
// Every dictionary gets a new row descriptor built from the altered table.
// Every dictionary that holds row values (the primary key and the clustering
// keys) additionally gets an UPDATE_OP_EXPAND_BLOB message broadcast into it;
// the message carries the blob lengths of both the current and the altered
// table.
//
// Returns 0 on success, otherwise the first error hit; processing stops at
// that dictionary.
int ha_tokudb::alter_table_expand_blobs(
    TABLE* altered_table,
    Alter_inplace_info* ha_alter_info) {

    tokudb_alter_ctx* ctx =
        static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);

    int error = 0;
    const uint32_t num_dbs = table->s->keys + tokudb_test(hidden_primary_key);
    for (uint32_t db_idx = 0; db_idx < num_dbs; db_idx++) {
        // change to a new descriptor
        DBT row_descriptor;
        memset(&row_descriptor, 0, sizeof(row_descriptor));
        error = new_row_descriptor(
            table,
            altered_table,
            ha_alter_info,
            db_idx,
            &row_descriptor);
        if (error != 0)
            break;

        DB* db = share->key_file[db_idx];
        error = db->change_descriptor(db, ctx->alter_txn, &row_descriptor, 0);
        // the descriptor buffer is no longer needed, whatever the outcome
        tokudb::memory::free(row_descriptor.data);
        if (error != 0)
            break;

        // only trees that have values need the update blobs message
        const bool tree_has_values =
            db_idx == primary_key ||
            key_is_clustering(&table_share->key_info[db_idx]);
        if (!tree_has_values)
            continue;

        tokudb::buffer msg;
        uint8_t op = UPDATE_OP_EXPAND_BLOB;
        msg.append(&op, sizeof(op));

        // null bytes plus the size of the fixed fields
        msg.append_ui<uint32_t>(
            table->s->null_bytes +
            ctx->table_kc_info->mcp_info[db_idx].fixed_field_size);

        // length of the variable offsets, and the bytes per offset (reported
        // as 0 when there are no variable offsets at all)
        uint32_t var_offset_bytes =
            ctx->table_kc_info->mcp_info[db_idx].len_of_offsets;
        msg.append_ui<uint32_t>(var_offset_bytes);
        uint32_t bytes_per_offset = 0;
        if (var_offset_bytes != 0)
            bytes_per_offset = ctx->table_kc_info->num_offset_bytes;
        msg.append_ui<uint32_t>(bytes_per_offset);

        // add blobs info: the count, then the lengths in the current table
        // followed by the lengths in the altered table
        uint32_t num_blobs = ctx->table_kc_info->num_blobs;
        msg.append_ui<uint32_t>(num_blobs);
        marshall_blob_lengths(msg, num_blobs, table, ctx->table_kc_info);
        marshall_blob_lengths(
            msg,
            num_blobs,
            altered_table,
            ctx->altered_table_kc_info);

        // and broadcast it into the tree
        DBT expand;
        memset(&expand, 0, sizeof(expand));
        expand.data = msg.data();
        expand.size = msg.size();
        error = db->update_broadcast(
            db,
            ctx->alter_txn,
            &expand,
            DB_IS_RESETTING_OP);
        if (error != 0)
            break;
    }
    return error;
}
// Return true if two fixed length fields can be changed inplace
2015-12-15 17:23:58 +01:00
static bool change_fixed_length_is_supported(
TABLE* table,
TABLE* altered_table,
Field* old_field,
Field* new_field,
tokudb_alter_ctx* ctx) {
// no change in size is supported
if (old_field->pack_length() == new_field->pack_length())
return true;
// shrink is not supported
if (old_field->pack_length() > new_field->pack_length())
return false;
ctx->expand_fixed_update_needed = true;
return true;
}
2015-12-15 17:23:58 +01:00
// Return true if a blob/text field can be changed inplace:
//   blob -> longer or equal length blob
//   text -> longer or equal length text with the same charset
// On success ctx->expand_blob_update_needed is set.
static bool change_blob_length_is_supported(
    TABLE* table,
    TABLE* altered_table,
    Field* old_field,
    Field* new_field,
    tokudb_alter_ctx* ctx) {

    const bool old_is_binary = old_field->binary();
    const bool new_is_binary = new_field->binary();

    // no conversion between blob and text
    if (old_is_binary != new_is_binary)
        return false;

    // shrink is not supported
    if (old_field->pack_length() > new_field->pack_length())
        return false;

    // text fields must additionally keep their charset
    if (!old_is_binary &&
        old_field->charset()->number != new_field->charset()->number)
        return false;

    ctx->expand_blob_update_needed = true;
    return true;
}
// Return true if the MySQL type is an int or unsigned int type, i.e. one of
// TINY, SHORT, INT24, LONG or LONGLONG.
static bool is_int_type(enum_field_types t) {
    return t == MYSQL_TYPE_TINY ||
           t == MYSQL_TYPE_SHORT ||
           t == MYSQL_TYPE_INT24 ||
           t == MYSQL_TYPE_LONG ||
           t == MYSQL_TYPE_LONGLONG;
}
// Return true if two field types can be changed inplace
2015-12-15 17:23:58 +01:00
static bool change_field_type_is_supported(
Field* old_field,
Field* new_field,
TABLE* table,
TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
tokudb_alter_ctx* ctx) {
enum_field_types old_type = old_field->real_type();
enum_field_types new_type = new_field->real_type();
if (is_int_type(old_type)) {
// int and unsigned int expansion
2015-12-15 17:23:58 +01:00
if (is_int_type(new_type) &&
is_unsigned(old_field) == is_unsigned(new_field))
return change_fixed_length_is_supported(
table,
altered_table,
old_field,
new_field,
ctx);
else
return false;
} else if (old_type == MYSQL_TYPE_STRING) {
// char(X) -> char(Y) and binary(X) -> binary(Y) expansion
if (new_type == MYSQL_TYPE_STRING &&
old_field->binary() == new_field->binary() &&
old_field->charset()->number == new_field->charset()->number)
2015-12-15 17:23:58 +01:00
return change_fixed_length_is_supported(
table,
altered_table,
old_field,
new_field,
ctx);
else
return false;
} else if (old_type == MYSQL_TYPE_VARCHAR) {
2015-12-15 17:23:58 +01:00
// varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
// where X < 256 <= Y the ALTER_COLUMN_TYPE handler flag is set for
// these cases
return change_varchar_length_is_supported(
old_field,
new_field,
table,
altered_table,
ha_alter_info,
ctx);
} else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
2015-12-15 17:23:58 +01:00
return change_blob_length_is_supported(
table,
altered_table,
old_field,
new_field,
ctx);
} else
return false;
}
// Return true if all changed field types can be changed inplace
2015-12-15 17:23:58 +01:00
static bool change_type_is_supported(
TABLE* table,
TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
tokudb_alter_ctx* ctx) {
if (table->s->null_bytes != altered_table->s->null_bytes)
return false;
if (table->s->fields != altered_table->s->fields)
return false;
if (ctx->changed_fields.elements() > 1)
return false; // only support one field change
2015-12-15 17:23:58 +01:00
for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
ai < ctx->changed_fields.elements();
ai++) {
uint i = ctx->changed_fields.at(ai);
Field *old_field = table->field[i];
Field *new_field = altered_table->field[i];
2015-12-15 17:23:58 +01:00
if (field_in_key_of_table(table, old_field) ||
field_in_key_of_table(altered_table, new_field))
return false;
2015-12-15 17:23:58 +01:00
if (!change_field_type_is_supported(
old_field,
new_field,
table,
altered_table,
ha_alter_info,
ctx))
return false;
}
return true;
}
2015-12-15 17:23:58 +01:00
// Allocate and initialize a new descriptor for a dictionary in the altered
// table identified with idx.
// Return the new descriptor in the row_descriptor DBT.
// Return non-zero on error.
2015-12-15 17:23:58 +01:00
int ha_tokudb::new_row_descriptor(
TABLE* table,
TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
uint32_t idx,
DBT* row_descriptor) {
int error = 0;
2015-12-15 17:23:58 +01:00
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
row_descriptor->size =
get_max_desc_size(ctx->altered_table_kc_info, altered_table);
row_descriptor->data =
(uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME));
if (row_descriptor->data == NULL) {
error = ENOMEM;
} else {
2015-12-15 17:23:58 +01:00
KEY* prim_key =
hidden_primary_key ? NULL :
&altered_table->s->key_info[primary_key];
if (idx == primary_key) {
2015-12-15 17:23:58 +01:00
row_descriptor->size = create_main_key_descriptor(
(uchar*)row_descriptor->data,
prim_key,
hidden_primary_key,
primary_key,
altered_table,
ctx->altered_table_kc_info);
} else {
2015-12-15 17:23:58 +01:00
row_descriptor->size = create_secondary_key_descriptor(
(uchar*)row_descriptor->data,
&altered_table->key_info[idx],
prim_key,
hidden_primary_key,
altered_table,
primary_key,
idx,
ctx->altered_table_kc_info);
}
error = 0;
}
return error;
}
#endif