// mariadb/storage/tokudb/ha_tokudb_alter_56.cc
/* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
#ident "$Id$"
/*======
This file is part of TokuDB
Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved.
TokuDB is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2,
as published by the Free Software Foundation.
TokuDB is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with TokuDB. If not, see <http://www.gnu.org/licenses/>.
======= */
#ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved."
#if TOKU_INCLUDE_ALTER_56
#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 101099
#define TOKU_ALTER_RENAME ALTER_RENAME
#define DYNAMIC_ARRAY_ELEMENTS_TYPE size_t
#elif (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
#define TOKU_ALTER_RENAME ALTER_RENAME
#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
#define TOKU_ALTER_RENAME ALTER_RENAME_56
#define DYNAMIC_ARRAY_ELEMENTS_TYPE int
#else
#error "MYSQL_VERSION_ID not supported"
#endif
#include "ha_tokudb_alter_common.cc"
#include <sql_array.h>
#include <sql_base.h>
// The tokudb alter context contains the alter state that is set in
// check_if_supported_inplace_alter and used later when the alter operation
// is executed.
class tokudb_alter_ctx : public inplace_alter_handler_ctx {
public:
tokudb_alter_ctx() :
handler_flags(0),
alter_txn(NULL),
add_index_changed(false),
drop_index_changed(false),
reset_card(false),
compression_changed(false),
expand_varchar_update_needed(false),
expand_fixed_update_needed(false),
expand_blob_update_needed(false),
optimize_needed(false),
table_kc_info(NULL),
altered_table_kc_info(NULL) {
}
~tokudb_alter_ctx() {
if (altered_table_kc_info)
free_key_and_col_info(altered_table_kc_info);
}
public:
ulong handler_flags;
DB_TXN* alter_txn;
bool add_index_changed;
bool incremented_num_DBs, modified_DBs;
bool drop_index_changed;
bool reset_card;
bool compression_changed;
enum toku_compression_method orig_compression_method;
bool expand_varchar_update_needed;
bool expand_fixed_update_needed;
bool expand_blob_update_needed;
bool optimize_needed;
Dynamic_array<uint> changed_fields;
KEY_AND_COL_INFO* table_kc_info;
KEY_AND_COL_INFO* altered_table_kc_info;
KEY_AND_COL_INFO altered_table_kc_info_base;
};
// Debug function to print out an alter table operation
void ha_tokudb::print_alter_info(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
TOKUDB_TRACE(
"***are keys of two tables same? %d",
tables_have_same_keys(table, altered_table, false, false));
if (ha_alter_info->handler_flags) {
TOKUDB_TRACE("***alter flags set ***");
for (int i = 0; i < 32; i++) {
if (ha_alter_info->handler_flags & (1 << i))
TOKUDB_TRACE("%d", i);
}
}
// everyone calculates data by doing some default_values - record[0], but
// I do not see why that is necessary
TOKUDB_TRACE("******");
TOKUDB_TRACE("***orig table***");
for (uint i = 0; i < table->s->fields; i++) {
//
// make sure to use table->field, and NOT table->s->field
//
Field* curr_field = table->field[i];
uint null_offset = get_null_offset(table, curr_field);
TOKUDB_TRACE(
"name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: "
"%d, is_null %d, pack_length %u",
curr_field->field_name,
curr_field->real_type(),
mysql_to_toku_type(curr_field),
curr_field->null_bit,
null_offset,
curr_field->real_maybe_null(),
curr_field->real_maybe_null() ?
table->s->default_values[null_offset] & curr_field->null_bit :
0xffffffff,
curr_field->pack_length());
}
TOKUDB_TRACE("******");
TOKUDB_TRACE("***altered table***");
for (uint i = 0; i < altered_table->s->fields; i++) {
Field* curr_field = altered_table->field[i];
uint null_offset = get_null_offset(altered_table, curr_field);
TOKUDB_TRACE(
"name: %s, types: %u %u, nullable: %d, null_offset: %d, "
"is_null_field: %d, is_null %d, pack_length %u",
curr_field->field_name,
curr_field->real_type(),
mysql_to_toku_type(curr_field),
curr_field->null_bit,
null_offset,
curr_field->real_maybe_null(),
curr_field->real_maybe_null() ?
altered_table->s->default_values[null_offset] &
curr_field->null_bit : 0xffffffff,
curr_field->pack_length());
}
TOKUDB_TRACE("******");
}
// Given two tables with an equal number of fields, find all of the fields
// with different types and return their indexes in the changed_fields array.
// This function ignores field name differences.
static int find_changed_fields(
TABLE* table_a,
TABLE* table_b,
Dynamic_array<uint>& changed_fields) {
for (uint i = 0; i < table_a->s->fields; i++) {
Field* field_a = table_a->field[i];
Field* field_b = table_b->field[i];
if (!fields_are_same_type(field_a, field_b))
changed_fields.append(i);
}
return changed_fields.elements();
}
static bool change_length_is_supported(TABLE* table,
TABLE* altered_table,
tokudb_alter_ctx* ctx);
static bool change_type_is_supported(TABLE* table,
TABLE* altered_table,
tokudb_alter_ctx* ctx);
// The ha_alter_info->handler_flags cannot be trusted.
// This function maps the bogus handler flags to flags we can rely on.
static ulong fix_handler_flags(
THD* thd,
TABLE* table,
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
ulong handler_flags = ha_alter_info->handler_flags;
#if 100000 <= MYSQL_VERSION_ID
// This is automatically supported, hide the flag from later checks
handler_flags &= ~Alter_inplace_info::ALTER_PARTITIONED;
#endif
// workaround for fill_alter_inplace_info bug (#5193)
// the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a
// column addition that does not change the keys.
// the following code turns off the ADD_INDEX and DROP_INDEX flags so that
// we can do hot column addition later.
if (handler_flags &
(Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::DROP_COLUMN)) {
if (handler_flags &
(Alter_inplace_info::ADD_INDEX + Alter_inplace_info::DROP_INDEX)) {
if (tables_have_same_keys(
table,
altered_table,
tokudb::sysvars::alter_print_error(thd) != 0, false)) {
handler_flags &=
~(Alter_inplace_info::ADD_INDEX +
Alter_inplace_info::DROP_INDEX);
}
}
}
// always allow rename table + any other operation, so turn off the
// rename flag
if (handler_flags & Alter_inplace_info::TOKU_ALTER_RENAME) {
handler_flags &= ~Alter_inplace_info::TOKU_ALTER_RENAME;
}
// ALTER_STORED_COLUMN_TYPE may be set when no columns have been changed,
// so turn off the flag
if (handler_flags & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) {
if (all_fields_are_same_type(table, altered_table)) {
handler_flags &= ~Alter_inplace_info::ALTER_STORED_COLUMN_TYPE;
}
}
return handler_flags;
}
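// For example (per the fill_alter_inplace_info workaround above), a plain
// ALTER TABLE t ADD COLUMN c INT may arrive with ADD_INDEX and DROP_INDEX
// also set even though the keys are unchanged; after fix_handler_flags only
// the column related flags remain, so the hot add column path below can be
// taken.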
// Return true if there is no intersection between the names of the added and
// dropped indexes, otherwise return false.
static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
for (uint d = 0; d < ha_alter_info->index_drop_count; d++) {
KEY* drop_key = ha_alter_info->index_drop_buffer[d];
for (uint a = 0; a < ha_alter_info->index_add_count; a++) {
KEY* add_key =
&ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]];
if (strcmp(drop_key->name, add_key->name) == 0) {
return false;
}
}
}
return true;
}
// Return true if at least one bit of 'bits' within 'mask' is set and no bit
// of 'bits' outside 'mask' is set, otherwise return false.
static bool only_flags(ulong bits, ulong mask) {
return (bits & mask) != 0 && (bits & ~mask) == 0;
}
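// Illustrative sketch of only_flags semantics (flag names abbreviated):
//   only_flags(ADD_INDEX, ADD_INDEX | DROP_INDEX)  -> true
//   only_flags(ADD_INDEX | ADD_COLUMN, ADD_INDEX)  -> false (bit outside mask)
//   only_flags(0, ADD_INDEX)                       -> false (no bit set)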
// Table create options that should be ignored by TokuDB
// There are 25 total create options defined by the mysql server (see
// handler.h), and only 4 of them touch engine data, either rebuilding the
// engine data or just updating meta info:
// 1. HA_CREATE_USED_AUTO update auto_inc info
// 2. HA_CREATE_USED_CHARSET rebuild table if contains character columns
// 3. HA_CREATE_USED_ENGINE rebuild table
// 4. HA_CREATE_USED_ROW_FORMAT update compression method info
//
// All the others are either not supported by TokuDB or do not need to
// touch engine data.
static constexpr uint32_t TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS =
HA_CREATE_USED_RAID | // deprecated field
HA_CREATE_USED_UNION | // for MERGE table
HA_CREATE_USED_INSERT_METHOD | // for MERGE table
HA_CREATE_USED_MIN_ROWS | // for MEMORY table
HA_CREATE_USED_MAX_ROWS | // for NDB table
HA_CREATE_USED_AVG_ROW_LENGTH | // for MyISAM table
HA_CREATE_USED_PACK_KEYS | // for MyISAM table
HA_CREATE_USED_DEFAULT_CHARSET | // no need to rebuild
HA_CREATE_USED_DATADIR | // ignored by alter
HA_CREATE_USED_INDEXDIR | // ignored by alter
HA_CREATE_USED_CHECKSUM | // for MyISAM table
HA_CREATE_USED_DELAY_KEY_WRITE | // for MyISAM table
HA_CREATE_USED_COMMENT | // no need to rebuild
HA_CREATE_USED_PASSWORD | // not supported by community version
HA_CREATE_USED_CONNECTION | // for FEDERATED table
HA_CREATE_USED_KEY_BLOCK_SIZE | // not supported by TokuDB
HA_CREATE_USED_TRANSACTIONAL | // unused
HA_CREATE_USED_PAGE_CHECKSUM | // unused
HA_CREATE_USED_STATS_PERSISTENT | // not supported by TokuDB
HA_CREATE_USED_STATS_AUTO_RECALC | // not supported by TokuDB
HA_CREATE_USED_STATS_SAMPLE_PAGES; // not supported by TokuDB
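// For example, ALTER TABLE t COMMENT='...' typically arrives with only
// HA_CREATE_USED_COMMENT set in create_info->used_fields; that bit is covered
// by the mask above, so the CHANGE_CREATE_OPTION branch below can accept it
// without touching engine data.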
// Check if an alter table operation on this table and described by the alter
// table parameters is supported inplace and if so, what type of locking is
// needed to execute it. return values:
// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an
// inplace operation, a table copy is required
// HA_ALTER_ERROR: the alter table operation should fail
// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X
// HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE: prepare runs with MDL X,
// alter runs with MDL SNW
// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW,
// concurrent reads, no writes
// HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE: prepare runs with MDL X,
// alter runs with MDL SW
// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW,
// concurrent reads, writes.
// must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid
// deadlocks with the MDL lock and the table lock
enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
TOKUDB_HANDLER_DBUG_ENTER("");
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
print_alter_info(altered_table, ha_alter_info);
}
// default is NOT inplace
enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED;
THD* thd = ha_thd();
// setup context
tokudb_alter_ctx* ctx = new tokudb_alter_ctx;
ha_alter_info->handler_ctx = ctx;
ctx->handler_flags =
fix_handler_flags(thd, table, altered_table, ha_alter_info);
ctx->table_kc_info = &share->kc_info;
ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));
if (tokudb::sysvars::disable_hot_alter(thd)) {
; // do nothing
} else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::DROP_INDEX +
Alter_inplace_info::DROP_UNIQUE_INDEX +
Alter_inplace_info::ADD_INDEX +
Alter_inplace_info::ADD_UNIQUE_INDEX)) {
// add or drop index
if (table->s->null_bytes == altered_table->s->null_bytes &&
(ha_alter_info->index_add_count > 0 ||
ha_alter_info->index_drop_count > 0) &&
!tables_have_same_keys(
table,
altered_table,
tokudb::sysvars::alter_print_error(thd) != 0, false) &&
is_disjoint_add_drop(ha_alter_info)) {
if (ctx->handler_flags &
(Alter_inplace_info::DROP_INDEX +
Alter_inplace_info::DROP_UNIQUE_INDEX)) {
// the fractal tree can not handle dropping an index concurrent
// with querying with the index.
// we grab an exclusive MDL for the drop index.
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
} else {
result = HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE;
// someday, allow multiple hot indexes via alter table add key.
// don't forget to change the store_lock function.
// for now, hot indexing is only supported via session variable
// with the create index sql command
if (ha_alter_info->index_add_count == 1 &&
// only one add or drop
ha_alter_info->index_drop_count == 0 &&
// must be add index not add unique index
ctx->handler_flags == Alter_inplace_info::ADD_INDEX &&
// must be a create index command
thd_sql_command(thd) == SQLCOM_CREATE_INDEX &&
// must be enabled
tokudb::sysvars::create_index_online(thd)) {
// external_lock set WRITE_ALLOW_WRITE which allows writes
// concurrent with the index creation
result = HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE;
}
}
}
} else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
// column default
if (table->s->null_bytes == altered_table->s->null_bytes)
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
} else if (ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_COLUMN_NAME +
Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
// column rename
// we have identified a possible column rename,
// but let's do some more checks.
// we will only allow a hot column rename if there are no changes
// in column positions (ALTER_STORED_COLUMN_ORDER is not set).
// we also need to verify that one and only one column has changed
// only its name; if we find anything to the contrary, we don't
// allow it. indexes are checked as well.
if (table->s->null_bytes == altered_table->s->null_bytes) {
bool cr_supported =
column_rename_supported(
table,
altered_table,
(ctx->handler_flags &
Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) != 0);
if (cr_supported)
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else if (ctx->handler_flags & Alter_inplace_info::ADD_COLUMN &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ADD_COLUMN +
Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// add column
uint32_t added_columns[altered_table->s->fields];
uint32_t num_added_columns = 0;
int r =
find_changed_columns(
added_columns,
&num_added_columns,
table,
altered_table);
if (r == 0) {
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
for (uint32_t i = 0; i < num_added_columns; i++) {
uint32_t curr_added_index = added_columns[i];
Field* curr_added_field =
altered_table->field[curr_added_index];
TOKUDB_TRACE(
"Added column: index %d, name %s",
curr_added_index,
curr_added_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else if (ctx->handler_flags & Alter_inplace_info::DROP_COLUMN &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::DROP_COLUMN +
Alter_inplace_info::ALTER_STORED_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// drop column
uint32_t dropped_columns[table->s->fields];
uint32_t num_dropped_columns = 0;
int r =
find_changed_columns(
dropped_columns,
&num_dropped_columns,
altered_table,
table);
if (r == 0) {
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE))) {
for (uint32_t i = 0; i < num_dropped_columns; i++) {
uint32_t curr_dropped_index = dropped_columns[i];
Field* curr_dropped_field = table->field[curr_dropped_index];
TOKUDB_TRACE(
"Dropped column: index %d, name %s",
curr_dropped_index,
curr_dropped_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else if ((ctx->handler_flags &
Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH +
Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
table->s->fields == altered_table->s->fields &&
find_changed_fields(
table,
altered_table,
ctx->changed_fields) > 0 &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// change column length
if (change_length_is_supported(table, altered_table, ctx)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else if ((ctx->handler_flags & Alter_inplace_info::ALTER_STORED_COLUMN_TYPE) &&
only_flags(
ctx->handler_flags,
Alter_inplace_info::ALTER_STORED_COLUMN_TYPE +
Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
table->s->fields == altered_table->s->fields &&
find_changed_fields(
table,
altered_table,
ctx->changed_fields) > 0 &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
// change column type
if (change_type_is_supported(table, altered_table, ctx)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::CHANGE_CREATE_OPTION)) {
HA_CREATE_INFO* create_info = ha_alter_info->create_info;
#if TOKU_INCLUDE_OPTION_STRUCTS
// set the USED_ROW_FORMAT flag for use later in this file for changes in the table's
// compression
if (create_info->option_struct->row_format !=
table_share->option_struct->row_format)
create_info->used_fields |= HA_CREATE_USED_ROW_FORMAT;
#endif
// alter auto_increment
if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
// do a sanity check that the table is what we think it is
if (tables_have_same_keys_and_columns(
table,
altered_table,
tokudb::sysvars::alter_print_error(thd) != 0)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else if (only_flags(
create_info->used_fields,
HA_CREATE_USED_ROW_FORMAT)) {
// alter row_format
// do a sanity check that the table is what we think it is
if (tables_have_same_keys_and_columns(
table,
altered_table,
tokudb::sysvars::alter_print_error(thd) != 0)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else if (only_flags(
create_info->used_fields,
TOKUDB_IGNORED_ALTER_CREATE_OPTION_FIELDS)) {
result = HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE;
}
}
#if TOKU_OPTIMIZE_WITH_RECREATE
else if (only_flags(
ctx->handler_flags,
Alter_inplace_info::RECREATE_TABLE +
Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
ctx->optimize_needed = true;
result = HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE;
}
#endif
if (TOKUDB_UNLIKELY(TOKUDB_DEBUG_FLAGS(TOKUDB_DEBUG_ALTER_TABLE)) &&
result != HA_ALTER_INPLACE_NOT_SUPPORTED &&
table->s->null_bytes != altered_table->s->null_bytes) {
TOKUDB_HANDLER_TRACE("q %s", thd->query());
TOKUDB_HANDLER_TRACE(
"null bytes %u -> %u",
table->s->null_bytes,
altered_table->s->null_bytes);
}
// turn a not supported result into an error if the slow alter table
// (copy) is disabled
if (result == HA_ALTER_INPLACE_NOT_SUPPORTED &&
tokudb::sysvars::disable_slow_alter(thd)) {
print_error(HA_ERR_UNSUPPORTED, MYF(0));
result = HA_ALTER_ERROR;
}
DBUG_RETURN(result);
}
// Prepare for the alter operations
bool ha_tokudb::prepare_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
Alter_inplace_info* ha_alter_info) {
TOKUDB_HANDLER_DBUG_ENTER("");
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
assert_always(transaction); // transaction must exist after table is locked
ctx->alter_txn = transaction;
bool result = false; // success
DBUG_RETURN(result);
}
// Execute the alter operations.
bool ha_tokudb::inplace_alter_table(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
TOKUDB_HANDLER_DBUG_ENTER("");
int error = 0;
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
HA_CREATE_INFO* create_info = ha_alter_info->create_info;
// this should be enough to handle locking as the higher level MDL
// on this table should prevent any new analyze tasks.
share->cancel_background_jobs();
if (error == 0 &&
(ctx->handler_flags &
(Alter_inplace_info::DROP_INDEX +
Alter_inplace_info::DROP_UNIQUE_INDEX))) {
error = alter_table_drop_index(ha_alter_info);
}
if (error == 0 &&
(ctx->handler_flags &
(Alter_inplace_info::ADD_INDEX +
Alter_inplace_info::ADD_UNIQUE_INDEX))) {
error = alter_table_add_index(ha_alter_info);
}
if (error == 0 &&
(ctx->handler_flags &
(Alter_inplace_info::ADD_COLUMN +
Alter_inplace_info::DROP_COLUMN))) {
error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
}
if (error == 0 &&
(ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) &&
(create_info->used_fields & HA_CREATE_USED_AUTO)) {
error = write_auto_inc_create(
share->status_block,
create_info->auto_increment_value,
ctx->alter_txn);
}
if (error == 0 &&
(ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) &&
(create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
// Get the current compression
DB *db = share->key_file[0];
error = db->get_compression_method(db, &ctx->orig_compression_method);
assert_always(error == 0);
// Set the new compression
#if TOKU_INCLUDE_OPTION_STRUCTS
toku_compression_method method =
row_format_to_toku_compression_method(
(tokudb::sysvars::row_format_t)create_info->option_struct->row_format);
#else
toku_compression_method method =
row_type_to_toku_compression_method(create_info->row_type);
#endif
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
db = share->key_file[i];
error = db->change_compression_method(db, method);
if (error)
break;
ctx->compression_changed = true;
}
}
// note: only one column expansion is allowed
if (error == 0 && ctx->expand_fixed_update_needed)
error = alter_table_expand_columns(altered_table, ha_alter_info);
if (error == 0 && ctx->expand_varchar_update_needed)
error = alter_table_expand_varchar_offsets(
altered_table,
ha_alter_info);
if (error == 0 && ctx->expand_blob_update_needed)
error = alter_table_expand_blobs(altered_table, ha_alter_info);
if (error == 0 && ctx->reset_card) {
error = tokudb::alter_card(
share->status_block,
ctx->alter_txn,
table->s,
altered_table->s);
}
if (error == 0 && ctx->optimize_needed) {
error = do_optimize(ha_thd());
}
#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
#if (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
if (error == 0 &&
(TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL)) {
#else
if (error == 0) {
#endif // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
error = write_frm_data(
share->status_block,
ctx->alter_txn,
altered_table->s->path.str);
}
#endif // (50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) ||
// (50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799)
#endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
bool result = false; // success
if (error) {
print_error(error, MYF(0));
result = true; // failure
}
DBUG_RETURN(result);
}
int ha_tokudb::alter_table_add_index(Alter_inplace_info* ha_alter_info) {
// collect the keys being added, in add index order
KEY* key_info = (KEY*)tokudb::memory::malloc(
sizeof(KEY) * ha_alter_info->index_add_count,
MYF(MY_WME));
for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
KEY *key = &key_info[i];
*key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
for (KEY_PART_INFO* key_part = key->key_part;
key_part < key->key_part + key->user_defined_key_parts;
key_part++) {
key_part->field = table->field[key_part->fieldnr];
}
}
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
ctx->add_index_changed = true;
int error = tokudb_add_index(
table,
key_info,
ha_alter_info->index_add_count,
ctx->alter_txn,
&ctx->incremented_num_DBs,
&ctx->modified_DBs);
if (error == HA_ERR_FOUND_DUPP_KEY) {
// hack for now: in case of a duplicate key error we cannot
// report the offending key to the user, so set last_dup_key to
// MAX_KEY rather than display potentially wrong key information.
last_dup_key = MAX_KEY;
}
tokudb::memory::free(key_info);
if (error == 0)
ctx->reset_card = true;
return error;
}
static bool find_index_of_key(
const char* key_name,
TABLE* table,
uint* index_offset_ptr) {
for (uint i = 0; i < table->s->keys; i++) {
if (strcmp(key_name, table->key_info[i].name) == 0) {
*index_offset_ptr = i;
return true;
}
}
return false;
}
static bool find_index_of_key(
const char* key_name,
KEY* key_info,
uint key_count,
uint* index_offset_ptr) {
for (uint i = 0; i < key_count; i++) {
if (strcmp(key_name, key_info[i].name) == 0) {
*index_offset_ptr = i;
return true;
}
}
return false;
}
int ha_tokudb::alter_table_drop_index(Alter_inplace_info* ha_alter_info) {
KEY *key_info = table->key_info;
// translate key names to indexes into the key_info array
uint index_drop_offsets[ha_alter_info->index_drop_count];
for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
bool found;
found = find_index_of_key(
ha_alter_info->index_drop_buffer[i]->name,
table,
&index_drop_offsets[i]);
if (!found) {
// undo of add key in partition engine
found = find_index_of_key(
ha_alter_info->index_drop_buffer[i]->name,
ha_alter_info->key_info_buffer,
ha_alter_info->key_count,
&index_drop_offsets[i]);
assert_always(found);
key_info = ha_alter_info->key_info_buffer;
}
}
// drop indexes
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
ctx->drop_index_changed = true;
int error = drop_indexes(index_drop_offsets,
ha_alter_info->index_drop_count,
key_info,
ctx->alter_txn);
if (error == 0)
ctx->reset_card = true;
return error;
}
int ha_tokudb::alter_table_add_or_drop_column(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
int error;
uchar *column_extra = NULL;
uint32_t max_column_extra_size;
uint32_t num_column_extra;
uint32_t num_columns = 0;
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
// set size such that we know it is big enough for both cases
uint32_t columns[table->s->fields + altered_table->s->fields];
memset(columns, 0, sizeof(columns));
// generate the array of columns
if (ha_alter_info->handler_flags & Alter_inplace_info::DROP_COLUMN) {
find_changed_columns(
columns,
&num_columns,
altered_table,
table);
} else if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) {
find_changed_columns(
columns,
&num_columns,
table,
altered_table);
} else {
assert_unreachable();
}
max_column_extra_size =
// max static row_mutator
STATIC_ROW_MUTATOR_SIZE +
// max dynamic row_mutator
4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength +
// max static blob size
(4 + share->kc_info.num_blobs) +
// max dynamic blob size
(num_columns*(1+4+1+4));
column_extra = (uchar*)tokudb::memory::malloc(
max_column_extra_size,
MYF(MY_WME));
if (column_extra == NULL) {
error = ENOMEM;
goto cleanup;
}
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
error = new_row_descriptor(
altered_table, ha_alter_info, i, &row_descriptor);
if (error)
goto cleanup;
error = share->key_file[i]->change_descriptor(
share->key_file[i],
ctx->alter_txn,
&row_descriptor,
0);
tokudb::memory::free(row_descriptor.data);
if (error)
goto cleanup;
if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
num_column_extra = fill_row_mutator(
column_extra,
columns,
num_columns,
altered_table,
ctx->altered_table_kc_info,
i,
// true if adding columns, otherwise is a drop
(ha_alter_info->handler_flags &
Alter_inplace_info::ADD_COLUMN) != 0);
DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt);
column_dbt.data = column_extra;
column_dbt.size = num_column_extra;
DBUG_ASSERT(num_column_extra <= max_column_extra_size);
error = share->key_file[i]->update_broadcast(
share->key_file[i],
ctx->alter_txn,
&column_dbt,
DB_IS_RESETTING_OP);
if (error) {
goto cleanup;
}
}
}
error = 0;
cleanup:
tokudb::memory::free(column_extra);
return error;
}
// Commit or abort the alter operations.
// If commit then write the new frm data to the status using the alter
// transaction.
// If abort then abort the alter transaction and try to rollback the
// non-transactional changes.
bool ha_tokudb::commit_inplace_alter_table(TOKUDB_UNUSED(TABLE* altered_table),
Alter_inplace_info* ha_alter_info,
bool commit) {
TOKUDB_HANDLER_DBUG_ENTER("");
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
bool result = false; // success
THD *thd = ha_thd();
if (commit) {
#if (50613 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699) || \
(50700 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50799) || \
(100000 <= MYSQL_VERSION_ID)
if (ha_alter_info->group_commit_ctx) {
ha_alter_info->group_commit_ctx = NULL;
}
#endif
#if defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
#if (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) || \
(100000 <= MYSQL_VERSION_ID)
#if defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
#else
if (true) {
#endif // defined(WITH_PARTITION_STORAGE_ENGINE) && WITH_PARTITION_STORAGE_ENGINE
int error = write_frm_data(
share->status_block,
ctx->alter_txn,
altered_table->s->path.str);
if (error) {
commit = false;
result = true;
print_error(error, MYF(0));
}
}
#endif // (50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599) ||
// (100000 <= MYSQL_VERSION_ID)
#endif // defined(TOKU_INCLUDE_WRITE_FRM_DATA) && TOKU_INCLUDE_WRITE_FRM_DATA
}
if (!commit) {
if (table->mdl_ticket->get_type() != MDL_EXCLUSIVE &&
(ctx->add_index_changed || ctx->drop_index_changed ||
ctx->compression_changed)) {
// get exclusive lock no matter what
#if defined(MARIADB_BASE_VERSION)
killed_state saved_killed_state = thd->killed;
thd->killed = NOT_KILLED;
for (volatile uint i = 0;
wait_while_table_is_used(thd, table, HA_EXTRA_NOT_USED);
i++) {
if (thd->killed != NOT_KILLED)
thd->killed = NOT_KILLED;
sleep(1);
}
assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
if (thd->killed == NOT_KILLED)
thd->killed = saved_killed_state;
#else
THD::killed_state saved_killed_state = thd->killed;
thd->killed = THD::NOT_KILLED;
// MySQL does not handle HA_EXTRA_NOT_USED so we use
// HA_EXTRA_PREPARE_FOR_RENAME since it is passed through
// the partition storage engine and is treated as a NOP by tokudb
for (volatile uint i = 0;
wait_while_table_is_used(
thd,
table,
HA_EXTRA_PREPARE_FOR_RENAME);
i++) {
if (thd->killed != THD::NOT_KILLED)
thd->killed = THD::NOT_KILLED;
sleep(1);
}
assert_always(table->mdl_ticket->get_type() == MDL_EXCLUSIVE);
if (thd->killed == THD::NOT_KILLED)
thd->killed = saved_killed_state;
#endif
}
// abort the alter transaction NOW so that any alters are rolled back.
// this allows the following restores to work.
tokudb_trx_data* trx =
(tokudb_trx_data*)thd_get_ha_data(thd, tokudb_hton);
assert_always(ctx->alter_txn == trx->stmt);
assert_always(trx->tokudb_lock_count > 0);
// for partitioned tables, we use a single transaction to do all of the
// partition changes. the tokudb_lock_count is a reference count for
// each of the handlers to the same transaction. obviously, we want
// to only abort once.
if (trx->tokudb_lock_count > 0) {
if (--trx->tokudb_lock_count <= trx->create_lock_count) {
trx->create_lock_count = 0;
abort_txn(ctx->alter_txn);
ctx->alter_txn = NULL;
trx->stmt = NULL;
trx->sub_sp_level = NULL;
}
transaction = NULL;
}
if (ctx->add_index_changed) {
restore_add_index(
table,
ha_alter_info->index_add_count,
ctx->incremented_num_DBs,
ctx->modified_DBs);
}
if (ctx->drop_index_changed) {
// translate key names to indexes into the key_info array
uint index_drop_offsets[ha_alter_info->index_drop_count];
for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
bool found = find_index_of_key(
ha_alter_info->index_drop_buffer[i]->name,
table,
&index_drop_offsets[i]);
assert_always(found);
}
restore_drop_indexes(index_drop_offsets,
ha_alter_info->index_drop_count);
}
if (ctx->compression_changed) {
uint32_t curr_num_DBs =
table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
DB *db = share->key_file[i];
int error = db->change_compression_method(
db,
ctx->orig_compression_method);
assert_always(error == 0);
}
}
}
DBUG_RETURN(result);
}
// Setup the altered table's key and col info.
int ha_tokudb::setup_kc_info(
TABLE* altered_table,
KEY_AND_COL_INFO* altered_kc_info) {
int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
if (error == 0)
error = initialize_key_and_col_info(
altered_table->s,
altered_table,
altered_kc_info,
hidden_primary_key,
primary_key);
return error;
}
// Expand the variable length field offsets from 1 to 2 bytes.
int ha_tokudb::alter_table_expand_varchar_offsets(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
int error = 0;
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
error = new_row_descriptor(
altered_table, ha_alter_info, i, &row_descriptor);
if (error)
break;
error = share->key_file[i]->change_descriptor(
share->key_file[i],
ctx->alter_txn,
&row_descriptor,
0);
tokudb::memory::free(row_descriptor.data);
if (error)
break;
// for all trees that have values, make an update variable offsets
// message and broadcast it into the tree
if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
uint32_t offset_start =
table_share->null_bytes +
share->kc_info.mcp_info[i].fixed_field_size;
uint32_t offset_end =
offset_start +
share->kc_info.mcp_info[i].len_of_offsets;
uint32_t number_of_offsets = offset_end - offset_start;
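// For reference, the expand message assembled below is laid out as
// (values are memcpy'd in host byte order):
//   [1 byte]  UPDATE_OP_EXPAND_VARIABLE_OFFSETS
//   [4 bytes] number_of_offsets
//   [4 bytes] offset_start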
// make the expand variable offsets message
DBT expand; memset(&expand, 0, sizeof expand);
expand.size =
sizeof(uchar) + sizeof(offset_start) + sizeof(offset_end);
expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
if (!expand.data) {
error = ENOMEM;
break;
}
uchar* expand_ptr = (uchar*)expand.data;
expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
expand_ptr += sizeof(uchar);
memcpy(expand_ptr, &number_of_offsets, sizeof(number_of_offsets));
expand_ptr += sizeof(number_of_offsets);
memcpy(expand_ptr, &offset_start, sizeof(offset_start));
expand_ptr += sizeof(offset_start);
// and broadcast it into the tree
error = share->key_file[i]->update_broadcast(
share->key_file[i],
ctx->alter_txn,
&expand,
DB_IS_RESETTING_OP);
tokudb::memory::free(expand.data);
if (error)
break;
}
}
return error;
}
// Return true if a field is part of a key
static bool field_in_key(KEY *key, Field *field) {
for (uint i = 0; i < key->user_defined_key_parts; i++) {
KEY_PART_INFO *key_part = &key->key_part[i];
if (strcmp(key_part->field->field_name, field->field_name) == 0)
return true;
}
return false;
}
// Return true if a field is part of any key
static bool field_in_key_of_table(TABLE *table, Field *field) {
for (uint i = 0; i < table->s->keys; i++) {
if (field_in_key(&table->key_info[i], field))
return true;
}
return false;
}
// Return true if all changed varchar/varbinary field lengths can be changed
// inplace, otherwise return false
static bool change_varchar_length_is_supported(Field* old_field,
Field* new_field,
tokudb_alter_ctx* ctx) {
if (old_field->real_type() != MYSQL_TYPE_VARCHAR ||
new_field->real_type() != MYSQL_TYPE_VARCHAR ||
old_field->binary() != new_field->binary() ||
old_field->charset()->number != new_field->charset()->number ||
old_field->field_length > new_field->field_length)
return false;
if (ctx->table_kc_info->num_offset_bytes >
ctx->altered_table_kc_info->num_offset_bytes)
return false; // shrink is not supported
if (ctx->table_kc_info->num_offset_bytes <
ctx->altered_table_kc_info->num_offset_bytes)
// offsets grew from 1 to 2 bytes (the sum of the varchar lengths crossed 255)
ctx->expand_varchar_update_needed = true;
return true;
}
// Return true if all changed field lengths can be changed inplace, otherwise
// return false
static bool change_length_is_supported(TABLE* table,
TABLE* altered_table,
tokudb_alter_ctx* ctx) {
if (table->s->fields != altered_table->s->fields)
return false;
if (table->s->null_bytes != altered_table->s->null_bytes)
return false;
if (ctx->changed_fields.elements() > 1)
return false; // only support one field change
for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
ai < ctx->changed_fields.elements();
ai++) {
uint i = ctx->changed_fields.at(ai);
Field *old_field = table->field[i];
Field *new_field = altered_table->field[i];
if (old_field->real_type() != new_field->real_type())
return false; // no type conversions
if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
return false; // only varchar
if (field_in_key_of_table(table, old_field) ||
field_in_key_of_table(altered_table, new_field))
return false; // not in any key
if (!change_varchar_length_is_supported(old_field, new_field, ctx))
return false;
}
return true;
}
// Debug function that checks that the array is sorted
static bool is_sorted(Dynamic_array<uint> &a) {
bool r = true;
if (a.elements() > 0) {
uint lastelement = a.at(0);
for (DYNAMIC_ARRAY_ELEMENTS_TYPE i = 1; i < a.elements(); i++)
if (lastelement > a.at(i))
r = false;
}
return r;
}
int ha_tokudb::alter_table_expand_columns(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
int error = 0;
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
// since we build the changed_fields array in field order, it must be sorted
assert_always(is_sorted(ctx->changed_fields));
for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
error == 0 && ai < ctx->changed_fields.elements();
ai++) {
uint expand_field_num = ctx->changed_fields.at(ai);
error = alter_table_expand_one_column(
altered_table,
ha_alter_info,
expand_field_num);
}
return error;
}
// Return true if the field is an unsigned int
static bool is_unsigned(Field *f) {
return (f->flags & UNSIGNED_FLAG) != 0;
}
// Return the starting offset in the value for a particular index (selected by
// idx) of a particular field (selected by expand_field_num)
// TODO: replace this?
static uint32_t alter_table_field_offset(
uint32_t null_bytes,
KEY_AND_COL_INFO* kc_info,
int idx,
int expand_field_num) {
uint32_t offset = null_bytes;
for (int i = 0; i < expand_field_num; i++) {
if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
continue;
offset += kc_info->field_lengths[i];
}
return offset;
}
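// Illustrative example: for a row image [null bytes][f0][f1][f2] where f1 is
// filtered out of dictionary idx because it is stored in the key, the offset
// of f2 is null_bytes + field_lengths[f0].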
// Send an expand message into all clustered indexes including the primary
int ha_tokudb::alter_table_expand_one_column(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
int expand_field_num) {
int error = 0;
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
Field *old_field = table->field[expand_field_num];
TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
Field *new_field = altered_table->field[expand_field_num];
TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
assert_always(old_field_type == new_field_type);
uchar operation;
uchar pad_char;
switch (old_field_type) {
case toku_type_int:
assert_always(is_unsigned(old_field) == is_unsigned(new_field));
if (is_unsigned(old_field))
operation = UPDATE_OP_EXPAND_UINT;
else
operation = UPDATE_OP_EXPAND_INT;
pad_char = 0;
break;
case toku_type_fixstring:
operation = UPDATE_OP_EXPAND_CHAR;
pad_char = old_field->charset()->pad_char;
break;
case toku_type_fixbinary:
operation = UPDATE_OP_EXPAND_BINARY;
pad_char = 0;
break;
default:
assert_unreachable();
}
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
error = new_row_descriptor(
altered_table, ha_alter_info, i, &row_descriptor);
if (error)
break;
error = share->key_file[i]->change_descriptor(
share->key_file[i],
ctx->alter_txn,
&row_descriptor,
0);
tokudb::memory::free(row_descriptor.data);
if (error)
break;
// for all trees that have values, make an expand update message and
// broadcast it into the tree
if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
uint32_t old_offset = alter_table_field_offset(
table_share->null_bytes,
ctx->table_kc_info,
i,
expand_field_num);
uint32_t new_offset = alter_table_field_offset(
table_share->null_bytes,
ctx->altered_table_kc_info,
i,
expand_field_num);
assert_always(old_offset <= new_offset);
uint32_t old_length =
ctx->table_kc_info->field_lengths[expand_field_num];
assert_always(old_length == old_field->pack_length());
uint32_t new_length =
ctx->altered_table_kc_info->field_lengths[expand_field_num];
assert_always(new_length == new_field->pack_length());
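// For reference, the expand message assembled below is laid out as:
//   [1 byte]  operation (UPDATE_OP_EXPAND_INT/UINT/CHAR/BINARY)
//   [4 bytes] new_offset of the expanded field within the row
//   [4 bytes] old_length
//   [4 bytes] new_length
//   [1 byte]  pad_char (only for the CHAR/BINARY operations)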
DBT expand; memset(&expand, 0, sizeof(expand));
expand.size =
sizeof(operation) + sizeof(new_offset) +
sizeof(old_length) + sizeof(new_length);
if (operation == UPDATE_OP_EXPAND_CHAR ||
operation == UPDATE_OP_EXPAND_BINARY)
expand.size += sizeof(pad_char);
expand.data = tokudb::memory::malloc(expand.size, MYF(MY_WME));
if (!expand.data) {
error = ENOMEM;
break;
}
uchar *expand_ptr = (uchar *)expand.data;
expand_ptr[0] = operation;
expand_ptr += sizeof operation;
// for the first altered field, old_offset == new_offset.
// for the subsequent altered fields, the new_offset
// should be used as it includes the length changes from the
// previous altered fields.
memcpy(expand_ptr, &new_offset, sizeof(new_offset));
expand_ptr += sizeof(new_offset);
memcpy(expand_ptr, &old_length, sizeof(old_length));
expand_ptr += sizeof(old_length);
memcpy(expand_ptr, &new_length, sizeof(new_length));
expand_ptr += sizeof(new_length);
if (operation == UPDATE_OP_EXPAND_CHAR ||
operation == UPDATE_OP_EXPAND_BINARY) {
memcpy(expand_ptr, &pad_char, sizeof(pad_char));
expand_ptr += sizeof(pad_char);
}
assert_always(expand_ptr == (uchar*)expand.data + expand.size);
// and broadcast it into the tree
error = share->key_file[i]->update_broadcast(
share->key_file[i],
ctx->alter_txn,
&expand,
DB_IS_RESETTING_OP);
tokudb::memory::free(expand.data);
if (error)
break;
}
}
return error;
}
static void marshall_blob_lengths(
tokudb::buffer& b,
uint32_t n,
TABLE* table,
KEY_AND_COL_INFO* kc_info) {
for (uint i = 0; i < n; i++) {
uint blob_field_index = kc_info->blob_fields[i];
assert_always(blob_field_index < table->s->fields);
uint8_t blob_field_length =
table->s->field[blob_field_index]->row_pack_length();
b.append(&blob_field_length, sizeof blob_field_length);
}
}
int ha_tokudb::alter_table_expand_blobs(
TABLE* altered_table,
Alter_inplace_info* ha_alter_info) {
int error = 0;
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
uint32_t curr_num_DBs = table->s->keys + tokudb_test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
error = new_row_descriptor(
altered_table, ha_alter_info, i, &row_descriptor);
if (error)
break;
error = share->key_file[i]->change_descriptor(
share->key_file[i],
ctx->alter_txn,
&row_descriptor,
0);
tokudb::memory::free(row_descriptor.data);
if (error)
break;
// for all trees that have values, make an update blobs message and
// broadcast it into the tree
if (i == primary_key || key_is_clustering(&table_share->key_info[i])) {
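// For reference, the expand blob message assembled below is laid out as:
//   [1 byte]   UPDATE_OP_EXPAND_BLOB
//   [4 bytes]  offset where the variable fields start
//              (null bytes + fixed field size)
//   [4 bytes]  total length of the variable field offsets
//   [4 bytes]  bytes per variable field offset (0 if there are none)
//   [4 bytes]  number of blob fields
//   [n bytes]  old blob length sizes, one byte per blob
//   [n bytes]  new blob length sizes, one byte per blob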
tokudb::buffer b;
uint8_t op = UPDATE_OP_EXPAND_BLOB;
b.append(&op, sizeof op);
b.append_ui<uint32_t>(
table->s->null_bytes +
ctx->table_kc_info->mcp_info[i].fixed_field_size);
uint32_t var_offset_bytes =
ctx->table_kc_info->mcp_info[i].len_of_offsets;
b.append_ui<uint32_t>(var_offset_bytes);
b.append_ui<uint32_t>(
var_offset_bytes == 0 ? 0 :
ctx->table_kc_info->num_offset_bytes);
// add blobs info
uint32_t num_blobs = ctx->table_kc_info->num_blobs;
b.append_ui<uint32_t>(num_blobs);
marshall_blob_lengths(b, num_blobs, table, ctx->table_kc_info);
marshall_blob_lengths(
b,
num_blobs,
altered_table,
ctx->altered_table_kc_info);
// and broadcast it into the tree
DBT expand; memset(&expand, 0, sizeof expand);
expand.data = b.data();
expand.size = b.size();
error = share->key_file[i]->update_broadcast(
share->key_file[i],
ctx->alter_txn,
&expand,
DB_IS_RESETTING_OP);
if (error)
break;
}
}
return error;
}
// Return true if two fixed length fields can be changed inplace
static bool change_fixed_length_is_supported(Field* old_field,
Field* new_field,
tokudb_alter_ctx* ctx) {
// same size: supported with no row change needed
if (old_field->pack_length() == new_field->pack_length())
return true;
// shrink is not supported
if (old_field->pack_length() > new_field->pack_length())
return false;
ctx->expand_fixed_update_needed = true;
return true;
}
static bool change_blob_length_is_supported(Field* old_field,
Field* new_field,
tokudb_alter_ctx* ctx) {
// blob -> longer or equal length blob
if (old_field->binary() && new_field->binary() &&
old_field->pack_length() <= new_field->pack_length()) {
ctx->expand_blob_update_needed = true;
return true;
}
// text -> longer or equal length text
if (!old_field->binary() && !new_field->binary() &&
old_field->pack_length() <= new_field->pack_length() &&
old_field->charset()->number == new_field->charset()->number) {
ctx->expand_blob_update_needed = true;
return true;
}
return false;
}
// Return true if the MySQL type is an int or unsigned int type
static bool is_int_type(enum_field_types t) {
switch (t) {
case MYSQL_TYPE_TINY:
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_INT24:
case MYSQL_TYPE_LONG:
case MYSQL_TYPE_LONGLONG:
return true;
default:
return false;
}
}
// Return true if two field types can be changed inplace
static bool change_field_type_is_supported(Field* old_field,
Field* new_field,
tokudb_alter_ctx* ctx) {
enum_field_types old_type = old_field->real_type();
enum_field_types new_type = new_field->real_type();
if (is_int_type(old_type)) {
// int and unsigned int expansion
if (is_int_type(new_type) &&
is_unsigned(old_field) == is_unsigned(new_field))
return change_fixed_length_is_supported(old_field, new_field, ctx);
else
return false;
} else if (old_type == MYSQL_TYPE_STRING) {
// char(X) -> char(Y) and binary(X) -> binary(Y) expansion
if (new_type == MYSQL_TYPE_STRING &&
old_field->binary() == new_field->binary() &&
old_field->charset()->number == new_field->charset()->number)
return change_fixed_length_is_supported(old_field, new_field, ctx);
else
return false;
} else if (old_type == MYSQL_TYPE_VARCHAR) {
// varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion
// where X < 256 <= Y; the ALTER_STORED_COLUMN_TYPE handler flag is set
// for these cases
return change_varchar_length_is_supported(old_field, new_field, ctx);
} else if (old_type == MYSQL_TYPE_BLOB && new_type == MYSQL_TYPE_BLOB) {
return change_blob_length_is_supported(old_field, new_field, ctx);
} else
return false;
}
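// Illustrative candidates (assuming the changed field is not part of any key):
// INT -> BIGINT with matching signedness, CHAR(10) -> CHAR(20) with the same
// charset, VARCHAR(100) -> VARCHAR(300), and TEXT -> MEDIUMTEXT with the same
// charset can all be expanded in place; anything else is rejected here and the
// alter falls back to a table copy (or an error if slow alter is disabled).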
// Return true if all changed field types can be changed inplace
static bool change_type_is_supported(TABLE* table,
TABLE* altered_table,
tokudb_alter_ctx* ctx) {
if (table->s->null_bytes != altered_table->s->null_bytes)
return false;
if (table->s->fields != altered_table->s->fields)
return false;
if (ctx->changed_fields.elements() > 1)
return false; // only support one field change
for (DYNAMIC_ARRAY_ELEMENTS_TYPE ai = 0;
ai < ctx->changed_fields.elements();
ai++) {
uint i = ctx->changed_fields.at(ai);
Field *old_field = table->field[i];
Field *new_field = altered_table->field[i];
if (field_in_key_of_table(table, old_field) ||
field_in_key_of_table(altered_table, new_field))
return false;
if (!change_field_type_is_supported(old_field, new_field, ctx))
return false;
}
return true;
}
// Allocate and initialize a new descriptor for a dictionary in the altered
// table identified with idx.
// Return the new descriptor in the row_descriptor DBT.
// Return non-zero on error.
int ha_tokudb::new_row_descriptor(TABLE* altered_table,
Alter_inplace_info* ha_alter_info,
uint32_t idx,
DBT* row_descriptor) {
int error = 0;
tokudb_alter_ctx* ctx =
static_cast<tokudb_alter_ctx*>(ha_alter_info->handler_ctx);
row_descriptor->size =
get_max_desc_size(ctx->altered_table_kc_info, altered_table);
row_descriptor->data =
(uchar*)tokudb::memory::malloc(row_descriptor->size, MYF(MY_WME));
if (row_descriptor->data == NULL) {
error = ENOMEM;
} else {
KEY* prim_key =
hidden_primary_key ? NULL :
&altered_table->s->key_info[primary_key];
if (idx == primary_key) {
row_descriptor->size = create_main_key_descriptor(
(uchar*)row_descriptor->data,
prim_key,
hidden_primary_key,
primary_key,
altered_table,
ctx->altered_table_kc_info);
} else {
row_descriptor->size = create_secondary_key_descriptor(
(uchar*)row_descriptor->data,
&altered_table->key_info[idx],
prim_key,
hidden_primary_key,
altered_table,
primary_key,
idx,
ctx->altered_table_kc_info);
}
error = 0;
}
return error;
}
#endif