mariadb/storage/tokudb/ha_tokudb_alter_56.cc
2013-05-06 14:53:08 -04:00

1002 lines
45 KiB
C++

#if TOKU_INCLUDE_ALTER_56
#if 100000 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 100099
#define TOKU_ALTER_RENAME ALTER_RENAME_56
#elif 50600 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50699
#define TOKU_ALTER_RENAME ALTER_RENAME
#elif 50500 <= MYSQL_VERSION_ID && MYSQL_VERSION_ID <= 50599
#define TOKU_ALTER_RENAME ALTER_RENAME_56
#else
#error
#endif
#include "ha_tokudb_alter_common.cc"
#include <sql_array.h>
// The tokudb alter context contains the alter state that is set in the check if supported method and used
// later when the alter operation is executed.
class tokudb_alter_ctx : public inplace_alter_handler_ctx {
public:
tokudb_alter_ctx() :
handler_flags(0),
alter_txn(NULL),
add_index_changed(false),
drop_index_changed(false),
reset_card(false),
compression_changed(false),
expand_varchar_update_needed(false),
expand_fixed_update_needed(false),
table_kc_info(NULL),
altered_table_kc_info(NULL) {
}
~tokudb_alter_ctx() {
if (altered_table_kc_info)
free_key_and_col_info(altered_table_kc_info);
}
public:
ulong handler_flags;
DB_TXN *alter_txn;
bool add_index_changed;
bool incremented_num_DBs, modified_DBs;
bool drop_index_changed;
bool reset_card;
bool compression_changed;
enum toku_compression_method orig_compression_method;
bool expand_varchar_update_needed;
bool expand_fixed_update_needed;
Dynamic_array<uint> changed_fields;
KEY_AND_COL_INFO *table_kc_info;
KEY_AND_COL_INFO *altered_table_kc_info;
KEY_AND_COL_INFO altered_table_kc_info_base;
};
// Debug function to print out an alter table operation
void ha_tokudb::print_alter_info(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
printf("***are keys of two tables same? %d\n", tables_have_same_keys(table, altered_table, false, false));
if (ha_alter_info->handler_flags) {
printf("***alter flags set ***\n");
for (int i = 0; i < 32; i++) {
if (ha_alter_info->handler_flags & (1 << i))
printf("%d\n", i);
}
}
// everyone calculates data by doing some default_values - record[0], but I do not see why
// that is necessary
printf("******\n");
printf("***orig table***\n");
for (uint i = 0; i < table->s->fields; i++) {
//
// make sure to use table->field, and NOT table->s->field
//
Field* curr_field = table->field[i];
uint null_offset = get_null_offset(table, curr_field);
printf(
"name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, pack_length %u\n",
curr_field->field_name,
curr_field->real_type(), mysql_to_toku_type(curr_field),
curr_field->null_bit,
null_offset,
curr_field->real_maybe_null(),
curr_field->real_maybe_null() ? table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff,
curr_field->pack_length()
);
}
printf("******\n");
printf("***altered table***\n");
for (uint i = 0; i < altered_table->s->fields; i++) {
Field* curr_field = altered_table->field[i];
uint null_offset = get_null_offset(altered_table, curr_field);
printf(
"name: %s, types: %u %u, nullable: %d, null_offset: %d, is_null_field: %d, is_null %d, pack_length %u\n",
curr_field->field_name,
curr_field->real_type(), mysql_to_toku_type(curr_field),
curr_field->null_bit,
null_offset,
curr_field->real_maybe_null(),
curr_field->real_maybe_null() ? altered_table->s->default_values[null_offset] & curr_field->null_bit : 0xffffffff,
curr_field->pack_length()
);
}
printf("******\n");
}
// Given two tables with equal number of fields, find all of the fields with different types
// and return the indexes of the different fields in the changed_fields array. This function ignores field
// name differences.
static int find_changed_fields(TABLE *table_a, TABLE *table_b, Dynamic_array<uint> &changed_fields) {
for (uint i = 0; i < table_a->s->fields; i++) {
Field *field_a = table_a->field[i];
Field *field_b = table_b->field[i];
if (!fields_are_same_type(field_a, field_b))
changed_fields.append(i);
}
return changed_fields.elements();
}
static bool change_length_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx);
static bool change_type_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx);
// The ha_alter_info->handler_flags can not be trusted. This function maps the bogus handler flags to something we like.
static ulong fix_handler_flags(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
ulong handler_flags = ha_alter_info->handler_flags;
// workaround for fill_alter_inplace_info bug (#5193)
// the function erroneously sets the ADD_INDEX and DROP_INDEX flags for a column addition that does not
// change the keys. the following code turns the ADD_INDEX and DROP_INDEX flags so that we can do hot
// column addition later.
if (handler_flags & (Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::DROP_COLUMN)) {
if (handler_flags & (Alter_inplace_info::ADD_INDEX + Alter_inplace_info::DROP_INDEX)) {
if (tables_have_same_keys(table, altered_table, false, false)) {
handler_flags &= ~(Alter_inplace_info::ADD_INDEX + Alter_inplace_info::DROP_INDEX);
}
}
}
// always allow rename table + any other operation, so turn off the rename flag
if (handler_flags & Alter_inplace_info::TOKU_ALTER_RENAME) {
handler_flags &= ~Alter_inplace_info::TOKU_ALTER_RENAME;
}
// ALTER_COLUMN_TYPE may be set when no columns have been changed, so turn off the flag
if (handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) {
if (all_fields_are_same_type(table, altered_table)) {
handler_flags &= ~Alter_inplace_info::ALTER_COLUMN_TYPE;
}
}
return handler_flags;
}
// Require that there is no intersection of add and drop names.
static bool is_disjoint_add_drop(Alter_inplace_info *ha_alter_info) {
for (uint d = 0; d < ha_alter_info->index_drop_count; d++) {
KEY *drop_key = ha_alter_info->index_drop_buffer[d];
for (uint a = 0; a < ha_alter_info->index_add_count; a++) {
KEY *add_key = &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[a]];
if (strcmp(drop_key->name, add_key->name) == 0) {
return false;
}
}
}
return true;
}
// Return true if some bit in mask is set and no bit in ~mask is set, otherwise return false.
static bool only_flags(ulong bits, ulong mask) {
return (bits & mask) != 0 && (bits & ~mask) == 0;
}
// Check if an alter table operation on this table and described by the alter table parameters is supported inplace
// and if so, what type of locking is needed to execute it.
// return values:
// HA_ALTER_INPLACE_NOT_SUPPORTED: alter operation is not supported as an inplace operation, a table copy is required
// HA_ALTER_ERROR: the alter table operation should fail
// HA_ALTER_INPLACE_EXCLUSIVE_LOCK: prepare and alter runs with MDL X
// HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE: prepare runs with MDL X, alter runs with MDL SNW
// HA_ALTER_INPLACE_SHARED_LOCK: prepare and alter methods called with MDL SNW, concurrent reads, no writes
// HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE: prepare runs with MDL X, alter runs with MDL SW
// HA_ALTER_INPLACE_NO_LOCK: prepare and alter methods called with MDL SW, concurrent reads, writes.
// must set WRITE_ALLOW_WRITE lock type in the external lock method to avoid deadlocks
// with the MDL lock and the table lock
enum_alter_inplace_result ha_tokudb::check_if_supported_inplace_alter(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
TOKUDB_DBUG_ENTER("check_if_supported_alter");
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
print_alter_info(altered_table, ha_alter_info);
}
enum_alter_inplace_result result = HA_ALTER_INPLACE_NOT_SUPPORTED; // default is NOT inplace
THD *thd = ha_thd();
// setup context
tokudb_alter_ctx *ctx = new tokudb_alter_ctx;
ha_alter_info->handler_ctx = ctx;
ctx->handler_flags = fix_handler_flags(table, altered_table, ha_alter_info);
ctx->table_kc_info = &share->kc_info;
ctx->altered_table_kc_info = &ctx->altered_table_kc_info_base;
memset(ctx->altered_table_kc_info, 0, sizeof (KEY_AND_COL_INFO));
if (get_disable_hot_alter(thd)) {
; // do nothing
} else
// add or drop index
if (only_flags(ctx->handler_flags, Alter_inplace_info::DROP_INDEX + Alter_inplace_info::DROP_UNIQUE_INDEX +
Alter_inplace_info::ADD_INDEX + Alter_inplace_info::ADD_UNIQUE_INDEX)) {
if ((ha_alter_info->index_add_count > 0 || ha_alter_info->index_drop_count > 0) &&
!tables_have_same_keys(table, altered_table, false, false) &&
is_disjoint_add_drop(ha_alter_info)) {
if (ctx->handler_flags & (Alter_inplace_info::DROP_INDEX + Alter_inplace_info::DROP_UNIQUE_INDEX)) {
// the fractal tree can not handle dropping an index concurrent with querying with the index.
// we grab an exclusive MDL for the drop index.
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
} else {
result = HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE;
// someday, allow multiple hot indexes via alter table add key. don't forget to change the store_lock function.
// for now, hot indexing is only supported via session variable with the create index sql command
if (ha_alter_info->index_add_count == 1 && ha_alter_info->index_drop_count == 0 &&
get_create_index_online(thd) && thd_sql_command(thd) == SQLCOM_CREATE_INDEX) {
// external_lock set WRITE_ALLOW_WRITE which allows writes concurrent with the index creation
result = HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE;
}
}
}
} else
// column default
if (only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
} else
// column rename
if (ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME &&
only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_NAME + Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
// we have identified a possible column rename,
// but let's do some more checks
// we will only allow an hcr if there are no changes
// in column positions (ALTER_COLUMN_ORDER is not set)
// now need to verify that one and only one column
// has changed only its name. If we find anything to
// the contrary, we don't allow it, also check indexes
bool cr_supported = column_rename_supported(table, altered_table, (ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_ORDER) != 0);
if (cr_supported)
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
} else
// add column
if (ctx->handler_flags & Alter_inplace_info::ADD_COLUMN &&
only_flags(ctx->handler_flags, Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::ALTER_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
uint32_t added_columns[altered_table->s->fields];
uint32_t num_added_columns = 0;
int r = find_changed_columns(added_columns, &num_added_columns, table, altered_table);
if (r == 0) {
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (uint32_t i = 0; i < num_added_columns; i++) {
uint32_t curr_added_index = added_columns[i];
Field* curr_added_field = altered_table->field[curr_added_index];
printf("Added column: index %d, name %s\n", curr_added_index, curr_added_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else
// drop column
if (ctx->handler_flags & Alter_inplace_info::DROP_COLUMN &&
only_flags(ctx->handler_flags, Alter_inplace_info::DROP_COLUMN + Alter_inplace_info::ALTER_COLUMN_ORDER) &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
uint32_t dropped_columns[table->s->fields];
uint32_t num_dropped_columns = 0;
int r = find_changed_columns(dropped_columns, &num_dropped_columns, altered_table, table);
if (r == 0) {
if (tokudb_debug & TOKUDB_DEBUG_ALTER_TABLE_INFO) {
for (uint32_t i = 0; i < num_dropped_columns; i++) {
uint32_t curr_dropped_index = dropped_columns[i];
Field* curr_dropped_field = table->field[curr_dropped_index];
printf("Dropped column: index %d, name %s\n", curr_dropped_index, curr_dropped_field->field_name);
}
}
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else
// change column length
if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH) &&
only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH + Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
table->s->fields == altered_table->s->fields &&
find_changed_fields(table, altered_table, ctx->changed_fields) > 0 &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
if (change_length_is_supported(table, altered_table, ha_alter_info, ctx)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else
// change column type
if ((ctx->handler_flags & Alter_inplace_info::ALTER_COLUMN_TYPE) &&
only_flags(ctx->handler_flags, Alter_inplace_info::ALTER_COLUMN_TYPE + Alter_inplace_info::ALTER_COLUMN_DEFAULT) &&
table->s->fields == altered_table->s->fields &&
find_changed_fields(table, altered_table, ctx->changed_fields) > 0 &&
setup_kc_info(altered_table, ctx->altered_table_kc_info) == 0) {
if (change_type_is_supported(table, altered_table, ha_alter_info, ctx)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
} else
if (only_flags(ctx->handler_flags, Alter_inplace_info::CHANGE_CREATE_OPTION)) {
HA_CREATE_INFO *create_info = ha_alter_info->create_info;
// alter auto_increment
if (only_flags(create_info->used_fields, HA_CREATE_USED_AUTO)) {
// do a sanity check that the table is what we think it is
if (tables_have_same_keys_and_columns(table, altered_table, true)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
}
// alter row_format
else if (only_flags(create_info->used_fields, HA_CREATE_USED_ROW_FORMAT)) {
// do a sanity check that the table is what we think it is
if (tables_have_same_keys_and_columns(table, altered_table, true)) {
result = HA_ALTER_INPLACE_EXCLUSIVE_LOCK;
}
}
}
// turn a not supported result into an error if the slow alter table (copy) is disabled
if (result == HA_ALTER_INPLACE_NOT_SUPPORTED && get_disable_slow_alter(thd)) {
print_error(HA_ERR_UNSUPPORTED, MYF(0));
result = HA_ALTER_ERROR;
}
DBUG_RETURN(result);
}
// Prepare for the alter operations
bool ha_tokudb::prepare_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
TOKUDB_DBUG_ENTER("prepare_inplace_alter_table");
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
assert(transaction); // transaction must exist after table is locked
ctx->alter_txn = transaction;
bool result = false; // success
DBUG_RETURN(result);
}
// Execute the alter operations.
bool ha_tokudb::inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
TOKUDB_DBUG_ENTER("inplace_alter_table");
int error = 0;
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
HA_CREATE_INFO *create_info = ha_alter_info->create_info;
if (error == 0 && (ctx->handler_flags & (Alter_inplace_info::DROP_INDEX + Alter_inplace_info::DROP_UNIQUE_INDEX))) {
error = alter_table_drop_index(altered_table, ha_alter_info);
}
if (error == 0 && (ctx->handler_flags & (Alter_inplace_info::ADD_INDEX + Alter_inplace_info::ADD_UNIQUE_INDEX))) {
error = alter_table_add_index(altered_table, ha_alter_info);
}
if (error == 0 && (ctx->handler_flags & (Alter_inplace_info::ADD_COLUMN + Alter_inplace_info::DROP_COLUMN))) {
error = alter_table_add_or_drop_column(altered_table, ha_alter_info);
}
if (error == 0 && (ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) && (create_info->used_fields & HA_CREATE_USED_AUTO)) {
error = write_auto_inc_create(share->status_block, create_info->auto_increment_value, ctx->alter_txn);
}
if (error == 0 && (ctx->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) && (create_info->used_fields & HA_CREATE_USED_ROW_FORMAT)) {
// Get the current compression
DB *db = share->key_file[0];
error = db->get_compression_method(db, &ctx->orig_compression_method);
assert(error == 0);
// Set the new compression
enum toku_compression_method method = row_type_to_compression_method(create_info->row_type);
uint32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
db = share->key_file[i];
error = db->change_compression_method(db, method);
if (error)
break;
ctx->compression_changed = true;
}
}
if (error == 0 && ctx->expand_varchar_update_needed)
error = alter_table_expand_varchar_offsets(altered_table, ha_alter_info);
if (error == 0 && ctx->expand_fixed_update_needed)
error = alter_table_expand_columns(altered_table, ha_alter_info);
if (error == 0 && ctx->reset_card)
tokudb::set_card_from_status(share->status_block, ctx->alter_txn, table->s, altered_table->s);
bool result = false; // success
if (error) {
print_error(error, MYF(0));
result = true; // failure
}
DBUG_RETURN(result);
}
int ha_tokudb::alter_table_add_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
// sort keys in add index order
KEY *key_info = (KEY*) my_malloc(sizeof (KEY) * ha_alter_info->index_add_count, MYF(MY_WME));
for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
KEY *key = &key_info[i];
*key = ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
for (KEY_PART_INFO *key_part= key->key_part; key_part < key->key_part + get_key_parts(key); key_part++)
key_part->field = table->field[key_part->fieldnr];
}
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
ctx->add_index_changed = true;
int error = tokudb_add_index(table, key_info, ha_alter_info->index_add_count, ctx->alter_txn, &ctx->incremented_num_DBs, &ctx->modified_DBs);
if (error == HA_ERR_FOUND_DUPP_KEY) {
// hack for now, in case of duplicate key error,
// because at the moment we cannot display the right key
// information to the user, so that he knows potentially what went
// wrong.
last_dup_key = MAX_KEY;
}
my_free(key_info);
if (error == 0)
ctx->reset_card = true;
return error;
}
static bool find_index_of_key(const char *key_name, TABLE *table, uint *index_offset_ptr) {
for (uint i = 0; i < table->s->keys; i++) {
if (strcmp(key_name, table->key_info[i].name) == 0) {
*index_offset_ptr = i;
return true;
}
}
return false;
}
static bool find_index_of_key(const char *key_name, KEY *key_info, uint key_count, uint *index_offset_ptr) {
for (uint i = 0; i < key_count; i++) {
if (strcmp(key_name, key_info[i].name) == 0) {
*index_offset_ptr = i;
return true;
}
}
return false;
}
int ha_tokudb::alter_table_drop_index(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
KEY *key_info = table->key_info;
// translate key names to indexes into the key_info array
uint index_drop_offsets[ha_alter_info->index_drop_count];
for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
bool found;
found = find_index_of_key(ha_alter_info->index_drop_buffer[i]->name, table, &index_drop_offsets[i]);
if (!found) {
// undo of add key in partition engine
found = find_index_of_key(ha_alter_info->index_drop_buffer[i]->name, ha_alter_info->key_info_buffer, ha_alter_info->key_count, &index_drop_offsets[i]);
assert(found);
key_info = ha_alter_info->key_info_buffer;
}
}
// drop indexes
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
ctx->drop_index_changed = true;
int error = drop_indexes(table, index_drop_offsets, ha_alter_info->index_drop_count, key_info, ctx->alter_txn);
if (error == 0)
ctx->reset_card = true;
return error;
}
int ha_tokudb::alter_table_add_or_drop_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
int error;
uchar *column_extra = NULL;
uint32_t max_column_extra_size;
uint32_t num_column_extra;
uint32_t num_columns = 0;
uint32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
uint32_t columns[table->s->fields + altered_table->s->fields]; // set size such that we know it is big enough for both cases
memset(columns, 0, sizeof(columns));
// generate the array of columns
if (ha_alter_info->handler_flags & Alter_inplace_info::DROP_COLUMN) {
find_changed_columns(
columns,
&num_columns,
altered_table,
table
);
} else
if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) {
find_changed_columns(
columns,
&num_columns,
table,
altered_table
);
} else
assert(0);
max_column_extra_size =
STATIC_ROW_MUTATOR_SIZE + //max static row_mutator
4 + num_columns*(1+1+4+1+1+4) + altered_table->s->reclength + // max dynamic row_mutator
(4 + share->kc_info.num_blobs) + // max static blob size
(num_columns*(1+4+1+4)); // max dynamic blob size
column_extra = (uchar *)my_malloc(max_column_extra_size, MYF(MY_WME));
if (column_extra == NULL) { error = ENOMEM; goto cleanup; }
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
error = new_row_descriptor(table, altered_table, ha_alter_info, i, &row_descriptor);
if (error)
goto cleanup;
error = share->key_file[i]->change_descriptor(share->key_file[i], ctx->alter_txn, &row_descriptor, 0);
my_free(row_descriptor.data);
if (error)
goto cleanup;
if (i == primary_key || table_share->key_info[i].flags & HA_CLUSTERING) {
num_column_extra = fill_row_mutator(
column_extra,
columns,
num_columns,
altered_table,
ctx->altered_table_kc_info,
i,
(ha_alter_info->handler_flags & Alter_inplace_info::ADD_COLUMN) != 0 // true if adding columns, otherwise is a drop
);
DBT column_dbt; memset(&column_dbt, 0, sizeof column_dbt);
column_dbt.data = column_extra;
column_dbt.size = num_column_extra;
DBUG_ASSERT(num_column_extra <= max_column_extra_size);
error = share->key_file[i]->update_broadcast(
share->key_file[i],
ctx->alter_txn,
&column_dbt,
DB_IS_RESETTING_OP
);
if (error) { goto cleanup; }
}
}
error = 0;
cleanup:
my_free(column_extra, MYF(MY_ALLOW_ZERO_PTR));
return error;
}
// Commit or abort the alter operations.
// If commit then write the new frm data to the status using the alter transaction.
// If abort then abort the alter transaction and try to rollback the non-transactional changes.
bool ha_tokudb::commit_inplace_alter_table(TABLE *altered_table, Alter_inplace_info *ha_alter_info, bool commit) {
TOKUDB_DBUG_ENTER("commit_inplace_alter_table");
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
bool result = false; // success
if (commit) {
if (TOKU_PARTITION_WRITE_FRM_DATA || altered_table->part_info == NULL) {
int error = write_frm_data(share->status_block, ctx->alter_txn, altered_table->s->path.str);
if (error) {
commit = false;
result = true;
print_error(error, MYF(0));
}
}
}
if (!commit) {
// abort the alter transaction NOW so that any alters are rolled back. this allows the following restores to work.
THD *thd = ha_thd();
tokudb_trx_data *trx = (tokudb_trx_data *) thd_data_get(thd, tokudb_hton->slot);
assert(ctx->alter_txn == trx->stmt);
assert(trx->tokudb_lock_count > 0);
// for partitioned tables, we use a single transaction to do all of the partition changes. the tokudb_lock_count
// is a reference count for each of the handlers to the same transaction. obviously, we want to only abort once.
if (!--trx->tokudb_lock_count) {
abort_txn(ctx->alter_txn);
ctx->alter_txn = NULL;
trx->stmt = NULL;
trx->sub_sp_level = NULL;
}
transaction = NULL;
if (ctx->add_index_changed) {
restore_add_index(table, ha_alter_info->index_add_count, ctx->incremented_num_DBs, ctx->modified_DBs);
}
if (ctx->drop_index_changed) {
// translate key names to indexes into the key_info array
uint index_drop_offsets[ha_alter_info->index_drop_count];
for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
bool found = find_index_of_key(ha_alter_info->index_drop_buffer[i]->name, table, &index_drop_offsets[i]);
assert(found);
}
restore_drop_indexes(table, index_drop_offsets, ha_alter_info->index_drop_count);
}
if (ctx->compression_changed) {
uint32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
DB *db = share->key_file[i];
int error = db->change_compression_method(db, ctx->orig_compression_method);
assert(error == 0);
}
}
}
DBUG_RETURN(result);
}
// Setup the altered table's key and col info.
int ha_tokudb::setup_kc_info(TABLE *altered_table, KEY_AND_COL_INFO *altered_kc_info) {
int error = allocate_key_and_col_info(altered_table->s, altered_kc_info);
if (error == 0)
error = initialize_key_and_col_info(altered_table->s, altered_table, altered_kc_info, hidden_primary_key, primary_key);
return error;
}
// Expand the variable length fields offsets from 1 to 2 bytes.
int ha_tokudb::alter_table_expand_varchar_offsets(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
int error = 0;
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
uint32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
error = new_row_descriptor(table, altered_table, ha_alter_info, i, &row_descriptor);
if (error)
break;
error = share->key_file[i]->change_descriptor(share->key_file[i], ctx->alter_txn, &row_descriptor, 0);
my_free(row_descriptor.data);
if (error)
break;
// for all trees that have values, make an update variable offsets message and broadcast it into the tree
if (i == primary_key || (table_share->key_info[i].flags & HA_CLUSTERING)) {
uint32_t offset_start = table_share->null_bytes + share->kc_info.mcp_info[i].fixed_field_size;
uint32_t offset_end = offset_start + share->kc_info.mcp_info[i].len_of_offsets;
uint32_t number_of_offsets = offset_end - offset_start;
// make the expand variable offsets message
DBT expand; memset(&expand, 0, sizeof expand);
expand.size = sizeof (uchar) + sizeof offset_start + sizeof offset_end;
expand.data = my_malloc(expand.size, MYF(MY_WME));
if (!expand.data) {
error = ENOMEM;
break;
}
uchar *expand_ptr = (uchar *)expand.data;
expand_ptr[0] = UPDATE_OP_EXPAND_VARIABLE_OFFSETS;
expand_ptr += sizeof (uchar);
memcpy(expand_ptr, &number_of_offsets, sizeof number_of_offsets);
expand_ptr += sizeof number_of_offsets;
memcpy(expand_ptr, &offset_start, sizeof offset_start);
expand_ptr += sizeof offset_start;
// and broadcast it into the tree
error = share->key_file[i]->update_broadcast(share->key_file[i], ctx->alter_txn, &expand, DB_IS_RESETTING_OP);
my_free(expand.data);
if (error)
break;
}
}
return error;
}
// Return true if a field is part of a key
static bool field_in_key(KEY *key, Field *field) {
for (uint i = 0; i < get_key_parts(key); i++) {
KEY_PART_INFO *key_part = &key->key_part[i];
if (strcmp(key_part->field->field_name, field->field_name) == 0)
return true;
}
return false;
}
// Return true if a field is part of any key
static bool field_in_key_of_table(TABLE *table, Field *field) {
for (uint i = 0; i < table->s->keys; i++) {
if (field_in_key(&table->key_info[i], field))
return true;
}
return false;
}
// Return true if all changed varchar/varbinary field lengths can be changed inplace, otherwise return false
static bool change_varchar_length_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
if (old_field->real_type() != MYSQL_TYPE_VARCHAR ||
new_field->real_type() != MYSQL_TYPE_VARCHAR ||
old_field->binary() != new_field->binary() ||
old_field->charset()->number != new_field->charset()->number ||
old_field->field_length > new_field->field_length)
return false;
if (ctx->table_kc_info->num_offset_bytes > ctx->altered_table_kc_info->num_offset_bytes)
return false; // shrink is not supported
if (ctx->table_kc_info->num_offset_bytes < ctx->altered_table_kc_info->num_offset_bytes)
ctx->expand_varchar_update_needed = true; // sum of varchar lengths changed from 1 to 2
return true;
}
// Return true if all changed field lengths can be changed inplace, otherwise return false
static bool change_length_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
if (table->s->fields != altered_table->s->fields)
return false;
if (table->s->null_bytes != altered_table->s->null_bytes)
return false;
if (ctx->changed_fields.elements() > 1)
return false; // only support one field change
for (int ai = 0; ai < ctx->changed_fields.elements(); ai++) {
uint i = ctx->changed_fields.at(ai);
Field *old_field = table->field[i];
Field *new_field = altered_table->field[i];
if (old_field->real_type() != new_field->real_type())
return false; // no type conversions
if (old_field->real_type() != MYSQL_TYPE_VARCHAR)
return false; // only varchar
if (field_in_key_of_table(table, old_field) || field_in_key_of_table(altered_table, new_field))
return false; // not in any key
if (!change_varchar_length_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx))
return false;
}
return true;
}
// Debug function that ensures that the array is sorted
static bool is_sorted(Dynamic_array<uint> &a) {
bool r = true;
if (a.elements() > 0) {
uint lastelement = a.at(0);
for (int i = 1; i < a.elements(); i++)
if (lastelement > a.at(i))
r = false;
}
return r;
}
int ha_tokudb::alter_table_expand_columns(TABLE *altered_table, Alter_inplace_info *ha_alter_info) {
int error = 0;
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
assert(is_sorted(ctx->changed_fields)); // since we build the changed_fields array in field order, it must be sorted
for (int ai = 0; error == 0 && ai < ctx->changed_fields.elements(); ai++) {
uint expand_field_num = ctx->changed_fields.at(ai);
error = alter_table_expand_one_column(altered_table, ha_alter_info, expand_field_num);
}
return error;
}
// Return true if the field is an unsigned int
static bool is_unsigned(Field *f) {
return (f->flags & UNSIGNED_FLAG) != 0;
}
// Return the starting offset in the value for a particular index (selected by idx) of a
// particular field (selected by expand_field_num)
// TODO: replace this?
static uint32_t alter_table_field_offset(uint32_t null_bytes, KEY_AND_COL_INFO *kc_info, int idx, int expand_field_num) {
uint32_t offset = null_bytes;
for (int i = 0; i < expand_field_num; i++) {
if (bitmap_is_set(&kc_info->key_filters[idx], i)) // skip key fields
continue;
offset += kc_info->field_lengths[i];
}
return offset;
}
// Send an expand message into all clustered indexes including the primary
int ha_tokudb::alter_table_expand_one_column(TABLE *altered_table, Alter_inplace_info *ha_alter_info, int expand_field_num) {
int error = 0;
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
Field *old_field = table->field[expand_field_num];
TOKU_TYPE old_field_type = mysql_to_toku_type(old_field);
Field *new_field = altered_table->field[expand_field_num];
TOKU_TYPE new_field_type = mysql_to_toku_type(new_field);
assert(old_field_type == new_field_type);
uchar operation;
uchar pad_char;
switch (old_field_type) {
case toku_type_int:
assert(is_unsigned(old_field) == is_unsigned(new_field));
if (is_unsigned(old_field))
operation = UPDATE_OP_EXPAND_UINT;
else
operation = UPDATE_OP_EXPAND_INT;
pad_char = 0;
break;
case toku_type_fixstring:
operation = UPDATE_OP_EXPAND_CHAR;
pad_char = old_field->charset()->pad_char;
break;
case toku_type_fixbinary:
operation = UPDATE_OP_EXPAND_BINARY;
pad_char = 0;
break;
default:
assert(0);
}
uint32_t curr_num_DBs = table->s->keys + test(hidden_primary_key);
for (uint32_t i = 0; i < curr_num_DBs; i++) {
// change to a new descriptor
DBT row_descriptor; memset(&row_descriptor, 0, sizeof row_descriptor);
error = new_row_descriptor(table, altered_table, ha_alter_info, i, &row_descriptor);
if (error)
break;
error = share->key_file[i]->change_descriptor(share->key_file[i], ctx->alter_txn, &row_descriptor, 0);
my_free(row_descriptor.data);
if (error)
break;
// for all trees that have values, make an expand update message and broadcast it into the tree
if (i == primary_key || (table_share->key_info[i].flags & HA_CLUSTERING)) {
uint32_t old_offset = alter_table_field_offset(table_share->null_bytes, ctx->table_kc_info, i, expand_field_num);
uint32_t new_offset = alter_table_field_offset(table_share->null_bytes, ctx->altered_table_kc_info, i, expand_field_num);
assert(old_offset <= new_offset);
uint32_t old_length = ctx->table_kc_info->field_lengths[expand_field_num];
assert(old_length == old_field->pack_length());
uint32_t new_length = ctx->altered_table_kc_info->field_lengths[expand_field_num];
assert(new_length == new_field->pack_length());
DBT expand; memset(&expand, 0, sizeof expand);
expand.size = sizeof operation + sizeof new_offset + sizeof old_length + sizeof new_length;
if (operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY)
expand.size += sizeof pad_char;
expand.data = my_malloc(expand.size, MYF(MY_WME));
if (!expand.data) {
error = ENOMEM;
break;
}
uchar *expand_ptr = (uchar *)expand.data;
expand_ptr[0] = operation;
expand_ptr += sizeof operation;
// for the first altered field, old_offset == new_offset. for the subsequent altered fields, the new_offset
// should be used as it includes the length changes from the previous altered fields.
memcpy(expand_ptr, &new_offset, sizeof new_offset);
expand_ptr += sizeof new_offset;
memcpy(expand_ptr, &old_length, sizeof old_length);
expand_ptr += sizeof old_length;
memcpy(expand_ptr, &new_length, sizeof new_length);
expand_ptr += sizeof new_length;
if (operation == UPDATE_OP_EXPAND_CHAR || operation == UPDATE_OP_EXPAND_BINARY) {
memcpy(expand_ptr, &pad_char, sizeof pad_char);
expand_ptr += sizeof pad_char;
}
assert(expand_ptr == (uchar *)expand.data + expand.size);
// and broadcast it into the tree
error = share->key_file[i]->update_broadcast(share->key_file[i], ctx->alter_txn, &expand, DB_IS_RESETTING_OP);
my_free(expand.data);
if (error)
break;
}
}
return error;
}
// Return true if two fixed length fields can be changed inplace
static bool change_fixed_length_is_supported(TABLE *table, TABLE *altered_table, Field *old_field, Field *new_field, tokudb_alter_ctx *ctx) {
// no change in size is supported
if (old_field->pack_length() == new_field->pack_length())
return true;
// shrink is not supported
if (old_field->pack_length() > new_field->pack_length())
return false;
ctx->expand_fixed_update_needed = true;
return true;
}
// Return true if the MySQL type is an int or unsigned int type
static bool is_int_type(enum_field_types t) {
switch (t) {
case MYSQL_TYPE_TINY:
case MYSQL_TYPE_SHORT:
case MYSQL_TYPE_INT24:
case MYSQL_TYPE_LONG:
case MYSQL_TYPE_LONGLONG:
return true;
default:
return false;
}
}
// Return true if two field types can be changed inplace
static bool change_field_type_is_supported(Field *old_field, Field *new_field, TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
enum_field_types old_type = old_field->real_type();
enum_field_types new_type = new_field->real_type();
if (is_int_type(old_type)) {
// int and unsigned int expansion
if (is_int_type(new_type) && is_unsigned(old_field) == is_unsigned(new_field))
return change_fixed_length_is_supported(table, altered_table, old_field, new_field, ctx);
else
return false;
} else if (old_type == MYSQL_TYPE_STRING) {
// char(X) -> char(Y) and binary(X) -> binary(Y) expansion
if (new_type == MYSQL_TYPE_STRING &&
old_field->binary() == new_field->binary() &&
old_field->charset()->number == new_field->charset()->number)
return change_fixed_length_is_supported(table, altered_table, old_field, new_field, ctx);
else
return false;
} else if (old_type == MYSQL_TYPE_VARCHAR) {
// varchar(X) -> varchar(Y) and varbinary(X) -> varbinary(Y) expansion where X < 256 <= Y
// the ALTER_COLUMN_TYPE handler flag is set for these cases
return change_varchar_length_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx);
} else if (old_type == MYSQL_TYPE_BLOB) {
// blob -> longer blob
if (new_type == MYSQL_TYPE_BLOB && old_field->binary() && new_field->binary() &&
old_field->pack_length() <= new_field->pack_length()) {
return true;
}
// text -> longer text
if (new_type == MYSQL_TYPE_BLOB && !old_field->binary() && !new_field->binary() &&
old_field->pack_length() <= new_field->pack_length() &&
old_field->charset()->number == new_field->charset()->number) {
return true;
}
return false;
} else
return false;
}
// Return true if all changed field types can be changed inplace
static bool change_type_is_supported(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, tokudb_alter_ctx *ctx) {
if (table->s->null_bytes != altered_table->s->null_bytes)
return false;
if (table->s->fields != altered_table->s->fields)
return false;
if (ctx->changed_fields.elements() > 1)
return false; // only support one field change
for (int ai = 0; ai < ctx->changed_fields.elements(); ai++) {
uint i = ctx->changed_fields.at(ai);
Field *old_field = table->field[i];
Field *new_field = altered_table->field[i];
if (field_in_key_of_table(table, old_field) || field_in_key_of_table(altered_table, new_field))
return false;
if (!change_field_type_is_supported(old_field, new_field, table, altered_table, ha_alter_info, ctx))
return false;
}
return true;
}
// Allocate and initialize a new descriptor for a dictionary in the altered table identified with idx.
// Return the new descriptor in the row_descriptor DBT.
// Return non-zero on error.
int ha_tokudb::new_row_descriptor(TABLE *table, TABLE *altered_table, Alter_inplace_info *ha_alter_info, uint32_t idx, DBT *row_descriptor) {
int error = 0;
tokudb_alter_ctx *ctx = static_cast<tokudb_alter_ctx *>(ha_alter_info->handler_ctx);
row_descriptor->size = get_max_desc_size(ctx->altered_table_kc_info, altered_table);
row_descriptor->data = (uchar *) my_malloc(row_descriptor->size, MYF(MY_WME));
if (row_descriptor->data == NULL) {
error = ENOMEM;
} else {
KEY* prim_key = hidden_primary_key ? NULL : &altered_table->s->key_info[primary_key];
if (idx == primary_key) {
row_descriptor->size = create_main_key_descriptor((uchar *)row_descriptor->data,
prim_key,
hidden_primary_key,
primary_key,
altered_table,
ctx->altered_table_kc_info);
} else {
row_descriptor->size = create_secondary_key_descriptor((uchar *)row_descriptor->data,
&altered_table->key_info[idx],
prim_key,
hidden_primary_key,
altered_table,
primary_key,
idx,
ctx->altered_table_kc_info);
}
error = 0;
}
return error;
}
#endif