Bug #25928471: ONLINE ALTER AND CONCURRENT DELETE ON TABLE WITH MANY TEXT COLUMNS CAUSES CRASH

Issue:
------
Prefix for externally stored columns were being stored in online_log when a
table is altered and alter causes table to be rebuilt. Space in online_log is
limited and if length of prefix of externally stored columns is very big, then
it is being written to online log without making sure if it fits. This leads to
memory corruption.

Fix:
----
After fix for Bug#16544143, there is no need to store prefixes of externally
stored columnd in online_log. Thus remove the code which stores column prefixes
for externally stored columns. Also, before writing anything on online_log,
make sure it fits to available memory to avoid memory corruption.

Read RB page for more details.

Reviewed-by: Annamalai Gurusami <annamalai.gurusami@oracle.com>
RB: 18239
This commit is contained in:
Mayank Prasad 2018-01-10 20:54:09 +05:30 committed by Marko Mäkelä
parent 42fac32413
commit f47eac2882
2 changed files with 36 additions and 216 deletions

View file

@ -435,6 +435,8 @@ err_exit:
*avail = srv_sort_buf_size - log->tail.bytes;
if (size > *avail) {
/* Make sure log->tail.buf is large enough */
ut_ad(size <= sizeof log->tail.buf);
return(log->tail.buf);
} else {
return(log->tail.block + log->tail.bytes);
@ -528,12 +530,10 @@ row_log_table_delete(
{
ulint old_pk_extra_size;
ulint old_pk_size;
ulint ext_size = 0;
ulint mrec_size;
ulint avail_size;
mem_heap_t* heap = NULL;
const dtuple_t* old_pk;
row_ext_t* ext;
ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
@ -614,72 +614,20 @@ row_log_table_delete(
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
mrec_size = 6 + old_pk_size;
/* Log enough prefix of the BLOB unless both the
old and new table are in COMPACT or REDUNDANT format,
which store the prefix in the clustered index record. */
if (rec_offs_any_extern(offsets)
&& (dict_table_get_format(index->table) >= UNIV_FORMAT_B
|| dict_table_get_format(new_table) >= UNIV_FORMAT_B)) {
/* Build a cache of those off-page column prefixes
that are referenced by secondary indexes. It can be
that none of the off-page columns are needed. */
row_build(ROW_COPY_DATA, index, rec,
offsets, NULL, NULL, NULL, &ext, heap);
if (ext) {
/* Log the row_ext_t, ext->ext and ext->buf */
ext_size = ext->n_ext * ext->max_len
+ sizeof(*ext)
+ ext->n_ext * sizeof(ulint)
+ (ext->n_ext - 1) * sizeof ext->len;
mrec_size += ext_size;
}
}
/* 2 = 1 (extra_size) + at least 1 byte payload */
mrec_size = 2 + old_pk_size;
if (byte* b = row_log_table_open(index->online_log,
mrec_size, &avail_size)) {
*b++ = ROW_T_DELETE;
*b++ = static_cast<byte>(old_pk_extra_size);
/* Log the size of external prefix we saved */
mach_write_to_4(b, ext_size);
b += 4;
rec_convert_dtuple_to_temp(
b + old_pk_extra_size, new_index,
old_pk->fields, old_pk->n_fields);
b += old_pk_size;
if (ext_size) {
ulint cur_ext_size = sizeof(*ext)
+ (ext->n_ext - 1) * sizeof ext->len;
memcpy(b, ext, cur_ext_size);
b += cur_ext_size;
/* Check if we need to col_map to adjust the column
number. If columns were added/removed/reordered,
adjust the column number. */
if (const ulint* col_map =
index->online_log->col_map) {
for (ulint i = 0; i < ext->n_ext; i++) {
const_cast<ulint&>(ext->ext[i]) =
col_map[ext->ext[i]];
}
}
memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext));
b += ext->n_ext * sizeof(*ext->ext);
ext_size -= cur_ext_size
+ ext->n_ext * sizeof(*ext->ext);
memcpy(b, ext->buf, ext_size);
b += ext_size;
}
row_log_table_close(
index->online_log, b, mrec_size, avail_size);
}
@ -1601,15 +1549,13 @@ row_log_table_apply_insert(
/******************************************************//**
Deletes a record from a table that is being rebuilt.
@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull(1, 2, 4, 5), warn_unused_result))
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_log_table_apply_delete_low(
/*===========================*/
btr_pcur_t* pcur, /*!< in/out: B-tree cursor,
will be trashed */
const ulint* offsets, /*!< in: offsets on pcur */
const row_ext_t* save_ext, /*!< in: saved external field
info, or NULL */
mem_heap_t* heap, /*!< in/out: memory heap */
mtr_t* mtr) /*!< in/out: mini-transaction,
will be committed */
@ -1633,11 +1579,7 @@ row_log_table_apply_delete_low(
/* Build a row template for purging secondary index entries. */
row = row_build(
ROW_COPY_DATA, index, btr_pcur_get_rec(pcur),
offsets, NULL, NULL, NULL,
save_ext ? NULL : &ext, heap);
if (!save_ext) {
save_ext = ext;
}
offsets, NULL, NULL, NULL, &ext, heap);
} else {
row = NULL;
}
@ -1656,7 +1598,7 @@ row_log_table_apply_delete_low(
}
const dtuple_t* entry = row_build_index_entry(
row, save_ext, index, heap);
row, ext, index, heap);
mtr_start(mtr);
btr_pcur_open(index, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, pcur, mtr);
@ -1699,11 +1641,10 @@ flag_ok:
/******************************************************//**
Replays a delete operation on a table that was rebuilt.
@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result))
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_log_table_apply_delete(
/*=======================*/
que_thr_t* thr, /*!< in: query graph */
ulint trx_id_col, /*!< in: position of
DB_TRX_ID in the new
clustered index */
@ -1712,9 +1653,7 @@ row_log_table_apply_delete(
mem_heap_t* offsets_heap, /*!< in/out: memory heap
that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
const row_log_t* log, /*!< in: online log */
const row_ext_t* save_ext) /*!< in: saved external field
info, or NULL */
const row_log_t* log) /*!< in: online log */
{
dict_table_t* new_table = log->table;
dict_index_t* index = dict_table_get_first_index(new_table);
@ -1814,8 +1753,7 @@ all_done:
}
}
return(row_log_table_apply_delete_low(&pcur, offsets, save_ext,
heap, &mtr));
return row_log_table_apply_delete_low(&pcur, offsets, heap, &mtr);
}
/******************************************************//**
@ -2026,7 +1964,7 @@ func_exit_committed:
/* Some BLOBs are missing, so we are interpreting
this ROW_T_UPDATE as ROW_T_DELETE (see *1). */
error = row_log_table_apply_delete_low(
&pcur, cur_offsets, NULL, heap, &mtr);
&pcur, cur_offsets, heap, &mtr);
goto func_exit_committed;
}
@ -2064,7 +2002,7 @@ func_exit_committed:
}
error = row_log_table_apply_delete_low(
&pcur, cur_offsets, NULL, heap, &mtr);
&pcur, cur_offsets, heap, &mtr);
ut_ad(mtr.state == MTR_COMMITTED);
if (error == DB_SUCCESS) {
@ -2210,8 +2148,6 @@ row_log_table_apply_op(
ulint extra_size;
const mrec_t* next_mrec;
dtuple_t* old_pk;
row_ext_t* ext;
ulint ext_size;
ut_ad(dict_index_is_clust(dup->index));
ut_ad(dup->index->table != log->table);
@ -2219,7 +2155,7 @@ row_log_table_apply_op(
*error = DB_SUCCESS;
/* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */
/* 3 = 1 (op type) + 1 (extra_size) + at least 1 byte payload */
if (mrec + 3 >= mrec_end) {
return(NULL);
}
@ -2269,14 +2205,12 @@ row_log_table_apply_op(
break;
case ROW_T_DELETE:
/* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
if (mrec + 6 >= mrec_end) {
/* 1 (extra_size) + at least 1 (payload) */
if (mrec + 2 >= mrec_end) {
return(NULL);
}
extra_size = *mrec++;
ext_size = mach_read_from_4(mrec);
mrec += 4;
ut_ad(mrec < mrec_end);
/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
@ -2285,40 +2219,16 @@ row_log_table_apply_op(
rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
rec_init_offsets_temp(mrec, new_index, offsets);
next_mrec = mrec + rec_offs_data_size(offsets) + ext_size;
next_mrec = mrec + rec_offs_data_size(offsets);
if (next_mrec > mrec_end) {
return(NULL);
}
log->head.total += next_mrec - mrec_start;
/* If there are external fields, retrieve those logged
prefix info and reconstruct the row_ext_t */
if (ext_size) {
/* We use memcpy to avoid unaligned
access on some non-x86 platforms.*/
ext = static_cast<row_ext_t*>(
mem_heap_dup(heap,
mrec + rec_offs_data_size(offsets),
ext_size));
byte* ext_start = reinterpret_cast<byte*>(ext);
ulint ext_len = sizeof(*ext)
+ (ext->n_ext - 1) * sizeof ext->len;
ext->ext = reinterpret_cast<ulint*>(ext_start + ext_len);
ext_len += ext->n_ext * sizeof(*ext->ext);
ext->buf = static_cast<byte*>(ext_start + ext_len);
} else {
ext = NULL;
}
*error = row_log_table_apply_delete(
thr, new_trx_id_col,
mrec, offsets, offsets_heap, heap,
log, ext);
new_trx_id_col,
mrec, offsets, offsets_heap, heap, log);
break;
case ROW_T_UPDATE:

View file

@ -435,6 +435,8 @@ err_exit:
*avail = srv_sort_buf_size - log->tail.bytes;
if (size > *avail) {
/* Make sure log->tail.buf is large enough */
ut_ad(size <= sizeof log->tail.buf);
return(log->tail.buf);
} else {
return(log->tail.block + log->tail.bytes);
@ -528,12 +530,10 @@ row_log_table_delete(
{
ulint old_pk_extra_size;
ulint old_pk_size;
ulint ext_size = 0;
ulint mrec_size;
ulint avail_size;
mem_heap_t* heap = NULL;
const dtuple_t* old_pk;
row_ext_t* ext;
ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
@ -614,72 +614,20 @@ row_log_table_delete(
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
mrec_size = 6 + old_pk_size;
/* Log enough prefix of the BLOB unless both the
old and new table are in COMPACT or REDUNDANT format,
which store the prefix in the clustered index record. */
if (rec_offs_any_extern(offsets)
&& (dict_table_get_format(index->table) >= UNIV_FORMAT_B
|| dict_table_get_format(new_table) >= UNIV_FORMAT_B)) {
/* Build a cache of those off-page column prefixes
that are referenced by secondary indexes. It can be
that none of the off-page columns are needed. */
row_build(ROW_COPY_DATA, index, rec,
offsets, NULL, NULL, NULL, &ext, heap);
if (ext) {
/* Log the row_ext_t, ext->ext and ext->buf */
ext_size = ext->n_ext * ext->max_len
+ sizeof(*ext)
+ ext->n_ext * sizeof(ulint)
+ (ext->n_ext - 1) * sizeof ext->len;
mrec_size += ext_size;
}
}
/* 2 = 1 (extra_size) + at least 1 byte payload */
mrec_size = 2 + old_pk_size;
if (byte* b = row_log_table_open(index->online_log,
mrec_size, &avail_size)) {
*b++ = ROW_T_DELETE;
*b++ = static_cast<byte>(old_pk_extra_size);
/* Log the size of external prefix we saved */
mach_write_to_4(b, ext_size);
b += 4;
rec_convert_dtuple_to_temp(
b + old_pk_extra_size, new_index,
old_pk->fields, old_pk->n_fields);
b += old_pk_size;
if (ext_size) {
ulint cur_ext_size = sizeof(*ext)
+ (ext->n_ext - 1) * sizeof ext->len;
memcpy(b, ext, cur_ext_size);
b += cur_ext_size;
/* Check if we need to col_map to adjust the column
number. If columns were added/removed/reordered,
adjust the column number. */
if (const ulint* col_map =
index->online_log->col_map) {
for (ulint i = 0; i < ext->n_ext; i++) {
const_cast<ulint&>(ext->ext[i]) =
col_map[ext->ext[i]];
}
}
memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext));
b += ext->n_ext * sizeof(*ext->ext);
ext_size -= cur_ext_size
+ ext->n_ext * sizeof(*ext->ext);
memcpy(b, ext->buf, ext_size);
b += ext_size;
}
row_log_table_close(
index->online_log, b, mrec_size, avail_size);
}
@ -1601,15 +1549,13 @@ row_log_table_apply_insert(
/******************************************************//**
Deletes a record from a table that is being rebuilt.
@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull(1, 2, 4, 5), warn_unused_result))
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_log_table_apply_delete_low(
/*===========================*/
btr_pcur_t* pcur, /*!< in/out: B-tree cursor,
will be trashed */
const ulint* offsets, /*!< in: offsets on pcur */
const row_ext_t* save_ext, /*!< in: saved external field
info, or NULL */
mem_heap_t* heap, /*!< in/out: memory heap */
mtr_t* mtr) /*!< in/out: mini-transaction,
will be committed */
@ -1633,11 +1579,7 @@ row_log_table_apply_delete_low(
/* Build a row template for purging secondary index entries. */
row = row_build(
ROW_COPY_DATA, index, btr_pcur_get_rec(pcur),
offsets, NULL, NULL, NULL,
save_ext ? NULL : &ext, heap);
if (!save_ext) {
save_ext = ext;
}
offsets, NULL, NULL, NULL, &ext, heap);
} else {
row = NULL;
}
@ -1656,7 +1598,7 @@ row_log_table_apply_delete_low(
}
const dtuple_t* entry = row_build_index_entry(
row, save_ext, index, heap);
row, ext, index, heap);
mtr_start(mtr);
btr_pcur_open(index, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, pcur, mtr);
@ -1699,11 +1641,10 @@ flag_ok:
/******************************************************//**
Replays a delete operation on a table that was rebuilt.
@return DB_SUCCESS or error code */
static MY_ATTRIBUTE((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result))
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
row_log_table_apply_delete(
/*=======================*/
que_thr_t* thr, /*!< in: query graph */
ulint trx_id_col, /*!< in: position of
DB_TRX_ID in the new
clustered index */
@ -1712,9 +1653,7 @@ row_log_table_apply_delete(
mem_heap_t* offsets_heap, /*!< in/out: memory heap
that can be emptied */
mem_heap_t* heap, /*!< in/out: memory heap */
const row_log_t* log, /*!< in: online log */
const row_ext_t* save_ext) /*!< in: saved external field
info, or NULL */
const row_log_t* log) /*!< in: online log */
{
dict_table_t* new_table = log->table;
dict_index_t* index = dict_table_get_first_index(new_table);
@ -1814,8 +1753,7 @@ all_done:
}
}
return(row_log_table_apply_delete_low(&pcur, offsets, save_ext,
heap, &mtr));
return row_log_table_apply_delete_low(&pcur, offsets, heap, &mtr);
}
/******************************************************//**
@ -2026,7 +1964,7 @@ func_exit_committed:
/* Some BLOBs are missing, so we are interpreting
this ROW_T_UPDATE as ROW_T_DELETE (see *1). */
error = row_log_table_apply_delete_low(
&pcur, cur_offsets, NULL, heap, &mtr);
&pcur, cur_offsets, heap, &mtr);
goto func_exit_committed;
}
@ -2064,7 +2002,7 @@ func_exit_committed:
}
error = row_log_table_apply_delete_low(
&pcur, cur_offsets, NULL, heap, &mtr);
&pcur, cur_offsets, heap, &mtr);
ut_ad(mtr.state == MTR_COMMITTED);
if (error == DB_SUCCESS) {
@ -2210,8 +2148,6 @@ row_log_table_apply_op(
ulint extra_size;
const mrec_t* next_mrec;
dtuple_t* old_pk;
row_ext_t* ext;
ulint ext_size;
ut_ad(dict_index_is_clust(dup->index));
ut_ad(dup->index->table != log->table);
@ -2219,7 +2155,7 @@ row_log_table_apply_op(
*error = DB_SUCCESS;
/* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */
/* 3 = 1 (op type) + 1 (extra_size) + at least 1 byte payload */
if (mrec + 3 >= mrec_end) {
return(NULL);
}
@ -2269,14 +2205,12 @@ row_log_table_apply_op(
break;
case ROW_T_DELETE:
/* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
if (mrec + 6 >= mrec_end) {
/* 1 (extra_size) + at least 1 (payload) */
if (mrec + 2 >= mrec_end) {
return(NULL);
}
extra_size = *mrec++;
ext_size = mach_read_from_4(mrec);
mrec += 4;
ut_ad(mrec < mrec_end);
/* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
@ -2285,40 +2219,16 @@ row_log_table_apply_op(
rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
rec_init_offsets_temp(mrec, new_index, offsets);
next_mrec = mrec + rec_offs_data_size(offsets) + ext_size;
next_mrec = mrec + rec_offs_data_size(offsets);
if (next_mrec > mrec_end) {
return(NULL);
}
log->head.total += next_mrec - mrec_start;
/* If there are external fields, retrieve those logged
prefix info and reconstruct the row_ext_t */
if (ext_size) {
/* We use memcpy to avoid unaligned
access on some non-x86 platforms.*/
ext = static_cast<row_ext_t*>(
mem_heap_dup(heap,
mrec + rec_offs_data_size(offsets),
ext_size));
byte* ext_start = reinterpret_cast<byte*>(ext);
ulint ext_len = sizeof(*ext)
+ (ext->n_ext - 1) * sizeof ext->len;
ext->ext = reinterpret_cast<ulint*>(ext_start + ext_len);
ext_len += ext->n_ext * sizeof(*ext->ext);
ext->buf = static_cast<byte*>(ext_start + ext_len);
} else {
ext = NULL;
}
*error = row_log_table_apply_delete(
thr, new_trx_id_col,
mrec, offsets, offsets_heap, heap,
log, ext);
new_trx_id_col,
mrec, offsets, offsets_heap, heap, log);
break;
case ROW_T_UPDATE: