From f47eac2882b8635b8de252cdca8e46200b1ff724 Mon Sep 17 00:00:00 2001 From: Mayank Prasad Date: Wed, 10 Jan 2018 20:54:09 +0530 Subject: [PATCH] Bug #25928471: ONLINE ALTER AND CONCURRENT DELETE ON TABLE WITH MANY TEXT COLUMNS CAUSES CRASH Issue: ------ Prefix for externally stored columns were being stored in online_log when a table is altered and alter causes table to be rebuilt. Space in online_log is limited and if length of prefix of externally stored columns is very big, then it is being written to online log without making sure if it fits. This leads to memory corruption. Fix: ---- After fix for Bug#16544143, there is no need to store prefixes of externally stored columnd in online_log. Thus remove the code which stores column prefixes for externally stored columns. Also, before writing anything on online_log, make sure it fits to available memory to avoid memory corruption. Read RB page for more details. Reviewed-by: Annamalai Gurusami RB: 18239 --- storage/innobase/row/row0log.cc | 126 +++++--------------------------- storage/xtradb/row/row0log.cc | 126 +++++--------------------------- 2 files changed, 36 insertions(+), 216 deletions(-) diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 62ca32b712c..1383c91f41f 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -435,6 +435,8 @@ err_exit: *avail = srv_sort_buf_size - log->tail.bytes; if (size > *avail) { + /* Make sure log->tail.buf is large enough */ + ut_ad(size <= sizeof log->tail.buf); return(log->tail.buf); } else { return(log->tail.block + log->tail.bytes); @@ -528,12 +530,10 @@ row_log_table_delete( { ulint old_pk_extra_size; ulint old_pk_size; - ulint ext_size = 0; ulint mrec_size; ulint avail_size; mem_heap_t* heap = NULL; const dtuple_t* old_pk; - row_ext_t* ext; ut_ad(dict_index_is_clust(index)); ut_ad(rec_offs_validate(rec, index, offsets)); @@ -614,72 +614,20 @@ row_log_table_delete( &old_pk_extra_size); ut_ad(old_pk_extra_size < 0x100); - mrec_size = 6 + old_pk_size; - - /* Log enough prefix of the BLOB unless both the - old and new table are in COMPACT or REDUNDANT format, - which store the prefix in the clustered index record. */ - if (rec_offs_any_extern(offsets) - && (dict_table_get_format(index->table) >= UNIV_FORMAT_B - || dict_table_get_format(new_table) >= UNIV_FORMAT_B)) { - - /* Build a cache of those off-page column prefixes - that are referenced by secondary indexes. It can be - that none of the off-page columns are needed. */ - row_build(ROW_COPY_DATA, index, rec, - offsets, NULL, NULL, NULL, &ext, heap); - if (ext) { - /* Log the row_ext_t, ext->ext and ext->buf */ - ext_size = ext->n_ext * ext->max_len - + sizeof(*ext) - + ext->n_ext * sizeof(ulint) - + (ext->n_ext - 1) * sizeof ext->len; - mrec_size += ext_size; - } - } + /* 2 = 1 (extra_size) + at least 1 byte payload */ + mrec_size = 2 + old_pk_size; if (byte* b = row_log_table_open(index->online_log, mrec_size, &avail_size)) { *b++ = ROW_T_DELETE; *b++ = static_cast(old_pk_extra_size); - /* Log the size of external prefix we saved */ - mach_write_to_4(b, ext_size); - b += 4; - rec_convert_dtuple_to_temp( b + old_pk_extra_size, new_index, old_pk->fields, old_pk->n_fields); b += old_pk_size; - if (ext_size) { - ulint cur_ext_size = sizeof(*ext) - + (ext->n_ext - 1) * sizeof ext->len; - - memcpy(b, ext, cur_ext_size); - b += cur_ext_size; - - /* Check if we need to col_map to adjust the column - number. If columns were added/removed/reordered, - adjust the column number. */ - if (const ulint* col_map = - index->online_log->col_map) { - for (ulint i = 0; i < ext->n_ext; i++) { - const_cast(ext->ext[i]) = - col_map[ext->ext[i]]; - } - } - - memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext)); - b += ext->n_ext * sizeof(*ext->ext); - - ext_size -= cur_ext_size - + ext->n_ext * sizeof(*ext->ext); - memcpy(b, ext->buf, ext_size); - b += ext_size; - } - row_log_table_close( index->online_log, b, mrec_size, avail_size); } @@ -1601,15 +1549,13 @@ row_log_table_apply_insert( /******************************************************//** Deletes a record from a table that is being rebuilt. @return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1, 2, 4, 5), warn_unused_result)) +static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_log_table_apply_delete_low( /*===========================*/ btr_pcur_t* pcur, /*!< in/out: B-tree cursor, will be trashed */ const ulint* offsets, /*!< in: offsets on pcur */ - const row_ext_t* save_ext, /*!< in: saved external field - info, or NULL */ mem_heap_t* heap, /*!< in/out: memory heap */ mtr_t* mtr) /*!< in/out: mini-transaction, will be committed */ @@ -1633,11 +1579,7 @@ row_log_table_apply_delete_low( /* Build a row template for purging secondary index entries. */ row = row_build( ROW_COPY_DATA, index, btr_pcur_get_rec(pcur), - offsets, NULL, NULL, NULL, - save_ext ? NULL : &ext, heap); - if (!save_ext) { - save_ext = ext; - } + offsets, NULL, NULL, NULL, &ext, heap); } else { row = NULL; } @@ -1656,7 +1598,7 @@ row_log_table_apply_delete_low( } const dtuple_t* entry = row_build_index_entry( - row, save_ext, index, heap); + row, ext, index, heap); mtr_start(mtr); btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, pcur, mtr); @@ -1699,11 +1641,10 @@ flag_ok: /******************************************************//** Replays a delete operation on a table that was rebuilt. @return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result)) +static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_log_table_apply_delete( /*=======================*/ - que_thr_t* thr, /*!< in: query graph */ ulint trx_id_col, /*!< in: position of DB_TRX_ID in the new clustered index */ @@ -1712,9 +1653,7 @@ row_log_table_apply_delete( mem_heap_t* offsets_heap, /*!< in/out: memory heap that can be emptied */ mem_heap_t* heap, /*!< in/out: memory heap */ - const row_log_t* log, /*!< in: online log */ - const row_ext_t* save_ext) /*!< in: saved external field - info, or NULL */ + const row_log_t* log) /*!< in: online log */ { dict_table_t* new_table = log->table; dict_index_t* index = dict_table_get_first_index(new_table); @@ -1814,8 +1753,7 @@ all_done: } } - return(row_log_table_apply_delete_low(&pcur, offsets, save_ext, - heap, &mtr)); + return row_log_table_apply_delete_low(&pcur, offsets, heap, &mtr); } /******************************************************//** @@ -2026,7 +1964,7 @@ func_exit_committed: /* Some BLOBs are missing, so we are interpreting this ROW_T_UPDATE as ROW_T_DELETE (see *1). */ error = row_log_table_apply_delete_low( - &pcur, cur_offsets, NULL, heap, &mtr); + &pcur, cur_offsets, heap, &mtr); goto func_exit_committed; } @@ -2064,7 +2002,7 @@ func_exit_committed: } error = row_log_table_apply_delete_low( - &pcur, cur_offsets, NULL, heap, &mtr); + &pcur, cur_offsets, heap, &mtr); ut_ad(mtr.state == MTR_COMMITTED); if (error == DB_SUCCESS) { @@ -2210,8 +2148,6 @@ row_log_table_apply_op( ulint extra_size; const mrec_t* next_mrec; dtuple_t* old_pk; - row_ext_t* ext; - ulint ext_size; ut_ad(dict_index_is_clust(dup->index)); ut_ad(dup->index->table != log->table); @@ -2219,7 +2155,7 @@ row_log_table_apply_op( *error = DB_SUCCESS; - /* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */ + /* 3 = 1 (op type) + 1 (extra_size) + at least 1 byte payload */ if (mrec + 3 >= mrec_end) { return(NULL); } @@ -2269,14 +2205,12 @@ row_log_table_apply_op( break; case ROW_T_DELETE: - /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ - if (mrec + 6 >= mrec_end) { + /* 1 (extra_size) + at least 1 (payload) */ + if (mrec + 2 >= mrec_end) { return(NULL); } extra_size = *mrec++; - ext_size = mach_read_from_4(mrec); - mrec += 4; ut_ad(mrec < mrec_end); /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. @@ -2285,40 +2219,16 @@ row_log_table_apply_op( rec_offs_set_n_fields(offsets, new_index->n_uniq + 2); rec_init_offsets_temp(mrec, new_index, offsets); - next_mrec = mrec + rec_offs_data_size(offsets) + ext_size; + next_mrec = mrec + rec_offs_data_size(offsets); if (next_mrec > mrec_end) { return(NULL); } log->head.total += next_mrec - mrec_start; - /* If there are external fields, retrieve those logged - prefix info and reconstruct the row_ext_t */ - if (ext_size) { - /* We use memcpy to avoid unaligned - access on some non-x86 platforms.*/ - ext = static_cast( - mem_heap_dup(heap, - mrec + rec_offs_data_size(offsets), - ext_size)); - - byte* ext_start = reinterpret_cast(ext); - - ulint ext_len = sizeof(*ext) - + (ext->n_ext - 1) * sizeof ext->len; - - ext->ext = reinterpret_cast(ext_start + ext_len); - ext_len += ext->n_ext * sizeof(*ext->ext); - - ext->buf = static_cast(ext_start + ext_len); - } else { - ext = NULL; - } - *error = row_log_table_apply_delete( - thr, new_trx_id_col, - mrec, offsets, offsets_heap, heap, - log, ext); + new_trx_id_col, + mrec, offsets, offsets_heap, heap, log); break; case ROW_T_UPDATE: diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 52f6a963ec6..c239722e4f7 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -435,6 +435,8 @@ err_exit: *avail = srv_sort_buf_size - log->tail.bytes; if (size > *avail) { + /* Make sure log->tail.buf is large enough */ + ut_ad(size <= sizeof log->tail.buf); return(log->tail.buf); } else { return(log->tail.block + log->tail.bytes); @@ -528,12 +530,10 @@ row_log_table_delete( { ulint old_pk_extra_size; ulint old_pk_size; - ulint ext_size = 0; ulint mrec_size; ulint avail_size; mem_heap_t* heap = NULL; const dtuple_t* old_pk; - row_ext_t* ext; ut_ad(dict_index_is_clust(index)); ut_ad(rec_offs_validate(rec, index, offsets)); @@ -614,72 +614,20 @@ row_log_table_delete( &old_pk_extra_size); ut_ad(old_pk_extra_size < 0x100); - mrec_size = 6 + old_pk_size; - - /* Log enough prefix of the BLOB unless both the - old and new table are in COMPACT or REDUNDANT format, - which store the prefix in the clustered index record. */ - if (rec_offs_any_extern(offsets) - && (dict_table_get_format(index->table) >= UNIV_FORMAT_B - || dict_table_get_format(new_table) >= UNIV_FORMAT_B)) { - - /* Build a cache of those off-page column prefixes - that are referenced by secondary indexes. It can be - that none of the off-page columns are needed. */ - row_build(ROW_COPY_DATA, index, rec, - offsets, NULL, NULL, NULL, &ext, heap); - if (ext) { - /* Log the row_ext_t, ext->ext and ext->buf */ - ext_size = ext->n_ext * ext->max_len - + sizeof(*ext) - + ext->n_ext * sizeof(ulint) - + (ext->n_ext - 1) * sizeof ext->len; - mrec_size += ext_size; - } - } + /* 2 = 1 (extra_size) + at least 1 byte payload */ + mrec_size = 2 + old_pk_size; if (byte* b = row_log_table_open(index->online_log, mrec_size, &avail_size)) { *b++ = ROW_T_DELETE; *b++ = static_cast(old_pk_extra_size); - /* Log the size of external prefix we saved */ - mach_write_to_4(b, ext_size); - b += 4; - rec_convert_dtuple_to_temp( b + old_pk_extra_size, new_index, old_pk->fields, old_pk->n_fields); b += old_pk_size; - if (ext_size) { - ulint cur_ext_size = sizeof(*ext) - + (ext->n_ext - 1) * sizeof ext->len; - - memcpy(b, ext, cur_ext_size); - b += cur_ext_size; - - /* Check if we need to col_map to adjust the column - number. If columns were added/removed/reordered, - adjust the column number. */ - if (const ulint* col_map = - index->online_log->col_map) { - for (ulint i = 0; i < ext->n_ext; i++) { - const_cast(ext->ext[i]) = - col_map[ext->ext[i]]; - } - } - - memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext)); - b += ext->n_ext * sizeof(*ext->ext); - - ext_size -= cur_ext_size - + ext->n_ext * sizeof(*ext->ext); - memcpy(b, ext->buf, ext_size); - b += ext_size; - } - row_log_table_close( index->online_log, b, mrec_size, avail_size); } @@ -1601,15 +1549,13 @@ row_log_table_apply_insert( /******************************************************//** Deletes a record from a table that is being rebuilt. @return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1, 2, 4, 5), warn_unused_result)) +static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_log_table_apply_delete_low( /*===========================*/ btr_pcur_t* pcur, /*!< in/out: B-tree cursor, will be trashed */ const ulint* offsets, /*!< in: offsets on pcur */ - const row_ext_t* save_ext, /*!< in: saved external field - info, or NULL */ mem_heap_t* heap, /*!< in/out: memory heap */ mtr_t* mtr) /*!< in/out: mini-transaction, will be committed */ @@ -1633,11 +1579,7 @@ row_log_table_apply_delete_low( /* Build a row template for purging secondary index entries. */ row = row_build( ROW_COPY_DATA, index, btr_pcur_get_rec(pcur), - offsets, NULL, NULL, NULL, - save_ext ? NULL : &ext, heap); - if (!save_ext) { - save_ext = ext; - } + offsets, NULL, NULL, NULL, &ext, heap); } else { row = NULL; } @@ -1656,7 +1598,7 @@ row_log_table_apply_delete_low( } const dtuple_t* entry = row_build_index_entry( - row, save_ext, index, heap); + row, ext, index, heap); mtr_start(mtr); btr_pcur_open(index, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, pcur, mtr); @@ -1699,11 +1641,10 @@ flag_ok: /******************************************************//** Replays a delete operation on a table that was rebuilt. @return DB_SUCCESS or error code */ -static MY_ATTRIBUTE((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result)) +static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_log_table_apply_delete( /*=======================*/ - que_thr_t* thr, /*!< in: query graph */ ulint trx_id_col, /*!< in: position of DB_TRX_ID in the new clustered index */ @@ -1712,9 +1653,7 @@ row_log_table_apply_delete( mem_heap_t* offsets_heap, /*!< in/out: memory heap that can be emptied */ mem_heap_t* heap, /*!< in/out: memory heap */ - const row_log_t* log, /*!< in: online log */ - const row_ext_t* save_ext) /*!< in: saved external field - info, or NULL */ + const row_log_t* log) /*!< in: online log */ { dict_table_t* new_table = log->table; dict_index_t* index = dict_table_get_first_index(new_table); @@ -1814,8 +1753,7 @@ all_done: } } - return(row_log_table_apply_delete_low(&pcur, offsets, save_ext, - heap, &mtr)); + return row_log_table_apply_delete_low(&pcur, offsets, heap, &mtr); } /******************************************************//** @@ -2026,7 +1964,7 @@ func_exit_committed: /* Some BLOBs are missing, so we are interpreting this ROW_T_UPDATE as ROW_T_DELETE (see *1). */ error = row_log_table_apply_delete_low( - &pcur, cur_offsets, NULL, heap, &mtr); + &pcur, cur_offsets, heap, &mtr); goto func_exit_committed; } @@ -2064,7 +2002,7 @@ func_exit_committed: } error = row_log_table_apply_delete_low( - &pcur, cur_offsets, NULL, heap, &mtr); + &pcur, cur_offsets, heap, &mtr); ut_ad(mtr.state == MTR_COMMITTED); if (error == DB_SUCCESS) { @@ -2210,8 +2148,6 @@ row_log_table_apply_op( ulint extra_size; const mrec_t* next_mrec; dtuple_t* old_pk; - row_ext_t* ext; - ulint ext_size; ut_ad(dict_index_is_clust(dup->index)); ut_ad(dup->index->table != log->table); @@ -2219,7 +2155,7 @@ row_log_table_apply_op( *error = DB_SUCCESS; - /* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */ + /* 3 = 1 (op type) + 1 (extra_size) + at least 1 byte payload */ if (mrec + 3 >= mrec_end) { return(NULL); } @@ -2269,14 +2205,12 @@ row_log_table_apply_op( break; case ROW_T_DELETE: - /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */ - if (mrec + 6 >= mrec_end) { + /* 1 (extra_size) + at least 1 (payload) */ + if (mrec + 2 >= mrec_end) { return(NULL); } extra_size = *mrec++; - ext_size = mach_read_from_4(mrec); - mrec += 4; ut_ad(mrec < mrec_end); /* We assume extra_size < 0x100 for the PRIMARY KEY prefix. @@ -2285,40 +2219,16 @@ row_log_table_apply_op( rec_offs_set_n_fields(offsets, new_index->n_uniq + 2); rec_init_offsets_temp(mrec, new_index, offsets); - next_mrec = mrec + rec_offs_data_size(offsets) + ext_size; + next_mrec = mrec + rec_offs_data_size(offsets); if (next_mrec > mrec_end) { return(NULL); } log->head.total += next_mrec - mrec_start; - /* If there are external fields, retrieve those logged - prefix info and reconstruct the row_ext_t */ - if (ext_size) { - /* We use memcpy to avoid unaligned - access on some non-x86 platforms.*/ - ext = static_cast( - mem_heap_dup(heap, - mrec + rec_offs_data_size(offsets), - ext_size)); - - byte* ext_start = reinterpret_cast(ext); - - ulint ext_len = sizeof(*ext) - + (ext->n_ext - 1) * sizeof ext->len; - - ext->ext = reinterpret_cast(ext_start + ext_len); - ext_len += ext->n_ext * sizeof(*ext->ext); - - ext->buf = static_cast(ext_start + ext_len); - } else { - ext = NULL; - } - *error = row_log_table_apply_delete( - thr, new_trx_id_col, - mrec, offsets, offsets_heap, heap, - log, ext); + new_trx_id_col, + mrec, offsets, offsets_heap, heap, log); break; case ROW_T_UPDATE: