MDEV-13637 InnoDB change buffer housekeeping can cause redo log overrun and possibly deadlocks

The function ibuf_remove_free_page() may be called while the caller
is holding several mutexes or rw-locks. Because of this, this
housekeeping loop may cause performance glitches for operations that
involve tables that are stored in the InnoDB system tablespace.
Also deadlocks might be possible.

The worst impact of all is that due to the mutexes being held, calls to
log_free_check() had to be skipped during this housekeeping.
This means that the cyclic InnoDB redo log may be overwritten.
If the system crashes during this, it would be unable to recover.

The entry point to the problematic code is ibuf_free_excess_pages().
It would make sense to call it before acquiring any mutexes or rw-locks,
in any 'pessimistic' operation that involves the system tablespace.

fseg_create_general(), fseg_alloc_free_page_general(): Do not call
ibuf_free_excess_pages() while potentially holding some latches.

ibuf_remove_free_page(): Do call log_free_check(), like every operation
that is about to generate redo log should do.

ibuf_free_excess_pages(): Remove some assertions that are replaced
by stricter assertions in the log_free_check() that is now called by
ibuf_remove_free_page().

row_mtr_start(): New function, to perform necessary preparations when
starting a mini-transaction for row operations. For pessimistic operations
on secondary indexes that are located in the system tablespace,
this includes calling ibuf_free_excess_pages().

row_undo_ins_remove_sec_low(), row_undo_mod_del_mark_or_remove_sec_low(),
row_undo_mod_del_unmark_sec_and_undo_update(): Call row_mtr_start().

row_ins_sec_index_entry(): Call ibuf_free_excess_pages() if the operation
may involve allocating pages and change buffering in the system tablespace.

row_upd_sec_index_entry(): Slightly refactor the code. The
delete-marking of the old entry is done in-place. It could be
change-buffered, but the old code should be unlikely to have
invoked ibuf_free_excess_pages() in this case.
This commit is contained in:
Marko Mäkelä 2017-08-28 08:57:51 +03:00
parent a544225d0a
commit f87cb652ac
7 changed files with 59 additions and 66 deletions

View file

@ -2107,15 +2107,6 @@ fseg_create_general(
fil_block_check_type(block, type, mtr);
}
if (rw_lock_get_x_lock_count(&space->latch) == 1) {
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
if (space_id == IBUF_SPACE_ID) {
ibuf_free_excess_pages();
}
}
if (!has_done_reservation
&& !fsp_reserve_free_extents(&n_reserved, space_id, 2,
FSP_NORMAL, mtr)) {
@ -2699,15 +2690,6 @@ fseg_alloc_free_page_general(
space = mtr_x_lock_space(space_id, mtr);
const page_size_t page_size(space->flags);
if (rw_lock_get_x_lock_count(&space->latch) == 1) {
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
if (space_id == IBUF_SPACE_ID) {
ibuf_free_excess_pages();
}
}
inode = fseg_inode_get(seg_header, space_id, page_size, mtr, &iblock);
fil_block_check_type(iblock, FIL_PAGE_INODE, mtr);

View file

@ -2104,6 +2104,8 @@ ibuf_remove_free_page(void)
page_t* root;
page_t* bitmap_page;
log_free_check();
mtr_start(&mtr);
fil_space_t* space = mtr.set_sys_modified();
const page_size_t page_size(space->flags);
@ -2211,19 +2213,7 @@ void
ibuf_free_excess_pages(void)
/*========================*/
{
ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL), RW_LOCK_X));
ut_ad(rw_lock_get_x_lock_count(
fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
/* NOTE: We require that the thread did not own the latch before,
because then we know that we can obey the correct latching order
for ibuf latches */
if (!ibuf) {
/* Not yet initialized; not sure if this is possible, but
does no harm to check for it. */
if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
return;
}

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2016, MariaDB Corporation.
Copyright (c) 2016, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -30,6 +30,7 @@ Created 4/20/1996 Heikki Tuuri
#include "univ.i"
#include "data0data.h"
#include "dict0types.h"
#include "ibuf0ibuf.h"
#include "trx0types.h"
#include "que0types.h"
#include "mtr0mtr.h"
@ -387,6 +388,34 @@ row_raw_format(
in bytes */
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Prepare to start a mini-transaction to modify an index.
@param[in,out] mtr mini-transaction
@param[in,out] index possibly secondary index
@param[in] pessimistic whether this is a pessimistic operation */
inline
void
row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
{
mtr->start();
switch (index->space) {
case IBUF_SPACE_ID:
if (pessimistic
&& !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
ibuf_free_excess_pages();
}
break;
case SRV_TMP_SPACE_ID:
mtr->set_log_mode(MTR_LOG_NO_REDO);
break;
default:
mtr->set_named_space(index->space);
break;
}
log_free_check();
}
#include "row0row.ic"
#endif

View file

@ -34,6 +34,7 @@ Created 4/20/1996 Heikki Tuuri
#include "btr0btr.h"
#include "btr0cur.h"
#include "mach0data.h"
#include "ibuf0ibuf.h"
#include "que0que.h"
#include "row0upd.h"
#include "row0sel.h"
@ -3267,6 +3268,11 @@ row_ins_sec_index_entry(
if (err == DB_FAIL) {
mem_heap_empty(heap);
if (index->space == IBUF_SPACE_ID
&& !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
ibuf_free_excess_pages();
}
/* Try then pessimistic descent to the B-tree */
log_free_check();

View file

@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -193,19 +194,15 @@ row_undo_ins_remove_sec_low(
dberr_t err = DB_SUCCESS;
mtr_t mtr;
enum row_search_result search_result;
ibool modify_leaf = false;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
log_free_check();
memset(&pcur, 0, sizeof(pcur));
mtr_start(&mtr);
mtr.set_named_space(index->space);
dict_disable_redo_if_temporary(index->table, &mtr);
row_mtr_start(&mtr, index, !modify_leaf);
if (mode == BTR_MODIFY_LEAF) {
if (modify_leaf) {
mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
mtr_s_lock(dict_index_get_lock(index), &mtr);
modify_leaf = true;
} else {
ut_ad(mode == (BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE));
mtr_sx_lock(dict_index_get_lock(index), &mtr);
@ -216,7 +213,7 @@ row_undo_ins_remove_sec_low(
}
if (dict_index_is_spatial(index)) {
if (mode & BTR_MODIFY_LEAF) {
if (modify_leaf) {
mode |= BTR_RTREE_DELETE_MARK;
}
btr_pcur_get_btr_cur(&pcur)->thr = thr;

View file

@ -422,23 +422,15 @@ row_undo_mod_del_mark_or_remove_sec_low(
mtr_t mtr;
mtr_t mtr_vers;
row_search_result search_result;
ibool modify_leaf = false;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
log_free_check();
//mtr_start_trx(&mtr, thr_get_trx(thr));
mtr_start(&mtr);
mtr.set_named_space(index->space);
dict_disable_redo_if_temporary(index->table, &mtr);
if (mode == BTR_MODIFY_LEAF) {
modify_leaf = true;
}
row_mtr_start(&mtr, index, !modify_leaf);
if (!index->is_committed()) {
/* The index->online_status may change if the index is
or was being created online, but not committed yet. It
is protected by index->lock. */
if (mode == BTR_MODIFY_LEAF) {
if (modify_leaf) {
mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
mtr_s_lock(dict_index_get_lock(index), &mtr);
} else {
@ -459,7 +451,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
btr_cur = btr_pcur_get_btr_cur(&pcur);
if (dict_index_is_spatial(index)) {
if (mode & BTR_MODIFY_LEAF) {
if (modify_leaf) {
btr_cur->thr = thr;
mode |= BTR_RTREE_DELETE_MARK;
}
@ -631,11 +623,7 @@ row_undo_mod_del_unmark_sec_and_undo_update(
}
try_again:
log_free_check();
//mtr_start_trx(&mtr, thr_get_trx(thr));
mtr_start(&mtr);
mtr.set_named_space(index->space);
dict_disable_redo_if_temporary(index->table, &mtr);
row_mtr_start(&mtr, index, !(mode & BTR_MODIFY_LEAF));
if (!index->is_committed()) {
/* The index->online_status may change if the index is

View file

@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2016, MariaDB Corporation.
Copyright (c) 2015, 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -2300,17 +2300,18 @@ row_upd_sec_index_entry(
"before_row_upd_sec_index_entry");
mtr_start_trx(&mtr, trx);
mtr.set_named_space(index->space);
/* Disable REDO logging as lifetime of temp-tables is limited to
server or connection lifetime and so REDO information is not needed
on restart for recovery.
Disable locking as temp-tables are not shared across connection. */
if (dict_table_is_temporary(index->table)) {
flags = BTR_NO_LOCKING_FLAG;
switch (index->space) {
case SRV_TMP_SPACE_ID:
mtr.set_log_mode(MTR_LOG_NO_REDO);
} else {
flags = BTR_NO_LOCKING_FLAG;
break;
default:
mtr.set_named_space(index->space);
/* fall through */
case IBUF_SPACE_ID:
flags = 0;
break;
}
if (!index->is_committed()) {