MDEV-22769 Shutdown hang or crash due to XA breaking locks

The background drop table queue in InnoDB is a work-around for
cases where the SQL layer is requesting DDL on tables on which
transactional locks exist.

One such case is XA transactions. Our test case exploits the
fact that the recovery of XA PREPARE transactions will
only resurrect InnoDB table locks, but not the metadata locks
(MDL) that should block any concurrent DDL.

srv_shutdown_t: Introduce the value SRV_SHUTDOWN_INITIATED of
srv_shutdown_state for the initial part of shutdown, during which
we wait for the background drop table queue to be emptied.

srv_shutdown_bg_undo_sources(): Assign
srv_shutdown_state=SRV_SHUTDOWN_INITIATED
before waiting for the background drop table queue to be emptied.

row_drop_tables_for_mysql_in_background(): On slow shutdown, if
no active transactions exist (excluding ones that are in
XA PREPARE state), skip any tables on which locks exist.

row_drop_table_for_mysql(): Do not unnecessarily attempt to
drop InnoDB persistent statistics for tables that have
already been added to the background drop table queue.

row_mysql_close(): Relax an assertion, and free all memory
even if innodb_force_recovery=2 would prevent the background
drop table queue from being emptied.
This commit is contained in:
Marko Mäkelä 2020-06-05 14:59:33 +03:00
parent 138c11cce5
commit efc70da5fd
13 changed files with 94 additions and 50 deletions

View file

@ -5,11 +5,19 @@ XA START 'x';
UPDATE t1 set a=2;
XA END 'x';
XA PREPARE 'x';
connect con2,localhost,root;
CREATE TABLE t2 (a INT) ENGINE=InnoDB;
XA START 'y';
INSERT INTO t2 VALUES (1);
XA END 'y';
XA PREPARE 'y';
connection default;
disconnect con1;
disconnect con2;
connect con1,localhost,root;
SELECT * FROM t1 LOCK IN SHARE MODE;
connection default;
DROP TABLE t2;
disconnect con1;
SET TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SELECT * FROM t1;
@ -20,3 +28,5 @@ SELECT * FROM t1;
a
1
DROP TABLE t1;
SET GLOBAL innodb_fast_shutdown=0;
XA ROLLBACK 'y';

View file

@ -5,7 +5,7 @@
# MDEV-8841 - close tables opened by previous tests,
# so they don't get marked crashed when the server gets crashed
--disable_query_log
call mtr.add_suppression("Found 1 prepared XA transactions");
call mtr.add_suppression("Found [12] prepared XA transactions");
FLUSH TABLES;
--enable_query_log
@ -13,6 +13,9 @@ CREATE TABLE t1 (a INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1);
connect (con1,localhost,root);
XA START 'x'; UPDATE t1 set a=2; XA END 'x'; XA PREPARE 'x';
connect (con2,localhost,root);
CREATE TABLE t2 (a INT) ENGINE=InnoDB;
XA START 'y'; INSERT INTO t2 VALUES (1); XA END 'y'; XA PREPARE 'y';
connection default;
# innodb_force_recovery=2 prevents the purge and tests that the fix of
@ -25,6 +28,7 @@ connection default;
--let $shutdown_timeout=
disconnect con1;
disconnect con2;
connect (con1,localhost,root);
--send SELECT * FROM t1 LOCK IN SHARE MODE
@ -35,6 +39,8 @@ let $wait_condition=
info = 'SELECT * FROM t1 LOCK IN SHARE MODE';
--source include/wait_condition.inc
DROP TABLE t2;
--source include/restart_mysqld.inc
disconnect con1;
@ -45,3 +51,8 @@ XA ROLLBACK 'x';
SELECT * FROM t1;
DROP TABLE t1;
SET GLOBAL innodb_fast_shutdown=0;
--source include/restart_mysqld.inc
XA ROLLBACK 'y';

View file

@ -3150,7 +3150,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ulint last_activity = srv_get_activity_count();
ulint last_pages = 0;
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
while (srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
ulint curr_time = ut_time_ms();
/* The page_cleaner skips sleep if the server is
@ -3168,7 +3168,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ret_sleep = 0;
}
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
break;
}
@ -3335,7 +3335,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(void*)
ut_d(buf_flush_page_cleaner_disabled_loop());
}
ut_ad(srv_shutdown_state > 0);
ut_ad(srv_shutdown_state > SRV_SHUTDOWN_INITIATED);
if (srv_fast_shutdown == 2
|| srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
/* In very fast shutdown or when innodb failed to start, we

View file

@ -1076,6 +1076,7 @@ struct rotate_thread_t {
case SRV_SHUTDOWN_EXIT_THREADS:
/* srv_init_abort() must have been invoked */
case SRV_SHUTDOWN_CLEANUP:
case SRV_SHUTDOWN_INITIATED:
return true;
case SRV_SHUTDOWN_FLUSH_PHASE:
case SRV_SHUTDOWN_LAST_PHASE:

View file

@ -2790,8 +2790,7 @@ fts_optimize_thread(
/* Assign number of tables added in fts_slots_t to n_tables */
n_tables = ib_vector_size(fts_slots);
while (!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
while (!done && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
/* If there is no message in the queue and we have tables
to optimize then optimize the tables. */

View file

@ -2614,7 +2614,7 @@ ibuf_merge(
when a slow shutdown is being executed. During a slow
shutdown, the insert buffer merge must be completed. */
if (ibuf->empty && !srv_shutdown_state) {
if (ibuf->empty && srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
return(0);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
} else if (ibuf_debug) {

View file

@ -113,6 +113,8 @@ extern ibool srv_start_raw_disk_in_use;
/** Shutdown state */
enum srv_shutdown_t {
SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */
/** Shutdown initiated in srv_shutdown_bg_undo_sources() */
SRV_SHUTDOWN_INITIATED,
SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in
logs_empty_and_mark_files_at_shutdown() */
SRV_SHUTDOWN_FLUSH_PHASE,/*!< At this phase the master and the

View file

@ -1203,7 +1203,7 @@ loop:
}
}
if (UNIV_UNLIKELY(srv_shutdown_state != SRV_SHUTDOWN_NONE)) {
if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) {
service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
"InnoDB log write: "
LSN_PF "," LSN_PF,
@ -1430,7 +1430,7 @@ log_group_checkpoint(lsn_t end_lsn)
ut_ad(end_lsn == 0 || end_lsn >= log_sys->next_checkpoint_lsn);
ut_ad(end_lsn <= log_sys->lsn);
ut_ad(end_lsn + SIZE_OF_MLOG_CHECKPOINT <= log_sys->lsn
|| srv_shutdown_state != SRV_SHUTDOWN_NONE);
|| srv_shutdown_state > SRV_SHUTDOWN_INITIATED);
DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
" written",
@ -1600,7 +1600,7 @@ bool log_checkpoint(bool sync)
if (oldest_lsn
> log_sys->last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
/* Some log has been written since the previous checkpoint. */
} else if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
} else if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
/* MariaDB 10.3 startup expects the redo log file to be
logically empty (not even containing a MLOG_CHECKPOINT record)
after a clean shutdown. Perform an extra checkpoint at
@ -1625,7 +1625,7 @@ bool log_checkpoint(bool sync)
lsn_t flush_lsn = oldest_lsn;
const lsn_t end_lsn = log_sys->lsn;
const bool do_write
= srv_shutdown_state == SRV_SHUTDOWN_NONE
= srv_shutdown_state <= SRV_SHUTDOWN_INITIATED
|| flush_lsn != end_lsn;
if (fil_names_clear(flush_lsn, do_write)) {

View file

@ -5435,7 +5435,7 @@ fallback:
? 0 : posix_fallocate(file, current_size,
size - current_size);
} while (err == EINTR
&& srv_shutdown_state == SRV_SHUTDOWN_NONE);
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED);
switch (err) {
case 0:
@ -5475,7 +5475,7 @@ fallback:
os_offset_t current_size = os_file_get_size(file);
while (current_size < size
&& srv_shutdown_state == SRV_SHUTDOWN_NONE) {
&& srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
ulint n_bytes;
if (size - current_size < (os_offset_t) buf_size) {

View file

@ -100,7 +100,7 @@ static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
static ib_mutex_t row_drop_list_mutex;
/** Flag: has row_mysql_drop_list been initialized? */
static ibool row_mysql_drop_list_inited = FALSE;
static bool row_mysql_drop_list_inited;
/*******************************************************************//**
Determine if the given name is a name reserved for MySQL system tables.
@ -2572,15 +2572,33 @@ next:
ut_a(!table->can_be_evicted);
bool skip = false;
if (!table->to_be_dropped) {
skip:
dict_table_close(table, FALSE, FALSE);
mutex_enter(&row_drop_list_mutex);
UT_LIST_REMOVE(row_mysql_drop_list, drop);
UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
if (!skip) {
UT_LIST_ADD_LAST(row_mysql_drop_list, drop);
} else {
ut_free(drop);
}
goto next;
}
if (!srv_fast_shutdown && !trx_sys_any_active_transactions()) {
lock_mutex_enter();
skip = UT_LIST_GET_LEN(table->locks) != 0;
lock_mutex_exit();
if (skip) {
/* We cannot drop tables that are locked by XA
PREPARE transactions. */
goto skip;
}
}
char* name = mem_strdup(table->name.m_name);
dict_table_close(table, FALSE, FALSE);
@ -3390,15 +3408,15 @@ row_drop_table_for_mysql(
btr_defragment_remove_table(table);
}
/* Remove stats for this table and all of its indexes from the
persistent storage if it exists and if there are stats for this
table in there. This function creates its own trx and commits
it. */
char errstr[1024];
err = dict_stats_drop_table(name, errstr, sizeof(errstr));
if (err != DB_SUCCESS) {
ib::warn() << errstr;
if (UNIV_LIKELY(!strstr(name, "/" TEMP_FILE_PREFIX_INNODB))) {
/* Remove any persistent statistics for this table,
in a separate transaction. */
char errstr[1024];
err = dict_stats_drop_table(name, errstr,
sizeof errstr);
if (err != DB_SUCCESS) {
ib::warn() << errstr;
}
}
}
@ -4808,19 +4826,22 @@ row_mysql_init(void)
row_mysql_drop_list,
&row_mysql_drop_t::row_mysql_drop_list);
row_mysql_drop_list_inited = TRUE;
row_mysql_drop_list_inited = true;
}
/*********************************************************************//**
Close this module */
void
row_mysql_close(void)
/*================*/
void row_mysql_close()
{
ut_a(UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
ut_ad(!UT_LIST_GET_LEN(row_mysql_drop_list) ||
srv_force_recovery >= SRV_FORCE_NO_BACKGROUND);
if (row_mysql_drop_list_inited)
{
row_mysql_drop_list_inited= false;
mutex_free(&row_drop_list_mutex);
if (row_mysql_drop_list_inited) {
mutex_free(&row_drop_list_mutex);
row_mysql_drop_list_inited = FALSE;
}
while (row_mysql_drop_t *drop= UT_LIST_GET_FIRST(row_mysql_drop_list))
{
UT_LIST_REMOVE(row_mysql_drop_list, drop);
ut_free(drop);
}
}
}

View file

@ -1004,7 +1004,7 @@ try_again:
dict_table_close(node->table, FALSE, FALSE);
rw_lock_s_unlock(&dict_operation_lock);
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return(false);
}
os_thread_sleep(1000000);
@ -1167,7 +1167,7 @@ row_purge(
ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_S));
if (purged
|| srv_shutdown_state != SRV_SHUTDOWN_NONE
|| srv_shutdown_state > SRV_SHUTDOWN_INITIATED
|| node->vcol_op_failed()) {
return;
}

View file

@ -1802,7 +1802,7 @@ loop:
srv_refresh_innodb_monitor_stats();
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
goto exit_func;
}
@ -1914,7 +1914,7 @@ loop:
os_event_wait_time_low(srv_error_event, 1000000, sig_count);
if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
goto loop;
}
@ -1964,7 +1964,7 @@ srv_get_active_thread_type(void)
srv_sys_mutex_exit();
if (ret == SRV_NONE && srv_shutdown_state != SRV_SHUTDOWN_NONE
if (ret == SRV_NONE && srv_shutdown_state > SRV_SHUTDOWN_INITIATED
&& purge_sys != NULL) {
/* Check only on shutdown. */
switch (trx_purge_state()) {
@ -2219,7 +2219,7 @@ srv_master_do_active_tasks(void)
ut_d(srv_master_do_disabled_loop());
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return;
}
@ -2244,7 +2244,7 @@ srv_master_do_active_tasks(void)
/* Now see if various tasks that are performed at defined
intervals need to be performed. */
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return;
}
@ -2269,7 +2269,7 @@ srv_master_do_active_tasks(void)
early and often to avoid those situations. */
DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", return;);
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return;
}
@ -2312,7 +2312,7 @@ srv_master_do_idle_tasks(void)
ut_d(srv_master_do_disabled_loop());
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return;
}
@ -2328,7 +2328,7 @@ srv_master_do_idle_tasks(void)
MONITOR_INC_TIME_IN_MICRO_SECS(
MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return;
}
@ -2356,7 +2356,7 @@ srv_master_do_idle_tasks(void)
early and often to avoid those situations. */
DBUG_EXECUTE_IF("ib_log_checkpoint_avoid", return;);
if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
if (srv_shutdown_state > SRV_SHUTDOWN_INITIATED) {
return;
}
@ -2454,8 +2454,7 @@ DECLARE_THREAD(srv_master_thread)(
ut_a(slot == srv_sys.sys_threads);
loop:
while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
while (srv_shutdown_state <= SRV_SHUTDOWN_INITIATED) {
srv_master_sleep();
MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
@ -2470,6 +2469,7 @@ loop:
switch (srv_shutdown_state) {
case SRV_SHUTDOWN_NONE:
case SRV_SHUTDOWN_INITIATED:
break;
case SRV_SHUTDOWN_FLUSH_PHASE:
case SRV_SHUTDOWN_LAST_PHASE:
@ -2508,8 +2508,7 @@ static
bool
srv_purge_should_exit(ulint n_purged)
{
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_NONE
|| srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
ut_ad(srv_shutdown_state <= SRV_SHUTDOWN_CLEANUP);
if (srv_undo_sources) {
return(false);

View file

@ -2757,6 +2757,7 @@ srv_shutdown_bg_undo_sources()
{
if (srv_undo_sources) {
ut_ad(!srv_read_only_mode);
srv_shutdown_state = SRV_SHUTDOWN_INITIATED;
fts_optimize_shutdown();
dict_stats_shutdown();
while (row_get_background_drop_list_len_low()) {