From 7624bf868ed0d08276cb27c5cdbd007faf4de3c4 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Sat, 6 Aug 2022 01:16:21 +0530 Subject: [PATCH] MDEV-29250 InnoDB: Failing assertion: table->get_ref_count() == 0 Reason: ====== This issue is caused by a race condition between fulltext DDL and the purge thread. DDL sets the signal to stop the purge thread from processing new undo log records and waits for the FTS table undo log records that are already being processed to finish. But in dict_acquire_mdl_shared(), InnoDB releases all InnoDB table-related locks before acquiring the MDL. At the same time, DDL assumes that there are no purge threads working on the FTS table. There is a possibility that the purge thread can skip processing valid undo log records if it checks purge_sys.must_wait_FTS() twice in different places. Solution: ========== Add the purge_sys.must_wait_FTS() check in dict_acquire_mdl_shared() to prevent the purge thread from processing undo log records. dict_table_open_on_id(): return -1 if the purge thread has to wait. dict_acquire_mdl_shared(): Added 1 new parameter to indicate that the purge thread is invoking the function; return -1 if the purge thread has to wait. 
--- storage/innobase/dict/dict0dict.cc | 31 +++++++++++++++++++++------- storage/innobase/include/dict0dict.h | 2 +- storage/innobase/row/row0purge.cc | 3 ++- 3 files changed, 26 insertions(+), 10 deletions(-) diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 52a7d5a4b36..ffb76de4f7b 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -662,7 +662,7 @@ dict_table_t::parse_name<>(char(&)[NAME_LEN + 1], char(&)[NAME_LEN + 1], @param[in] table_op operation to perform when opening @return table object after locking MDL shared @retval nullptr if the table is not readable, or if trylock && MDL blocked */ -template +template dict_table_t* dict_acquire_mdl_shared(dict_table_t *table, THD *thd, @@ -674,9 +674,11 @@ dict_acquire_mdl_shared(dict_table_t *table, MDL_context *mdl_context= static_cast(thd_mdl_context(thd)); size_t db_len; + dict_table_t *not_found= nullptr; if (trylock) { + static_assert(!trylock || !purge_thd, "usage"); dict_sys.freeze(SRW_LOCK_CALL); db_len= dict_get_db_name_len(table->name.m_name); dict_sys.unfreeze(); @@ -748,7 +750,13 @@ retry: } } +retry_table_open: dict_sys.freeze(SRW_LOCK_CALL); + if (purge_thd && purge_sys.must_wait_FTS()) + { + not_found= reinterpret_cast(-1); + goto return_without_mdl; + } table= dict_sys.find_table(table_id); if (table) table->acquire(); @@ -756,6 +764,11 @@ retry: { dict_sys.unfreeze(); dict_sys.lock(SRW_LOCK_CALL); + if (purge_thd && purge_sys.must_wait_FTS()) + { + dict_sys.unlock(); + goto retry_table_open; + } table= dict_load_table_on_id(table_id, table_op == DICT_TABLE_OP_LOAD_TABLESPACE ? 
DICT_ERR_IGNORE_RECOVER_LOCK @@ -777,7 +790,7 @@ return_without_mdl: mdl_context->release_lock(*mdl); *mdl= nullptr; } - return nullptr; + return not_found; } size_t db1_len, tbl1_len; @@ -814,9 +827,9 @@ return_without_mdl: goto retry; } -template dict_table_t* dict_acquire_mdl_shared +template dict_table_t* dict_acquire_mdl_shared (dict_table_t*,THD*,MDL_ticket**,dict_table_op_t); -template dict_table_t* dict_acquire_mdl_shared +template dict_table_t* dict_acquire_mdl_shared (dict_table_t*,THD*,MDL_ticket**,dict_table_op_t); /** Look up a table by numeric identifier. @@ -842,13 +855,14 @@ dict_table_open_on_id(table_id_t table_id, bool dict_locked, { if (purge_thd && purge_sys.must_wait_FTS()) { - table= nullptr; + table= reinterpret_cast(-1); goto func_exit; } table->acquire(); if (thd && !dict_locked) - table= dict_acquire_mdl_shared(table, thd, mdl, table_op); + table= dict_acquire_mdl_shared( + table, thd, mdl, table_op); } else if (table_op != DICT_TABLE_OP_OPEN_ONLY_IF_CACHED) { @@ -866,7 +880,7 @@ dict_table_open_on_id(table_id_t table_id, bool dict_locked, if (purge_thd && purge_sys.must_wait_FTS()) { dict_sys.unlock(); - return nullptr; + return reinterpret_cast(-1); } table->acquire(); } @@ -876,7 +890,8 @@ dict_table_open_on_id(table_id_t table_id, bool dict_locked, if (table && thd) { dict_sys.freeze(SRW_LOCK_CALL); - table= dict_acquire_mdl_shared(table, thd, mdl, table_op); + table= dict_acquire_mdl_shared( + table, thd, mdl, table_op); dict_sys.unfreeze(); } return table; diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 29673f5bc95..cfaf4fab83e 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -132,7 +132,7 @@ enum dict_table_op_t { @param[in] table_op operation to perform when opening @return table object after locking MDL shared @retval NULL if the table is not readable, or if trylock && MDL blocked */ -template +template dict_table_t* 
dict_acquire_mdl_shared(dict_table_t *table, THD *thd, diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index 47625b91f35..8bbb0a36144 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -941,7 +941,8 @@ try_again: table_id, false, DICT_TABLE_OP_NORMAL, node->purge_thd, &node->mdl_ticket); - if (!node->table && purge_sys.must_wait_FTS()) { + if (node->table == reinterpret_cast(-1)) { + /* purge stop signal */ goto try_again; }