From 7078203389b04e742de660d78c36034a3a4deb59 Mon Sep 17 00:00:00 2001 From: Sergey Vojtovich <svoj@mariadb.org> Date: Wed, 27 Dec 2017 20:07:20 +0400 Subject: [PATCH] MDEV-14756 - Remove trx_sys_t::rw_trx_list Use atomic operations when accessing trx_sys_t::max_trx_id. We can't yet move trx_sys_t::get_new_trx_id() out of the mutex because trx_sys_t::max_trx_id must be updated atomically along with trx_sys_t::rw_trx_ids. --- storage/innobase/handler/ha_innodb.cc | 4 +- storage/innobase/include/lock0lock.h | 7 +- storage/innobase/include/trx0sys.h | 110 ++++++++++++++++++-------- storage/innobase/include/trx0sys.ic | 61 -------------- storage/innobase/lock/lock0lock.cc | 20 ++--- storage/innobase/page/page0page.cc | 2 +- storage/innobase/read/read0read.cc | 4 +- storage/innobase/row/row0vers.cc | 9 +-- storage/innobase/trx/trx0sys.cc | 50 +++++------- storage/innobase/trx/trx0trx.cc | 12 +-- 10 files changed, 119 insertions(+), 160 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 2236abf593c..a719c1e6208 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -3645,7 +3645,7 @@ static ulonglong innodb_prepare_commit_versioned(THD* thd, ulonglong *trx_id) DBUG_ASSERT(trx->rsegs.m_redo.rseg); mutex_enter(&trx_sys->mutex); - trx_id_t commit_id = trx_sys_get_new_trx_id(); + trx_id_t commit_id = trx_sys->get_new_trx_id(); mutex_exit(&trx_sys->mutex); return commit_id; @@ -19907,7 +19907,7 @@ wsrep_fake_trx_id( THD *thd) /*!< in: user thread handle */ { mutex_enter(&trx_sys->mutex); - trx_id_t trx_id = trx_sys_get_new_trx_id(); + trx_id_t trx_id = trx_sys->get_new_trx_id(); mutex_exit(&trx_sys->mutex); WSREP_DEBUG("innodb fake trx id: " TRX_ID_FMT " thd: %s", trx_id, wsrep_thd_query(thd)); diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 06600960c27..1ff9e8ad570 100644 --- a/storage/innobase/include/lock0lock.h +++ 
b/storage/innobase/include/lock0lock.h @@ -609,7 +609,7 @@ lock_report_trx_id_insanity( const rec_t* rec, /*!< in: user record */ dict_index_t* index, /*!< in: index */ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - trx_id_t max_trx_id); /*!< in: trx_sys_get_max_trx_id() */ + trx_id_t max_trx_id); /*!< in: trx_sys->get_max_trx_id() */ /*********************************************************************//** Prints info of locks for all transactions. @return FALSE if not able to obtain lock mutex and exits without @@ -827,7 +827,6 @@ Set the lock system timeout event. */ void lock_set_timeout_event(); /*====================*/ -#ifdef UNIV_DEBUG /*********************************************************************//** Checks that a transaction id is sensible, i.e., not in the future. @return true if ok */ @@ -837,8 +836,8 @@ lock_check_trx_id_sanity( trx_id_t trx_id, /*!< in: trx id */ const rec_t* rec, /*!< in: user record */ dict_index_t* index, /*!< in: index */ - const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */ - MY_ATTRIBUTE((warn_unused_result)); + const ulint* offsets); /*!< in: rec_get_offsets(rec, index) */ +#ifdef UNIV_DEBUG /*******************************************************************//** Check if the transaction holds any locks on the sys tables or its records. diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index b3a009771e8..a3d6129d094 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -132,21 +132,6 @@ trx_sysf_rseg_set_page_no( ulint page_no, /*!< in: page number, FIL_NULL if the slot is reset to unused */ mtr_t* mtr); /*!< in: mtr */ -/*****************************************************************//** -Allocates a new transaction id. 
-@return new, allocated trx id */ -UNIV_INLINE -trx_id_t -trx_sys_get_new_trx_id(); -/*===================*/ -/*****************************************************************//** -Determines the maximum transaction id. -@return maximum currently allocated trx id; will be stale after the -next call to trx_sys_get_new_trx_id() */ -UNIV_INLINE -trx_id_t -trx_sys_get_max_trx_id(void); -/*========================*/ #ifdef UNIV_DEBUG /* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */ @@ -419,6 +404,11 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */ /** Size of the doublewrite block in pages */ #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE + +/** When a trx id which is zero modulo this number (which must be a power of +two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system +page is updated */ +#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256) /* @} */ trx_t* current_trx(); @@ -847,20 +837,24 @@ public: /** The transaction system central memory data structure. */ struct trx_sys_t { +private: + /** + The smallest number not yet assigned as a transaction id or transaction + number. Accessed and updated with atomic operations. + */ + char pad0[CACHE_LINE_SIZE]; + trx_id_t m_max_trx_id; + char pad1[CACHE_LINE_SIZE]; + + +public: TrxSysMutex mutex; /*!< mutex protecting most fields in this structure except when noted otherwise */ MVCC* mvcc; /*!< Multi version concurrency control manager */ - volatile trx_id_t - max_trx_id; /*!< The smallest number not yet - assigned as a transaction id or - transaction number. This is declared - volatile because it can be accessed - without holding any mutex during - AC-NL-RO view creation. 
*/ trx_ut_list_t serialisation_list; /*!< Ordered on trx_t::no of all the currenrtly active RW transactions */ @@ -870,7 +864,7 @@ struct trx_sys_t { #endif /* UNIV_DEBUG */ /** Avoid false sharing */ - const char pad2[CACHE_LINE_SIZE]; + char pad2[CACHE_LINE_SIZE]; trx_ut_list_t mysql_trx_list; /*!< List of transactions created for MySQL. All user transactions are on mysql_trx_list. The rw_trx_hash @@ -891,11 +885,11 @@ struct trx_sys_t { consistent snapshot. */ /** Avoid false sharing */ - const char pad3[CACHE_LINE_SIZE]; + char pad3[CACHE_LINE_SIZE]; /** Temporary rollback segments */ trx_rseg_t* temp_rsegs[TRX_SYS_N_RSEGS]; /** Avoid false sharing */ - const char pad4[CACHE_LINE_SIZE]; + char pad4[CACHE_LINE_SIZE]; trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS]; /*!< Pointer array to rollback @@ -910,7 +904,7 @@ struct trx_sys_t { transactions), protected by rseg->mutex */ - const char rw_trx_hash_pre_pad[CACHE_LINE_SIZE]; + char rw_trx_hash_pre_pad[CACHE_LINE_SIZE]; /** @@ -919,7 +913,7 @@ struct trx_sys_t { */ rw_trx_hash_t rw_trx_hash; - const char rw_trx_hash_post_pad[CACHE_LINE_SIZE]; + char rw_trx_hash_post_pad[CACHE_LINE_SIZE]; ulint n_prepared_trx; /*!< Number of transactions currently in the XA PREPARED state */ @@ -940,18 +934,64 @@ struct trx_sys_t { must look at the trx->state to find out if the minimum trx id transaction itself is active, or already committed.) - @return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty + @return the minimum trx id, or m_max_trx_id if the trx list is empty */ trx_id_t get_min_trx_id() { - trx_id_t id= trx_sys_get_max_trx_id(); + trx_id_t id= get_max_trx_id(); rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action> (get_min_trx_id_callback), &id); return id; } + /** + Determines the maximum transaction id. 
+ + @return maximum currently allocated trx id; will be stale after the + next call to trx_sys->get_new_trx_id() + */ + + trx_id_t get_max_trx_id(void) + { + return static_cast<trx_id_t> + (my_atomic_load64_explicit(reinterpret_cast<int64*>(&m_max_trx_id), + MY_MEMORY_ORDER_RELAXED)); + } + + + /** + Allocates a new transaction id. + + VERY important: after the database is started, m_max_trx_id value is + divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if + will evaluate to TRUE when this function is first time called, + and the value for trx id will be written to disk-based header! + Thus trx id values will not overlap when the database is + repeatedly started! + + @return new, allocated trx id + */ + + trx_id_t get_new_trx_id() + { + ut_ad(mutex_own(&trx_sys->mutex)); + trx_id_t id= static_cast<trx_id_t>(my_atomic_add64_explicit( + reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED)); + + if (UNIV_UNLIKELY(!(id % TRX_SYS_TRX_ID_WRITE_MARGIN))) + flush_max_trx_id(); + return(id); + } + + + void init_max_trx_id(trx_id_t value) + { + m_max_trx_id= value; + } + + private: static my_bool get_min_trx_id_callback(rw_trx_hash_element_t *element, trx_id_t *id) @@ -966,12 +1006,14 @@ private: } return 0; } -}; -/** When a trx id which is zero modulo this number (which must be a power of -two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system -page is updated */ -#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256) + + /** + Writes the value of m_max_trx_id to the file based trx system header. + */ + + void flush_max_trx_id(); +}; /** Test if trx_sys->mutex is owned. 
*/ #define trx_sys_mutex_own() (trx_sys->mutex.is_owned()) diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic index 497f87e4e2c..7fc810526e2 100644 --- a/storage/innobase/include/trx0sys.ic +++ b/storage/innobase/include/trx0sys.ic @@ -45,12 +45,6 @@ typedef byte trx_sysf_rseg_t; /* Size of a rollback segment specification slot */ #define TRX_SYS_RSEG_SLOT_SIZE 8 -/*****************************************************************//** -Writes the value of max_trx_id to the file based trx system header. */ -void -trx_sys_flush_max_trx_id(void); -/*==========================*/ - /** Checks if a page address is the trx sys header page. @param[in] page_id page id @return true if trx sys header page */ @@ -191,58 +185,3 @@ trx_write_trx_id( ut_ad(id > 0); mach_write_to_6(ptr, id); } - -/*****************************************************************//** -Allocates a new transaction id. -@return new, allocated trx id */ -UNIV_INLINE -trx_id_t -trx_sys_get_new_trx_id() -/*====================*/ -{ - /* wsrep_fake_trx_id violates this assert */ - ut_ad(trx_sys_mutex_own()); - - /* VERY important: after the database is started, max_trx_id value is - divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if - will evaluate to TRUE when this function is first time called, - and the value for trx id will be written to disk-based header! - Thus trx id values will not overlap when the database is - repeatedly started! */ - - if (!(trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN)) { - - trx_sys_flush_max_trx_id(); - } - - return(trx_sys->max_trx_id++); -} - -/*****************************************************************//** -Determines the maximum transaction id. 
-@return maximum currently allocated trx id; will be stale after the -next call to trx_sys_get_new_trx_id() */ -UNIV_INLINE -trx_id_t -trx_sys_get_max_trx_id(void) -/*========================*/ -{ - ut_ad(!trx_sys_mutex_own()); - -#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN - /* Avoid torn reads. */ - - trx_sys_mutex_enter(); - - trx_id_t max_trx_id = trx_sys->max_trx_id; - - trx_sys_mutex_exit(); - - return(max_trx_id); -#else - /* Perform a dirty read. Callers should be prepared for stale - values, and we know that the value fits in a machine word, so - that it will be read and written atomically. */ - return(trx_sys->max_trx_id); -#endif /* UNIV_WORD_SIZE < DATA_TRX_ID_LEN */ -} diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 5758780302a..715b1c9daa6 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -354,7 +354,7 @@ lock_report_trx_id_insanity( const rec_t* rec, /*!< in: user record */ dict_index_t* index, /*!< in: index */ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */ + trx_id_t max_trx_id) /*!< in: trx_sys->get_max_trx_id() */ { ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!rec_is_default_row(rec, index)); @@ -371,11 +371,6 @@ lock_report_trx_id_insanity( /*********************************************************************//** Checks that a transaction id is sensible, i.e., not in the future. 
@return true if ok */ -#ifdef UNIV_DEBUG - -#else -static MY_ATTRIBUTE((warn_unused_result)) -#endif bool lock_check_trx_id_sanity( /*=====================*/ @@ -387,15 +382,14 @@ lock_check_trx_id_sanity( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(!rec_is_default_row(rec, index)); - trx_id_t max_trx_id = trx_sys_get_max_trx_id(); - bool is_ok = trx_id < max_trx_id; + trx_id_t max_trx_id = trx_sys->get_max_trx_id(); - if (!is_ok) { + if (trx_id >= max_trx_id) { lock_report_trx_id_insanity( trx_id, rec, index, offsets, max_trx_id); + return false; } - - return(is_ok); + return(true); } /*********************************************************************//** @@ -5215,7 +5209,7 @@ lock_release( { lock_t* lock; ulint count = 0; - trx_id_t max_trx_id = trx_sys_get_max_trx_id(); + trx_id_t max_trx_id = trx_sys->get_max_trx_id(); ut_ad(lock_mutex_own()); ut_ad(!trx_mutex_own(trx)); @@ -5639,7 +5633,7 @@ lock_print_info_summary( "------------\n", file); fprintf(file, "Trx id counter " TRX_ID_FMT "\n", - trx_sys_get_max_trx_id()); + trx_sys->get_max_trx_id()); fprintf(file, "Purge done for trx's n:o < " TRX_ID_FMT diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 624e31685fe..7f9b7964a18 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -2432,7 +2432,7 @@ page_validate( && page_is_leaf(page) && !page_is_empty(page)) { trx_id_t max_trx_id = page_get_max_trx_id(page); - trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id(); + trx_id_t sys_max_trx_id = trx_sys->get_max_trx_id(); if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) { ib::error() << "PAGE_MAX_TRX_ID out of bounds: " diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc index 8f4dd4f37b2..e95c89449e6 100644 --- a/storage/innobase/read/read0read.cc +++ b/storage/innobase/read/read0read.cc @@ -459,7 +459,7 @@ ReadView::prepare(trx_id_t id) m_creator_trx_id = id; - m_low_limit_no = m_low_limit_id 
= trx_sys->max_trx_id; + m_low_limit_no = m_low_limit_id = trx_sys->get_max_trx_id(); if (!trx_sys->rw_trx_ids.empty()) { copy_trx_ids(trx_sys->rw_trx_ids); @@ -580,7 +580,7 @@ MVCC::view_open(ReadView*& view, trx_t* trx) view->m_closed = false; - if (view->m_low_limit_id == trx_sys_get_max_trx_id()) { + if (view->m_low_limit_id == trx_sys->get_max_trx_id()) { return; } else { view->m_closed = true; diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index fc8b1b2ec81..6c34464b2f8 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -126,14 +126,7 @@ row_vers_impl_x_locked_low( if (trx == 0) { /* The transaction that modified or inserted clust_rec is no longer active, or it is corrupt: no implicit lock on rec */ - trx_sys_mutex_enter(); - bool corrupt = trx_id >= trx_sys->max_trx_id; - trx_sys_mutex_exit(); - if (corrupt) { - lock_report_trx_id_insanity( - trx_id, clust_rec, clust_index, clust_offsets, - trx_sys_get_max_trx_id()); - } + lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, clust_offsets); mem_heap_free(heap); DBUG_RETURN(0); } diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index e03fc915573..bf74ed05201 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -58,7 +58,7 @@ ReadView::check_trx_id_sanity( trx_id_t id, const table_name_t& name) { - if (id >= trx_sys->max_trx_id) { + if (id >= trx_sys->get_max_trx_id()) { ib::warn() << "A transaction id" << " in a record of table " @@ -89,33 +89,25 @@ ReadView::check_trx_id_sanity( uint trx_rseg_n_slots_debug = 0; #endif -/*****************************************************************//** -Writes the value of max_trx_id to the file based trx system header. */ -void -trx_sys_flush_max_trx_id(void) -/*==========================*/ + +/** + Writes the value of m_max_trx_id to the file based trx system header. 
+*/ + +void trx_sys_t::flush_max_trx_id() { - mtr_t mtr; - trx_sysf_t* sys_header; - - /* wsrep_fake_trx_id violates this assert - Copied from trx_sys_get_new_trx_id - */ - ut_ad(trx_sys_mutex_own()); - - if (!srv_read_only_mode) { - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - mlog_write_ull( - sys_header + TRX_SYS_TRX_ID_STORE, - trx_sys->max_trx_id, &mtr); - - mtr_commit(&mtr); - } + ut_ad(trx_sys->mutex.is_owned()); + if (!srv_read_only_mode) + { + mtr_t mtr; + mtr.start(); + mlog_write_ull(trx_sysf_get(&mtr) + TRX_SYS_TRX_ID_STORE, + trx_sys->get_max_trx_id(), &mtr); + mtr.commit(); + } } + /*****************************************************************//** Updates the offset information about the end of the MySQL binlog entry which corresponds to the transaction just being committed. In a MySQL @@ -432,7 +424,7 @@ trx_sys_init_at_db_start() /* VERY important: after the database is started, max_trx_id value is divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in - trx_sys_get_new_trx_id will evaluate to TRUE when the function + trx_sys->get_new_trx_id will evaluate to TRUE when the function is first time called, and the value for trx id will be written to the disk-based header! Thus trx id values will not overlap when the database is repeatedly started! 
*/ @@ -442,13 +434,13 @@ trx_sys_init_at_db_start() sys_header = trx_sysf_get(&mtr); - trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN + trx_sys->init_max_trx_id(2 * TRX_SYS_TRX_ID_WRITE_MARGIN + ut_uint64_align_up(mach_read_from_8(sys_header + TRX_SYS_TRX_ID_STORE), - TRX_SYS_TRX_ID_WRITE_MARGIN); + TRX_SYS_TRX_ID_WRITE_MARGIN)); mtr.commit(); - ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id); + ut_d(trx_sys->rw_max_trx_id = trx_sys->get_max_trx_id()); trx_dummy_sess = sess_open(); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 9f0767657f7..f1322a9f3a7 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -969,7 +969,7 @@ trx_lists_init_at_db_start() " cleaned up in total " << rows_to_undo << " row operations to undo"; - ib::info() << "Trx id counter is " << trx_sys->max_trx_id; + ib::info() << "Trx id counter is " << trx_sys->get_max_trx_id(); } std::sort(trx_sys->rw_trx_ids.begin(), trx_sys->rw_trx_ids.end()); @@ -1089,7 +1089,7 @@ trx_t::assign_temp_rseg() if (id == 0) { mutex_enter(&trx_sys->mutex); - id = trx_sys_get_new_trx_id(); + id = trx_sys->get_new_trx_id(); trx_sys->rw_trx_ids.push_back(id); mutex_exit(&trx_sys->mutex); trx_sys->rw_trx_hash.insert(this); @@ -1180,7 +1180,7 @@ trx_start_low( trx_sys_mutex_enter(); - trx->id = trx_sys_get_new_trx_id(); + trx->id = trx_sys->get_new_trx_id(); trx_sys->rw_trx_ids.push_back(trx->id); @@ -1212,7 +1212,7 @@ trx_start_low( ut_ad(!srv_read_only_mode); - trx->id = trx_sys_get_new_trx_id(); + trx->id = trx_sys->get_new_trx_id(); trx_sys->rw_trx_ids.push_back(trx->id); @@ -1249,7 +1249,7 @@ trx_serialise(trx_t* trx, trx_rseg_t* rseg) trx_sys_mutex_enter(); - trx->no = trx_sys_get_new_trx_id(); + trx->no = trx_sys->get_new_trx_id(); /* Track the minimum serialisation number. 
*/ UT_LIST_ADD_LAST(trx_sys->serialisation_list, trx); @@ -2768,7 +2768,7 @@ trx_set_rw_mode( ut_ad(trx->rsegs.m_redo.rseg != 0); mutex_enter(&trx_sys->mutex); - trx->id = trx_sys_get_new_trx_id(); + trx->id = trx_sys->get_new_trx_id(); trx_sys->rw_trx_ids.push_back(trx->id);