From 68cac26108cb6351e1793954864f2a88aa3e9d98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Fri, 10 Jan 2025 16:40:29 +0200 Subject: [PATCH] MDEV-35049: Fix bogus BTR_CUR_HASH_FAIL on PAGE_CUR_LE btr_cur_t::search_leaf(): Do not attempt to use the adaptive hash index for PAGE_CUR_G or PAGE_CUR_L, because those modes expect an inequal result, and the adaptive hash index can only deliver equal results. btr_cur_t::check_mismatch(): Only handle PAGE_CUR_LE and PAGE_CUR_GE. For PAGE_CUR_LE (bool ge=false), qualify a full match for the last record of a page that is not at the end of the index. Previously, an adaptive hash index lookup would fail when the record is at the end of an index page but not at the end of the index. This would lead to unnecessary rebuild of the adaptive hash index in read-only workloads. Reviewed by: Vladislav Lesin --- storage/innobase/btr/btr0cur.cc | 4 +- storage/innobase/btr/btr0sea.cc | 6 +- storage/innobase/include/btr0cur.h | 5 +- storage/innobase/include/btr0sea.h | 4 +- storage/innobase/page/page0cur.cc | 156 +++++++++++++--------- storage/innobase/unittest/innodb_ahi-t.cc | 2 +- 6 files changed, 102 insertions(+), 75 deletions(-) diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 692f4b9d2d3..987ed658315 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1130,10 +1130,12 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, # endif if (latch_mode > BTR_MODIFY_LEAF) /* The adaptive hash index cannot be useful for these searches. */; + else if (mode != PAGE_CUR_LE && mode != PAGE_CUR_GE) + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_G); /* We do a dirty read of btr_search.enabled below, and btr_search_guess_on_hash() will have to check it again. */ else if (!btr_search.enabled); - else if (btr_search_guess_on_hash(index(), tuple, mode, + else if (btr_search_guess_on_hash(index(), tuple, mode != PAGE_CUR_LE, latch_mode, this, mtr)) { /* Search using the hash index succeeded */ diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index 21a7d19454b..f572bdffd99 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1028,7 +1028,7 @@ and the function returns TRUE, then cursor->up_match and cursor->low_match both have sensible values. @param[in,out] index index @param[in] tuple logical record -@param[in] mode PAGE_CUR_L, .... +@param[in] ge false=PAGE_CUR_LE, true=PAGE_CUR_GE @param[in] latch_mode BTR_SEARCH_LEAF, ... @param[out] cursor tree cursor @param[in] mtr mini-transaction @@ -1038,7 +1038,7 @@ bool btr_search_guess_on_hash( dict_index_t* index, const dtuple_t* tuple, - page_cur_mode_t mode, + bool ge, btr_latch_mode latch_mode, btr_cur_t* cursor, mtr_t* mtr) noexcept @@ -1174,7 +1174,7 @@ btr_search_guess_on_hash( /* Check the validity of the guess within the page */ if (index_id != btr_page_get_index_id(block->page.frame) || - cursor->check_mismatch(*tuple, mode, comp)) + cursor->check_mismatch(*tuple, ge, comp)) goto mismatch; uint8_t n_hash_potential= index->search_info.n_hash_potential; diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index bb911674ffd..4d1a4059cad 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -753,13 +753,12 @@ struct btr_cur_t { /** Check if a guessed position for a tree cursor is correct. @param tuple search key - @param mode PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, PAGE_CUR_GE + @param ge false=PAGE_CUR_LE, true=PAGE_CUR_GE @param comp nonzero if ROW_FORMAT=REDUNDANT is not being used @retval true on mismatch or corruption @retval false on a match; if mode=PAGE_CUR_LE, then up_match,low_match will be set correctly. */ - bool check_mismatch(const dtuple_t& tuple, page_cur_mode_t mode, ulint comp) - noexcept; + bool check_mismatch(const dtuple_t &tuple, bool ge, ulint comp) noexcept; #endif }; diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index aa8a8e50fe4..e975a53ae8c 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -43,7 +43,7 @@ and the function returns TRUE, then cursor->up_match and cursor->low_match both have sensible values. @param[in,out] index index @param[in] tuple logical record -@param[in] mode PAGE_CUR_L, .... +@param[in] ge false=PAGE_CUR_LE, true=PAGE_CUR_GE @param[in] latch_mode BTR_SEARCH_LEAF, ... @param[out] cursor tree cursor @param[in] mtr mini-transaction @@ -52,7 +52,7 @@ bool btr_search_guess_on_hash( dict_index_t* index, const dtuple_t* tuple, - page_cur_mode_t mode, + bool ge, btr_latch_mode latch_mode, btr_cur_t* cursor, mtr_t* mtr) noexcept; diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index c337cdd772b..a67e9875e43 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -32,6 +32,9 @@ Created 10/4/1994 Heikki Tuuri #include "log0recv.h" #include "rem0cmp.h" #include "gis0rtree.h" +#ifdef UNIV_DEBUG +# include "trx0roll.h" +#endif #ifdef BTR_CUR_HASH_ADAPT /** Get the pad character code point for a type. @@ -516,24 +519,63 @@ bool page_cur_search_with_match_bytes(const dtuple_t &tuple, return false; } +/** Compare a data tuple to a physical record. +@param page B-tree index page +@param rec B-tree index record +@param index index B-tree +@param tuple search key +@param match matched fields << 16 | bytes +@param comp nonzero if ROW_FORMAT=REDUNDANT is not being used +@return the comparison result of dtuple and rec +@retval 0 if dtuple is equal to rec +@retval negative if dtuple is less than rec +@retval positive if dtuple is greater than rec */ static int cmp_dtuple_rec_leaf(const dtuple_t &dtuple, const rec_t *rec, const dict_index_t &index, uint16_t *matched_fields, ulint comp) noexcept { ut_ad(dtuple_check_typed(&dtuple)); - ut_ad(page_rec_is_leaf(rec)); ut_ad(!!comp == index.table->not_redundant()); ulint cur_field= *matched_fields; ut_ad(dtuple.n_fields_cmp > 0); ut_ad(dtuple.n_fields_cmp <= index.n_core_fields); ut_ad(cur_field <= dtuple.n_fields_cmp); + ut_ad(page_rec_is_leaf(rec)); + ut_ad(!(rec_get_info_bits(rec, comp) & REC_INFO_MIN_REC_FLAG) || + index.is_instant() || + (index.is_primary() && trx_roll_crash_recv_trx && + !trx_rollback_is_active)); + ut_ad(!(dtuple.info_bits & REC_INFO_MIN_REC_FLAG) || + index.is_instant() || + (index.is_primary() && trx_roll_crash_recv_trx && + !trx_rollback_is_active)); int ret= 0; + if (dtuple.info_bits & REC_INFO_MIN_REC_FLAG) + { + *matched_fields= 0; + return -!(rec_get_info_bits(rec, comp) & REC_INFO_MIN_REC_FLAG); + } + else if (rec_get_info_bits(rec, comp) & REC_INFO_MIN_REC_FLAG) + { + *matched_fields= 0; + return 1; + } + if (UNIV_LIKELY(comp != 0)) { - const unsigned n_core_null_bytes= index.n_core_null_bytes; const byte *nulls= rec - REC_N_NEW_EXTRA_BYTES; - const byte *lens= --nulls - n_core_null_bytes; + const byte *lens; + if (rec_get_status(rec) == REC_STATUS_INSTANT) + { + ulint n_fields= index.n_core_fields + rec_get_n_add_field(nulls) + 1; + ut_ad(n_fields <= index.n_fields); + const ulint n_nullable= index.get_n_nullable(n_fields); + ut_ad(n_nullable <= index.n_nullable); + lens= --nulls - UT_BITS_IN_BYTES(n_nullable); + } + else + lens= --nulls - index.n_core_null_bytes; byte null_mask= 1; size_t i= 0; @@ -616,70 +658,39 @@ static int cmp_dtuple_rec_leaf(const dtuple_t &dtuple, const rec_t *rec, return ret; } -bool btr_cur_t::check_mismatch(const dtuple_t& tuple, page_cur_mode_t mode, - ulint comp) noexcept +bool btr_cur_t::check_mismatch(const dtuple_t &tuple, bool ge, ulint comp) + noexcept { + ut_ad(page_is_leaf(page_cur.block->page.frame)); + ut_ad(page_rec_is_user_rec(page_cur.rec)); + const rec_t *rec= page_cur.rec; uint16_t match= 0; int cmp= cmp_dtuple_rec_leaf(tuple, rec, *index(), &match, comp); + const auto uniq= dict_index_get_n_unique_in_tree(index()); + ut_ad(match <= uniq); + ut_ad(match <= tuple.n_fields_cmp); + ut_ad(match < uniq || !cmp); - switch (mode) { - const rec_t *other; - const page_t *page; - case PAGE_CUR_GE: - if (cmp > 0) - return true; - up_match= match; - if (match >= dict_index_get_n_unique_in_tree(index())) - return false; - goto check_ge; - case PAGE_CUR_G: - if (cmp >= 0) - return true; - check_ge: - match= 0; - if (!(other= page_rec_get_prev_const(rec))) - return true; - page= page_cur.block->page.frame; - if (uintptr_t(other - page) == - (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)) - return page_has_prev(page); - if (UNIV_LIKELY(comp != 0)) - switch (rec_get_status(other)) { - case REC_STATUS_INSTANT: - case REC_STATUS_ORDINARY: - break; - default: - return true; - } - cmp= cmp_dtuple_rec_leaf(tuple, other, *index(), &match, comp); - return (mode == PAGE_CUR_GE) ? cmp <= 0 : cmp < 0; - case PAGE_CUR_LE: + const page_t *const page= page_cur.block->page.frame; + + if (UNIV_LIKELY(!ge)) + { if (cmp < 0) return true; low_match= match; - goto check_le; - case PAGE_CUR_L: - if (cmp <= 0) - return true; - check_le: - match= 0; - ut_ad(!page_rec_is_supremum(rec)); - page= page_cur.block->page.frame; + up_match= 0; if (UNIV_LIKELY(comp != 0)) { - other= page_rec_next_get(page, rec); - if (!other) + rec= page_rec_next_get(page, rec); + if (!rec) return true; - if (other - page == PAGE_NEW_SUPREMUM) - { + if (uintptr_t(rec - page) == PAGE_NEW_SUPREMUM) le_supremum: - if (page_has_next(page)) - return true; - up_match= 0; - return false; - } - switch (rec_get_status(other)) { + /* If we matched the full key at the end of a page (but not the index), + the adaptive hash index was successful. */ + return page_has_next(page) && match < uniq; + switch (rec_get_status(rec)) { case REC_STATUS_INSTANT: case REC_STATUS_ORDINARY: break; @@ -689,20 +700,35 @@ bool btr_cur_t::check_mismatch(const dtuple_t& tuple, page_cur_mode_t mode, } else { - other= page_rec_next_get(page, rec); - if (!other) + rec= page_rec_next_get(page, rec); + if (!rec) return true; - if (other - page == PAGE_OLD_SUPREMUM) + if (uintptr_t(rec - page) == PAGE_OLD_SUPREMUM) goto le_supremum; } - cmp= cmp_dtuple_rec_leaf(tuple, other, *index(), &match, comp); - if (mode != PAGE_CUR_LE) - return cmp > 0; + return cmp_dtuple_rec_leaf(tuple, rec, *index(), &up_match, comp) >= 0; + } + else + { + if (cmp > 0) + return true; up_match= match; - return cmp >= 0; - default: - ut_ad("invalid mode" == 0); - return true; + if (match >= uniq) + return false; + match= 0; + if (!(rec= page_rec_get_prev_const(rec))) + return true; + if (uintptr_t(rec - page) == (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)) + return page_has_prev(page); + if (UNIV_LIKELY(comp != 0)) + switch (rec_get_status(rec)) { + case REC_STATUS_INSTANT: + case REC_STATUS_ORDINARY: + break; + default: + return true; + } + return cmp_dtuple_rec_leaf(tuple, rec, *index(), &match, comp) <= 0; } } diff --git a/storage/innobase/unittest/innodb_ahi-t.cc b/storage/innobase/unittest/innodb_ahi-t.cc index eb7b3ed0973..2ec188a1c63 100644 --- a/storage/innobase/unittest/innodb_ahi-t.cc +++ b/storage/innobase/unittest/innodb_ahi-t.cc @@ -23,7 +23,7 @@ void dict_mem_table_free(dict_table_t*) {} void dict_mem_index_free(dict_index_t*) {} buf_block_t *buf_LRU_get_free_block(buf_LRU_get) { return nullptr; } ibool dtuple_check_typed(const dtuple_t*) { return true; } -bool btr_cur_t::check_mismatch(const dtuple_t&,page_cur_mode_t,ulint) noexcept +bool btr_cur_t::check_mismatch(const dtuple_t&,bool,ulint) noexcept { return false; } buf_block_t *buf_page_get_gen(const page_id_t, ulint, rw_lock_type_t, buf_block_t*,ulint,mtr_t*,dberr_t*)