From de4030e4d49805a7ded5c0bfee01cc3fd7623522 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Tue, 24 Jan 2023 14:09:21 +0200 Subject: [PATCH] MDEV-30400 Assertion height == btr_page_get_level(...) on INSERT This also fixes part of MDEV-29835 Partial server freeze which is caused by violations of the latching order that was defined in https://dev.mysql.com/worklog/task/?id=6326 (WL#6326: InnoDB: fix index->lock contention). Unless the current thread is holding an exclusive dict_index_t::lock, it must acquire page latches in a strict parent-to-child, left-to-right order. Not all cases of MDEV-29835 are fixed yet. Failure to follow the correct latching order will cause deadlocks of threads due to lock order inversion. As part of these changes, the BTR_MODIFY_TREE mode is modified so that an Update latch (U a.k.a. SX) will be acquired on the root page, and eXclusive latches (X) will be acquired on all pages leading to the leaf page, as well as any left and right siblings of the pages along the path. The DEBUG_SYNC test innodb.innodb_wl6326 will be removed, because at the time the DEBUG_SYNC point is hit, the thread is actually holding several page latches that will be blocking a concurrent SELECT statement. We also remove double bookkeeping that was caused due to excessive information hiding in mtr_t::m_memo. We simply let mtr_t::m_memo store information of latched pages, and ensure that mtr_memo_slot_t::object is never a null pointer. The tree_blocks[] and tree_savepoints[] were redundant. buf_page_get_low(): If innodb_change_buffering_debug=1, to avoid a hang, do not try to evict blocks if we are holding a latch on a modified page. The test innodb.innodb-change-buffer-recovery will be removed, because change buffering may no longer be forced by debug injection when the change buffer comprises multiple pages. Remove a debug assertion that could fail when innodb_change_buffering_debug=1 fails to evict a page. 
For other cases, the assertion is redundant, because we already checked that right after the got_block: label. The test innodb.innodb-change-buffer-recovery will be removed, because with this change, we will be unable to evict the desired page. mtr_t::lock_register(): Register a change of a page latch on an unmodified buffer-fixed block. mtr_t::x_latch_at_savepoint(), mtr_t::sx_latch_at_savepoint(): Replaced by the use of mtr_t::upgrade_buffer_fix(), which now also handles RW_S_LATCH. mtr_t::set_modified(): For temporary tables, invoke buf_page_t::set_modified() here and not in mtr_t::commit(). We will never set the MTR_MEMO_MODIFY flag on other than persistent data pages, nor set mtr_t::m_modifications when temporary data pages are modified. mtr_t::commit(): Only invoke the buf_flush_note_modification() loop if persistent data pages were modified. mtr_t::get_already_latched(): Look up a latched page in mtr_t::m_memo. This avoids many redundant entries in mtr_t::m_memo, as well as redundant calls to buf_page_get_gen() for blocks that had already been looked up in a mini-transaction. btr_get_latched_root(): Return a pointer to an already latched root page. This replaces btr_root_block_get() in cases where the mini-transaction has already latched the root page. btr_page_get_parent(): Fetch a parent page that was already latched in BTR_MODIFY_TREE, by invoking mtr_t::get_already_latched(). If needed, upgrade the root page U latch to X. This avoids bloating mtr_t::m_memo as well as performing redundant buf_pool.page_hash lookups. For non-QUICK CHECK TABLE as well as for B-tree defragmentation, we will invoke btr_cur_search_to_nth_level(). btr_cur_search_to_nth_level(): This will only be used for non-leaf (level>0) B-tree searches that were formerly named BTR_CONT_SEARCH_TREE or BTR_CONT_MODIFY_TREE. In MDEV-29835, this function could be removed altogether, or retained for the case of CHECK TABLE without QUICK. btr_cur_t::left_block: Remove. 
btr_pcur_move_backward_from_page() can retrieve the left sibling from the end of mtr_t::m_memo. btr_cur_t::open_leaf(): Some clean-up. btr_cur_t::search_leaf(): Replaces btr_cur_search_to_nth_level() for searches to level=0 (the leaf level). We will never release parent page latches before acquiring leaf page latches. If we need to temporarily release the level=1 page latch in the BTR_SEARCH_PREV or BTR_MODIFY_PREV latch_mode, we will reposition the cursor on the child node pointer so that we will land on the correct leaf page. btr_cur_t::pessimistic_search_leaf(): Implement new BTR_MODIFY_TREE latching logic in the case that page splits or merges will be needed. The parent pages (and their siblings) should already be latched on the first dive to the leaf and be present in mtr_t::m_memo; there should be no need for BTR_CONT_MODIFY_TREE. This pre-latching almost suffices; it must be revised in MDEV-29835 and work-arounds removed for cases where mtr_t::get_already_latched() fails to find a block. rtr_search_to_nth_level(): A SPATIAL INDEX version of btr_cur_search_to_nth_level() that can search to any level (including the leaf level). rtr_search_leaf(), rtr_insert_leaf(): Wrappers for rtr_search_to_nth_level(). rtr_search(): Replaces rtr_pcur_open(). rtr_latch_leaves(): Replaces btr_cur_latch_leaves(). Note that unlike in the B-tree code, there is no error handling in case the sibling pages are corrupted. rtr_cur_restore_position(): Remove an unused constant parameter. btr_pcur_open_on_user_rec(): Remove the constant parameter mode=PAGE_CUR_GE. row_ins_clust_index_entry_low(): Use a new mode=BTR_MODIFY_ROOT_AND_LEAF to gain access to the root page when mode!=BTR_MODIFY_TREE, to write the PAGE_ROOT_AUTO_INC. BTR_SEARCH_TREE, BTR_CONT_SEARCH_TREE: Remove. BTR_CONT_MODIFY_TREE: Note that this is only used by rtr_search_to_nth_level(). btr_pcur_optimistic_latch_leaves(): Replaces btr_cur_optimistic_latch_leaves(). 
ibuf_delete_rec(): Acquire exclusive ibuf.index->lock in order to avoid a deadlock with ibuf_insert_low(BTR_MODIFY_PREV). btr_blob_log_check_t(): Acquire a U latch on the root page, so that btr_page_alloc() in btr_store_big_rec_extern_fields() will avoid a deadlock. btr_store_big_rec_extern_fields(): Assert that the root page latch is being held. Tested by: Matthias Leich Reviewed by: Vladislav Lesin --- .../r/innodb-change-buffer-recovery.result | 55 - .../suite/innodb/r/innodb_wl6326.result | 405 --- .../innodb-change-buffer-recovery-master.opt | 1 - .../t/innodb-change-buffer-recovery.test | 82 - mysql-test/suite/innodb/t/innodb_wl6326.opt | 1 - mysql-test/suite/innodb/t/innodb_wl6326.test | 519 ---- .../suite/innodb_gis/r/rtree_split.result | 12 + .../suite/innodb_gis/t/rtree_split.test | 15 + storage/innobase/btr/btr0btr.cc | 506 ++-- storage/innobase/btr/btr0cur.cc | 2452 +++++++---------- storage/innobase/btr/btr0defragment.cc | 73 +- storage/innobase/btr/btr0pcur.cc | 160 +- storage/innobase/btr/btr0sea.cc | 24 +- storage/innobase/buf/buf0buf.cc | 27 +- storage/innobase/dict/dict0crea.cc | 29 +- storage/innobase/dict/dict0dict.cc | 8 +- storage/innobase/dict/dict0load.cc | 23 +- storage/innobase/dict/dict0stats.cc | 4 +- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/fsp/fsp0fsp.cc | 25 +- storage/innobase/gis/gis0sea.cc | 698 ++++- storage/innobase/handler/ha_innodb.cc | 3 +- storage/innobase/handler/handler0alter.cc | 9 +- storage/innobase/ibuf/ibuf0ibuf.cc | 28 +- storage/innobase/include/btr0btr.h | 11 +- storage/innobase/include/btr0cur.h | 100 +- storage/innobase/include/btr0pcur.h | 55 +- storage/innobase/include/btr0pcur.inl | 40 +- storage/innobase/include/btr0types.h | 32 +- storage/innobase/include/buf0buf.h | 2 +- storage/innobase/include/buf0flu.h | 2 - storage/innobase/include/gis0rtree.h | 46 +- storage/innobase/include/gis0type.h | 8 +- storage/innobase/include/ibuf0ibuf.inl | 3 +- storage/innobase/include/mtr0log.h | 32 +- 
storage/innobase/include/mtr0mtr.h | 208 +- storage/innobase/include/small_vector.h | 3 + storage/innobase/mtr/mtr0mtr.cc | 142 +- storage/innobase/row/row0import.cc | 5 +- storage/innobase/row/row0ins.cc | 104 +- storage/innobase/row/row0log.cc | 33 +- storage/innobase/row/row0merge.cc | 18 +- storage/innobase/row/row0purge.cc | 46 +- storage/innobase/row/row0row.cc | 16 +- storage/innobase/row/row0sel.cc | 16 +- storage/innobase/row/row0uins.cc | 44 +- storage/innobase/row/row0umod.cc | 53 +- storage/innobase/row/row0upd.cc | 55 +- storage/innobase/trx/trx0purge.cc | 1 - 49 files changed, 2651 insertions(+), 3585 deletions(-) delete mode 100644 mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result delete mode 100644 mysql-test/suite/innodb/r/innodb_wl6326.result delete mode 100644 mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt delete mode 100644 mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test delete mode 100644 mysql-test/suite/innodb/t/innodb_wl6326.opt delete mode 100644 mysql-test/suite/innodb/t/innodb_wl6326.test diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result deleted file mode 100644 index 670340f3583..00000000000 --- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result +++ /dev/null @@ -1,55 +0,0 @@ -# -# Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE -# OPERATION IF IT IS DONE IN-PLACE -# -call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery"); -call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc"); -call mtr.add_suppression("Plugin 'InnoDB'"); -FLUSH TABLES; -CREATE TABLE t1( -a INT AUTO_INCREMENT PRIMARY KEY, -b CHAR(1), -c INT, -INDEX(b)) -ENGINE=InnoDB STATS_PERSISTENT=0; -SET GLOBAL innodb_change_buffering_debug = 1; -SET GLOBAL innodb_change_buffering = all; -INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192; -BEGIN; -SELECT b FROM t1 LIMIT 3; -b -x -x -x 
-connect con1,localhost,root,,; -BEGIN; -DELETE FROM t1 WHERE a=1; -INSERT INTO t1 VALUES(1,'X',1); -SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; -SELECT b FROM t1 LIMIT 3; -ERROR HY000: Lost connection to server during query -disconnect con1; -connection default; -FOUND 1 /Wrote log record for ibuf update in place operation/ in mysqld.1.err -# restart: --innodb-read-only -CHECK TABLE t1; -Table Op Msg_type Msg_text -test.t1 check Error Unknown storage engine 'InnoDB' -test.t1 check error Corrupt -FOUND 1 /innodb_read_only prevents crash recovery/ in mysqld.1.err -# restart: --innodb-force-recovery=5 -SELECT * FROM t1 LIMIT 1; -a b c -1 X 1 -SHOW ENGINE INNODB STATUS; -Type Name Status -InnoDB insert 0, delete mark 0 -SET GLOBAL innodb_fast_shutdown=0; -# restart -CHECK TABLE t1; -Table Op Msg_type Msg_text -test.t1 check status OK -SHOW ENGINE INNODB STATUS; -Type Name Status -InnoDB -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/innodb_wl6326.result b/mysql-test/suite/innodb/r/innodb_wl6326.result deleted file mode 100644 index fcd58aedafe..00000000000 --- a/mysql-test/suite/innodb/r/innodb_wl6326.result +++ /dev/null @@ -1,405 +0,0 @@ -SET GLOBAL innodb_adaptive_hash_index = false; -SET GLOBAL innodb_stats_persistent = false; -connect con1,localhost,root,,; -connect con2,localhost,root,,; -connect con3,localhost,root,,; -CREATE TABLE t1 ( -a00 CHAR(255) NOT NULL DEFAULT 'a', -a01 CHAR(255) NOT NULL DEFAULT 'a', -a02 CHAR(255) NOT NULL DEFAULT 'a', -a03 CHAR(255) NOT NULL DEFAULT 'a', -a04 CHAR(255) NOT NULL DEFAULT 'a', -a05 CHAR(255) NOT NULL DEFAULT 'a', -a06 CHAR(255) NOT NULL DEFAULT 'a', -b INT NOT NULL DEFAULT 0 -) ENGINE = InnoDB; -ALTER TABLE t1 ADD PRIMARY KEY( -a00, -a01, -a02, -a03, -a04, -a05, -a06 -); -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; 
-CLUST_INDEX_SIZE -1 -SET GLOBAL innodb_limit_optimistic_insert_debug = 7; -BEGIN; -INSERT INTO t1 (a00) VALUES ('aa'); -INSERT INTO t1 (a00) VALUES ('ab'); -INSERT INTO t1 (a00) VALUES ('ac'); -INSERT INTO t1 (a00) VALUES ('ad'); -INSERT INTO t1 (a00) VALUES ('ae'); -INSERT INTO t1 (a00) VALUES ('af'); -INSERT INTO t1 (a00) VALUES ('ag'); -INSERT INTO t1 (a00) VALUES ('ah'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -3 -BEGIN; -INSERT INTO t1 (a00) VALUES ('ai'); -INSERT INTO t1 (a00) VALUES ('aj'); -INSERT INTO t1 (a00) VALUES ('ak'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -4 -BEGIN; -INSERT INTO t1 (a00) VALUES ('al'); -INSERT INTO t1 (a00) VALUES ('am'); -INSERT INTO t1 (a00) VALUES ('an'); -INSERT INTO t1 (a00) VALUES ('ao'); -INSERT INTO t1 (a00) VALUES ('ap'); -INSERT INTO t1 (a00) VALUES ('aq'); -INSERT INTO t1 (a00) VALUES ('ar'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -5 -BEGIN; -INSERT INTO t1 (a00) VALUES ('as'); -INSERT INTO t1 (a00) VALUES ('at'); -INSERT INTO t1 (a00) VALUES ('au'); -INSERT INTO t1 (a00) VALUES ('av'); -INSERT INTO t1 (a00) VALUES ('aw'); -INSERT INTO t1 (a00) VALUES ('ax'); -INSERT INTO t1 (a00) VALUES ('ay'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM 
information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -6 -BEGIN; -INSERT INTO t1 (a00) VALUES ('az'); -INSERT INTO t1 (a00) VALUES ('ba'); -INSERT INTO t1 (a00) VALUES ('bb'); -INSERT INTO t1 (a00) VALUES ('bc'); -INSERT INTO t1 (a00) VALUES ('bd'); -INSERT INTO t1 (a00) VALUES ('be'); -INSERT INTO t1 (a00) VALUES ('bf'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -7 -BEGIN; -INSERT INTO t1 (a00) VALUES ('bg'); -INSERT INTO t1 (a00) VALUES ('bh'); -INSERT INTO t1 (a00) VALUES ('bi'); -INSERT INTO t1 (a00) VALUES ('bj'); -INSERT INTO t1 (a00) VALUES ('bk'); -INSERT INTO t1 (a00) VALUES ('bl'); -INSERT INTO t1 (a00) VALUES ('bm'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -8 -BEGIN; -INSERT INTO t1 (a00) VALUES ('bn'); -INSERT INTO t1 (a00) VALUES ('bo'); -INSERT INTO t1 (a00) VALUES ('bp'); -INSERT INTO t1 (a00) VALUES ('bq'); -INSERT INTO t1 (a00) VALUES ('br'); -INSERT INTO t1 (a00) VALUES ('bs'); -INSERT INTO t1 (a00) VALUES ('bt'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -11 -BEGIN; -INSERT INTO t1 (a00) VALUES ('bu'); -INSERT INTO t1 (a00) VALUES ('bv'); -INSERT INTO t1 (a00) VALUES ('bw'); -INSERT INTO t1 (a00) VALUES ('bx'); -INSERT INTO t1 (a00) VALUES ('by'); -INSERT INTO t1 (a00) VALUES ('bz'); -INSERT INTO t1 (a00) VALUES ('ca'); -INSERT INTO t1 (a00) VALUES ('cb'); -INSERT INTO t1 
(a00) VALUES ('cc'); -INSERT INTO t1 (a00) VALUES ('cd'); -INSERT INTO t1 (a00) VALUES ('ce'); -INSERT INTO t1 (a00) VALUES ('cf'); -INSERT INTO t1 (a00) VALUES ('cg'); -INSERT INTO t1 (a00) VALUES ('ch'); -INSERT INTO t1 (a00) VALUES ('ci'); -INSERT INTO t1 (a00) VALUES ('cj'); -INSERT INTO t1 (a00) VALUES ('ck'); -INSERT INTO t1 (a00) VALUES ('cl'); -INSERT INTO t1 (a00) VALUES ('cm'); -INSERT INTO t1 (a00) VALUES ('cn'); -INSERT INTO t1 (a00) VALUES ('co'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -15 -BEGIN; -INSERT INTO t1 (a00) VALUES ('cp'); -INSERT INTO t1 (a00) VALUES ('cq'); -INSERT INTO t1 (a00) VALUES ('cr'); -INSERT INTO t1 (a00) VALUES ('cs'); -INSERT INTO t1 (a00) VALUES ('ct'); -INSERT INTO t1 (a00) VALUES ('cu'); -INSERT INTO t1 (a00) VALUES ('cv'); -INSERT INTO t1 (a00) VALUES ('cw'); -INSERT INTO t1 (a00) VALUES ('cx'); -INSERT INTO t1 (a00) VALUES ('cy'); -INSERT INTO t1 (a00) VALUES ('cz'); -INSERT INTO t1 (a00) VALUES ('da'); -INSERT INTO t1 (a00) VALUES ('db'); -INSERT INTO t1 (a00) VALUES ('dc'); -INSERT INTO t1 (a00) VALUES ('dd'); -INSERT INTO t1 (a00) VALUES ('de'); -INSERT INTO t1 (a00) VALUES ('df'); -INSERT INTO t1 (a00) VALUES ('dg'); -INSERT INTO t1 (a00) VALUES ('dh'); -INSERT INTO t1 (a00) VALUES ('di'); -INSERT INTO t1 (a00) VALUES ('dj'); -INSERT INTO t1 (a00) VALUES ('dk'); -INSERT INTO t1 (a00) VALUES ('dl'); -INSERT INTO t1 (a00) VALUES ('dm'); -INSERT INTO t1 (a00) VALUES ('dn'); -INSERT INTO t1 (a00) VALUES ('do'); -INSERT INTO t1 (a00) VALUES ('dp'); -INSERT INTO t1 (a00) VALUES ('dq'); -INSERT INTO t1 (a00) VALUES ('dr'); -INSERT INTO t1 (a00) VALUES ('ds'); -INSERT INTO t1 (a00) VALUES ('dt'); -INSERT INTO t1 (a00) VALUES ('du'); -INSERT INTO t1 (a00) VALUES ('dv'); -INSERT INTO t1 (a00) VALUES ('dw'); 
-INSERT INTO t1 (a00) VALUES ('dx'); -INSERT INTO t1 (a00) VALUES ('dy'); -INSERT INTO t1 (a00) VALUES ('dz'); -INSERT INTO t1 (a00) VALUES ('ea'); -INSERT INTO t1 (a00) VALUES ('eb'); -INSERT INTO t1 (a00) VALUES ('ec'); -INSERT INTO t1 (a00) VALUES ('ed'); -INSERT INTO t1 (a00) VALUES ('ee'); -INSERT INTO t1 (a00) VALUES ('ef'); -INSERT INTO t1 (a00) VALUES ('eg'); -INSERT INTO t1 (a00) VALUES ('eh'); -INSERT INTO t1 (a00) VALUES ('ei'); -INSERT INTO t1 (a00) VALUES ('ej'); -INSERT INTO t1 (a00) VALUES ('ek'); -INSERT INTO t1 (a00) VALUES ('el'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -23 -BEGIN; -INSERT INTO t1 (a00) VALUES ('em'); -INSERT INTO t1 (a00) VALUES ('en'); -INSERT INTO t1 (a00) VALUES ('eo'); -INSERT INTO t1 (a00) VALUES ('ep'); -INSERT INTO t1 (a00) VALUES ('eq'); -INSERT INTO t1 (a00) VALUES ('er'); -INSERT INTO t1 (a00) VALUES ('es'); -INSERT INTO t1 (a00) VALUES ('et'); -INSERT INTO t1 (a00) VALUES ('eu'); -INSERT INTO t1 (a00) VALUES ('ev'); -INSERT INTO t1 (a00) VALUES ('ew'); -INSERT INTO t1 (a00) VALUES ('ex'); -INSERT INTO t1 (a00) VALUES ('ey'); -INSERT INTO t1 (a00) VALUES ('ez'); -INSERT INTO t1 (a00) VALUES ('fa'); -INSERT INTO t1 (a00) VALUES ('fb'); -INSERT INTO t1 (a00) VALUES ('fc'); -INSERT INTO t1 (a00) VALUES ('fd'); -INSERT INTO t1 (a00) VALUES ('fe'); -INSERT INTO t1 (a00) VALUES ('ff'); -INSERT INTO t1 (a00) VALUES ('fg'); -INSERT INTO t1 (a00) VALUES ('fh'); -INSERT INTO t1 (a00) VALUES ('fi'); -INSERT INTO t1 (a00) VALUES ('fj'); -INSERT INTO t1 (a00) VALUES ('fk'); -INSERT INTO t1 (a00) VALUES ('fl'); -INSERT INTO t1 (a00) VALUES ('fm'); -INSERT INTO t1 (a00) VALUES ('fn'); -INSERT INTO t1 (a00) VALUES ('fo'); -INSERT INTO t1 (a00) VALUES ('fp'); -INSERT INTO t1 (a00) VALUES ('fq'); -INSERT INTO t1 (a00) 
VALUES ('fr'); -INSERT INTO t1 (a00) VALUES ('fs'); -INSERT INTO t1 (a00) VALUES ('ft'); -INSERT INTO t1 (a00) VALUES ('fu'); -INSERT INTO t1 (a00) VALUES ('fv'); -INSERT INTO t1 (a00) VALUES ('fw'); -INSERT INTO t1 (a00) VALUES ('fx'); -INSERT INTO t1 (a00) VALUES ('fy'); -INSERT INTO t1 (a00) VALUES ('fz'); -INSERT INTO t1 (a00) VALUES ('ga'); -INSERT INTO t1 (a00) VALUES ('gb'); -INSERT INTO t1 (a00) VALUES ('gc'); -INSERT INTO t1 (a00) VALUES ('gd'); -INSERT INTO t1 (a00) VALUES ('ge'); -INSERT INTO t1 (a00) VALUES ('gf'); -INSERT INTO t1 (a00) VALUES ('gg'); -INSERT INTO t1 (a00) VALUES ('gh'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -29 -SET GLOBAL innodb_limit_optimistic_insert_debug = 0; -# Test start -SET DEBUG_SYNC = 'RESET'; -INSERT INTO t1 (a00) VALUES ('bfa'); -connection con1; -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -INSERT INTO t1 (a00) VALUES ('bfb'); -connection con2; -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -a00 a01 -aa a -SELECT a00,a01 FROM t1 WHERE a00 = 'aq'; -a00 a01 -aq a -SELECT a00,a01 FROM t1 WHERE a00 = 'cp'; -a00 a01 -cp a -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; -a00 a01 -el a -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -SELECT a00,a01 FROM t1 WHERE a00 = 'ar'; -connection con3; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -SELECT a00,a01 FROM t1 WHERE a00 = 'cn'; -connection default; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; -SET DEBUG_SYNC = 'now SIGNAL continue'; -connection con1; -connection con2; -a00 a01 -ar a -connection con3; -a00 a01 -cn a -connection default; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze 
status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -30 -SET DEBUG_SYNC = 'RESET'; -INSERT INTO t1 (a00) VALUES ('coa'); -connection con1; -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -INSERT INTO t1 (a00) VALUES ('cob'); -connection con2; -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -connection con3; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; -connection default; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; -SET DEBUG_SYNC = 'now SIGNAL continue'; -connection con1; -connection con2; -a00 a01 -aa a -connection con3; -a00 a01 -el a -connection default; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -31 -SET DEBUG_SYNC = 'RESET'; -INSERT INTO t1 (a00) VALUES ('gba'); -connection con1; -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -INSERT INTO t1 (a00) VALUES ('gbb'); -connection con2; -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -a00 a01 -aa a -SELECT a00,a01 FROM t1 WHERE a00 = 'ek'; -a00 a01 -ek a -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; -connection con3; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -SELECT a00,a01 FROM t1 WHERE a00 = 'gb'; -connection default; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; -SET DEBUG_SYNC = 'now SIGNAL continue'; -connection con1; 
-connection con2; -a00 a01 -el a -connection con3; -a00 a01 -gb a -connection default; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -32 -SET DEBUG_SYNC = 'RESET'; -connection default; -disconnect con1; -disconnect con2; -disconnect con3; -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt deleted file mode 100644 index e5d7090c883..00000000000 --- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb_buffer_pool_size=24M diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test deleted file mode 100644 index 129037e783b..00000000000 --- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test +++ /dev/null @@ -1,82 +0,0 @@ ---echo # ---echo # Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE ---echo # OPERATION IF IT IS DONE IN-PLACE ---echo # ---source include/have_innodb.inc -# innodb_change_buffering_debug option is debug only ---source include/have_debug.inc -# Embedded server does not support crashing ---source include/not_embedded.inc -# DBUG_SUICIDE() hangs under valgrind ---source include/not_valgrind.inc -# This test is slow on buildbot. 
---source include/big_test.inc ---source include/have_sequence.inc - -call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery"); -call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc"); -call mtr.add_suppression("Plugin 'InnoDB'"); -FLUSH TABLES; - -CREATE TABLE t1( - a INT AUTO_INCREMENT PRIMARY KEY, - b CHAR(1), - c INT, - INDEX(b)) -ENGINE=InnoDB STATS_PERSISTENT=0; - ---let $_server_id= `SELECT @@server_id` ---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect - -# The flag innodb_change_buffering_debug is only available in debug builds. -# It instructs InnoDB to try to evict pages from the buffer pool when -# change buffering is possible, so that the change buffer will be used -# whenever possible. -SET GLOBAL innodb_change_buffering_debug = 1; -SET GLOBAL innodb_change_buffering = all; -let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; - -# Create enough rows for the table, so that the change buffer will be -# used for modifying the secondary index page. There must be multiple -# index pages, because changes to the root page are never buffered. -INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192; - -BEGIN; -SELECT b FROM t1 LIMIT 3; - -connect (con1,localhost,root,,); -BEGIN; -DELETE FROM t1 WHERE a=1; -# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. 
-INSERT INTO t1 VALUES(1,'X',1); - -SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; ---exec echo "wait" > $_expect_file_name ---error 2013 -# This should force a change buffer merge -SELECT b FROM t1 LIMIT 3; -disconnect con1; -connection default; -let SEARCH_PATTERN=Wrote log record for ibuf update in place operation; ---source include/search_pattern_in_file.inc - ---let $restart_parameters= --innodb-read-only ---source include/start_mysqld.inc -CHECK TABLE t1; ---source include/shutdown_mysqld.inc -let SEARCH_PATTERN=innodb_read_only prevents crash recovery; ---source include/search_pattern_in_file.inc - ---let $restart_parameters= --innodb-force-recovery=5 ---source include/start_mysqld.inc -SELECT * FROM t1 LIMIT 1; -replace_regex /.*operations:.* (insert.*), delete \d.*discarded .*/\1/; -SHOW ENGINE INNODB STATUS; -# Slow shutdown will not merge the changes due to innodb_force_recovery=5. -SET GLOBAL innodb_fast_shutdown=0; ---let $restart_parameters= ---source include/restart_mysqld.inc -CHECK TABLE t1; -replace_regex /.*operations:.* insert [1-9][0-9]*, delete mark [1-9][0-9]*, delete \d.*discarded .*//; -SHOW ENGINE INNODB STATUS; -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.opt b/mysql-test/suite/innodb/t/innodb_wl6326.opt deleted file mode 100644 index 99bf0e5a28b..00000000000 --- a/mysql-test/suite/innodb/t/innodb_wl6326.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-sys-tablestats diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.test b/mysql-test/suite/innodb/t/innodb_wl6326.test deleted file mode 100644 index 1cf98cd1c7b..00000000000 --- a/mysql-test/suite/innodb/t/innodb_wl6326.test +++ /dev/null @@ -1,519 +0,0 @@ -# -# WL#6326: InnoDB: fix index->lock contention -# - ---source include/have_innodb.inc ---source include/have_debug.inc ---source include/have_debug_sync.inc ---source include/have_innodb_16k.inc - ---disable_query_log -SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug; -SET 
@old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; -SET @old_innodb_stats_persistent = @@innodb_stats_persistent; ---enable_query_log - -# Save the initial number of concurrent sessions ---source include/count_sessions.inc - -SET GLOBAL innodb_adaptive_hash_index = false; -SET GLOBAL innodb_stats_persistent = false; - ---connect (con1,localhost,root,,) ---connect (con2,localhost,root,,) ---connect (con3,localhost,root,,) - -CREATE TABLE t1 ( - a00 CHAR(255) NOT NULL DEFAULT 'a', - a01 CHAR(255) NOT NULL DEFAULT 'a', - a02 CHAR(255) NOT NULL DEFAULT 'a', - a03 CHAR(255) NOT NULL DEFAULT 'a', - a04 CHAR(255) NOT NULL DEFAULT 'a', - a05 CHAR(255) NOT NULL DEFAULT 'a', - a06 CHAR(255) NOT NULL DEFAULT 'a', - b INT NOT NULL DEFAULT 0 -) ENGINE = InnoDB; - -ALTER TABLE t1 ADD PRIMARY KEY( - a00, - a01, - a02, - a03, - a04, - a05, - a06 -); - -# -# Prepare primary key index tree to be used for this test. -# - -# Only root (1) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -# Make the first records sparse artificially, -# not to cause modify_tree by single node_ptr insert operation. 
-# * (7 - 2) records should be larger than a half of the page size -# * (7 + 2) records should be fit to the page -# (above t1 definition is already adjusted) -SET GLOBAL innodb_limit_optimistic_insert_debug = 7; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('aa'); -INSERT INTO t1 (a00) VALUES ('ab'); -INSERT INTO t1 (a00) VALUES ('ac'); -INSERT INTO t1 (a00) VALUES ('ad'); -INSERT INTO t1 (a00) VALUES ('ae'); -INSERT INTO t1 (a00) VALUES ('af'); -INSERT INTO t1 (a00) VALUES ('ag'); -INSERT INTO t1 (a00) VALUES ('ah'); -COMMIT; -# Raise root (1-2) -# (aa,ad) -# (aa,ab,ac)(ad,ae,af,ag,ah) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('ai'); -INSERT INTO t1 (a00) VALUES ('aj'); -INSERT INTO t1 (a00) VALUES ('ak'); -COMMIT; -# Split leaf (1-3) -# (aa,ad,ak) -# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('al'); -INSERT INTO t1 (a00) VALUES ('am'); -INSERT INTO t1 (a00) VALUES ('an'); -INSERT INTO t1 (a00) VALUES ('ao'); -INSERT INTO t1 (a00) VALUES ('ap'); -INSERT INTO t1 (a00) VALUES ('aq'); -INSERT INTO t1 (a00) VALUES ('ar'); -COMMIT; -# Split leaf (1-4) -# (aa,ad,ak,ar) -# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('as'); -INSERT INTO t1 (a00) VALUES ('at'); -INSERT INTO t1 (a00) VALUES ('au'); -INSERT INTO t1 (a00) VALUES ('av'); -INSERT INTO t1 (a00) VALUES ('aw'); -INSERT INTO t1 (a00) VALUES ('ax'); -INSERT INTO t1 (a00) VALUES ('ay'); -COMMIT; -# Split leaf (1-5) -# (aa,ad,ak,ar,ay) -# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar,as,at,au,av,aw,ax)(ay) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM 
information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('az'); -INSERT INTO t1 (a00) VALUES ('ba'); -INSERT INTO t1 (a00) VALUES ('bb'); -INSERT INTO t1 (a00) VALUES ('bc'); -INSERT INTO t1 (a00) VALUES ('bd'); -INSERT INTO t1 (a00) VALUES ('be'); -INSERT INTO t1 (a00) VALUES ('bf'); -COMMIT; -# Split leaf (1-6) -# (aa,ad,ak,ar,ay,bf) -# (aa,ab,ac)(ad..)(ak..)(ar,as,at,au,av,aw,ax)(ay,az,ba,bb,bc,bd,be)(bf) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('bg'); -INSERT INTO t1 (a00) VALUES ('bh'); -INSERT INTO t1 (a00) VALUES ('bi'); -INSERT INTO t1 (a00) VALUES ('bj'); -INSERT INTO t1 (a00) VALUES ('bk'); -INSERT INTO t1 (a00) VALUES ('bl'); -INSERT INTO t1 (a00) VALUES ('bm'); -COMMIT; -# Split leaf (1-7) -# (aa,ad,ak,ar,ay,bf,bm) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay,az,ba,bb,bc,bd,be)(bf,bg,bh,bi,bj,bk,bl)(bm) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('bn'); -INSERT INTO t1 (a00) VALUES ('bo'); -INSERT INTO t1 (a00) VALUES ('bp'); -INSERT INTO t1 (a00) VALUES ('bq'); -INSERT INTO t1 (a00) VALUES ('br'); -INSERT INTO t1 (a00) VALUES ('bs'); -INSERT INTO t1 (a00) VALUES ('bt'); -COMMIT; -# Raise root (1-2-8) -# (aa,ar) -# (aa,ad,ak) (ar,ay,bf,bm,bt) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('bu'); -INSERT INTO t1 (a00) VALUES ('bv'); -INSERT INTO t1 (a00) VALUES ('bw'); -INSERT INTO t1 (a00) VALUES ('bx'); -INSERT INTO t1 (a00) VALUES ('by'); -INSERT INTO t1 (a00) VALUES ('bz'); -INSERT INTO t1 (a00) VALUES ('ca'); - -INSERT INTO t1 (a00) VALUES ('cb'); -INSERT INTO t1 (a00) VALUES ('cc'); -INSERT INTO t1 (a00) VALUES ('cd'); -INSERT INTO t1 
(a00) VALUES ('ce'); -INSERT INTO t1 (a00) VALUES ('cf'); -INSERT INTO t1 (a00) VALUES ('cg'); -INSERT INTO t1 (a00) VALUES ('ch'); - -INSERT INTO t1 (a00) VALUES ('ci'); -INSERT INTO t1 (a00) VALUES ('cj'); -INSERT INTO t1 (a00) VALUES ('ck'); -INSERT INTO t1 (a00) VALUES ('cl'); -INSERT INTO t1 (a00) VALUES ('cm'); -INSERT INTO t1 (a00) VALUES ('cn'); -INSERT INTO t1 (a00) VALUES ('co'); -COMMIT; -# Split also at level 1 (1-3-11) -# (aa,ar,co) -# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('cp'); -INSERT INTO t1 (a00) VALUES ('cq'); -INSERT INTO t1 (a00) VALUES ('cr'); -INSERT INTO t1 (a00) VALUES ('cs'); -INSERT INTO t1 (a00) VALUES ('ct'); -INSERT INTO t1 (a00) VALUES ('cu'); -INSERT INTO t1 (a00) VALUES ('cv'); - -INSERT INTO t1 (a00) VALUES ('cw'); -INSERT INTO t1 (a00) VALUES ('cx'); -INSERT INTO t1 (a00) VALUES ('cy'); -INSERT INTO t1 (a00) VALUES ('cz'); -INSERT INTO t1 (a00) VALUES ('da'); -INSERT INTO t1 (a00) VALUES ('db'); -INSERT INTO t1 (a00) VALUES ('dc'); - -INSERT INTO t1 (a00) VALUES ('dd'); -INSERT INTO t1 (a00) VALUES ('de'); -INSERT INTO t1 (a00) VALUES ('df'); -INSERT INTO t1 (a00) VALUES ('dg'); -INSERT INTO t1 (a00) VALUES ('dh'); -INSERT INTO t1 (a00) VALUES ('di'); -INSERT INTO t1 (a00) VALUES ('dj'); - -INSERT INTO t1 (a00) VALUES ('dk'); -INSERT INTO t1 (a00) VALUES ('dl'); -INSERT INTO t1 (a00) VALUES ('dm'); -INSERT INTO t1 (a00) VALUES ('dn'); -INSERT INTO t1 (a00) VALUES ('do'); -INSERT INTO t1 (a00) VALUES ('dp'); -INSERT INTO t1 (a00) VALUES ('dq'); - -INSERT INTO t1 (a00) VALUES ('dr'); -INSERT INTO t1 (a00) VALUES ('ds'); -INSERT INTO t1 (a00) VALUES ('dt'); -INSERT INTO t1 (a00) VALUES ('du'); -INSERT INTO t1 (a00) VALUES ('dv'); -INSERT INTO t1 (a00) VALUES ('dw'); -INSERT INTO t1 (a00) VALUES ('dx'); - 
-INSERT INTO t1 (a00) VALUES ('dy'); -INSERT INTO t1 (a00) VALUES ('dz'); -INSERT INTO t1 (a00) VALUES ('ea'); -INSERT INTO t1 (a00) VALUES ('eb'); -INSERT INTO t1 (a00) VALUES ('ec'); -INSERT INTO t1 (a00) VALUES ('ed'); -INSERT INTO t1 (a00) VALUES ('ee'); - -INSERT INTO t1 (a00) VALUES ('ef'); -INSERT INTO t1 (a00) VALUES ('eg'); -INSERT INTO t1 (a00) VALUES ('eh'); -INSERT INTO t1 (a00) VALUES ('ei'); -INSERT INTO t1 (a00) VALUES ('ej'); -INSERT INTO t1 (a00) VALUES ('ek'); -INSERT INTO t1 (a00) VALUES ('el'); -COMMIT; -# Split also at level 1 (1-4-18) -# (aa,ar,co,el) -# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('em'); -INSERT INTO t1 (a00) VALUES ('en'); -INSERT INTO t1 (a00) VALUES ('eo'); -INSERT INTO t1 (a00) VALUES ('ep'); -INSERT INTO t1 (a00) VALUES ('eq'); -INSERT INTO t1 (a00) VALUES ('er'); -INSERT INTO t1 (a00) VALUES ('es'); - -INSERT INTO t1 (a00) VALUES ('et'); -INSERT INTO t1 (a00) VALUES ('eu'); -INSERT INTO t1 (a00) VALUES ('ev'); -INSERT INTO t1 (a00) VALUES ('ew'); -INSERT INTO t1 (a00) VALUES ('ex'); -INSERT INTO t1 (a00) VALUES ('ey'); -INSERT INTO t1 (a00) VALUES ('ez'); - -INSERT INTO t1 (a00) VALUES ('fa'); -INSERT INTO t1 (a00) VALUES ('fb'); -INSERT INTO t1 (a00) VALUES ('fc'); -INSERT INTO t1 (a00) VALUES ('fd'); -INSERT INTO t1 (a00) VALUES ('fe'); -INSERT INTO t1 (a00) VALUES ('ff'); -INSERT INTO t1 (a00) VALUES ('fg'); - -INSERT INTO t1 (a00) VALUES ('fh'); -INSERT INTO t1 (a00) VALUES ('fi'); -INSERT INTO t1 (a00) VALUES ('fj'); -INSERT INTO t1 (a00) VALUES ('fk'); -INSERT INTO t1 (a00) VALUES ('fl'); -INSERT INTO t1 (a00) VALUES ('fm'); -INSERT INTO t1 (a00) VALUES ('fn'); - -INSERT INTO t1 (a00) VALUES ('fo'); -INSERT INTO t1 (a00) VALUES 
('fp'); -INSERT INTO t1 (a00) VALUES ('fq'); -INSERT INTO t1 (a00) VALUES ('fr'); -INSERT INTO t1 (a00) VALUES ('fs'); -INSERT INTO t1 (a00) VALUES ('ft'); -INSERT INTO t1 (a00) VALUES ('fu'); - -INSERT INTO t1 (a00) VALUES ('fv'); -INSERT INTO t1 (a00) VALUES ('fw'); -INSERT INTO t1 (a00) VALUES ('fx'); -INSERT INTO t1 (a00) VALUES ('fy'); -INSERT INTO t1 (a00) VALUES ('fz'); -INSERT INTO t1 (a00) VALUES ('ga'); -INSERT INTO t1 (a00) VALUES ('gb'); - -INSERT INTO t1 (a00) VALUES ('gc'); -INSERT INTO t1 (a00) VALUES ('gd'); -INSERT INTO t1 (a00) VALUES ('ge'); -INSERT INTO t1 (a00) VALUES ('gf'); -INSERT INTO t1 (a00) VALUES ('gg'); -INSERT INTO t1 (a00) VALUES ('gh'); -COMMIT; - -# Current tree form (1-4-24) -# (aa,ar,co,el) -# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el..,gb) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el..)..(gb..) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -# Insert the rest of records normally -SET GLOBAL innodb_limit_optimistic_insert_debug = 0; - ---echo # Test start - -# (1) Insert records to leaf page (bf..) and cause modify_page. -# - root page is not X latched -# - latched from level 1 page (ar,ay,bf,bm,bt,ca,ch) - -SET DEBUG_SYNC = 'RESET'; - -# Filling leaf page (bf..) -INSERT INTO t1 (a00) VALUES ('bfa'); - ---connection con1 -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -# Cause modify_tree ---send -INSERT INTO t1 (a00) VALUES ('bfb'); - ---connection con2 -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -# Not blocked searches -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aq'; -# "where a00 = 'co'" is blocked because searching from smaller ('co','a','a',..). 
-SELECT a00,a01 FROM t1 WHERE a00 = 'cp'; -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; - -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'ar'; - ---connection con3 -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'cn'; - ---connection default -# FIXME: These occasionally time out! ---disable_warnings -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; ---enable_warnings -SET DEBUG_SYNC = 'now SIGNAL continue'; - ---connection con1 ---reap - ---connection con2 ---reap - ---connection con3 ---reap - ---connection default - -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - - - -# (2) Insert records to leaf page (co..) and cause modify_page -# - root page is X latched, because node_ptr for 'co' -# is 1st record for (co,cv,dc,dj,dq,dx,ee) -# -# * ordinary pessimitic insert might be done by pessistic update -# and we should consider possibility node_ptr to be deleted. - -SET DEBUG_SYNC = 'RESET'; - -# Filling leaf page (co..) -INSERT INTO t1 (a00) VALUES ('coa'); - ---connection con1 -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -# Cause modify_tree ---send -INSERT INTO t1 (a00) VALUES ('cob'); - ---connection con2 -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -# All searches are blocked because root page is X latched - -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; - ---connection con3 -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; - ---connection default -# FIXME: These occasionally time out! 
---disable_warnings -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; ---enable_warnings -SET DEBUG_SYNC = 'now SIGNAL continue'; - ---connection con1 ---reap - ---connection con2 ---reap - ---connection con3 ---reap - ---connection default - -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - - - -# (3) Insert records to rightmost leaf page (gb..) and cause modify_page -# - root page is not X latched, because node_ptr for 'gb' is the last record -# of the level 1 though it is last record in the page. -# - lathed from level 1 page (el..,gb) - -SET DEBUG_SYNC = 'RESET'; - -# Filling leaf page (gb..) -INSERT INTO t1 (a00) VALUES ('gba'); - ---connection con1 -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -# Cause modify_tree ---send -INSERT INTO t1 (a00) VALUES ('gbb'); - ---connection con2 -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -# Not blocked searches -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -SELECT a00,a01 FROM t1 WHERE a00 = 'ek'; - -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; - ---connection con3 -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'gb'; - ---connection default -# FIXME: These occasionally time out! 
---disable_warnings -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; ---enable_warnings -SET DEBUG_SYNC = 'now SIGNAL continue'; - ---connection con1 ---reap - ---connection con2 ---reap - ---connection con3 ---reap - ---connection default -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - - - -# Cleanup -SET DEBUG_SYNC = 'RESET'; - ---connection default ---disconnect con1 ---disconnect con2 ---disconnect con3 - -DROP TABLE t1; - ---disable_query_log -SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug; -SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; -SET GLOBAL innodb_stats_persistent = @old_innodb_stats_persistent; ---enable_query_log - -# Wait till all disconnects are completed. ---source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/innodb_gis/r/rtree_split.result b/mysql-test/suite/innodb_gis/r/rtree_split.result index 8e475776ce0..97027bde865 100644 --- a/mysql-test/suite/innodb_gis/r/rtree_split.result +++ b/mysql-test/suite/innodb_gis/r/rtree_split.result @@ -61,3 +61,15 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1); count(*) 57344 drop table t1; +# +# MDEV-30400 Assertion height == btr_page_get_level ... 
on INSERT +# +CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB; +SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; +SET GLOBAL innodb_limit_optimistic_insert_debug=2; +BEGIN; +INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_6; +ROLLBACK; +SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit; +DROP TABLE t1; +# End of 10.6 tests diff --git a/mysql-test/suite/innodb_gis/t/rtree_split.test b/mysql-test/suite/innodb_gis/t/rtree_split.test index 6f285187508..a23315dc3f3 100644 --- a/mysql-test/suite/innodb_gis/t/rtree_split.test +++ b/mysql-test/suite/innodb_gis/t/rtree_split.test @@ -73,3 +73,18 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1); # Clean up. drop table t1; + +--echo # +--echo # MDEV-30400 Assertion height == btr_page_get_level ... on INSERT +--echo # + +CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB; +SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; +SET GLOBAL innodb_limit_optimistic_insert_debug=2; +BEGIN; +INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_6; +ROLLBACK; +SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit; +DROP TABLE t1; + +--echo # End of 10.6 tests diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 0bb16dba374..7fd851f7b0e 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2022, MariaDB Corporation. +Copyright (c) 2014, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -460,6 +460,54 @@ btr_page_create( } } +buf_block_t * +mtr_t::get_already_latched(const page_id_t id, mtr_memo_type_t type) const +{ + ut_ad(is_active()); + ut_ad(type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX || + type == MTR_MEMO_PAGE_S_FIX); + for (ulint i= 0; i < m_memo.size(); i++) + { + const mtr_memo_slot_t &slot= m_memo[i]; + const auto slot_type= mtr_memo_type_t(slot.type & ~MTR_MEMO_MODIFY); + if (slot_type == MTR_MEMO_PAGE_X_FIX || slot_type == type) + { + buf_block_t *block= static_cast<buf_block_t*>(slot.object); + if (block->page.id() == id) + return block; + } + } + return nullptr; +} + +/** Fetch an index root page that was already latched in the +mini-transaction. */ +static buf_block_t *btr_get_latched_root(const dict_index_t &index, mtr_t *mtr) +{ + return mtr->get_already_latched(page_id_t{index.table->space_id, index.page}, + MTR_MEMO_PAGE_SX_FIX); +} + +/** Fetch an index page that should have been already latched in the +mini-transaction. */ +static buf_block_t * +btr_block_reget(mtr_t *mtr, const dict_index_t &index, + const page_id_t id, rw_lock_type_t rw_latch, + dberr_t *err) +{ + if (buf_block_t *block= + mtr->get_already_latched(id, mtr_memo_type_t(rw_latch))) + { + *err= DB_SUCCESS; + return block; + } + +#if 0 /* MDEV-29835 FIXME: Acquire the page latch upfront. */ + ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK)); +#endif + return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err); +} + /**************************************************************//** Allocates a new file page to be used in an ibuf tree. Takes the page from the free list of the tree, which must contain pages! 
@@ -472,18 +520,16 @@ btr_page_alloc_for_ibuf( mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ { - buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err); + buf_block_t *root= btr_get_latched_root(*index, mtr); if (UNIV_UNLIKELY(!root)) return root; - buf_block_t *new_block= - buf_page_get_gen(page_id_t(index->table->space_id, + buf_page_get_gen(page_id_t(IBUF_SPACE_ID, mach_read_from_4(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST + FLST_FIRST + FIL_ADDR_PAGE + root->page.frame)), - index->table->space->zip_size(), RW_X_LATCH, nullptr, - BUF_GET, mtr, err); + 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err); if (new_block) *err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); @@ -523,11 +569,11 @@ btr_page_alloc_low( #ifdef BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->release_block_at_savepoint(savepoint, root); + mtr->rollback_to_savepoint(savepoint); } else { - mtr->u_lock_register(savepoint); + mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -579,15 +625,12 @@ btr_page_free_for_ibuf( mtr_t* mtr) /*!< in: mtr */ { ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); - - dberr_t err; - if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err)) - { - err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + buf_block_t *root= btr_get_latched_root(*index, mtr); + dberr_t err= + flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); - ut_d(if (err == DB_SUCCESS) - flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); - } + ut_d(if (err == DB_SUCCESS) + flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); return err; } @@ -637,11 +680,11 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, #ifdef 
BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->release_block_at_savepoint(savepoint, root); + mtr->rollback_to_savepoint(savepoint); } else { - mtr->u_lock_register(savepoint); + mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -712,35 +755,27 @@ btr_node_ptr_get_child( mtr, err); } -MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result)) +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds an sx-latch on the tree. @return rec_get_offsets() of the node pointer record */ static rec_offs* -btr_page_get_father_node_ptr_func( -/*==============================*/ +btr_page_get_father_node_ptr_for_validate( rec_offs* offsets,/*!< in: work area for the return value */ mem_heap_t* heap, /*!< in: memory heap to use */ btr_cur_t* cursor, /*!< in: cursor pointing to user record, out: cursor on node pointer record, its page x-latched */ - btr_latch_mode latch_mode,/*!< in: BTR_CONT_MODIFY_TREE - or BTR_CONT_SEARCH_TREE */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE); - const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); dict_index_t* index = btr_cur_get_index(cursor); ut_ad(!dict_index_is_spatial(index)); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); ut_ad(dict_index_get_page(index) != page_no); const auto level = btr_page_get_level(btr_cur_get_page(cursor)); @@ -752,12 +787,16 @@ btr_page_get_father_node_ptr_func( dict_index_build_node_ptr(index, user_rec, 0, heap, level), - PAGE_CUR_LE, latch_mode, + RW_S_LATCH, cursor, mtr) != DB_SUCCESS) { return nullptr; } const 
rec_t* node_ptr = btr_cur_get_rec(cursor); +#if 0 /* MDEV-29835 FIXME */ + ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); +#endif offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); @@ -769,13 +808,65 @@ btr_page_get_father_node_ptr_func( return(offsets); } -#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \ - btr_page_get_father_node_ptr_func( \ - of,heap,cur,BTR_CONT_MODIFY_TREE,mtr) +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) +/** Return the node pointer to a page. +@param offsets work area for the return value +@param heap memory heap +@param cursor in: child page; out: node pointer to it +@param mtr mini-transaction +@return rec_get_offsets() of the node pointer record +@retval nullptr if the parent page had not been latched in mtr */ +static rec_offs *btr_page_get_parent(rec_offs *offsets, mem_heap_t *heap, + btr_cur_t *cursor, mtr_t *mtr) +{ + const uint32_t page_no= cursor->block()->page.id().page_no(); + const dict_index_t *index= cursor->index(); + ut_ad(!index->is_spatial()); + ut_ad(index->page != page_no); -#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \ - btr_page_get_father_node_ptr_func( \ - of,heap,cur,BTR_CONT_SEARCH_TREE,mtr) + uint32_t p= index->page; + auto level= btr_page_get_level(cursor->block()->page.frame); + const dtuple_t *tuple= + dict_index_build_node_ptr(index, btr_cur_get_rec(cursor), 0, heap, level); + level++; + + ulint i; + for (i= 0; i < mtr->get_savepoint(); i++) + if (buf_block_t *block= mtr->block_at_savepoint(i)) + if (block->page.id().page_no() == p) + { + ut_ad(block->page.lock.have_u_or_x() || + (!block->page.lock.have_s() && index->lock.have_x())); + ulint up_match= 0, low_match= 0; + cursor->page_cur.block= block; + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &up_match, + &low_match, &cursor->page_cur, + nullptr)) + return nullptr; + offsets= rec_get_offsets(cursor->page_cur.rec, 
index, offsets, 0, + ULINT_UNDEFINED, &heap); + p= btr_node_ptr_get_child_page_no(cursor->page_cur.rec, offsets); + if (p != page_no) + { + if (btr_page_get_level(block->page.frame) == level) + return nullptr; + i= 0; // MDEV-29835 FIXME: require all pages to be latched in order! + continue; + } + ut_ad(block->page.lock.have_u_or_x()); + if (block->page.lock.have_u_not_x()) + { + /* btr_cur_t::search_leaf(BTR_MODIFY_TREE) only U-latches the + root page initially. */ + ut_ad(block->page.id().page_no() == index->page); + block->page.lock.u_x_upgrade(); + mtr->page_lock_upgrade(*block); + } + return offsets; + } + + return nullptr; +} /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds @@ -796,7 +887,7 @@ btr_page_get_father_block( if (UNIV_UNLIKELY(!rec)) return nullptr; cursor->page_cur.rec= rec; - return btr_page_get_father_node_ptr(offsets, heap, cursor, mtr); + return btr_page_get_parent(offsets, heap, cursor, mtr); } /** Seek to the parent page of a B-tree page. @@ -811,7 +902,7 @@ bool btr_page_get_father(mtr_t* mtr, btr_cur_t* cursor) return false; cursor->page_cur.rec= rec; mem_heap_t *heap= mem_heap_create(100); - const bool got= btr_page_get_father_node_ptr(nullptr, heap, cursor, mtr); + const bool got= btr_page_get_parent(nullptr, heap, cursor, mtr); mem_heap_free(heap); return got; } @@ -1718,48 +1809,43 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. 
@param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction -@return error code */ +@param[in,out] mtr mini-transaction */ ATTRIBUTE_COLD -dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) +void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) { ut_ad(!index.table->is_temporary()); ut_ad(index.is_primary()); - dberr_t err; - if (buf_block_t *root= btr_root_block_get(&index, RW_SX_LATCH, mtr, &err)) + buf_block_t *root= btr_get_latched_root(index, mtr); + byte *page_type= root->page.frame + FIL_PAGE_TYPE; + if (all) { - byte *page_type= root->page.frame + FIL_PAGE_TYPE; - if (all) - { - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || - mach_read_from_2(page_type) == FIL_PAGE_INDEX); - mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); - byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; - mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, - page_ptr_get_direction(instant + 1)); - } - else - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); - static const byte supremuminfimum[8 + 8] = "supremuminfimum"; - uint16_t infimum, supremum; - if (page_is_comp(root->page.frame)) - { - infimum= PAGE_NEW_INFIMUM; - supremum= PAGE_NEW_SUPREMUM; - } - else - { - infimum= PAGE_OLD_INFIMUM; - supremum= PAGE_OLD_SUPREMUM; - } - ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == - !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); - mtr->memcpy(*root, &root->page.frame[infimum], - supremuminfimum + 8, 8); - mtr->memcpy(*root, &root->page.frame[supremum], - supremuminfimum, 8); + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || + mach_read_from_2(page_type) == FIL_PAGE_INDEX); + mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); + byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; + mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, + 
page_ptr_get_direction(instant + 1)); } - return err; + else + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); + static const byte supremuminfimum[8 + 8] = "supremuminfimum"; + uint16_t infimum, supremum; + if (page_is_comp(root->page.frame)) + { + infimum= PAGE_NEW_INFIMUM; + supremum= PAGE_NEW_SUPREMUM; + } + else + { + infimum= PAGE_OLD_INFIMUM; + supremum= PAGE_OLD_SUPREMUM; + } + ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == + !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); + mtr->memcpy(*root, &root->page.frame[infimum], + supremuminfimum + 8, 8); + mtr->memcpy(*root, &root->page.frame[supremum], + supremuminfimum, 8); } /*************************************************************//** @@ -1856,11 +1942,6 @@ btr_root_raise_and_insert( } /* Copy the records from root to the new page one by one. */ - dberr_t e; - if (!err) { - err = &e; - } - if (0 #ifdef UNIV_ZIP_COPY || new_page_zip @@ -2004,21 +2085,15 @@ btr_root_raise_and_insert( page_cursor->block = new_block; page_cursor->index = index; - if (tuple) { - ut_ad(dtuple_check_typed(tuple)); - /* Reposition the cursor to the child node */ - ulint low_match = 0, up_match = 0; + ut_ad(dtuple_check_typed(tuple)); + /* Reposition the cursor to the child node */ + ulint low_match = 0, up_match = 0; - if (page_cur_search_with_match(tuple, PAGE_CUR_LE, - &up_match, &low_match, - page_cursor, nullptr)) { - if (err) { - *err = DB_CORRUPTION; - } - return nullptr; - } - } else { - page_cursor->rec = page_get_infimum_rec(new_block->page.frame); + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, + &up_match, &low_match, + page_cursor, nullptr)) { + *err = DB_CORRUPTION; + return nullptr; } /* Split the child and insert tuple */ @@ -2237,6 +2312,7 @@ func_exit: return(rec); } +#ifdef UNIV_DEBUG /*************************************************************//** Returns TRUE if the insert fits on the appropriate half-page with the chosen split_rec. 
@@ -2334,6 +2410,7 @@ got_rec: return(false); } +#endif /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed @@ -2356,25 +2433,34 @@ btr_insert_on_non_leaf_level( rtr_info_t rtr_info; ut_ad(level > 0); - auto mode = PAGE_CUR_LE; - - if (index->is_spatial()) { - mode = PAGE_CUR_RTREE_INSERT; - /* For spatial index, initialize structures to track - its parents etc. */ - rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); - - rtr_info_update_btr(&cursor, &rtr_info); - } flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG | BTR_NO_UNDO_LOG_FLAG; cursor.page_cur.index = index; - dberr_t err = btr_cur_search_to_nth_level(level, tuple, mode, - BTR_CONT_MODIFY_TREE, + dberr_t err; + + if (index->is_spatial()) { + /* For spatial index, initialize structures to track + its parents etc. */ + rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); + + rtr_info_update_btr(&cursor, &rtr_info); + err = rtr_search_to_nth_level(level, tuple, + PAGE_CUR_RTREE_INSERT, + BTR_CONT_MODIFY_TREE, + &cursor, mtr); + } else { + err = btr_cur_search_to_nth_level(level, tuple, RW_X_LATCH, &cursor, mtr); + } + ut_ad(cursor.flag == BTR_CUR_BINARY); +#if 0 /* MDEV-29835 FIXME */ + ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive() + || index->is_spatial() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); +#endif if (UNIV_LIKELY(err == DB_SUCCESS)) { err = btr_cur_optimistic_insert(flags, @@ -2470,6 +2556,7 @@ btr_attach_half_pages( /* Get the level of the split pages */ const ulint level = btr_page_get_level(block->page.frame); ut_ad(level == btr_page_get_level(new_block->page.frame)); + page_id_t id{block->page.id()}; /* Get the previous and next pages of page */ const uint32_t prev_page_no = btr_page_get_prev(block->page.frame); @@ -2477,12 +2564,32 @@ btr_attach_half_pages( /* for consistency, both blocks should be locked, before change */ if (prev_page_no != FIL_NULL && direction == FSP_DOWN) { 
- prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH, - !level, mtr); + id.set_page_no(prev_page_no); + prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!prev_block) { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index->lock, + MTR_MEMO_X_LOCK)); +# endif + prev_block = btr_block_get(*index, prev_page_no, + RW_X_LATCH, !level, mtr); + } +#endif } if (next_page_no != FIL_NULL && direction != FSP_DOWN) { - next_block = btr_block_get(*index, next_page_no, RW_X_LATCH, - !level, mtr); + id.set_page_no(next_page_no); + next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!next_block) { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index->lock, + MTR_MEMO_X_LOCK)); +# endif + next_block = btr_block_get(*index, next_page_no, + RW_X_LATCH, !level, mtr); + } +#endif } /* Build the node pointer (= node key and page address) for the upper @@ -3018,6 +3125,7 @@ insert_empty: return nullptr; } +#ifdef UNIV_DEBUG /* If the split is made on the leaf level and the insert will fit on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, @@ -3025,21 +3133,21 @@ insert_empty: const bool insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); +#endif if (!split_rec && !insert_left) { UT_DELETE_ARRAY(buf); buf = NULL; } - if (!srv_read_only_mode - && insert_will_fit +#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled + if (insert_will_fit && page_is_leaf(page) && !dict_index_is_online_ddl(cursor->index())) { -#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled mtr->release(cursor->index()->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif /* 5. 
Move then the records to the new page */ if (direction == FSP_DOWN) { @@ -3271,52 +3379,58 @@ func_exit: dberr_t btr_level_list_remove(const buf_block_t& block, const dict_index_t& index, mtr_t* mtr) { - ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(block.zip_size() == index.table->space->zip_size()); - ut_ad(index.table->space->id == block.page.id().space()); - /* Get the previous and next page numbers of page */ + ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(block.zip_size() == index.table->space->zip_size()); + ut_ad(index.table->space->id == block.page.id().space()); + /* Get the previous and next page numbers of page */ + const uint32_t prev_page_no= btr_page_get_prev(block.page.frame); + const uint32_t next_page_no= btr_page_get_next(block.page.frame); + page_id_t id{block.page.id()}; + buf_block_t *prev= nullptr, *next; + dberr_t err; - const page_t* page = block.page.frame; - const uint32_t prev_page_no = btr_page_get_prev(page); - const uint32_t next_page_no = btr_page_get_next(page); + /* Update page links of the level */ + if (prev_page_no != FIL_NULL) + { + id.set_page_no(prev_page_no); + prev= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!prev) + { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); +# endif + prev= btr_block_get(index, id.page_no(), RW_X_LATCH, + page_is_leaf(block.page.frame), mtr, &err); + if (UNIV_UNLIKELY(!prev)) + return err; + } +#endif + } - /* Update page links of the level */ - dberr_t err; + if (next_page_no != FIL_NULL) + { + id.set_page_no(next_page_no); + next= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!next) + { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); +# endif + next= btr_block_get(index, id.page_no(), RW_X_LATCH, + 
page_is_leaf(block.page.frame), mtr, &err); + if (UNIV_UNLIKELY(!next)) + return err; + } +#endif + btr_page_set_prev(next, prev_page_no, mtr); + } - if (prev_page_no != FIL_NULL) { - buf_block_t* prev_block = btr_block_get( - index, prev_page_no, RW_X_LATCH, page_is_leaf(page), - mtr, &err); - if (UNIV_UNLIKELY(!prev_block)) { - return err; - } - if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame - + FIL_PAGE_NEXT, - page + FIL_PAGE_OFFSET, - 4))) { - return DB_CORRUPTION; - } - btr_page_set_next(prev_block, next_page_no, mtr); - } + if (prev) + btr_page_set_next(prev, next_page_no, mtr); - if (next_page_no != FIL_NULL) { - buf_block_t* next_block = btr_block_get( - index, next_page_no, RW_X_LATCH, page_is_leaf(page), - mtr, &err); - - if (UNIV_UNLIKELY(!next_block)) { - return err; - } - if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame - + FIL_PAGE_PREV, - page + FIL_PAGE_OFFSET, - 4))) { - return DB_CORRUPTION; - } - btr_page_set_prev(next_block, prev_page_no, mtr); - } - - return DB_SUCCESS; + return DB_SUCCESS; } /*************************************************************//** @@ -4166,23 +4280,30 @@ btr_discard_page( const uint32_t left_page_no = btr_page_get_prev(block->page.frame); const uint32_t right_page_no = btr_page_get_next(block->page.frame); + page_id_t merge_page_id{block->page.id()}; ut_d(bool parent_is_different = false); + dberr_t err; if (left_page_no != FIL_NULL) { - dberr_t err; - merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH, - true, mtr, &err); + merge_page_id.set_page_no(left_page_no); + merge_block = btr_block_reget(mtr, *index, merge_page_id, + RW_X_LATCH, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } - +#if 0 /* MDEV-29835 FIXME: Acquire the page latch upfront.
*/ + ut_ad(!memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + FIL_PAGE_OFFSET, + 4)); +#else if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_NEXT, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } - +#endif ut_d(parent_is_different = (page_rec_get_next( page_get_infimum_rec( @@ -4190,19 +4311,25 @@ btr_discard_page( &parent_cursor))) == btr_cur_get_rec(&parent_cursor))); } else if (right_page_no != FIL_NULL) { - dberr_t err; - merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH, - true, mtr, &err); + merge_page_id.set_page_no(right_page_no); + merge_block = btr_block_reget(mtr, *index, merge_page_id, + RW_X_LATCH, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } +#if 0 /* MDEV-29835 FIXME: Acquire the page latch upfront. */ + ut_ad(!memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_PREV, + block->page.frame + FIL_PAGE_OFFSET, + 4)); +#else if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_PREV, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } - +#endif ut_d(parent_is_different = page_rec_is_supremum( page_rec_get_next(btr_cur_get_rec(&parent_cursor)))); if (page_is_leaf(merge_block->page.frame)) { @@ -4244,13 +4371,10 @@ btr_discard_page( } #ifdef UNIV_ZIP_DEBUG - { - page_zip_des_t* merge_page_zip - = buf_block_get_page_zip(merge_block); - ut_a(!merge_page_zip - || page_zip_validate(merge_page_zip, - merge_block->page.frame, index)); - } + if (page_zip_des_t* merge_page_zip + = buf_block_get_page_zip(merge_block)) + ut_a(page_zip_validate(merge_page_zip, + merge_block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ if (index->has_locking()) { @@ -4269,7 +4393,7 @@ btr_discard_page( } /* Free the file page */ - dberr_t err = btr_page_free(index, block, mtr); + err = btr_page_free(index, block, mtr); if (err == DB_SUCCESS) { /* btr_check_node_ptr() needs parent block latched.
@@ -4462,6 +4586,8 @@ btr_check_node_ptr( offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor); } + ut_ad(offsets); + if (page_is_leaf(page)) { goto func_exit; @@ -4793,19 +4919,16 @@ btr_validate_level( page_zip_des_t* page_zip; #endif /* UNIV_ZIP_DEBUG */ ulint savepoint = 0; - ulint savepoint2 = 0; uint32_t parent_page_no = FIL_NULL; uint32_t parent_right_page_no = FIL_NULL; bool rightmost_child = false; mtr.start(); - if (!srv_read_only_mode) { - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); } dberr_t err; @@ -4853,7 +4976,6 @@ corrupted: offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); - savepoint2 = mtr_set_savepoint(&mtr); block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr, &err); if (!block) { @@ -4874,10 +4996,8 @@ corrupted: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. */ - mtr_release_block_at_savepoint( - &mtr, savepoint2, block); + mtr.release_last_page(); - savepoint2 = mtr_set_savepoint(&mtr); block = btr_block_get(*index, left_page_no, RW_SX_LATCH, false, &mtr, &err); @@ -4905,12 +5025,10 @@ func_exit: mem_heap_empty(heap); offsets = offsets2 = NULL; - if (!srv_read_only_mode) { - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); } page = block->page.frame; @@ -4955,7 +5073,7 @@ func_exit: if (right_page_no != FIL_NULL) { const rec_t* right_rec; - savepoint = mtr_set_savepoint(&mtr); + savepoint = mtr.get_savepoint(); right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, !level, &mtr, &err); @@ -5150,8 +5268,10 @@ broken_links: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. 
*/ - mtr_release_block_at_savepoint( - &mtr, savepoint, right_block); + ut_ad(right_block + == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, + savepoint + 1); if (parent_right_page_no != FIL_NULL) { btr_block_get(*index, diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index ac06d9b1568..c0473f76422 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3,7 +3,7 @@ Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -103,14 +103,14 @@ throughput clearly from about 100000. */ #define BTR_CUR_FINE_HISTORY_LENGTH 100000 #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ +/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ ib_counter_t btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_search_to_nth_level(). */ +btr_cur_t::search_leaf(). */ ib_counter_t btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by @@ -187,167 +187,6 @@ btr_rec_free_externally_stored_fields( /*==================== B-TREE SEARCH =========================*/ -/** Latches the leaf page or pages requested. -@param[in] block leaf page where the search converged -@param[in] latch_mode BTR_SEARCH_LEAF, ... 
-@param[in] cursor cursor -@param[in] mtr mini-transaction -@param[out] latch_leaves latched blocks and savepoints */ -void -btr_cur_latch_leaves( - buf_block_t* block, - btr_latch_mode latch_mode, - btr_cur_t* cursor, - mtr_t* mtr, - btr_latch_leaves_t* latch_leaves) -{ - compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); - compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); - compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); - ut_ad(block->page.id().space() == cursor->index()->table->space->id); - ut_ad(block->page.in_file()); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_S_LOCK - | MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - auto rtr_info = cursor->rtr_info; - if (UNIV_LIKELY_NULL(rtr_info) && !cursor->index()->is_spatial()) { - rtr_info = nullptr; - } - - const rw_lock_type_t mode = rw_lock_type_t( - latch_mode & (RW_X_LATCH | RW_S_LATCH)); - static_assert(ulint{RW_S_LATCH} == ulint{BTR_SEARCH_LEAF}, ""); - static_assert(ulint{RW_X_LATCH} == ulint{BTR_MODIFY_LEAF}, ""); - static_assert(BTR_SEARCH_LEAF & BTR_SEARCH_TREE, ""); - - switch (latch_mode) { - default: - break; - uint32_t left_page_no; - uint32_t right_page_no; - ulint save; - case BTR_SEARCH_LEAF: - case BTR_MODIFY_LEAF: - case BTR_SEARCH_TREE: - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS] - = mtr->get_savepoint(); - } -latch_block: - if (latch_leaves) { - latch_leaves->savepoints[1] = mtr->get_savepoint(); - latch_leaves->blocks[1] = block; - } - block->page.fix(); - mtr->page_lock(block, mode); - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_blocks[RTR_MAX_LEVELS] = block; - } - return; - case BTR_MODIFY_TREE: - /* It is exclusive for other operations which calls - btr_page_set_prev() */ - ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - save = mtr->get_savepoint(); - /* x-latch also siblings from left to right */ - 
left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - buf_block_t *b = btr_block_get( - *cursor->index(), left_page_no, RW_X_LATCH, - true, mtr); - - if (latch_leaves) { - latch_leaves->savepoints[0] = save; - latch_leaves->blocks[0] = b; - } - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS] - = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS] = b; - } - - save = mtr->get_savepoint(); - } - - if (latch_leaves) { - latch_leaves->savepoints[1] = mtr->get_savepoint(); - latch_leaves->blocks[1] = block; - } - - block->page.fix(); - block->page.lock.x_lock(); - - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); -#ifdef BTR_CUR_HASH_ADAPT - ut_ad(!btr_search_check_marked_free_index(block)); -#endif - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] = block; - } - - right_page_no = btr_page_get_next(block->page.frame); - - if (right_page_no != FIL_NULL) { - save = mtr->get_savepoint(); - - buf_block_t* b = btr_block_get( - *cursor->index(), right_page_no, RW_X_LATCH, - true, mtr); - if (latch_leaves) { - latch_leaves->savepoints[2] = save; - latch_leaves->blocks[2] = b; - } - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS + 2] - = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS + 2] = b; - } - } - - return; - - case BTR_SEARCH_PREV: - case BTR_MODIFY_PREV: - ut_ad(!rtr_info); - static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); - static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) - == (RW_S_LATCH ^ RW_X_LATCH), ""); - - /* Because we are holding index->lock, no page splits - or merges may run concurrently, and we may read - FIL_PAGE_PREV from a buffer-fixed, unlatched page. 
*/ - left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - save = mtr->get_savepoint(); - cursor->left_block = btr_block_get( - *cursor->index(), left_page_no, - mode, true, mtr); - if (latch_leaves) { - latch_leaves->savepoints[0] = save; - latch_leaves->blocks[0] = cursor->left_block; - } - } - - goto latch_block; - case BTR_CONT_MODIFY_TREE: - ut_ad(cursor->index()->is_spatial()); - return; - } - - MY_ASSERT_UNREACHABLE(); -} - /** Load the instant ALTER TABLE metadata from the clustered index when loading a table definition. @param[in,out] index clustered index definition @@ -729,98 +568,6 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) return index->n_core_null_bytes > 128; } -/** Optimistically latches the leaf page or pages requested. -@param[in] block guessed buffer block -@param[in] modify_clock modify clock value -@param[in,out] latch_mode BTR_SEARCH_LEAF, ... -@param[in,out] cursor cursor -@param[in] mtr mini-transaction -@return true if success */ -TRANSACTIONAL_TARGET -bool -btr_cur_optimistic_latch_leaves( - buf_block_t* block, - ib_uint64_t modify_clock, - btr_latch_mode* latch_mode, - btr_cur_t* cursor, - mtr_t* mtr) -{ - ut_ad(block->page.buf_fix_count()); - ut_ad(block->page.in_file()); - ut_ad(block->page.frame); - - switch (*latch_mode) { - default: - MY_ASSERT_UNREACHABLE(); - return(false); - case BTR_SEARCH_LEAF: - case BTR_MODIFY_LEAF: - return(buf_page_optimistic_get(*latch_mode, block, - modify_clock, mtr)); - case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */ - case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */ - uint32_t curr_page_no, left_page_no; - { - transactional_shared_lock_guard g{ - block->page.lock}; - if (block->modify_clock != modify_clock) { - return false; - } - curr_page_no = block->page.id().page_no(); - left_page_no = btr_page_get_prev(block->page.frame); - } - - static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - 
static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); - static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) - == (RW_S_LATCH ^ RW_X_LATCH), ""); - - const rw_lock_type_t mode = rw_lock_type_t( - *latch_mode & (RW_X_LATCH | RW_S_LATCH)); - - if (left_page_no != FIL_NULL) { - cursor->left_block = buf_page_get_gen( - page_id_t(cursor->index()->table->space_id, - left_page_no), - cursor->index()->table->space->zip_size(), - mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - - if (cursor->left_block - && btr_page_get_next( - cursor->left_block->page.frame) - != curr_page_no) { -release_left_block: - mtr->release_last_page(); - return false; - } - } else { - cursor->left_block = nullptr; - } - - if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) { - if (btr_page_get_prev(block->page.frame) - == left_page_no) { - /* block was already buffer-fixed while - entering the function and - buf_page_optimistic_get() buffer-fixes - it again. */ - ut_ad(2 <= block->page.buf_fix_count()); - *latch_mode = btr_latch_mode(mode); - return(true); - } - - mtr->release_last_page(); - } - - ut_ad(block->page.buf_fix_count()); - if (cursor->left_block) { - goto release_left_block; - } - } - - return false; -} - /** Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode. @@ -848,38 +595,6 @@ btr_intention_t btr_cur_get_and_clear_intention(btr_latch_mode *latch_mode) return(intention); } -/** -Gets the desired latch type for the root leaf (root page is root leaf) -at the latch mode. -@param latch_mode in: BTR_SEARCH_LEAF, ... 
-@return latch type */ -static -rw_lock_type_t -btr_cur_latch_for_root_leaf( - ulint latch_mode) -{ - switch (latch_mode) { - case BTR_SEARCH_LEAF: - case BTR_SEARCH_TREE: - case BTR_SEARCH_PREV: - return(RW_S_LATCH); - case BTR_MODIFY_LEAF: - case BTR_MODIFY_TREE: - case BTR_MODIFY_PREV: - return(RW_X_LATCH); - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - /* A root page should be latched already, - and don't need to be latched here. - fall through (RW_NO_LATCH) */ - case BTR_NO_LATCHES: - return(RW_NO_LATCH); - } - - MY_ASSERT_UNREACHABLE(); - return(RW_NO_LATCH); /* avoid compiler warnings */ -} - /** @return whether the distance between two records is at most the specified value */ static bool @@ -1197,1223 +912,879 @@ static ulint btr_node_ptr_max_size(const dict_index_t* index) return rec_max_size; } +/** @return a B-tree search mode suitable for non-leaf pages +@param mode leaf page search mode */ +static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode) +{ + if (mode > PAGE_CUR_GE) + { + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); + return mode; + } + if (mode == PAGE_CUR_GE) + return PAGE_CUR_L; + ut_ad(mode == PAGE_CUR_G); + return PAGE_CUR_LE; +} + +dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + btr_latch_mode latch_mode, mtr_t *mtr) +{ + ut_ad(index()->is_btree() || index()->is_ibuf()); + ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); + + buf_block_t *guess; + btr_op_t btr_op; + btr_intention_t lock_intention; + bool detected_same_key_root= false; + + mem_heap_t* heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets2 = offsets2_; + rec_offs_init(offsets_); + rec_offs_init(offsets2_); + + ut_ad(dict_index_check_search_tuple(index(), tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index()->page != FIL_NULL); + + MEM_UNDEFINED(&up_match, sizeof up_match); + MEM_UNDEFINED(&up_bytes, sizeof 
up_bytes); + MEM_UNDEFINED(&low_match, sizeof low_match); + MEM_UNDEFINED(&low_bytes, sizeof low_bytes); + ut_d(up_match= ULINT_UNDEFINED); + ut_d(low_match= ULINT_UNDEFINED); + + ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED) || + mtr->memo_contains_flagged(&index()->lock, + MTR_MEMO_S_LOCK | MTR_MEMO_SX_LOCK | + MTR_MEMO_X_LOCK)); + + /* These flags are mutually exclusive, they are lumped together + with the latch mode for historical reasons. It's possible for + none of the flags to be set. */ + switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { + default: + btr_op= BTR_NO_OP; + break; + case BTR_INSERT: + btr_op= (latch_mode & BTR_IGNORE_SEC_UNIQUE) + ? BTR_INSERT_IGNORE_UNIQUE_OP + : BTR_INSERT_OP; + break; + case BTR_DELETE: + btr_op= BTR_DELETE_OP; + ut_a(purge_node); + break; + case BTR_DELETE_MARK: + btr_op= BTR_DELMARK_OP; + break; + } + + /* Operations on the insert buffer tree cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->is_ibuf()); + /* Operations on the clustered index cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->is_clust()); + /* Operations on the temporary table(indexes) cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->table->is_temporary()); + + const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; + lock_intention= btr_cur_get_and_clear_intention(&latch_mode); + latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + ut_ad(!latch_by_caller + || latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_MODIFY_LEAF + || latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_MODIFY_ROOT_AND_LEAF); + + flag= BTR_CUR_BINARY; +#ifndef BTR_CUR_ADAPT + guess= nullptr; +#else + btr_search_t *info= btr_search_get_info(index()); + guess= info->root_guess; + +# ifdef BTR_CUR_HASH_ADAPT +# ifdef UNIV_SEARCH_PERF_STAT + info->n_searches++; +# endif + /* We do a dirty read of btr_search_enabled below, + and btr_search_guess_on_hash() will have to check it again. 
*/ + if (!btr_search_enabled); + else if (btr_search_guess_on_hash(index(), info, tuple, mode, + latch_mode, this, mtr)) + { + /* Search using the hash index succeeded */ + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ++btr_cur_n_sea; + + return DB_SUCCESS; + } + else + ++btr_cur_n_non_sea; +# endif +#endif + + /* If the hash search did not succeed, do binary search down the + tree */ + + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ + + const ulint savepoint= mtr->get_savepoint(); + + ulint node_ptr_max_size= 0; + rw_lock_type_t rw_latch= RW_S_LATCH; + + switch (latch_mode) { + case BTR_MODIFY_TREE: + rw_latch= RW_X_LATCH; + node_ptr_max_size= btr_node_ptr_max_size(index()); + if (latch_by_caller) + { + ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); + break; + } + if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads && + trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) + /* Most delete-intended operations are due to the purge of history. + Prioritize them when the history list is growing huge. */ + mtr_x_lock_index(index(), mtr); + else + mtr_sx_lock_index(index(), mtr); + break; +#ifdef UNIV_DEBUG + case BTR_CONT_MODIFY_TREE: + ut_ad("invalid mode" == 0); + break; +#endif + case BTR_MODIFY_ROOT_AND_LEAF: + rw_latch= RW_SX_LATCH; + /* fall through */ + default: + if (!latch_by_caller) + mtr_s_lock_index(index(), mtr); + } + + const ulint zip_size= index()->table->space->zip_size(); + + /* Start with the root page. 
*/ + page_id_t page_id(index()->table->space_id, index()->page); + + const page_cur_mode_t page_mode= btr_cur_nonleaf_mode(mode); + ulint height= ULINT_UNDEFINED; + up_match= 0; + up_bytes= 0; + low_match= 0; + low_bytes= 0; + ulint buf_mode= BUF_GET; + search_loop: + dberr_t err; + auto block_savepoint= mtr->get_savepoint(); + buf_block_t *block= + buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr, + &err, height == 0 && !index()->is_clust()); + if (!block) + { + switch (err) { + case DB_DECRYPTION_FAILED: + btr_decryption_failed(*index()); + /* fall through */ + default: + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + case DB_SUCCESS: + /* This must be a search to perform an insert, delete mark, or delete; + try using the change buffer */ + ut_ad(height == 0); + ut_ad(thr); + break; + } + + switch (btr_op) { + default: + MY_ASSERT_UNREACHABLE(); + break; + case BTR_INSERT_OP: + case BTR_INSERT_IGNORE_UNIQUE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + + if (ibuf_insert(IBUF_OP_INSERT, tuple, index(), page_id, zip_size, thr)) + { + flag= BTR_CUR_INSERT_TO_IBUF; + goto func_exit; + } + break; + + case BTR_DELMARK_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + + if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, + index(), page_id, zip_size, thr)) + { + flag = BTR_CUR_DEL_MARK_IBUF; + goto func_exit; + } + + break; + + case BTR_DELETE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); + auto& chain = buf_pool.page_hash.cell_get(page_id.fold()); + + if (!row_purge_poss_sec(purge_node, index(), tuple)) + /* The record cannot be purged yet. */ + flag= BTR_CUR_DELETE_REF; + else if (ibuf_insert(IBUF_OP_DELETE, tuple, index(), + page_id, zip_size, thr)) + /* The purge was buffered. */ + flag= BTR_CUR_DELETE_IBUF; + else + { + /* The purge could not be buffered. 
*/ + buf_pool.watch_unset(page_id, chain); + break; + } + + buf_pool.watch_unset(page_id, chain); + goto func_exit; + } + + /* Change buffering did not succeed, we must read the page. */ + buf_mode= BUF_GET; + goto search_loop; + } + + if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index()->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + { + corrupted: + ut_ad("corrupted" == 0); // FIXME: remove this + err= DB_CORRUPTION; + goto func_exit; + } + + page_cur.block= block; + ut_ad(block == mtr->at_savepoint(block_savepoint)); +#ifdef UNIV_ZIP_DEBUG + if (rw_latch == RW_NO_LATCH); + else if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block)) + ut_a(page_zip_validate(page_zip, block->page.frame, index())); +#endif /* UNIV_ZIP_DEBUG */ + const uint32_t page_level= btr_page_get_level(block->page.frame); + + if (height == ULINT_UNDEFINED) + { + /* We are in the B-tree index root page. */ +#ifdef BTR_CUR_ADAPT + info->root_guess= block; +#endif + height= page_level; + tree_height= height + 1; + + if (!height) + { + /* The root page is also a leaf page. + We may have to reacquire the page latch in a different mode. 
*/ + switch (rw_latch) { + case RW_S_LATCH: + if ((latch_mode & ~12) != RW_S_LATCH) + { + ut_ad(rw_lock_type_t(latch_mode & ~12) == RW_X_LATCH); + goto relatch_x; + } + if (latch_mode != BTR_MODIFY_PREV) + { + if (!latch_by_caller) + /* Release the tree s-latch */ + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + goto reached_latched_leaf; + } + /* fall through */ + case RW_SX_LATCH: + ut_ad(rw_latch == RW_S_LATCH || + latch_mode == BTR_MODIFY_ROOT_AND_LEAF); + relatch_x: + mtr->rollback_to_savepoint(block_savepoint); + height= ULINT_UNDEFINED; + rw_latch= RW_X_LATCH; + goto search_loop; + case RW_X_LATCH: + if (latch_mode == BTR_MODIFY_TREE) + goto reached_index_root_and_leaf; + goto reached_root_and_leaf; + case RW_NO_LATCH: + ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); + } + goto reached_leaf; + } + } + else if (UNIV_UNLIKELY(height != page_level)) + goto corrupted; + else + switch (latch_mode) { + case BTR_MODIFY_TREE: + break; + case BTR_MODIFY_ROOT_AND_LEAF: + ut_ad((mtr->at_savepoint(block_savepoint - 1)->page.id().page_no() == + index()->page) == (tree_height <= height + 2)); + if (tree_height <= height + 2) + /* Retain the root page latch. */ + break; + goto release_parent_page; + default: + if (rw_latch == RW_NO_LATCH) + { + ut_ad(!height); + break; + } + release_parent_page: + ut_ad(block_savepoint > savepoint); + mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); + block_savepoint--; + } + + if (!height) + { + reached_leaf: + /* We reached the leaf level. 
*/ + ut_ad(block == mtr->at_savepoint(block_savepoint)); + + if (latch_mode == BTR_MODIFY_ROOT_AND_LEAF) + { + reached_root_and_leaf: + if (!latch_by_caller) + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + reached_index_root_and_leaf: + ut_ad(rw_latch == RW_X_LATCH); +#ifdef BTR_CUR_HASH_ADAPT + btr_search_drop_page_hash_index(block, true); +#endif + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + goto func_exit; + } + + switch (latch_mode) { + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + ut_ad(!latch_by_caller); + + if (rw_latch == RW_NO_LATCH) + { + /* latch also siblings from left to right */ + rw_latch= rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH)); + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + rw_latch, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, rw_latch); + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + rw_latch, false, mtr, &err)) + goto func_exit; + } + goto release_tree; + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + if (rw_latch == RW_NO_LATCH) + { + ut_ad(index()->is_ibuf()); + mtr->upgrade_buffer_fix(block_savepoint, rw_lock_type_t(latch_mode)); + } + if (!latch_by_caller) + { +release_tree: + /* Release the tree s-latch */ + block_savepoint--; + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + } + /* release upper blocks */ + if (savepoint < block_savepoint) + mtr->rollback_to_savepoint(savepoint, block_savepoint); + break; + default: + ut_ad(latch_mode == BTR_MODIFY_TREE); + ut_ad(rw_latch == RW_NO_LATCH); + /* 
x-latch also siblings from left to right */ + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + } + + reached_latched_leaf: +#ifdef BTR_CUR_HASH_ADAPT + if (btr_search_enabled && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)) + { + if (page_cur_search_with_match_bytes(tuple, mode, + &up_match, &up_bytes, + &low_match, &low_bytes, &page_cur)) + goto corrupted; + } + else +#endif /* BTR_CUR_HASH_ADAPT */ + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + +#ifdef BTR_CUR_HASH_ADAPT + /* We do a dirty read of btr_search_enabled here. We will + properly check btr_search_enabled again in + btr_search_build_page_hash_index() before building a page hash + index, while holding search latch. */ + if (!btr_search_enabled); + else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) + /* This may be a search tuple for btr_pcur_t::restore_position(). 
*/ + ut_ad(tuple->is_metadata() || + (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); + else if (index()->table->is_temporary()); + else if (!rec_is_metadata(page_cur.rec, *index())) + btr_search_info_update(index(), this); +#endif /* BTR_CUR_HASH_ADAPT */ + + goto func_exit; + } + + guess= nullptr; + if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, + &heap); + + ut_ad(block == mtr->at_savepoint(block_savepoint)); + + switch (latch_mode) { + default: + break; + case BTR_MODIFY_TREE: + if (btr_cur_need_opposite_intention(block->page.frame, lock_intention, + page_cur.rec)) + /* If the rec is the first or last in the page for pessimistic + delete intention, it might cause node_ptr insert for the upper + level. We should change the intention and retry. */ + need_opposite_intention: + return pessimistic_search_leaf(tuple, mode, mtr); + + if (detected_same_key_root || lock_intention != BTR_INTENTION_BOTH || + index()->is_unique() || + (up_match <= rec_offs_n_fields(offsets) && + low_match <= rec_offs_n_fields(offsets))) + break; + + /* If the first or the last record of the page or the same key + value to the first record or last record, then another page might + be chosen when BTR_CONT_MODIFY_TREE. So, the parent page should + not released to avoiding deadlock with blocking the another search + with the same key value. 
*/ + const rec_t *first= + page_rec_get_next_const(page_get_infimum_rec(block->page.frame)); + ulint matched_fields; + + if (UNIV_UNLIKELY(!first)) + goto corrupted; + if (page_cur.rec == first || + page_rec_is_last(page_cur.rec, block->page.frame)) + { + same_key_root: + detected_same_key_root= true; + break; + } + + matched_fields= 0; + offsets2= rec_get_offsets(first, index(), offsets2, 0, ULINT_UNDEFINED, + &heap); + cmp_rec_rec(page_cur.rec, first, offsets, offsets2, index(), false, + &matched_fields); + if (matched_fields >= rec_offs_n_fields(offsets) - 1) + goto same_key_root; + if (const rec_t* last= + page_rec_get_prev_const(page_get_supremum_rec(block->page.frame))) + { + matched_fields= 0; + offsets2= rec_get_offsets(last, index(), offsets2, 0, ULINT_UNDEFINED, + &heap); + cmp_rec_rec(page_cur.rec, last, offsets, offsets2, index(), false, + &matched_fields); + if (matched_fields >= rec_offs_n_fields(offsets) - 1) + goto same_key_root; + } + else + goto corrupted; + + /* Release the non-root parent page unless it may need to be modified. */ + if (tree_height > height + 1 && + !btr_cur_will_modify_tree(index(), block->page.frame, lock_intention, + page_cur.rec, node_ptr_max_size, + zip_size, mtr)) + { + mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); + block_savepoint--; + } + } + + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); + + if (!--height) + { + /* We are about to access the leaf level. 
*/ + + switch (latch_mode) { + case BTR_MODIFY_ROOT_AND_LEAF: + rw_latch= RW_X_LATCH; + break; + case BTR_MODIFY_PREV: /* ibuf_insert() or btr_pcur_move_to_prev() */ + case BTR_SEARCH_PREV: /* btr_pcur_move_to_prev() */ + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH); + + if (page_has_prev(block->page.frame) && + page_rec_is_first(page_cur.rec, block->page.frame)) + { + ut_ad(block_savepoint + 1 == mtr->get_savepoint()); + /* Latch the previous page if the node pointer is the leftmost + of the current page. */ + buf_block_t *left= btr_block_get(*index(), + btr_page_get_prev(block->page.frame), + RW_NO_LATCH, false, mtr, &err); + if (UNIV_UNLIKELY(!left)) + goto func_exit; + ut_ad(block_savepoint + 2 == mtr->get_savepoint()); + if (UNIV_LIKELY(left->page.lock.s_lock_try())) + mtr->lock_register(block_savepoint + 1, MTR_MEMO_PAGE_S_FIX); + else + { + if (rw_latch == RW_S_LATCH) + block->page.lock.s_unlock(); + else + block->page.lock.x_unlock(); + mtr->upgrade_buffer_fix(block_savepoint + 1, RW_S_LATCH); + mtr->lock_register(block_savepoint, MTR_MEMO_BUF_FIX); + mtr->upgrade_buffer_fix(block_savepoint, RW_S_LATCH); + /* While our latch on the level-2 page prevents splits or + merges of this level-1 block, other threads may have + modified it due to splitting or merging some level-0 (leaf) + pages underneath it. Thus, we must search again. */ + if (page_cur_search_with_match(tuple, page_mode, + &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, + ULINT_UNDEFINED, &heap); + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, + offsets)); + } + } + goto leaf_with_no_latch; + case BTR_MODIFY_LEAF: + case BTR_SEARCH_LEAF: + if (index()->is_ibuf()) + goto leaf_with_no_latch; + rw_latch= rw_lock_type_t(latch_mode); + if (btr_op != BTR_NO_OP && + ibuf_should_try(index(), btr_op != BTR_INSERT_OP)) + /* Try to buffer the operation if the leaf page + is not in the buffer pool. 
*/ + buf_mode= btr_op == BTR_DELETE_OP + ? BUF_GET_IF_IN_POOL_OR_WATCH + : BUF_GET_IF_IN_POOL; + break; + case BTR_MODIFY_TREE: + ut_ad(rw_latch == RW_X_LATCH); + + if (lock_intention == BTR_INTENTION_INSERT && + page_has_next(block->page.frame) && + page_rec_is_last(page_cur.rec, block->page.frame)) + { + /* btr_insert_into_right_sibling() might cause deleting node_ptr + at upper level */ + mtr->rollback_to_savepoint(block_savepoint); + goto need_opposite_intention; + } + /* fall through */ + default: + leaf_with_no_latch: + rw_latch= RW_NO_LATCH; + } + } + + goto search_loop; +} +/** Search down to the leaf page, X-latching each page on the path and its siblings; @return error code */ +ATTRIBUTE_COLD +dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) +{ + ut_ad(index()->is_btree() || index()->is_ibuf()); + ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); + + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(flag == BTR_CUR_BINARY); + ut_ad(dict_index_check_search_tuple(index(), tuple)); + ut_ad(dtuple_check_typed(tuple)); + buf_block_t *block= mtr->at_savepoint(1); + ut_ad(block->page.id().page_no() == index()->page); + block->page.fix(); + mtr->rollback_to_savepoint(1); + ut_ad(mtr->memo_contains_flagged(&index()->lock, + MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK)); + + const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)}; + /* The root page was buffer-fixed before the rollback above; now X-latch it. */ + mtr->page_lock(block, RW_X_LATCH); + + up_match= 0; + up_bytes= 0; + low_match= 0; + low_bytes= 0; + ulint height= btr_page_get_level(block->page.frame); + tree_height= height + 1; + mem_heap_t *heap= nullptr; + + search_loop: + dberr_t err; + page_cur.block= block; + + if (UNIV_UNLIKELY(!height)) + { + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + corrupted: + err= DB_CORRUPTION; + else + { + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + +#ifdef 
BTR_CUR_HASH_ADAPT + /* We do a dirty read of btr_search_enabled here. We will + properly check btr_search_enabled again in + btr_search_build_page_hash_index() before building a page hash + index, while holding search latch. */ + if (!btr_search_enabled); + else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) + /* This may be a search tuple for btr_pcur_t::restore_position(). */ + ut_ad(tuple->is_metadata() || + (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); + else if (index()->table->is_temporary()); + else if (!rec_is_metadata(page_cur.rec, *index())) + btr_search_info_update(index(), this); +#endif /* BTR_CUR_HASH_ADAPT */ + err= DB_SUCCESS; + } + + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + } + + if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + + page_id_t page_id{block->page.id()}; + + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, + &heap); + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); + + const auto block_savepoint= mtr->get_savepoint(); + block= + buf_page_get_gen(page_id, block->zip_size(), RW_NO_LATCH, nullptr, BUF_GET, + mtr, &err, !--height && !index()->is_clust()); + + if (!block) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index()); + goto func_exit; + } + + if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index()->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + goto corrupted; + + if (height != btr_page_get_level(block->page.frame)) + goto corrupted; + /* X-latch the left sibling, then this page, then the right sibling, in that order. */ + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); +#ifdef UNIV_ZIP_DEBUG + 
const page_zip_des_t *page_zip= buf_block_get_page_zip(block); + ut_a(!page_zip || page_zip_validate(page_zip, block->page.frame, index())); +#endif /* UNIV_ZIP_DEBUG */ + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + goto search_loop; +} + /********************************************************************//** -Searches an index tree and positions a tree cursor on a given level. +Searches an index tree and positions a tree cursor on a given non-leaf level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. - -If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the +Cursor is left at the place where an insert of the search tuple should be performed in the B-tree. InnoDB does an insert immediately after the cursor. Thus, the cursor may end up on a user record, or on a page infimum record. @param level the tree level of search @param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a - unique prefix of a record, mode should be PAGE_CUR_LE, not - PAGE_CUR_GE, as the latter may end up on the previous page of - the record! Inserts should always be made using PAGE_CUR_LE - to search the position! -@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, - BTR_DELETE_MARK, or BTR_DELETE; - cursor->left_block is used to store a pointer to the left - neighbor page +@param latch RW_S_LATCH or RW_X_LATCH @param cursor tree cursor; the cursor page is s- or x-latched, but see also above! 
@param mtr mini-transaction -@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) @return DB_SUCCESS on success or error code otherwise */ TRANSACTIONAL_TARGET dberr_t btr_cur_search_to_nth_level(ulint level, const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cursor, mtr_t *mtr, - ib_uint64_t autoinc) + rw_lock_type_t rw_latch, + btr_cur_t *cursor, mtr_t *mtr) { - page_t* page = NULL; /* remove warning */ - buf_block_t* block; - buf_block_t* guess; - ulint height; - ulint up_match; - ulint up_bytes; - ulint low_match; - ulint low_bytes; - ulint rw_latch; - page_cur_mode_t page_mode; - page_cur_mode_t search_mode = PAGE_CUR_UNSUPP; - ulint buf_mode; - ulint node_ptr_max_size = srv_page_size / 2; - page_cur_t* page_cursor; - btr_op_t btr_op; - ulint root_height = 0; /* remove warning */ + dict_index_t *const index= cursor->index(); - btr_intention_t lock_intention; - buf_block_t* tree_blocks[BTR_MAX_LEVELS]; - ulint tree_savepoints[BTR_MAX_LEVELS]; - ulint n_blocks = 0; - ulint n_releases = 0; - bool detected_same_key_root = false; + ut_ad(index->is_btree() || index->is_ibuf()); + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs *offsets= offsets_; + rec_offs_init(offsets_); + ut_ad(level); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(index->is_ibuf() ? 
ibuf_inside(mtr) : index->is_btree()); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->page != FIL_NULL); - ulint leftmost_from_level = 0; - buf_block_t** prev_tree_blocks = NULL; - ulint* prev_tree_savepoints = NULL; - ulint prev_n_blocks = 0; - ulint prev_n_releases = 0; - bool need_path = true; - bool rtree_parent_modified = false; - bool mbr_adj = false; - bool found = false; - dict_index_t * const index = cursor->index(); - - DBUG_ENTER("btr_cur_search_to_nth_level"); - -#ifdef BTR_CUR_ADAPT - btr_search_t* info; -#endif /* BTR_CUR_ADAPT */ - mem_heap_t* heap = NULL; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets = offsets_; - rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets2 = offsets2_; - rec_offs_init(offsets_); - rec_offs_init(offsets2_); - /* Currently, PAGE_CUR_LE is the only search mode used for searches - ending to upper levels */ - - ut_ad(level == 0 || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode)); - ut_ad(dict_index_check_search_tuple(index, tuple)); - ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr)); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(!(index->type & DICT_FTS)); - ut_ad(index->page != FIL_NULL); - - MEM_UNDEFINED(&cursor->up_match, sizeof cursor->up_match); - MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); - MEM_UNDEFINED(&cursor->low_match, sizeof cursor->low_match); - MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); -#ifdef UNIV_DEBUG - cursor->up_match = ULINT_UNDEFINED; - cursor->low_match = ULINT_UNDEFINED; -#endif /* UNIV_DEBUG */ - - const bool latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED; - - ut_ad(!latch_by_caller - || srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK - | MTR_MEMO_SX_LOCK)); - - /* These flags are mutually exclusive, they are lumped together - with the latch mode for historical reasons. It's possible for - none of the flags to be set. 
*/ - switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { - default: - btr_op = BTR_NO_OP; - break; - case BTR_INSERT: - btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE) - ? BTR_INSERT_IGNORE_UNIQUE_OP - : BTR_INSERT_OP; - break; - case BTR_DELETE: - btr_op = BTR_DELETE_OP; - ut_a(cursor->purge_node); - break; - case BTR_DELETE_MARK: - btr_op = BTR_DELMARK_OP; - break; - } - - /* Operations on the insert buffer tree cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index)); - /* Operations on the clustered index cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index)); - /* Operations on the temporary table(indexes) cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary()); - /* Operation on the spatial index cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index)); - - lock_intention = btr_cur_get_and_clear_intention(&latch_mode); - - /* Turn the flags unrelated to the latch mode off. */ - latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - - ut_ad(!latch_by_caller - || latch_mode == BTR_SEARCH_LEAF - || latch_mode == BTR_SEARCH_TREE - || latch_mode == BTR_MODIFY_LEAF); - - ut_ad(autoinc == 0 || dict_index_is_clust(index)); - ut_ad(autoinc == 0 - || latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_MODIFY_LEAF); - ut_ad(autoinc == 0 || level == 0); - - cursor->flag = BTR_CUR_BINARY; + MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); + MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); + cursor->up_match= 0; + cursor->low_match= 0; + cursor->flag= BTR_CUR_BINARY; #ifndef BTR_CUR_ADAPT - guess = NULL; + buf_block_t *block= nullptr; #else - info = btr_search_get_info(index); - guess = info->root_guess; - -#ifdef BTR_CUR_HASH_ADAPT - -# ifdef UNIV_SEARCH_PERF_STAT - info->n_searches++; -# endif - /* We do a dirty read of btr_search_enabled below, - and btr_search_guess_on_hash() will have to check it again. 
*/ - if (!btr_search_enabled) { - } else if (autoinc == 0 - && latch_mode <= BTR_MODIFY_LEAF -# ifdef PAGE_CUR_LE_OR_EXTENDS - && mode != PAGE_CUR_LE_OR_EXTENDS -# endif /* PAGE_CUR_LE_OR_EXTENDS */ - && info->last_hash_succ - && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) - && !index->is_spatial() && !index->table->is_temporary() - && btr_search_guess_on_hash(index, info, tuple, mode, - latch_mode, cursor, mtr)) { - - /* Search using the hash index succeeded */ - - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ++btr_cur_n_sea; - - DBUG_RETURN(DB_SUCCESS); - } else { - ++btr_cur_n_non_sea; - } -# endif /* BTR_CUR_HASH_ADAPT */ + btr_search_t *info= btr_search_get_info(index); + buf_block_t *block= info->root_guess; #endif /* BTR_CUR_ADAPT */ - /* If the hash search did not succeed, do binary search down the - tree */ + ut_ad(mtr->memo_contains_flagged(&index->lock, + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ + const ulint zip_size= index->table->space->zip_size(); - ulint savepoint = mtr_set_savepoint(mtr); - - rw_lock_type_t upper_rw_latch; - - switch (latch_mode) { - case BTR_MODIFY_TREE: - /* Most of delete-intended operations are purging. - Free blocks and read IO bandwidth should be prior - for them, when the history list is glowing huge. */ - if (lock_intention == BTR_INTENTION_DELETE - && buf_pool.n_pend_reads - && trx_sys.history_size_approx() - > BTR_CUR_FINE_HISTORY_LENGTH) { -x_latch_index: - mtr_x_lock_index(index, mtr); - } else if (index->is_spatial() - && lock_intention <= BTR_INTENTION_BOTH) { - /* X lock the if there is possibility of - pessimistic delete on spatial index. 
As we could - lock upward for the tree */ - goto x_latch_index; - } else { - mtr_sx_lock_index(index, mtr); - } - upper_rw_latch = RW_X_LATCH; - break; - case BTR_CONT_MODIFY_TREE: - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - if (index->is_spatial()) { - /* If we are about to locate parent page for split - and/or merge operation for R-Tree index, X latch - the parent */ - upper_rw_latch = RW_X_LATCH; - break; - } - /* fall through */ - case BTR_CONT_SEARCH_TREE: - /* Do nothing */ - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - upper_rw_latch = RW_NO_LATCH; - break; - default: - if (!srv_read_only_mode) { - if (!latch_by_caller) { - ut_ad(latch_mode != BTR_SEARCH_TREE); - mtr_s_lock_index(index, mtr); - } - upper_rw_latch = RW_S_LATCH; - } else { - upper_rw_latch = RW_NO_LATCH; - } - } - const rw_lock_type_t root_leaf_rw_latch = btr_cur_latch_for_root_leaf( - latch_mode); - - page_cursor = btr_cur_get_page_cur(cursor); - page_cursor->index = index; - - const ulint zip_size = index->table->space->zip_size(); - - /* Start with the root page. */ - page_id_t page_id(index->table->space_id, index->page); - - if (root_leaf_rw_latch == RW_X_LATCH) { - node_ptr_max_size = btr_node_ptr_max_size(index); - } - - up_match = 0; - up_bytes = 0; - low_match = 0; - low_bytes = 0; - - height = ULINT_UNDEFINED; - - /* We use these modified search modes on non-leaf levels of the - B-tree. These let us end up in the right B-tree leaf. In that leaf - we use the original search mode. 
*/ - - switch (mode) { - case PAGE_CUR_GE: - page_mode = PAGE_CUR_L; - break; - case PAGE_CUR_G: - page_mode = PAGE_CUR_LE; - break; - default: -#ifdef PAGE_CUR_LE_OR_EXTENDS - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode) - || mode == PAGE_CUR_LE_OR_EXTENDS); -#else /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode)); -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - page_mode = mode; - break; - } - - /* Loop and search until we arrive at the desired level */ - btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; + /* Start with the root page. */ + page_id_t page_id(index->table->space_id, index->page); + ulint height= ULINT_UNDEFINED; search_loop: - buf_mode = BUF_GET; - rw_latch = RW_NO_LATCH; - rtree_parent_modified = false; - - if (height != 0) { - /* We are about to fetch the root or a non-leaf page. */ - if ((latch_mode != BTR_MODIFY_TREE || height == level) - && !prev_tree_blocks) { - /* If doesn't have SX or X latch of index, - each pages should be latched before reading. */ - if (height == ULINT_UNDEFINED - && upper_rw_latch == RW_S_LATCH - && autoinc) { - /* needs sx-latch of root page - for writing PAGE_ROOT_AUTO_INC */ - rw_latch = RW_SX_LATCH; - } else { - rw_latch = upper_rw_latch; - } - } - } else if (latch_mode <= BTR_MODIFY_LEAF) { - rw_latch = latch_mode; - - if (btr_op != BTR_NO_OP - && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) { - - /* Try to buffer the operation if the leaf - page is not in the buffer pool. */ - - buf_mode = btr_op == BTR_DELETE_OP - ? 
BUF_GET_IF_IN_POOL_OR_WATCH - : BUF_GET_IF_IN_POOL; - } - } - -retry_page_get: - ut_ad(n_blocks < BTR_MAX_LEVELS); - tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); - dberr_t err; - block = buf_page_get_gen(page_id, zip_size, rw_latch, guess, - buf_mode, mtr, &err, - height == 0 && !index->is_clust()); - if (!block) { - switch (err) { - case DB_SUCCESS: - /* change buffering */ - break; - case DB_DECRYPTION_FAILED: - btr_decryption_failed(*index); - /* fall through */ - default: - goto func_exit; - } - - /* This must be a search to perform an insert/delete - mark/ delete; try using the insert/delete buffer */ - - ut_ad(height == 0); - ut_ad(cursor->thr); - - switch (btr_op) { - default: - MY_ASSERT_UNREACHABLE(); - break; - case BTR_INSERT_OP: - case BTR_INSERT_IGNORE_UNIQUE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - ut_ad(!dict_index_is_spatial(index)); - - if (ibuf_insert(IBUF_OP_INSERT, tuple, index, - page_id, zip_size, cursor->thr)) { - - cursor->flag = BTR_CUR_INSERT_TO_IBUF; - - goto func_exit; - } - break; - - case BTR_DELMARK_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - ut_ad(!dict_index_is_spatial(index)); - - if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, - index, page_id, zip_size, - cursor->thr)) { - - cursor->flag = BTR_CUR_DEL_MARK_IBUF; - - goto func_exit; - } - - break; - - case BTR_DELETE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); - ut_ad(!dict_index_is_spatial(index)); - auto& chain = buf_pool.page_hash.cell_get( - page_id.fold()); - - if (!row_purge_poss_sec(cursor->purge_node, - index, tuple)) { - - /* The record cannot be purged yet. */ - cursor->flag = BTR_CUR_DELETE_REF; - } else if (ibuf_insert(IBUF_OP_DELETE, tuple, - index, page_id, zip_size, - cursor->thr)) { - - /* The purge was buffered. */ - cursor->flag = BTR_CUR_DELETE_IBUF; - } else { - /* The purge could not be buffered. 
*/ - buf_pool.watch_unset(page_id, chain); - break; - } - - buf_pool.watch_unset(page_id, chain); - goto func_exit; - } - - /* Insert to the insert/delete buffer did not succeed, we - must read the page from disk. */ - - buf_mode = BUF_GET; - - goto retry_page_get; - } - - tree_blocks[n_blocks] = block; - - if (height && prev_tree_blocks) { - /* also latch left sibling */ - ut_ad(rw_latch == RW_NO_LATCH); - - rw_latch = upper_rw_latch; - - /* Because we are holding index->lock, no page splits - or merges may run concurrently, and we may read - FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ - uint32_t left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - ut_ad(prev_n_blocks < leftmost_from_level); - - prev_tree_savepoints[prev_n_blocks] - = mtr_set_savepoint(mtr); - buf_block_t* get_block = buf_page_get_gen( - page_id_t(page_id.space(), left_page_no), - zip_size, rw_latch, NULL, buf_mode, - mtr, &err); - if (!get_block) { - if (err == DB_DECRYPTION_FAILED) { - btr_decryption_failed(*index); - } - goto func_exit; - } - - prev_tree_blocks[prev_n_blocks++] = get_block; - /* BTR_MODIFY_TREE doesn't update prev/next_page_no, - without their parent page's lock. So, not needed to - retry here, because we have the parent page's lock. */ - } - - mtr->s_lock_register(tree_savepoints[n_blocks]); - block->page.lock.s_lock(); - } - - page = buf_block_get_frame(block); - - if (height == ULINT_UNDEFINED - && page_is_leaf(page) - && rw_latch != RW_NO_LATCH - && rw_latch != root_leaf_rw_latch) { - /* The root page is also a leaf page (root_leaf). - We should reacquire the page, because the root page - is latched differently from leaf pages. 
*/ - ut_ad(root_leaf_rw_latch != RW_NO_LATCH); - ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); - ut_ad(rw_latch == RW_S_LATCH || autoinc); - ut_ad(!autoinc || root_leaf_rw_latch == RW_X_LATCH); - - ut_ad(n_blocks == 0); - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_blocks], - tree_blocks[n_blocks]); - - upper_rw_latch = root_leaf_rw_latch; - goto search_loop; - } + dberr_t err= DB_SUCCESS; + if (buf_block_t *b= + mtr->get_already_latched(page_id, mtr_memo_type_t(rw_latch))) + block= b; + else if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch, + block, BUF_GET, mtr, &err))) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index); + goto func_exit; + } #ifdef UNIV_ZIP_DEBUG - if (rw_latch != RW_NO_LATCH) { - const page_zip_des_t* page_zip - = buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); - } + if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block)) + ut_a(page_zip_validate(page_zip, block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ - ut_ad(fil_page_index_page_check(page)); - ut_ad(index->id == btr_page_get_index_id(page)); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page); - root_height = height; - cursor->tree_height = root_height + 1; - - if (dict_index_is_spatial(index)) { - ut_ad(cursor->rtr_info); - - /* If SSN in memory is not initialized, fetch - it from root page */ - if (!rtr_get_current_ssn_id(index)) { - /* FIXME: do this in dict_load_table_one() */ - index->set_ssn(page_get_ssn_id(page) + 1); - } - - /* Save the MBR */ - cursor->rtr_info->thr = cursor->thr; - rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr); - } - -#ifdef BTR_CUR_ADAPT - info->root_guess = block; -#endif - } - - if (height == 0) { - if (rw_latch == RW_NO_LATCH) { - btr_cur_latch_leaves(block, latch_mode, cursor, mtr, - &latch_leaves); - } - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: 
- case BTR_CONT_SEARCH_TREE: - break; - default: - if (!latch_by_caller - && !srv_read_only_mode) { - /* Release the tree s-latch */ - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - &index->lock); - } - - /* release upper blocks */ - if (prev_tree_blocks) { - ut_ad(!autoinc); - for (; - prev_n_releases < prev_n_blocks; - prev_n_releases++) { - mtr_release_block_at_savepoint( - mtr, - prev_tree_savepoints[ - prev_n_releases], - prev_tree_blocks[ - prev_n_releases]); - } - } - - for (; n_releases < n_blocks; n_releases++) { - if (n_releases == 0 - && (autoinc)) { - /* keep the root page latch */ - ut_ad(mtr->memo_contains_flagged( - tree_blocks[n_releases], - MTR_MEMO_PAGE_SX_FIX - | MTR_MEMO_PAGE_X_FIX)); - continue; - } - - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - - page_mode = mode; - } - - if (dict_index_is_spatial(index)) { - /* Remember the page search mode */ - search_mode = page_mode; - - /* Some adjustment on search mode, when the - page search mode is PAGE_CUR_RTREE_LOCATE - or PAGE_CUR_RTREE_INSERT, as we are searching - with MBRs. When it is not the target level, we - should search all sub-trees that "CONTAIN" the - search range/MBR. When it is at the target - level, the search becomes PAGE_CUR_LE */ - if (page_mode == PAGE_CUR_RTREE_LOCATE - && level == height) { - if (level == 0) { - page_mode = PAGE_CUR_LE; - } else { - page_mode = PAGE_CUR_RTREE_GET_FATHER; - } - } - - if (page_mode == PAGE_CUR_RTREE_INSERT) { - page_mode = (level == height) - ? 
PAGE_CUR_LE - : PAGE_CUR_RTREE_INSERT; - - ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); - } - - /* "need_path" indicates if we need to tracking the parent - pages, if it is not spatial comparison, then no need to - track it */ - if (page_mode < PAGE_CUR_CONTAIN) { - need_path = false; - } - - up_match = 0; - low_match = 0; - - if (latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE) { - /* Tree are locked, no need for Page Lock to protect - the "path" */ - cursor->rtr_info->need_page_lock = false; - } - } - - page_cursor->block = block; - - if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) { - ut_ad(need_path); - found = rtr_cur_search_with_match( - block, index, tuple, page_mode, page_cursor, - cursor->rtr_info); - - /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ - if (search_mode == PAGE_CUR_RTREE_INSERT - && cursor->rtr_info->mbr_adj) { - static_assert(BTR_MODIFY_TREE - == (8 | BTR_MODIFY_LEAF), ""); - - if (!(latch_mode & 8)) { - /* Parent MBR needs updated, should retry - with BTR_MODIFY_TREE */ - goto func_exit; - } - - rtree_parent_modified = true; - cursor->rtr_info->mbr_adj = false; - mbr_adj = true; - } - - if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) { - cursor->low_match = - DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; - } -#ifdef BTR_CUR_HASH_ADAPT - } else if (height == 0 && btr_search_enabled - && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) - && index->is_btree()) { - /* The adaptive hash index is only used when searching - for leaf pages (height==0), but not in r-trees. - We only need the byte prefix comparison for the purpose - of updating the adaptive hash index. */ - if (page_cur_search_with_match_bytes( - tuple, page_mode, &up_match, &up_bytes, - &low_match, &low_bytes, page_cursor)) { - err = DB_CORRUPTION; - goto func_exit; - } -#endif /* BTR_CUR_HASH_ADAPT */ - } else { - /* Search for complete index fields. 
*/ - up_bytes = low_bytes = 0; - if (page_cur_search_with_match( - tuple, page_mode, &up_match, - &low_match, page_cursor, - need_path ? cursor->rtr_info : nullptr)) { - err = DB_CORRUPTION; - goto func_exit; - } - } - - /* If this is the desired level, leave the loop */ - - ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor))); - - /* Add Predicate lock if it is serializable isolation - and only if it is in the search case */ - if (dict_index_is_spatial(index) - && cursor->rtr_info->need_prdt_lock - && mode != PAGE_CUR_RTREE_INSERT - && mode != PAGE_CUR_RTREE_LOCATE - && mode >= PAGE_CUR_CONTAIN) { - lock_prdt_t prdt; - - { - trx_t* trx = thr_get_trx(cursor->thr); - TMLockTrxGuard g{TMLockTrxArgs(*trx)}; - lock_init_prdt_from_mbr( - &prdt, &cursor->rtr_info->mbr, mode, - trx->lock.lock_heap); - } - - if (rw_latch == RW_NO_LATCH && height != 0) { - block->page.lock.s_lock(); - } - - lock_prdt_lock(block, &prdt, index, LOCK_S, - LOCK_PREDICATE, cursor->thr); - - if (rw_latch == RW_NO_LATCH && height != 0) { - block->page.lock.s_unlock(); - } - } - - if (level != height) { - - const rec_t* node_ptr; - ut_ad(height > 0); - - height--; - guess = NULL; - - node_ptr = page_cur_get_rec(page_cursor); - - offsets = rec_get_offsets(node_ptr, index, offsets, 0, - ULINT_UNDEFINED, &heap); - - /* If the rec is the first or last in the page for - pessimistic delete intention, it might cause node_ptr insert - for the upper level. We should change the intention and retry. 
- */ - if (latch_mode == BTR_MODIFY_TREE - && btr_cur_need_opposite_intention( - page, lock_intention, node_ptr)) { - -need_opposite_intention: - ut_ad(upper_rw_latch == RW_X_LATCH); - - if (n_releases > 0) { - /* release root block */ - mtr_release_block_at_savepoint( - mtr, tree_savepoints[0], - tree_blocks[0]); - } - - /* release all blocks */ - for (; n_releases <= n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - - lock_intention = BTR_INTENTION_BOTH; - - page_id.set_page_no(index->page); - up_match = 0; - low_match = 0; - height = ULINT_UNDEFINED; - - n_blocks = 0; - n_releases = 0; - - goto search_loop; - } - - if (dict_index_is_spatial(index)) { - if (page_rec_is_supremum(node_ptr)) { - cursor->low_match = 0; - cursor->up_match = 0; - goto func_exit; - } - - /* If we are doing insertion or record locating, - remember the tree nodes we visited */ - if (page_mode == PAGE_CUR_RTREE_INSERT - || (search_mode == PAGE_CUR_RTREE_LOCATE - && (latch_mode != BTR_MODIFY_LEAF))) { - bool add_latch = false; - - if (latch_mode == BTR_MODIFY_TREE - && rw_latch == RW_NO_LATCH) { - ut_ad(mtr->memo_contains_flagged( - &index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - block->page.lock.s_lock(); - add_latch = true; - } - - /* Store the parent cursor location */ -#ifdef UNIV_DEBUG - ulint num_stored = rtr_store_parent_path( - block, cursor, latch_mode, - height + 1, mtr); -#else - rtr_store_parent_path( - block, cursor, latch_mode, - height + 1, mtr); -#endif - - if (page_mode == PAGE_CUR_RTREE_INSERT) { - btr_pcur_t* r_cursor = - rtr_get_parent_cursor( - cursor, height + 1, - true); - /* If it is insertion, there should - be only one parent for each level - traverse */ -#ifdef UNIV_DEBUG - ut_ad(num_stored == 1); -#endif - - node_ptr = btr_pcur_get_rec(r_cursor); - - } - - if (add_latch) { - block->page.lock.s_unlock(); - } - - ut_ad(!page_rec_is_supremum(node_ptr)); - } - - ut_ad(page_mode == 
search_mode - || (page_mode == PAGE_CUR_WITHIN - && search_mode == PAGE_CUR_RTREE_LOCATE)); - - page_mode = search_mode; - } - - /* If the first or the last record of the page - or the same key value to the first record or last record, - the another page might be chosen when BTR_CONT_MODIFY_TREE. - So, the parent page should not released to avoiding deadlock - with blocking the another search with the same key value. */ - if (!detected_same_key_root - && lock_intention == BTR_INTENTION_BOTH - && !dict_index_is_unique(index) - && latch_mode == BTR_MODIFY_TREE - && (up_match >= rec_offs_n_fields(offsets) - 1 - || low_match >= rec_offs_n_fields(offsets) - 1)) { - const rec_t* first_rec = page_rec_get_next_const( - page_get_infimum_rec(page)); - ulint matched_fields; - - ut_ad(upper_rw_latch == RW_X_LATCH); - - if (UNIV_UNLIKELY(!first_rec)) { - corrupted: - err = DB_CORRUPTION; - goto func_exit; - } - if (node_ptr == first_rec - || page_rec_is_last(node_ptr, page)) { - detected_same_key_root = true; - } else { - matched_fields = 0; - - offsets2 = rec_get_offsets( - first_rec, index, offsets2, - 0, ULINT_UNDEFINED, &heap); - cmp_rec_rec(node_ptr, first_rec, - offsets, offsets2, index, false, - &matched_fields); - - if (matched_fields - >= rec_offs_n_fields(offsets) - 1) { - detected_same_key_root = true; - } else if (const rec_t* last_rec - = page_rec_get_prev_const( - page_get_supremum_rec( - page))) { - matched_fields = 0; - - offsets2 = rec_get_offsets( - last_rec, index, offsets2, - 0, ULINT_UNDEFINED, &heap); - cmp_rec_rec( - node_ptr, last_rec, - offsets, offsets2, index, - false, &matched_fields); - if (matched_fields - >= rec_offs_n_fields(offsets) - 1) { - detected_same_key_root = true; - } - } else { - goto corrupted; - } - } - } - - /* If the page might cause modify_tree, - we should not release the parent page's lock. 
*/ - if (!detected_same_key_root - && latch_mode == BTR_MODIFY_TREE - && !btr_cur_will_modify_tree( - index, page, lock_intention, node_ptr, - node_ptr_max_size, zip_size, mtr) - && !rtree_parent_modified) { - ut_ad(upper_rw_latch == RW_X_LATCH); - ut_ad(n_releases <= n_blocks); - - /* we can release upper blocks */ - for (; n_releases < n_blocks; n_releases++) { - if (n_releases == 0) { - /* we should not release root page - to pin to same block. */ - continue; - } - - /* release unused blocks to unpin */ - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - - if (height == level - && latch_mode == BTR_MODIFY_TREE) { - ut_ad(upper_rw_latch == RW_X_LATCH); - /* we should sx-latch root page, if released already. - It contains seg_header. */ - if (n_releases > 0) { - mtr->sx_latch_at_savepoint( - tree_savepoints[0], - tree_blocks[0]); - } - - /* x-latch the branch blocks not released yet. */ - for (ulint i = n_releases; i <= n_blocks; i++) { - mtr->x_latch_at_savepoint( - tree_savepoints[i], - tree_blocks[i]); - } - } - - /* We should consider prev_page of parent page, if the node_ptr - is the leftmost of the page. because BTR_SEARCH_PREV and - BTR_MODIFY_PREV latches prev_page of the leaf page. */ - if ((latch_mode == BTR_SEARCH_PREV - || latch_mode == BTR_MODIFY_PREV) - && !prev_tree_blocks) { - /* block should be latched for consistent - btr_page_get_prev() */ - ut_ad(mtr->memo_contains_flagged( - block, MTR_MEMO_PAGE_S_FIX - | MTR_MEMO_PAGE_X_FIX)); - - if (page_has_prev(page) - && page_rec_is_first(node_ptr, page)) { - - if (leftmost_from_level == 0) { - leftmost_from_level = height + 1; - } - } else { - leftmost_from_level = 0; - } - - if (height == 0 && leftmost_from_level > 0) { - /* should retry to get also prev_page - from level==leftmost_from_level. 
*/ - prev_tree_blocks = static_cast( - ut_malloc_nokey(sizeof(buf_block_t*) - * leftmost_from_level)); - - prev_tree_savepoints = static_cast( - ut_malloc_nokey(sizeof(ulint) - * leftmost_from_level)); - - /* back to the level (leftmost_from_level+1) */ - ulint idx = n_blocks - - (leftmost_from_level - 1); - - page_id.set_page_no( - tree_blocks[idx]->page.id().page_no()); - - for (ulint i = n_blocks - - (leftmost_from_level - 1); - i <= n_blocks; i++) { - mtr_release_block_at_savepoint( - mtr, tree_savepoints[i], - tree_blocks[i]); - } - - n_blocks -= (leftmost_from_level - 1); - height = leftmost_from_level; - ut_ad(n_releases == 0); - - /* replay up_match, low_match */ - up_match = 0; - low_match = 0; - rtr_info_t* rtr_info = need_path - ? cursor->rtr_info : NULL; - - for (ulint i = 0; i < n_blocks; i++) { - page_cursor->block = tree_blocks[i]; - if (page_cur_search_with_match( - tuple, - page_mode, &up_match, - &low_match, page_cursor, - rtr_info)) { - err = DB_CORRUPTION; - goto func_exit; - } - } - - goto search_loop; - } - } - - /* Go to the child node */ - page_id.set_page_no( - btr_node_ptr_get_child_page_no(node_ptr, offsets)); - - n_blocks++; - - if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) { - /* We're doing a search on an ibuf tree and we're one - level above the leaf page. 
*/ - - ut_ad(level == 0); - - buf_mode = BUF_GET; - rw_latch = RW_NO_LATCH; - goto retry_page_get; - } - - if (dict_index_is_spatial(index) - && page_mode >= PAGE_CUR_CONTAIN - && page_mode != PAGE_CUR_RTREE_INSERT) { - ut_ad(need_path); - rtr_node_path_t* path = - cursor->rtr_info->path; - - if (!path->empty() && found) { - ut_ad(path->back().page_no - == page_id.page_no()); - path->pop_back(); -#ifdef UNIV_DEBUG - if (page_mode == PAGE_CUR_RTREE_LOCATE - && (latch_mode != BTR_MODIFY_LEAF)) { - btr_pcur_t* cur - = cursor->rtr_info->parent_path->back( - ).cursor; - rec_t* my_node_ptr - = btr_pcur_get_rec(cur); - - offsets = rec_get_offsets( - my_node_ptr, index, offsets, - 0, ULINT_UNDEFINED, &heap); - - ulint my_page_no - = btr_node_ptr_get_child_page_no( - my_node_ptr, offsets); - - ut_ad(page_id.page_no() == my_page_no); - } -#endif - } - } - - goto search_loop; - } else if (!dict_index_is_spatial(index) - && latch_mode == BTR_MODIFY_TREE - && lock_intention == BTR_INTENTION_INSERT - && page_has_next(page) - && page_rec_is_last(page_cur_get_rec(page_cursor), page)) { - - /* btr_insert_into_right_sibling() might cause - deleting node_ptr at upper level */ - - guess = NULL; - - if (height == 0) { - /* release the leaf pages if latched */ - for (uint i = 0; i < 3; i++) { - if (latch_leaves.blocks[i] != NULL) { - mtr_release_block_at_savepoint( - mtr, latch_leaves.savepoints[i], - latch_leaves.blocks[i]); - latch_leaves.blocks[i] = NULL; - } - } - } - - goto need_opposite_intention; - } - - if (level != 0) { - ut_ad(!autoinc); - - if (upper_rw_latch == RW_NO_LATCH) { - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE); - btr_block_get( - *index, page_id.page_no(), - latch_mode == BTR_CONT_MODIFY_TREE - ? 
RW_X_LATCH : RW_SX_LATCH, false, mtr, &err); - } else { - ut_ad(mtr->memo_contains_flagged(block, - upper_rw_latch)); - - if (latch_by_caller) { - ut_ad(latch_mode == BTR_SEARCH_TREE); - /* to exclude modifying tree operations - should sx-latch the index. */ - ut_ad(mtr->memo_contains(index->lock, - MTR_MEMO_SX_LOCK)); - /* because has sx-latch of index, - can release upper blocks. */ - for (; n_releases < n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, - tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - } - - if (page_mode <= PAGE_CUR_LE) { - cursor->low_match = low_match; - cursor->up_match = up_match; - } - } else { - cursor->low_match = low_match; - cursor->low_bytes = low_bytes; - cursor->up_match = up_match; - cursor->up_bytes = up_bytes; - - if (autoinc) { - page_set_autoinc(tree_blocks[0], autoinc, mtr, false); - } - -#ifdef BTR_CUR_HASH_ADAPT - /* We do a dirty read of btr_search_enabled here. We - will properly check btr_search_enabled again in - btr_search_build_page_hash_index() before building a - page hash index, while holding search latch. */ - if (!btr_search_enabled) { - } else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) { - /* This may be a search tuple for - btr_pcur_t::restore_position(). */ - ut_ad(tuple->is_metadata() - || (tuple->is_metadata(tuple->info_bits - ^ REC_STATUS_INSTANT))); - } else if (index->is_spatial()) { - } else if (index->table->is_temporary()) { - } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { - /* Only user records belong in the adaptive - hash index. 
*/ - } else { - btr_search_info_update(index, cursor); - } -#endif /* BTR_CUR_HASH_ADAPT */ - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - } - - /* For spatial index, remember what blocks are still latched */ - if (dict_index_is_spatial(index) - && (latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_MODIFY_LEAF)) { - for (ulint i = 0; i < n_releases; i++) { - cursor->rtr_info->tree_blocks[i] = NULL; - cursor->rtr_info->tree_savepoints[i] = 0; - } - - for (ulint i = n_releases; i <= n_blocks; i++) { - cursor->rtr_info->tree_blocks[i] = tree_blocks[i]; - cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i]; - } - } - -func_exit: - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - ut_free(prev_tree_blocks); - ut_free(prev_tree_savepoints); - - if (mbr_adj) { - /* remember that we will need to adjust parent MBR */ - cursor->rtr_info->mbr_adj = true; - } - - DBUG_RETURN(err); + if (!!page_is_comp(block->page.frame) != index->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + { + corrupted: + err= DB_CORRUPTION; + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + } + + const uint32_t page_level= btr_page_get_level(block->page.frame); + + if (height == ULINT_UNDEFINED) + { + /* We are in the root node */ + height= page_level; + if (!height) + goto corrupted; + cursor->tree_height= height + 1; + } + else if (height != ulint{page_level}) + goto corrupted; + + cursor->page_cur.block= block; + + /* Search for complete index fields. 
*/ + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &cursor->up_match, + &cursor->low_match, &cursor->page_cur, + nullptr)) + goto corrupted; + + /* If this is the desired level, leave the loop */ + if (level == height) + goto func_exit; + + ut_ad(height > level); + height--; + + offsets = rec_get_offsets(cursor->page_cur.rec, index, offsets, 0, + ULINT_UNDEFINED, &heap); + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(cursor->page_cur.rec, + offsets)); + block= nullptr; + goto search_loop; } dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr) { - ulint node_ptr_max_size= srv_page_size / 2; btr_intention_t lock_intention; ulint n_blocks= 0; mem_heap_t *heap= nullptr; @@ -2424,29 +1795,21 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, rec_offs_init(offsets_); const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; - latch_mode = btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); + latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); lock_intention= btr_cur_get_and_clear_intention(&latch_mode); - /* This function doesn't need to lock left page of the leaf page */ - if (latch_mode == BTR_SEARCH_PREV) - latch_mode= BTR_SEARCH_LEAF; - else if (latch_mode == BTR_MODIFY_PREV) - latch_mode= BTR_MODIFY_LEAF; - /* Store the position of the tree latch we push to mtr so that we know how to release it when we have latched the leaf node */ auto savepoint= mtr->get_savepoint(); rw_lock_type_t upper_rw_latch= RW_X_LATCH; + ulint node_ptr_max_size= 0; - switch (latch_mode) { - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - abort(); - break; - case BTR_MODIFY_TREE: + if (latch_mode == BTR_MODIFY_TREE) + { + node_ptr_max_size= btr_node_ptr_max_size(index); /* Most of delete-intended operations are purging. Free blocks and read IO bandwidth should be prioritized for them, when the history list is growing huge. 
*/ @@ -2457,32 +1820,35 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, mtr_x_lock_index(index, mtr); else mtr_sx_lock_index(index, mtr); - break; - default: + } + else + { + static_assert(int{BTR_CONT_MODIFY_TREE} == (12 | BTR_MODIFY_LEAF), ""); + ut_ad(!(latch_mode & 8)); + /* This function doesn't need to lock left page of the leaf page */ + static_assert(int{BTR_SEARCH_PREV} == (4 | BTR_SEARCH_LEAF), ""); + static_assert(int{BTR_MODIFY_PREV} == (4 | BTR_MODIFY_LEAF), ""); + latch_mode= btr_latch_mode(latch_mode & ~4); ut_ad(!latch_by_caller || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK)); upper_rw_latch= RW_S_LATCH; - if (latch_by_caller) - break; - ut_ad(latch_mode != BTR_SEARCH_TREE); - savepoint++; - mtr_s_lock_index(index, mtr); + if (!latch_by_caller) + { + savepoint++; + mtr_s_lock_index(index, mtr); + } } ut_ad(savepoint == mtr->get_savepoint()); - const rw_lock_type_t root_leaf_rw_latch= - btr_cur_latch_for_root_leaf(latch_mode); + const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12); page_cur.index = index; uint32_t page= index->page; const auto zip_size= index->table->space->zip_size(); - if (root_leaf_rw_latch == RW_X_LATCH) - node_ptr_max_size= btr_node_ptr_max_size(index); - for (ulint height= ULINT_UNDEFINED;;) { ut_ad(n_blocks < BTR_MAX_LEVELS); @@ -2531,16 +1897,27 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, reached_leaf: const auto leaf_savepoint= mtr->get_savepoint(); ut_ad(leaf_savepoint); + ut_ad(block == mtr->at_savepoint(leaf_savepoint - 1)); - if (rw_latch == RW_NO_LATCH) - btr_cur_latch_leaves(block, latch_mode, this, mtr); - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - break; - default: + if (latch_mode == BTR_MODIFY_TREE) + { + ut_ad(rw_latch == RW_NO_LATCH); + /* x-latch also siblings from left to right */ + if (page_has_prev(block->page.frame) && + !btr_block_get(*index, 
btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + break; + mtr->upgrade_buffer_fix(leaf_savepoint - 1, RW_X_LATCH); + if (page_has_next(block->page.frame) && + !btr_block_get(*index, btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + break; + } + else + { + if (rw_latch == RW_NO_LATCH) + mtr->upgrade_buffer_fix(leaf_savepoint - 1, + rw_lock_type_t(latch_mode)); /* Release index->lock if needed, and the non-leaf pages. */ mtr->rollback_to_savepoint(savepoint - !latch_by_caller, leaf_savepoint - 1); @@ -4669,16 +4046,15 @@ btr_cur_pessimistic_update( } } - if (!srv_read_only_mode - && !big_rec_vec +#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled + if (!big_rec_vec && page_is_leaf(block->page.frame) && !dict_index_is_online_ddl(index)) { -#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled mtr->release(index->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif err = DB_SUCCESS; goto return_after_reservations; @@ -5420,15 +4796,14 @@ return_after_reservations: err_exit: mem_heap_free(heap); - if (!srv_read_only_mode - && page_is_leaf(page) - && !dict_index_is_online_ddl(index)) { #if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled + if (page_is_leaf(page) + && !dict_index_is_online_ddl(index)) { mtr->release(index->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif index->table->space->release_free_extents(n_reserved); return(ret); @@ -5545,16 +4920,18 @@ public: buf_block_t *parent_block= m_block; ulint parent_savepoint= m_savepoint; - m_savepoint= mtr_set_savepoint(&mtr); m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); + if (!m_block) + return false; if (parent_block && parent_block != right_parent) - 
mtr_release_block_at_savepoint(&mtr, parent_savepoint, parent_block); + mtr.rollback_to_savepoint(parent_savepoint, parent_savepoint + 1); - return m_block && - (level == ULINT_UNDEFINED || - btr_page_get_level(buf_block_get_frame(m_block)) == level); + m_savepoint= mtr.get_savepoint() - 1; + + return level == ULINT_UNDEFINED || + btr_page_get_level(m_block->page.frame) == level; } /** Sets page mode for leaves */ @@ -5761,14 +5138,18 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( buf_block_t *prev_block= block; ulint prev_savepoint= savepoint; - savepoint= mtr_set_savepoint(&mtr); + savepoint= mtr.get_savepoint(); /* Fetch the page. */ block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); if (prev_block) - mtr_release_block_at_savepoint(&mtr, prev_savepoint, prev_block); + { + mtr.rollback_to_savepoint(prev_savepoint, prev_savepoint + 1); + if (block) + savepoint--; + } if (!block || btr_page_get_level(buf_block_get_frame(block)) != level) goto inexact; @@ -5797,14 +5178,20 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( } while (page_id.page_no() != right_page_no); if (block) - mtr_release_block_at_savepoint(&mtr, savepoint, block); + { + ut_ad(block == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, savepoint + 1); + } return (n_rows); inexact: if (block) - mtr_release_block_at_savepoint(&mtr, savepoint, block); + { + ut_ad(block == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, savepoint + 1); + } is_n_rows_exact= false; @@ -5863,9 +5250,7 @@ ha_rows btr_estimate_n_rows_in_range(dict_index_t *index, mtr.start(); - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ - ulint savepoint= mtr_set_savepoint(&mtr); + ut_ad(mtr.get_savepoint() == 0); mtr_s_lock_index(index, &mtr); ha_rows table_n_rows= dict_table_get_n_rows(index->table); @@ -5920,10 +5305,10 @@ search_loop: } if (height == 0) - /* There 
is no need to unlach non-leaf pages here as they must already be + /* There is no need to release non-leaf pages here as they must already be unlatched in btr_est_cur_t::fetch_child(). Try to search on pages after - index->lock unlatching to decrease contention. */ - mtr_release_s_latch_at_savepoint(&mtr, savepoint, &index->lock); + releasing the index latch, to decrease contention. */ + mtr.rollback_to_savepoint(0, 1); /* There is no need to search on left page if divergence_height != ULINT_UNDEFINED, as it was already searched before @@ -6369,16 +5754,21 @@ struct btr_blob_log_check_t { DEBUG_SYNC_C("blob_write_middle"); - log_free_check(); - - DEBUG_SYNC_C("blob_write_middle_after_check"); - const mtr_log_t log_mode = m_mtr->get_log_mode(); m_mtr->start(); m_mtr->set_log_mode(log_mode); index->set_modified(*m_mtr); + log_free_check(); + + DEBUG_SYNC_C("blob_write_middle_after_check"); + if (UNIV_UNLIKELY(page_no != FIL_NULL)) { + dberr_t err; + if (UNIV_LIKELY(index->page != page_no)) { + ut_a(btr_root_block_get(index, RW_SX_LATCH, + m_mtr, &err)); + } m_pcur->btr_cur.page_cur.block = btr_block_get( *index, page_no, RW_X_LATCH, false, m_mtr); /* The page should not be evicted or corrupted while @@ -6391,7 +5781,7 @@ struct btr_blob_log_check_t { ut_ad(m_pcur->rel_pos == BTR_PCUR_ON); mtr_sx_lock_index(index, m_mtr); ut_a(m_pcur->restore_position( - BTR_MODIFY_LEAF_ALREADY_LATCHED, + BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED, m_mtr) == btr_pcur_t::SAME_ALL); } @@ -6560,6 +5950,10 @@ btr_store_big_rec_extern_fields( page_zip = buf_block_get_page_zip(rec_block); } + ut_ad(btr_mtr->get_already_latched( + page_id_t{index->table->space_id, index->page}, + MTR_MEMO_PAGE_SX_FIX)); + mtr.start(); index->set_modified(mtr); mtr.set_log_mode_sub(*btr_mtr); diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 76b173359da..642db0e9f1c 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc 
@@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2022, MariaDB Corporation. +Copyright (C) 2014, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -280,6 +280,70 @@ btr_defragment_calc_n_recs_for_size( return n_recs; } +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) +/************************************************************//** +Returns the upper level node pointer to a page. It is assumed that mtr holds +an sx-latch on the tree. +@return rec_get_offsets() of the node pointer record */ +static +rec_offs* +btr_page_search_father_node_ptr( + rec_offs* offsets,/*!< in: work area for the return value */ + mem_heap_t* heap, /*!< in: memory heap to use */ + btr_cur_t* cursor, /*!< in: cursor pointing to user record, + out: cursor on node pointer record, + its page x-latched */ + mtr_t* mtr) /*!< in: mtr */ +{ + const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); + dict_index_t* index = btr_cur_get_index(cursor); + ut_ad(!index->is_spatial()); + + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + ut_ad(dict_index_get_page(index) != page_no); + + const auto level = btr_page_get_level(btr_cur_get_page(cursor)); + + const rec_t* user_rec = btr_cur_get_rec(cursor); + ut_a(page_rec_is_user_rec(user_rec)); + + if (btr_cur_search_to_nth_level(level + 1, + dict_index_build_node_ptr(index, + user_rec, 0, + heap, level), + RW_X_LATCH, + cursor, mtr) != DB_SUCCESS) { + return nullptr; + } + + const rec_t* node_ptr = btr_cur_get_rec(cursor); + ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); + + offsets = rec_get_offsets(node_ptr, index, offsets, 0, + ULINT_UNDEFINED, &heap); + + if 
(btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) { + offsets = nullptr; + } + + return(offsets); +} + +static bool btr_page_search_father(mtr_t *mtr, btr_cur_t *cursor) +{ + rec_t *rec= + page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame)); + if (UNIV_UNLIKELY(!rec)) + return false; + cursor->page_cur.rec= rec; + mem_heap_t *heap= mem_heap_create(100); + const bool got= btr_page_search_father_node_ptr(nullptr, heap, cursor, mtr); + mem_heap_free(heap); + return got; +} + /*************************************************************//** Merge as many records from the from_block to the to_block. Delete the from_block if all records are successfully merged to to_block. @@ -408,7 +472,7 @@ btr_defragment_merge_pages( parent.page_cur.index = index; parent.page_cur.block = from_block; - if (!btr_page_get_father(mtr, &parent)) { + if (!btr_page_search_father(mtr, &parent)) { to_block = nullptr; } else if (n_recs_to_move == n_recs) { /* The whole page is merged with the previous page, @@ -699,10 +763,9 @@ processed: acquire index->lock X-latch. This entitles us to acquire page latches in any order for the index. */ mtr_x_lock_index(index, &mtr); - /* This will acquire index->lock U latch, which is allowed - when we are already holding the X-latch. */ if (buf_block_t *last_block = - item->pcur->restore_position(BTR_MODIFY_TREE, &mtr) + item->pcur->restore_position( + BTR_PURGE_TREE_ALREADY_LATCHED, &mtr) == btr_pcur_t::CORRUPTED ? nullptr : btr_defragment_n_pages(btr_pcur_get_block(item->pcur), diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index d731bcbb893..d48437e4bd0 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. 
+Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -212,24 +212,98 @@ btr_pcur_copy_stored_position( pcur_receive->old_n_fields = pcur_donate->old_n_fields; } +/** Optimistically latches the leaf page or pages requested. +@param[in] block guessed buffer block +@param[in,out] pcur cursor +@param[in,out] latch_mode BTR_SEARCH_LEAF, ... +@param[in,out] mtr mini-transaction +@return true if success */ +TRANSACTIONAL_TARGET +static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block, + btr_pcur_t *pcur, + btr_latch_mode *latch_mode, + mtr_t *mtr) +{ + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); + + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) == + (RW_S_LATCH ^ RW_X_LATCH), ""); + + const rw_lock_type_t mode= + rw_lock_type_t(*latch_mode & (RW_X_LATCH | RW_S_LATCH)); + + switch (*latch_mode) { + default: + ut_ad(*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF); + return buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr); + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + page_id_t id{0}; + uint32_t left_page_no; + ulint zip_size; + buf_block_t *left_block= nullptr; + { + transactional_shared_lock_guard g{block->page.lock}; + if (block->modify_clock != pcur->modify_clock) + return false; + id= block->page.id(); + zip_size= block->zip_size(); + left_page_no= btr_page_get_prev(block->page.frame); + } + + if (left_page_no != FIL_NULL) + { + left_block= + buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size, + mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); + + if (left_block && + btr_page_get_next(left_block->page.frame) != id.page_no()) + { +release_left_block: + mtr->release_last_page(); + return false; + } + } + + 
if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr)) + { + if (btr_page_get_prev(block->page.frame) == left_page_no) + { + /* block was already buffer-fixed while entering the function and + buf_page_optimistic_get() buffer-fixes it again. */ + ut_ad(2 <= block->page.buf_fix_count()); + *latch_mode= btr_latch_mode(mode); + return true; + } + + mtr->release_last_page(); + } + + ut_ad(block->page.buf_fix_count()); + if (left_block) + goto release_left_block; + return false; + } +} + /** Structure acts as functor to do the latching of leaf pages. It returns true if latching of leaf pages succeeded and false otherwise. */ struct optimistic_latch_leaves { btr_pcur_t *const cursor; - btr_latch_mode *latch_mode; + btr_latch_mode *const latch_mode; mtr_t *const mtr; - optimistic_latch_leaves(btr_pcur_t *cursor, btr_latch_mode *latch_mode, - mtr_t *mtr) - : cursor(cursor), latch_mode(latch_mode), mtr(mtr) {} - - bool operator() (buf_block_t *hint) const + bool operator()(buf_block_t *hint) const { - return hint && btr_cur_optimistic_latch_leaves( - hint, cursor->modify_clock, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + return hint && + btr_pcur_optimistic_latch_leaves(hint, cursor, latch_mode, mtr); } }; @@ -246,8 +320,8 @@ record GREATER than the user record which was the predecessor of the supremum. (4) cursor was positioned before the first or after the last in an empty tree: restores to before first or after the last in the tree. -@param restore_latch_mode BTR_SEARCH_LEAF, ... -@param mtr mtr +@param latch_mode BTR_SEARCH_LEAF, ... +@param mtr mini-transaction @return btr_pcur_t::SAME_ALL cursor position on user rec and points on the record with the same field values as in the stored record, btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the @@ -301,10 +375,9 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr) case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: /* Try optimistic restoration. 
*/ - if (block_when_stored.run_with_hint( - optimistic_latch_leaves(this, &restore_latch_mode, - mtr))) { + optimistic_latch_leaves{this, &restore_latch_mode, + mtr})) { pos_state = BTR_PCUR_IS_POSITIONED; latch_mode = restore_latch_mode; @@ -465,18 +538,9 @@ btr_pcur_move_to_next_page( return DB_CORRUPTION; } - ulint mode = cursor->latch_mode; - switch (mode) { - case BTR_SEARCH_TREE: - mode = BTR_SEARCH_LEAF; - break; - case BTR_MODIFY_TREE: - mode = BTR_MODIFY_LEAF; - } - dberr_t err; buf_block_t* next_block = btr_block_get( - *cursor->index(), next_page_no, mode, + *cursor->index(), next_page_no, cursor->latch_mode & ~12, page_is_leaf(page), mtr, &err); if (UNIV_UNLIKELY(!next_block)) { @@ -538,26 +602,42 @@ btr_pcur_move_backward_from_page( return true; } - buf_block_t* release_block = nullptr; + buf_block_t* block = btr_pcur_get_block(cursor); - if (!page_has_prev(btr_pcur_get_page(cursor))) { - } else if (btr_pcur_is_before_first_on_page(cursor)) { - release_block = btr_pcur_get_block(cursor); - page_cur_set_after_last(cursor->btr_cur.left_block, - btr_pcur_get_page_cur(cursor)); - } else { - /* The repositioned cursor did not end on an infimum - record on a page. Cursor repositioning acquired a latch - also on the previous page, but we do not need the latch: - release it. */ - release_block = cursor->btr_cur.left_block; + if (page_has_prev(block->page.frame)) { + buf_block_t* left_block + = mtr->at_savepoint(mtr->get_savepoint() - 1); + const page_t* const left = left_block->page.frame; + if (memcmp_aligned<4>(left + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, 4)) { + /* This should be the right sibling page, or + if there is none, the current block. */ + ut_ad(left_block == block + || !memcmp_aligned<4>(left + FIL_PAGE_PREV, + block->page.frame + + FIL_PAGE_OFFSET, 4)); + /* The previous one must be the left sibling. 
*/ + left_block + = mtr->at_savepoint(mtr->get_savepoint() - 2); + ut_ad(!memcmp_aligned<4>(left_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, 4)); + } + if (btr_pcur_is_before_first_on_page(cursor)) { + page_cur_set_after_last(left_block, + &cursor->btr_cur.page_cur); + /* Release the right sibling. */ + } else { + /* Release the left sibling. */ + block = left_block; + } + mtr->release(*block); } cursor->latch_mode = latch_mode; cursor->old_rec = nullptr; - if (release_block) { - mtr->release(*release_block); - } return false; } diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index fc890f9233b..a1609248512 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1055,26 +1055,24 @@ btr_search_guess_on_hash( index_id_t index_id; ut_ad(mtr->is_active()); - - if (!btr_search_enabled) { - return false; - } - - ut_ad(!index->is_ibuf()); - ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); - compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH}); - compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH}); - - /* Not supported for spatial index */ - ut_ad(!dict_index_is_spatial(index)); + ut_ad(index->is_btree() || index->is_ibuf()); /* Note that, for efficiency, the struct info may not be protected by any latch here! 
*/ - if (info->n_hash_potential == 0) { + if (latch_mode > BTR_MODIFY_LEAF + || !info->last_hash_succ || !info->n_hash_potential + || (tuple->info_bits & REC_INFO_MIN_REC_FLAG)) { return false; } + ut_ad(index->is_btree()); + ut_ad(!index->table->is_temporary()); + + ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); + compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH}); + compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH}); + cursor->n_fields = info->n_fields; cursor->n_bytes = info->n_bytes; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 2e320ce18c2..644d8680484 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2700,6 +2700,18 @@ re_evict: && mode != BUF_GET_IF_IN_POOL_OR_WATCH) { } else if (!ibuf_debug || recv_recovery_is_on()) { } else if (fil_space_t* space = fil_space_t::get(page_id.space())) { + for (ulint i = 0; i < mtr->get_savepoint(); i++) { + if (buf_block_t* b = mtr->block_at_savepoint(i)) { + if (b->page.oldest_modification() > 2 + && b->page.lock.have_any()) { + /* We are holding a dirty page latch + that would hang buf_flush_sync(). */ + space->release(); + goto re_evict_fail; + } + } + } + /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. 
*/ @@ -2741,9 +2753,9 @@ re_evict: /* Failed to evict the page; change it directly */ } +re_evict_fail: #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - ut_ad(state > buf_page_t::FREED); if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) { goto ignore_block; } @@ -2799,8 +2811,7 @@ ibuf_merge_corrupted: } if (rw_latch == RW_X_LATCH) { - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); - goto got_latch; + goto get_latch_valid; } else { block->page.lock.x_unlock(); goto get_latch; @@ -2808,12 +2819,10 @@ ibuf_merge_corrupted: } else { get_latch: switch (rw_latch) { - mtr_memo_type_t fix_type; case RW_NO_LATCH: mtr->memo_push(block, MTR_MEMO_BUF_FIX); return block; case RW_S_LATCH: - fix_type = MTR_MEMO_PAGE_S_FIX; block->page.lock.s_lock(); ut_ad(!block->page.is_read_fixed()); if (UNIV_UNLIKELY(block->page.id() != page_id)) { @@ -2822,13 +2831,12 @@ get_latch: goto page_id_mismatch; } get_latch_valid: - mtr->memo_push(block, fix_type); + mtr->memo_push(block, mtr_memo_type_t(rw_latch)); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(block, true); #endif /* BTR_CUR_HASH_ADAPT */ break; case RW_SX_LATCH: - fix_type = MTR_MEMO_PAGE_SX_FIX; block->page.lock.u_lock(); ut_ad(!block->page.is_io_fixed()); if (UNIV_UNLIKELY(block->page.id() != page_id)) { @@ -2838,7 +2846,6 @@ get_latch_valid: goto get_latch_valid; default: ut_ad(rw_latch == RW_X_LATCH); - fix_type = MTR_MEMO_PAGE_X_FIX; if (block->page.lock.x_lock_upgraded()) { ut_ad(block->page.id() == page_id); block->unfix(); @@ -2851,7 +2858,6 @@ get_latch_valid: goto get_latch_valid; } -got_latch: ut_ad(page_id_t(page_get_space_id(block->page.frame), page_get_page_no(block->page.frame)) == page_id); @@ -3040,8 +3046,7 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, ut_ad(!block->page.is_read_fixed()); block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); - mtr->memo_push(block, rw_latch == RW_S_LATCH - ? 
MTR_MEMO_PAGE_S_FIX : MTR_MEMO_PAGE_X_FIX); + mtr->memo_push(block, mtr_memo_type_t(rw_latch)); } ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate()); diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index daf3bc9a664..614048b7ba0 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -722,7 +722,7 @@ dict_build_field_def_step( } /***************************************************************//** -Creates an index tree for the index if it is not a member of a cluster. +Creates an index tree for the index. 
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t @@ -755,9 +755,8 @@ dict_create_index_tree_step( pcur.btr_cur.page_cur.index = UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes); - dberr_t err = - btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, - &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, + &pcur, &mtr); if (err != DB_SUCCESS) { func_exit: @@ -768,10 +767,25 @@ func_exit: btr_pcur_move_to_next_user_rec(&pcur, &mtr); if (UNIV_UNLIKELY(btr_pcur_is_after_last_on_page(&pcur))) { +corrupted: err = DB_CORRUPTION; goto func_exit; } + ulint len; + byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), + DICT_FLD__SYS_INDEXES__ID, + &len); + if (UNIV_UNLIKELY(len != 8 || mach_read_from_8(data) != index->id)) { + goto corrupted; + } + + data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), + DICT_FLD__SYS_INDEXES__PAGE_NO, &len); + if (len != 4) { + goto corrupted; + } + if (index->is_readable()) { index->set_modified(mtr); @@ -784,11 +798,6 @@ func_exit: err = DB_OUT_OF_FILE_SPACE; ); } - ulint len; - byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), - DICT_FLD__SYS_INDEXES__PAGE_NO, - &len); - ut_ad(len == 4); mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data, node->page_no); goto func_exit; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 21efb525fa8..53d1031d270 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2022, MariaDB Corporation. +Copyright (c) 2013, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4143,8 +4143,7 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx) dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_LE, - BTR_MODIFY_LEAF, &cursor, &mtr) + if (cursor.search_leaf(tuple, PAGE_CUR_LE, BTR_MODIFY_LEAF, &mtr) != DB_SUCCESS) { goto fail; } @@ -4219,8 +4218,7 @@ dict_index_set_merge_threshold( dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &cursor, &mtr) + if (cursor.search_leaf(tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &mtr) != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 004b00615e8..9910a000b5b 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1321,7 +1321,7 @@ static dberr_t dict_load_columns(dict_table_t *table, unsigned use_uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1452,7 +1452,7 @@ dict_load_virtual_col(dict_table_t *table, bool uncommitted, ulint nth_v_col) dict_index_copy_types(&tuple, sys_virtual_index, 2); pcur.btr_cur.page_cur.index = sys_virtual_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1688,8 +1688,7 @@ static dberr_t dict_load_fields(dict_index_t *index, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, - PAGE_CUR_GE, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -1947,8 +1946,7 @@ dberr_t dict_load_indexes(dict_table_t *table, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, - PAGE_CUR_GE, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -2349,7 +2347,7 @@ static dict_table_t *dict_load_table_one(const span &name, bool uncommitted = false; reload: mtr.start(); - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS || 
!btr_pcur_is_on_user_rec(&pcur)) { @@ -2607,8 +2605,7 @@ dict_load_table_on_id( dict_table_t* table = nullptr; - if (btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr) + if (btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr) == DB_SUCCESS && btr_pcur_is_on_user_rec(&pcur)) { /*---------------------------------------------------*/ @@ -2714,7 +2711,7 @@ static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) pcur.btr_cur.page_cur.index = sys_index; mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -2891,7 +2888,7 @@ dict_load_foreign( mtr.start(); mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto err_exit; @@ -3102,7 +3099,7 @@ start_load: dict_index_copy_types(&tuple, sec_index, 1); pcur.btr_cur.page_cur.index = sec_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { DBUG_RETURN(err); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 44fcf9f2c18..845f133f1a6 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1697,7 +1697,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level, static dberr_t btr_pcur_open_level(btr_pcur_t *pcur, ulint level, mtr_t *mtr, dict_index_t *index) { - pcur->latch_mode= BTR_SEARCH_TREE; + pcur->latch_mode= BTR_SEARCH_LEAF; pcur->search_mode= PAGE_CUR_G; pcur->pos_state= BTR_PCUR_IS_POSITIONED; pcur->btr_cur.page_cur.index= index; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 89af4e2420f..bad9e1e1bfd 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1474,7 +1474,7 @@ inline void mtr_t::log_file_op(mfile_type_t type, ulint space_id, ut_ad(strchr(path, '/')); ut_ad(!strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD)); - flag_modified(); + m_modifications= true; if (!is_logged()) return; m_last= nullptr; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 12e9a6913ba..e9f3106feb0 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -122,15 +122,22 @@ MY_ATTRIBUTE((nonnull, warn_unused_result)) static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr, dberr_t *err) { - buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), - space->zip_size(), RW_SX_LATCH, - nullptr, BUF_GET_POSSIBLY_FREED, - mtr, err); - if (block && space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + - block->page.frame)) + const page_id_t id{space->id, 0}; + buf_block_t *block= mtr->get_already_latched(id, MTR_MEMO_PAGE_SX_FIX); + if (block) + *err= DB_SUCCESS; + else { - *err= DB_CORRUPTION; - block= nullptr; + block= buf_page_get_gen(id, space->zip_size(), RW_SX_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, + mtr, err); + if (block && + space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + + block->page.frame)) + { + *err= DB_CORRUPTION; + block= nullptr; + } } return block; } diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 207d49abeba..8ca8681bce9 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,7 +44,6 @@ Created 2014/01/16 Jimmy Yang static bool rtr_cur_restore_position( - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_cur_t* cursor, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr); /*!< in: mtr */ @@ -74,6 +73,70 @@ rtr_adjust_parent_path( } } +/** Latches the leaf page or pages requested. +@param[in] block_savepoint leaf page where the search converged +@param[in] latch_mode BTR_SEARCH_LEAF, ... +@param[in] cursor cursor +@param[in] mtr mini-transaction */ +static void +rtr_latch_leaves( + ulint block_savepoint, + btr_latch_mode latch_mode, + btr_cur_t* cursor, + mtr_t* mtr) +{ + compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); + compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); + compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); + + buf_block_t* block = mtr->at_savepoint(block_savepoint); + + ut_ad(block->page.id().space() == cursor->index()->table->space->id); + ut_ad(block->page.in_file()); + ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, + MTR_MEMO_S_LOCK + | MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + + switch (latch_mode) { + uint32_t left_page_no; + uint32_t right_page_no; + default: + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + break; + case BTR_MODIFY_TREE: + /* It is exclusive for other operations which calls + btr_page_set_prev() */ + ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + /* x-latch also siblings from left to right */ + left_page_no = btr_page_get_prev(block->page.frame); + + if (left_page_no != FIL_NULL) { + btr_block_get(*cursor->index(), left_page_no, RW_X_LATCH, + true, mtr); + } + + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); + + right_page_no = btr_page_get_next(block->page.frame); + + if (right_page_no != FIL_NULL) { + btr_block_get(*cursor->index(), right_page_no, + RW_X_LATCH, true, mtr); + } + break; + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + rw_lock_type_t mode = + rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH)); + static_assert(int{RW_S_LATCH} == 
int{BTR_SEARCH_LEAF}, ""); + static_assert(int{RW_X_LATCH} == int{BTR_MODIFY_LEAF}, ""); + mtr->upgrade_buffer_fix(block_savepoint, mode); + } +} + /*************************************************************//** Find the next matching record. This function is used by search or record locating during index delete/update. @@ -135,6 +198,7 @@ rtr_pcur_getnext_from_path( && (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE; if (!index_locked) { + ut_ad(mtr->is_empty()); mtr_s_lock_index(index, mtr); } else { ut_ad(mtr->memo_contains_flagged(&index->lock, @@ -154,14 +218,12 @@ rtr_pcur_getnext_from_path( node_seq_t path_ssn; const page_t* page; rw_lock_type_t rw_latch; - ulint tree_idx; mysql_mutex_lock(&rtr_info->rtr_path_mutex); next_rec = rtr_info->path->back(); rtr_info->path->pop_back(); level = next_rec.level; path_ssn = next_rec.seq_no; - tree_idx = btr_cur->tree_height - level - 1; /* Maintain the parent path info as well, if needed */ if (need_parent && !skip_parent && !new_split) { @@ -223,37 +285,15 @@ rtr_pcur_getnext_from_path( rw_latch = RW_X_LATCH; } - /* Release previous locked blocks */ - if (my_latch_mode != BTR_SEARCH_LEAF) { - for (ulint idx = 0; idx < btr_cur->tree_height; - idx++) { - if (rtr_info->tree_blocks[idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[idx], - rtr_info->tree_blocks[idx]); - rtr_info->tree_blocks[idx] = NULL; - } - } - for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3; - idx++) { - if (rtr_info->tree_blocks[idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[idx], - rtr_info->tree_blocks[idx]); - rtr_info->tree_blocks[idx] = NULL; - } - } + if (my_latch_mode == BTR_MODIFY_LEAF) { + mtr->rollback_to_savepoint(1); } - /* set up savepoint to record any locks to be taken */ - rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr); - ut_ad((my_latch_mode | 4) == BTR_CONT_MODIFY_TREE || !page_is_leaf(btr_cur_get_page(btr_cur)) || 
!btr_cur->page_cur.block->page.lock.have_any()); + const auto block_savepoint = mtr->get_savepoint(); block = buf_page_get_gen( page_id_t(index->table->space_id, next_rec.page_no), zip_size, @@ -264,8 +304,6 @@ rtr_pcur_getnext_from_path( break; } - rtr_info->tree_blocks[tree_idx] = block; - page = buf_block_get_frame(block); page_ssn = page_get_ssn_id(page); @@ -396,24 +434,23 @@ rtr_pcur_getnext_from_path( if (found) { if (level == target_level) { - page_cur_t* r_cur;; + ut_ad(block + == mtr->at_savepoint(block_savepoint)); if (my_latch_mode == BTR_MODIFY_TREE && level == 0) { ut_ad(rw_latch == RW_NO_LATCH); - btr_cur_latch_leaves( - block, + rtr_latch_leaves( + block_savepoint, BTR_MODIFY_TREE, btr_cur, mtr); } - r_cur = btr_cur_get_page_cur(btr_cur); - page_cur_position( page_cur_get_rec(page_cursor), page_cur_get_block(page_cursor), - r_cur); + btr_cur_get_page_cur(btr_cur)); btr_cur->low_match = level != 0 ? DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1 @@ -425,13 +462,7 @@ rtr_pcur_getnext_from_path( last node just located */ skip_parent = true; } else { - /* Release latch on the current page */ - ut_ad(rtr_info->tree_blocks[tree_idx]); - - mtr_release_block_at_savepoint( - mtr, rtr_info->tree_savepoints[tree_idx], - rtr_info->tree_blocks[tree_idx]); - rtr_info->tree_blocks[tree_idx] = NULL; + mtr->release_last_page(); } } while (!rtr_info->path->empty()); @@ -509,50 +540,524 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index, } #endif +TRANSACTIONAL_TARGET +dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cur, mtr_t *mtr) +{ + page_cur_mode_t page_mode; + page_cur_mode_t search_mode= PAGE_CUR_UNSUPP; + + bool mbr_adj= false; + bool found= false; + dict_index_t *const index= cur->index(); + + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs *offsets= offsets_; + rec_offs_init(offsets_); + ut_ad(level == 0 || mode == PAGE_CUR_LE 
|| RTREE_SEARCH_MODE(mode)); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->is_spatial()); + ut_ad(index->page != FIL_NULL); + + MEM_UNDEFINED(&cur->up_match, sizeof cur->up_match); + MEM_UNDEFINED(&cur->up_bytes, sizeof cur->up_bytes); + MEM_UNDEFINED(&cur->low_match, sizeof cur->low_match); + MEM_UNDEFINED(&cur->low_bytes, sizeof cur->low_bytes); + ut_d(cur->up_match= ULINT_UNDEFINED); + ut_d(cur->low_match= ULINT_UNDEFINED); + + const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; + + ut_ad(!latch_by_caller + || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK + | MTR_MEMO_SX_LOCK)); + latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + ut_ad(!latch_by_caller || latch_mode == BTR_SEARCH_LEAF || + latch_mode == BTR_MODIFY_LEAF); + + cur->flag= BTR_CUR_BINARY; + +#ifndef BTR_CUR_ADAPT + buf_block_t *guess= nullptr; +#else + btr_search_t *const info= btr_search_get_info(index); + buf_block_t *guess= info->root_guess; +#endif + + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ + + const ulint savepoint= mtr->get_savepoint(); + + rw_lock_type_t upper_rw_latch, root_leaf_rw_latch= RW_NO_LATCH; + + switch (latch_mode) { + case BTR_MODIFY_TREE: + mtr_x_lock_index(index, mtr); + upper_rw_latch= root_leaf_rw_latch= RW_X_LATCH; + break; + case BTR_CONT_MODIFY_TREE: + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK)); + upper_rw_latch= RW_X_LATCH; + break; + default: + ut_ad(latch_mode != BTR_MODIFY_PREV); + ut_ad(latch_mode != BTR_SEARCH_PREV); + if (!latch_by_caller) + mtr_s_lock_index(index, mtr); + upper_rw_latch= root_leaf_rw_latch= RW_S_LATCH; + if (latch_mode == BTR_MODIFY_LEAF) + root_leaf_rw_latch= RW_X_LATCH; + } + + auto root_savepoint= mtr->get_savepoint(); + const ulint zip_size= index->table->space->zip_size(); + + /* Start with the root page. 
*/ + page_id_t page_id(index->table->space_id, index->page); + + ulint up_match= 0, up_bytes= 0, low_match= 0, low_bytes= 0; + ulint height= ULINT_UNDEFINED; + + /* We use these modified search modes on non-leaf levels of the + B-tree. These let us end up in the right B-tree leaf. In that leaf + we use the original search mode. */ + + switch (mode) { + case PAGE_CUR_GE: + page_mode= PAGE_CUR_L; + break; + case PAGE_CUR_G: + page_mode= PAGE_CUR_LE; + break; + default: +#ifdef PAGE_CUR_LE_OR_EXTENDS + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode) + || mode == PAGE_CUR_LE_OR_EXTENDS); +#else /* PAGE_CUR_LE_OR_EXTENDS */ + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode)); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + page_mode= mode; + break; + } + + search_loop: + auto buf_mode= BUF_GET; + ulint rw_latch= RW_NO_LATCH; + + if (height) + { + /* We are about to fetch the root or a non-leaf page. */ + if (latch_mode != BTR_MODIFY_TREE || height == level) + /* If doesn't have SX or X latch of index, + each page should be latched before reading. 
*/ + rw_latch= upper_rw_latch; + } + else if (latch_mode <= BTR_MODIFY_LEAF) + rw_latch= latch_mode; + + dberr_t err; + auto block_savepoint= mtr->get_savepoint(); + buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess, + buf_mode, mtr, &err, false); + if (!block) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index); + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + + if (mbr_adj) + /* remember that we will need to adjust parent MBR */ + cur->rtr_info->mbr_adj= true; + + return err; + } + + const page_t *page= buf_block_get_frame(block); +#ifdef UNIV_ZIP_DEBUG + if (rw_latch != RW_NO_LATCH) { + const page_zip_des_t *page_zip= buf_block_get_page_zip(block); + ut_a(!page_zip || page_zip_validate(page_zip, page, index)); + } +#endif /* UNIV_ZIP_DEBUG */ + + ut_ad(fil_page_index_page_check(page)); + ut_ad(index->id == btr_page_get_index_id(page)); + + if (height != ULINT_UNDEFINED); + else if (page_is_leaf(page) && + rw_latch != RW_NO_LATCH && rw_latch != root_leaf_rw_latch) + { + /* The root page is also a leaf page (root_leaf). + We should reacquire the page, because the root page + is latched differently from leaf pages. 
*/ + ut_ad(root_leaf_rw_latch != RW_NO_LATCH); + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); + + ut_ad(block == mtr->at_savepoint(block_savepoint)); + mtr->rollback_to_savepoint(block_savepoint); + + upper_rw_latch= root_leaf_rw_latch; + goto search_loop; + } + else + { + /* We are in the root node */ + + height= btr_page_get_level(page); + cur->tree_height= height + 1; + + ut_ad(cur->rtr_info); + + /* If SSN in memory is not initialized, fetch it from root page */ + if (!rtr_get_current_ssn_id(index)) + /* FIXME: do this in dict_load_table_one() */ + index->set_ssn(page_get_ssn_id(page) + 1); + + /* Save the MBR */ + cur->rtr_info->thr= cur->thr; + rtr_get_mbr_from_tuple(tuple, &cur->rtr_info->mbr); + +#ifdef BTR_CUR_ADAPT + info->root_guess= block; +#endif + } + + if (height == 0) { + if (rw_latch == RW_NO_LATCH) + { + ut_ad(block == mtr->at_savepoint(block_savepoint)); + rtr_latch_leaves(block_savepoint, latch_mode, cur, mtr); + } + + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + break; + default: + if (!latch_by_caller) + { + /* Release the tree s-latch */ + mtr->rollback_to_savepoint(savepoint, + savepoint + 1); + block_savepoint--; + root_savepoint--; + } + /* release upper blocks */ + if (savepoint < block_savepoint) + mtr->rollback_to_savepoint(savepoint, block_savepoint); + } + + page_mode= mode; + } + + /* Remember the page search mode */ + search_mode= page_mode; + + /* Some adjustment on search mode, when the page search mode is + PAGE_CUR_RTREE_LOCATE or PAGE_CUR_RTREE_INSERT, as we are searching + with MBRs. When it is not the target level, we should search all + sub-trees that "CONTAIN" the search range/MBR. When it is at the + target level, the search becomes PAGE_CUR_LE */ + + if (page_mode == PAGE_CUR_RTREE_INSERT) + { + page_mode= (level == height) + ? 
PAGE_CUR_LE + : PAGE_CUR_RTREE_INSERT; + + ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); + } + else if (page_mode == PAGE_CUR_RTREE_LOCATE && level == height) + page_mode= level == 0 ? PAGE_CUR_LE : PAGE_CUR_RTREE_GET_FATHER; + + up_match= 0; + low_match= 0; + + if (latch_mode == BTR_MODIFY_TREE || latch_mode == BTR_CONT_MODIFY_TREE) + /* Tree are locked, no need for Page Lock to protect the "path" */ + cur->rtr_info->need_page_lock= false; + + cur->page_cur.block= block; + + if (page_mode >= PAGE_CUR_CONTAIN) + { + found= rtr_cur_search_with_match(block, index, tuple, page_mode, + &cur->page_cur, cur->rtr_info); + + /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ + if (search_mode == PAGE_CUR_RTREE_INSERT && cur->rtr_info->mbr_adj) { + static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); + + if (!(latch_mode & 8)) + /* Parent MBR needs updated, should retry with BTR_MODIFY_TREE */ + goto func_exit; + + cur->rtr_info->mbr_adj= false; + mbr_adj= true; + } + + if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) + cur->low_match= DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; + } + else + { + /* Search for complete index fields. 
*/ + up_bytes= low_bytes= 0; + if (page_cur_search_with_match(tuple, page_mode, &up_match, + &low_match, &cur->page_cur, nullptr)) { + err= DB_CORRUPTION; + goto func_exit; + } + } + + /* If this is the desired level, leave the loop */ + + ut_ad(height == btr_page_get_level(btr_cur_get_page(cur))); + + /* Add Predicate lock if it is serializable isolation + and only if it is in the search case */ + if (mode >= PAGE_CUR_CONTAIN && mode != PAGE_CUR_RTREE_INSERT && + mode != PAGE_CUR_RTREE_LOCATE && cur->rtr_info->need_prdt_lock) + { + lock_prdt_t prdt; + + { + trx_t* trx= thr_get_trx(cur->thr); + TMLockTrxGuard g{TMLockTrxArgs(*trx)}; + lock_init_prdt_from_mbr(&prdt, &cur->rtr_info->mbr, mode, + trx->lock.lock_heap); + } + + if (rw_latch == RW_NO_LATCH && height != 0) + block->page.lock.s_lock(); + + lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cur->thr); + + if (rw_latch == RW_NO_LATCH && height != 0) + block->page.lock.s_unlock(); + } + + if (level != height) + { + ut_ad(height > 0); + + height--; + guess= nullptr; + + const rec_t *node_ptr= btr_cur_get_rec(cur); + + offsets= rec_get_offsets(node_ptr, index, offsets, 0, + ULINT_UNDEFINED, &heap); + + if (page_rec_is_supremum(node_ptr)) + { + cur->low_match= 0; + cur->up_match= 0; + goto func_exit; + } + + /* If we are doing insertion or record locating, + remember the tree nodes we visited */ + if (page_mode == PAGE_CUR_RTREE_INSERT || + (search_mode == PAGE_CUR_RTREE_LOCATE && + latch_mode != BTR_MODIFY_LEAF)) + { + const bool add_latch= latch_mode == BTR_MODIFY_TREE && + rw_latch == RW_NO_LATCH; + + if (add_latch) + { + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK)); + block->page.lock.s_lock(); + } + + /* Store the parent cursor location */ + ut_d(auto num_stored=) + rtr_store_parent_path(block, cur, latch_mode, height + 1, mtr); + + if (page_mode == PAGE_CUR_RTREE_INSERT) + { + btr_pcur_t *r_cursor= rtr_get_parent_cursor(cur, height + 1, true); + /* If it is 
insertion, there should be only one parent for + each level traverse */ + ut_ad(num_stored == 1); + node_ptr= btr_pcur_get_rec(r_cursor); + } + + if (add_latch) + block->page.lock.s_unlock(); + + ut_ad(!page_rec_is_supremum(node_ptr)); + } + + ut_ad(page_mode == search_mode || + (page_mode == PAGE_CUR_WITHIN && + search_mode == PAGE_CUR_RTREE_LOCATE)); + page_mode= search_mode; + + if (height == level && latch_mode == BTR_MODIFY_TREE) + { + ut_ad(upper_rw_latch == RW_X_LATCH); + for (auto i= root_savepoint, n= mtr->get_savepoint(); i < n; i++) + mtr->upgrade_buffer_fix(i, RW_X_LATCH); + } + + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + if (page_mode >= PAGE_CUR_CONTAIN && page_mode != PAGE_CUR_RTREE_INSERT) + { + rtr_node_path_t *path= cur->rtr_info->path; + + if (found && !path->empty()) + { + ut_ad(path->back().page_no == page_id.page_no()); + path->pop_back(); +#ifdef UNIV_DEBUG + if (page_mode == PAGE_CUR_RTREE_LOCATE && + latch_mode != BTR_MODIFY_LEAF) + { + btr_pcur_t* pcur= cur->rtr_info->parent_path->back().cursor; + rec_t *my_node_ptr= btr_pcur_get_rec(pcur); + + offsets= rec_get_offsets(my_node_ptr, index, offsets, + 0, ULINT_UNDEFINED, &heap); + + ut_ad(page_id.page_no() == + btr_node_ptr_get_child_page_no(my_node_ptr, offsets)); + } +#endif + } + } + + goto search_loop; + } + + if (level) + { + if (upper_rw_latch == RW_NO_LATCH) + { + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + btr_block_get(*index, page_id.page_no(), RW_X_LATCH, false, mtr, &err); + } + else + { + ut_ad(mtr->memo_contains_flagged(block, upper_rw_latch)); + ut_ad(!latch_by_caller); + } + + if (page_mode <= PAGE_CUR_LE) + { + cur->low_match= low_match; + cur->up_match= up_match; + } + } + else + { + cur->low_match= low_match; + cur->low_bytes= low_bytes; + cur->up_match= up_match; + cur->up_bytes= up_bytes; + + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != 
PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + } + + goto func_exit; +} + +dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, + mtr_t *mtr, page_cur_mode_t mode) +{ + return rtr_search_to_nth_level(0, tuple, mode, latch_mode, cur, mtr); +} + +/** Search for a spatial index leaf page record. +@param pcur cursor +@param tuple search tuple +@param mode search mode +@param mtr mini-transaction */ +dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) +{ +#ifdef UNIV_DEBUG + switch (mode) { + case PAGE_CUR_CONTAIN: + case PAGE_CUR_INTERSECT: + case PAGE_CUR_WITHIN: + case PAGE_CUR_DISJOINT: + case PAGE_CUR_MBR_EQUAL: + break; + default: + ut_ad("invalid mode" == 0); + } +#endif + pcur->latch_mode= BTR_SEARCH_LEAF; + pcur->search_mode= mode; + pcur->pos_state= BTR_PCUR_IS_POSITIONED; + pcur->trx_if_known= nullptr; + return rtr_search_leaf(&pcur->btr_cur, tuple, BTR_SEARCH_LEAF, mtr, mode); +} + /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. Mainly called by row_search_index_entry() */ -bool -rtr_pcur_open( - dict_index_t* index, /*!< in: index */ +closed with btr_pcur_close. */ +bool rtr_search( const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... 
*/ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); ut_ad(latch_mode & BTR_MODIFY_LEAF); + ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED)); + ut_ad(mtr->is_empty()); /* Initialize the cursor */ btr_pcur_init(cursor); cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode = PAGE_CUR_RTREE_LOCATE; - cursor->trx_if_known = NULL; + cursor->search_mode = PAGE_CUR_RTREE_LOCATE; + cursor->trx_if_known = nullptr; + + if (latch_mode & 8) { + mtr_x_lock_index(cursor->index(), mtr); + } else { + latch_mode + = btr_latch_mode(latch_mode | BTR_ALREADY_S_LATCHED); + mtr_sx_lock_index(cursor->index(), mtr); + } /* Search with the tree cursor */ btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor); - btr_cursor->page_cur.index = index; - btr_cursor->rtr_info = rtr_create_rtr_info(false, false, - btr_cursor, index); + btr_cursor->rtr_info + = rtr_create_rtr_info(false, false, + btr_cursor, cursor->index()); - /* Purge will SX lock the tree instead of take Page Locks */ if (btr_cursor->thr) { btr_cursor->rtr_info->need_page_lock = true; btr_cursor->rtr_info->thr = btr_cursor->thr; } - if ((latch_mode & 8) && index->lock.have_u_not_x()) { - index->lock.u_x_upgrade(SRW_LOCK_CALL); - mtr->lock_upgrade(index->lock); - } - - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_RTREE_LOCATE, - latch_mode, - btr_cursor, mtr) != DB_SUCCESS) { + if (rtr_search_leaf(btr_cursor, tuple, latch_mode, mtr) + != DB_SUCCESS) { return true; } @@ -560,7 +1065,8 @@ rtr_pcur_open( const rec_t* rec = btr_pcur_get_rec(cursor); - const bool d= rec_get_deleted_flag(rec, index->table->not_redundant()); + const bool d= rec_get_deleted_flag( + rec, cursor->index()->table->not_redundant()); if (page_rec_is_infimum(rec) || btr_pcur_get_low_match(cursor) != dtuple_get_n_fields(tuple) @@ -571,26 +1077,12 @@ rtr_pcur_open( btr_cursor->rtr_info->fd_del = true; btr_cursor->low_match = 
0; } - /* Did not find matched row in first dive. Release - latched block if any before search more pages */ - if (!(latch_mode & 8)) { - ulint tree_idx = btr_cursor->tree_height - 1; - rtr_info_t* rtr_info = btr_cursor->rtr_info; - if (rtr_info->tree_blocks[tree_idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[tree_idx], - rtr_info->tree_blocks[tree_idx]); - rtr_info->tree_blocks[tree_idx] = NULL; - } - } + mtr->rollback_to_savepoint(1); if (!rtr_pcur_getnext_from_path(tuple, PAGE_CUR_RTREE_LOCATE, btr_cursor, 0, latch_mode, - latch_mode - & (8 | BTR_ALREADY_S_LATCHED), - mtr)) { + true, mtr)) { return true; } @@ -598,6 +1090,10 @@ rtr_pcur_open( == dtuple_get_n_fields(tuple)); } + if (!(latch_mode & 8)) { + mtr->rollback_to_savepoint(0, 1); + } + return false; } @@ -641,8 +1137,7 @@ static const rec_t* rtr_get_father_node( if (sea_cur && sea_cur->tree_height > level) { ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - if (rtr_cur_restore_position(BTR_CONT_MODIFY_TREE, sea_cur, - level, mtr)) { + if (rtr_cur_restore_position(sea_cur, level, mtr)) { btr_pcur_t* r_cursor = rtr_get_parent_cursor( sea_cur, level, false); @@ -668,9 +1163,8 @@ static const rec_t* rtr_get_father_node( btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index); - if (btr_cur_search_to_nth_level(level, tuple, - PAGE_CUR_RTREE_LOCATE, - BTR_CONT_MODIFY_TREE, btr_cur, mtr) + if (rtr_search_to_nth_level(level, tuple, PAGE_CUR_RTREE_LOCATE, + BTR_CONT_MODIFY_TREE, btr_cur, mtr) != DB_SUCCESS) { } else if (sea_cur && sea_cur->tree_height == level) { rec = btr_cur_get_rec(btr_cur); @@ -729,9 +1223,8 @@ rtr_page_get_father_node_ptr( page_no = btr_cur_get_block(cursor)->page.id().page_no(); index = btr_cur_get_index(cursor); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); 
ut_ad(dict_index_get_page(index) != page_no); @@ -879,32 +1372,10 @@ rtr_init_rtr_info( if (!reinit) { /* Reset all members. */ - rtr_info->path = NULL; - rtr_info->parent_path = NULL; - rtr_info->matches = NULL; - + memset(rtr_info, 0, sizeof *rtr_info); + static_assert(PAGE_CUR_UNSUPP == 0, "compatibility"); mysql_mutex_init(rtr_path_mutex_key, &rtr_info->rtr_path_mutex, nullptr); - - memset(rtr_info->tree_blocks, 0x0, - sizeof(rtr_info->tree_blocks)); - memset(rtr_info->tree_savepoints, 0x0, - sizeof(rtr_info->tree_savepoints)); - rtr_info->mbr.xmin = 0.0; - rtr_info->mbr.xmax = 0.0; - rtr_info->mbr.ymin = 0.0; - rtr_info->mbr.ymax = 0.0; - rtr_info->thr = NULL; - rtr_info->heap = NULL; - rtr_info->cursor = NULL; - rtr_info->index = NULL; - rtr_info->need_prdt_lock = false; - rtr_info->need_page_lock = false; - rtr_info->allocated = false; - rtr_info->mbr_adj = false; - rtr_info->fd_del = false; - rtr_info->search_tuple = NULL; - rtr_info->search_mode = PAGE_CUR_UNSUPP; } ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty()); @@ -1130,7 +1601,6 @@ struct optimistic_get static bool rtr_cur_restore_position( - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_cur_t* btr_cur, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr) /*!< in: mtr */ @@ -1158,8 +1628,6 @@ rtr_cur_restore_position( r_cursor->modify_clock = 100; ); - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); - if (r_cursor->block_when_stored.run_with_hint( optimistic_get(r_cursor, mtr))) { ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f1a381f0487..d7370a240cc 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1549,8 +1549,7 @@ static void innodb_drop_database(handlerton*, char *path) mtr_t mtr; mtr.start(); pcur.btr_cur.page_cur.index = sys_index; - err= btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); + err= btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) goto err_exit; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index c6400544f73..f2a2ae7008b 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -6081,7 +6081,8 @@ func_exit: que_thr_t* thr = pars_complete_graph_for_exec( NULL, trx, ctx->heap, NULL); - const bool is_root = block->page.id().page_no() == index->page; + page_id_t id{block->page.id()}; + const bool is_root = id.page_no() == index->page; if (rec_is_metadata(rec, *index)) { ut_ad(page_rec_is_user_rec(rec)); @@ -6098,8 +6099,10 @@ func_exit: } /* Ensure that the root page is in the correct format. 
*/ - buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, - &mtr, &err); + id.set_page_no(index->page); + buf_block_t* root = mtr.get_already_latched( + id, MTR_MEMO_PAGE_SX_FIX); + if (UNIV_UNLIKELY(!root)) { goto func_exit; } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 5a97ea4ebe0..69ee0058d0b 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2297,7 +2297,7 @@ loop: btr_pcur_t pcur; pcur.btr_cur.page_cur.index= ibuf.index; ibuf_mtr_start(&mtr); - if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, 0, &mtr)) + if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr)) goto func_exit; if (!btr_pcur_is_on_user_rec(&pcur)) { @@ -2493,8 +2493,8 @@ ibuf_merge_space( /* Position the cursor on the first matching record. 
*/ pcur.btr_cur.page_cur.index = ibuf.index; - dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, + &pcur, &mtr); ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur), ibuf.index)); @@ -3238,7 +3238,7 @@ ibuf_insert_low( ibuf_mtr_start(&mtr); pcur.btr_cur.page_cur.index = ibuf.index; - err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, 0, &mtr); + err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); if (err != DB_SUCCESS) { func_exit: ibuf_mtr_commit(&mtr); @@ -3955,8 +3955,6 @@ ibuf_restore_pos( position is to be restored */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_PURGE_TREE); - if (UNIV_LIKELY(pcur->restore_position(mode, mtr) == btr_pcur_t::SAME_ALL)) { return true; @@ -4037,12 +4035,11 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ibuf_mtr_start(mtr); mysql_mutex_lock(&ibuf_mutex); + mtr_x_lock_index(ibuf.index, mtr); - if (!ibuf_restore_pos(page_id, search_tuple, BTR_PURGE_TREE, - pcur, mtr)) { - + if (!ibuf_restore_pos(page_id, search_tuple, + BTR_PURGE_TREE_ALREADY_LATCHED, pcur, mtr)) { mysql_mutex_unlock(&ibuf_mutex); - ut_ad(mtr->has_committed()); goto func_exit; } @@ -4053,13 +4050,10 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ut_a(err == DB_SUCCESS); ibuf_size_update(ibuf_root->page.frame); - mysql_mutex_unlock(&ibuf_mutex); - ibuf.empty = page_is_empty(ibuf_root->page.frame); - } else { - mysql_mutex_unlock(&ibuf_mutex); } + mysql_mutex_unlock(&ibuf_mutex); ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: @@ -4237,7 +4231,7 @@ loop: /* Position pcur in the insert buffer at the first entry for this index page */ - if (btr_pcur_open_on_user_rec(search_tuple, PAGE_CUR_GE, + if (btr_pcur_open_on_user_rec(search_tuple, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { err = DB_CORRUPTION; @@ -4454,7 +4448,7 @@ loop: /* 
Position pcur in the insert buffer at the first entry for the space */ - if (btr_pcur_open_on_user_rec(&search_tuple, PAGE_CUR_GE, + if (btr_pcur_open_on_user_rec(&search_tuple, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { goto leave_loop; diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index a2aa46b62da..a1cc10b05db 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2022, MariaDB Corporation. +Copyright (c) 2014, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -211,13 +211,12 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false) @param[in,out] mtr mini-transaction */ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr); -ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)) +ATTRIBUTE_COLD __attribute__((nonnull)) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. 
@param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction -@return error code */ -dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); +@param[in,out] mtr mini-transaction */ +void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); /*************************************************************//** Makes tree one level higher by splitting the root, and inserts @@ -241,7 +240,7 @@ btr_root_raise_and_insert( ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ - MY_ATTRIBUTE((warn_unused_result)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*************************************************************//** Reorganizes an index page. diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 49bc8a4ff1b..f6abc9f5e52 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,12 +63,6 @@ enum { BTR_KEEP_IBUF_BITMAP = 32 }; -/* btr_cur_latch_leaves() returns latched blocks and savepoints. */ -struct btr_latch_leaves_t { - buf_block_t* blocks[3]; - ulint savepoints[3]; -}; - #include "que0types.h" #include "row0types.h" @@ -126,51 +120,28 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)); -/** Optimistically latches the leaf page or pages requested. 
-@param[in] block guessed buffer block -@param[in] modify_clock modify clock value -@param[in,out] latch_mode BTR_SEARCH_LEAF, ... -@param[in,out] cursor cursor -@param[in] mtr mini-transaction -@return true if success */ -bool -btr_cur_optimistic_latch_leaves( - buf_block_t* block, - ib_uint64_t modify_clock, - btr_latch_mode* latch_mode, - btr_cur_t* cursor, - mtr_t* mtr); - MY_ATTRIBUTE((warn_unused_result)) -/** Searches an index tree and positions a tree cursor on a given level. +/********************************************************************//** +Searches an index tree and positions a tree cursor on a given non-leaf level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. +Cursor is left at the place where an insert of the +search tuple should be performed in the B-tree. InnoDB does an insert +immediately after the cursor. Thus, the cursor may end up on a user record, +or on a page infimum record. @param level the tree level of search @param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a - unique prefix of a record, mode should be PAGE_CUR_LE, not - PAGE_CUR_GE, as the latter may end up on the previous page of - the record! Inserts should always be made using PAGE_CUR_LE - to search the position! -@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, - BTR_DELETE_MARK, or BTR_DELETE; - cursor->left_block is used to store a pointer to the left - neighbor page +@param latch RW_S_LATCH or RW_X_LATCH @param cursor tree cursor; the cursor page is s- or x-latched, but see also above! 
@param mtr mini-transaction -@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) @return DB_SUCCESS on success or error code otherwise */ dberr_t btr_cur_search_to_nth_level(ulint level, const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cursor, mtr_t *mtr, - ib_uint64_t autoinc= 0); + rw_lock_type_t rw_latch, + btr_cur_t *cursor, mtr_t *mtr); /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. @@ -653,20 +624,6 @@ btr_rec_copy_externally_stored_field( ulint* len, mem_heap_t* heap); -/** Latches the leaf page or pages requested. -@param[in] block leaf page where the search converged -@param[in] latch_mode BTR_SEARCH_LEAF, ... -@param[in] cursor cursor -@param[in,out] mtr mini-transaction -@param[out] latch_leaves latched blocks and savepoints */ -void -btr_cur_latch_leaves( - buf_block_t* block, - btr_latch_mode latch_mode, - btr_cur_t* cursor, - mtr_t* mtr, - btr_latch_leaves_t* latch_leaves = nullptr); - /*######################################################################*/ /** In the pessimistic delete, if the page data size drops below this @@ -727,21 +684,16 @@ to know struct size! 
*/ struct btr_cur_t { page_cur_t page_cur; /*!< page cursor */ purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */ - buf_block_t* left_block; /*!< this field is used to store - a pointer to the left neighbor - page, in the cases - BTR_SEARCH_PREV and - BTR_MODIFY_PREV */ /*------------------------------*/ que_thr_t* thr; /*!< this field is only used - when btr_cur_search_to_nth_level + when search_leaf() is called for an index entry insertion: the calling query thread is passed here to be used in the insert buffer */ /*------------------------------*/ /** The following fields are used in - btr_cur_search_to_nth_level to pass information: */ + search_leaf() to pass information: */ /* @{ */ enum btr_cur_method flag; /*!< Search method used */ ulint tree_height; /*!< Tree height if the search is done @@ -750,8 +702,7 @@ struct btr_cur_t { ulint up_match; /*!< If the search mode was PAGE_CUR_LE, the number of matched fields to the the first user record to the right of - the cursor record after - btr_cur_search_to_nth_level; + the cursor record after search_leaf(); for the mode PAGE_CUR_GE, the matched fields to the first user record AT THE CURSOR or to the right of it; @@ -768,8 +719,7 @@ struct btr_cur_t { ulint low_match; /*!< if search mode was PAGE_CUR_LE, the number of matched fields to the first user record AT THE CURSOR or - to the left of it after - btr_cur_search_to_nth_level; + to the left of it after search_leaf(); NOT defined for PAGE_CUR_GE or any other search modes; see also the NOTE in up_match! */ @@ -803,6 +753,24 @@ struct btr_cur_t { dberr_t open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr); + /** Search the leaf page record corresponding to a key. 
+ @param tuple key to search for, with correct n_fields_cmp + @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting + @param latch_mode latch mode + @param mtr mini-transaction + @return error code */ + dberr_t search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + btr_latch_mode latch_mode, mtr_t *mtr); + + /** Search the leaf page record corresponding to a key, exclusively latching + all sibling pages on the way. + @param tuple key to search for, with correct n_fields_cmp + @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting + @param mtr mini-transaction + @return error code */ + dberr_t pessimistic_search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + mtr_t *mtr); + /** Open the cursor at a random leaf page record. @param offsets temporary memory for rec_get_offsets() @param heap memory heap for rec_get_offsets() @@ -862,14 +830,14 @@ inherited external field. */ #define BTR_EXTERN_INHERITED_FLAG 64U #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ +/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ extern ib_counter_t btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ extern ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_search_to_nth_level(). */ +btr_cur_t::search_leaf(). */ extern ib_counter_t btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index cd8eacdc212..c66a3bfa329 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. 
-Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,24 +70,6 @@ btr_pcur_init( /*==========*/ btr_pcur_t* pcur); /*!< in: persistent cursor */ -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. */ -inline -dberr_t -btr_pcur_open( - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written - (0 if none) */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done @@ -100,8 +82,7 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. */ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, - page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr); @@ -356,7 +337,7 @@ struct btr_pcur_t /** the modify clock value of the buffer block when the cursor position was stored */ ib_uint64_t modify_clock= 0; - /** btr_pcur_store_position() and btr_pcur_restore_position() state. */ + /** btr_pcur_store_position() and restore_position() state. 
*/ enum pcur_pos_t pos_state= BTR_PCUR_NOT_POSITIONED; page_cur_mode_t search_mode= PAGE_CUR_UNSUPP; /** the transaction, if we know it; otherwise this field is not defined; @@ -383,8 +364,8 @@ struct btr_pcur_t supremum. (4) cursor was positioned before the first or after the last in an empty tree: restores to before first or after the last in the tree. - @param restore_latch_mode BTR_SEARCH_LEAF, ... - @param mtr mtr + @param latch_mode BTR_SEARCH_LEAF, ... + @param mtr mini-transaction @retval SAME_ALL cursor position on user rec and points on the record with the same field values as in the stored record, @retval SAME_UNIQ cursor position is on user rec and points on the @@ -409,8 +390,7 @@ struct btr_pcur_t pos_state= BTR_PCUR_IS_POSITIONED; old_rec= nullptr; - return btr_cur.open_leaf(first, index, - BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode), mtr); + return btr_cur.open_leaf(first, index, this->latch_mode, mtr); } }; @@ -433,6 +413,24 @@ inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor) return cursor->btr_cur.page_cur.rec; } +/**************************************************************//** +Initializes and opens a persistent cursor to an index tree. */ +inline +dberr_t +btr_pcur_open( + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_LE, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + mtr_t* mtr) /*!< in: mtr */ +{ + cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + cursor->search_mode= mode; + cursor->pos_state= BTR_PCUR_IS_POSITIONED; + cursor->trx_if_known= nullptr; + return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); +} + /** Open a cursor on the first user record satisfying the search condition; in case of no match, after the last index record. 
*/ MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -440,16 +438,15 @@ inline dberr_t btr_pcur_open_on_user_rec( const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */ btr_latch_mode latch_mode, /*!< in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(mode == PAGE_CUR_GE || mode == PAGE_CUR_G); ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); - if (dberr_t err= btr_pcur_open(tuple, mode, latch_mode, cursor, 0, mtr)) + if (dberr_t err= + btr_pcur_open(tuple, PAGE_CUR_GE, latch_mode, cursor, mtr)) return err; if (!btr_pcur_is_after_last_on_page(cursor) || btr_pcur_is_after_last_in_tree(cursor)) diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl index 551f8f20fca..b827d70dc47 100644 --- a/storage/innobase/include/btr0pcur.inl +++ b/storage/innobase/include/btr0pcur.inl @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -299,38 +299,10 @@ btr_pcur_init( pcur->btr_cur.rtr_info = NULL; } -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. */ -inline -dberr_t -btr_pcur_open( - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! 
*/ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written - (0 if none) */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(!cursor->index()->is_spatial()); - cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode= mode; - cursor->pos_state= BTR_PCUR_IS_POSITIONED; - cursor->trx_if_known= nullptr; - return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, - btr_pcur_get_btr_cur(cursor), - mtr, autoinc); -} - /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a +@param mode search mode; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter may end up on the previous page of the record! @@ -339,8 +311,7 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. 
*/ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, - page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr) { @@ -348,10 +319,7 @@ dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, cursor->search_mode= mode; cursor->pos_state= BTR_PCUR_IS_POSITIONED; cursor->trx_if_known= nullptr; - - /* Search with the tree cursor */ - return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); } /**************************************************************//** diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 6118bfbc128..912c022c64f 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2022, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,25 +55,26 @@ in the index record. */ #define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \ (BTR_EXTERN_FIELD_REF_SIZE * 2) -/** Latching modes for btr_cur_search_to_nth_level(). */ +/** Latching modes for btr_cur_t::search_leaf(). */ enum btr_latch_mode { /** Search a record on a leaf page and S-latch it. */ BTR_SEARCH_LEAF = RW_S_LATCH, /** (Prepare to) modify a record on a leaf page and X-latch it. */ BTR_MODIFY_LEAF = RW_X_LATCH, + /** U-latch root and X-latch a leaf page */ + BTR_MODIFY_ROOT_AND_LEAF = RW_SX_LATCH, /** Obtain no latches. */ BTR_NO_LATCHES = RW_NO_LATCH, - /** Search the previous record. */ + /** Search the previous record. 
+ Used in btr_pcur_move_backward_from_page(). */ BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF, - /** Modify the previous record. */ + /** Modify the previous record. + Used in btr_pcur_move_backward_from_page() and ibuf_insert(). */ BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF, - /** Start searching the entire B-tree. */ - BTR_SEARCH_TREE = 8 | BTR_SEARCH_LEAF, - /** Start modifying1 the entire B-tree. */ + /** Start modifying the entire B-tree. */ BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF, - /** Continue searching the entire B-tree. */ - BTR_CONT_SEARCH_TREE = 4 | BTR_SEARCH_TREE, - /** Continue modifying the entire B-tree. */ + /** Continue modifying the entire R-tree. + Only used by rtr_search_to_nth_level(). */ BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE, /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually @@ -98,14 +99,14 @@ enum btr_latch_mode { dict_index_t::lock S-latch is being held. */ BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, - /** Search the entire index tree, assuming that the - dict_index_t::lock S-latch is being held. */ - BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE - | BTR_ALREADY_S_LATCHED, /** Search and X-latch a leaf page, assuming that the dict_index_t::lock is being held in non-exclusive mode. */ BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED, + /** U-latch root and X-latch a leaf page, assuming that + dict_index_t::lock is being held in U mode. */ + BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF + | BTR_ALREADY_S_LATCHED, /** Attempt to delete-mark a secondary index record. */ BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK, @@ -132,6 +133,9 @@ enum btr_latch_mode { /** Attempt to delete a record in the tree. */ BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + /** Attempt to delete a record in an x-latched tree. */ + BTR_PURGE_TREE_ALREADY_LATCHED = BTR_PURGE_TREE + | BTR_ALREADY_S_LATCHED, /** Attempt to insert a record into the tree. 
*/ BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT, diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 87236415150..d17f7456a15 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -791,7 +791,7 @@ public: { ut_ad(fsp_is_system_temporary(id().space())); ut_ad(in_file()); - ut_ad(!oldest_modification()); + ut_ad((oldest_modification() | 2) == 2); oldest_modification_= 2; } diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index c07ff679a80..f615b856126 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -123,8 +123,6 @@ inline void buf_flush_note_modification(buf_block_t *b, lsn_t start, lsn_t end) if (oldest_modification > 1) ut_ad(oldest_modification <= start); - else if (fsp_is_system_temporary(b->page.id().space())) - b->page.set_temp_modified(); else buf_pool.insert_into_flush_list(b, start); srv_stats.buf_pool_write_requests.inc(); diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h index 777f2432c93..b07261ce042 100644 --- a/storage/innobase/include/gis0rtree.h +++ b/storage/innobase/include/gis0rtree.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -59,6 +59,44 @@ Created 2013/03/27 Jimmy Yang and Allen Lai /* Geometry data header */ #define GEO_DATA_HEADER_SIZE 4 + +/** Search for a spatial index leaf page record. 
+@param cur cursor +@param tuple search tuple +@param latch_mode latching mode +@param mtr mini-transaction +@param mode search mode */ +dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, mtr_t *mtr, + page_cur_mode_t mode= PAGE_CUR_RTREE_LOCATE) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** Search for inserting a spatial index leaf page record. +@param cur cursor +@param tuple search tuple +@param latch_mode latching mode +@param mtr mini-transaction */ +inline dberr_t rtr_insert_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, mtr_t *mtr) +{ + return rtr_search_leaf(cur, tuple, latch_mode, mtr, PAGE_CUR_RTREE_INSERT); +} + +/** Search for a spatial index leaf page record. +@param pcur cursor +@param tuple search tuple +@param mode search mode +@param mtr mini-transaction */ +dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + +dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cur, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + /**********************************************************************//** Builds a Rtree node pointer out of a physical record and a page number. @return own: node pointer */ @@ -295,11 +333,9 @@ rtr_store_parent_path( /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ -bool -rtr_pcur_open( - dict_index_t* index, /*!< in: index */ +bool rtr_search( const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... 
*/ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((warn_unused_result)); diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h index 4fccfdb6c26..d6a4ef67a38 100644 --- a/storage/innobase/include/gis0type.h +++ b/storage/innobase/include/gis0type.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2020, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -105,12 +105,6 @@ typedef struct rtr_info{ matched_rec_t* matches;/*!< struct holding matching leaf records */ mysql_mutex_t rtr_path_mutex; /*!< mutex protect the "path" vector */ - buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; - /*!< tracking pages that would be locked - at leaf level, for future free */ - ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; - /*!< savepoint used to release latches/blocks - on each level and leaf level */ rtr_mbr_t mbr; /*!< the search MBR */ que_thr_t* thr; /*!< the search thread */ mem_heap_t* heap; /*!< memory heap */ diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl index 9f4e937f31d..1e21f74ff2b 100644 --- a/storage/innobase/include/ibuf0ibuf.inl +++ b/storage/innobase/include/ibuf0ibuf.inl @@ -100,9 +100,8 @@ ibuf_should_try( decide */ { return(innodb_change_buffering + && !(index->type & (DICT_CLUSTERED | DICT_IBUF)) && ibuf.max_size != 0 - && !dict_index_is_clust(index) - && !dict_index_is_spatial(index) && index->table->quiesce == QUIESCE_NONE && (ignore_sec_unique || !dict_index_is_unique(index))); } diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 
093b706c1de..d34a62e7bb2 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2019, 2022, MariaDB Corporation. +Copyright (c) 2019, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -401,7 +401,8 @@ inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage, ut_ad(have_offset || offset == 0); ut_ad(offset + len <= srv_page_size); static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency"); - + ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) || + memo_contains_flagged(bpage, MTR_MEMO_MODIFY)); size_t max_len; if (!have_len) max_len= 1 + 5 + 5; @@ -511,33 +512,6 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, memcpy(b, ut_align_offset(d, srv_page_size), len); } -/** Initialize an entire page. -@param[in,out] b buffer page */ -inline void mtr_t::init(buf_block_t *b) -{ - const page_id_t id{b->page.id()}; - ut_ad(is_named_space(id.space())); - ut_ad(!m_freed_pages == !m_freed_space); - - if (UNIV_LIKELY_NULL(m_freed_space) && - m_freed_space->id == id.space() && - m_freed_pages->remove_if_exists(b->page.id().page_no()) && - m_freed_pages->empty()) - { - delete m_freed_pages; - m_freed_pages= nullptr; - m_freed_space= nullptr; - } - - b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); - - if (!is_logged()) - return; - - m_log.close(log_write(b->page.id(), &b->page)); - m_last_offset= FIL_PAGE_TYPE; -} - /** Write an EXTENDED log record. 
@param block buffer pool page @param type extended record subtype; @see mrec_ext_t */ diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 41f9b473856..140cd3dc1b6 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -37,15 +37,6 @@ Created 11/26/1995 Heikki Tuuri /** Commit a mini-transaction. */ #define mtr_commit(m) (m)->commit() -/** Set and return a savepoint in mtr. -@return savepoint */ -#define mtr_set_savepoint(m) (m)->get_savepoint() - -/** Release the (index tree) s-latch stored in an mtr memo after a -savepoint. */ -#define mtr_release_s_latch_at_savepoint(m, s, l) \ - (m)->release_s_latch_at_savepoint((s), (l)) - /** Change the logging mode of a mini-transaction. @return old mode */ #define mtr_set_log_mode(m, d) (m)->set_log_mode((d)) @@ -60,13 +51,10 @@ savepoint. */ # define mtr_sx_lock_index(i,m) (m)->u_lock(&(i)->lock) #endif -#define mtr_release_block_at_savepoint(m, s, b) \ - (m)->release_block_at_savepoint((s), (b)) - /** Mini-transaction memo stack slot. */ struct mtr_memo_slot_t { - /** pointer to the object, or nullptr if released */ + /** pointer to the object */ void *object; /** type of the stored object */ mtr_memo_type_t type; @@ -125,83 +113,36 @@ struct mtr_t { return m_memo.size(); } - /** Release the (index tree) s-latch stored in an mtr memo after a savepoint. 
- @param savepoint value returned by get_savepoint() - @param lock index latch to release */ - void release_s_latch_at_savepoint(ulint savepoint, index_lock *lock) + /** Get the block at a savepoint */ + buf_block_t *at_savepoint(ulint savepoint) const { ut_ad(is_active()); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == lock); - ut_ad(slot.type == MTR_MEMO_S_LOCK); - slot.object= nullptr; - lock->s_unlock(); + const mtr_memo_slot_t &slot= m_memo[savepoint]; + ut_ad(slot.type < MTR_MEMO_S_LOCK); + ut_ad(slot.object); + return static_cast(slot.object); } - /** Release the block in an mtr memo after a savepoint. */ - void release_block_at_savepoint(ulint savepoint, buf_block_t *block) + + /** Try to get a block at a savepoint. + @param savepoint the savepoint right before the block was acquired + @return the block at the savepoint + @retval nullptr if no buffer block was registered at that savepoint */ + buf_block_t *block_at_savepoint(ulint savepoint) const { ut_ad(is_active()); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == block); - ut_ad(!(slot.type & MTR_MEMO_MODIFY)); - slot.object= nullptr; - block->page.unfix(); - - switch (slot.type) { - case MTR_MEMO_PAGE_S_FIX: - block->page.lock.s_unlock(); - break; - case MTR_MEMO_PAGE_SX_FIX: - case MTR_MEMO_PAGE_X_FIX: - block->page.lock.u_or_x_unlock(slot.type == MTR_MEMO_PAGE_SX_FIX); - break; - default: - break; - } + const mtr_memo_slot_t &slot= m_memo[savepoint]; + return slot.type < MTR_MEMO_S_LOCK + ? static_cast(slot.object) + : nullptr; } - /** @return if we are about to make a clean buffer block dirty */ - static bool is_block_dirtied(const buf_page_t &b) - { - ut_ad(b.in_file()); - ut_ad(b.frame); - ut_ad(b.buf_fix_count()); - return b.oldest_modification() <= 1 && b.id().space() < SRV_TMP_SPACE_ID; - } - - /** X-latch a not yet latched block after a savepoint. 
*/ - void x_latch_at_savepoint(ulint savepoint, buf_block_t *block) - { - ut_ad(is_active()); - ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX | - MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == block); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_X_FIX; - block->page.lock.x_lock(); - ut_ad(!block->page.is_io_fixed()); - - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block->page); - } - - /** U-latch a not yet latched block after a savepoint. */ - void sx_latch_at_savepoint(ulint savepoint, buf_block_t *block) - { - ut_ad(is_active()); - ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX | - MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == block); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_SX_FIX; - block->page.lock.u_lock(); - ut_ad(!block->page.is_io_fixed()); - - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block->page); - } + /** Retrieve a page that has already been latched. + @param id page identifier + @param type page latch type + @return block + @retval nullptr if the block had not been latched yet */ + buf_block_t *get_already_latched(const page_id_t id, mtr_memo_type_t type) + const; /** @return the logging mode */ mtr_log_t get_log_mode() const @@ -360,23 +301,17 @@ struct mtr_t { void release(const index_lock &lock) { release(&lock); } /** Release a latch to an unmodified page. */ void release(const buf_block_t &block) { release(&block); } - - /** Note that the mini-transaction will modify data. */ - void flag_modified() { m_modifications = true; } private: /** Release an unmodified object. */ void release(const void *object); +public: /** Mark the given latched page as modified. @param block page that will be modified */ - void modify(const buf_block_t& block); -public: - /** Note that the mini-transaction will modify a block. 
*/ - void set_modified(const buf_block_t &block) - { flag_modified(); if (m_log_mode != MTR_LOG_NONE) modify(block); } + void set_modified(const buf_block_t &block); /** Set the state to not-modified. This will not log the changes. This is only used during redo log apply, to avoid logging the changes. */ - void discard_modifications() { m_modifications = false; } + void discard_modifications() { m_modifications= false; } /** Get the LSN of commit(). @return the commit LSN @@ -408,28 +343,17 @@ public: @param rw_latch latch to acquire */ void upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch); - /** Register a page latch on a buffer-fixed block was buffer-fixed. - @param latch latch type */ - void u_lock_register(ulint savepoint) + /** Register a change to the page latch state. */ + void lock_register(ulint savepoint, mtr_memo_type_t type) { mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_SX_FIX; - } - - /** Register a page latch on a buffer-fixed block was buffer-fixed. - @param latch latch type */ - void s_lock_register(ulint savepoint) - { - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_S_FIX; + ut_ad(slot.type <= MTR_MEMO_BUF_FIX); + ut_ad(type <= MTR_MEMO_BUF_FIX); + slot.type= type; } /** Upgrade U locks on a block to X */ void page_lock_upgrade(const buf_block_t &block); - /** Upgrade U lock to X */ - void lock_upgrade(const index_lock &lock); /** Check if we are holding tablespace latch @param space tablespace to search for @@ -459,27 +383,65 @@ public: @retval nullptr if not found */ buf_block_t *memo_contains_page_flagged(const byte *ptr, ulint flags) const; - /** @return true if mini-transaction contains modifications. */ + /** @return whether this mini-transaction modifies persistent data */ bool has_modifications() const { return m_modifications; } #endif /* UNIV_DEBUG */ - /** Push an object to an mtr memo stack. 
- @param object object + /** Push a buffer page to the memo. + @param block buffer block + @param type object type: MTR_MEMO_PAGE_S_FIX, ... */ + void memo_push(buf_block_t *block, mtr_memo_type_t type) + __attribute__((nonnull)) + { + ut_ad(is_active()); + ut_ad(type <= MTR_MEMO_PAGE_SX_MODIFY); + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); +#ifdef UNIV_DEBUG + switch (type) { + case MTR_MEMO_PAGE_S_FIX: + ut_ad(block->page.lock.have_s()); + break; + case MTR_MEMO_PAGE_X_FIX: case MTR_MEMO_PAGE_X_MODIFY: + ut_ad(block->page.lock.have_x()); + break; + case MTR_MEMO_PAGE_SX_FIX: case MTR_MEMO_PAGE_SX_MODIFY: + ut_ad(block->page.lock.have_u_or_x()); + break; + case MTR_MEMO_BUF_FIX: + break; + case MTR_MEMO_MODIFY: + case MTR_MEMO_S_LOCK: case MTR_MEMO_X_LOCK: case MTR_MEMO_SX_LOCK: + case MTR_MEMO_SPACE_X_LOCK: case MTR_MEMO_SPACE_S_LOCK: + ut_ad("invalid type" == 0); + } +#endif + if (!(type & MTR_MEMO_MODIFY)); + else if (block->page.id().space() >= SRV_TMP_SPACE_ID) + { + block->page.set_temp_modified(); + type= mtr_memo_type_t(type & ~MTR_MEMO_MODIFY); + } + else + { + m_modifications= true; + if (!m_made_dirty) + /* If we are going to modify a previously clean persistent page, + we must set m_made_dirty, so that commit() will acquire + log_sys.flush_order_mutex and insert the block into + buf_pool.flush_list. */ + m_made_dirty= block->page.oldest_modification() <= 1; + } + m_memo.emplace_back(mtr_memo_slot_t{block, type}); + } + + /** Push an index lock or tablespace latch to the memo. + @param object index lock or tablespace latch @param type object type: MTR_MEMO_S_LOCK, ... */ void memo_push(void *object, mtr_memo_type_t type) __attribute__((nonnull)) { ut_ad(is_active()); - /* If this mtr has U or X latched a clean page then we set - the m_made_dirty flag. This tells us if we need to - grab log_sys.flush_order_mutex at mtr_t::commit() so that we - can insert the dirtied page into the buf_pool.flush_list.
- - FIXME: Do this only when the MTR_MEMO_MODIFY flag is set! */ - if (!m_made_dirty && - (type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX))) - m_made_dirty= - is_block_dirtied(*static_cast(object)); - + ut_ad(type >= MTR_MEMO_S_LOCK); m_memo.emplace_back(mtr_memo_slot_t{object, type}); } @@ -770,7 +732,7 @@ private: /** specifies which operations should be logged; default MTR_LOG_ALL */ uint16_t m_log_mode:2; - /** whether at least one buffer pool page was written to */ + /** whether at least one persistent page was written to */ uint16_t m_modifications:1; /** whether at least one previously clean buffer pool page was written to */ diff --git a/storage/innobase/include/small_vector.h b/storage/innobase/include/small_vector.h index 76069cfc168..d28a36184b8 100644 --- a/storage/innobase/include/small_vector.h +++ b/storage/innobase/include/small_vector.h @@ -71,6 +71,7 @@ public: using const_iterator= const T *; using reverse_iterator= std::reverse_iterator; using reference= T &; + using const_reference= const T&; iterator begin() { return static_cast(BeginX); } const_iterator begin() const { return static_cast(BeginX); } @@ -81,6 +82,8 @@ public: reverse_iterator rend() { return reverse_iterator(begin()); } reference operator[](size_t i) { assert(i < size()); return begin()[i]; } + const_reference operator[](size_t i) const + { return const_cast(*this)[i]; } void erase(const_iterator S, const_iterator E) { diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 2c93d7ffe5a..1c6c28d874a 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -40,6 +40,8 @@ Created 11/26/1995 Heikki Tuuri void mtr_memo_slot_t::release() const { + ut_ad(object); + switch (type) { case MTR_MEMO_S_LOCK: static_cast(object)->s_unlock(); @@ -121,11 +123,7 @@ inline void mtr_t::release_resources() void mtr_t::release() { for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) - { - mtr_memo_slot_t &slot= *it; - if 
(slot.object) - slot.release(); - } + it->release(); m_memo.clear(); } @@ -191,7 +189,7 @@ void mtr_t::commit() for (const mtr_memo_slot_t &slot : m_memo) { - if (slot.object && slot.type & MTR_MEMO_MODIFY) + if (slot.type & MTR_MEMO_MODIFY) { ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || slot.type == MTR_MEMO_PAGE_SX_MODIFY); @@ -226,8 +224,7 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end) while (s-- > begin) { const mtr_memo_slot_t &slot= m_memo[s]; - if (!slot.object) - continue; + ut_ad(slot.object); /* This is intended for releasing latches on indexes or unmodified buffer pool pages. */ ut_ad(slot.type <= MTR_MEMO_SX_LOCK); @@ -271,8 +268,7 @@ void mtr_t::commit_shrink(fil_space_t &space) for (mtr_memo_slot_t &slot : m_memo) { - if (!slot.object) - continue; + ut_ad(slot.object); switch (slot.type) { default: ut_ad("invalid type" == 0); @@ -936,15 +932,6 @@ void mtr_t::page_lock_upgrade(const buf_block_t &block) #endif /* BTR_CUR_HASH_ADAPT */ } -void mtr_t::lock_upgrade(const index_lock &lock) -{ - ut_ad(lock.have_x()); - - for (mtr_memo_slot_t &slot : m_memo) - if (slot.object == &lock && slot.type == MTR_MEMO_SX_LOCK) - slot.type= MTR_MEMO_X_LOCK; -} - /** Latch a buffer pool block. 
@param block block to be latched @param rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH */ @@ -993,26 +980,29 @@ done: void mtr_t::upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch) { ut_ad(is_active()); - ut_ad(savepoint < m_memo.size()); - mtr_memo_slot_t &slot= m_memo[savepoint]; ut_ad(slot.type == MTR_MEMO_BUF_FIX); buf_block_t *block= static_cast(slot.object); ut_d(const auto state= block->page.state()); ut_ad(state > buf_page_t::UNFIXED); ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX); + static_assert(int{MTR_MEMO_PAGE_S_FIX} == int{RW_S_LATCH}, ""); + static_assert(int{MTR_MEMO_PAGE_X_FIX} == int{RW_X_LATCH}, ""); + static_assert(int{MTR_MEMO_PAGE_SX_FIX} == int{RW_SX_LATCH}, ""); + slot.type= mtr_memo_type_t(rw_latch); switch (rw_latch) { default: ut_ad("invalid state" == 0); break; + case RW_S_LATCH: + block->page.lock.s_lock(); + break; case RW_SX_LATCH: - slot.type= MTR_MEMO_PAGE_SX_FIX; block->page.lock.u_lock(); ut_ad(!block->page.is_io_fixed()); break; case RW_X_LATCH: - slot.type= MTR_MEMO_PAGE_X_FIX; block->page.lock.x_lock(); ut_ad(!block->page.is_io_fixed()); } @@ -1116,7 +1106,8 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags) for (const mtr_memo_slot_t &slot : m_memo) { - if (!slot.object || !(flags & slot.type)) + ut_ad(slot.object); + if (!(flags & slot.type)) continue; buf_page_t *bpage= static_cast(slot.object); @@ -1137,30 +1128,84 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags) /** Mark the given latched page as modified. 
@param block page that will be modified */ -void mtr_t::modify(const buf_block_t &block) +void mtr_t::set_modified(const buf_block_t &block) { - mtr_memo_slot_t *found= nullptr; + if (block.page.id().space() >= SRV_TMP_SPACE_ID) + { + const_cast(block).page.set_temp_modified(); + return; + } + + m_modifications= true; + + if (UNIV_UNLIKELY(m_log_mode == MTR_LOG_NONE)) + return; for (mtr_memo_slot_t &slot : m_memo) { if (slot.object == &block && slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)) { - found= &slot; - break; + if (slot.type & MTR_MEMO_MODIFY) + ut_ad(m_made_dirty || block.page.oldest_modification() > 1); + else + { + slot.type= static_cast(slot.type | MTR_MEMO_MODIFY); + if (!m_made_dirty) + m_made_dirty= block.page.oldest_modification() <= 1; + } + return; } } - if (UNIV_UNLIKELY(!found)) + /* This must be PageConverter::update_page() in IMPORT TABLESPACE. */ + ut_ad(m_memo.empty()); + ut_ad(!block.page.in_LRU_list); +} + +void mtr_t::init(buf_block_t *b) +{ + const page_id_t id{b->page.id()}; + ut_ad(is_named_space(id.space())); + ut_ad(!m_freed_pages == !m_freed_space); + ut_ad(memo_contains_flagged(b, MTR_MEMO_PAGE_X_FIX)); + + if (id.space() >= SRV_TMP_SPACE_ID) + b->page.set_temp_modified(); + else { - /* This must be PageConverter::update_page() in IMPORT TABLESPACE. 
*/ - ut_ad(m_memo.empty()); - ut_ad(!block.page.in_LRU_list); - return; + for (mtr_memo_slot_t &slot : m_memo) + { + if (slot.object == b && slot.type & MTR_MEMO_PAGE_X_FIX) + { + slot.type= MTR_MEMO_PAGE_X_MODIFY; + m_modifications= true; + if (!m_made_dirty) + m_made_dirty= b->page.oldest_modification() <= 1; + goto found; + } + } + ut_ad("block not X-latched" == 0); } - found->type= static_cast(found->type | MTR_MEMO_MODIFY); - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block.page); + + found: + if (UNIV_LIKELY_NULL(m_freed_space) && + m_freed_space->id == id.space() && + m_freed_pages->remove_if_exists(id.page_no()) && + m_freed_pages->empty()) + { + delete m_freed_pages; + m_freed_pages= nullptr; + m_freed_space= nullptr; + } + + b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); + + if (!is_logged()) + return; + + m_log.close(log_write(id, &b->page)); + m_last_offset= FIL_PAGE_TYPE; } /** Free a page. @@ -1176,20 +1221,23 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) buf_block_t *freed= nullptr; const page_id_t id{space.id, offset}; - for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) + for (auto it= m_memo.end(); it != m_memo.begin(); ) { + it--; + next: mtr_memo_slot_t &slot= *it; buf_block_t *block= static_cast(slot.object); - if (!block); - else if (block == freed) + ut_ad(block); + if (block == freed) { if (slot.type & (MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)) slot.type= MTR_MEMO_PAGE_X_FIX; else { ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.object= nullptr; block->page.unfix(); + m_memo.erase(it, it + 1); + goto next; } } else if (slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) && @@ -1203,7 +1251,17 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) ut_d(bool upgraded=) block->page.lock.x_lock_upgraded(); ut_ad(upgraded); } - slot.type= MTR_MEMO_PAGE_X_MODIFY; + if (id.space() >= SRV_TMP_SPACE_ID) + { + block->page.set_temp_modified(); + slot.type= MTR_MEMO_PAGE_X_FIX; + } + else + { + 
slot.type= MTR_MEMO_PAGE_X_MODIFY; + if (!m_made_dirty) + m_made_dirty= block->page.oldest_modification() <= 1; + } #ifdef BTR_CUR_HASH_ADAPT if (block->index) btr_search_drop_page_hash_index(block, false); @@ -1212,8 +1270,6 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) } } - if (freed && !m_made_dirty) - m_made_dirty= is_block_dirtied(freed->page); m_log.close(log_write(id, nullptr)); } } diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 45c35bc6995..861095b421e 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1629,6 +1629,9 @@ inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept dberr_t IndexPurge::purge() noexcept { btr_pcur_store_position(&m_pcur, &m_mtr); + m_mtr.commit(); + m_mtr.start(); + m_mtr.set_log_mode(MTR_LOG_NO_REDO); dberr_t err= purge_pessimistic_delete(); m_mtr.start(); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 5da0e2479f9..a4471104543 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -314,8 +314,10 @@ row_ins_clust_index_entry_by_modify( } if (mode != BTR_MODIFY_TREE) { - ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED)) - == BTR_MODIFY_LEAF); + ut_ad(mode == BTR_MODIFY_LEAF + || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED + || mode == BTR_MODIFY_ROOT_AND_LEAF + || mode == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); /* Try optimistic updating of the record, keeping changes within the page */ @@ -1621,8 +1623,7 @@ row_ins_check_foreign_constraint( dtuple_set_n_fields_cmp(entry, foreign->n_fields); pcur.btr_cur.page_cur.index = check_index; - err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, 0, - &mtr); + err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (UNIV_UNLIKELY(err != DB_SUCCESS)) { goto end_scan; } @@ -2119,7 +2120,7 @@ row_ins_scan_sec_index_for_duplicate( pcur.btr_cur.page_cur.index = index; trx_t* const trx = thr_get_trx(thr); dberr_t err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, - &pcur, 0, mtr); + &pcur, mtr); if (err != DB_SUCCESS) { goto end_scan; } @@ -2539,8 +2540,8 @@ row_ins_index_entry_big_rec( index->set_modified(mtr); } - dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, 0, &mtr); + dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE, + &pcur, &mtr); if (error != DB_SUCCESS) { return error; } @@ -2634,11 +2635,10 @@ row_ins_clust_index_entry_low( que_thr_t* thr) /*!< in: query thread */ { btr_pcur_t pcur; - btr_cur_t* cursor; dberr_t err = DB_SUCCESS; big_rec_t* big_rec = NULL; mtr_t mtr; - ib_uint64_t auto_inc = 0; + uint64_t auto_inc = 0; mem_heap_t* offsets_heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs* offsets = offsets_; @@ -2654,7 +2654,7 @@ row_ins_clust_index_entry_low( ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index)); ut_ad(!trx->in_rollback); - 
mtr_start(&mtr); + mtr.start(); if (index->table->is_temporary()) { /* Disable REDO logging as the lifetime of temp-tables is @@ -2694,6 +2694,13 @@ row_ins_clust_index_entry_low( dfield->type.mtype, dfield->type.prtype & DATA_UNSIGNED); + if (auto_inc + && mode != BTR_MODIFY_TREE) { + mode = btr_latch_mode( + BTR_MODIFY_ROOT_AND_LEAF + ^ BTR_MODIFY_LEAF + ^ mode); + } } } } @@ -2703,20 +2710,26 @@ row_ins_clust_index_entry_low( the function will return in both low_match and up_match of the cursor sensible values */ pcur.btr_cur.page_cur.index = index; - err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, auto_inc, &mtr); + err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, &mtr); if (err != DB_SUCCESS) { index->table->file_unreadable = true; -commit_exit: +err_exit: mtr.commit(); goto func_exit; } - cursor = btr_pcur_get_btr_cur(&pcur); - cursor->thr = thr; + if (auto_inc) { + buf_block_t* root + = mtr.at_savepoint(mode != BTR_MODIFY_ROOT_AND_LEAF); + ut_ad(index->page == root->page.id().page_no()); + page_set_autoinc(root, auto_inc, &mtr, false); + } + + btr_pcur_get_btr_cur(&pcur)->thr = thr; #ifdef UNIV_DEBUG { - page_t* page = btr_cur_get_page(cursor); + page_t* page = btr_pcur_get_page(&pcur); rec_t* first_rec = page_rec_get_next( page_get_infimum_rec(page)); @@ -2725,7 +2738,7 @@ commit_exit: } #endif /* UNIV_DEBUG */ - block = btr_cur_get_block(cursor); + block = btr_pcur_get_block(&pcur); DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;); @@ -2747,7 +2760,7 @@ commit_exit: if (err != DB_SUCCESS) { trx->error_state = err; - goto commit_exit; + goto err_exit; } if (index->table->n_rec_locks) { @@ -2787,7 +2800,7 @@ skip_bulk_insert: ut_ad(index->is_instant()); ut_ad(!dict_index_is_online_ddl(index)); - const rec_t* rec = btr_cur_get_rec(cursor); + const rec_t* rec = btr_pcur_get_rec(&pcur); if (rec_get_info_bits(rec, page_rec_is_comp(rec)) & REC_INFO_MIN_REC_FLAG) { @@ -2796,16 +2809,17 @@ skip_bulk_insert: goto err_exit; } - 
ut_ad(!row_ins_must_modify_rec(cursor)); + ut_ad(!row_ins_must_modify_rec(&pcur.btr_cur)); goto do_insert; } - if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { + if (rec_is_metadata(btr_pcur_get_rec(&pcur), *index)) { goto do_insert; } if (n_uniq - && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) { + && (pcur.btr_cur.up_match >= n_uniq + || pcur.btr_cur.low_match >= n_uniq)) { if (flags == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG @@ -2813,7 +2827,7 @@ skip_bulk_insert: /* Set no locks when applying log in online table rebuild. Only check for duplicates. */ err = row_ins_duplicate_error_in_clust_online( - n_uniq, entry, cursor, + n_uniq, entry, &pcur.btr_cur, &offsets, &offsets_heap); switch (err) { @@ -2824,26 +2838,24 @@ skip_bulk_insert: /* fall through */ case DB_SUCCESS_LOCKED_REC: case DB_DUPLICATE_KEY: - trx->error_info = cursor->index(); + trx->error_info = index; } } else { /* Note that the following may return also DB_LOCK_WAIT */ err = row_ins_duplicate_error_in_clust( - flags, cursor, entry, thr); + flags, &pcur.btr_cur, entry, thr); } if (err != DB_SUCCESS) { -err_exit: - mtr_commit(&mtr); - goto func_exit; + goto err_exit; } } /* Note: Allowing duplicates would qualify for modification of an existing record as the new entry is exactly same as old entry. 
*/ - if (row_ins_must_modify_rec(cursor)) { + if (row_ins_must_modify_rec(&pcur.btr_cur)) { /* There is already an index entry with a long enough common prefix, we must convert the insert into a modify of an existing record */ @@ -2861,10 +2873,13 @@ do_insert: rec_t* insert_rec; if (mode != BTR_MODIFY_TREE) { - ut_ad(mode == BTR_MODIFY_LEAF || - mode == BTR_MODIFY_LEAF_ALREADY_LATCHED); + ut_ad(mode == BTR_MODIFY_LEAF + || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED + || mode == BTR_MODIFY_ROOT_AND_LEAF + || mode + == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); err = btr_cur_optimistic_insert( - flags, cursor, &offsets, &offsets_heap, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); } else { @@ -2873,17 +2888,15 @@ do_insert: goto err_exit; } - DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust"); - err = btr_cur_optimistic_insert( - flags, cursor, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); if (err == DB_FAIL) { err = btr_cur_pessimistic_insert( - flags, cursor, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); @@ -2995,9 +3008,7 @@ row_ins_sec_index_entry_low( rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); - err = btr_cur_search_to_nth_level(0, entry, - PAGE_CUR_RTREE_INSERT, - search_mode, &cursor, &mtr); + err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr); if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) { @@ -3013,9 +3024,8 @@ row_ins_sec_index_entry_low( } else { index->set_modified(mtr); } - err = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_RTREE_INSERT, - search_mode, &cursor, &mtr); + err = rtr_insert_leaf(&cursor, entry, + search_mode, &mtr); } DBUG_EXECUTE_IF( @@ -3031,8 +3041,8 @@ row_ins_sec_index_entry_low( : BTR_INSERT)); } - err = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, - search_mode, &cursor, 
&mtr); + err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode, + &mtr); } if (err != DB_SUCCESS) { @@ -3108,12 +3118,12 @@ row_ins_sec_index_entry_low( prevent any insertion of a duplicate by another transaction. Let us now reposition the cursor and continue the insertion (bypassing the change buffer). */ - err = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, + err = cursor.search_leaf( + entry, PAGE_CUR_LE, btr_latch_mode(search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE)), - &cursor, &mtr); + &mtr); if (err != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 94d69d88fb5..0743dc2bb50 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1696,8 +1696,8 @@ err_exit: mtr->start(); index->set_modified(*mtr); pcur->btr_cur.page_cur.index = index; - error = btr_pcur_open(entry, PAGE_CUR_LE, - BTR_PURGE_TREE, pcur, 0, mtr); + error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_PURGE_TREE, pcur, + mtr); if (error) { goto err_exit; } @@ -1780,8 +1780,8 @@ row_log_table_apply_delete( mtr_start(&mtr); index->set_modified(mtr); - dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, - BTR_PURGE_TREE, &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_PURGE_TREE, &pcur, + &mtr); if (err != DB_SUCCESS) { goto all_done; } @@ -1917,8 +1917,8 @@ row_log_table_apply_update( mtr.start(); index->set_modified(mtr); - error = btr_pcur_open(old_pk, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, 0, &mtr); + error = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_MODIFY_TREE, &pcur, + &mtr); if (error != DB_SUCCESS) { func_exit: mtr.commit(); @@ -3084,11 +3084,8 @@ row_log_apply_op_low( record. The operation may already have been performed, depending on when the row in the clustered index was scanned. */ - *error = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, - has_index_lock - ? BTR_MODIFY_TREE - : BTR_MODIFY_LEAF, - &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock + ? 
BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3138,9 +3135,9 @@ row_log_apply_op_low( mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, + &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3242,9 +3239,9 @@ insert_the_rec: mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, + &mtr); if (*error != DB_SUCCESS) { break; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index caa2646eee2..535762ee105 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -150,9 +150,8 @@ public: false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = btr_cur_search_to_nth_level( - 0, dtuple, PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_LEAF, &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_LEAF, &mtr); /* It need to update MBR in parent entry, so change search mode to BTR_MODIFY_TREE */ @@ -164,10 +163,8 @@ public: rtr_info_update_btr(&ins_cur, &rtr_info); mtr.start(); index->set_modified(mtr); - error = btr_cur_search_to_nth_level( - 0, dtuple, - PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_TREE, &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_TREE, &mtr); } if (error == DB_SUCCESS) { @@ -189,11 +186,8 @@ public: &ins_cur, index, false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = btr_cur_search_to_nth_level( - 0, dtuple, - PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_TREE, - &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_TREE, &mtr); if (error == DB_SUCCESS) { error = btr_cur_pessimistic_insert( diff --git 
a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index e216d9faa3b..753b42332fc 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -104,7 +104,7 @@ bool row_purge_remove_clust_if_poss_low( /*===============================*/ purge_node_t* node, /*!< in/out: row purge node */ - btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ + btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */ { dict_index_t* index = dict_table_get_first_index(node->table); table_id_t table_id = 0; @@ -342,17 +342,20 @@ row_purge_remove_sec_if_poss_tree( ibool success = TRUE; dberr_t err; mtr_t mtr; - enum row_search_result search_result; log_free_check(); mtr.start(); index->set_modified(mtr); pcur.btr_cur.page_cur.index = index; - search_result = row_search_index_entry(entry, BTR_PURGE_TREE, - &pcur, &mtr); + if (index->is_spatial()) { + if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) { + goto found; + } + goto func_exit; + } - switch (search_result) { + switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) { case ROW_NOT_FOUND: /* Not found. This is a legitimate condition. In a rollback, InnoDB will remove secondary recs that would @@ -381,6 +384,7 @@ row_purge_remove_sec_if_poss_tree( which cannot be purged yet, requires its existence. If some requires, we should do nothing. 
*/ +found: if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) { /* Remove the index record, which should have been @@ -439,8 +443,6 @@ row_purge_remove_sec_if_poss_leaf( { mtr_t mtr; btr_pcur_t pcur; - enum btr_latch_mode mode; - enum row_search_result search_result; bool success = true; log_free_check(); @@ -449,31 +451,27 @@ row_purge_remove_sec_if_poss_leaf( mtr.start(); index->set_modified(mtr); - /* Change buffering is disabled for spatial index and - virtual index. */ - mode = (index->type & (DICT_SPATIAL | DICT_VIRTUAL)) - ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF; pcur.btr_cur.page_cur.index = index; /* Set the purge node for the call to row_purge_poss_sec(). */ pcur.btr_cur.purge_node = node; if (index->is_spatial()) { pcur.btr_cur.thr = NULL; - index->lock.u_lock(SRW_LOCK_CALL); - search_result = row_search_index_entry( - entry, mode, &pcur, &mtr); - index->lock.u_unlock(); - } else { - /* Set the query thread, so that ibuf_insert_low() will be - able to invoke thd_get_trx(). */ - pcur.btr_cur.thr = static_cast<que_thr_t*>( - que_node_get_parent(node)); - search_result = row_search_index_entry( - entry, mode, &pcur, &mtr); + if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) { + goto found; + } + goto func_exit; } - switch (search_result) { + /* Set the query thread, so that ibuf_insert_low() will be + able to invoke thd_get_trx(). */ + pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node)); + + switch (row_search_index_entry(entry, index->has_virtual() + ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF, + &pcur, &mtr)) { case ROW_FOUND: +found: /* Before attempting to purge a record, check if it is safe to do so. 
*/ if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) { diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index b998d27d836..4a00b2a430e 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2022, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1216,7 +1216,7 @@ row_search_on_row_ref( & REC_INFO_MIN_REC_FLAG; } else { ut_a(ref->n_fields == index->n_uniq); - if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, 0, mtr) + if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { return false; } @@ -1278,21 +1278,13 @@ row_search_index_entry( ut_ad(dtuple_check_typed(entry)); - if (pcur->index()->is_spatial()) { - if (rtr_pcur_open(pcur->index(), entry, mode, pcur, mtr)) { - return ROW_NOT_FOUND; - } - } else { - if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, 0, mtr) - != DB_SUCCESS) { - return ROW_NOT_FOUND; - } + if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { + return ROW_NOT_FOUND; } switch (btr_pcur_get_btr_cur(pcur)->flag) { case BTR_CUR_DELETE_REF: ut_ad(!(~mode & BTR_DELETE)); - ut_ad(!pcur->index()->is_spatial()); return(ROW_NOT_DELETED_REF); case BTR_CUR_DEL_MARK_IBUF: diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 87e3ca43b1c..e44cc466295 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -2,7 +2,7 @@ Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. 
Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -4776,7 +4776,7 @@ wait_table_again: pcur->btr_cur.thr = thr; pcur->old_rec = nullptr; - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { if (!prebuilt->rtr_info) { prebuilt->rtr_info = rtr_create_rtr_info( set_also_gap_locks, true, @@ -4792,10 +4792,13 @@ wait_table_again: prebuilt->rtr_info->search_tuple = search_tuple; prebuilt->rtr_info->search_mode = mode; } - } - err = btr_pcur_open_with_no_init(search_tuple, mode, - BTR_SEARCH_LEAF, pcur, &mtr); + err = rtr_search_leaf(pcur, search_tuple, mode, &mtr); + } else { + err = btr_pcur_open_with_no_init(search_tuple, mode, + BTR_SEARCH_LEAF, + pcur, &mtr); + } if (err != DB_SUCCESS) { page_corrupted: @@ -5771,8 +5774,7 @@ next_rec_after_check: if (spatial_search) { /* No need to do store restore for R-tree */ - mtr.commit(); - mtr.start(); + mtr.rollback_to_savepoint(0); } else if (mtr_extra_clust_savepoint) { /* We must release any clustered index latches if we are moving to the next non-clustered diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 6567019a33d..50196e78092 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -233,7 +233,7 @@ func_exit: if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) { /* When rolling back the very first instant ADD COLUMN operation, reset the root page to the basic state. 
*/ - err = btr_reset_instant(*index, true, &mtr); + btr_reset_instant(*index, true, &mtr); } btr_pcur_commit_specify_mtr(&node->pcur, &mtr); @@ -268,21 +268,32 @@ row_undo_ins_remove_sec_low( pcur.btr_cur.page_cur.index = index; row_mtr_start(&mtr, index, !modify_leaf); - if (modify_leaf) { - mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; - mtr_s_lock_index(index, &mtr); - } else { - ut_ad(mode == BTR_PURGE_TREE); - mtr_sx_lock_index(index, &mtr); - } - if (index->is_spatial()) { mode = modify_leaf - ? btr_latch_mode(BTR_MODIFY_LEAF_ALREADY_LATCHED + ? btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_pcur_get_btr_cur(&pcur)->thr = thr; + if (rtr_search(entry, mode, &pcur, &mtr)) { + goto func_exit; + } + + if (rec_get_deleted_flag( + btr_pcur_get_rec(&pcur), + dict_table_is_comp(index->table))) { + ib::error() << "Record found in index " << index->name + << " is deleted marked on insert rollback."; + ut_ad(0); + } + goto found; + } else if (modify_leaf) { + mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; + mtr_s_lock_index(index, &mtr); + } else { + ut_ad(mode == BTR_PURGE_TREE); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); } switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { @@ -295,15 +306,7 @@ row_undo_ins_remove_sec_low( case ROW_NOT_FOUND: break; case ROW_FOUND: - if (dict_index_is_spatial(index) - && rec_get_deleted_flag( - btr_pcur_get_rec(&pcur), - dict_table_is_comp(index->table))) { - ib::error() << "Record found in index " << index->name - << " is deleted marked on insert rollback."; - ut_ad(0); - } - + found: btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); if (modify_leaf) { @@ -318,6 +321,7 @@ row_undo_ins_remove_sec_low( } } +func_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 2d04dca4003..50e15e03cc9 100644 --- a/storage/innobase/row/row0umod.cc +++ 
b/storage/innobase/row/row0umod.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -133,8 +133,7 @@ row_undo_mod_clust_low( && node->ref == &trx_undo_metadata && btr_cur_get_index(btr_cur)->table->instant && node->update->info_bits == REC_INFO_METADATA_ADD) { - err = btr_reset_instant(*btr_cur_get_index(btr_cur), - false, mtr); + btr_reset_instant(*btr_cur->index(), false, mtr); } } @@ -490,7 +489,6 @@ row_undo_mod_del_mark_or_remove_sec_low( dberr_t err = DB_SUCCESS; mtr_t mtr; mtr_t mtr_vers; - row_search_result search_result; const bool modify_leaf = mode == BTR_MODIFY_LEAF; row_mtr_start(&mtr, index, !modify_leaf); @@ -505,6 +503,11 @@ row_undo_mod_del_mark_or_remove_sec_low( | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_cur->thr = thr; + if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { + goto found; + } else { + goto func_exit; + } } else if (!index->is_committed()) { /* The index->online_status may change if the index is or was being created online, but not committed yet. 
It @@ -514,7 +517,8 @@ row_undo_mod_del_mark_or_remove_sec_low( mtr_s_lock_index(index, &mtr); } else { ut_ad(mode == BTR_PURGE_TREE); - mtr_sx_lock_index(index, &mtr); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); } } else { /* For secondary indexes, @@ -523,9 +527,8 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_ad(!dict_index_is_online_ddl(index)); } - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); - - switch (UNIV_EXPECT(search_result, ROW_FOUND)) { + switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr), + ROW_FOUND)) { case ROW_NOT_FOUND: /* In crash recovery, the secondary index record may be missing if the UPDATE did not have time to insert @@ -547,6 +550,7 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_error; } +found: /* We should remove the index record if no prior version of the row, which cannot be purged yet, requires its existence. If some requires, we should delete mark the record. */ @@ -665,13 +669,12 @@ row_undo_mod_del_unmark_sec_and_undo_update( trx_t* trx = thr_get_trx(thr); const ulint flags = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG; - row_search_result search_result; const auto orig_mode = mode; pcur.btr_cur.page_cur.index = index; ut_ad(trx->id != 0); - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { /* FIXME: Currently we do a 2-pass search for the undo due to avoid undel-mark a wrong rec in rolling back in partial update. 
Later, we could log some info in @@ -686,9 +689,22 @@ try_again: btr_cur->thr = thr; - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); + if (index->is_spatial()) { + if (!rtr_search(entry, mode, &pcur, &mtr)) { + goto found; + } - switch (search_result) { + if (mode != orig_mode && btr_cur->rtr_info->fd_del) { + mode = orig_mode; + btr_pcur_close(&pcur); + mtr.commit(); + goto try_again; + } + + goto not_found; + } + + switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { mem_heap_t* heap; mem_heap_t* offsets_heap; rec_offs* offsets; @@ -699,17 +715,7 @@ try_again: flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ ut_error; case ROW_NOT_FOUND: - /* For spatial index, if first search didn't find an - undel-marked rec, try to find a del-marked rec. */ - if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { - if (mode != orig_mode) { - mode = orig_mode; - btr_pcur_close(&pcur); - mtr_commit(&mtr); - goto try_again; - } - } - +not_found: if (btr_cur->up_match >= dict_index_get_n_unique(index) || btr_cur->low_match >= dict_index_get_n_unique(index)) { ib::warn() << "Record in index " << index->name @@ -767,6 +773,7 @@ try_again: break; case ROW_FOUND: +found: btr_rec_set_deleted(btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), &mtr); heap = mem_heap_create( diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index a3f940adff5..fe88fce58a2 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1832,12 +1832,10 @@ row_upd_sec_index_entry( que_thr_t* thr) /*!< in: query thread */ { mtr_t mtr; - const rec_t* rec; btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; - btr_cur_t* btr_cur; dberr_t err = DB_SUCCESS; trx_t* trx = thr_get_trx(thr); btr_latch_mode mode; @@ -1876,10 +1874,6 @@ row_upd_sec_index_entry( case SRV_TMP_SPACE_ID: mtr.set_log_mode(MTR_LOG_NO_REDO); flags = BTR_NO_LOCKING_FLAG; - if (index->is_spatial()) { - mode = btr_latch_mode(BTR_MODIFY_LEAF - | BTR_RTREE_DELETE_MARK); - } break; default: index->set_modified(mtr); @@ -1888,26 +1882,35 @@ row_upd_sec_index_entry( flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0; /* We can only buffer delete-mark operations if there are no foreign key constraints referring to the index. */ - mode = index->is_spatial() - ? btr_latch_mode(BTR_MODIFY_LEAF - | BTR_RTREE_DELETE_MARK) - : referenced - ? BTR_MODIFY_LEAF : BTR_DELETE_MARK_LEAF; + if (!referenced) { + mode = BTR_DELETE_MARK_LEAF; + } break; } /* Set the query thread, so that ibuf_insert_low() will be able to invoke thd_get_trx(). 
*/ - btr_pcur_get_btr_cur(&pcur)->thr = thr; + pcur.btr_cur.thr = thr; pcur.btr_cur.page_cur.index = index; + if (index->is_spatial()) { + mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK); + if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { + goto found; + } + + if (pcur.btr_cur.rtr_info->fd_del) { + /* We found the record, but a delete marked */ + goto close; + } + + goto not_found; + } + search_result = row_search_index_entry(entry, mode, &pcur, &mtr); - btr_cur = btr_pcur_get_btr_cur(&pcur); - - rec = btr_cur_get_rec(btr_cur); - switch (search_result) { + const rec_t* rec; case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */ ut_error; break; @@ -1916,11 +1919,8 @@ row_upd_sec_index_entry( break; case ROW_NOT_FOUND: - if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { - /* We found the record, but a delete marked */ - break; - } - +not_found: + rec = btr_pcur_get_rec(&pcur); ib::error() << "Record in index " << index->name << " of table " << index->table->name @@ -1934,7 +1934,9 @@ row_upd_sec_index_entry( #endif /* UNIV_DEBUG */ break; case ROW_FOUND: +found: ut_ad(err == DB_SUCCESS); + rec = btr_pcur_get_rec(&pcur); /* Delete mark the old index record; it can already be delete marked if we return after a lock wait in @@ -1943,14 +1945,14 @@ row_upd_sec_index_entry( rec, dict_table_is_comp(index->table))) { err = lock_sec_rec_modify_check_and_lock( flags, - btr_cur_get_block(btr_cur), - btr_cur_get_rec(btr_cur), index, thr, &mtr); + btr_pcur_get_block(&pcur), + btr_pcur_get_rec(&pcur), index, thr, &mtr); if (err != DB_SUCCESS) { break; } - btr_rec_set_deleted(btr_cur_get_block(btr_cur), - btr_cur_get_rec(btr_cur), + btr_rec_set_deleted(btr_pcur_get_block(&pcur), + btr_pcur_get_rec(&pcur), &mtr); #ifdef WITH_WSREP if (!referenced && foreign @@ -2009,6 +2011,7 @@ row_upd_sec_index_entry( } } +close: btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/storage/innobase/trx/trx0purge.cc 
b/storage/innobase/trx/trx0purge.cc index 8dfb6847a32..e1df09dd3e6 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -424,7 +424,6 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) block->fix(); mtr.commit(); mtr.start(); - mtr.flag_modified(); rseg->latch.wr_lock(SRW_LOCK_CALL); rseg_hdr->page.lock.x_lock(); block->page.lock.x_lock();