diff --git a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result b/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result deleted file mode 100644 index 670340f3583..00000000000 --- a/mysql-test/suite/innodb/r/innodb-change-buffer-recovery.result +++ /dev/null @@ -1,55 +0,0 @@ -# -# Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE -# OPERATION IF IT IS DONE IN-PLACE -# -call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery"); -call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc"); -call mtr.add_suppression("Plugin 'InnoDB'"); -FLUSH TABLES; -CREATE TABLE t1( -a INT AUTO_INCREMENT PRIMARY KEY, -b CHAR(1), -c INT, -INDEX(b)) -ENGINE=InnoDB STATS_PERSISTENT=0; -SET GLOBAL innodb_change_buffering_debug = 1; -SET GLOBAL innodb_change_buffering = all; -INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192; -BEGIN; -SELECT b FROM t1 LIMIT 3; -b -x -x -x -connect con1,localhost,root,,; -BEGIN; -DELETE FROM t1 WHERE a=1; -INSERT INTO t1 VALUES(1,'X',1); -SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; -SELECT b FROM t1 LIMIT 3; -ERROR HY000: Lost connection to server during query -disconnect con1; -connection default; -FOUND 1 /Wrote log record for ibuf update in place operation/ in mysqld.1.err -# restart: --innodb-read-only -CHECK TABLE t1; -Table Op Msg_type Msg_text -test.t1 check Error Unknown storage engine 'InnoDB' -test.t1 check error Corrupt -FOUND 1 /innodb_read_only prevents crash recovery/ in mysqld.1.err -# restart: --innodb-force-recovery=5 -SELECT * FROM t1 LIMIT 1; -a b c -1 X 1 -SHOW ENGINE INNODB STATUS; -Type Name Status -InnoDB insert 0, delete mark 0 -SET GLOBAL innodb_fast_shutdown=0; -# restart -CHECK TABLE t1; -Table Op Msg_type Msg_text -test.t1 check status OK -SHOW ENGINE INNODB STATUS; -Type Name Status -InnoDB -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/r/innodb_wl6326.result b/mysql-test/suite/innodb/r/innodb_wl6326.result deleted file mode 100644 index 
fcd58aedafe..00000000000 --- a/mysql-test/suite/innodb/r/innodb_wl6326.result +++ /dev/null @@ -1,405 +0,0 @@ -SET GLOBAL innodb_adaptive_hash_index = false; -SET GLOBAL innodb_stats_persistent = false; -connect con1,localhost,root,,; -connect con2,localhost,root,,; -connect con3,localhost,root,,; -CREATE TABLE t1 ( -a00 CHAR(255) NOT NULL DEFAULT 'a', -a01 CHAR(255) NOT NULL DEFAULT 'a', -a02 CHAR(255) NOT NULL DEFAULT 'a', -a03 CHAR(255) NOT NULL DEFAULT 'a', -a04 CHAR(255) NOT NULL DEFAULT 'a', -a05 CHAR(255) NOT NULL DEFAULT 'a', -a06 CHAR(255) NOT NULL DEFAULT 'a', -b INT NOT NULL DEFAULT 0 -) ENGINE = InnoDB; -ALTER TABLE t1 ADD PRIMARY KEY( -a00, -a01, -a02, -a03, -a04, -a05, -a06 -); -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -1 -SET GLOBAL innodb_limit_optimistic_insert_debug = 7; -BEGIN; -INSERT INTO t1 (a00) VALUES ('aa'); -INSERT INTO t1 (a00) VALUES ('ab'); -INSERT INTO t1 (a00) VALUES ('ac'); -INSERT INTO t1 (a00) VALUES ('ad'); -INSERT INTO t1 (a00) VALUES ('ae'); -INSERT INTO t1 (a00) VALUES ('af'); -INSERT INTO t1 (a00) VALUES ('ag'); -INSERT INTO t1 (a00) VALUES ('ah'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -3 -BEGIN; -INSERT INTO t1 (a00) VALUES ('ai'); -INSERT INTO t1 (a00) VALUES ('aj'); -INSERT INTO t1 (a00) VALUES ('ak'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -4 -BEGIN; -INSERT INTO t1 (a00) VALUES 
('al'); -INSERT INTO t1 (a00) VALUES ('am'); -INSERT INTO t1 (a00) VALUES ('an'); -INSERT INTO t1 (a00) VALUES ('ao'); -INSERT INTO t1 (a00) VALUES ('ap'); -INSERT INTO t1 (a00) VALUES ('aq'); -INSERT INTO t1 (a00) VALUES ('ar'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -5 -BEGIN; -INSERT INTO t1 (a00) VALUES ('as'); -INSERT INTO t1 (a00) VALUES ('at'); -INSERT INTO t1 (a00) VALUES ('au'); -INSERT INTO t1 (a00) VALUES ('av'); -INSERT INTO t1 (a00) VALUES ('aw'); -INSERT INTO t1 (a00) VALUES ('ax'); -INSERT INTO t1 (a00) VALUES ('ay'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -6 -BEGIN; -INSERT INTO t1 (a00) VALUES ('az'); -INSERT INTO t1 (a00) VALUES ('ba'); -INSERT INTO t1 (a00) VALUES ('bb'); -INSERT INTO t1 (a00) VALUES ('bc'); -INSERT INTO t1 (a00) VALUES ('bd'); -INSERT INTO t1 (a00) VALUES ('be'); -INSERT INTO t1 (a00) VALUES ('bf'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -7 -BEGIN; -INSERT INTO t1 (a00) VALUES ('bg'); -INSERT INTO t1 (a00) VALUES ('bh'); -INSERT INTO t1 (a00) VALUES ('bi'); -INSERT INTO t1 (a00) VALUES ('bj'); -INSERT INTO t1 (a00) VALUES ('bk'); -INSERT INTO t1 (a00) VALUES ('bl'); -INSERT INTO t1 (a00) VALUES ('bm'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM 
information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -8 -BEGIN; -INSERT INTO t1 (a00) VALUES ('bn'); -INSERT INTO t1 (a00) VALUES ('bo'); -INSERT INTO t1 (a00) VALUES ('bp'); -INSERT INTO t1 (a00) VALUES ('bq'); -INSERT INTO t1 (a00) VALUES ('br'); -INSERT INTO t1 (a00) VALUES ('bs'); -INSERT INTO t1 (a00) VALUES ('bt'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -11 -BEGIN; -INSERT INTO t1 (a00) VALUES ('bu'); -INSERT INTO t1 (a00) VALUES ('bv'); -INSERT INTO t1 (a00) VALUES ('bw'); -INSERT INTO t1 (a00) VALUES ('bx'); -INSERT INTO t1 (a00) VALUES ('by'); -INSERT INTO t1 (a00) VALUES ('bz'); -INSERT INTO t1 (a00) VALUES ('ca'); -INSERT INTO t1 (a00) VALUES ('cb'); -INSERT INTO t1 (a00) VALUES ('cc'); -INSERT INTO t1 (a00) VALUES ('cd'); -INSERT INTO t1 (a00) VALUES ('ce'); -INSERT INTO t1 (a00) VALUES ('cf'); -INSERT INTO t1 (a00) VALUES ('cg'); -INSERT INTO t1 (a00) VALUES ('ch'); -INSERT INTO t1 (a00) VALUES ('ci'); -INSERT INTO t1 (a00) VALUES ('cj'); -INSERT INTO t1 (a00) VALUES ('ck'); -INSERT INTO t1 (a00) VALUES ('cl'); -INSERT INTO t1 (a00) VALUES ('cm'); -INSERT INTO t1 (a00) VALUES ('cn'); -INSERT INTO t1 (a00) VALUES ('co'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -15 -BEGIN; -INSERT INTO t1 (a00) VALUES ('cp'); -INSERT INTO t1 (a00) VALUES ('cq'); -INSERT INTO t1 (a00) VALUES ('cr'); -INSERT INTO t1 (a00) VALUES ('cs'); -INSERT INTO t1 (a00) VALUES ('ct'); -INSERT INTO t1 (a00) VALUES ('cu'); -INSERT INTO t1 (a00) VALUES ('cv'); -INSERT INTO t1 (a00) VALUES ('cw'); -INSERT INTO t1 (a00) VALUES 
('cx'); -INSERT INTO t1 (a00) VALUES ('cy'); -INSERT INTO t1 (a00) VALUES ('cz'); -INSERT INTO t1 (a00) VALUES ('da'); -INSERT INTO t1 (a00) VALUES ('db'); -INSERT INTO t1 (a00) VALUES ('dc'); -INSERT INTO t1 (a00) VALUES ('dd'); -INSERT INTO t1 (a00) VALUES ('de'); -INSERT INTO t1 (a00) VALUES ('df'); -INSERT INTO t1 (a00) VALUES ('dg'); -INSERT INTO t1 (a00) VALUES ('dh'); -INSERT INTO t1 (a00) VALUES ('di'); -INSERT INTO t1 (a00) VALUES ('dj'); -INSERT INTO t1 (a00) VALUES ('dk'); -INSERT INTO t1 (a00) VALUES ('dl'); -INSERT INTO t1 (a00) VALUES ('dm'); -INSERT INTO t1 (a00) VALUES ('dn'); -INSERT INTO t1 (a00) VALUES ('do'); -INSERT INTO t1 (a00) VALUES ('dp'); -INSERT INTO t1 (a00) VALUES ('dq'); -INSERT INTO t1 (a00) VALUES ('dr'); -INSERT INTO t1 (a00) VALUES ('ds'); -INSERT INTO t1 (a00) VALUES ('dt'); -INSERT INTO t1 (a00) VALUES ('du'); -INSERT INTO t1 (a00) VALUES ('dv'); -INSERT INTO t1 (a00) VALUES ('dw'); -INSERT INTO t1 (a00) VALUES ('dx'); -INSERT INTO t1 (a00) VALUES ('dy'); -INSERT INTO t1 (a00) VALUES ('dz'); -INSERT INTO t1 (a00) VALUES ('ea'); -INSERT INTO t1 (a00) VALUES ('eb'); -INSERT INTO t1 (a00) VALUES ('ec'); -INSERT INTO t1 (a00) VALUES ('ed'); -INSERT INTO t1 (a00) VALUES ('ee'); -INSERT INTO t1 (a00) VALUES ('ef'); -INSERT INTO t1 (a00) VALUES ('eg'); -INSERT INTO t1 (a00) VALUES ('eh'); -INSERT INTO t1 (a00) VALUES ('ei'); -INSERT INTO t1 (a00) VALUES ('ej'); -INSERT INTO t1 (a00) VALUES ('ek'); -INSERT INTO t1 (a00) VALUES ('el'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -23 -BEGIN; -INSERT INTO t1 (a00) VALUES ('em'); -INSERT INTO t1 (a00) VALUES ('en'); -INSERT INTO t1 (a00) VALUES ('eo'); -INSERT INTO t1 (a00) VALUES ('ep'); -INSERT INTO t1 (a00) VALUES ('eq'); -INSERT INTO t1 (a00) VALUES ('er'); -INSERT INTO t1 
(a00) VALUES ('es'); -INSERT INTO t1 (a00) VALUES ('et'); -INSERT INTO t1 (a00) VALUES ('eu'); -INSERT INTO t1 (a00) VALUES ('ev'); -INSERT INTO t1 (a00) VALUES ('ew'); -INSERT INTO t1 (a00) VALUES ('ex'); -INSERT INTO t1 (a00) VALUES ('ey'); -INSERT INTO t1 (a00) VALUES ('ez'); -INSERT INTO t1 (a00) VALUES ('fa'); -INSERT INTO t1 (a00) VALUES ('fb'); -INSERT INTO t1 (a00) VALUES ('fc'); -INSERT INTO t1 (a00) VALUES ('fd'); -INSERT INTO t1 (a00) VALUES ('fe'); -INSERT INTO t1 (a00) VALUES ('ff'); -INSERT INTO t1 (a00) VALUES ('fg'); -INSERT INTO t1 (a00) VALUES ('fh'); -INSERT INTO t1 (a00) VALUES ('fi'); -INSERT INTO t1 (a00) VALUES ('fj'); -INSERT INTO t1 (a00) VALUES ('fk'); -INSERT INTO t1 (a00) VALUES ('fl'); -INSERT INTO t1 (a00) VALUES ('fm'); -INSERT INTO t1 (a00) VALUES ('fn'); -INSERT INTO t1 (a00) VALUES ('fo'); -INSERT INTO t1 (a00) VALUES ('fp'); -INSERT INTO t1 (a00) VALUES ('fq'); -INSERT INTO t1 (a00) VALUES ('fr'); -INSERT INTO t1 (a00) VALUES ('fs'); -INSERT INTO t1 (a00) VALUES ('ft'); -INSERT INTO t1 (a00) VALUES ('fu'); -INSERT INTO t1 (a00) VALUES ('fv'); -INSERT INTO t1 (a00) VALUES ('fw'); -INSERT INTO t1 (a00) VALUES ('fx'); -INSERT INTO t1 (a00) VALUES ('fy'); -INSERT INTO t1 (a00) VALUES ('fz'); -INSERT INTO t1 (a00) VALUES ('ga'); -INSERT INTO t1 (a00) VALUES ('gb'); -INSERT INTO t1 (a00) VALUES ('gc'); -INSERT INTO t1 (a00) VALUES ('gd'); -INSERT INTO t1 (a00) VALUES ('ge'); -INSERT INTO t1 (a00) VALUES ('gf'); -INSERT INTO t1 (a00) VALUES ('gg'); -INSERT INTO t1 (a00) VALUES ('gh'); -COMMIT; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -29 -SET GLOBAL innodb_limit_optimistic_insert_debug = 0; -# Test start -SET DEBUG_SYNC = 'RESET'; -INSERT INTO t1 (a00) VALUES ('bfa'); -connection con1; -SET DEBUG_SYNC = 
'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -INSERT INTO t1 (a00) VALUES ('bfb'); -connection con2; -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -a00 a01 -aa a -SELECT a00,a01 FROM t1 WHERE a00 = 'aq'; -a00 a01 -aq a -SELECT a00,a01 FROM t1 WHERE a00 = 'cp'; -a00 a01 -cp a -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; -a00 a01 -el a -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -SELECT a00,a01 FROM t1 WHERE a00 = 'ar'; -connection con3; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -SELECT a00,a01 FROM t1 WHERE a00 = 'cn'; -connection default; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; -SET DEBUG_SYNC = 'now SIGNAL continue'; -connection con1; -connection con2; -a00 a01 -ar a -connection con3; -a00 a01 -cn a -connection default; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -30 -SET DEBUG_SYNC = 'RESET'; -INSERT INTO t1 (a00) VALUES ('coa'); -connection con1; -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -INSERT INTO t1 (a00) VALUES ('cob'); -connection con2; -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -connection con3; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; -connection default; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; -SET DEBUG_SYNC = 'now SIGNAL continue'; -connection con1; -connection con2; -a00 a01 -aa a -connection con3; -a00 a01 -el a -connection default; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent 
statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -31 -SET DEBUG_SYNC = 'RESET'; -INSERT INTO t1 (a00) VALUES ('gba'); -connection con1; -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -INSERT INTO t1 (a00) VALUES ('gbb'); -connection con2; -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -a00 a01 -aa a -SELECT a00,a01 FROM t1 WHERE a00 = 'ek'; -a00 a01 -ek a -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; -connection con3; -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -SELECT a00,a01 FROM t1 WHERE a00 = 'gb'; -connection default; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; -SET DEBUG_SYNC = 'now SIGNAL continue'; -connection con1; -connection con2; -a00 a01 -el a -connection con3; -a00 a01 -gb a -connection default; -ANALYZE TABLE t1; -Table Op Msg_type Msg_text -test.t1 analyze status Engine-independent statistics collected -test.t1 analyze status OK -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; -CLUST_INDEX_SIZE -32 -SET DEBUG_SYNC = 'RESET'; -connection default; -disconnect con1; -disconnect con2; -disconnect con3; -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt deleted file mode 100644 index e5d7090c883..00000000000 --- a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery-master.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb_buffer_pool_size=24M diff --git a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test b/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test deleted file mode 100644 index 129037e783b..00000000000 --- 
a/mysql-test/suite/innodb/t/innodb-change-buffer-recovery.test +++ /dev/null @@ -1,82 +0,0 @@ ---echo # ---echo # Bug#69122 - INNODB DOESN'T REDO-LOG INSERT BUFFER MERGE ---echo # OPERATION IF IT IS DONE IN-PLACE ---echo # ---source include/have_innodb.inc -# innodb_change_buffering_debug option is debug only ---source include/have_debug.inc -# Embedded server does not support crashing ---source include/not_embedded.inc -# DBUG_SUICIDE() hangs under valgrind ---source include/not_valgrind.inc -# This test is slow on buildbot. ---source include/big_test.inc ---source include/have_sequence.inc - -call mtr.add_suppression("InnoDB: innodb_read_only prevents crash recovery"); -call mtr.add_suppression("Plugin initialization aborted at srv0start\\.cc"); -call mtr.add_suppression("Plugin 'InnoDB'"); -FLUSH TABLES; - -CREATE TABLE t1( - a INT AUTO_INCREMENT PRIMARY KEY, - b CHAR(1), - c INT, - INDEX(b)) -ENGINE=InnoDB STATS_PERSISTENT=0; - ---let $_server_id= `SELECT @@server_id` ---let $_expect_file_name= $MYSQLTEST_VARDIR/tmp/mysqld.$_server_id.expect - -# The flag innodb_change_buffering_debug is only available in debug builds. -# It instructs InnoDB to try to evict pages from the buffer pool when -# change buffering is possible, so that the change buffer will be used -# whenever possible. -SET GLOBAL innodb_change_buffering_debug = 1; -SET GLOBAL innodb_change_buffering = all; -let SEARCH_FILE = $MYSQLTEST_VARDIR/log/mysqld.1.err; - -# Create enough rows for the table, so that the change buffer will be -# used for modifying the secondary index page. There must be multiple -# index pages, because changes to the root page are never buffered. -INSERT INTO t1 SELECT 0,'x',1 FROM seq_1_to_8192; - -BEGIN; -SELECT b FROM t1 LIMIT 3; - -connect (con1,localhost,root,,); -BEGIN; -DELETE FROM t1 WHERE a=1; -# This should be buffered, if innodb_change_buffering_debug = 1 is in effect. 
-INSERT INTO t1 VALUES(1,'X',1); - -SET DEBUG_DBUG='+d,crash_after_log_ibuf_upd_inplace'; ---exec echo "wait" > $_expect_file_name ---error 2013 -# This should force a change buffer merge -SELECT b FROM t1 LIMIT 3; -disconnect con1; -connection default; -let SEARCH_PATTERN=Wrote log record for ibuf update in place operation; ---source include/search_pattern_in_file.inc - ---let $restart_parameters= --innodb-read-only ---source include/start_mysqld.inc -CHECK TABLE t1; ---source include/shutdown_mysqld.inc -let SEARCH_PATTERN=innodb_read_only prevents crash recovery; ---source include/search_pattern_in_file.inc - ---let $restart_parameters= --innodb-force-recovery=5 ---source include/start_mysqld.inc -SELECT * FROM t1 LIMIT 1; -replace_regex /.*operations:.* (insert.*), delete \d.*discarded .*/\1/; -SHOW ENGINE INNODB STATUS; -# Slow shutdown will not merge the changes due to innodb_force_recovery=5. -SET GLOBAL innodb_fast_shutdown=0; ---let $restart_parameters= ---source include/restart_mysqld.inc -CHECK TABLE t1; -replace_regex /.*operations:.* insert [1-9][0-9]*, delete mark [1-9][0-9]*, delete \d.*discarded .*//; -SHOW ENGINE INNODB STATUS; -DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.opt b/mysql-test/suite/innodb/t/innodb_wl6326.opt deleted file mode 100644 index 99bf0e5a28b..00000000000 --- a/mysql-test/suite/innodb/t/innodb_wl6326.opt +++ /dev/null @@ -1 +0,0 @@ ---innodb-sys-tablestats diff --git a/mysql-test/suite/innodb/t/innodb_wl6326.test b/mysql-test/suite/innodb/t/innodb_wl6326.test deleted file mode 100644 index 1cf98cd1c7b..00000000000 --- a/mysql-test/suite/innodb/t/innodb_wl6326.test +++ /dev/null @@ -1,519 +0,0 @@ -# -# WL#6326: InnoDB: fix index->lock contention -# - ---source include/have_innodb.inc ---source include/have_debug.inc ---source include/have_debug_sync.inc ---source include/have_innodb_16k.inc - ---disable_query_log -SET @old_innodb_limit_optimistic_insert_debug = @@innodb_limit_optimistic_insert_debug; -SET 
@old_innodb_adaptive_hash_index = @@innodb_adaptive_hash_index; -SET @old_innodb_stats_persistent = @@innodb_stats_persistent; ---enable_query_log - -# Save the initial number of concurrent sessions ---source include/count_sessions.inc - -SET GLOBAL innodb_adaptive_hash_index = false; -SET GLOBAL innodb_stats_persistent = false; - ---connect (con1,localhost,root,,) ---connect (con2,localhost,root,,) ---connect (con3,localhost,root,,) - -CREATE TABLE t1 ( - a00 CHAR(255) NOT NULL DEFAULT 'a', - a01 CHAR(255) NOT NULL DEFAULT 'a', - a02 CHAR(255) NOT NULL DEFAULT 'a', - a03 CHAR(255) NOT NULL DEFAULT 'a', - a04 CHAR(255) NOT NULL DEFAULT 'a', - a05 CHAR(255) NOT NULL DEFAULT 'a', - a06 CHAR(255) NOT NULL DEFAULT 'a', - b INT NOT NULL DEFAULT 0 -) ENGINE = InnoDB; - -ALTER TABLE t1 ADD PRIMARY KEY( - a00, - a01, - a02, - a03, - a04, - a05, - a06 -); - -# -# Prepare primary key index tree to be used for this test. -# - -# Only root (1) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -# Make the first records sparse artificially, -# not to cause modify_tree by single node_ptr insert operation. 
-# * (7 - 2) records should be larger than a half of the page size -# * (7 + 2) records should be fit to the page -# (above t1 definition is already adjusted) -SET GLOBAL innodb_limit_optimistic_insert_debug = 7; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('aa'); -INSERT INTO t1 (a00) VALUES ('ab'); -INSERT INTO t1 (a00) VALUES ('ac'); -INSERT INTO t1 (a00) VALUES ('ad'); -INSERT INTO t1 (a00) VALUES ('ae'); -INSERT INTO t1 (a00) VALUES ('af'); -INSERT INTO t1 (a00) VALUES ('ag'); -INSERT INTO t1 (a00) VALUES ('ah'); -COMMIT; -# Raise root (1-2) -# (aa,ad) -# (aa,ab,ac)(ad,ae,af,ag,ah) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('ai'); -INSERT INTO t1 (a00) VALUES ('aj'); -INSERT INTO t1 (a00) VALUES ('ak'); -COMMIT; -# Split leaf (1-3) -# (aa,ad,ak) -# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('al'); -INSERT INTO t1 (a00) VALUES ('am'); -INSERT INTO t1 (a00) VALUES ('an'); -INSERT INTO t1 (a00) VALUES ('ao'); -INSERT INTO t1 (a00) VALUES ('ap'); -INSERT INTO t1 (a00) VALUES ('aq'); -INSERT INTO t1 (a00) VALUES ('ar'); -COMMIT; -# Split leaf (1-4) -# (aa,ad,ak,ar) -# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('as'); -INSERT INTO t1 (a00) VALUES ('at'); -INSERT INTO t1 (a00) VALUES ('au'); -INSERT INTO t1 (a00) VALUES ('av'); -INSERT INTO t1 (a00) VALUES ('aw'); -INSERT INTO t1 (a00) VALUES ('ax'); -INSERT INTO t1 (a00) VALUES ('ay'); -COMMIT; -# Split leaf (1-5) -# (aa,ad,ak,ar,ay) -# (aa,ab,ac)(ad,ae,af,ag,ah,ai,aj)(ak,al,am,an,ao,ap,aq)(ar,as,at,au,av,aw,ax)(ay) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM 
information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('az'); -INSERT INTO t1 (a00) VALUES ('ba'); -INSERT INTO t1 (a00) VALUES ('bb'); -INSERT INTO t1 (a00) VALUES ('bc'); -INSERT INTO t1 (a00) VALUES ('bd'); -INSERT INTO t1 (a00) VALUES ('be'); -INSERT INTO t1 (a00) VALUES ('bf'); -COMMIT; -# Split leaf (1-6) -# (aa,ad,ak,ar,ay,bf) -# (aa,ab,ac)(ad..)(ak..)(ar,as,at,au,av,aw,ax)(ay,az,ba,bb,bc,bd,be)(bf) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('bg'); -INSERT INTO t1 (a00) VALUES ('bh'); -INSERT INTO t1 (a00) VALUES ('bi'); -INSERT INTO t1 (a00) VALUES ('bj'); -INSERT INTO t1 (a00) VALUES ('bk'); -INSERT INTO t1 (a00) VALUES ('bl'); -INSERT INTO t1 (a00) VALUES ('bm'); -COMMIT; -# Split leaf (1-7) -# (aa,ad,ak,ar,ay,bf,bm) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay,az,ba,bb,bc,bd,be)(bf,bg,bh,bi,bj,bk,bl)(bm) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('bn'); -INSERT INTO t1 (a00) VALUES ('bo'); -INSERT INTO t1 (a00) VALUES ('bp'); -INSERT INTO t1 (a00) VALUES ('bq'); -INSERT INTO t1 (a00) VALUES ('br'); -INSERT INTO t1 (a00) VALUES ('bs'); -INSERT INTO t1 (a00) VALUES ('bt'); -COMMIT; -# Raise root (1-2-8) -# (aa,ar) -# (aa,ad,ak) (ar,ay,bf,bm,bt) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('bu'); -INSERT INTO t1 (a00) VALUES ('bv'); -INSERT INTO t1 (a00) VALUES ('bw'); -INSERT INTO t1 (a00) VALUES ('bx'); -INSERT INTO t1 (a00) VALUES ('by'); -INSERT INTO t1 (a00) VALUES ('bz'); -INSERT INTO t1 (a00) VALUES ('ca'); - -INSERT INTO t1 (a00) VALUES ('cb'); -INSERT INTO t1 (a00) VALUES ('cc'); -INSERT INTO t1 (a00) VALUES ('cd'); -INSERT INTO t1 
(a00) VALUES ('ce'); -INSERT INTO t1 (a00) VALUES ('cf'); -INSERT INTO t1 (a00) VALUES ('cg'); -INSERT INTO t1 (a00) VALUES ('ch'); - -INSERT INTO t1 (a00) VALUES ('ci'); -INSERT INTO t1 (a00) VALUES ('cj'); -INSERT INTO t1 (a00) VALUES ('ck'); -INSERT INTO t1 (a00) VALUES ('cl'); -INSERT INTO t1 (a00) VALUES ('cm'); -INSERT INTO t1 (a00) VALUES ('cn'); -INSERT INTO t1 (a00) VALUES ('co'); -COMMIT; -# Split also at level 1 (1-3-11) -# (aa,ar,co) -# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('cp'); -INSERT INTO t1 (a00) VALUES ('cq'); -INSERT INTO t1 (a00) VALUES ('cr'); -INSERT INTO t1 (a00) VALUES ('cs'); -INSERT INTO t1 (a00) VALUES ('ct'); -INSERT INTO t1 (a00) VALUES ('cu'); -INSERT INTO t1 (a00) VALUES ('cv'); - -INSERT INTO t1 (a00) VALUES ('cw'); -INSERT INTO t1 (a00) VALUES ('cx'); -INSERT INTO t1 (a00) VALUES ('cy'); -INSERT INTO t1 (a00) VALUES ('cz'); -INSERT INTO t1 (a00) VALUES ('da'); -INSERT INTO t1 (a00) VALUES ('db'); -INSERT INTO t1 (a00) VALUES ('dc'); - -INSERT INTO t1 (a00) VALUES ('dd'); -INSERT INTO t1 (a00) VALUES ('de'); -INSERT INTO t1 (a00) VALUES ('df'); -INSERT INTO t1 (a00) VALUES ('dg'); -INSERT INTO t1 (a00) VALUES ('dh'); -INSERT INTO t1 (a00) VALUES ('di'); -INSERT INTO t1 (a00) VALUES ('dj'); - -INSERT INTO t1 (a00) VALUES ('dk'); -INSERT INTO t1 (a00) VALUES ('dl'); -INSERT INTO t1 (a00) VALUES ('dm'); -INSERT INTO t1 (a00) VALUES ('dn'); -INSERT INTO t1 (a00) VALUES ('do'); -INSERT INTO t1 (a00) VALUES ('dp'); -INSERT INTO t1 (a00) VALUES ('dq'); - -INSERT INTO t1 (a00) VALUES ('dr'); -INSERT INTO t1 (a00) VALUES ('ds'); -INSERT INTO t1 (a00) VALUES ('dt'); -INSERT INTO t1 (a00) VALUES ('du'); -INSERT INTO t1 (a00) VALUES ('dv'); -INSERT INTO t1 (a00) VALUES ('dw'); -INSERT INTO t1 (a00) VALUES ('dx'); - 
-INSERT INTO t1 (a00) VALUES ('dy'); -INSERT INTO t1 (a00) VALUES ('dz'); -INSERT INTO t1 (a00) VALUES ('ea'); -INSERT INTO t1 (a00) VALUES ('eb'); -INSERT INTO t1 (a00) VALUES ('ec'); -INSERT INTO t1 (a00) VALUES ('ed'); -INSERT INTO t1 (a00) VALUES ('ee'); - -INSERT INTO t1 (a00) VALUES ('ef'); -INSERT INTO t1 (a00) VALUES ('eg'); -INSERT INTO t1 (a00) VALUES ('eh'); -INSERT INTO t1 (a00) VALUES ('ei'); -INSERT INTO t1 (a00) VALUES ('ej'); -INSERT INTO t1 (a00) VALUES ('ek'); -INSERT INTO t1 (a00) VALUES ('el'); -COMMIT; -# Split also at level 1 (1-4-18) -# (aa,ar,co,el) -# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -BEGIN; -INSERT INTO t1 (a00) VALUES ('em'); -INSERT INTO t1 (a00) VALUES ('en'); -INSERT INTO t1 (a00) VALUES ('eo'); -INSERT INTO t1 (a00) VALUES ('ep'); -INSERT INTO t1 (a00) VALUES ('eq'); -INSERT INTO t1 (a00) VALUES ('er'); -INSERT INTO t1 (a00) VALUES ('es'); - -INSERT INTO t1 (a00) VALUES ('et'); -INSERT INTO t1 (a00) VALUES ('eu'); -INSERT INTO t1 (a00) VALUES ('ev'); -INSERT INTO t1 (a00) VALUES ('ew'); -INSERT INTO t1 (a00) VALUES ('ex'); -INSERT INTO t1 (a00) VALUES ('ey'); -INSERT INTO t1 (a00) VALUES ('ez'); - -INSERT INTO t1 (a00) VALUES ('fa'); -INSERT INTO t1 (a00) VALUES ('fb'); -INSERT INTO t1 (a00) VALUES ('fc'); -INSERT INTO t1 (a00) VALUES ('fd'); -INSERT INTO t1 (a00) VALUES ('fe'); -INSERT INTO t1 (a00) VALUES ('ff'); -INSERT INTO t1 (a00) VALUES ('fg'); - -INSERT INTO t1 (a00) VALUES ('fh'); -INSERT INTO t1 (a00) VALUES ('fi'); -INSERT INTO t1 (a00) VALUES ('fj'); -INSERT INTO t1 (a00) VALUES ('fk'); -INSERT INTO t1 (a00) VALUES ('fl'); -INSERT INTO t1 (a00) VALUES ('fm'); -INSERT INTO t1 (a00) VALUES ('fn'); - -INSERT INTO t1 (a00) VALUES ('fo'); -INSERT INTO t1 (a00) VALUES 
('fp'); -INSERT INTO t1 (a00) VALUES ('fq'); -INSERT INTO t1 (a00) VALUES ('fr'); -INSERT INTO t1 (a00) VALUES ('fs'); -INSERT INTO t1 (a00) VALUES ('ft'); -INSERT INTO t1 (a00) VALUES ('fu'); - -INSERT INTO t1 (a00) VALUES ('fv'); -INSERT INTO t1 (a00) VALUES ('fw'); -INSERT INTO t1 (a00) VALUES ('fx'); -INSERT INTO t1 (a00) VALUES ('fy'); -INSERT INTO t1 (a00) VALUES ('fz'); -INSERT INTO t1 (a00) VALUES ('ga'); -INSERT INTO t1 (a00) VALUES ('gb'); - -INSERT INTO t1 (a00) VALUES ('gc'); -INSERT INTO t1 (a00) VALUES ('gd'); -INSERT INTO t1 (a00) VALUES ('ge'); -INSERT INTO t1 (a00) VALUES ('gf'); -INSERT INTO t1 (a00) VALUES ('gg'); -INSERT INTO t1 (a00) VALUES ('gh'); -COMMIT; - -# Current tree form (1-4-24) -# (aa,ar,co,el) -# (aa,ad,ak) (ar,ay,bf,bm,bt,ca,ch) (co,cv,dc,dj,dq,dx,ee) (el..,gb) -# (aa,ab,ac)(ad..)(ak..)(ar..)(ay..)(bf..)(bm..)(bt..)(ca..)(ch..)(co..)(cv..)(dc..)(dj..)(dq..)(dx..)(ee..)(el..)..(gb..) -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - -# Insert the rest of records normally -SET GLOBAL innodb_limit_optimistic_insert_debug = 0; - ---echo # Test start - -# (1) Insert records to leaf page (bf..) and cause modify_page. -# - root page is not X latched -# - latched from level 1 page (ar,ay,bf,bm,bt,ca,ch) - -SET DEBUG_SYNC = 'RESET'; - -# Filling leaf page (bf..) -INSERT INTO t1 (a00) VALUES ('bfa'); - ---connection con1 -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -# Cause modify_tree ---send -INSERT INTO t1 (a00) VALUES ('bfb'); - ---connection con2 -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -# Not blocked searches -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -SELECT a00,a01 FROM t1 WHERE a00 = 'aq'; -# "where a00 = 'co'" is blocked because searching from smaller ('co','a','a',..). 
-SELECT a00,a01 FROM t1 WHERE a00 = 'cp'; -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; - -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'ar'; - ---connection con3 -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'cn'; - ---connection default -# FIXME: These occasionally time out! ---disable_warnings -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; ---enable_warnings -SET DEBUG_SYNC = 'now SIGNAL continue'; - ---connection con1 ---reap - ---connection con2 ---reap - ---connection con3 ---reap - ---connection default - -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - - - -# (2) Insert records to leaf page (co..) and cause modify_page -# - root page is X latched, because node_ptr for 'co' -# is 1st record for (co,cv,dc,dj,dq,dx,ee) -# -# * ordinary pessimitic insert might be done by pessistic update -# and we should consider possibility node_ptr to be deleted. - -SET DEBUG_SYNC = 'RESET'; - -# Filling leaf page (co..) -INSERT INTO t1 (a00) VALUES ('coa'); - ---connection con1 -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -# Cause modify_tree ---send -INSERT INTO t1 (a00) VALUES ('cob'); - ---connection con2 -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -# All searches are blocked because root page is X latched - -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; - ---connection con3 -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; - ---connection default -# FIXME: These occasionally time out! 
---disable_warnings -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; ---enable_warnings -SET DEBUG_SYNC = 'now SIGNAL continue'; - ---connection con1 ---reap - ---connection con2 ---reap - ---connection con3 ---reap - ---connection default - -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - - - -# (3) Insert records to rightmost leaf page (gb..) and cause modify_page -# - root page is not X latched, because node_ptr for 'gb' is the last record -# of the level 1 though it is last record in the page. -# - lathed from level 1 page (el..,gb) - -SET DEBUG_SYNC = 'RESET'; - -# Filling leaf page (gb..) -INSERT INTO t1 (a00) VALUES ('gba'); - ---connection con1 -SET DEBUG_SYNC = 'before_insert_pessimitic_row_ins_clust SIGNAL reached WAIT_FOR continue'; -# Cause modify_tree ---send -INSERT INTO t1 (a00) VALUES ('gbb'); - ---connection con2 -SET DEBUG_SYNC = 'now WAIT_FOR reached'; -# Not blocked searches -SELECT a00,a01 FROM t1 WHERE a00 = 'aa'; -SELECT a00,a01 FROM t1 WHERE a00 = 'ek'; - -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait1'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'el'; - ---connection con3 -SET DEBUG_SYNC = 'rw_s_lock_waiting SIGNAL lockwait2'; -# Blocked ---send -SELECT a00,a01 FROM t1 WHERE a00 = 'gb'; - ---connection default -# FIXME: These occasionally time out! 
---disable_warnings -SET DEBUG_SYNC = 'now WAIT_FOR lockwait1 TIMEOUT 1'; -SET DEBUG_SYNC = 'now WAIT_FOR lockwait2 TIMEOUT 1'; ---enable_warnings -SET DEBUG_SYNC = 'now SIGNAL continue'; - ---connection con1 ---reap - ---connection con2 ---reap - ---connection con3 ---reap - ---connection default -ANALYZE TABLE t1; -SELECT CLUST_INDEX_SIZE FROM information_schema.INNODB_SYS_TABLESTATS WHERE NAME = 'test/t1'; - - - -# Cleanup -SET DEBUG_SYNC = 'RESET'; - ---connection default ---disconnect con1 ---disconnect con2 ---disconnect con3 - -DROP TABLE t1; - ---disable_query_log -SET GLOBAL innodb_limit_optimistic_insert_debug = @old_innodb_limit_optimistic_insert_debug; -SET GLOBAL innodb_adaptive_hash_index = @old_innodb_adaptive_hash_index; -SET GLOBAL innodb_stats_persistent = @old_innodb_stats_persistent; ---enable_query_log - -# Wait till all disconnects are completed. ---source include/wait_until_count_sessions.inc diff --git a/mysql-test/suite/innodb_gis/r/rtree_split.result b/mysql-test/suite/innodb_gis/r/rtree_split.result index 8e475776ce0..97027bde865 100644 --- a/mysql-test/suite/innodb_gis/r/rtree_split.result +++ b/mysql-test/suite/innodb_gis/r/rtree_split.result @@ -61,3 +61,15 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1); count(*) 57344 drop table t1; +# +# MDEV-30400 Assertion height == btr_page_get_level ... 
on INSERT +# +CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB; +SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; +SET GLOBAL innodb_limit_optimistic_insert_debug=2; +BEGIN; +INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_6; +ROLLBACK; +SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit; +DROP TABLE t1; +# End of 10.6 tests diff --git a/mysql-test/suite/innodb_gis/t/rtree_split.test b/mysql-test/suite/innodb_gis/t/rtree_split.test index 6f285187508..a23315dc3f3 100644 --- a/mysql-test/suite/innodb_gis/t/rtree_split.test +++ b/mysql-test/suite/innodb_gis/t/rtree_split.test @@ -73,3 +73,18 @@ select count(*) from t1 where MBRWithin(t1.c2, @g1); # Clean up. drop table t1; + +--echo # +--echo # MDEV-30400 Assertion height == btr_page_get_level ... on INSERT +--echo # + +CREATE TABLE t1 (c POINT NOT NULL,SPATIAL (c)) ENGINE=InnoDB; +SET @save_limit=@@GLOBAL.innodb_limit_optimistic_insert_debug; +SET GLOBAL innodb_limit_optimistic_insert_debug=2; +BEGIN; +INSERT INTO t1 SELECT POINTFROMTEXT ('POINT(0 0)') FROM seq_1_to_6; +ROLLBACK; +SET GLOBAL innodb_limit_optimistic_insert_debug=@save_limit; +DROP TABLE t1; + +--echo # End of 10.6 tests diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index 0bb16dba374..7fd851f7b0e 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2022, MariaDB Corporation. +Copyright (c) 2014, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -460,6 +460,54 @@ btr_page_create( } } +buf_block_t * +mtr_t::get_already_latched(const page_id_t id, mtr_memo_type_t type) const +{ + ut_ad(is_active()); + ut_ad(type == MTR_MEMO_PAGE_X_FIX || type == MTR_MEMO_PAGE_SX_FIX || + type == MTR_MEMO_PAGE_S_FIX); + for (ulint i= 0; i < m_memo.size(); i++) + { + const mtr_memo_slot_t &slot= m_memo[i]; + const auto slot_type= mtr_memo_type_t(slot.type & ~MTR_MEMO_MODIFY); + if (slot_type == MTR_MEMO_PAGE_X_FIX || slot_type == type) /* an X-latched slot is accepted for any requested type */ + { + buf_block_t *block= static_cast<buf_block_t*>(slot.object); + if (block->page.id() == id) + return block; + } + } + return nullptr; +} + +/** Fetch an index root page that was already latched in the +mini-transaction. */ +static buf_block_t *btr_get_latched_root(const dict_index_t &index, mtr_t *mtr) +{ + return mtr->get_already_latched(page_id_t{index.table->space_id, index.page}, + MTR_MEMO_PAGE_SX_FIX); +} + +/** Fetch an index page that should have been already latched in the +mini-transaction. */ +static buf_block_t * +btr_block_reget(mtr_t *mtr, const dict_index_t &index, + const page_id_t id, rw_lock_type_t rw_latch, + dberr_t *err) +{ + if (buf_block_t *block= + mtr->get_already_latched(id, mtr_memo_type_t(rw_latch))) + { + *err= DB_SUCCESS; + return block; + } + +#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ + ut_ad(mtr->memo_contains_flagged(&index.lock, MTR_MEMO_X_LOCK)); +#endif + return btr_block_get(index, id.page_no(), rw_latch, true, mtr, err); +} + /**************************************************************//** Allocates a new file page to be used in an ibuf tree. Takes the page from the free list of the tree, which must contain pages! 
@@ -472,18 +520,16 @@ btr_page_alloc_for_ibuf( mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ { - buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, err); + buf_block_t *root= btr_get_latched_root(*index, mtr); if (UNIV_UNLIKELY(!root)) return root; - buf_block_t *new_block= - buf_page_get_gen(page_id_t(index->table->space_id, + buf_page_get_gen(page_id_t(IBUF_SPACE_ID, mach_read_from_4(PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST + FLST_FIRST + FIL_ADDR_PAGE + root->page.frame)), - index->table->space->zip_size(), RW_X_LATCH, nullptr, - BUF_GET, mtr, err); + 0, RW_X_LATCH, nullptr, BUF_GET, mtr, err); if (new_block) *err= flst_remove(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, new_block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); @@ -523,11 +569,11 @@ btr_page_alloc_low( #ifdef BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->release_block_at_savepoint(savepoint, root); + mtr->rollback_to_savepoint(savepoint); } else { - mtr->u_lock_register(savepoint); + mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -579,15 +625,12 @@ btr_page_free_for_ibuf( mtr_t* mtr) /*!< in: mtr */ { ut_ad(mtr->memo_contains_flagged(block, MTR_MEMO_PAGE_X_FIX)); - - dberr_t err; - if (buf_block_t *root= btr_root_block_get(index, RW_SX_LATCH, mtr, &err)) - { - err= flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + buf_block_t *root= btr_get_latched_root(*index, mtr); + dberr_t err= + flst_add_first(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, block, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); - ut_d(if (err == DB_SUCCESS) - flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); - } + ut_d(if (err == DB_SUCCESS) + flst_validate(root, PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr)); return err; } @@ -637,11 +680,11 @@ dberr_t btr_page_free(dict_index_t* index, buf_block_t* block, mtr_t* mtr, #ifdef 
BTR_CUR_HASH_ADAPT ut_ad(!root->index || !root->index->freed()); #endif - mtr->release_block_at_savepoint(savepoint, root); + mtr->rollback_to_savepoint(savepoint); } else { - mtr->u_lock_register(savepoint); + mtr->lock_register(savepoint, MTR_MEMO_PAGE_SX_FIX); root->page.lock.u_lock(); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(root, true); @@ -712,35 +755,27 @@ btr_node_ptr_get_child( mtr, err); } -MY_ATTRIBUTE((nonnull(2,3,5), warn_unused_result)) +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds an sx-latch on the tree. @return rec_get_offsets() of the node pointer record */ static rec_offs* -btr_page_get_father_node_ptr_func( -/*==============================*/ +btr_page_get_father_node_ptr_for_validate( rec_offs* offsets,/*!< in: work area for the return value */ mem_heap_t* heap, /*!< in: memory heap to use */ btr_cur_t* cursor, /*!< in: cursor pointing to user record, out: cursor on node pointer record, its page x-latched */ - btr_latch_mode latch_mode,/*!< in: BTR_CONT_MODIFY_TREE - or BTR_CONT_SEARCH_TREE */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE); - const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); dict_index_t* index = btr_cur_get_index(cursor); ut_ad(!dict_index_is_spatial(index)); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); ut_ad(dict_index_get_page(index) != page_no); const auto level = btr_page_get_level(btr_cur_get_page(cursor)); @@ -752,12 +787,16 @@ btr_page_get_father_node_ptr_func( dict_index_build_node_ptr(index, user_rec, 0, heap, level), - PAGE_CUR_LE, latch_mode, + RW_S_LATCH, cursor, mtr) != DB_SUCCESS) { return nullptr; } const 
rec_t* node_ptr = btr_cur_get_rec(cursor); +#if 0 /* MDEV-29835 FIXME */ + ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); +#endif offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); @@ -769,13 +808,65 @@ btr_page_get_father_node_ptr_func( return(offsets); } -#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \ - btr_page_get_father_node_ptr_func( \ - of,heap,cur,BTR_CONT_MODIFY_TREE,mtr) +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) +/** Return the node pointer to a page. +@param offsets work area for the return value +@param heap memory heap +@param cursor in: child page; out: node pointer to it +@param mtr mini-transaction +@return rec_get_offsets() of the node pointer record +@retval nullptr if the parent page had not been latched in mtr */ +static rec_offs *btr_page_get_parent(rec_offs *offsets, mem_heap_t *heap, + btr_cur_t *cursor, mtr_t *mtr) +{ + const uint32_t page_no= cursor->block()->page.id().page_no(); + const dict_index_t *index= cursor->index(); + ut_ad(!index->is_spatial()); + ut_ad(index->page != page_no); -#define btr_page_get_father_node_ptr_for_validate(of,heap,cur,mtr) \ - btr_page_get_father_node_ptr_func( \ - of,heap,cur,BTR_CONT_SEARCH_TREE,mtr) + uint32_t p= index->page; + auto level= btr_page_get_level(cursor->block()->page.frame); + const dtuple_t *tuple= + dict_index_build_node_ptr(index, btr_cur_get_rec(cursor), 0, heap, level); + level++; + + ulint i; + for (i= 0; i < mtr->get_savepoint(); i++) + if (buf_block_t *block= mtr->block_at_savepoint(i)) + if (block->page.id().page_no() == p) + { + ut_ad(block->page.lock.have_u_or_x() || + (!block->page.lock.have_s() && index->lock.have_x())); + ulint up_match= 0, low_match= 0; + cursor->page_cur.block= block; + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &up_match, + &low_match, &cursor->page_cur, + nullptr)) + return nullptr; + offsets= rec_get_offsets(cursor->page_cur.rec, 
index, offsets, 0, + ULINT_UNDEFINED, &heap); + p= btr_node_ptr_get_child_page_no(cursor->page_cur.rec, offsets); + if (p != page_no) + { + if (btr_page_get_level(block->page.frame) == level) + return nullptr; + i= 0; // MDEV-29835 FIXME: require all pages to be latched in order! + continue; + } + ut_ad(block->page.lock.have_u_or_x()); + if (block->page.lock.have_u_not_x()) + { + /* btr_cur_t::search_leaf(BTR_MODIFY_TREE) only U-latches the + root page initially. */ + ut_ad(block->page.id().page_no() == index->page); + block->page.lock.u_x_upgrade(); + mtr->page_lock_upgrade(*block); + } + return offsets; + } + + return nullptr; +} /************************************************************//** Returns the upper level node pointer to a page. It is assumed that mtr holds @@ -796,7 +887,7 @@ btr_page_get_father_block( if (UNIV_UNLIKELY(!rec)) return nullptr; cursor->page_cur.rec= rec; - return btr_page_get_father_node_ptr(offsets, heap, cursor, mtr); + return btr_page_get_parent(offsets, heap, cursor, mtr); } /** Seek to the parent page of a B-tree page. @@ -811,7 +902,7 @@ bool btr_page_get_father(mtr_t* mtr, btr_cur_t* cursor) return false; cursor->page_cur.rec= rec; mem_heap_t *heap= mem_heap_create(100); - const bool got= btr_page_get_father_node_ptr(nullptr, heap, cursor, mtr); + const bool got= btr_page_get_parent(nullptr, heap, cursor, mtr); mem_heap_free(heap); return got; } @@ -1718,48 +1809,43 @@ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. 
@param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction -@return error code */ +@param[in,out] mtr mini-transaction */ ATTRIBUTE_COLD -dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) +void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr) { ut_ad(!index.table->is_temporary()); ut_ad(index.is_primary()); - dberr_t err; - if (buf_block_t *root= btr_root_block_get(&index, RW_SX_LATCH, mtr, &err)) + buf_block_t *root= btr_get_latched_root(index, mtr); + byte *page_type= root->page.frame + FIL_PAGE_TYPE; + if (all) { - byte *page_type= root->page.frame + FIL_PAGE_TYPE; - if (all) - { - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || - mach_read_from_2(page_type) == FIL_PAGE_INDEX); - mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); - byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; - mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, - page_ptr_get_direction(instant + 1)); - } - else - ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); - static const byte supremuminfimum[8 + 8] = "supremuminfimum"; - uint16_t infimum, supremum; - if (page_is_comp(root->page.frame)) - { - infimum= PAGE_NEW_INFIMUM; - supremum= PAGE_NEW_SUPREMUM; - } - else - { - infimum= PAGE_OLD_INFIMUM; - supremum= PAGE_OLD_SUPREMUM; - } - ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == - !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); - mtr->memcpy(*root, &root->page.frame[infimum], - supremuminfimum + 8, 8); - mtr->memcpy(*root, &root->page.frame[supremum], - supremuminfimum, 8); + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT || + mach_read_from_2(page_type) == FIL_PAGE_INDEX); + mtr->write<2,mtr_t::MAYBE_NOP>(*root, page_type, FIL_PAGE_INDEX); + byte *instant= PAGE_INSTANT + PAGE_HEADER + root->page.frame; + mtr->write<2,mtr_t::MAYBE_NOP>(*root, instant, + 
page_ptr_get_direction(instant + 1)); } - return err; + else + ut_ad(mach_read_from_2(page_type) == FIL_PAGE_TYPE_INSTANT); + static const byte supremuminfimum[8 + 8] = "supremuminfimum"; + uint16_t infimum, supremum; + if (page_is_comp(root->page.frame)) + { + infimum= PAGE_NEW_INFIMUM; + supremum= PAGE_NEW_SUPREMUM; + } + else + { + infimum= PAGE_OLD_INFIMUM; + supremum= PAGE_OLD_SUPREMUM; + } + ut_ad(!memcmp(&root->page.frame[infimum], supremuminfimum + 8, 8) == + !memcmp(&root->page.frame[supremum], supremuminfimum, 8)); + mtr->memcpy(*root, &root->page.frame[infimum], + supremuminfimum + 8, 8); + mtr->memcpy(*root, &root->page.frame[supremum], + supremuminfimum, 8); } /*************************************************************//** @@ -1856,11 +1942,6 @@ btr_root_raise_and_insert( } /* Copy the records from root to the new page one by one. */ - dberr_t e; - if (!err) { - err = &e; - } - if (0 #ifdef UNIV_ZIP_COPY || new_page_zip @@ -2004,21 +2085,15 @@ btr_root_raise_and_insert( page_cursor->block = new_block; page_cursor->index = index; - if (tuple) { - ut_ad(dtuple_check_typed(tuple)); - /* Reposition the cursor to the child node */ - ulint low_match = 0, up_match = 0; + ut_ad(dtuple_check_typed(tuple)); + /* Reposition the cursor to the child node */ + ulint low_match = 0, up_match = 0; - if (page_cur_search_with_match(tuple, PAGE_CUR_LE, - &up_match, &low_match, - page_cursor, nullptr)) { - if (err) { - *err = DB_CORRUPTION; - } - return nullptr; - } - } else { - page_cursor->rec = page_get_infimum_rec(new_block->page.frame); + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, + &up_match, &low_match, + page_cursor, nullptr)) { + *err = DB_CORRUPTION; + return nullptr; } /* Split the child and insert tuple */ @@ -2237,6 +2312,7 @@ func_exit: return(rec); } +#ifdef UNIV_DEBUG /*************************************************************//** Returns TRUE if the insert fits on the appropriate half-page with the chosen split_rec. 
@@ -2334,6 +2410,7 @@ got_rec: return(false); } +#endif /*******************************************************//** Inserts a data tuple to a tree on a non-leaf level. It is assumed @@ -2356,25 +2433,34 @@ btr_insert_on_non_leaf_level( rtr_info_t rtr_info; ut_ad(level > 0); - auto mode = PAGE_CUR_LE; - - if (index->is_spatial()) { - mode = PAGE_CUR_RTREE_INSERT; - /* For spatial index, initialize structures to track - its parents etc. */ - rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); - - rtr_info_update_btr(&cursor, &rtr_info); - } flags |= BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG | BTR_NO_UNDO_LOG_FLAG; cursor.page_cur.index = index; - dberr_t err = btr_cur_search_to_nth_level(level, tuple, mode, - BTR_CONT_MODIFY_TREE, + dberr_t err; + + if (index->is_spatial()) { + /* For spatial index, initialize structures to track + its parents etc. */ + rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); + + rtr_info_update_btr(&cursor, &rtr_info); + err = rtr_search_to_nth_level(level, tuple, + PAGE_CUR_RTREE_INSERT, + BTR_CONT_MODIFY_TREE, + &cursor, mtr); + } else { + err = btr_cur_search_to_nth_level(level, tuple, RW_X_LATCH, &cursor, mtr); + } + ut_ad(cursor.flag == BTR_CUR_BINARY); +#if 0 /* MDEV-29835 FIXME */ + ut_ad(!btr_cur_get_block(&cursor)->page.lock.not_recursive() + || index->is_spatial() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); +#endif if (UNIV_LIKELY(err == DB_SUCCESS)) { err = btr_cur_optimistic_insert(flags, @@ -2470,6 +2556,7 @@ btr_attach_half_pages( /* Get the level of the split pages */ const ulint level = btr_page_get_level(block->page.frame); ut_ad(level == btr_page_get_level(new_block->page.frame)); + page_id_t id{block->page.id()}; /* Get the previous and next pages of page */ const uint32_t prev_page_no = btr_page_get_prev(block->page.frame); @@ -2477,12 +2564,32 @@ btr_attach_half_pages( /* for consistency, both blocks should be locked, before change */ if (prev_page_no != FIL_NULL && direction == FSP_DOWN) { 
- prev_block = btr_block_get(*index, prev_page_no, RW_X_LATCH, - !level, mtr); + id.set_page_no(prev_page_no); + prev_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!prev_block) { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index->lock, + MTR_MEMO_X_LOCK)); +# endif + prev_block = btr_block_get(*index, prev_page_no, + RW_X_LATCH, !level, mtr); + } +#endif } if (next_page_no != FIL_NULL && direction != FSP_DOWN) { - next_block = btr_block_get(*index, next_page_no, RW_X_LATCH, - !level, mtr); + id.set_page_no(next_page_no); + next_block = mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!next_block) { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index->lock, + MTR_MEMO_X_LOCK)); +# endif + next_block = btr_block_get(*index, next_page_no, + RW_X_LATCH, !level, mtr); + } +#endif } /* Build the node pointer (= node key and page address) for the upper @@ -3018,6 +3125,7 @@ insert_empty: return nullptr; } +#ifdef UNIV_DEBUG /* If the split is made on the leaf level and the insert will fit on the appropriate half-page, we may release the tree x-latch. We can then move the records after releasing the tree latch, @@ -3025,21 +3133,21 @@ insert_empty: const bool insert_will_fit = !new_page_zip && btr_page_insert_fits(cursor, split_rec, offsets, tuple, n_ext, heap); +#endif if (!split_rec && !insert_left) { UT_DELETE_ARRAY(buf); buf = NULL; } - if (!srv_read_only_mode - && insert_will_fit +#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled + if (insert_will_fit && page_is_leaf(page) && !dict_index_is_online_ddl(cursor->index())) { -#if 0 // FIXME: this used to be a no-op, and may cause trouble if enabled mtr->release(cursor->index()->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif /* 5. 
Move then the records to the new page */ if (direction == FSP_DOWN) { @@ -3271,52 +3379,58 @@ func_exit: dberr_t btr_level_list_remove(const buf_block_t& block, const dict_index_t& index, mtr_t* mtr) { - ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); - ut_ad(block.zip_size() == index.table->space->zip_size()); - ut_ad(index.table->space->id == block.page.id().space()); - /* Get the previous and next page numbers of page */ + ut_ad(mtr->memo_contains_flagged(&block, MTR_MEMO_PAGE_X_FIX)); + ut_ad(block.zip_size() == index.table->space->zip_size()); + ut_ad(index.table->space->id == block.page.id().space()); + /* Get the previous and next page numbers of page */ + const uint32_t prev_page_no= btr_page_get_prev(block.page.frame); + const uint32_t next_page_no= btr_page_get_next(block.page.frame); + page_id_t id{block.page.id()}; + buf_block_t *prev= nullptr, *next; + dberr_t err; - const page_t* page = block.page.frame; - const uint32_t prev_page_no = btr_page_get_prev(page); - const uint32_t next_page_no = btr_page_get_next(page); + /* Update page links of the level */ + if (prev_page_no != FIL_NULL) + { + id.set_page_no(prev_page_no); + prev= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!prev) + { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); +# endif + prev= btr_block_get(index, id.page_no(), RW_X_LATCH, + page_is_leaf(block.page.frame), mtr, &err); + if (UNIV_UNLIKELY(!prev)) + return err; + } +#endif + } - /* Update page links of the level */ - dberr_t err; + if (next_page_no != FIL_NULL) + { + id.set_page_no(next_page_no); + next= mtr->get_already_latched(id, MTR_MEMO_PAGE_X_FIX); +#if 1 /* MDEV-29835 FIXME: acquire page latches upfront */ + if (!next) + { +# if 0 /* MDEV-29835 FIXME */ + ut_ad(mtr->memo_contains(index.lock, MTR_MEMO_X_LOCK)); +# endif + next= btr_block_get(index, id.page_no(), RW_X_LATCH, + 
page_is_leaf(block.page.frame), mtr, &err); + if (UNIV_UNLIKELY(!next)) + return err; + } +#endif + btr_page_set_prev(next, prev_page_no, mtr); + } - if (prev_page_no != FIL_NULL) { - buf_block_t* prev_block = btr_block_get( - index, prev_page_no, RW_X_LATCH, page_is_leaf(page), - mtr, &err); - if (UNIV_UNLIKELY(!prev_block)) { - return err; - } - if (UNIV_UNLIKELY(memcmp_aligned<4>(prev_block->page.frame - + FIL_PAGE_NEXT, - page + FIL_PAGE_OFFSET, - 4))) { - return DB_CORRUPTION; - } - btr_page_set_next(prev_block, next_page_no, mtr); - } + if (prev) + btr_page_set_next(prev, next_page_no, mtr); - if (next_page_no != FIL_NULL) { - buf_block_t* next_block = btr_block_get( - index, next_page_no, RW_X_LATCH, page_is_leaf(page), - mtr, &err); - - if (UNIV_UNLIKELY(!next_block)) { - return err; - } - if (UNIV_UNLIKELY(memcmp_aligned<4>(next_block->page.frame - + FIL_PAGE_PREV, - page + FIL_PAGE_OFFSET, - 4))) { - return DB_CORRUPTION; - } - btr_page_set_prev(next_block, prev_page_no, mtr); - } - - return DB_SUCCESS; + return DB_SUCCESS; } /*************************************************************//** @@ -4166,23 +4280,30 @@ btr_discard_page( const uint32_t left_page_no = btr_page_get_prev(block->page.frame); const uint32_t right_page_no = btr_page_get_next(block->page.frame); + page_id_t merge_page_id{block->page.id()}; ut_d(bool parent_is_different = false); + dberr_t err; if (left_page_no != FIL_NULL) { - dberr_t err; - merge_block = btr_block_get(*index, left_page_no, RW_X_LATCH, - true, mtr, &err); + merge_page_id.set_page_no(left_page_no); + merge_block = btr_block_reget(mtr, *index, merge_page_id, + RW_X_LATCH, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } - +#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. 
*/ + ut_ad(!memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + FIL_PAGE_OFFSET, + 4)); +#else if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_NEXT, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } - +#endif ut_d(parent_is_different = (page_rec_get_next( page_get_infimum_rec( @@ -4190,19 +4311,25 @@ btr_discard_page( &parent_cursor))) == btr_cur_get_rec(&parent_cursor))); } else if (right_page_no != FIL_NULL) { - dberr_t err; - merge_block = btr_block_get(*index, right_page_no, RW_X_LATCH, - true, mtr, &err); + merge_page_id.set_page_no(right_page_no); + merge_block = btr_block_reget(mtr, *index, merge_page_id, + RW_X_LATCH, &err); if (UNIV_UNLIKELY(!merge_block)) { return err; } +#if 0 /* MDEV-29385 FIXME: Acquire the page latch upfront. */ + ut_ad(!memcmp_aligned<4>(merge_block->page.frame + + FIL_PAGE_PREV, + block->page.frame + FIL_PAGE_OFFSET, + 4)); +#else if (UNIV_UNLIKELY(memcmp_aligned<4>(merge_block->page.frame + FIL_PAGE_PREV, block->page.frame + FIL_PAGE_OFFSET, 4))) { return DB_CORRUPTION; } - +#endif ut_d(parent_is_different = page_rec_is_supremum( page_rec_get_next(btr_cur_get_rec(&parent_cursor)))); if (page_is_leaf(merge_block->page.frame)) { @@ -4244,13 +4371,10 @@ btr_discard_page( } #ifdef UNIV_ZIP_DEBUG - { - page_zip_des_t* merge_page_zip - = buf_block_get_page_zip(merge_block); - ut_a(!merge_page_zip - || page_zip_validate(merge_page_zip, - merge_block->page.frame, index)); - } + if (page_zip_des_t* merge_page_zip + = buf_block_get_page_zip(merge_block)) /* validate only when a page_zip exists */ + ut_a(page_zip_validate(merge_page_zip, + merge_block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ if (index->has_locking()) { @@ -4269,7 +4393,7 @@ btr_discard_page( } /* Free the file page */ - dberr_t err = btr_page_free(index, block, mtr); + err = btr_page_free(index, block, mtr); if (err == DB_SUCCESS) { /* btr_check_node_ptr() needs parent block latched. 
@@ -4462,6 +4586,8 @@ btr_check_node_ptr( offsets = btr_page_get_father_block(NULL, heap, mtr, &cursor); } + ut_ad(offsets); + if (page_is_leaf(page)) { goto func_exit; @@ -4793,19 +4919,16 @@ btr_validate_level( page_zip_des_t* page_zip; #endif /* UNIV_ZIP_DEBUG */ ulint savepoint = 0; - ulint savepoint2 = 0; uint32_t parent_page_no = FIL_NULL; uint32_t parent_right_page_no = FIL_NULL; bool rightmost_child = false; mtr.start(); - if (!srv_read_only_mode) { - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); } dberr_t err; @@ -4853,7 +4976,6 @@ corrupted: offsets = rec_get_offsets(node_ptr, index, offsets, 0, ULINT_UNDEFINED, &heap); - savepoint2 = mtr_set_savepoint(&mtr); block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr, &err); if (!block) { @@ -4874,10 +4996,8 @@ corrupted: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. */ - mtr_release_block_at_savepoint( - &mtr, savepoint2, block); + mtr.release_last_page(); - savepoint2 = mtr_set_savepoint(&mtr); block = btr_block_get(*index, left_page_no, RW_SX_LATCH, false, &mtr, &err); @@ -4905,12 +5025,10 @@ func_exit: mem_heap_empty(heap); offsets = offsets2 = NULL; - if (!srv_read_only_mode) { - if (lockout) { - mtr_x_lock_index(index, &mtr); - } else { - mtr_sx_lock_index(index, &mtr); - } + if (lockout) { + mtr_x_lock_index(index, &mtr); + } else { + mtr_sx_lock_index(index, &mtr); } page = block->page.frame; @@ -4955,7 +5073,7 @@ func_exit: if (right_page_no != FIL_NULL) { const rec_t* right_rec; - savepoint = mtr_set_savepoint(&mtr); + savepoint = mtr.get_savepoint(); right_block = btr_block_get(*index, right_page_no, RW_SX_LATCH, !level, &mtr, &err); @@ -5150,8 +5268,10 @@ broken_links: /* To obey latch order of tree blocks, we should release the right_block once to obtain lock of the uncle block. 
*/ - mtr_release_block_at_savepoint( - &mtr, savepoint, right_block); + ut_ad(right_block + == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, + savepoint + 1); if (parent_right_page_no != FIL_NULL) { btr_block_get(*index, diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index ac06d9b1568..c0473f76422 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -3,7 +3,7 @@ Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -103,14 +103,14 @@ throughput clearly from about 100000. */ #define BTR_CUR_FINE_HISTORY_LENGTH 100000 #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ +/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ ib_counter_t btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_search_to_nth_level(). */ +btr_cur_t::search_leaf(). */ ib_counter_t btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by @@ -187,167 +187,6 @@ btr_rec_free_externally_stored_fields( /*==================== B-TREE SEARCH =========================*/ -/** Latches the leaf page or pages requested. -@param[in] block leaf page where the search converged -@param[in] latch_mode BTR_SEARCH_LEAF, ... 
-@param[in] cursor cursor -@param[in] mtr mini-transaction -@param[out] latch_leaves latched blocks and savepoints */ -void -btr_cur_latch_leaves( - buf_block_t* block, - btr_latch_mode latch_mode, - btr_cur_t* cursor, - mtr_t* mtr, - btr_latch_leaves_t* latch_leaves) -{ - compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); - compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); - compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); - ut_ad(block->page.id().space() == cursor->index()->table->space->id); - ut_ad(block->page.in_file()); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_S_LOCK - | MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - auto rtr_info = cursor->rtr_info; - if (UNIV_LIKELY_NULL(rtr_info) && !cursor->index()->is_spatial()) { - rtr_info = nullptr; - } - - const rw_lock_type_t mode = rw_lock_type_t( - latch_mode & (RW_X_LATCH | RW_S_LATCH)); - static_assert(ulint{RW_S_LATCH} == ulint{BTR_SEARCH_LEAF}, ""); - static_assert(ulint{RW_X_LATCH} == ulint{BTR_MODIFY_LEAF}, ""); - static_assert(BTR_SEARCH_LEAF & BTR_SEARCH_TREE, ""); - - switch (latch_mode) { - default: - break; - uint32_t left_page_no; - uint32_t right_page_no; - ulint save; - case BTR_SEARCH_LEAF: - case BTR_MODIFY_LEAF: - case BTR_SEARCH_TREE: - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS] - = mtr->get_savepoint(); - } -latch_block: - if (latch_leaves) { - latch_leaves->savepoints[1] = mtr->get_savepoint(); - latch_leaves->blocks[1] = block; - } - block->page.fix(); - mtr->page_lock(block, mode); - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_blocks[RTR_MAX_LEVELS] = block; - } - return; - case BTR_MODIFY_TREE: - /* It is exclusive for other operations which calls - btr_page_set_prev() */ - ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - save = mtr->get_savepoint(); - /* x-latch also siblings from left to right */ - 
left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - buf_block_t *b = btr_block_get( - *cursor->index(), left_page_no, RW_X_LATCH, - true, mtr); - - if (latch_leaves) { - latch_leaves->savepoints[0] = save; - latch_leaves->blocks[0] = b; - } - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS] - = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS] = b; - } - - save = mtr->get_savepoint(); - } - - if (latch_leaves) { - latch_leaves->savepoints[1] = mtr->get_savepoint(); - latch_leaves->blocks[1] = block; - } - - block->page.fix(); - block->page.lock.x_lock(); - - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); -#ifdef BTR_CUR_HASH_ADAPT - ut_ad(!btr_search_check_marked_free_index(block)); -#endif - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS + 1] = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS + 1] = block; - } - - right_page_no = btr_page_get_next(block->page.frame); - - if (right_page_no != FIL_NULL) { - save = mtr->get_savepoint(); - - buf_block_t* b = btr_block_get( - *cursor->index(), right_page_no, RW_X_LATCH, - true, mtr); - if (latch_leaves) { - latch_leaves->savepoints[2] = save; - latch_leaves->blocks[2] = b; - } - - if (UNIV_LIKELY_NULL(rtr_info)) { - rtr_info->tree_savepoints[RTR_MAX_LEVELS + 2] - = save; - rtr_info->tree_blocks[RTR_MAX_LEVELS + 2] = b; - } - } - - return; - - case BTR_SEARCH_PREV: - case BTR_MODIFY_PREV: - ut_ad(!rtr_info); - static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); - static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) - == (RW_S_LATCH ^ RW_X_LATCH), ""); - - /* Because we are holding index->lock, no page splits - or merges may run concurrently, and we may read - FIL_PAGE_PREV from a buffer-fixed, unlatched page. 
*/ - left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - save = mtr->get_savepoint(); - cursor->left_block = btr_block_get( - *cursor->index(), left_page_no, - mode, true, mtr); - if (latch_leaves) { - latch_leaves->savepoints[0] = save; - latch_leaves->blocks[0] = cursor->left_block; - } - } - - goto latch_block; - case BTR_CONT_MODIFY_TREE: - ut_ad(cursor->index()->is_spatial()); - return; - } - - MY_ASSERT_UNREACHABLE(); -} - /** Load the instant ALTER TABLE metadata from the clustered index when loading a table definition. @param[in,out] index clustered index definition @@ -729,98 +568,6 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) return index->n_core_null_bytes > 128; } -/** Optimistically latches the leaf page or pages requested. -@param[in] block guessed buffer block -@param[in] modify_clock modify clock value -@param[in,out] latch_mode BTR_SEARCH_LEAF, ... -@param[in,out] cursor cursor -@param[in] mtr mini-transaction -@return true if success */ -TRANSACTIONAL_TARGET -bool -btr_cur_optimistic_latch_leaves( - buf_block_t* block, - ib_uint64_t modify_clock, - btr_latch_mode* latch_mode, - btr_cur_t* cursor, - mtr_t* mtr) -{ - ut_ad(block->page.buf_fix_count()); - ut_ad(block->page.in_file()); - ut_ad(block->page.frame); - - switch (*latch_mode) { - default: - MY_ASSERT_UNREACHABLE(); - return(false); - case BTR_SEARCH_LEAF: - case BTR_MODIFY_LEAF: - return(buf_page_optimistic_get(*latch_mode, block, - modify_clock, mtr)); - case BTR_SEARCH_PREV: /* btr_pcur_move_backward_from_page() */ - case BTR_MODIFY_PREV: /* Ditto, or ibuf_insert() */ - uint32_t curr_page_no, left_page_no; - { - transactional_shared_lock_guard g{ - block->page.lock}; - if (block->modify_clock != modify_clock) { - return false; - } - curr_page_no = block->page.id().page_no(); - left_page_no = btr_page_get_prev(block->page.frame); - } - - static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); - 
static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); - static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) - == (RW_S_LATCH ^ RW_X_LATCH), ""); - - const rw_lock_type_t mode = rw_lock_type_t( - *latch_mode & (RW_X_LATCH | RW_S_LATCH)); - - if (left_page_no != FIL_NULL) { - cursor->left_block = buf_page_get_gen( - page_id_t(cursor->index()->table->space_id, - left_page_no), - cursor->index()->table->space->zip_size(), - mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); - - if (cursor->left_block - && btr_page_get_next( - cursor->left_block->page.frame) - != curr_page_no) { -release_left_block: - mtr->release_last_page(); - return false; - } - } else { - cursor->left_block = nullptr; - } - - if (buf_page_optimistic_get(mode, block, modify_clock, mtr)) { - if (btr_page_get_prev(block->page.frame) - == left_page_no) { - /* block was already buffer-fixed while - entering the function and - buf_page_optimistic_get() buffer-fixes - it again. */ - ut_ad(2 <= block->page.buf_fix_count()); - *latch_mode = btr_latch_mode(mode); - return(true); - } - - mtr->release_last_page(); - } - - ut_ad(block->page.buf_fix_count()); - if (cursor->left_block) { - goto release_left_block; - } - } - - return false; -} - /** Gets intention in btr_intention_t from latch_mode, and cleares the intention at the latch_mode. @@ -848,38 +595,6 @@ btr_intention_t btr_cur_get_and_clear_intention(btr_latch_mode *latch_mode) return(intention); } -/** -Gets the desired latch type for the root leaf (root page is root leaf) -at the latch mode. -@param latch_mode in: BTR_SEARCH_LEAF, ... 
-@return latch type */ -static -rw_lock_type_t -btr_cur_latch_for_root_leaf( - ulint latch_mode) -{ - switch (latch_mode) { - case BTR_SEARCH_LEAF: - case BTR_SEARCH_TREE: - case BTR_SEARCH_PREV: - return(RW_S_LATCH); - case BTR_MODIFY_LEAF: - case BTR_MODIFY_TREE: - case BTR_MODIFY_PREV: - return(RW_X_LATCH); - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - /* A root page should be latched already, - and don't need to be latched here. - fall through (RW_NO_LATCH) */ - case BTR_NO_LATCHES: - return(RW_NO_LATCH); - } - - MY_ASSERT_UNREACHABLE(); - return(RW_NO_LATCH); /* avoid compiler warnings */ -} - /** @return whether the distance between two records is at most the specified value */ static bool @@ -1197,1223 +912,879 @@ static ulint btr_node_ptr_max_size(const dict_index_t* index) return rec_max_size; } +/** @return a B-tree search mode suitable for non-leaf pages +@param mode leaf page search mode */ +static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode) +{ + if (mode > PAGE_CUR_GE) + { + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); + return mode; + } + if (mode == PAGE_CUR_GE) + return PAGE_CUR_L; + ut_ad(mode == PAGE_CUR_G); + return PAGE_CUR_LE; +} + +dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + btr_latch_mode latch_mode, mtr_t *mtr) +{ + ut_ad(index()->is_btree() || index()->is_ibuf()); + ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); + + buf_block_t *guess; + btr_op_t btr_op; + btr_intention_t lock_intention; + bool detected_same_key_root= false; + + mem_heap_t* heap = NULL; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets2 = offsets2_; + rec_offs_init(offsets_); + rec_offs_init(offsets2_); + + ut_ad(dict_index_check_search_tuple(index(), tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index()->page != FIL_NULL); + + MEM_UNDEFINED(&up_match, sizeof up_match); + MEM_UNDEFINED(&up_bytes, sizeof 
up_bytes); + MEM_UNDEFINED(&low_match, sizeof low_match); + MEM_UNDEFINED(&low_bytes, sizeof low_bytes); + ut_d(up_match= ULINT_UNDEFINED); + ut_d(low_match= ULINT_UNDEFINED); + + ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED) || + mtr->memo_contains_flagged(&index()->lock, + MTR_MEMO_S_LOCK | MTR_MEMO_SX_LOCK | + MTR_MEMO_X_LOCK)); + + /* These flags are mutually exclusive, they are lumped together + with the latch mode for historical reasons. It's possible for + none of the flags to be set. */ + switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { + default: + btr_op= BTR_NO_OP; + break; + case BTR_INSERT: + btr_op= (latch_mode & BTR_IGNORE_SEC_UNIQUE) + ? BTR_INSERT_IGNORE_UNIQUE_OP + : BTR_INSERT_OP; + break; + case BTR_DELETE: + btr_op= BTR_DELETE_OP; + ut_a(purge_node); + break; + case BTR_DELETE_MARK: + btr_op= BTR_DELMARK_OP; + break; + } + + /* Operations on the insert buffer tree cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->is_ibuf()); + /* Operations on the clustered index cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->is_clust()); + /* Operations on the temporary table(indexes) cannot be buffered. */ + ut_ad(btr_op == BTR_NO_OP || !index()->table->is_temporary()); + + const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; + lock_intention= btr_cur_get_and_clear_intention(&latch_mode); + latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + ut_ad(!latch_by_caller + || latch_mode == BTR_SEARCH_LEAF + || latch_mode == BTR_MODIFY_LEAF + || latch_mode == BTR_MODIFY_TREE + || latch_mode == BTR_MODIFY_ROOT_AND_LEAF); + + flag= BTR_CUR_BINARY; +#ifndef BTR_CUR_ADAPT + guess= nullptr; +#else + btr_search_t *info= btr_search_get_info(index()); + guess= info->root_guess; + +# ifdef BTR_CUR_HASH_ADAPT +# ifdef UNIV_SEARCH_PERF_STAT + info->n_searches++; +# endif + /* We do a dirty read of btr_search_enabled below, + and btr_search_guess_on_hash() will have to check it again. 
*/ + if (!btr_search_enabled); + else if (btr_search_guess_on_hash(index(), info, tuple, mode, + latch_mode, this, mtr)) + { + /* Search using the hash index succeeded */ + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ++btr_cur_n_sea; + + return DB_SUCCESS; + } + else + ++btr_cur_n_non_sea; +# endif +#endif + + /* If the hash search did not succeed, do binary search down the + tree */ + + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ + + const ulint savepoint= mtr->get_savepoint(); + + ulint node_ptr_max_size= 0; + rw_lock_type_t rw_latch= RW_S_LATCH; + + switch (latch_mode) { + case BTR_MODIFY_TREE: + rw_latch= RW_X_LATCH; + node_ptr_max_size= btr_node_ptr_max_size(index()); + if (latch_by_caller) + { + ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); + break; + } + if (lock_intention == BTR_INTENTION_DELETE && buf_pool.n_pend_reads && + trx_sys.history_size_approx() > BTR_CUR_FINE_HISTORY_LENGTH) + /* Most delete-intended operations are due to the purge of history. + Prioritize them when the history list is growing huge. */ + mtr_x_lock_index(index(), mtr); + else + mtr_sx_lock_index(index(), mtr); + break; +#ifdef UNIV_DEBUG + case BTR_CONT_MODIFY_TREE: + ut_ad("invalid mode" == 0); + break; +#endif + case BTR_MODIFY_ROOT_AND_LEAF: + rw_latch= RW_SX_LATCH; + /* fall through */ + default: + if (!latch_by_caller) + mtr_s_lock_index(index(), mtr); + } + + const ulint zip_size= index()->table->space->zip_size(); + + /* Start with the root page. 
*/ + page_id_t page_id(index()->table->space_id, index()->page); + + const page_cur_mode_t page_mode= btr_cur_nonleaf_mode(mode); + ulint height= ULINT_UNDEFINED; + up_match= 0; + up_bytes= 0; + low_match= 0; + low_bytes= 0; + ulint buf_mode= BUF_GET; + search_loop: + dberr_t err; + auto block_savepoint= mtr->get_savepoint(); + buf_block_t *block= + buf_page_get_gen(page_id, zip_size, rw_latch, guess, buf_mode, mtr, + &err, height == 0 && !index()->is_clust()); + if (!block) + { + switch (err) { + case DB_DECRYPTION_FAILED: + btr_decryption_failed(*index()); + /* fall through */ + default: + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + case DB_SUCCESS: + /* This must be a search to perform an insert, delete mark, or delete; + try using the change buffer */ + ut_ad(height == 0); + ut_ad(thr); + break; + } + + switch (btr_op) { + default: + MY_ASSERT_UNREACHABLE(); + break; + case BTR_INSERT_OP: + case BTR_INSERT_IGNORE_UNIQUE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + + if (ibuf_insert(IBUF_OP_INSERT, tuple, index(), page_id, zip_size, thr)) + { + flag= BTR_CUR_INSERT_TO_IBUF; + goto func_exit; + } + break; + + case BTR_DELMARK_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL); + + if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, + index(), page_id, zip_size, thr)) + { + flag = BTR_CUR_DEL_MARK_IBUF; + goto func_exit; + } + + break; + + case BTR_DELETE_OP: + ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); + auto& chain = buf_pool.page_hash.cell_get(page_id.fold()); + + if (!row_purge_poss_sec(purge_node, index(), tuple)) + /* The record cannot be purged yet. */ + flag= BTR_CUR_DELETE_REF; + else if (ibuf_insert(IBUF_OP_DELETE, tuple, index(), + page_id, zip_size, thr)) + /* The purge was buffered. */ + flag= BTR_CUR_DELETE_IBUF; + else + { + /* The purge could not be buffered. 
*/ + buf_pool.watch_unset(page_id, chain); + break; + } + + buf_pool.watch_unset(page_id, chain); + goto func_exit; + } + + /* Change buffering did not succeed, we must read the page. */ + buf_mode= BUF_GET; + goto search_loop; + } + + if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index()->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + { + corrupted: + ut_ad("corrupted" == 0); // FIXME: remove this + err= DB_CORRUPTION; + goto func_exit; + } + + page_cur.block= block; + ut_ad(block == mtr->at_savepoint(block_savepoint)); +#ifdef UNIV_ZIP_DEBUG + if (rw_latch == RW_NO_LATCH); + else if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block)) + ut_a(page_zip_validate(page_zip, block->page.frame, index())); +#endif /* UNIV_ZIP_DEBUG */ + const uint32_t page_level= btr_page_get_level(block->page.frame); + + if (height == ULINT_UNDEFINED) + { + /* We are in the B-tree index root page. */ +#ifdef BTR_CUR_ADAPT + info->root_guess= block; +#endif + height= page_level; + tree_height= height + 1; + + if (!height) + { + /* The root page is also a leaf page. + We may have to reacquire the page latch in a different mode. 
*/ + switch (rw_latch) { + case RW_S_LATCH: + if ((latch_mode & ~12) != RW_S_LATCH) + { + ut_ad(rw_lock_type_t(latch_mode & ~12) == RW_X_LATCH); + goto relatch_x; + } + if (latch_mode != BTR_MODIFY_PREV) + { + if (!latch_by_caller) + /* Release the tree s-latch */ + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + goto reached_latched_leaf; + } + /* fall through */ + case RW_SX_LATCH: + ut_ad(rw_latch == RW_S_LATCH || + latch_mode == BTR_MODIFY_ROOT_AND_LEAF); + relatch_x: + mtr->rollback_to_savepoint(block_savepoint); + height= ULINT_UNDEFINED; + rw_latch= RW_X_LATCH; + goto search_loop; + case RW_X_LATCH: + if (latch_mode == BTR_MODIFY_TREE) + goto reached_index_root_and_leaf; + goto reached_root_and_leaf; + case RW_NO_LATCH: + ut_ad(mtr->memo_contains_flagged(&index()->lock, MTR_MEMO_X_LOCK)); + } + goto reached_leaf; + } + } + else if (UNIV_UNLIKELY(height != page_level)) + goto corrupted; + else + switch (latch_mode) { + case BTR_MODIFY_TREE: + break; + case BTR_MODIFY_ROOT_AND_LEAF: + ut_ad((mtr->at_savepoint(block_savepoint - 1)->page.id().page_no() == + index()->page) == (tree_height <= height + 2)); + if (tree_height <= height + 2) + /* Retain the root page latch. */ + break; + goto release_parent_page; + default: + if (rw_latch == RW_NO_LATCH) + { + ut_ad(!height); + break; + } + release_parent_page: + ut_ad(block_savepoint > savepoint); + mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); + block_savepoint--; + } + + if (!height) + { + reached_leaf: + /* We reached the leaf level. 
*/ + ut_ad(block == mtr->at_savepoint(block_savepoint)); + + if (latch_mode == BTR_MODIFY_ROOT_AND_LEAF) + { + reached_root_and_leaf: + if (!latch_by_caller) + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + reached_index_root_and_leaf: + ut_ad(rw_latch == RW_X_LATCH); +#ifdef BTR_CUR_HASH_ADAPT + btr_search_drop_page_hash_index(block, true); +#endif + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + goto func_exit; + } + + switch (latch_mode) { + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + ut_ad(!latch_by_caller); + + if (rw_latch == RW_NO_LATCH) + { + /* latch also siblings from left to right */ + rw_latch= rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH)); + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + rw_latch, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, rw_latch); + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + rw_latch, false, mtr, &err)) + goto func_exit; + } + goto release_tree; + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + if (rw_latch == RW_NO_LATCH) + { + ut_ad(index()->is_ibuf()); + mtr->upgrade_buffer_fix(block_savepoint, rw_lock_type_t(latch_mode)); + } + if (!latch_by_caller) + { +release_tree: + /* Release the tree s-latch */ + block_savepoint--; + mtr->rollback_to_savepoint(savepoint, savepoint + 1); + } + /* release upper blocks */ + if (savepoint < block_savepoint) + mtr->rollback_to_savepoint(savepoint, block_savepoint); + break; + default: + ut_ad(latch_mode == BTR_MODIFY_TREE); + ut_ad(rw_latch == RW_NO_LATCH); + /* 
x-latch also siblings from left to right */ + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + } + + reached_latched_leaf: +#ifdef BTR_CUR_HASH_ADAPT + if (btr_search_enabled && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG)) + { + if (page_cur_search_with_match_bytes(tuple, mode, + &up_match, &up_bytes, + &low_match, &low_bytes, &page_cur)) + goto corrupted; + } + else +#endif /* BTR_CUR_HASH_ADAPT */ + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + +#ifdef BTR_CUR_HASH_ADAPT + /* We do a dirty read of btr_search_enabled here. We will + properly check btr_search_enabled again in + btr_search_build_page_hash_index() before building a page hash + index, while holding search latch. */ + if (!btr_search_enabled); + else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) + /* This may be a search tuple for btr_pcur_t::restore_position(). 
*/ + ut_ad(tuple->is_metadata() || + (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); + else if (index()->table->is_temporary()); + else if (!rec_is_metadata(page_cur.rec, *index())) + btr_search_info_update(index(), this); +#endif /* BTR_CUR_HASH_ADAPT */ + + goto func_exit; + } + + guess= nullptr; + if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, + &heap); + + ut_ad(block == mtr->at_savepoint(block_savepoint)); + + switch (latch_mode) { + default: + break; + case BTR_MODIFY_TREE: + if (btr_cur_need_opposite_intention(block->page.frame, lock_intention, + page_cur.rec)) + /* If the rec is the first or last in the page for pessimistic + delete intention, it might cause node_ptr insert for the upper + level. We should change the intention and retry. */ + need_opposite_intention: + return pessimistic_search_leaf(tuple, mode, mtr); + + if (detected_same_key_root || lock_intention != BTR_INTENTION_BOTH || + index()->is_unique() || + (up_match <= rec_offs_n_fields(offsets) && + low_match <= rec_offs_n_fields(offsets))) + break; + + /* If the first or the last record of the page or the same key + value to the first record or last record, then another page might + be chosen when BTR_CONT_MODIFY_TREE. So, the parent page should + not released to avoiding deadlock with blocking the another search + with the same key value. 
*/ + const rec_t *first= + page_rec_get_next_const(page_get_infimum_rec(block->page.frame)); + ulint matched_fields; + + if (UNIV_UNLIKELY(!first)) + goto corrupted; + if (page_cur.rec == first || + page_rec_is_last(page_cur.rec, block->page.frame)) + { + same_key_root: + detected_same_key_root= true; + break; + } + + matched_fields= 0; + offsets2= rec_get_offsets(first, index(), offsets2, 0, ULINT_UNDEFINED, + &heap); + cmp_rec_rec(page_cur.rec, first, offsets, offsets2, index(), false, + &matched_fields); + if (matched_fields >= rec_offs_n_fields(offsets) - 1) + goto same_key_root; + if (const rec_t* last= + page_rec_get_prev_const(page_get_supremum_rec(block->page.frame))) + { + matched_fields= 0; + offsets2= rec_get_offsets(last, index(), offsets2, 0, ULINT_UNDEFINED, + &heap); + cmp_rec_rec(page_cur.rec, last, offsets, offsets2, index(), false, + &matched_fields); + if (matched_fields >= rec_offs_n_fields(offsets) - 1) + goto same_key_root; + } + else + goto corrupted; + + /* Release the non-root parent page unless it may need to be modified. */ + if (tree_height > height + 1 && + !btr_cur_will_modify_tree(index(), block->page.frame, lock_intention, + page_cur.rec, node_ptr_max_size, + zip_size, mtr)) + { + mtr->rollback_to_savepoint(block_savepoint - 1, block_savepoint); + block_savepoint--; + } + } + + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); + + if (!--height) + { + /* We are about to access the leaf level. 
*/ + + switch (latch_mode) { + case BTR_MODIFY_ROOT_AND_LEAF: + rw_latch= RW_X_LATCH; + break; + case BTR_MODIFY_PREV: /* ibuf_insert() or btr_pcur_move_to_prev() */ + case BTR_SEARCH_PREV: /* btr_pcur_move_to_prev() */ + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH); + + if (page_has_prev(block->page.frame) && + page_rec_is_first(page_cur.rec, block->page.frame)) + { + ut_ad(block_savepoint + 1 == mtr->get_savepoint()); + /* Latch the previous page if the node pointer is the leftmost + of the current page. */ + buf_block_t *left= btr_block_get(*index(), + btr_page_get_prev(block->page.frame), + RW_NO_LATCH, false, mtr, &err); + if (UNIV_UNLIKELY(!left)) + goto func_exit; + ut_ad(block_savepoint + 2 == mtr->get_savepoint()); + if (UNIV_LIKELY(left->page.lock.s_lock_try())) + mtr->lock_register(block_savepoint + 1, MTR_MEMO_PAGE_S_FIX); + else + { + if (rw_latch == RW_S_LATCH) + block->page.lock.s_unlock(); + else + block->page.lock.x_unlock(); + mtr->upgrade_buffer_fix(block_savepoint + 1, RW_S_LATCH); + mtr->lock_register(block_savepoint, MTR_MEMO_BUF_FIX); + mtr->upgrade_buffer_fix(block_savepoint, RW_S_LATCH); + /* While our latch on the level-2 page prevents splits or + merges of this level-1 block, other threads may have + modified it due to splitting or merging some level-0 (leaf) + pages underneath it. Thus, we must search again. */ + if (page_cur_search_with_match(tuple, page_mode, + &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, + ULINT_UNDEFINED, &heap); + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, + offsets)); + } + } + goto leaf_with_no_latch; + case BTR_MODIFY_LEAF: + case BTR_SEARCH_LEAF: + if (index()->is_ibuf()) + goto leaf_with_no_latch; + rw_latch= rw_lock_type_t(latch_mode); + if (btr_op != BTR_NO_OP && + ibuf_should_try(index(), btr_op != BTR_INSERT_OP)) + /* Try to buffer the operation if the leaf page + is not in the buffer pool. 
*/ + buf_mode= btr_op == BTR_DELETE_OP + ? BUF_GET_IF_IN_POOL_OR_WATCH + : BUF_GET_IF_IN_POOL; + break; + case BTR_MODIFY_TREE: + ut_ad(rw_latch == RW_X_LATCH); + + if (lock_intention == BTR_INTENTION_INSERT && + page_has_next(block->page.frame) && + page_rec_is_last(page_cur.rec, block->page.frame)) + { + /* btr_insert_into_right_sibling() might cause deleting node_ptr + at upper level */ + mtr->rollback_to_savepoint(block_savepoint); + goto need_opposite_intention; + } + /* fall through */ + default: + leaf_with_no_latch: + rw_latch= RW_NO_LATCH; + } + } + + goto search_loop; +} + +ATTRIBUTE_COLD +dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) +{ + ut_ad(index()->is_btree() || index()->is_ibuf()); + ut_ad(!index()->is_ibuf() || ibuf_inside(mtr)); + + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(flag == BTR_CUR_BINARY); + ut_ad(dict_index_check_search_tuple(index(), tuple)); + ut_ad(dtuple_check_typed(tuple)); + buf_block_t *block= mtr->at_savepoint(1); + ut_ad(block->page.id().page_no() == index()->page); + block->page.fix(); + mtr->rollback_to_savepoint(1); + ut_ad(mtr->memo_contains_flagged(&index()->lock, + MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK)); + + const page_cur_mode_t page_mode{btr_cur_nonleaf_mode(mode)}; + + mtr->page_lock(block, RW_X_LATCH); + + up_match= 0; + up_bytes= 0; + low_match= 0; + low_bytes= 0; + ulint height= btr_page_get_level(block->page.frame); + tree_height= height + 1; + mem_heap_t *heap= nullptr; + + search_loop: + dberr_t err; + page_cur.block= block; + + if (UNIV_UNLIKELY(!height)) + { + if (page_cur_search_with_match(tuple, mode, &up_match, &low_match, + &page_cur, nullptr)) + corrupted: + err= DB_CORRUPTION; + else + { + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + +#ifdef 
BTR_CUR_HASH_ADAPT + /* We do a dirty read of btr_search_enabled here. We will + properly check btr_search_enabled again in + btr_search_build_page_hash_index() before building a page hash + index, while holding search latch. */ + if (!btr_search_enabled); + else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) + /* This may be a search tuple for btr_pcur_t::restore_position(). */ + ut_ad(tuple->is_metadata() || + (tuple->is_metadata(tuple->info_bits ^ REC_STATUS_INSTANT))); + else if (index()->table->is_temporary()); + else if (!rec_is_metadata(page_cur.rec, *index())) + btr_search_info_update(index(), this); +#endif /* BTR_CUR_HASH_ADAPT */ + err= DB_SUCCESS; + } + + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + } + + if (page_cur_search_with_match(tuple, page_mode, &up_match, &low_match, + &page_cur, nullptr)) + goto corrupted; + + page_id_t page_id{block->page.id()}; + + offsets= rec_get_offsets(page_cur.rec, index(), offsets, 0, ULINT_UNDEFINED, + &heap); + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(page_cur.rec, offsets)); + + const auto block_savepoint= mtr->get_savepoint(); + block= + buf_page_get_gen(page_id, block->zip_size(), RW_NO_LATCH, nullptr, BUF_GET, + mtr, &err, !--height && !index()->is_clust()); + + if (!block) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index()); + goto func_exit; + } + + if (!!page_is_comp(block->page.frame) != index()->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index()->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + goto corrupted; + + if (height != btr_page_get_level(block->page.frame)) + goto corrupted; + + if (page_has_prev(block->page.frame) && + !btr_block_get(*index(), btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); +#ifdef UNIV_ZIP_DEBUG + 
const page_zip_des_t *page_zip= buf_block_get_page_zip(block); + ut_a(!page_zip || page_zip_validate(page_zip, page, index())); +#endif /* UNIV_ZIP_DEBUG */ + if (page_has_next(block->page.frame) && + !btr_block_get(*index(), btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + goto func_exit; + goto search_loop; +} + /********************************************************************//** -Searches an index tree and positions a tree cursor on a given level. +Searches an index tree and positions a tree cursor on a given non-leaf level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. - -If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the +Cursor is left at the place where an insert of the search tuple should be performed in the B-tree. InnoDB does an insert immediately after the cursor. Thus, the cursor may end up on a user record, or on a page infimum record. @param level the tree level of search @param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a - unique prefix of a record, mode should be PAGE_CUR_LE, not - PAGE_CUR_GE, as the latter may end up on the previous page of - the record! Inserts should always be made using PAGE_CUR_LE - to search the position! -@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, - BTR_DELETE_MARK, or BTR_DELETE; - cursor->left_block is used to store a pointer to the left - neighbor page +@param latch RW_S_LATCH or RW_X_LATCH @param cursor tree cursor; the cursor page is s- or x-latched, but see also above! 
@param mtr mini-transaction -@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) @return DB_SUCCESS on success or error code otherwise */ TRANSACTIONAL_TARGET dberr_t btr_cur_search_to_nth_level(ulint level, const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cursor, mtr_t *mtr, - ib_uint64_t autoinc) + rw_lock_type_t rw_latch, + btr_cur_t *cursor, mtr_t *mtr) { - page_t* page = NULL; /* remove warning */ - buf_block_t* block; - buf_block_t* guess; - ulint height; - ulint up_match; - ulint up_bytes; - ulint low_match; - ulint low_bytes; - ulint rw_latch; - page_cur_mode_t page_mode; - page_cur_mode_t search_mode = PAGE_CUR_UNSUPP; - ulint buf_mode; - ulint node_ptr_max_size = srv_page_size / 2; - page_cur_t* page_cursor; - btr_op_t btr_op; - ulint root_height = 0; /* remove warning */ + dict_index_t *const index= cursor->index(); - btr_intention_t lock_intention; - buf_block_t* tree_blocks[BTR_MAX_LEVELS]; - ulint tree_savepoints[BTR_MAX_LEVELS]; - ulint n_blocks = 0; - ulint n_releases = 0; - bool detected_same_key_root = false; + ut_ad(index->is_btree() || index->is_ibuf()); + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs *offsets= offsets_; + rec_offs_init(offsets_); + ut_ad(level); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(index->is_ibuf() ? 
ibuf_inside(mtr) : index->is_btree()); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->page != FIL_NULL); - ulint leftmost_from_level = 0; - buf_block_t** prev_tree_blocks = NULL; - ulint* prev_tree_savepoints = NULL; - ulint prev_n_blocks = 0; - ulint prev_n_releases = 0; - bool need_path = true; - bool rtree_parent_modified = false; - bool mbr_adj = false; - bool found = false; - dict_index_t * const index = cursor->index(); - - DBUG_ENTER("btr_cur_search_to_nth_level"); - -#ifdef BTR_CUR_ADAPT - btr_search_t* info; -#endif /* BTR_CUR_ADAPT */ - mem_heap_t* heap = NULL; - rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets = offsets_; - rec_offs offsets2_[REC_OFFS_NORMAL_SIZE]; - rec_offs* offsets2 = offsets2_; - rec_offs_init(offsets_); - rec_offs_init(offsets2_); - /* Currently, PAGE_CUR_LE is the only search mode used for searches - ending to upper levels */ - - ut_ad(level == 0 || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode)); - ut_ad(dict_index_check_search_tuple(index, tuple)); - ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr)); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(!(index->type & DICT_FTS)); - ut_ad(index->page != FIL_NULL); - - MEM_UNDEFINED(&cursor->up_match, sizeof cursor->up_match); - MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); - MEM_UNDEFINED(&cursor->low_match, sizeof cursor->low_match); - MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); -#ifdef UNIV_DEBUG - cursor->up_match = ULINT_UNDEFINED; - cursor->low_match = ULINT_UNDEFINED; -#endif /* UNIV_DEBUG */ - - const bool latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED; - - ut_ad(!latch_by_caller - || srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK - | MTR_MEMO_SX_LOCK)); - - /* These flags are mutually exclusive, they are lumped together - with the latch mode for historical reasons. It's possible for - none of the flags to be set. 
*/ - switch (UNIV_EXPECT(latch_mode & BTR_DELETE, 0)) { - default: - btr_op = BTR_NO_OP; - break; - case BTR_INSERT: - btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE) - ? BTR_INSERT_IGNORE_UNIQUE_OP - : BTR_INSERT_OP; - break; - case BTR_DELETE: - btr_op = BTR_DELETE_OP; - ut_a(cursor->purge_node); - break; - case BTR_DELETE_MARK: - btr_op = BTR_DELMARK_OP; - break; - } - - /* Operations on the insert buffer tree cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index)); - /* Operations on the clustered index cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index)); - /* Operations on the temporary table(indexes) cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !index->table->is_temporary()); - /* Operation on the spatial index cannot be buffered. */ - ut_ad(btr_op == BTR_NO_OP || !dict_index_is_spatial(index)); - - lock_intention = btr_cur_get_and_clear_intention(&latch_mode); - - /* Turn the flags unrelated to the latch mode off. */ - latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - - ut_ad(!latch_by_caller - || latch_mode == BTR_SEARCH_LEAF - || latch_mode == BTR_SEARCH_TREE - || latch_mode == BTR_MODIFY_LEAF); - - ut_ad(autoinc == 0 || dict_index_is_clust(index)); - ut_ad(autoinc == 0 - || latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_MODIFY_LEAF); - ut_ad(autoinc == 0 || level == 0); - - cursor->flag = BTR_CUR_BINARY; + MEM_UNDEFINED(&cursor->up_bytes, sizeof cursor->up_bytes); + MEM_UNDEFINED(&cursor->low_bytes, sizeof cursor->low_bytes); + cursor->up_match= 0; + cursor->low_match= 0; + cursor->flag= BTR_CUR_BINARY; #ifndef BTR_CUR_ADAPT - guess = NULL; + buf_block_t *block= nullptr; #else - info = btr_search_get_info(index); - guess = info->root_guess; - -#ifdef BTR_CUR_HASH_ADAPT - -# ifdef UNIV_SEARCH_PERF_STAT - info->n_searches++; -# endif - /* We do a dirty read of btr_search_enabled below, - and btr_search_guess_on_hash() will have to check it again. 
*/ - if (!btr_search_enabled) { - } else if (autoinc == 0 - && latch_mode <= BTR_MODIFY_LEAF -# ifdef PAGE_CUR_LE_OR_EXTENDS - && mode != PAGE_CUR_LE_OR_EXTENDS -# endif /* PAGE_CUR_LE_OR_EXTENDS */ - && info->last_hash_succ - && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) - && !index->is_spatial() && !index->table->is_temporary() - && btr_search_guess_on_hash(index, info, tuple, mode, - latch_mode, cursor, mtr)) { - - /* Search using the hash index succeeded */ - - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ++btr_cur_n_sea; - - DBUG_RETURN(DB_SUCCESS); - } else { - ++btr_cur_n_non_sea; - } -# endif /* BTR_CUR_HASH_ADAPT */ + btr_search_t *info= btr_search_get_info(index); + buf_block_t *block= info->root_guess; #endif /* BTR_CUR_ADAPT */ - /* If the hash search did not succeed, do binary search down the - tree */ + ut_ad(mtr->memo_contains_flagged(&index->lock, + MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ + const ulint zip_size= index->table->space->zip_size(); - ulint savepoint = mtr_set_savepoint(mtr); - - rw_lock_type_t upper_rw_latch; - - switch (latch_mode) { - case BTR_MODIFY_TREE: - /* Most of delete-intended operations are purging. - Free blocks and read IO bandwidth should be prior - for them, when the history list is glowing huge. */ - if (lock_intention == BTR_INTENTION_DELETE - && buf_pool.n_pend_reads - && trx_sys.history_size_approx() - > BTR_CUR_FINE_HISTORY_LENGTH) { -x_latch_index: - mtr_x_lock_index(index, mtr); - } else if (index->is_spatial() - && lock_intention <= BTR_INTENTION_BOTH) { - /* X lock the if there is possibility of - pessimistic delete on spatial index. 
As we could - lock upward for the tree */ - goto x_latch_index; - } else { - mtr_sx_lock_index(index, mtr); - } - upper_rw_latch = RW_X_LATCH; - break; - case BTR_CONT_MODIFY_TREE: - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - if (index->is_spatial()) { - /* If we are about to locate parent page for split - and/or merge operation for R-Tree index, X latch - the parent */ - upper_rw_latch = RW_X_LATCH; - break; - } - /* fall through */ - case BTR_CONT_SEARCH_TREE: - /* Do nothing */ - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, - MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - upper_rw_latch = RW_NO_LATCH; - break; - default: - if (!srv_read_only_mode) { - if (!latch_by_caller) { - ut_ad(latch_mode != BTR_SEARCH_TREE); - mtr_s_lock_index(index, mtr); - } - upper_rw_latch = RW_S_LATCH; - } else { - upper_rw_latch = RW_NO_LATCH; - } - } - const rw_lock_type_t root_leaf_rw_latch = btr_cur_latch_for_root_leaf( - latch_mode); - - page_cursor = btr_cur_get_page_cur(cursor); - page_cursor->index = index; - - const ulint zip_size = index->table->space->zip_size(); - - /* Start with the root page. */ - page_id_t page_id(index->table->space_id, index->page); - - if (root_leaf_rw_latch == RW_X_LATCH) { - node_ptr_max_size = btr_node_ptr_max_size(index); - } - - up_match = 0; - up_bytes = 0; - low_match = 0; - low_bytes = 0; - - height = ULINT_UNDEFINED; - - /* We use these modified search modes on non-leaf levels of the - B-tree. These let us end up in the right B-tree leaf. In that leaf - we use the original search mode. 
*/ - - switch (mode) { - case PAGE_CUR_GE: - page_mode = PAGE_CUR_L; - break; - case PAGE_CUR_G: - page_mode = PAGE_CUR_LE; - break; - default: -#ifdef PAGE_CUR_LE_OR_EXTENDS - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode) - || mode == PAGE_CUR_LE_OR_EXTENDS); -#else /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || RTREE_SEARCH_MODE(mode)); -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - page_mode = mode; - break; - } - - /* Loop and search until we arrive at the desired level */ - btr_latch_leaves_t latch_leaves = {{NULL, NULL, NULL}, {0, 0, 0}}; + /* Start with the root page. */ + page_id_t page_id(index->table->space_id, index->page); + ulint height= ULINT_UNDEFINED; search_loop: - buf_mode = BUF_GET; - rw_latch = RW_NO_LATCH; - rtree_parent_modified = false; - - if (height != 0) { - /* We are about to fetch the root or a non-leaf page. */ - if ((latch_mode != BTR_MODIFY_TREE || height == level) - && !prev_tree_blocks) { - /* If doesn't have SX or X latch of index, - each pages should be latched before reading. */ - if (height == ULINT_UNDEFINED - && upper_rw_latch == RW_S_LATCH - && autoinc) { - /* needs sx-latch of root page - for writing PAGE_ROOT_AUTO_INC */ - rw_latch = RW_SX_LATCH; - } else { - rw_latch = upper_rw_latch; - } - } - } else if (latch_mode <= BTR_MODIFY_LEAF) { - rw_latch = latch_mode; - - if (btr_op != BTR_NO_OP - && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) { - - /* Try to buffer the operation if the leaf - page is not in the buffer pool. */ - - buf_mode = btr_op == BTR_DELETE_OP - ? 
BUF_GET_IF_IN_POOL_OR_WATCH - : BUF_GET_IF_IN_POOL; - } - } - -retry_page_get: - ut_ad(n_blocks < BTR_MAX_LEVELS); - tree_savepoints[n_blocks] = mtr_set_savepoint(mtr); - dberr_t err; - block = buf_page_get_gen(page_id, zip_size, rw_latch, guess, - buf_mode, mtr, &err, - height == 0 && !index->is_clust()); - if (!block) { - switch (err) { - case DB_SUCCESS: - /* change buffering */ - break; - case DB_DECRYPTION_FAILED: - btr_decryption_failed(*index); - /* fall through */ - default: - goto func_exit; - } - - /* This must be a search to perform an insert/delete - mark/ delete; try using the insert/delete buffer */ - - ut_ad(height == 0); - ut_ad(cursor->thr); - - switch (btr_op) { - default: - MY_ASSERT_UNREACHABLE(); - break; - case BTR_INSERT_OP: - case BTR_INSERT_IGNORE_UNIQUE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - ut_ad(!dict_index_is_spatial(index)); - - if (ibuf_insert(IBUF_OP_INSERT, tuple, index, - page_id, zip_size, cursor->thr)) { - - cursor->flag = BTR_CUR_INSERT_TO_IBUF; - - goto func_exit; - } - break; - - case BTR_DELMARK_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - ut_ad(!dict_index_is_spatial(index)); - - if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple, - index, page_id, zip_size, - cursor->thr)) { - - cursor->flag = BTR_CUR_DEL_MARK_IBUF; - - goto func_exit; - } - - break; - - case BTR_DELETE_OP: - ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH); - ut_ad(!dict_index_is_spatial(index)); - auto& chain = buf_pool.page_hash.cell_get( - page_id.fold()); - - if (!row_purge_poss_sec(cursor->purge_node, - index, tuple)) { - - /* The record cannot be purged yet. */ - cursor->flag = BTR_CUR_DELETE_REF; - } else if (ibuf_insert(IBUF_OP_DELETE, tuple, - index, page_id, zip_size, - cursor->thr)) { - - /* The purge was buffered. */ - cursor->flag = BTR_CUR_DELETE_IBUF; - } else { - /* The purge could not be buffered. 
*/ - buf_pool.watch_unset(page_id, chain); - break; - } - - buf_pool.watch_unset(page_id, chain); - goto func_exit; - } - - /* Insert to the insert/delete buffer did not succeed, we - must read the page from disk. */ - - buf_mode = BUF_GET; - - goto retry_page_get; - } - - tree_blocks[n_blocks] = block; - - if (height && prev_tree_blocks) { - /* also latch left sibling */ - ut_ad(rw_latch == RW_NO_LATCH); - - rw_latch = upper_rw_latch; - - /* Because we are holding index->lock, no page splits - or merges may run concurrently, and we may read - FIL_PAGE_PREV from a buffer-fixed, unlatched page. */ - uint32_t left_page_no = btr_page_get_prev(block->page.frame); - - if (left_page_no != FIL_NULL) { - ut_ad(prev_n_blocks < leftmost_from_level); - - prev_tree_savepoints[prev_n_blocks] - = mtr_set_savepoint(mtr); - buf_block_t* get_block = buf_page_get_gen( - page_id_t(page_id.space(), left_page_no), - zip_size, rw_latch, NULL, buf_mode, - mtr, &err); - if (!get_block) { - if (err == DB_DECRYPTION_FAILED) { - btr_decryption_failed(*index); - } - goto func_exit; - } - - prev_tree_blocks[prev_n_blocks++] = get_block; - /* BTR_MODIFY_TREE doesn't update prev/next_page_no, - without their parent page's lock. So, not needed to - retry here, because we have the parent page's lock. */ - } - - mtr->s_lock_register(tree_savepoints[n_blocks]); - block->page.lock.s_lock(); - } - - page = buf_block_get_frame(block); - - if (height == ULINT_UNDEFINED - && page_is_leaf(page) - && rw_latch != RW_NO_LATCH - && rw_latch != root_leaf_rw_latch) { - /* The root page is also a leaf page (root_leaf). - We should reacquire the page, because the root page - is latched differently from leaf pages. 
*/ - ut_ad(root_leaf_rw_latch != RW_NO_LATCH); - ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); - ut_ad(rw_latch == RW_S_LATCH || autoinc); - ut_ad(!autoinc || root_leaf_rw_latch == RW_X_LATCH); - - ut_ad(n_blocks == 0); - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_blocks], - tree_blocks[n_blocks]); - - upper_rw_latch = root_leaf_rw_latch; - goto search_loop; - } + dberr_t err= DB_SUCCESS; + if (buf_block_t *b= + mtr->get_already_latched(page_id, mtr_memo_type_t(rw_latch))) + block= b; + else if (!(block= buf_page_get_gen(page_id, zip_size, rw_latch, + block, BUF_GET, mtr, &err))) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index); + goto func_exit; + } #ifdef UNIV_ZIP_DEBUG - if (rw_latch != RW_NO_LATCH) { - const page_zip_des_t* page_zip - = buf_block_get_page_zip(block); - ut_a(!page_zip || page_zip_validate(page_zip, page, index)); - } + if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block)) + ut_a(page_zip_validate(page_zip, block->page.frame, index)); #endif /* UNIV_ZIP_DEBUG */ - ut_ad(fil_page_index_page_check(page)); - ut_ad(index->id == btr_page_get_index_id(page)); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page); - root_height = height; - cursor->tree_height = root_height + 1; - - if (dict_index_is_spatial(index)) { - ut_ad(cursor->rtr_info); - - /* If SSN in memory is not initialized, fetch - it from root page */ - if (!rtr_get_current_ssn_id(index)) { - /* FIXME: do this in dict_load_table_one() */ - index->set_ssn(page_get_ssn_id(page) + 1); - } - - /* Save the MBR */ - cursor->rtr_info->thr = cursor->thr; - rtr_get_mbr_from_tuple(tuple, &cursor->rtr_info->mbr); - } - -#ifdef BTR_CUR_ADAPT - info->root_guess = block; -#endif - } - - if (height == 0) { - if (rw_latch == RW_NO_LATCH) { - btr_cur_latch_leaves(block, latch_mode, cursor, mtr, - &latch_leaves); - } - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: 
- case BTR_CONT_SEARCH_TREE: - break; - default: - if (!latch_by_caller - && !srv_read_only_mode) { - /* Release the tree s-latch */ - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - &index->lock); - } - - /* release upper blocks */ - if (prev_tree_blocks) { - ut_ad(!autoinc); - for (; - prev_n_releases < prev_n_blocks; - prev_n_releases++) { - mtr_release_block_at_savepoint( - mtr, - prev_tree_savepoints[ - prev_n_releases], - prev_tree_blocks[ - prev_n_releases]); - } - } - - for (; n_releases < n_blocks; n_releases++) { - if (n_releases == 0 - && (autoinc)) { - /* keep the root page latch */ - ut_ad(mtr->memo_contains_flagged( - tree_blocks[n_releases], - MTR_MEMO_PAGE_SX_FIX - | MTR_MEMO_PAGE_X_FIX)); - continue; - } - - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - - page_mode = mode; - } - - if (dict_index_is_spatial(index)) { - /* Remember the page search mode */ - search_mode = page_mode; - - /* Some adjustment on search mode, when the - page search mode is PAGE_CUR_RTREE_LOCATE - or PAGE_CUR_RTREE_INSERT, as we are searching - with MBRs. When it is not the target level, we - should search all sub-trees that "CONTAIN" the - search range/MBR. When it is at the target - level, the search becomes PAGE_CUR_LE */ - if (page_mode == PAGE_CUR_RTREE_LOCATE - && level == height) { - if (level == 0) { - page_mode = PAGE_CUR_LE; - } else { - page_mode = PAGE_CUR_RTREE_GET_FATHER; - } - } - - if (page_mode == PAGE_CUR_RTREE_INSERT) { - page_mode = (level == height) - ? 
PAGE_CUR_LE - : PAGE_CUR_RTREE_INSERT; - - ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); - } - - /* "need_path" indicates if we need to tracking the parent - pages, if it is not spatial comparison, then no need to - track it */ - if (page_mode < PAGE_CUR_CONTAIN) { - need_path = false; - } - - up_match = 0; - low_match = 0; - - if (latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE) { - /* Tree are locked, no need for Page Lock to protect - the "path" */ - cursor->rtr_info->need_page_lock = false; - } - } - - page_cursor->block = block; - - if (dict_index_is_spatial(index) && page_mode >= PAGE_CUR_CONTAIN) { - ut_ad(need_path); - found = rtr_cur_search_with_match( - block, index, tuple, page_mode, page_cursor, - cursor->rtr_info); - - /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ - if (search_mode == PAGE_CUR_RTREE_INSERT - && cursor->rtr_info->mbr_adj) { - static_assert(BTR_MODIFY_TREE - == (8 | BTR_MODIFY_LEAF), ""); - - if (!(latch_mode & 8)) { - /* Parent MBR needs updated, should retry - with BTR_MODIFY_TREE */ - goto func_exit; - } - - rtree_parent_modified = true; - cursor->rtr_info->mbr_adj = false; - mbr_adj = true; - } - - if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) { - cursor->low_match = - DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; - } -#ifdef BTR_CUR_HASH_ADAPT - } else if (height == 0 && btr_search_enabled - && !(tuple->info_bits & REC_INFO_MIN_REC_FLAG) - && index->is_btree()) { - /* The adaptive hash index is only used when searching - for leaf pages (height==0), but not in r-trees. - We only need the byte prefix comparison for the purpose - of updating the adaptive hash index. */ - if (page_cur_search_with_match_bytes( - tuple, page_mode, &up_match, &up_bytes, - &low_match, &low_bytes, page_cursor)) { - err = DB_CORRUPTION; - goto func_exit; - } -#endif /* BTR_CUR_HASH_ADAPT */ - } else { - /* Search for complete index fields. 
*/ - up_bytes = low_bytes = 0; - if (page_cur_search_with_match( - tuple, page_mode, &up_match, - &low_match, page_cursor, - need_path ? cursor->rtr_info : nullptr)) { - err = DB_CORRUPTION; - goto func_exit; - } - } - - /* If this is the desired level, leave the loop */ - - ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor))); - - /* Add Predicate lock if it is serializable isolation - and only if it is in the search case */ - if (dict_index_is_spatial(index) - && cursor->rtr_info->need_prdt_lock - && mode != PAGE_CUR_RTREE_INSERT - && mode != PAGE_CUR_RTREE_LOCATE - && mode >= PAGE_CUR_CONTAIN) { - lock_prdt_t prdt; - - { - trx_t* trx = thr_get_trx(cursor->thr); - TMLockTrxGuard g{TMLockTrxArgs(*trx)}; - lock_init_prdt_from_mbr( - &prdt, &cursor->rtr_info->mbr, mode, - trx->lock.lock_heap); - } - - if (rw_latch == RW_NO_LATCH && height != 0) { - block->page.lock.s_lock(); - } - - lock_prdt_lock(block, &prdt, index, LOCK_S, - LOCK_PREDICATE, cursor->thr); - - if (rw_latch == RW_NO_LATCH && height != 0) { - block->page.lock.s_unlock(); - } - } - - if (level != height) { - - const rec_t* node_ptr; - ut_ad(height > 0); - - height--; - guess = NULL; - - node_ptr = page_cur_get_rec(page_cursor); - - offsets = rec_get_offsets(node_ptr, index, offsets, 0, - ULINT_UNDEFINED, &heap); - - /* If the rec is the first or last in the page for - pessimistic delete intention, it might cause node_ptr insert - for the upper level. We should change the intention and retry. 
- */ - if (latch_mode == BTR_MODIFY_TREE - && btr_cur_need_opposite_intention( - page, lock_intention, node_ptr)) { - -need_opposite_intention: - ut_ad(upper_rw_latch == RW_X_LATCH); - - if (n_releases > 0) { - /* release root block */ - mtr_release_block_at_savepoint( - mtr, tree_savepoints[0], - tree_blocks[0]); - } - - /* release all blocks */ - for (; n_releases <= n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - - lock_intention = BTR_INTENTION_BOTH; - - page_id.set_page_no(index->page); - up_match = 0; - low_match = 0; - height = ULINT_UNDEFINED; - - n_blocks = 0; - n_releases = 0; - - goto search_loop; - } - - if (dict_index_is_spatial(index)) { - if (page_rec_is_supremum(node_ptr)) { - cursor->low_match = 0; - cursor->up_match = 0; - goto func_exit; - } - - /* If we are doing insertion or record locating, - remember the tree nodes we visited */ - if (page_mode == PAGE_CUR_RTREE_INSERT - || (search_mode == PAGE_CUR_RTREE_LOCATE - && (latch_mode != BTR_MODIFY_LEAF))) { - bool add_latch = false; - - if (latch_mode == BTR_MODIFY_TREE - && rw_latch == RW_NO_LATCH) { - ut_ad(mtr->memo_contains_flagged( - &index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); - block->page.lock.s_lock(); - add_latch = true; - } - - /* Store the parent cursor location */ -#ifdef UNIV_DEBUG - ulint num_stored = rtr_store_parent_path( - block, cursor, latch_mode, - height + 1, mtr); -#else - rtr_store_parent_path( - block, cursor, latch_mode, - height + 1, mtr); -#endif - - if (page_mode == PAGE_CUR_RTREE_INSERT) { - btr_pcur_t* r_cursor = - rtr_get_parent_cursor( - cursor, height + 1, - true); - /* If it is insertion, there should - be only one parent for each level - traverse */ -#ifdef UNIV_DEBUG - ut_ad(num_stored == 1); -#endif - - node_ptr = btr_pcur_get_rec(r_cursor); - - } - - if (add_latch) { - block->page.lock.s_unlock(); - } - - ut_ad(!page_rec_is_supremum(node_ptr)); - } - - ut_ad(page_mode == 
search_mode - || (page_mode == PAGE_CUR_WITHIN - && search_mode == PAGE_CUR_RTREE_LOCATE)); - - page_mode = search_mode; - } - - /* If the first or the last record of the page - or the same key value to the first record or last record, - the another page might be chosen when BTR_CONT_MODIFY_TREE. - So, the parent page should not released to avoiding deadlock - with blocking the another search with the same key value. */ - if (!detected_same_key_root - && lock_intention == BTR_INTENTION_BOTH - && !dict_index_is_unique(index) - && latch_mode == BTR_MODIFY_TREE - && (up_match >= rec_offs_n_fields(offsets) - 1 - || low_match >= rec_offs_n_fields(offsets) - 1)) { - const rec_t* first_rec = page_rec_get_next_const( - page_get_infimum_rec(page)); - ulint matched_fields; - - ut_ad(upper_rw_latch == RW_X_LATCH); - - if (UNIV_UNLIKELY(!first_rec)) { - corrupted: - err = DB_CORRUPTION; - goto func_exit; - } - if (node_ptr == first_rec - || page_rec_is_last(node_ptr, page)) { - detected_same_key_root = true; - } else { - matched_fields = 0; - - offsets2 = rec_get_offsets( - first_rec, index, offsets2, - 0, ULINT_UNDEFINED, &heap); - cmp_rec_rec(node_ptr, first_rec, - offsets, offsets2, index, false, - &matched_fields); - - if (matched_fields - >= rec_offs_n_fields(offsets) - 1) { - detected_same_key_root = true; - } else if (const rec_t* last_rec - = page_rec_get_prev_const( - page_get_supremum_rec( - page))) { - matched_fields = 0; - - offsets2 = rec_get_offsets( - last_rec, index, offsets2, - 0, ULINT_UNDEFINED, &heap); - cmp_rec_rec( - node_ptr, last_rec, - offsets, offsets2, index, - false, &matched_fields); - if (matched_fields - >= rec_offs_n_fields(offsets) - 1) { - detected_same_key_root = true; - } - } else { - goto corrupted; - } - } - } - - /* If the page might cause modify_tree, - we should not release the parent page's lock. 
*/ - if (!detected_same_key_root - && latch_mode == BTR_MODIFY_TREE - && !btr_cur_will_modify_tree( - index, page, lock_intention, node_ptr, - node_ptr_max_size, zip_size, mtr) - && !rtree_parent_modified) { - ut_ad(upper_rw_latch == RW_X_LATCH); - ut_ad(n_releases <= n_blocks); - - /* we can release upper blocks */ - for (; n_releases < n_blocks; n_releases++) { - if (n_releases == 0) { - /* we should not release root page - to pin to same block. */ - continue; - } - - /* release unused blocks to unpin */ - mtr_release_block_at_savepoint( - mtr, tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - - if (height == level - && latch_mode == BTR_MODIFY_TREE) { - ut_ad(upper_rw_latch == RW_X_LATCH); - /* we should sx-latch root page, if released already. - It contains seg_header. */ - if (n_releases > 0) { - mtr->sx_latch_at_savepoint( - tree_savepoints[0], - tree_blocks[0]); - } - - /* x-latch the branch blocks not released yet. */ - for (ulint i = n_releases; i <= n_blocks; i++) { - mtr->x_latch_at_savepoint( - tree_savepoints[i], - tree_blocks[i]); - } - } - - /* We should consider prev_page of parent page, if the node_ptr - is the leftmost of the page. because BTR_SEARCH_PREV and - BTR_MODIFY_PREV latches prev_page of the leaf page. */ - if ((latch_mode == BTR_SEARCH_PREV - || latch_mode == BTR_MODIFY_PREV) - && !prev_tree_blocks) { - /* block should be latched for consistent - btr_page_get_prev() */ - ut_ad(mtr->memo_contains_flagged( - block, MTR_MEMO_PAGE_S_FIX - | MTR_MEMO_PAGE_X_FIX)); - - if (page_has_prev(page) - && page_rec_is_first(node_ptr, page)) { - - if (leftmost_from_level == 0) { - leftmost_from_level = height + 1; - } - } else { - leftmost_from_level = 0; - } - - if (height == 0 && leftmost_from_level > 0) { - /* should retry to get also prev_page - from level==leftmost_from_level. 
*/ - prev_tree_blocks = static_cast( - ut_malloc_nokey(sizeof(buf_block_t*) - * leftmost_from_level)); - - prev_tree_savepoints = static_cast( - ut_malloc_nokey(sizeof(ulint) - * leftmost_from_level)); - - /* back to the level (leftmost_from_level+1) */ - ulint idx = n_blocks - - (leftmost_from_level - 1); - - page_id.set_page_no( - tree_blocks[idx]->page.id().page_no()); - - for (ulint i = n_blocks - - (leftmost_from_level - 1); - i <= n_blocks; i++) { - mtr_release_block_at_savepoint( - mtr, tree_savepoints[i], - tree_blocks[i]); - } - - n_blocks -= (leftmost_from_level - 1); - height = leftmost_from_level; - ut_ad(n_releases == 0); - - /* replay up_match, low_match */ - up_match = 0; - low_match = 0; - rtr_info_t* rtr_info = need_path - ? cursor->rtr_info : NULL; - - for (ulint i = 0; i < n_blocks; i++) { - page_cursor->block = tree_blocks[i]; - if (page_cur_search_with_match( - tuple, - page_mode, &up_match, - &low_match, page_cursor, - rtr_info)) { - err = DB_CORRUPTION; - goto func_exit; - } - } - - goto search_loop; - } - } - - /* Go to the child node */ - page_id.set_page_no( - btr_node_ptr_get_child_page_no(node_ptr, offsets)); - - n_blocks++; - - if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) { - /* We're doing a search on an ibuf tree and we're one - level above the leaf page. 
*/ - - ut_ad(level == 0); - - buf_mode = BUF_GET; - rw_latch = RW_NO_LATCH; - goto retry_page_get; - } - - if (dict_index_is_spatial(index) - && page_mode >= PAGE_CUR_CONTAIN - && page_mode != PAGE_CUR_RTREE_INSERT) { - ut_ad(need_path); - rtr_node_path_t* path = - cursor->rtr_info->path; - - if (!path->empty() && found) { - ut_ad(path->back().page_no - == page_id.page_no()); - path->pop_back(); -#ifdef UNIV_DEBUG - if (page_mode == PAGE_CUR_RTREE_LOCATE - && (latch_mode != BTR_MODIFY_LEAF)) { - btr_pcur_t* cur - = cursor->rtr_info->parent_path->back( - ).cursor; - rec_t* my_node_ptr - = btr_pcur_get_rec(cur); - - offsets = rec_get_offsets( - my_node_ptr, index, offsets, - 0, ULINT_UNDEFINED, &heap); - - ulint my_page_no - = btr_node_ptr_get_child_page_no( - my_node_ptr, offsets); - - ut_ad(page_id.page_no() == my_page_no); - } -#endif - } - } - - goto search_loop; - } else if (!dict_index_is_spatial(index) - && latch_mode == BTR_MODIFY_TREE - && lock_intention == BTR_INTENTION_INSERT - && page_has_next(page) - && page_rec_is_last(page_cur_get_rec(page_cursor), page)) { - - /* btr_insert_into_right_sibling() might cause - deleting node_ptr at upper level */ - - guess = NULL; - - if (height == 0) { - /* release the leaf pages if latched */ - for (uint i = 0; i < 3; i++) { - if (latch_leaves.blocks[i] != NULL) { - mtr_release_block_at_savepoint( - mtr, latch_leaves.savepoints[i], - latch_leaves.blocks[i]); - latch_leaves.blocks[i] = NULL; - } - } - } - - goto need_opposite_intention; - } - - if (level != 0) { - ut_ad(!autoinc); - - if (upper_rw_latch == RW_NO_LATCH) { - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE - || latch_mode == BTR_CONT_SEARCH_TREE); - btr_block_get( - *index, page_id.page_no(), - latch_mode == BTR_CONT_MODIFY_TREE - ? 
RW_X_LATCH : RW_SX_LATCH, false, mtr, &err); - } else { - ut_ad(mtr->memo_contains_flagged(block, - upper_rw_latch)); - - if (latch_by_caller) { - ut_ad(latch_mode == BTR_SEARCH_TREE); - /* to exclude modifying tree operations - should sx-latch the index. */ - ut_ad(mtr->memo_contains(index->lock, - MTR_MEMO_SX_LOCK)); - /* because has sx-latch of index, - can release upper blocks. */ - for (; n_releases < n_blocks; n_releases++) { - mtr_release_block_at_savepoint( - mtr, - tree_savepoints[n_releases], - tree_blocks[n_releases]); - } - } - } - - if (page_mode <= PAGE_CUR_LE) { - cursor->low_match = low_match; - cursor->up_match = up_match; - } - } else { - cursor->low_match = low_match; - cursor->low_bytes = low_bytes; - cursor->up_match = up_match; - cursor->up_bytes = up_bytes; - - if (autoinc) { - page_set_autoinc(tree_blocks[0], autoinc, mtr, false); - } - -#ifdef BTR_CUR_HASH_ADAPT - /* We do a dirty read of btr_search_enabled here. We - will properly check btr_search_enabled again in - btr_search_build_page_hash_index() before building a - page hash index, while holding search latch. */ - if (!btr_search_enabled) { - } else if (tuple->info_bits & REC_INFO_MIN_REC_FLAG) { - /* This may be a search tuple for - btr_pcur_t::restore_position(). */ - ut_ad(tuple->is_metadata() - || (tuple->is_metadata(tuple->info_bits - ^ REC_STATUS_INSTANT))); - } else if (index->is_spatial()) { - } else if (index->table->is_temporary()) { - } else if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { - /* Only user records belong in the adaptive - hash index. 
*/ - } else { - btr_search_info_update(index, cursor); - } -#endif /* BTR_CUR_HASH_ADAPT */ - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - } - - /* For spatial index, remember what blocks are still latched */ - if (dict_index_is_spatial(index) - && (latch_mode == BTR_MODIFY_TREE - || latch_mode == BTR_MODIFY_LEAF)) { - for (ulint i = 0; i < n_releases; i++) { - cursor->rtr_info->tree_blocks[i] = NULL; - cursor->rtr_info->tree_savepoints[i] = 0; - } - - for (ulint i = n_releases; i <= n_blocks; i++) { - cursor->rtr_info->tree_blocks[i] = tree_blocks[i]; - cursor->rtr_info->tree_savepoints[i] = tree_savepoints[i]; - } - } - -func_exit: - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - ut_free(prev_tree_blocks); - ut_free(prev_tree_savepoints); - - if (mbr_adj) { - /* remember that we will need to adjust parent MBR */ - cursor->rtr_info->mbr_adj = true; - } - - DBUG_RETURN(err); + if (!!page_is_comp(block->page.frame) != index->table->not_redundant() || + btr_page_get_index_id(block->page.frame) != index->id || + fil_page_get_type(block->page.frame) == FIL_PAGE_RTREE || + !fil_page_index_page_check(block->page.frame)) + { + corrupted: + err= DB_CORRUPTION; + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + return err; + } + + const uint32_t page_level= btr_page_get_level(block->page.frame); + + if (height == ULINT_UNDEFINED) + { + /* We are in the root node */ + height= page_level; + if (!height) + goto corrupted; + cursor->tree_height= height + 1; + } + else if (height != ulint{page_level}) + goto corrupted; + + cursor->page_cur.block= block; + + /* Search for complete index fields. 
*/ + if (page_cur_search_with_match(tuple, PAGE_CUR_LE, &cursor->up_match, + &cursor->low_match, &cursor->page_cur, + nullptr)) + goto corrupted; + + /* If this is the desired level, leave the loop */ + if (level == height) + goto func_exit; + + ut_ad(height > level); + height--; + + offsets = rec_get_offsets(cursor->page_cur.rec, index, offsets, 0, + ULINT_UNDEFINED, &heap); + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(cursor->page_cur.rec, + offsets)); + block= nullptr; + goto search_loop; } dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr) { - ulint node_ptr_max_size= srv_page_size / 2; btr_intention_t lock_intention; ulint n_blocks= 0; mem_heap_t *heap= nullptr; @@ -2424,29 +1795,21 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, rec_offs_init(offsets_); const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; - latch_mode = btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); + latch_mode= btr_latch_mode(latch_mode & ~BTR_ALREADY_S_LATCHED); lock_intention= btr_cur_get_and_clear_intention(&latch_mode); - /* This function doesn't need to lock left page of the leaf page */ - if (latch_mode == BTR_SEARCH_PREV) - latch_mode= BTR_SEARCH_LEAF; - else if (latch_mode == BTR_MODIFY_PREV) - latch_mode= BTR_MODIFY_LEAF; - /* Store the position of the tree latch we push to mtr so that we know how to release it when we have latched the leaf node */ auto savepoint= mtr->get_savepoint(); rw_lock_type_t upper_rw_latch= RW_X_LATCH; + ulint node_ptr_max_size= 0; - switch (latch_mode) { - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - abort(); - break; - case BTR_MODIFY_TREE: + if (latch_mode == BTR_MODIFY_TREE) + { + node_ptr_max_size= btr_node_ptr_max_size(index); /* Most of delete-intended operations are purging. Free blocks and read IO bandwidth should be prioritized for them, when the history list is growing huge. 
*/ @@ -2457,32 +1820,35 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, mtr_x_lock_index(index, mtr); else mtr_sx_lock_index(index, mtr); - break; - default: + } + else + { + static_assert(int{BTR_CONT_MODIFY_TREE} == (12 | BTR_MODIFY_LEAF), ""); + ut_ad(!(latch_mode & 8)); + /* This function doesn't need to lock left page of the leaf page */ + static_assert(int{BTR_SEARCH_PREV} == (4 | BTR_SEARCH_LEAF), ""); + static_assert(int{BTR_MODIFY_PREV} == (4 | BTR_MODIFY_LEAF), ""); + latch_mode= btr_latch_mode(latch_mode & ~4); ut_ad(!latch_by_caller || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_SX_LOCK | MTR_MEMO_S_LOCK)); upper_rw_latch= RW_S_LATCH; - if (latch_by_caller) - break; - ut_ad(latch_mode != BTR_SEARCH_TREE); - savepoint++; - mtr_s_lock_index(index, mtr); + if (!latch_by_caller) + { + savepoint++; + mtr_s_lock_index(index, mtr); + } } ut_ad(savepoint == mtr->get_savepoint()); - const rw_lock_type_t root_leaf_rw_latch= - btr_cur_latch_for_root_leaf(latch_mode); + const rw_lock_type_t root_leaf_rw_latch= rw_lock_type_t(latch_mode & ~12); page_cur.index = index; uint32_t page= index->page; const auto zip_size= index->table->space->zip_size(); - if (root_leaf_rw_latch == RW_X_LATCH) - node_ptr_max_size= btr_node_ptr_max_size(index); - for (ulint height= ULINT_UNDEFINED;;) { ut_ad(n_blocks < BTR_MAX_LEVELS); @@ -2531,16 +1897,27 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index, reached_leaf: const auto leaf_savepoint= mtr->get_savepoint(); ut_ad(leaf_savepoint); + ut_ad(block == mtr->at_savepoint(leaf_savepoint - 1)); - if (rw_latch == RW_NO_LATCH) - btr_cur_latch_leaves(block, latch_mode, this, mtr); - - switch (latch_mode) { - case BTR_MODIFY_TREE: - case BTR_CONT_MODIFY_TREE: - case BTR_CONT_SEARCH_TREE: - break; - default: + if (latch_mode == BTR_MODIFY_TREE) + { + ut_ad(rw_latch == RW_NO_LATCH); + /* x-latch also siblings from left to right */ + if (page_has_prev(block->page.frame) && + !btr_block_get(*index, 
btr_page_get_prev(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + break; + mtr->upgrade_buffer_fix(leaf_savepoint - 1, RW_X_LATCH); + if (page_has_next(block->page.frame) && + !btr_block_get(*index, btr_page_get_next(block->page.frame), + RW_X_LATCH, false, mtr, &err)) + break; + } + else + { + if (rw_latch == RW_NO_LATCH) + mtr->upgrade_buffer_fix(leaf_savepoint - 1, + rw_lock_type_t(latch_mode)); /* Release index->lock if needed, and the non-leaf pages. */ mtr->rollback_to_savepoint(savepoint - !latch_by_caller, leaf_savepoint - 1); @@ -4669,16 +4046,15 @@ btr_cur_pessimistic_update( } } - if (!srv_read_only_mode - && !big_rec_vec +#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled + if (!big_rec_vec && page_is_leaf(block->page.frame) && !dict_index_is_online_ddl(index)) { -#if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled mtr->release(index->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif err = DB_SUCCESS; goto return_after_reservations; @@ -5420,15 +4796,14 @@ return_after_reservations: err_exit: mem_heap_free(heap); - if (!srv_read_only_mode - && page_is_leaf(page) - && !dict_index_is_online_ddl(index)) { #if 0 // FIXME: this used to be a no-op, and will cause trouble if enabled + if (page_is_leaf(page) + && !dict_index_is_online_ddl(index)) { mtr->release(index->lock); -#endif /* NOTE: We cannot release root block latch here, because it has segment header and already modified in most of cases.*/ } +#endif index->table->space->release_free_extents(n_reserved); return(ret); @@ -5545,16 +4920,18 @@ public: buf_block_t *parent_block= m_block; ulint parent_savepoint= m_savepoint; - m_savepoint= mtr_set_savepoint(&mtr); m_block= btr_block_get(*index(), m_page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); + if (!m_block) + return false; if (parent_block && parent_block != right_parent) - 
mtr_release_block_at_savepoint(&mtr, parent_savepoint, parent_block); + mtr.rollback_to_savepoint(parent_savepoint, parent_savepoint + 1); - return m_block && - (level == ULINT_UNDEFINED || - btr_page_get_level(buf_block_get_frame(m_block)) == level); + m_savepoint= mtr.get_savepoint() - 1; + + return level == ULINT_UNDEFINED || + btr_page_get_level(m_block->page.frame) == level; } /** Sets page mode for leaves */ @@ -5761,14 +5138,18 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( buf_block_t *prev_block= block; ulint prev_savepoint= savepoint; - savepoint= mtr_set_savepoint(&mtr); + savepoint= mtr.get_savepoint(); /* Fetch the page. */ block= btr_block_get(*index, page_id.page_no(), RW_S_LATCH, !level, &mtr, nullptr); if (prev_block) - mtr_release_block_at_savepoint(&mtr, prev_savepoint, prev_block); + { + mtr.rollback_to_savepoint(prev_savepoint, prev_savepoint + 1); + if (block) + savepoint--; + } if (!block || btr_page_get_level(buf_block_get_frame(block)) != level) goto inexact; @@ -5797,14 +5178,20 @@ static ha_rows btr_estimate_n_rows_in_range_on_level( } while (page_id.page_no() != right_page_no); if (block) - mtr_release_block_at_savepoint(&mtr, savepoint, block); + { + ut_ad(block == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, savepoint + 1); + } return (n_rows); inexact: if (block) - mtr_release_block_at_savepoint(&mtr, savepoint, block); + { + ut_ad(block == mtr.at_savepoint(savepoint)); + mtr.rollback_to_savepoint(savepoint, savepoint + 1); + } is_n_rows_exact= false; @@ -5863,9 +5250,7 @@ ha_rows btr_estimate_n_rows_in_range(dict_index_t *index, mtr.start(); - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ - ulint savepoint= mtr_set_savepoint(&mtr); + ut_ad(mtr.get_savepoint() == 0); mtr_s_lock_index(index, &mtr); ha_rows table_n_rows= dict_table_get_n_rows(index->table); @@ -5920,10 +5305,10 @@ search_loop: } if (height == 0) - /* There 
is no need to unlach non-leaf pages here as they must already be + /* There is no need to release non-leaf pages here as they must already be unlatched in btr_est_cur_t::fetch_child(). Try to search on pages after - index->lock unlatching to decrease contention. */ - mtr_release_s_latch_at_savepoint(&mtr, savepoint, &index->lock); + releasing the index latch, to decrease contention. */ + mtr.rollback_to_savepoint(0, 1); /* There is no need to search on left page if divergence_height != ULINT_UNDEFINED, as it was already searched before @@ -6369,16 +5754,21 @@ struct btr_blob_log_check_t { DEBUG_SYNC_C("blob_write_middle"); - log_free_check(); - - DEBUG_SYNC_C("blob_write_middle_after_check"); - const mtr_log_t log_mode = m_mtr->get_log_mode(); m_mtr->start(); m_mtr->set_log_mode(log_mode); index->set_modified(*m_mtr); + log_free_check(); + + DEBUG_SYNC_C("blob_write_middle_after_check"); + if (UNIV_UNLIKELY(page_no != FIL_NULL)) { + dberr_t err; + if (UNIV_LIKELY(index->page != page_no)) { + ut_a(btr_root_block_get(index, RW_SX_LATCH, + m_mtr, &err)); + } m_pcur->btr_cur.page_cur.block = btr_block_get( *index, page_no, RW_X_LATCH, false, m_mtr); /* The page should not be evicted or corrupted while @@ -6391,7 +5781,7 @@ struct btr_blob_log_check_t { ut_ad(m_pcur->rel_pos == BTR_PCUR_ON); mtr_sx_lock_index(index, m_mtr); ut_a(m_pcur->restore_position( - BTR_MODIFY_LEAF_ALREADY_LATCHED, + BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED, m_mtr) == btr_pcur_t::SAME_ALL); } @@ -6560,6 +5950,10 @@ btr_store_big_rec_extern_fields( page_zip = buf_block_get_page_zip(rec_block); } + ut_ad(btr_mtr->get_already_latched( + page_id_t{index->table->space_id, index->page}, + MTR_MEMO_PAGE_SX_FIX)); + mtr.start(); index->set_modified(mtr); mtr.set_log_mode_sub(*btr_mtr); diff --git a/storage/innobase/btr/btr0defragment.cc b/storage/innobase/btr/btr0defragment.cc index 76b173359da..642db0e9f1c 100644 --- a/storage/innobase/btr/btr0defragment.cc +++ b/storage/innobase/btr/btr0defragment.cc 
@@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2022, MariaDB Corporation. +Copyright (C) 2014, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -280,6 +280,70 @@ btr_defragment_calc_n_recs_for_size( return n_recs; } +MY_ATTRIBUTE((nonnull(2,3,4), warn_unused_result)) +/************************************************************//** +Returns the upper level node pointer to a page. It is assumed that mtr holds +an sx-latch on the tree. +@return rec_get_offsets() of the node pointer record */ +static +rec_offs* +btr_page_search_father_node_ptr( + rec_offs* offsets,/*!< in: work area for the return value */ + mem_heap_t* heap, /*!< in: memory heap to use */ + btr_cur_t* cursor, /*!< in: cursor pointing to user record, + out: cursor on node pointer record, + its page x-latched */ + mtr_t* mtr) /*!< in: mtr */ +{ + const uint32_t page_no = btr_cur_get_block(cursor)->page.id().page_no(); + dict_index_t* index = btr_cur_get_index(cursor); + ut_ad(!index->is_spatial()); + + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + ut_ad(dict_index_get_page(index) != page_no); + + const auto level = btr_page_get_level(btr_cur_get_page(cursor)); + + const rec_t* user_rec = btr_cur_get_rec(cursor); + ut_a(page_rec_is_user_rec(user_rec)); + + if (btr_cur_search_to_nth_level(level + 1, + dict_index_build_node_ptr(index, + user_rec, 0, + heap, level), + RW_X_LATCH, + cursor, mtr) != DB_SUCCESS) { + return nullptr; + } + + const rec_t* node_ptr = btr_cur_get_rec(cursor); + ut_ad(!btr_cur_get_block(cursor)->page.lock.not_recursive() + || mtr->memo_contains(index->lock, MTR_MEMO_X_LOCK)); + + offsets = rec_get_offsets(node_ptr, index, offsets, 0, + ULINT_UNDEFINED, &heap); + + if 
(btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) { + offsets = nullptr; + } + + return(offsets); +} + +static bool btr_page_search_father(mtr_t *mtr, btr_cur_t *cursor) +{ + rec_t *rec= + page_rec_get_next(page_get_infimum_rec(cursor->block()->page.frame)); + if (UNIV_UNLIKELY(!rec)) + return false; + cursor->page_cur.rec= rec; + mem_heap_t *heap= mem_heap_create(100); + const bool got= btr_page_search_father_node_ptr(nullptr, heap, cursor, mtr); + mem_heap_free(heap); + return got; +} + /*************************************************************//** Merge as many records from the from_block to the to_block. Delete the from_block if all records are successfully merged to to_block. @@ -408,7 +472,7 @@ btr_defragment_merge_pages( parent.page_cur.index = index; parent.page_cur.block = from_block; - if (!btr_page_get_father(mtr, &parent)) { + if (!btr_page_search_father(mtr, &parent)) { to_block = nullptr; } else if (n_recs_to_move == n_recs) { /* The whole page is merged with the previous page, @@ -699,10 +763,9 @@ processed: acquire index->lock X-latch. This entitles us to acquire page latches in any order for the index. */ mtr_x_lock_index(index, &mtr); - /* This will acquire index->lock U latch, which is allowed - when we are already holding the X-latch. */ if (buf_block_t *last_block = - item->pcur->restore_position(BTR_MODIFY_TREE, &mtr) + item->pcur->restore_position( + BTR_PURGE_TREE_ALREADY_LATCHED, &mtr) == btr_pcur_t::CORRUPTED ? nullptr : btr_defragment_n_pages(btr_pcur_get_block(item->pcur), diff --git a/storage/innobase/btr/btr0pcur.cc b/storage/innobase/btr/btr0pcur.cc index d731bcbb893..d48437e4bd0 100644 --- a/storage/innobase/btr/btr0pcur.cc +++ b/storage/innobase/btr/btr0pcur.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. 
+Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -212,24 +212,98 @@ btr_pcur_copy_stored_position( pcur_receive->old_n_fields = pcur_donate->old_n_fields; } +/** Optimistically latches the leaf page or pages requested. +@param[in] block guessed buffer block +@param[in,out] pcur cursor +@param[in,out] latch_mode BTR_SEARCH_LEAF, ... +@param[in,out] mtr mini-transaction +@return true if success */ +TRANSACTIONAL_TARGET +static bool btr_pcur_optimistic_latch_leaves(buf_block_t *block, + btr_pcur_t *pcur, + btr_latch_mode *latch_mode, + mtr_t *mtr) +{ + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); + ut_ad(block->page.frame); + + static_assert(BTR_SEARCH_PREV & BTR_SEARCH_LEAF, ""); + static_assert(BTR_MODIFY_PREV & BTR_MODIFY_LEAF, ""); + static_assert((BTR_SEARCH_PREV ^ BTR_MODIFY_PREV) == + (RW_S_LATCH ^ RW_X_LATCH), ""); + + const rw_lock_type_t mode= + rw_lock_type_t(*latch_mode & (RW_X_LATCH | RW_S_LATCH)); + + switch (*latch_mode) { + default: + ut_ad(*latch_mode == BTR_SEARCH_LEAF || *latch_mode == BTR_MODIFY_LEAF); + return buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr); + case BTR_SEARCH_PREV: + case BTR_MODIFY_PREV: + page_id_t id{0}; + uint32_t left_page_no; + ulint zip_size; + buf_block_t *left_block= nullptr; + { + transactional_shared_lock_guard g{block->page.lock}; + if (block->modify_clock != pcur->modify_clock) + return false; + id= block->page.id(); + zip_size= block->zip_size(); + left_page_no= btr_page_get_prev(block->page.frame); + } + + if (left_page_no != FIL_NULL) + { + left_block= + buf_page_get_gen(page_id_t(id.space(), left_page_no), zip_size, + mode, nullptr, BUF_GET_POSSIBLY_FREED, mtr); + + if (left_block && + btr_page_get_next(left_block->page.frame) != id.page_no()) + { +release_left_block: + mtr->release_last_page(); + return false; + } + } + + 
if (buf_page_optimistic_get(mode, block, pcur->modify_clock, mtr)) + { + if (btr_page_get_prev(block->page.frame) == left_page_no) + { + /* block was already buffer-fixed while entering the function and + buf_page_optimistic_get() buffer-fixes it again. */ + ut_ad(2 <= block->page.buf_fix_count()); + *latch_mode= btr_latch_mode(mode); + return true; + } + + mtr->release_last_page(); + } + + ut_ad(block->page.buf_fix_count()); + if (left_block) + goto release_left_block; + return false; + } +} + /** Structure acts as functor to do the latching of leaf pages. It returns true if latching of leaf pages succeeded and false otherwise. */ struct optimistic_latch_leaves { btr_pcur_t *const cursor; - btr_latch_mode *latch_mode; + btr_latch_mode *const latch_mode; mtr_t *const mtr; - optimistic_latch_leaves(btr_pcur_t *cursor, btr_latch_mode *latch_mode, - mtr_t *mtr) - : cursor(cursor), latch_mode(latch_mode), mtr(mtr) {} - - bool operator() (buf_block_t *hint) const + bool operator()(buf_block_t *hint) const { - return hint && btr_cur_optimistic_latch_leaves( - hint, cursor->modify_clock, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + return hint && + btr_pcur_optimistic_latch_leaves(hint, cursor, latch_mode, mtr); } }; @@ -246,8 +320,8 @@ record GREATER than the user record which was the predecessor of the supremum. (4) cursor was positioned before the first or after the last in an empty tree: restores to before first or after the last in the tree. -@param restore_latch_mode BTR_SEARCH_LEAF, ... -@param mtr mtr +@param latch_mode BTR_SEARCH_LEAF, ... +@param mtr mini-transaction @return btr_pcur_t::SAME_ALL cursor position on user rec and points on the record with the same field values as in the stored record, btr_pcur_t::SAME_UNIQ cursor position is on user rec and points on the @@ -301,10 +375,9 @@ btr_pcur_t::restore_position(btr_latch_mode restore_latch_mode, mtr_t *mtr) case BTR_SEARCH_PREV: case BTR_MODIFY_PREV: /* Try optimistic restoration. 
*/ - if (block_when_stored.run_with_hint( - optimistic_latch_leaves(this, &restore_latch_mode, - mtr))) { + optimistic_latch_leaves{this, &restore_latch_mode, + mtr})) { pos_state = BTR_PCUR_IS_POSITIONED; latch_mode = restore_latch_mode; @@ -465,18 +538,9 @@ btr_pcur_move_to_next_page( return DB_CORRUPTION; } - ulint mode = cursor->latch_mode; - switch (mode) { - case BTR_SEARCH_TREE: - mode = BTR_SEARCH_LEAF; - break; - case BTR_MODIFY_TREE: - mode = BTR_MODIFY_LEAF; - } - dberr_t err; buf_block_t* next_block = btr_block_get( - *cursor->index(), next_page_no, mode, + *cursor->index(), next_page_no, cursor->latch_mode & ~12, page_is_leaf(page), mtr, &err); if (UNIV_UNLIKELY(!next_block)) { @@ -538,26 +602,42 @@ btr_pcur_move_backward_from_page( return true; } - buf_block_t* release_block = nullptr; + buf_block_t* block = btr_pcur_get_block(cursor); - if (!page_has_prev(btr_pcur_get_page(cursor))) { - } else if (btr_pcur_is_before_first_on_page(cursor)) { - release_block = btr_pcur_get_block(cursor); - page_cur_set_after_last(cursor->btr_cur.left_block, - btr_pcur_get_page_cur(cursor)); - } else { - /* The repositioned cursor did not end on an infimum - record on a page. Cursor repositioning acquired a latch - also on the previous page, but we do not need the latch: - release it. */ - release_block = cursor->btr_cur.left_block; + if (page_has_prev(block->page.frame)) { + buf_block_t* left_block + = mtr->at_savepoint(mtr->get_savepoint() - 1); + const page_t* const left = left_block->page.frame; + if (memcmp_aligned<4>(left + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, 4)) { + /* This should be the right sibling page, or + if there is none, the current block. */ + ut_ad(left_block == block + || !memcmp_aligned<4>(left + FIL_PAGE_PREV, + block->page.frame + + FIL_PAGE_OFFSET, 4)); + /* The previous one must be the left sibling. 
*/ + left_block + = mtr->at_savepoint(mtr->get_savepoint() - 2); + ut_ad(!memcmp_aligned<4>(left_block->page.frame + + FIL_PAGE_NEXT, + block->page.frame + + FIL_PAGE_OFFSET, 4)); + } + if (btr_pcur_is_before_first_on_page(cursor)) { + page_cur_set_after_last(left_block, + &cursor->btr_cur.page_cur); + /* Release the right sibling. */ + } else { + /* Release the left sibling. */ + block = left_block; + } + mtr->release(*block); } cursor->latch_mode = latch_mode; cursor->old_rec = nullptr; - if (release_block) { - mtr->release(*release_block); - } return false; } diff --git a/storage/innobase/btr/btr0sea.cc b/storage/innobase/btr/btr0sea.cc index fc890f9233b..a1609248512 100644 --- a/storage/innobase/btr/btr0sea.cc +++ b/storage/innobase/btr/btr0sea.cc @@ -1055,26 +1055,24 @@ btr_search_guess_on_hash( index_id_t index_id; ut_ad(mtr->is_active()); - - if (!btr_search_enabled) { - return false; - } - - ut_ad(!index->is_ibuf()); - ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); - compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH}); - compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH}); - - /* Not supported for spatial index */ - ut_ad(!dict_index_is_spatial(index)); + ut_ad(index->is_btree() || index->is_ibuf()); /* Note that, for efficiency, the struct info may not be protected by any latch here! 
*/ - if (info->n_hash_potential == 0) { + if (latch_mode > BTR_MODIFY_LEAF + || !info->last_hash_succ || !info->n_hash_potential + || (tuple->info_bits & REC_INFO_MIN_REC_FLAG)) { return false; } + ut_ad(index->is_btree()); + ut_ad(!index->table->is_temporary()); + + ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); + compile_time_assert(ulint{BTR_SEARCH_LEAF} == ulint{RW_S_LATCH}); + compile_time_assert(ulint{BTR_MODIFY_LEAF} == ulint{RW_X_LATCH}); + cursor->n_fields = info->n_fields; cursor->n_bytes = info->n_bytes; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 2e320ce18c2..644d8680484 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2700,6 +2700,18 @@ re_evict: && mode != BUF_GET_IF_IN_POOL_OR_WATCH) { } else if (!ibuf_debug || recv_recovery_is_on()) { } else if (fil_space_t* space = fil_space_t::get(page_id.space())) { + for (ulint i = 0; i < mtr->get_savepoint(); i++) { + if (buf_block_t* b = mtr->block_at_savepoint(i)) { + if (b->page.oldest_modification() > 2 + && b->page.lock.have_any()) { + /* We are holding a dirty page latch + that would hang buf_flush_sync(). */ + space->release(); + goto re_evict_fail; + } + } + } + /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. 
*/ @@ -2741,9 +2753,9 @@ re_evict: /* Failed to evict the page; change it directly */ } +re_evict_fail: #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - ut_ad(state > buf_page_t::FREED); if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED)) { goto ignore_block; } @@ -2799,8 +2811,7 @@ ibuf_merge_corrupted: } if (rw_latch == RW_X_LATCH) { - mtr->memo_push(block, MTR_MEMO_PAGE_X_FIX); - goto got_latch; + goto get_latch_valid; } else { block->page.lock.x_unlock(); goto get_latch; @@ -2808,12 +2819,10 @@ ibuf_merge_corrupted: } else { get_latch: switch (rw_latch) { - mtr_memo_type_t fix_type; case RW_NO_LATCH: mtr->memo_push(block, MTR_MEMO_BUF_FIX); return block; case RW_S_LATCH: - fix_type = MTR_MEMO_PAGE_S_FIX; block->page.lock.s_lock(); ut_ad(!block->page.is_read_fixed()); if (UNIV_UNLIKELY(block->page.id() != page_id)) { @@ -2822,13 +2831,12 @@ get_latch: goto page_id_mismatch; } get_latch_valid: - mtr->memo_push(block, fix_type); + mtr->memo_push(block, mtr_memo_type_t(rw_latch)); #ifdef BTR_CUR_HASH_ADAPT btr_search_drop_page_hash_index(block, true); #endif /* BTR_CUR_HASH_ADAPT */ break; case RW_SX_LATCH: - fix_type = MTR_MEMO_PAGE_SX_FIX; block->page.lock.u_lock(); ut_ad(!block->page.is_io_fixed()); if (UNIV_UNLIKELY(block->page.id() != page_id)) { @@ -2838,7 +2846,6 @@ get_latch_valid: goto get_latch_valid; default: ut_ad(rw_latch == RW_X_LATCH); - fix_type = MTR_MEMO_PAGE_X_FIX; if (block->page.lock.x_lock_upgraded()) { ut_ad(block->page.id() == page_id); block->unfix(); @@ -2851,7 +2858,6 @@ get_latch_valid: goto get_latch_valid; } -got_latch: ut_ad(page_id_t(page_get_space_id(block->page.frame), page_get_page_no(block->page.frame)) == page_id); @@ -3040,8 +3046,7 @@ bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block, ut_ad(!block->page.is_read_fixed()); block->page.set_accessed(); buf_page_make_young_if_needed(&block->page); - mtr->memo_push(block, rw_latch == RW_S_LATCH - ? 
MTR_MEMO_PAGE_S_FIX : MTR_MEMO_PAGE_X_FIX); + mtr->memo_push(block, mtr_memo_type_t(rw_latch)); } ut_d(if (!(++buf_dbg_counter % 5771)) buf_pool.validate()); diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index daf3bc9a664..614048b7ba0 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -722,7 +722,7 @@ dict_build_field_def_step( } /***************************************************************//** -Creates an index tree for the index if it is not a member of a cluster. +Creates an index tree for the index. 
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t @@ -755,9 +755,8 @@ dict_create_index_tree_step( pcur.btr_cur.page_cur.index = UT_LIST_GET_FIRST(dict_sys.sys_indexes->indexes); - dberr_t err = - btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, - &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, + &pcur, &mtr); if (err != DB_SUCCESS) { func_exit: @@ -768,10 +767,25 @@ func_exit: btr_pcur_move_to_next_user_rec(&pcur, &mtr); if (UNIV_UNLIKELY(btr_pcur_is_after_last_on_page(&pcur))) { +corrupted: err = DB_CORRUPTION; goto func_exit; } + ulint len; + byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), + DICT_FLD__SYS_INDEXES__ID, + &len); + if (UNIV_UNLIKELY(len != 8 || mach_read_from_8(data) != index->id)) { + goto corrupted; + } + + data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), + DICT_FLD__SYS_INDEXES__PAGE_NO, &len); + if (len != 4) { + goto corrupted; + } + if (index->is_readable()) { index->set_modified(mtr); @@ -784,11 +798,6 @@ func_exit: err = DB_OUT_OF_FILE_SPACE; ); } - ulint len; - byte* data = rec_get_nth_field_old(btr_pcur_get_rec(&pcur), - DICT_FLD__SYS_INDEXES__PAGE_NO, - &len); - ut_ad(len == 4); mtr.write<4,mtr_t::MAYBE_NOP>(*btr_pcur_get_block(&pcur), data, node->page_no); goto func_exit; diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 21efb525fa8..53d1031d270 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,7 +2,7 @@ Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2022, MariaDB Corporation. +Copyright (c) 2013, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4143,8 +4143,7 @@ void dict_set_corrupted(dict_index_t *index, const char *ctx) dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_LE, - BTR_MODIFY_LEAF, &cursor, &mtr) + if (cursor.search_leaf(tuple, PAGE_CUR_LE, BTR_MODIFY_LEAF, &mtr) != DB_SUCCESS) { goto fail; } @@ -4219,8 +4218,7 @@ dict_index_set_merge_threshold( dict_index_copy_types(tuple, sys_index, 2); cursor.page_cur.index = sys_index; - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &cursor, &mtr) + if (cursor.search_leaf(tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &mtr) != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 004b00615e8..9910a000b5b 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1321,7 +1321,7 @@ static dberr_t dict_load_columns(dict_table_t *table, unsigned use_uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1452,7 +1452,7 @@ dict_load_virtual_col(dict_table_t *table, bool uncommitted, ulint nth_v_col) dict_index_copy_types(&tuple, sys_virtual_index, 2); pcur.btr_cur.page_cur.index = sys_virtual_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -1688,8 +1688,7 @@ static dberr_t dict_load_fields(dict_index_t *index, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, - PAGE_CUR_GE, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -1947,8 +1946,7 @@ dberr_t dict_load_indexes(dict_table_t *table, bool uncommitted, dict_index_copy_types(&tuple, sys_index, 1); pcur.btr_cur.page_cur.index = sys_index; - dberr_t error = btr_pcur_open_on_user_rec(&tuple, - PAGE_CUR_GE, BTR_SEARCH_LEAF, + dberr_t error = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (error != DB_SUCCESS) { goto func_exit; @@ -2349,7 +2347,7 @@ static dict_table_t *dict_load_table_one(const span &name, bool uncommitted = false; reload: mtr.start(); - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS || 
!btr_pcur_is_on_user_rec(&pcur)) { @@ -2607,8 +2605,7 @@ dict_load_table_on_id( dict_table_t* table = nullptr; - if (btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr) + if (btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr) == DB_SUCCESS && btr_pcur_is_on_user_rec(&pcur)) { /*---------------------------------------------------*/ @@ -2714,7 +2711,7 @@ static dberr_t dict_load_foreign_cols(dict_foreign_t *foreign, trx_id_t trx_id) pcur.btr_cur.page_cur.index = sys_index; mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto func_exit; @@ -2891,7 +2888,7 @@ dict_load_foreign( mtr.start(); mem_heap_t* heap = nullptr; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { goto err_exit; @@ -3102,7 +3099,7 @@ start_load: dict_index_copy_types(&tuple, sec_index, 1); pcur.btr_cur.page_cur.index = sec_index; - dberr_t err = btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, + dberr_t err = btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) { DBUG_RETURN(err); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 44fcf9f2c18..845f133f1a6 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2009, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1697,7 +1697,7 @@ static dberr_t page_cur_open_level(page_cur_t *page_cur, ulint level, static dberr_t btr_pcur_open_level(btr_pcur_t *pcur, ulint level, mtr_t *mtr, dict_index_t *index) { - pcur->latch_mode= BTR_SEARCH_TREE; + pcur->latch_mode= BTR_SEARCH_LEAF; pcur->search_mode= PAGE_CUR_G; pcur->pos_state= BTR_PCUR_IS_POSITIONED; pcur->btr_cur.page_cur.index= index; diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 89af4e2420f..bad9e1e1bfd 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1474,7 +1474,7 @@ inline void mtr_t::log_file_op(mfile_type_t type, ulint space_id, ut_ad(strchr(path, '/')); ut_ad(!strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD)); - flag_modified(); + m_modifications= true; if (!is_logged()) return; m_last= nullptr; diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index 12e9a6913ba..e9f3106feb0 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -122,15 +122,22 @@ MY_ATTRIBUTE((nonnull, warn_unused_result)) static buf_block_t *fsp_get_header(const fil_space_t *space, mtr_t *mtr, dberr_t *err) { - buf_block_t *block= buf_page_get_gen(page_id_t(space->id, 0), - space->zip_size(), RW_SX_LATCH, - nullptr, BUF_GET_POSSIBLY_FREED, - mtr, err); - if (block && space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + - block->page.frame)) + const page_id_t id{space->id, 0}; + buf_block_t *block= mtr->get_already_latched(id, MTR_MEMO_PAGE_SX_FIX); + if (block) + *err= DB_SUCCESS; + else { - *err= DB_CORRUPTION; - block= nullptr; + block= buf_page_get_gen(id, space->zip_size(), RW_SX_LATCH, + nullptr, BUF_GET_POSSIBLY_FREED, + mtr, err); + if (block && + space->id != mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + + block->page.frame)) + { + *err= DB_CORRUPTION; + block= nullptr; + } } return block; } diff --git a/storage/innobase/gis/gis0sea.cc b/storage/innobase/gis/gis0sea.cc index 207d49abeba..8ca8681bce9 100644 --- a/storage/innobase/gis/gis0sea.cc +++ b/storage/innobase/gis/gis0sea.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2016, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -44,7 +44,6 @@ Created 2014/01/16 Jimmy Yang static bool rtr_cur_restore_position( - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_cur_t* cursor, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr); /*!< in: mtr */ @@ -74,6 +73,70 @@ rtr_adjust_parent_path( } } +/** Latches the leaf page or pages requested. +@param[in] block_savepoint leaf page where the search converged +@param[in] latch_mode BTR_SEARCH_LEAF, ... +@param[in] cursor cursor +@param[in] mtr mini-transaction */ +static void +rtr_latch_leaves( + ulint block_savepoint, + btr_latch_mode latch_mode, + btr_cur_t* cursor, + mtr_t* mtr) +{ + compile_time_assert(int(MTR_MEMO_PAGE_S_FIX) == int(RW_S_LATCH)); + compile_time_assert(int(MTR_MEMO_PAGE_X_FIX) == int(RW_X_LATCH)); + compile_time_assert(int(MTR_MEMO_PAGE_SX_FIX) == int(RW_SX_LATCH)); + + buf_block_t* block = mtr->at_savepoint(block_savepoint); + + ut_ad(block->page.id().space() == cursor->index()->table->space->id); + ut_ad(block->page.in_file()); + ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, + MTR_MEMO_S_LOCK + | MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + + switch (latch_mode) { + uint32_t left_page_no; + uint32_t right_page_no; + default: + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + break; + case BTR_MODIFY_TREE: + /* It is exclusive for other operations which calls + btr_page_set_prev() */ + ut_ad(mtr->memo_contains_flagged(&cursor->index()->lock, + MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); + /* x-latch also siblings from left to right */ + left_page_no = btr_page_get_prev(block->page.frame); + + if (left_page_no != FIL_NULL) { + btr_block_get(*cursor->index(), left_page_no, RW_X_LATCH, + true, mtr); + } + + mtr->upgrade_buffer_fix(block_savepoint, RW_X_LATCH); + + right_page_no = btr_page_get_next(block->page.frame); + + if (right_page_no != FIL_NULL) { + btr_block_get(*cursor->index(), right_page_no, + RW_X_LATCH, true, mtr); + } + break; + case BTR_SEARCH_LEAF: + case BTR_MODIFY_LEAF: + rw_lock_type_t mode = + rw_lock_type_t(latch_mode & (RW_X_LATCH | RW_S_LATCH)); + static_assert(int{RW_S_LATCH} == 
int{BTR_SEARCH_LEAF}, ""); + static_assert(int{RW_X_LATCH} == int{BTR_MODIFY_LEAF}, ""); + mtr->upgrade_buffer_fix(block_savepoint, mode); + } +} + /*************************************************************//** Find the next matching record. This function is used by search or record locating during index delete/update. @@ -135,6 +198,7 @@ rtr_pcur_getnext_from_path( && (my_latch_mode | 4) == BTR_CONT_MODIFY_TREE; if (!index_locked) { + ut_ad(mtr->is_empty()); mtr_s_lock_index(index, mtr); } else { ut_ad(mtr->memo_contains_flagged(&index->lock, @@ -154,14 +218,12 @@ rtr_pcur_getnext_from_path( node_seq_t path_ssn; const page_t* page; rw_lock_type_t rw_latch; - ulint tree_idx; mysql_mutex_lock(&rtr_info->rtr_path_mutex); next_rec = rtr_info->path->back(); rtr_info->path->pop_back(); level = next_rec.level; path_ssn = next_rec.seq_no; - tree_idx = btr_cur->tree_height - level - 1; /* Maintain the parent path info as well, if needed */ if (need_parent && !skip_parent && !new_split) { @@ -223,37 +285,15 @@ rtr_pcur_getnext_from_path( rw_latch = RW_X_LATCH; } - /* Release previous locked blocks */ - if (my_latch_mode != BTR_SEARCH_LEAF) { - for (ulint idx = 0; idx < btr_cur->tree_height; - idx++) { - if (rtr_info->tree_blocks[idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[idx], - rtr_info->tree_blocks[idx]); - rtr_info->tree_blocks[idx] = NULL; - } - } - for (ulint idx = RTR_MAX_LEVELS; idx < RTR_MAX_LEVELS + 3; - idx++) { - if (rtr_info->tree_blocks[idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[idx], - rtr_info->tree_blocks[idx]); - rtr_info->tree_blocks[idx] = NULL; - } - } + if (my_latch_mode == BTR_MODIFY_LEAF) { + mtr->rollback_to_savepoint(1); } - /* set up savepoint to record any locks to be taken */ - rtr_info->tree_savepoints[tree_idx] = mtr_set_savepoint(mtr); - ut_ad((my_latch_mode | 4) == BTR_CONT_MODIFY_TREE || !page_is_leaf(btr_cur_get_page(btr_cur)) || 
!btr_cur->page_cur.block->page.lock.have_any()); + const auto block_savepoint = mtr->get_savepoint(); block = buf_page_get_gen( page_id_t(index->table->space_id, next_rec.page_no), zip_size, @@ -264,8 +304,6 @@ rtr_pcur_getnext_from_path( break; } - rtr_info->tree_blocks[tree_idx] = block; - page = buf_block_get_frame(block); page_ssn = page_get_ssn_id(page); @@ -396,24 +434,23 @@ rtr_pcur_getnext_from_path( if (found) { if (level == target_level) { - page_cur_t* r_cur;; + ut_ad(block + == mtr->at_savepoint(block_savepoint)); if (my_latch_mode == BTR_MODIFY_TREE && level == 0) { ut_ad(rw_latch == RW_NO_LATCH); - btr_cur_latch_leaves( - block, + rtr_latch_leaves( + block_savepoint, BTR_MODIFY_TREE, btr_cur, mtr); } - r_cur = btr_cur_get_page_cur(btr_cur); - page_cur_position( page_cur_get_rec(page_cursor), page_cur_get_block(page_cursor), - r_cur); + btr_cur_get_page_cur(btr_cur)); btr_cur->low_match = level != 0 ? DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1 @@ -425,13 +462,7 @@ rtr_pcur_getnext_from_path( last node just located */ skip_parent = true; } else { - /* Release latch on the current page */ - ut_ad(rtr_info->tree_blocks[tree_idx]); - - mtr_release_block_at_savepoint( - mtr, rtr_info->tree_savepoints[tree_idx], - rtr_info->tree_blocks[tree_idx]); - rtr_info->tree_blocks[tree_idx] = NULL; + mtr->release_last_page(); } } while (!rtr_info->path->empty()); @@ -509,50 +540,524 @@ static void rtr_compare_cursor_rec(const rec_t *rec, dict_index_t *index, } #endif +TRANSACTIONAL_TARGET +dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cur, mtr_t *mtr) +{ + page_cur_mode_t page_mode; + page_cur_mode_t search_mode= PAGE_CUR_UNSUPP; + + bool mbr_adj= false; + bool found= false; + dict_index_t *const index= cur->index(); + + mem_heap_t *heap= nullptr; + rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; + rec_offs *offsets= offsets_; + rec_offs_init(offsets_); + ut_ad(level == 0 || mode == PAGE_CUR_LE 
|| RTREE_SEARCH_MODE(mode)); + ut_ad(dict_index_check_search_tuple(index, tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(index->is_spatial()); + ut_ad(index->page != FIL_NULL); + + MEM_UNDEFINED(&cur->up_match, sizeof cur->up_match); + MEM_UNDEFINED(&cur->up_bytes, sizeof cur->up_bytes); + MEM_UNDEFINED(&cur->low_match, sizeof cur->low_match); + MEM_UNDEFINED(&cur->low_bytes, sizeof cur->low_bytes); + ut_d(cur->up_match= ULINT_UNDEFINED); + ut_d(cur->low_match= ULINT_UNDEFINED); + + const bool latch_by_caller= latch_mode & BTR_ALREADY_S_LATCHED; + + ut_ad(!latch_by_caller + || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_S_LOCK + | MTR_MEMO_SX_LOCK)); + latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + + ut_ad(!latch_by_caller || latch_mode == BTR_SEARCH_LEAF || + latch_mode == BTR_MODIFY_LEAF); + + cur->flag= BTR_CUR_BINARY; + +#ifndef BTR_CUR_ADAPT + buf_block_t *guess= nullptr; +#else + btr_search_t *const info= btr_search_get_info(index); + buf_block_t *guess= info->root_guess; +#endif + + /* Store the position of the tree latch we push to mtr so that we + know how to release it when we have latched leaf node(s) */ + + const ulint savepoint= mtr->get_savepoint(); + + rw_lock_type_t upper_rw_latch, root_leaf_rw_latch= RW_NO_LATCH; + + switch (latch_mode) { + case BTR_MODIFY_TREE: + mtr_x_lock_index(index, mtr); + upper_rw_latch= root_leaf_rw_latch= RW_X_LATCH; + break; + case BTR_CONT_MODIFY_TREE: + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK)); + upper_rw_latch= RW_X_LATCH; + break; + default: + ut_ad(latch_mode != BTR_MODIFY_PREV); + ut_ad(latch_mode != BTR_SEARCH_PREV); + if (!latch_by_caller) + mtr_s_lock_index(index, mtr); + upper_rw_latch= root_leaf_rw_latch= RW_S_LATCH; + if (latch_mode == BTR_MODIFY_LEAF) + root_leaf_rw_latch= RW_X_LATCH; + } + + auto root_savepoint= mtr->get_savepoint(); + const ulint zip_size= index->table->space->zip_size(); + + /* Start with the root page. 
*/ + page_id_t page_id(index->table->space_id, index->page); + + ulint up_match= 0, up_bytes= 0, low_match= 0, low_bytes= 0; + ulint height= ULINT_UNDEFINED; + + /* We use these modified search modes on non-leaf levels of the + B-tree. These let us end up in the right B-tree leaf. In that leaf + we use the original search mode. */ + + switch (mode) { + case PAGE_CUR_GE: + page_mode= PAGE_CUR_L; + break; + case PAGE_CUR_G: + page_mode= PAGE_CUR_LE; + break; + default: +#ifdef PAGE_CUR_LE_OR_EXTENDS + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode) + || mode == PAGE_CUR_LE_OR_EXTENDS); +#else /* PAGE_CUR_LE_OR_EXTENDS */ + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || RTREE_SEARCH_MODE(mode)); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + page_mode= mode; + break; + } + + search_loop: + auto buf_mode= BUF_GET; + ulint rw_latch= RW_NO_LATCH; + + if (height) + { + /* We are about to fetch the root or a non-leaf page. */ + if (latch_mode != BTR_MODIFY_TREE || height == level) + /* If doesn't have SX or X latch of index, + each page should be latched before reading. 
*/ + rw_latch= upper_rw_latch; + } + else if (latch_mode <= BTR_MODIFY_LEAF) + rw_latch= latch_mode; + + dberr_t err; + auto block_savepoint= mtr->get_savepoint(); + buf_block_t *block= buf_page_get_gen(page_id, zip_size, rw_latch, guess, + buf_mode, mtr, &err, false); + if (!block) + { + if (err == DB_DECRYPTION_FAILED) + btr_decryption_failed(*index); + func_exit: + if (UNIV_LIKELY_NULL(heap)) + mem_heap_free(heap); + + if (mbr_adj) + /* remember that we will need to adjust parent MBR */ + cur->rtr_info->mbr_adj= true; + + return err; + } + + const page_t *page= buf_block_get_frame(block); +#ifdef UNIV_ZIP_DEBUG + if (rw_latch != RW_NO_LATCH) { + const page_zip_des_t *page_zip= buf_block_get_page_zip(block); + ut_a(!page_zip || page_zip_validate(page_zip, page, index)); + } +#endif /* UNIV_ZIP_DEBUG */ + + ut_ad(fil_page_index_page_check(page)); + ut_ad(index->id == btr_page_get_index_id(page)); + + if (height != ULINT_UNDEFINED); + else if (page_is_leaf(page) && + rw_latch != RW_NO_LATCH && rw_latch != root_leaf_rw_latch) + { + /* The root page is also a leaf page (root_leaf). + We should reacquire the page, because the root page + is latched differently from leaf pages. 
*/ + ut_ad(root_leaf_rw_latch != RW_NO_LATCH); + ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_SX_LATCH); + + ut_ad(block == mtr->at_savepoint(block_savepoint)); + mtr->rollback_to_savepoint(block_savepoint); + + upper_rw_latch= root_leaf_rw_latch; + goto search_loop; + } + else + { + /* We are in the root node */ + + height= btr_page_get_level(page); + cur->tree_height= height + 1; + + ut_ad(cur->rtr_info); + + /* If SSN in memory is not initialized, fetch it from root page */ + if (!rtr_get_current_ssn_id(index)) + /* FIXME: do this in dict_load_table_one() */ + index->set_ssn(page_get_ssn_id(page) + 1); + + /* Save the MBR */ + cur->rtr_info->thr= cur->thr; + rtr_get_mbr_from_tuple(tuple, &cur->rtr_info->mbr); + +#ifdef BTR_CUR_ADAPT + info->root_guess= block; +#endif + } + + if (height == 0) { + if (rw_latch == RW_NO_LATCH) + { + ut_ad(block == mtr->at_savepoint(block_savepoint)); + rtr_latch_leaves(block_savepoint, latch_mode, cur, mtr); + } + + switch (latch_mode) { + case BTR_MODIFY_TREE: + case BTR_CONT_MODIFY_TREE: + break; + default: + if (!latch_by_caller) + { + /* Release the tree s-latch */ + mtr->rollback_to_savepoint(savepoint, + savepoint + 1); + block_savepoint--; + root_savepoint--; + } + /* release upper blocks */ + if (savepoint < block_savepoint) + mtr->rollback_to_savepoint(savepoint, block_savepoint); + } + + page_mode= mode; + } + + /* Remember the page search mode */ + search_mode= page_mode; + + /* Some adjustment on search mode, when the page search mode is + PAGE_CUR_RTREE_LOCATE or PAGE_CUR_RTREE_INSERT, as we are searching + with MBRs. When it is not the target level, we should search all + sub-trees that "CONTAIN" the search range/MBR. When it is at the + target level, the search becomes PAGE_CUR_LE */ + + if (page_mode == PAGE_CUR_RTREE_INSERT) + { + page_mode= (level == height) + ? 
PAGE_CUR_LE + : PAGE_CUR_RTREE_INSERT; + + ut_ad(!page_is_leaf(page) || page_mode == PAGE_CUR_LE); + } + else if (page_mode == PAGE_CUR_RTREE_LOCATE && level == height) + page_mode= level == 0 ? PAGE_CUR_LE : PAGE_CUR_RTREE_GET_FATHER; + + up_match= 0; + low_match= 0; + + if (latch_mode == BTR_MODIFY_TREE || latch_mode == BTR_CONT_MODIFY_TREE) + /* Tree are locked, no need for Page Lock to protect the "path" */ + cur->rtr_info->need_page_lock= false; + + cur->page_cur.block= block; + + if (page_mode >= PAGE_CUR_CONTAIN) + { + found= rtr_cur_search_with_match(block, index, tuple, page_mode, + &cur->page_cur, cur->rtr_info); + + /* Need to use BTR_MODIFY_TREE to do the MBR adjustment */ + if (search_mode == PAGE_CUR_RTREE_INSERT && cur->rtr_info->mbr_adj) { + static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); + + if (!(latch_mode & 8)) + /* Parent MBR needs updated, should retry with BTR_MODIFY_TREE */ + goto func_exit; + + cur->rtr_info->mbr_adj= false; + mbr_adj= true; + } + + if (found && page_mode == PAGE_CUR_RTREE_GET_FATHER) + cur->low_match= DICT_INDEX_SPATIAL_NODEPTR_SIZE + 1; + } + else + { + /* Search for complete index fields. 
*/ + up_bytes= low_bytes= 0; + if (page_cur_search_with_match(tuple, page_mode, &up_match, + &low_match, &cur->page_cur, nullptr)) { + err= DB_CORRUPTION; + goto func_exit; + } + } + + /* If this is the desired level, leave the loop */ + + ut_ad(height == btr_page_get_level(btr_cur_get_page(cur))); + + /* Add Predicate lock if it is serializable isolation + and only if it is in the search case */ + if (mode >= PAGE_CUR_CONTAIN && mode != PAGE_CUR_RTREE_INSERT && + mode != PAGE_CUR_RTREE_LOCATE && cur->rtr_info->need_prdt_lock) + { + lock_prdt_t prdt; + + { + trx_t* trx= thr_get_trx(cur->thr); + TMLockTrxGuard g{TMLockTrxArgs(*trx)}; + lock_init_prdt_from_mbr(&prdt, &cur->rtr_info->mbr, mode, + trx->lock.lock_heap); + } + + if (rw_latch == RW_NO_LATCH && height != 0) + block->page.lock.s_lock(); + + lock_prdt_lock(block, &prdt, index, LOCK_S, LOCK_PREDICATE, cur->thr); + + if (rw_latch == RW_NO_LATCH && height != 0) + block->page.lock.s_unlock(); + } + + if (level != height) + { + ut_ad(height > 0); + + height--; + guess= nullptr; + + const rec_t *node_ptr= btr_cur_get_rec(cur); + + offsets= rec_get_offsets(node_ptr, index, offsets, 0, + ULINT_UNDEFINED, &heap); + + if (page_rec_is_supremum(node_ptr)) + { + cur->low_match= 0; + cur->up_match= 0; + goto func_exit; + } + + /* If we are doing insertion or record locating, + remember the tree nodes we visited */ + if (page_mode == PAGE_CUR_RTREE_INSERT || + (search_mode == PAGE_CUR_RTREE_LOCATE && + latch_mode != BTR_MODIFY_LEAF)) + { + const bool add_latch= latch_mode == BTR_MODIFY_TREE && + rw_latch == RW_NO_LATCH; + + if (add_latch) + { + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | + MTR_MEMO_SX_LOCK)); + block->page.lock.s_lock(); + } + + /* Store the parent cursor location */ + ut_d(auto num_stored=) + rtr_store_parent_path(block, cur, latch_mode, height + 1, mtr); + + if (page_mode == PAGE_CUR_RTREE_INSERT) + { + btr_pcur_t *r_cursor= rtr_get_parent_cursor(cur, height + 1, true); + /* If it is 
insertion, there should be only one parent for + each level traverse */ + ut_ad(num_stored == 1); + node_ptr= btr_pcur_get_rec(r_cursor); + } + + if (add_latch) + block->page.lock.s_unlock(); + + ut_ad(!page_rec_is_supremum(node_ptr)); + } + + ut_ad(page_mode == search_mode || + (page_mode == PAGE_CUR_WITHIN && + search_mode == PAGE_CUR_RTREE_LOCATE)); + page_mode= search_mode; + + if (height == level && latch_mode == BTR_MODIFY_TREE) + { + ut_ad(upper_rw_latch == RW_X_LATCH); + for (auto i= root_savepoint, n= mtr->get_savepoint(); i < n; i++) + mtr->upgrade_buffer_fix(i, RW_X_LATCH); + } + + /* Go to the child node */ + page_id.set_page_no(btr_node_ptr_get_child_page_no(node_ptr, offsets)); + + if (page_mode >= PAGE_CUR_CONTAIN && page_mode != PAGE_CUR_RTREE_INSERT) + { + rtr_node_path_t *path= cur->rtr_info->path; + + if (found && !path->empty()) + { + ut_ad(path->back().page_no == page_id.page_no()); + path->pop_back(); +#ifdef UNIV_DEBUG + if (page_mode == PAGE_CUR_RTREE_LOCATE && + latch_mode != BTR_MODIFY_LEAF) + { + btr_pcur_t* pcur= cur->rtr_info->parent_path->back().cursor; + rec_t *my_node_ptr= btr_pcur_get_rec(pcur); + + offsets= rec_get_offsets(my_node_ptr, index, offsets, + 0, ULINT_UNDEFINED, &heap); + + ut_ad(page_id.page_no() == + btr_node_ptr_get_child_page_no(my_node_ptr, offsets)); + } +#endif + } + } + + goto search_loop; + } + + if (level) + { + if (upper_rw_latch == RW_NO_LATCH) + { + ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); + btr_block_get(*index, page_id.page_no(), RW_X_LATCH, false, mtr, &err); + } + else + { + ut_ad(mtr->memo_contains_flagged(block, upper_rw_latch)); + ut_ad(!latch_by_caller); + } + + if (page_mode <= PAGE_CUR_LE) + { + cur->low_match= low_match; + cur->up_match= up_match; + } + } + else + { + cur->low_match= low_match; + cur->low_bytes= low_bytes; + cur->up_match= up_match; + cur->up_bytes= up_bytes; + + ut_ad(up_match != ULINT_UNDEFINED || mode != PAGE_CUR_GE); + ut_ad(up_match != ULINT_UNDEFINED || mode != 
PAGE_CUR_LE); + ut_ad(low_match != ULINT_UNDEFINED || mode != PAGE_CUR_LE); + } + + goto func_exit; +} + +dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, + mtr_t *mtr, page_cur_mode_t mode) +{ + return rtr_search_to_nth_level(0, tuple, mode, latch_mode, cur, mtr); +} + +/** Search for a spatial index leaf page record. +@param pcur cursor +@param tuple search tuple +@param mode search mode +@param mtr mini-transaction */ +dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) +{ +#ifdef UNIV_DEBUG + switch (mode) { + case PAGE_CUR_CONTAIN: + case PAGE_CUR_INTERSECT: + case PAGE_CUR_WITHIN: + case PAGE_CUR_DISJOINT: + case PAGE_CUR_MBR_EQUAL: + break; + default: + ut_ad("invalid mode" == 0); + } +#endif + pcur->latch_mode= BTR_SEARCH_LEAF; + pcur->search_mode= mode; + pcur->pos_state= BTR_PCUR_IS_POSITIONED; + pcur->trx_if_known= nullptr; + return rtr_search_leaf(&pcur->btr_cur, tuple, BTR_SEARCH_LEAF, mtr, mode); +} + /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. Mainly called by row_search_index_entry() */ -bool -rtr_pcur_open( - dict_index_t* index, /*!< in: index */ +closed with btr_pcur_close. */ +bool rtr_search( const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... 
*/ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { static_assert(BTR_MODIFY_TREE == (8 | BTR_MODIFY_LEAF), ""); ut_ad(latch_mode & BTR_MODIFY_LEAF); + ut_ad(!(latch_mode & BTR_ALREADY_S_LATCHED)); + ut_ad(mtr->is_empty()); /* Initialize the cursor */ btr_pcur_init(cursor); cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode = PAGE_CUR_RTREE_LOCATE; - cursor->trx_if_known = NULL; + cursor->search_mode = PAGE_CUR_RTREE_LOCATE; + cursor->trx_if_known = nullptr; + + if (latch_mode & 8) { + mtr_x_lock_index(cursor->index(), mtr); + } else { + latch_mode + = btr_latch_mode(latch_mode | BTR_ALREADY_S_LATCHED); + mtr_sx_lock_index(cursor->index(), mtr); + } /* Search with the tree cursor */ btr_cur_t* btr_cursor = btr_pcur_get_btr_cur(cursor); - btr_cursor->page_cur.index = index; - btr_cursor->rtr_info = rtr_create_rtr_info(false, false, - btr_cursor, index); + btr_cursor->rtr_info + = rtr_create_rtr_info(false, false, + btr_cursor, cursor->index()); - /* Purge will SX lock the tree instead of take Page Locks */ if (btr_cursor->thr) { btr_cursor->rtr_info->need_page_lock = true; btr_cursor->rtr_info->thr = btr_cursor->thr; } - if ((latch_mode & 8) && index->lock.have_u_not_x()) { - index->lock.u_x_upgrade(SRW_LOCK_CALL); - mtr->lock_upgrade(index->lock); - } - - if (btr_cur_search_to_nth_level(0, tuple, PAGE_CUR_RTREE_LOCATE, - latch_mode, - btr_cursor, mtr) != DB_SUCCESS) { + if (rtr_search_leaf(btr_cursor, tuple, latch_mode, mtr) + != DB_SUCCESS) { return true; } @@ -560,7 +1065,8 @@ rtr_pcur_open( const rec_t* rec = btr_pcur_get_rec(cursor); - const bool d= rec_get_deleted_flag(rec, index->table->not_redundant()); + const bool d= rec_get_deleted_flag( + rec, cursor->index()->table->not_redundant()); if (page_rec_is_infimum(rec) || btr_pcur_get_low_match(cursor) != dtuple_get_n_fields(tuple) @@ -571,26 +1077,12 @@ rtr_pcur_open( btr_cursor->rtr_info->fd_del = true; btr_cursor->low_match = 
0; } - /* Did not find matched row in first dive. Release - latched block if any before search more pages */ - if (!(latch_mode & 8)) { - ulint tree_idx = btr_cursor->tree_height - 1; - rtr_info_t* rtr_info = btr_cursor->rtr_info; - if (rtr_info->tree_blocks[tree_idx]) { - mtr_release_block_at_savepoint( - mtr, - rtr_info->tree_savepoints[tree_idx], - rtr_info->tree_blocks[tree_idx]); - rtr_info->tree_blocks[tree_idx] = NULL; - } - } + mtr->rollback_to_savepoint(1); if (!rtr_pcur_getnext_from_path(tuple, PAGE_CUR_RTREE_LOCATE, btr_cursor, 0, latch_mode, - latch_mode - & (8 | BTR_ALREADY_S_LATCHED), - mtr)) { + true, mtr)) { return true; } @@ -598,6 +1090,10 @@ rtr_pcur_open( == dtuple_get_n_fields(tuple)); } + if (!(latch_mode & 8)) { + mtr->rollback_to_savepoint(0, 1); + } + return false; } @@ -641,8 +1137,7 @@ static const rec_t* rtr_get_father_node( if (sea_cur && sea_cur->tree_height > level) { ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK | MTR_MEMO_SX_LOCK)); - if (rtr_cur_restore_position(BTR_CONT_MODIFY_TREE, sea_cur, - level, mtr)) { + if (rtr_cur_restore_position(sea_cur, level, mtr)) { btr_pcur_t* r_cursor = rtr_get_parent_cursor( sea_cur, level, false); @@ -668,9 +1163,8 @@ static const rec_t* rtr_get_father_node( btr_cur->rtr_info = rtr_create_rtr_info(false, false, btr_cur, index); - if (btr_cur_search_to_nth_level(level, tuple, - PAGE_CUR_RTREE_LOCATE, - BTR_CONT_MODIFY_TREE, btr_cur, mtr) + if (rtr_search_to_nth_level(level, tuple, PAGE_CUR_RTREE_LOCATE, + BTR_CONT_MODIFY_TREE, btr_cur, mtr) != DB_SUCCESS) { } else if (sea_cur && sea_cur->tree_height == level) { rec = btr_cur_get_rec(btr_cur); @@ -729,9 +1223,8 @@ rtr_page_get_father_node_ptr( page_no = btr_cur_get_block(cursor)->page.id().page_no(); index = btr_cur_get_index(cursor); - ut_ad(srv_read_only_mode - || mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK - | MTR_MEMO_SX_LOCK)); + ut_ad(mtr->memo_contains_flagged(&index->lock, MTR_MEMO_X_LOCK + | MTR_MEMO_SX_LOCK)); 
ut_ad(dict_index_get_page(index) != page_no); @@ -879,32 +1372,10 @@ rtr_init_rtr_info( if (!reinit) { /* Reset all members. */ - rtr_info->path = NULL; - rtr_info->parent_path = NULL; - rtr_info->matches = NULL; - + memset(rtr_info, 0, sizeof *rtr_info); + static_assert(PAGE_CUR_UNSUPP == 0, "compatibility"); mysql_mutex_init(rtr_path_mutex_key, &rtr_info->rtr_path_mutex, nullptr); - - memset(rtr_info->tree_blocks, 0x0, - sizeof(rtr_info->tree_blocks)); - memset(rtr_info->tree_savepoints, 0x0, - sizeof(rtr_info->tree_savepoints)); - rtr_info->mbr.xmin = 0.0; - rtr_info->mbr.xmax = 0.0; - rtr_info->mbr.ymin = 0.0; - rtr_info->mbr.ymax = 0.0; - rtr_info->thr = NULL; - rtr_info->heap = NULL; - rtr_info->cursor = NULL; - rtr_info->index = NULL; - rtr_info->need_prdt_lock = false; - rtr_info->need_page_lock = false; - rtr_info->allocated = false; - rtr_info->mbr_adj = false; - rtr_info->fd_del = false; - rtr_info->search_tuple = NULL; - rtr_info->search_mode = PAGE_CUR_UNSUPP; } ut_ad(!rtr_info->matches || rtr_info->matches->matched_recs->empty()); @@ -1130,7 +1601,6 @@ struct optimistic_get static bool rtr_cur_restore_position( - ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... 
*/ btr_cur_t* btr_cur, /*!< in: detached persistent cursor */ ulint level, /*!< in: index level */ mtr_t* mtr) /*!< in: mtr */ @@ -1158,8 +1628,6 @@ rtr_cur_restore_position( r_cursor->modify_clock = 100; ); - ut_ad(latch_mode == BTR_CONT_MODIFY_TREE); - if (r_cursor->block_when_stored.run_with_hint( optimistic_get(r_cursor, mtr))) { ut_ad(r_cursor->pos_state == BTR_PCUR_IS_POSITIONED); diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index f1a381f0487..d7370a240cc 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -1549,8 +1549,7 @@ static void innodb_drop_database(handlerton*, char *path) mtr_t mtr; mtr.start(); pcur.btr_cur.page_cur.index = sys_index; - err= btr_pcur_open_on_user_rec(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); + err= btr_pcur_open_on_user_rec(&tuple, BTR_SEARCH_LEAF, &pcur, &mtr); if (err != DB_SUCCESS) goto err_exit; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index c6400544f73..f2a2ae7008b 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -6081,7 +6081,8 @@ func_exit: que_thr_t* thr = pars_complete_graph_for_exec( NULL, trx, ctx->heap, NULL); - const bool is_root = block->page.id().page_no() == index->page; + page_id_t id{block->page.id()}; + const bool is_root = id.page_no() == index->page; if (rec_is_metadata(rec, *index)) { ut_ad(page_rec_is_user_rec(rec)); @@ -6098,8 +6099,10 @@ func_exit: } /* Ensure that the root page is in the correct format. 
*/ - buf_block_t* root = btr_root_block_get(index, RW_X_LATCH, - &mtr, &err); + id.set_page_no(index->page); + buf_block_t* root = mtr.get_already_latched( + id, MTR_MEMO_PAGE_SX_FIX); + if (UNIV_UNLIKELY(!root)) { goto func_exit; } diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index 5a97ea4ebe0..69ee0058d0b 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2297,7 +2297,7 @@ loop: btr_pcur_t pcur; pcur.btr_cur.page_cur.index= ibuf.index; ibuf_mtr_start(&mtr); - if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, 0, &mtr)) + if (btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, &pcur, &mtr)) goto func_exit; if (!btr_pcur_is_on_user_rec(&pcur)) { @@ -2493,8 +2493,8 @@ ibuf_merge_space( /* Position the cursor on the first matching record. 
*/ pcur.btr_cur.page_cur.index = ibuf.index; - dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(&tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, + &pcur, &mtr); ut_ad(err != DB_SUCCESS || page_validate(btr_pcur_get_page(&pcur), ibuf.index)); @@ -3238,7 +3238,7 @@ ibuf_insert_low( ibuf_mtr_start(&mtr); pcur.btr_cur.page_cur.index = ibuf.index; - err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, 0, &mtr); + err = btr_pcur_open(ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); if (err != DB_SUCCESS) { func_exit: ibuf_mtr_commit(&mtr); @@ -3955,8 +3955,6 @@ ibuf_restore_pos( position is to be restored */ mtr_t* mtr) /*!< in/out: mini-transaction */ { - ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_PURGE_TREE); - if (UNIV_LIKELY(pcur->restore_position(mode, mtr) == btr_pcur_t::SAME_ALL)) { return true; @@ -4037,12 +4035,11 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ibuf_mtr_start(mtr); mysql_mutex_lock(&ibuf_mutex); + mtr_x_lock_index(ibuf.index, mtr); - if (!ibuf_restore_pos(page_id, search_tuple, BTR_PURGE_TREE, - pcur, mtr)) { - + if (!ibuf_restore_pos(page_id, search_tuple, + BTR_PURGE_TREE_ALREADY_LATCHED, pcur, mtr)) { mysql_mutex_unlock(&ibuf_mutex); - ut_ad(mtr->has_committed()); goto func_exit; } @@ -4053,13 +4050,10 @@ bool ibuf_delete_rec(const page_id_t page_id, btr_pcur_t* pcur, ut_a(err == DB_SUCCESS); ibuf_size_update(ibuf_root->page.frame); - mysql_mutex_unlock(&ibuf_mutex); - ibuf.empty = page_is_empty(ibuf_root->page.frame); - } else { - mysql_mutex_unlock(&ibuf_mutex); } + mysql_mutex_unlock(&ibuf_mutex); ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); func_exit: @@ -4237,7 +4231,7 @@ loop: /* Position pcur in the insert buffer at the first entry for this index page */ - if (btr_pcur_open_on_user_rec(search_tuple, PAGE_CUR_GE, + if (btr_pcur_open_on_user_rec(search_tuple, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { err = DB_CORRUPTION; @@ -4454,7 +4448,7 @@ loop: /* 
Position pcur in the insert buffer at the first entry for the space */ - if (btr_pcur_open_on_user_rec(&search_tuple, PAGE_CUR_GE, + if (btr_pcur_open_on_user_rec(&search_tuple, BTR_MODIFY_LEAF, &pcur, &mtr) != DB_SUCCESS) { goto leave_loop; diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index a2aa46b62da..a1cc10b05db 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2022, MariaDB Corporation. +Copyright (c) 2014, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -211,13 +211,12 @@ btr_write_autoinc(dict_index_t* index, ib_uint64_t autoinc, bool reset = false) @param[in,out] mtr mini-transaction */ void btr_set_instant(buf_block_t* root, const dict_index_t& index, mtr_t* mtr); -ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)) +ATTRIBUTE_COLD __attribute__((nonnull)) /** Reset the table to the canonical format on ROLLBACK of instant ALTER TABLE. 
@param[in] index clustered index with instant ALTER TABLE @param[in] all whether to reset FIL_PAGE_TYPE as well -@param[in,out] mtr mini-transaction -@return error code */ -dberr_t btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); +@param[in,out] mtr mini-transaction */ +void btr_reset_instant(const dict_index_t &index, bool all, mtr_t *mtr); /*************************************************************//** Makes tree one level higher by splitting the root, and inserts @@ -241,7 +240,7 @@ btr_root_raise_and_insert( ulint n_ext, /*!< in: number of externally stored columns */ mtr_t* mtr, /*!< in: mtr */ dberr_t* err) /*!< out: error code */ - MY_ATTRIBUTE((warn_unused_result)); + MY_ATTRIBUTE((nonnull, warn_unused_result)); /*************************************************************//** Reorganizes an index page. diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index 49bc8a4ff1b..f6abc9f5e52 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,12 +63,6 @@ enum { BTR_KEEP_IBUF_BITMAP = 32 }; -/* btr_cur_latch_leaves() returns latched blocks and savepoints. */ -struct btr_latch_leaves_t { - buf_block_t* blocks[3]; - ulint savepoints[3]; -}; - #include "que0types.h" #include "row0types.h" @@ -126,51 +120,28 @@ bool btr_cur_instant_root_init(dict_index_t* index, const page_t* page) ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result)); -/** Optimistically latches the leaf page or pages requested. 
-@param[in] block guessed buffer block -@param[in] modify_clock modify clock value -@param[in,out] latch_mode BTR_SEARCH_LEAF, ... -@param[in,out] cursor cursor -@param[in] mtr mini-transaction -@return true if success */ -bool -btr_cur_optimistic_latch_leaves( - buf_block_t* block, - ib_uint64_t modify_clock, - btr_latch_mode* latch_mode, - btr_cur_t* cursor, - mtr_t* mtr); - MY_ATTRIBUTE((warn_unused_result)) -/** Searches an index tree and positions a tree cursor on a given level. +/********************************************************************//** +Searches an index tree and positions a tree cursor on a given non-leaf level. NOTE: n_fields_cmp in tuple must be set so that it cannot be compared to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. +Cursor is left at the place where an insert of the +search tuple should be performed in the B-tree. InnoDB does an insert +immediately after the cursor. Thus, the cursor may end up on a user record, +or on a page infimum record. @param level the tree level of search @param tuple data tuple; NOTE: n_fields_cmp in tuple must be set so that it cannot get compared to the node ptr page number field! -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a - unique prefix of a record, mode should be PAGE_CUR_LE, not - PAGE_CUR_GE, as the latter may end up on the previous page of - the record! Inserts should always be made using PAGE_CUR_LE - to search the position! -@param latch_mode BTR_SEARCH_LEAF, ..., ORed with at most one of BTR_INSERT, - BTR_DELETE_MARK, or BTR_DELETE; - cursor->left_block is used to store a pointer to the left - neighbor page +@param latch RW_S_LATCH or RW_X_LATCH @param cursor tree cursor; the cursor page is s- or x-latched, but see also above! 
@param mtr mini-transaction -@param autoinc PAGE_ROOT_AUTO_INC to be written (0 if none) @return DB_SUCCESS on success or error code otherwise */ dberr_t btr_cur_search_to_nth_level(ulint level, const dtuple_t *tuple, - page_cur_mode_t mode, - btr_latch_mode latch_mode, - btr_cur_t *cursor, mtr_t *mtr, - ib_uint64_t autoinc= 0); + rw_lock_type_t rw_latch, + btr_cur_t *cursor, mtr_t *mtr); /*************************************************************//** Tries to perform an insert to a page in an index tree, next to cursor. @@ -653,20 +624,6 @@ btr_rec_copy_externally_stored_field( ulint* len, mem_heap_t* heap); -/** Latches the leaf page or pages requested. -@param[in] block leaf page where the search converged -@param[in] latch_mode BTR_SEARCH_LEAF, ... -@param[in] cursor cursor -@param[in,out] mtr mini-transaction -@param[out] latch_leaves latched blocks and savepoints */ -void -btr_cur_latch_leaves( - buf_block_t* block, - btr_latch_mode latch_mode, - btr_cur_t* cursor, - mtr_t* mtr, - btr_latch_leaves_t* latch_leaves = nullptr); - /*######################################################################*/ /** In the pessimistic delete, if the page data size drops below this @@ -727,21 +684,16 @@ to know struct size! 
*/ struct btr_cur_t { page_cur_t page_cur; /*!< page cursor */ purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */ - buf_block_t* left_block; /*!< this field is used to store - a pointer to the left neighbor - page, in the cases - BTR_SEARCH_PREV and - BTR_MODIFY_PREV */ /*------------------------------*/ que_thr_t* thr; /*!< this field is only used - when btr_cur_search_to_nth_level + when search_leaf() is called for an index entry insertion: the calling query thread is passed here to be used in the insert buffer */ /*------------------------------*/ /** The following fields are used in - btr_cur_search_to_nth_level to pass information: */ + search_leaf() to pass information: */ /* @{ */ enum btr_cur_method flag; /*!< Search method used */ ulint tree_height; /*!< Tree height if the search is done @@ -750,8 +702,7 @@ struct btr_cur_t { ulint up_match; /*!< If the search mode was PAGE_CUR_LE, the number of matched fields to the the first user record to the right of - the cursor record after - btr_cur_search_to_nth_level; + the cursor record after search_leaf(); for the mode PAGE_CUR_GE, the matched fields to the first user record AT THE CURSOR or to the right of it; @@ -768,8 +719,7 @@ struct btr_cur_t { ulint low_match; /*!< if search mode was PAGE_CUR_LE, the number of matched fields to the first user record AT THE CURSOR or - to the left of it after - btr_cur_search_to_nth_level; + to the left of it after search_leaf(); NOT defined for PAGE_CUR_GE or any other search modes; see also the NOTE in up_match! */ @@ -803,6 +753,24 @@ struct btr_cur_t { dberr_t open_leaf(bool first, dict_index_t *index, btr_latch_mode latch_mode, mtr_t *mtr); + /** Search the leaf page record corresponding to a key. 
+ @param tuple key to search for, with correct n_fields_cmp + @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting + @param latch_mode latch mode + @param mtr mini-transaction + @return error code */ + dberr_t search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + btr_latch_mode latch_mode, mtr_t *mtr); + + /** Search the leaf page record corresponding to a key, exclusively latching + all sibling pages on the way. + @param tuple key to search for, with correct n_fields_cmp + @param mode search mode; PAGE_CUR_LE for unique prefix or for inserting + @param mtr mini-transaction + @return error code */ + dberr_t pessimistic_search_leaf(const dtuple_t *tuple, page_cur_mode_t mode, + mtr_t *mtr); + /** Open the cursor at a random leaf page record. @param offsets temporary memory for rec_get_offsets() @param heap memory heap for rec_get_offsets() @@ -862,14 +830,14 @@ inherited external field. */ #define BTR_EXTERN_INHERITED_FLAG 64U #ifdef BTR_CUR_HASH_ADAPT -/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */ +/** Number of searches down the B-tree in btr_cur_t::search_leaf(). */ extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_non_sea; /** Old value of btr_cur_n_non_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by srv_printf_innodb_monitor(). */ extern ulint btr_cur_n_non_sea_old; /** Number of successful adaptive hash index lookups in -btr_cur_search_to_nth_level(). */ +btr_cur_t::search_leaf(). */ extern ib_counter_t<ulint, ib_counter_element_t> btr_cur_n_sea; /** Old value of btr_cur_n_sea. Copied by srv_refresh_innodb_monitor_stats(). Referenced by diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index cd8eacdc212..c66a3bfa329 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. 
-Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,24 +70,6 @@ btr_pcur_init( /*==========*/ btr_pcur_t* pcur); /*!< in: persistent cursor */ -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. */ -inline -dberr_t -btr_pcur_open( - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written - (0 if none) */ - mtr_t* mtr) /*!< in: mtr */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done @@ -100,8 +82,7 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. */ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, - page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr); @@ -356,7 +337,7 @@ struct btr_pcur_t /** the modify clock value of the buffer block when the cursor position was stored */ ib_uint64_t modify_clock= 0; - /** btr_pcur_store_position() and btr_pcur_restore_position() state. */ + /** btr_pcur_store_position() and restore_position() state. 
*/ enum pcur_pos_t pos_state= BTR_PCUR_NOT_POSITIONED; page_cur_mode_t search_mode= PAGE_CUR_UNSUPP; /** the transaction, if we know it; otherwise this field is not defined; @@ -383,8 +364,8 @@ struct btr_pcur_t supremum. (4) cursor was positioned before the first or after the last in an empty tree: restores to before first or after the last in the tree. - @param restore_latch_mode BTR_SEARCH_LEAF, ... - @param mtr mtr + @param latch_mode BTR_SEARCH_LEAF, ... + @param mtr mini-transaction @retval SAME_ALL cursor position on user rec and points on the record with the same field values as in the stored record, @retval SAME_UNIQ cursor position is on user rec and points on the @@ -409,8 +390,7 @@ struct btr_pcur_t pos_state= BTR_PCUR_IS_POSITIONED; old_rec= nullptr; - return btr_cur.open_leaf(first, index, - BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode), mtr); + return btr_cur.open_leaf(first, index, this->latch_mode, mtr); } }; @@ -433,6 +413,24 @@ inline rec_t *btr_pcur_get_rec(const btr_pcur_t *cursor) return cursor->btr_cur.page_cur.rec; } +/**************************************************************//** +Initializes and opens a persistent cursor to an index tree. */ +inline +dberr_t +btr_pcur_open( + const dtuple_t* tuple, /*!< in: tuple on which search done */ + page_cur_mode_t mode, /*!< in: PAGE_CUR_LE, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ + mtr_t* mtr) /*!< in: mtr */ +{ + cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); + cursor->search_mode= mode; + cursor->pos_state= BTR_PCUR_IS_POSITIONED; + cursor->trx_if_known= nullptr; + return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); +} + /** Open a cursor on the first user record satisfying the search condition; in case of no match, after the last index record. 
*/ MY_ATTRIBUTE((nonnull, warn_unused_result)) @@ -440,16 +438,15 @@ inline dberr_t btr_pcur_open_on_user_rec( const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ... */ btr_latch_mode latch_mode, /*!< in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(mode == PAGE_CUR_GE || mode == PAGE_CUR_G); ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); - if (dberr_t err= btr_pcur_open(tuple, mode, latch_mode, cursor, 0, mtr)) + if (dberr_t err= + btr_pcur_open(tuple, PAGE_CUR_GE, latch_mode, cursor, mtr)) return err; if (!btr_pcur_is_after_last_on_page(cursor) || btr_pcur_is_after_last_in_tree(cursor)) diff --git a/storage/innobase/include/btr0pcur.inl b/storage/innobase/include/btr0pcur.inl index 551f8f20fca..b827d70dc47 100644 --- a/storage/innobase/include/btr0pcur.inl +++ b/storage/innobase/include/btr0pcur.inl @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -299,38 +299,10 @@ btr_pcur_init( pcur->btr_cur.rtr_info = NULL; } -/**************************************************************//** -Initializes and opens a persistent cursor to an index tree. */ -inline -dberr_t -btr_pcur_open( - const dtuple_t* tuple, /*!< in: tuple on which search done */ - page_cur_mode_t mode, /*!< in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! 
*/ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ - ib_uint64_t autoinc,/*!< in: PAGE_ROOT_AUTO_INC to be written - (0 if none) */ - mtr_t* mtr) /*!< in: mtr */ -{ - ut_ad(!cursor->index()->is_spatial()); - cursor->latch_mode= BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode); - cursor->search_mode= mode; - cursor->pos_state= BTR_PCUR_IS_POSITIONED; - cursor->trx_if_known= nullptr; - return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, - btr_pcur_get_btr_cur(cursor), - mtr, autoinc); -} - /** Opens an persistent cursor to an index tree without initializing the cursor. @param tuple tuple on which search done -@param mode PAGE_CUR_L, ...; NOTE that if the search is made using a +@param mode search mode; NOTE that if the search is made using a unique prefix of a record, mode should be PAGE_CUR_LE, not PAGE_CUR_GE, as the latter may end up on the previous page of the record! @@ -339,8 +311,7 @@ cursor. @param mtr mini-transaction @return DB_SUCCESS on success or error code otherwise. 
*/ inline -dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, - page_cur_mode_t mode, +dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, page_cur_mode_t mode, btr_latch_mode latch_mode, btr_pcur_t *cursor, mtr_t *mtr) { @@ -348,10 +319,7 @@ dberr_t btr_pcur_open_with_no_init(const dtuple_t *tuple, cursor->search_mode= mode; cursor->pos_state= BTR_PCUR_IS_POSITIONED; cursor->trx_if_known= nullptr; - - /* Search with the tree cursor */ - return btr_cur_search_to_nth_level(0, tuple, mode, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); + return cursor->btr_cur.search_leaf(tuple, mode, latch_mode, mtr); } /**************************************************************//** diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 6118bfbc128..912c022c64f 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2022, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -55,25 +55,26 @@ in the index record. */ #define BTR_EXTERN_LOCAL_STORED_MAX_SIZE \ (BTR_EXTERN_FIELD_REF_SIZE * 2) -/** Latching modes for btr_cur_search_to_nth_level(). */ +/** Latching modes for btr_cur_t::search_leaf(). */ enum btr_latch_mode { /** Search a record on a leaf page and S-latch it. */ BTR_SEARCH_LEAF = RW_S_LATCH, /** (Prepare to) modify a record on a leaf page and X-latch it. */ BTR_MODIFY_LEAF = RW_X_LATCH, + /** U-latch root and X-latch a leaf page */ + BTR_MODIFY_ROOT_AND_LEAF = RW_SX_LATCH, /** Obtain no latches. */ BTR_NO_LATCHES = RW_NO_LATCH, - /** Search the previous record. */ + /** Search the previous record. 
+ Used in btr_pcur_move_backward_from_page(). */ BTR_SEARCH_PREV = 4 | BTR_SEARCH_LEAF, - /** Modify the previous record. */ + /** Modify the previous record. + Used in btr_pcur_move_backward_from_page() and ibuf_insert(). */ BTR_MODIFY_PREV = 4 | BTR_MODIFY_LEAF, - /** Start searching the entire B-tree. */ - BTR_SEARCH_TREE = 8 | BTR_SEARCH_LEAF, - /** Start modifying1 the entire B-tree. */ + /** Start modifying the entire B-tree. */ BTR_MODIFY_TREE = 8 | BTR_MODIFY_LEAF, - /** Continue searching the entire B-tree. */ - BTR_CONT_SEARCH_TREE = 4 | BTR_SEARCH_TREE, - /** Continue modifying the entire B-tree. */ + /** Continue modifying the entire R-tree. + Only used by rtr_search_to_nth_level(). */ BTR_CONT_MODIFY_TREE = 4 | BTR_MODIFY_TREE, /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually @@ -98,14 +99,14 @@ enum btr_latch_mode { dict_index_t::lock S-latch is being held. */ BTR_SEARCH_LEAF_ALREADY_S_LATCHED = BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED, - /** Search the entire index tree, assuming that the - dict_index_t::lock S-latch is being held. */ - BTR_SEARCH_TREE_ALREADY_S_LATCHED = BTR_SEARCH_TREE - | BTR_ALREADY_S_LATCHED, /** Search and X-latch a leaf page, assuming that the dict_index_t::lock is being held in non-exclusive mode. */ BTR_MODIFY_LEAF_ALREADY_LATCHED = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED, + /** U-latch root and X-latch a leaf page, assuming that + dict_index_t::lock is being held in U mode. */ + BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED = BTR_MODIFY_ROOT_AND_LEAF + | BTR_ALREADY_S_LATCHED, /** Attempt to delete-mark a secondary index record. */ BTR_DELETE_MARK_LEAF = BTR_MODIFY_LEAF | BTR_DELETE_MARK, @@ -132,6 +133,9 @@ enum btr_latch_mode { /** Attempt to delete a record in the tree. */ BTR_PURGE_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_DELETE, + /** Attempt to delete a record in an x-latched tree. */ + BTR_PURGE_TREE_ALREADY_LATCHED = BTR_PURGE_TREE + | BTR_ALREADY_S_LATCHED, /** Attempt to insert a record into the tree. 
*/ BTR_INSERT_TREE = BTR_MODIFY_TREE | BTR_LATCH_FOR_INSERT, diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 87236415150..d17f7456a15 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -791,7 +791,7 @@ public: { ut_ad(fsp_is_system_temporary(id().space())); ut_ad(in_file()); - ut_ad(!oldest_modification()); + ut_ad((oldest_modification() | 2) == 2); oldest_modification_= 2; } diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index c07ff679a80..f615b856126 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -123,8 +123,6 @@ inline void buf_flush_note_modification(buf_block_t *b, lsn_t start, lsn_t end) if (oldest_modification > 1) ut_ad(oldest_modification <= start); - else if (fsp_is_system_temporary(b->page.id().space())) - b->page.set_temp_modified(); else buf_pool.insert_into_flush_list(b, start); srv_stats.buf_pool_write_requests.inc(); diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h index 777f2432c93..b07261ce042 100644 --- a/storage/innobase/include/gis0rtree.h +++ b/storage/innobase/include/gis0rtree.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -59,6 +59,44 @@ Created 2013/03/27 Jimmy Yang and Allen Lai /* Geometry data header */ #define GEO_DATA_HEADER_SIZE 4 + +/** Search for a spatial index leaf page record. 
+@param cur cursor +@param tuple search tuple +@param latch_mode latching mode +@param mtr mini-transaction +@param mode search mode */ +dberr_t rtr_search_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, mtr_t *mtr, + page_cur_mode_t mode= PAGE_CUR_RTREE_LOCATE) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + +/** Search for inserting a spatial index leaf page record. +@param cur cursor +@param tuple search tuple +@param latch_mode latching mode +@param mtr mini-transaction */ +inline dberr_t rtr_insert_leaf(btr_cur_t *cur, const dtuple_t *tuple, + btr_latch_mode latch_mode, mtr_t *mtr) +{ + return rtr_search_leaf(cur, tuple, latch_mode, mtr, PAGE_CUR_RTREE_INSERT); +} + +/** Search for a spatial index leaf page record. +@param pcur cursor +@param tuple search tuple +@param mode search mode +@param mtr mini-transaction */ +dberr_t rtr_search_leaf(btr_pcur_t *pcur, const dtuple_t *tuple, + page_cur_mode_t mode, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + +dberr_t rtr_search_to_nth_level(ulint level, const dtuple_t *tuple, + page_cur_mode_t mode, + btr_latch_mode latch_mode, + btr_cur_t *cur, mtr_t *mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); + /**********************************************************************//** Builds a Rtree node pointer out of a physical record and a page number. @return own: node pointer */ @@ -295,11 +333,9 @@ rtr_store_parent_path( /**************************************************************//** Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. */ -bool -rtr_pcur_open( - dict_index_t* index, /*!< in: index */ +bool rtr_search( const dtuple_t* tuple, /*!< in: tuple on which search done */ - btr_latch_mode latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */ + btr_latch_mode latch_mode,/*!< in: BTR_MODIFY_LEAF, ... 
*/ btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */ mtr_t* mtr) /*!< in: mtr */ MY_ATTRIBUTE((warn_unused_result)); diff --git a/storage/innobase/include/gis0type.h b/storage/innobase/include/gis0type.h index 4fccfdb6c26..d6a4ef67a38 100644 --- a/storage/innobase/include/gis0type.h +++ b/storage/innobase/include/gis0type.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2020, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -105,12 +105,6 @@ typedef struct rtr_info{ matched_rec_t* matches;/*!< struct holding matching leaf records */ mysql_mutex_t rtr_path_mutex; /*!< mutex protect the "path" vector */ - buf_block_t* tree_blocks[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; - /*!< tracking pages that would be locked - at leaf level, for future free */ - ulint tree_savepoints[RTR_MAX_LEVELS + RTR_LEAF_LATCH_NUM]; - /*!< savepoint used to release latches/blocks - on each level and leaf level */ rtr_mbr_t mbr; /*!< the search MBR */ que_thr_t* thr; /*!< the search thread */ mem_heap_t* heap; /*!< memory heap */ diff --git a/storage/innobase/include/ibuf0ibuf.inl b/storage/innobase/include/ibuf0ibuf.inl index 9f4e937f31d..1e21f74ff2b 100644 --- a/storage/innobase/include/ibuf0ibuf.inl +++ b/storage/innobase/include/ibuf0ibuf.inl @@ -100,9 +100,8 @@ ibuf_should_try( decide */ { return(innodb_change_buffering + && !(index->type & (DICT_CLUSTERED | DICT_IBUF)) && ibuf.max_size != 0 - && !dict_index_is_clust(index) - && !dict_index_is_spatial(index) && index->table->quiesce == QUIESCE_NONE && (ignore_sec_unique || !dict_index_is_unique(index))); } diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index 
093b706c1de..d34a62e7bb2 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2019, 2022, MariaDB Corporation. +Copyright (c) 2019, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -401,7 +401,8 @@ inline byte *mtr_t::log_write(const page_id_t id, const buf_page_t *bpage, ut_ad(have_offset || offset == 0); ut_ad(offset + len <= srv_page_size); static_assert(MIN_4BYTE >= UNIV_PAGE_SIZE_MAX, "consistency"); - + ut_ad(type == FREE_PAGE || type == OPTION || (type == EXTENDED && !bpage) || + memo_contains_flagged(bpage, MTR_MEMO_MODIFY)); size_t max_len; if (!have_len) max_len= 1 + 5 + 5; @@ -511,33 +512,6 @@ inline void mtr_t::memcpy(const buf_block_t &b, void *dest, const void *str, memcpy(b, ut_align_offset(d, srv_page_size), len); } -/** Initialize an entire page. -@param[in,out] b buffer page */ -inline void mtr_t::init(buf_block_t *b) -{ - const page_id_t id{b->page.id()}; - ut_ad(is_named_space(id.space())); - ut_ad(!m_freed_pages == !m_freed_space); - - if (UNIV_LIKELY_NULL(m_freed_space) && - m_freed_space->id == id.space() && - m_freed_pages->remove_if_exists(b->page.id().page_no()) && - m_freed_pages->empty()) - { - delete m_freed_pages; - m_freed_pages= nullptr; - m_freed_space= nullptr; - } - - b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); - - if (!is_logged()) - return; - - m_log.close(log_write(b->page.id(), &b->page)); - m_last_offset= FIL_PAGE_TYPE; -} - /** Write an EXTENDED log record. 
@param block buffer pool page @param type extended record subtype; @see mrec_ext_t */ diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 41f9b473856..140cd3dc1b6 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -37,15 +37,6 @@ Created 11/26/1995 Heikki Tuuri /** Commit a mini-transaction. */ #define mtr_commit(m) (m)->commit() -/** Set and return a savepoint in mtr. -@return savepoint */ -#define mtr_set_savepoint(m) (m)->get_savepoint() - -/** Release the (index tree) s-latch stored in an mtr memo after a -savepoint. */ -#define mtr_release_s_latch_at_savepoint(m, s, l) \ - (m)->release_s_latch_at_savepoint((s), (l)) - /** Change the logging mode of a mini-transaction. @return old mode */ #define mtr_set_log_mode(m, d) (m)->set_log_mode((d)) @@ -60,13 +51,10 @@ savepoint. */ # define mtr_sx_lock_index(i,m) (m)->u_lock(&(i)->lock) #endif -#define mtr_release_block_at_savepoint(m, s, b) \ - (m)->release_block_at_savepoint((s), (b)) - /** Mini-transaction memo stack slot. */ struct mtr_memo_slot_t { - /** pointer to the object, or nullptr if released */ + /** pointer to the object */ void *object; /** type of the stored object */ mtr_memo_type_t type; @@ -125,83 +113,36 @@ struct mtr_t { return m_memo.size(); } - /** Release the (index tree) s-latch stored in an mtr memo after a savepoint. 
- @param savepoint value returned by get_savepoint() - @param lock index latch to release */ - void release_s_latch_at_savepoint(ulint savepoint, index_lock *lock) + /** Get the block at a savepoint */ + buf_block_t *at_savepoint(ulint savepoint) const { ut_ad(is_active()); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == lock); - ut_ad(slot.type == MTR_MEMO_S_LOCK); - slot.object= nullptr; - lock->s_unlock(); + const mtr_memo_slot_t &slot= m_memo[savepoint]; + ut_ad(slot.type < MTR_MEMO_S_LOCK); + ut_ad(slot.object); + return static_cast(slot.object); } - /** Release the block in an mtr memo after a savepoint. */ - void release_block_at_savepoint(ulint savepoint, buf_block_t *block) + + /** Try to get a block at a savepoint. + @param savepoint the savepoint right before the block was acquired + @return the block at the savepoint + @retval nullptr if no buffer block was registered at that savepoint */ + buf_block_t *block_at_savepoint(ulint savepoint) const { ut_ad(is_active()); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == block); - ut_ad(!(slot.type & MTR_MEMO_MODIFY)); - slot.object= nullptr; - block->page.unfix(); - - switch (slot.type) { - case MTR_MEMO_PAGE_S_FIX: - block->page.lock.s_unlock(); - break; - case MTR_MEMO_PAGE_SX_FIX: - case MTR_MEMO_PAGE_X_FIX: - block->page.lock.u_or_x_unlock(slot.type == MTR_MEMO_PAGE_SX_FIX); - break; - default: - break; - } + const mtr_memo_slot_t &slot= m_memo[savepoint]; + return slot.type < MTR_MEMO_S_LOCK + ? static_cast(slot.object) + : nullptr; } - /** @return if we are about to make a clean buffer block dirty */ - static bool is_block_dirtied(const buf_page_t &b) - { - ut_ad(b.in_file()); - ut_ad(b.frame); - ut_ad(b.buf_fix_count()); - return b.oldest_modification() <= 1 && b.id().space() < SRV_TMP_SPACE_ID; - } - - /** X-latch a not yet latched block after a savepoint. 
*/ - void x_latch_at_savepoint(ulint savepoint, buf_block_t *block) - { - ut_ad(is_active()); - ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX | - MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == block); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_X_FIX; - block->page.lock.x_lock(); - ut_ad(!block->page.is_io_fixed()); - - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block->page); - } - - /** U-latch a not yet latched block after a savepoint. */ - void sx_latch_at_savepoint(ulint savepoint, buf_block_t *block) - { - ut_ad(is_active()); - ut_ad(!memo_contains_flagged(block, MTR_MEMO_PAGE_S_FIX | - MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)); - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.object == block); - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_SX_FIX; - block->page.lock.u_lock(); - ut_ad(!block->page.is_io_fixed()); - - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block->page); - } + /** Retrieve a page that has already been latched. + @param id page identifier + @param type page latch type + @return block + @retval nullptr if the block had not been latched yet */ + buf_block_t *get_already_latched(const page_id_t id, mtr_memo_type_t type) + const; /** @return the logging mode */ mtr_log_t get_log_mode() const @@ -360,23 +301,17 @@ struct mtr_t { void release(const index_lock &lock) { release(&lock); } /** Release a latch to an unmodified page. */ void release(const buf_block_t &block) { release(&block); } - - /** Note that the mini-transaction will modify data. */ - void flag_modified() { m_modifications = true; } private: /** Release an unmodified object. */ void release(const void *object); +public: /** Mark the given latched page as modified. @param block page that will be modified */ - void modify(const buf_block_t& block); -public: - /** Note that the mini-transaction will modify a block. 
*/ - void set_modified(const buf_block_t &block) - { flag_modified(); if (m_log_mode != MTR_LOG_NONE) modify(block); } + void set_modified(const buf_block_t &block); /** Set the state to not-modified. This will not log the changes. This is only used during redo log apply, to avoid logging the changes. */ - void discard_modifications() { m_modifications = false; } + void discard_modifications() { m_modifications= false; } /** Get the LSN of commit(). @return the commit LSN @@ -408,28 +343,17 @@ public: @param rw_latch latch to acquire */ void upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch); - /** Register a page latch on a buffer-fixed block was buffer-fixed. - @param latch latch type */ - void u_lock_register(ulint savepoint) + /** Register a change to the page latch state. */ + void lock_register(ulint savepoint, mtr_memo_type_t type) { mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_SX_FIX; - } - - /** Register a page latch on a buffer-fixed block was buffer-fixed. - @param latch latch type */ - void s_lock_register(ulint savepoint) - { - mtr_memo_slot_t &slot= m_memo[savepoint]; - ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.type= MTR_MEMO_PAGE_S_FIX; + ut_ad(slot.type <= MTR_MEMO_BUF_FIX); + ut_ad(type <= MTR_MEMO_BUF_FIX); + slot.type= type; } /** Upgrade U locks on a block to X */ void page_lock_upgrade(const buf_block_t &block); - /** Upgrade U lock to X */ - void lock_upgrade(const index_lock &lock); /** Check if we are holding tablespace latch @param space tablespace to search for @@ -459,27 +383,65 @@ public: @retval nullptr if not found */ buf_block_t *memo_contains_page_flagged(const byte *ptr, ulint flags) const; - /** @return true if mini-transaction contains modifications. */ + /** @return whether this mini-transaction modifies persistent data */ bool has_modifications() const { return m_modifications; } #endif /* UNIV_DEBUG */ - /** Push an object to an mtr memo stack. 
- @param object object + /** Push a buffer page to an the memo. + @param block buffer block + @param type object type: MTR_MEMO_S_LOCK, ... */ + void memo_push(buf_block_t *block, mtr_memo_type_t type) + __attribute__((nonnull)) + { + ut_ad(is_active()); + ut_ad(type <= MTR_MEMO_PAGE_SX_MODIFY); + ut_ad(block->page.buf_fix_count()); + ut_ad(block->page.in_file()); +#ifdef UNIV_DEBUG + switch (type) { + case MTR_MEMO_PAGE_S_FIX: + ut_ad(block->page.lock.have_s()); + break; + case MTR_MEMO_PAGE_X_FIX: case MTR_MEMO_PAGE_X_MODIFY: + ut_ad(block->page.lock.have_x()); + break; + case MTR_MEMO_PAGE_SX_FIX: case MTR_MEMO_PAGE_SX_MODIFY: + ut_ad(block->page.lock.have_u_or_x()); + break; + case MTR_MEMO_BUF_FIX: + break; + case MTR_MEMO_MODIFY: + case MTR_MEMO_S_LOCK: case MTR_MEMO_X_LOCK: case MTR_MEMO_SX_LOCK: + case MTR_MEMO_SPACE_X_LOCK: case MTR_MEMO_SPACE_S_LOCK: + ut_ad("invalid type" == 0); + } +#endif + if (!(type & MTR_MEMO_MODIFY)); + else if (block->page.id().space() >= SRV_TMP_SPACE_ID) + { + block->page.set_temp_modified(); + type= mtr_memo_type_t(type & ~MTR_MEMO_MODIFY); + } + else + { + m_modifications= true; + if (!m_made_dirty) + /* If we are going to modify a previously clean persistent page, + we must set m_made_dirty, so that commit() will acquire + log_sys.flush_order_mutex and insert the block into + buf_pool.flush_list. */ + m_made_dirty= block->page.oldest_modification() <= 1; + } + m_memo.emplace_back(mtr_memo_slot_t{block, type}); + } + + /** Push an index lock or tablespace latch to the memo. + @param object index lock or tablespace latch @param type object type: MTR_MEMO_S_LOCK, ... */ void memo_push(void *object, mtr_memo_type_t type) __attribute__((nonnull)) { ut_ad(is_active()); - /* If this mtr has U or X latched a clean page then we set - the m_made_dirty flag. This tells us if we need to - grab log_sys.flush_order_mutex at mtr_t::commit() so that we - can insert the dirtied page into the buf_pool.flush_list. 
- - FIXME: Do this only when the MTR_MEMO_MODIFY flag is set! */ - if (!m_made_dirty && - (type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX))) - m_made_dirty= - is_block_dirtied(*static_cast(object)); - + ut_ad(type >= MTR_MEMO_S_LOCK); m_memo.emplace_back(mtr_memo_slot_t{object, type}); } @@ -770,7 +732,7 @@ private: /** specifies which operations should be logged; default MTR_LOG_ALL */ uint16_t m_log_mode:2; - /** whether at least one buffer pool page was written to */ + /** whether at least one persistent page was written to */ uint16_t m_modifications:1; /** whether at least one previously clean buffer pool page was written to */ diff --git a/storage/innobase/include/small_vector.h b/storage/innobase/include/small_vector.h index 76069cfc168..d28a36184b8 100644 --- a/storage/innobase/include/small_vector.h +++ b/storage/innobase/include/small_vector.h @@ -71,6 +71,7 @@ public: using const_iterator= const T *; using reverse_iterator= std::reverse_iterator; using reference= T &; + using const_reference= const T&; iterator begin() { return static_cast(BeginX); } const_iterator begin() const { return static_cast(BeginX); } @@ -81,6 +82,8 @@ public: reverse_iterator rend() { return reverse_iterator(begin()); } reference operator[](size_t i) { assert(i < size()); return begin()[i]; } + const_reference operator[](size_t i) const + { return const_cast(*this)[i]; } void erase(const_iterator S, const_iterator E) { diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 2c93d7ffe5a..1c6c28d874a 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -40,6 +40,8 @@ Created 11/26/1995 Heikki Tuuri void mtr_memo_slot_t::release() const { + ut_ad(object); + switch (type) { case MTR_MEMO_S_LOCK: static_cast(object)->s_unlock(); @@ -121,11 +123,7 @@ inline void mtr_t::release_resources() void mtr_t::release() { for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) - { - mtr_memo_slot_t &slot= *it; - if 
(slot.object) - slot.release(); - } + it->release(); m_memo.clear(); } @@ -191,7 +189,7 @@ void mtr_t::commit() for (const mtr_memo_slot_t &slot : m_memo) { - if (slot.object && slot.type & MTR_MEMO_MODIFY) + if (slot.type & MTR_MEMO_MODIFY) { ut_ad(slot.type == MTR_MEMO_PAGE_X_MODIFY || slot.type == MTR_MEMO_PAGE_SX_MODIFY); @@ -226,8 +224,7 @@ void mtr_t::rollback_to_savepoint(ulint begin, ulint end) while (s-- > begin) { const mtr_memo_slot_t &slot= m_memo[s]; - if (!slot.object) - continue; + ut_ad(slot.object); /* This is intended for releasing latches on indexes or unmodified buffer pool pages. */ ut_ad(slot.type <= MTR_MEMO_SX_LOCK); @@ -271,8 +268,7 @@ void mtr_t::commit_shrink(fil_space_t &space) for (mtr_memo_slot_t &slot : m_memo) { - if (!slot.object) - continue; + ut_ad(slot.object); switch (slot.type) { default: ut_ad("invalid type" == 0); @@ -936,15 +932,6 @@ void mtr_t::page_lock_upgrade(const buf_block_t &block) #endif /* BTR_CUR_HASH_ADAPT */ } -void mtr_t::lock_upgrade(const index_lock &lock) -{ - ut_ad(lock.have_x()); - - for (mtr_memo_slot_t &slot : m_memo) - if (slot.object == &lock && slot.type == MTR_MEMO_SX_LOCK) - slot.type= MTR_MEMO_X_LOCK; -} - /** Latch a buffer pool block. 
@param block block to be latched @param rw_latch RW_S_LATCH, RW_SX_LATCH, RW_X_LATCH, RW_NO_LATCH */ @@ -993,26 +980,29 @@ done: void mtr_t::upgrade_buffer_fix(ulint savepoint, rw_lock_type_t rw_latch) { ut_ad(is_active()); - ut_ad(savepoint < m_memo.size()); - mtr_memo_slot_t &slot= m_memo[savepoint]; ut_ad(slot.type == MTR_MEMO_BUF_FIX); buf_block_t *block= static_cast(slot.object); ut_d(const auto state= block->page.state()); ut_ad(state > buf_page_t::UNFIXED); ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX); + static_assert(int{MTR_MEMO_PAGE_S_FIX} == int{RW_S_LATCH}, ""); + static_assert(int{MTR_MEMO_PAGE_X_FIX} == int{RW_X_LATCH}, ""); + static_assert(int{MTR_MEMO_PAGE_SX_FIX} == int{RW_SX_LATCH}, ""); + slot.type= mtr_memo_type_t(rw_latch); switch (rw_latch) { default: ut_ad("invalid state" == 0); break; + case RW_S_LATCH: + block->page.lock.s_lock(); + break; case RW_SX_LATCH: - slot.type= MTR_MEMO_PAGE_SX_FIX; block->page.lock.u_lock(); ut_ad(!block->page.is_io_fixed()); break; case RW_X_LATCH: - slot.type= MTR_MEMO_PAGE_X_FIX; block->page.lock.x_lock(); ut_ad(!block->page.is_io_fixed()); } @@ -1116,7 +1106,8 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags) for (const mtr_memo_slot_t &slot : m_memo) { - if (!slot.object || !(flags & slot.type)) + ut_ad(slot.object); + if (!(flags & slot.type)) continue; buf_page_t *bpage= static_cast(slot.object); @@ -1137,30 +1128,84 @@ buf_block_t* mtr_t::memo_contains_page_flagged(const byte *ptr, ulint flags) /** Mark the given latched page as modified. 
@param block page that will be modified */ -void mtr_t::modify(const buf_block_t &block) +void mtr_t::set_modified(const buf_block_t &block) { - mtr_memo_slot_t *found= nullptr; + if (block.page.id().space() >= SRV_TMP_SPACE_ID) + { + const_cast(block).page.set_temp_modified(); + return; + } + + m_modifications= true; + + if (UNIV_UNLIKELY(m_log_mode == MTR_LOG_NONE)) + return; for (mtr_memo_slot_t &slot : m_memo) { if (slot.object == &block && slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX)) { - found= &slot; - break; + if (slot.type & MTR_MEMO_MODIFY) + ut_ad(m_made_dirty || block.page.oldest_modification() > 1); + else + { + slot.type= static_cast(slot.type | MTR_MEMO_MODIFY); + if (!m_made_dirty) + m_made_dirty= block.page.oldest_modification() <= 1; + } + return; } } - if (UNIV_UNLIKELY(!found)) + /* This must be PageConverter::update_page() in IMPORT TABLESPACE. */ + ut_ad(m_memo.empty()); + ut_ad(!block.page.in_LRU_list); +} + +void mtr_t::init(buf_block_t *b) +{ + const page_id_t id{b->page.id()}; + ut_ad(is_named_space(id.space())); + ut_ad(!m_freed_pages == !m_freed_space); + ut_ad(memo_contains_flagged(b, MTR_MEMO_PAGE_X_FIX)); + + if (id.space() >= SRV_TMP_SPACE_ID) + b->page.set_temp_modified(); + else { - /* This must be PageConverter::update_page() in IMPORT TABLESPACE. 
*/ - ut_ad(m_memo.empty()); - ut_ad(!block.page.in_LRU_list); - return; + for (mtr_memo_slot_t &slot : m_memo) + { + if (slot.object == b && slot.type & MTR_MEMO_PAGE_X_FIX) + { + slot.type= MTR_MEMO_PAGE_X_MODIFY; + m_modifications= true; + if (!m_made_dirty) + m_made_dirty= b->page.oldest_modification() <= 1; + goto found; + } + } + ut_ad("block not X-latched" == 0); } - found->type= static_cast(found->type | MTR_MEMO_MODIFY); - if (!m_made_dirty) - m_made_dirty= is_block_dirtied(block.page); + + found: + if (UNIV_LIKELY_NULL(m_freed_space) && + m_freed_space->id == id.space() && + m_freed_pages->remove_if_exists(id.page_no()) && + m_freed_pages->empty()) + { + delete m_freed_pages; + m_freed_pages= nullptr; + m_freed_space= nullptr; + } + + b->page.set_reinit(b->page.state() & buf_page_t::LRU_MASK); + + if (!is_logged()) + return; + + m_log.close(log_write(id, &b->page)); + m_last_offset= FIL_PAGE_TYPE; } /** Free a page. @@ -1176,20 +1221,23 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) buf_block_t *freed= nullptr; const page_id_t id{space.id, offset}; - for (auto it= m_memo.rbegin(); it != m_memo.rend(); it++) + for (auto it= m_memo.end(); it != m_memo.begin(); ) { + it--; + next: mtr_memo_slot_t &slot= *it; buf_block_t *block= static_cast(slot.object); - if (!block); - else if (block == freed) + ut_ad(block); + if (block == freed) { if (slot.type & (MTR_MEMO_PAGE_SX_FIX | MTR_MEMO_PAGE_X_FIX)) slot.type= MTR_MEMO_PAGE_X_FIX; else { ut_ad(slot.type == MTR_MEMO_BUF_FIX); - slot.object= nullptr; block->page.unfix(); + m_memo.erase(it, it + 1); + goto next; } } else if (slot.type & (MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) && @@ -1203,7 +1251,17 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) ut_d(bool upgraded=) block->page.lock.x_lock_upgraded(); ut_ad(upgraded); } - slot.type= MTR_MEMO_PAGE_X_MODIFY; + if (id.space() >= SRV_TMP_SPACE_ID) + { + block->page.set_temp_modified(); + slot.type= MTR_MEMO_PAGE_X_FIX; + } + else + { + 
slot.type= MTR_MEMO_PAGE_X_MODIFY; + if (!m_made_dirty) + m_made_dirty= block->page.oldest_modification() <= 1; + } #ifdef BTR_CUR_HASH_ADAPT if (block->index) btr_search_drop_page_hash_index(block, false); @@ -1212,8 +1270,6 @@ void mtr_t::free(const fil_space_t &space, uint32_t offset) } } - if (freed && !m_made_dirty) - m_made_dirty= is_block_dirtied(freed->page); m_log.close(log_write(id, nullptr)); } } diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 45c35bc6995..861095b421e 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1629,6 +1629,9 @@ inline dberr_t IndexPurge::purge_pessimistic_delete() noexcept dberr_t IndexPurge::purge() noexcept { btr_pcur_store_position(&m_pcur, &m_mtr); + m_mtr.commit(); + m_mtr.start(); + m_mtr.set_log_mode(MTR_LOG_NO_REDO); dberr_t err= purge_pessimistic_delete(); m_mtr.start(); diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 5da0e2479f9..a4471104543 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2016, 2022, MariaDB Corporation. +Copyright (c) 2016, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -314,8 +314,10 @@ row_ins_clust_index_entry_by_modify( } if (mode != BTR_MODIFY_TREE) { - ut_ad((mode & ulint(~BTR_ALREADY_S_LATCHED)) - == BTR_MODIFY_LEAF); + ut_ad(mode == BTR_MODIFY_LEAF + || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED + || mode == BTR_MODIFY_ROOT_AND_LEAF + || mode == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); /* Try optimistic updating of the record, keeping changes within the page */ @@ -1621,8 +1623,7 @@ row_ins_check_foreign_constraint( dtuple_set_n_fields_cmp(entry, foreign->n_fields); pcur.btr_cur.page_cur.index = check_index; - err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, 0, - &mtr); + err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); if (UNIV_UNLIKELY(err != DB_SUCCESS)) { goto end_scan; } @@ -2119,7 +2120,7 @@ row_ins_scan_sec_index_for_duplicate( pcur.btr_cur.page_cur.index = index; trx_t* const trx = thr_get_trx(thr); dberr_t err = btr_pcur_open(entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, - &pcur, 0, mtr); + &pcur, mtr); if (err != DB_SUCCESS) { goto end_scan; } @@ -2539,8 +2540,8 @@ row_ins_index_entry_big_rec( index->set_modified(mtr); } - dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, 0, &mtr); + dberr_t error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_MODIFY_TREE, + &pcur, &mtr); if (error != DB_SUCCESS) { return error; } @@ -2634,11 +2635,10 @@ row_ins_clust_index_entry_low( que_thr_t* thr) /*!< in: query thread */ { btr_pcur_t pcur; - btr_cur_t* cursor; dberr_t err = DB_SUCCESS; big_rec_t* big_rec = NULL; mtr_t mtr; - ib_uint64_t auto_inc = 0; + uint64_t auto_inc = 0; mem_heap_t* offsets_heap = NULL; rec_offs offsets_[REC_OFFS_NORMAL_SIZE]; rec_offs* offsets = offsets_; @@ -2654,7 +2654,7 @@ row_ins_clust_index_entry_low( ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index)); ut_ad(!trx->in_rollback); - 
mtr_start(&mtr); + mtr.start(); if (index->table->is_temporary()) { /* Disable REDO logging as the lifetime of temp-tables is @@ -2694,6 +2694,13 @@ row_ins_clust_index_entry_low( dfield->type.mtype, dfield->type.prtype & DATA_UNSIGNED); + if (auto_inc + && mode != BTR_MODIFY_TREE) { + mode = btr_latch_mode( + BTR_MODIFY_ROOT_AND_LEAF + ^ BTR_MODIFY_LEAF + ^ mode); + } } } } @@ -2703,20 +2710,26 @@ row_ins_clust_index_entry_low( the function will return in both low_match and up_match of the cursor sensible values */ pcur.btr_cur.page_cur.index = index; - err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, auto_inc, &mtr); + err = btr_pcur_open(entry, PAGE_CUR_LE, mode, &pcur, &mtr); if (err != DB_SUCCESS) { index->table->file_unreadable = true; -commit_exit: +err_exit: mtr.commit(); goto func_exit; } - cursor = btr_pcur_get_btr_cur(&pcur); - cursor->thr = thr; + if (auto_inc) { + buf_block_t* root + = mtr.at_savepoint(mode != BTR_MODIFY_ROOT_AND_LEAF); + ut_ad(index->page == root->page.id().page_no()); + page_set_autoinc(root, auto_inc, &mtr, false); + } + + btr_pcur_get_btr_cur(&pcur)->thr = thr; #ifdef UNIV_DEBUG { - page_t* page = btr_cur_get_page(cursor); + page_t* page = btr_pcur_get_page(&pcur); rec_t* first_rec = page_rec_get_next( page_get_infimum_rec(page)); @@ -2725,7 +2738,7 @@ commit_exit: } #endif /* UNIV_DEBUG */ - block = btr_cur_get_block(cursor); + block = btr_pcur_get_block(&pcur); DBUG_EXECUTE_IF("row_ins_row_level", goto skip_bulk_insert;); @@ -2747,7 +2760,7 @@ commit_exit: if (err != DB_SUCCESS) { trx->error_state = err; - goto commit_exit; + goto err_exit; } if (index->table->n_rec_locks) { @@ -2787,7 +2800,7 @@ skip_bulk_insert: ut_ad(index->is_instant()); ut_ad(!dict_index_is_online_ddl(index)); - const rec_t* rec = btr_cur_get_rec(cursor); + const rec_t* rec = btr_pcur_get_rec(&pcur); if (rec_get_info_bits(rec, page_rec_is_comp(rec)) & REC_INFO_MIN_REC_FLAG) { @@ -2796,16 +2809,17 @@ skip_bulk_insert: goto err_exit; } - 
ut_ad(!row_ins_must_modify_rec(cursor)); + ut_ad(!row_ins_must_modify_rec(&pcur.btr_cur)); goto do_insert; } - if (rec_is_metadata(btr_cur_get_rec(cursor), *index)) { + if (rec_is_metadata(btr_pcur_get_rec(&pcur), *index)) { goto do_insert; } if (n_uniq - && (cursor->up_match >= n_uniq || cursor->low_match >= n_uniq)) { + && (pcur.btr_cur.up_match >= n_uniq + || pcur.btr_cur.low_match >= n_uniq)) { if (flags == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG @@ -2813,7 +2827,7 @@ skip_bulk_insert: /* Set no locks when applying log in online table rebuild. Only check for duplicates. */ err = row_ins_duplicate_error_in_clust_online( - n_uniq, entry, cursor, + n_uniq, entry, &pcur.btr_cur, &offsets, &offsets_heap); switch (err) { @@ -2824,26 +2838,24 @@ skip_bulk_insert: /* fall through */ case DB_SUCCESS_LOCKED_REC: case DB_DUPLICATE_KEY: - trx->error_info = cursor->index(); + trx->error_info = index; } } else { /* Note that the following may return also DB_LOCK_WAIT */ err = row_ins_duplicate_error_in_clust( - flags, cursor, entry, thr); + flags, &pcur.btr_cur, entry, thr); } if (err != DB_SUCCESS) { -err_exit: - mtr_commit(&mtr); - goto func_exit; + goto err_exit; } } /* Note: Allowing duplicates would qualify for modification of an existing record as the new entry is exactly same as old entry. 
*/ - if (row_ins_must_modify_rec(cursor)) { + if (row_ins_must_modify_rec(&pcur.btr_cur)) { /* There is already an index entry with a long enough common prefix, we must convert the insert into a modify of an existing record */ @@ -2861,10 +2873,13 @@ do_insert: rec_t* insert_rec; if (mode != BTR_MODIFY_TREE) { - ut_ad(mode == BTR_MODIFY_LEAF || - mode == BTR_MODIFY_LEAF_ALREADY_LATCHED); + ut_ad(mode == BTR_MODIFY_LEAF + || mode == BTR_MODIFY_LEAF_ALREADY_LATCHED + || mode == BTR_MODIFY_ROOT_AND_LEAF + || mode + == BTR_MODIFY_ROOT_AND_LEAF_ALREADY_LATCHED); err = btr_cur_optimistic_insert( - flags, cursor, &offsets, &offsets_heap, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); } else { @@ -2873,17 +2888,15 @@ do_insert: goto err_exit; } - DEBUG_SYNC_C("before_insert_pessimitic_row_ins_clust"); - err = btr_cur_optimistic_insert( - flags, cursor, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); if (err == DB_FAIL) { err = btr_cur_pessimistic_insert( - flags, cursor, + flags, &pcur.btr_cur, &offsets, &offsets_heap, entry, &insert_rec, &big_rec, n_ext, thr, &mtr); @@ -2995,9 +3008,7 @@ row_ins_sec_index_entry_low( rtr_init_rtr_info(&rtr_info, false, &cursor, index, false); rtr_info_update_btr(&cursor, &rtr_info); - err = btr_cur_search_to_nth_level(0, entry, - PAGE_CUR_RTREE_INSERT, - search_mode, &cursor, &mtr); + err = rtr_insert_leaf(&cursor, entry, search_mode, &mtr); if (err == DB_SUCCESS && search_mode == BTR_MODIFY_LEAF && rtr_info.mbr_adj) { @@ -3013,9 +3024,8 @@ row_ins_sec_index_entry_low( } else { index->set_modified(mtr); } - err = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_RTREE_INSERT, - search_mode, &cursor, &mtr); + err = rtr_insert_leaf(&cursor, entry, + search_mode, &mtr); } DBUG_EXECUTE_IF( @@ -3031,8 +3041,8 @@ row_ins_sec_index_entry_low( : BTR_INSERT)); } - err = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, - search_mode, &cursor, 
&mtr); + err = cursor.search_leaf(entry, PAGE_CUR_LE, search_mode, + &mtr); } if (err != DB_SUCCESS) { @@ -3108,12 +3118,12 @@ row_ins_sec_index_entry_low( prevent any insertion of a duplicate by another transaction. Let us now reposition the cursor and continue the insertion (bypassing the change buffer). */ - err = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, + err = cursor.search_leaf( + entry, PAGE_CUR_LE, btr_latch_mode(search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE)), - &cursor, &mtr); + &mtr); if (err != DB_SUCCESS) { goto func_exit; } diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 94d69d88fb5..0743dc2bb50 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1696,8 +1696,8 @@ err_exit: mtr->start(); index->set_modified(*mtr); pcur->btr_cur.page_cur.index = index; - error = btr_pcur_open(entry, PAGE_CUR_LE, - BTR_PURGE_TREE, pcur, 0, mtr); + error = btr_pcur_open(entry, PAGE_CUR_LE, BTR_PURGE_TREE, pcur, + mtr); if (error) { goto err_exit; } @@ -1780,8 +1780,8 @@ row_log_table_apply_delete( mtr_start(&mtr); index->set_modified(mtr); - dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, - BTR_PURGE_TREE, &pcur, 0, &mtr); + dberr_t err = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_PURGE_TREE, &pcur, + &mtr); if (err != DB_SUCCESS) { goto all_done; } @@ -1917,8 +1917,8 @@ row_log_table_apply_update( mtr.start(); index->set_modified(mtr); - error = btr_pcur_open(old_pk, PAGE_CUR_LE, - BTR_MODIFY_TREE, &pcur, 0, &mtr); + error = btr_pcur_open(old_pk, PAGE_CUR_LE, BTR_MODIFY_TREE, &pcur, + &mtr); if (error != DB_SUCCESS) { func_exit: mtr.commit(); @@ -3084,11 +3084,8 @@ row_log_apply_op_low( record. The operation may already have been performed, depending on when the row in the clustered index was scanned. */ - *error = btr_cur_search_to_nth_level(0, entry, PAGE_CUR_LE, - has_index_lock - ? BTR_MODIFY_TREE - : BTR_MODIFY_LEAF, - &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, has_index_lock + ? 
BTR_MODIFY_TREE : BTR_MODIFY_LEAF, &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3138,9 +3135,9 @@ row_log_apply_op_low( mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, + &mtr); if (UNIV_UNLIKELY(*error != DB_SUCCESS)) { goto func_exit; } @@ -3242,9 +3239,9 @@ insert_the_rec: mtr_commit(&mtr); mtr_start(&mtr); index->set_modified(mtr); - *error = btr_cur_search_to_nth_level( - 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, &mtr); + *error = cursor.search_leaf(entry, PAGE_CUR_LE, + BTR_MODIFY_TREE, + &mtr); if (*error != DB_SUCCESS) { break; } diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index caa2646eee2..535762ee105 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -150,9 +150,8 @@ public: false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = btr_cur_search_to_nth_level( - 0, dtuple, PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_LEAF, &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_LEAF, &mtr); /* It need to update MBR in parent entry, so change search mode to BTR_MODIFY_TREE */ @@ -164,10 +163,8 @@ public: rtr_info_update_btr(&ins_cur, &rtr_info); mtr.start(); index->set_modified(mtr); - error = btr_cur_search_to_nth_level( - 0, dtuple, - PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_TREE, &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_TREE, &mtr); } if (error == DB_SUCCESS) { @@ -189,11 +186,8 @@ public: &ins_cur, index, false); rtr_info_update_btr(&ins_cur, &rtr_info); - error = btr_cur_search_to_nth_level( - 0, dtuple, - PAGE_CUR_RTREE_INSERT, - BTR_MODIFY_TREE, - &ins_cur, &mtr); + error = rtr_insert_leaf(&ins_cur, dtuple, + BTR_MODIFY_TREE, &mtr); if (error == DB_SUCCESS) { error = btr_cur_pessimistic_insert( diff --git 
a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc index e216d9faa3b..753b42332fc 100644 --- a/storage/innobase/row/row0purge.cc +++ b/storage/innobase/row/row0purge.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -104,7 +104,7 @@ bool row_purge_remove_clust_if_poss_low( /*===============================*/ purge_node_t* node, /*!< in/out: row purge node */ - btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ + btr_latch_mode mode) /*!< in: BTR_MODIFY_LEAF or BTR_PURGE_TREE */ { dict_index_t* index = dict_table_get_first_index(node->table); table_id_t table_id = 0; @@ -342,17 +342,20 @@ row_purge_remove_sec_if_poss_tree( ibool success = TRUE; dberr_t err; mtr_t mtr; - enum row_search_result search_result; log_free_check(); mtr.start(); index->set_modified(mtr); pcur.btr_cur.page_cur.index = index; - search_result = row_search_index_entry(entry, BTR_PURGE_TREE, - &pcur, &mtr); + if (index->is_spatial()) { + if (!rtr_search(entry, BTR_PURGE_TREE, &pcur, &mtr)) { + goto found; + } + goto func_exit; + } - switch (search_result) { + switch (row_search_index_entry(entry, BTR_PURGE_TREE, &pcur, &mtr)) { case ROW_NOT_FOUND: /* Not found. This is a legitimate condition. In a rollback, InnoDB will remove secondary recs that would @@ -381,6 +384,7 @@ row_purge_remove_sec_if_poss_tree( which cannot be purged yet, requires its existence. If some requires, we should do nothing. 
*/ +found: if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) { /* Remove the index record, which should have been @@ -439,8 +443,6 @@ row_purge_remove_sec_if_poss_leaf( { mtr_t mtr; btr_pcur_t pcur; - enum btr_latch_mode mode; - enum row_search_result search_result; bool success = true; log_free_check(); @@ -449,31 +451,27 @@ row_purge_remove_sec_if_poss_leaf( mtr.start(); index->set_modified(mtr); - /* Change buffering is disabled for spatial index and - virtual index. */ - mode = (index->type & (DICT_SPATIAL | DICT_VIRTUAL)) - ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF; pcur.btr_cur.page_cur.index = index; /* Set the purge node for the call to row_purge_poss_sec(). */ pcur.btr_cur.purge_node = node; if (index->is_spatial()) { pcur.btr_cur.thr = NULL; - index->lock.u_lock(SRW_LOCK_CALL); - search_result = row_search_index_entry( - entry, mode, &pcur, &mtr); - index->lock.u_unlock(); - } else { - /* Set the query thread, so that ibuf_insert_low() will be - able to invoke thd_get_trx(). */ - pcur.btr_cur.thr = static_cast( - que_node_get_parent(node)); - search_result = row_search_index_entry( - entry, mode, &pcur, &mtr); + if (!rtr_search(entry, BTR_MODIFY_LEAF, &pcur, &mtr)) { + goto found; + } + goto func_exit; } - switch (search_result) { + /* Set the query thread, so that ibuf_insert_low() will be + able to invoke thd_get_trx(). */ + pcur.btr_cur.thr = static_cast(que_node_get_parent(node)); + + switch (row_search_index_entry(entry, index->has_virtual() + ? BTR_MODIFY_LEAF : BTR_PURGE_LEAF, + &pcur, &mtr)) { case ROW_FOUND: +found: /* Before attempting to purge a record, check if it is safe to do so. 
*/ if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) { diff --git a/storage/innobase/row/row0row.cc b/storage/innobase/row/row0row.cc index b998d27d836..4a00b2a430e 100644 --- a/storage/innobase/row/row0row.cc +++ b/storage/innobase/row/row0row.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2018, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2022, MariaDB Corporation. +Copyright (c) 2018, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1216,7 +1216,7 @@ row_search_on_row_ref( & REC_INFO_MIN_REC_FLAG; } else { ut_a(ref->n_fields == index->n_uniq); - if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, 0, mtr) + if (btr_pcur_open(ref, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { return false; } @@ -1278,21 +1278,13 @@ row_search_index_entry( ut_ad(dtuple_check_typed(entry)); - if (pcur->index()->is_spatial()) { - if (rtr_pcur_open(pcur->index(), entry, mode, pcur, mtr)) { - return ROW_NOT_FOUND; - } - } else { - if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, 0, mtr) - != DB_SUCCESS) { - return ROW_NOT_FOUND; - } + if (btr_pcur_open(entry, PAGE_CUR_LE, mode, pcur, mtr) != DB_SUCCESS) { + return ROW_NOT_FOUND; } switch (btr_pcur_get_btr_cur(pcur)->flag) { case BTR_CUR_DELETE_REF: ut_ad(!(~mode & BTR_DELETE)); - ut_ad(!pcur->index()->is_spatial()); return(ROW_NOT_DELETED_REF); case BTR_CUR_DEL_MARK_IBUF: diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 87e3ca43b1c..e44cc466295 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -2,7 +2,7 @@ Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. 
Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -4776,7 +4776,7 @@ wait_table_again: pcur->btr_cur.thr = thr; pcur->old_rec = nullptr; - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { if (!prebuilt->rtr_info) { prebuilt->rtr_info = rtr_create_rtr_info( set_also_gap_locks, true, @@ -4792,10 +4792,13 @@ wait_table_again: prebuilt->rtr_info->search_tuple = search_tuple; prebuilt->rtr_info->search_mode = mode; } - } - err = btr_pcur_open_with_no_init(search_tuple, mode, - BTR_SEARCH_LEAF, pcur, &mtr); + err = rtr_search_leaf(pcur, search_tuple, mode, &mtr); + } else { + err = btr_pcur_open_with_no_init(search_tuple, mode, + BTR_SEARCH_LEAF, + pcur, &mtr); + } if (err != DB_SUCCESS) { page_corrupted: @@ -5771,8 +5774,7 @@ next_rec_after_check: if (spatial_search) { /* No need to do store restore for R-tree */ - mtr.commit(); - mtr.start(); + mtr.rollback_to_savepoint(0); } else if (mtr_extra_clust_savepoint) { /* We must release any clustered index latches if we are moving to the next non-clustered diff --git a/storage/innobase/row/row0uins.cc b/storage/innobase/row/row0uins.cc index 6567019a33d..50196e78092 100644 --- a/storage/innobase/row/row0uins.cc +++ b/storage/innobase/row/row0uins.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -233,7 +233,7 @@ func_exit: if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_INSERT_METADATA) { /* When rolling back the very first instant ADD COLUMN operation, reset the root page to the basic state. 
*/ - err = btr_reset_instant(*index, true, &mtr); + btr_reset_instant(*index, true, &mtr); } btr_pcur_commit_specify_mtr(&node->pcur, &mtr); @@ -268,21 +268,32 @@ row_undo_ins_remove_sec_low( pcur.btr_cur.page_cur.index = index; row_mtr_start(&mtr, index, !modify_leaf); - if (modify_leaf) { - mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; - mtr_s_lock_index(index, &mtr); - } else { - ut_ad(mode == BTR_PURGE_TREE); - mtr_sx_lock_index(index, &mtr); - } - if (index->is_spatial()) { mode = modify_leaf - ? btr_latch_mode(BTR_MODIFY_LEAF_ALREADY_LATCHED + ? btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_pcur_get_btr_cur(&pcur)->thr = thr; + if (rtr_search(entry, mode, &pcur, &mtr)) { + goto func_exit; + } + + if (rec_get_deleted_flag( + btr_pcur_get_rec(&pcur), + dict_table_is_comp(index->table))) { + ib::error() << "Record found in index " << index->name + << " is deleted marked on insert rollback."; + ut_ad(0); + } + goto found; + } else if (modify_leaf) { + mode = BTR_MODIFY_LEAF_ALREADY_LATCHED; + mtr_s_lock_index(index, &mtr); + } else { + ut_ad(mode == BTR_PURGE_TREE); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); } switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { @@ -295,15 +306,7 @@ row_undo_ins_remove_sec_low( case ROW_NOT_FOUND: break; case ROW_FOUND: - if (dict_index_is_spatial(index) - && rec_get_deleted_flag( - btr_pcur_get_rec(&pcur), - dict_table_is_comp(index->table))) { - ib::error() << "Record found in index " << index->name - << " is deleted marked on insert rollback."; - ut_ad(0); - } - + found: btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); if (modify_leaf) { @@ -318,6 +321,7 @@ row_undo_ins_remove_sec_low( } } +func_exit: btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc index 2d04dca4003..50e15e03cc9 100644 --- a/storage/innobase/row/row0umod.cc +++ 
b/storage/innobase/row/row0umod.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2022, MariaDB Corporation. +Copyright (c) 2017, 2023, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -133,8 +133,7 @@ row_undo_mod_clust_low( && node->ref == &trx_undo_metadata && btr_cur_get_index(btr_cur)->table->instant && node->update->info_bits == REC_INFO_METADATA_ADD) { - err = btr_reset_instant(*btr_cur_get_index(btr_cur), - false, mtr); + btr_reset_instant(*btr_cur->index(), false, mtr); } } @@ -490,7 +489,6 @@ row_undo_mod_del_mark_or_remove_sec_low( dberr_t err = DB_SUCCESS; mtr_t mtr; mtr_t mtr_vers; - row_search_result search_result; const bool modify_leaf = mode == BTR_MODIFY_LEAF; row_mtr_start(&mtr, index, !modify_leaf); @@ -505,6 +503,11 @@ row_undo_mod_del_mark_or_remove_sec_low( | BTR_RTREE_UNDO_INS) : btr_latch_mode(BTR_PURGE_TREE | BTR_RTREE_UNDO_INS); btr_cur->thr = thr; + if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { + goto found; + } else { + goto func_exit; + } } else if (!index->is_committed()) { /* The index->online_status may change if the index is or was being created online, but not committed yet. 
It @@ -514,7 +517,8 @@ row_undo_mod_del_mark_or_remove_sec_low( mtr_s_lock_index(index, &mtr); } else { ut_ad(mode == BTR_PURGE_TREE); - mtr_sx_lock_index(index, &mtr); + mode = BTR_PURGE_TREE_ALREADY_LATCHED; + mtr_x_lock_index(index, &mtr); } } else { /* For secondary indexes, @@ -523,9 +527,8 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_ad(!dict_index_is_online_ddl(index)); } - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); - - switch (UNIV_EXPECT(search_result, ROW_FOUND)) { + switch (UNIV_EXPECT(row_search_index_entry(entry, mode, &pcur, &mtr), + ROW_FOUND)) { case ROW_NOT_FOUND: /* In crash recovery, the secondary index record may be missing if the UPDATE did not have time to insert @@ -547,6 +550,7 @@ row_undo_mod_del_mark_or_remove_sec_low( ut_error; } +found: /* We should remove the index record if no prior version of the row, which cannot be purged yet, requires its existence. If some requires, we should delete mark the record. */ @@ -665,13 +669,12 @@ row_undo_mod_del_unmark_sec_and_undo_update( trx_t* trx = thr_get_trx(thr); const ulint flags = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG; - row_search_result search_result; const auto orig_mode = mode; pcur.btr_cur.page_cur.index = index; ut_ad(trx->id != 0); - if (dict_index_is_spatial(index)) { + if (index->is_spatial()) { /* FIXME: Currently we do a 2-pass search for the undo due to avoid undel-mark a wrong rec in rolling back in partial update. 
Later, we could log some info in @@ -686,9 +689,22 @@ try_again: btr_cur->thr = thr; - search_result = row_search_index_entry(entry, mode, &pcur, &mtr); + if (index->is_spatial()) { + if (!rtr_search(entry, mode, &pcur, &mtr)) { + goto found; + } - switch (search_result) { + if (mode != orig_mode && btr_cur->rtr_info->fd_del) { + mode = orig_mode; + btr_pcur_close(&pcur); + mtr.commit(); + goto try_again; + } + + goto not_found; + } + + switch (row_search_index_entry(entry, mode, &pcur, &mtr)) { mem_heap_t* heap; mem_heap_t* offsets_heap; rec_offs* offsets; @@ -699,17 +715,7 @@ try_again: flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */ ut_error; case ROW_NOT_FOUND: - /* For spatial index, if first search didn't find an - undel-marked rec, try to find a del-marked rec. */ - if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { - if (mode != orig_mode) { - mode = orig_mode; - btr_pcur_close(&pcur); - mtr_commit(&mtr); - goto try_again; - } - } - +not_found: if (btr_cur->up_match >= dict_index_get_n_unique(index) || btr_cur->low_match >= dict_index_get_n_unique(index)) { ib::warn() << "Record in index " << index->name @@ -767,6 +773,7 @@ try_again: break; case ROW_FOUND: +found: btr_rec_set_deleted(btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), &mtr); heap = mem_heap_create( diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index a3f940adff5..fe88fce58a2 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2022, MariaDB Corporation. +Copyright (c) 2015, 2023, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1832,12 +1832,10 @@ row_upd_sec_index_entry( que_thr_t* thr) /*!< in: query thread */ { mtr_t mtr; - const rec_t* rec; btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* entry; dict_index_t* index; - btr_cur_t* btr_cur; dberr_t err = DB_SUCCESS; trx_t* trx = thr_get_trx(thr); btr_latch_mode mode; @@ -1876,10 +1874,6 @@ row_upd_sec_index_entry( case SRV_TMP_SPACE_ID: mtr.set_log_mode(MTR_LOG_NO_REDO); flags = BTR_NO_LOCKING_FLAG; - if (index->is_spatial()) { - mode = btr_latch_mode(BTR_MODIFY_LEAF - | BTR_RTREE_DELETE_MARK); - } break; default: index->set_modified(mtr); @@ -1888,26 +1882,35 @@ row_upd_sec_index_entry( flags = index->table->no_rollback() ? BTR_NO_ROLLBACK : 0; /* We can only buffer delete-mark operations if there are no foreign key constraints referring to the index. */ - mode = index->is_spatial() - ? btr_latch_mode(BTR_MODIFY_LEAF - | BTR_RTREE_DELETE_MARK) - : referenced - ? BTR_MODIFY_LEAF : BTR_DELETE_MARK_LEAF; + if (!referenced) { + mode = BTR_DELETE_MARK_LEAF; + } break; } /* Set the query thread, so that ibuf_insert_low() will be able to invoke thd_get_trx(). 
*/ - btr_pcur_get_btr_cur(&pcur)->thr = thr; + pcur.btr_cur.thr = thr; pcur.btr_cur.page_cur.index = index; + if (index->is_spatial()) { + mode = btr_latch_mode(BTR_MODIFY_LEAF | BTR_RTREE_DELETE_MARK); + if (UNIV_LIKELY(!rtr_search(entry, mode, &pcur, &mtr))) { + goto found; + } + + if (pcur.btr_cur.rtr_info->fd_del) { + /* We found the record, but a delete marked */ + goto close; + } + + goto not_found; + } + search_result = row_search_index_entry(entry, mode, &pcur, &mtr); - btr_cur = btr_pcur_get_btr_cur(&pcur); - - rec = btr_cur_get_rec(btr_cur); - switch (search_result) { + const rec_t* rec; case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */ ut_error; break; @@ -1916,11 +1919,8 @@ row_upd_sec_index_entry( break; case ROW_NOT_FOUND: - if (dict_index_is_spatial(index) && btr_cur->rtr_info->fd_del) { - /* We found the record, but a delete marked */ - break; - } - +not_found: + rec = btr_pcur_get_rec(&pcur); ib::error() << "Record in index " << index->name << " of table " << index->table->name @@ -1934,7 +1934,9 @@ row_upd_sec_index_entry( #endif /* UNIV_DEBUG */ break; case ROW_FOUND: +found: ut_ad(err == DB_SUCCESS); + rec = btr_pcur_get_rec(&pcur); /* Delete mark the old index record; it can already be delete marked if we return after a lock wait in @@ -1943,14 +1945,14 @@ row_upd_sec_index_entry( rec, dict_table_is_comp(index->table))) { err = lock_sec_rec_modify_check_and_lock( flags, - btr_cur_get_block(btr_cur), - btr_cur_get_rec(btr_cur), index, thr, &mtr); + btr_pcur_get_block(&pcur), + btr_pcur_get_rec(&pcur), index, thr, &mtr); if (err != DB_SUCCESS) { break; } - btr_rec_set_deleted(btr_cur_get_block(btr_cur), - btr_cur_get_rec(btr_cur), + btr_rec_set_deleted(btr_pcur_get_block(&pcur), + btr_pcur_get_rec(&pcur), &mtr); #ifdef WITH_WSREP if (!referenced && foreign @@ -2009,6 +2011,7 @@ row_upd_sec_index_entry( } } +close: btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/storage/innobase/trx/trx0purge.cc 
b/storage/innobase/trx/trx0purge.cc index 8dfb6847a32..e1df09dd3e6 100644 --- a/storage/innobase/trx/trx0purge.cc +++ b/storage/innobase/trx/trx0purge.cc @@ -424,7 +424,6 @@ static dberr_t trx_purge_free_segment(trx_rseg_t *rseg, fil_addr_t hdr_addr) block->fix(); mtr.commit(); mtr.start(); - mtr.flag_modified(); rseg->latch.wr_lock(SRW_LOCK_CALL); rseg_hdr->page.lock.x_lock(); block->page.lock.x_lock();