From badf6de1715bbdddcaf8c1535747f94602aaa13f Mon Sep 17 00:00:00 2001 From: Weijun Huang Date: Sun, 12 Feb 2023 18:42:23 +0100 Subject: [PATCH 01/12] MDEV-30412: JSON_OBJECTAGG doesn't escape double quote in key --- mysql-test/main/func_json.result | 12 ++++++++++++ mysql-test/main/func_json.test | 8 ++++++++ sql/item_jsonfunc.cc | 2 +- 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/func_json.result b/mysql-test/main/func_json.result index 2a293fc72c7..b9df193d3bd 100644 --- a/mysql-test/main/func_json.result +++ b/mysql-test/main/func_json.result @@ -1623,5 +1623,17 @@ id doc {"$oid":"611c0a463b150154132f6636"} { "_id" : { "$oid" : "611c0a463b150154132f6636" }, "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : [ { "a" : 1.0 } ] } ] } ] } ] } ] } ] } ] } ] } ] } ] } ] } ] } ] } ] } ] } DROP TABLE arrNestTest; # +# MDEV-30412 JSON_OBJECTAGG doesn't escape double quote in key +# +SELECT JSON_OBJECTAGG('"', 1); +JSON_OBJECTAGG('"', 1) +{"\"":1} +SELECT JSON_OBJECTAGG('\"', 1); +JSON_OBJECTAGG('\"', 1) +{"\"":1} +SELECT JSON_OBJECTAGG('\\', 1); +JSON_OBJECTAGG('\\', 1) +{"\\":1} +# # End of 10.5 tests # diff --git a/mysql-test/main/func_json.test b/mysql-test/main/func_json.test index 35645bcb226..0a5d1638717 100644 --- a/mysql-test/main/func_json.test +++ b/mysql-test/main/func_json.test @@ -1067,6 +1067,14 @@ INSERT INTO test.arrNestTest (doc) VALUES ('{ "_id" : { "$oid" : "611c0a463b1501 SELECT * FROM arrNestTest; DROP TABLE arrNestTest; +--echo # +--echo # MDEV-30412 JSON_OBJECTAGG doesn't escape double quote in key +--echo # + +SELECT JSON_OBJECTAGG('"', 1); +SELECT JSON_OBJECTAGG('\"', 1); +SELECT JSON_OBJECTAGG('\\', 1); + --echo # --echo # End of 10.5 tests --echo # diff --git a/sql/item_jsonfunc.cc b/sql/item_jsonfunc.cc index 9843b6d14b1..b85adeff05d 100644 --- a/sql/item_jsonfunc.cc +++ b/sql/item_jsonfunc.cc @@ -4060,7 +4060,7 @@ bool Item_func_json_objectagg::add() result.append(", "); result.append("\""); - result.append(*key); + st_append_escaped(&result,key); result.append("\":"); buf.length(0); From 3eea2e8e10b6d8f5280e40478dcaac8961b74de4 Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Tue, 14 Feb 2023 14:35:35 +0530 Subject: [PATCH 02/12] MDEV-30551 InnoDB recovery hangs when buffer pool ran out of memory - During non-last batch of multi-batch recovery, InnoDB holds log_sys.mutex and preallocates the block which may intiate page flush, which may initiate log flush, which requires log_sys.mutex to acquire again. This leads to assert failure. So InnoDB recovery should release log_sys.mutex before preallocating the block. --- mysql-test/suite/innodb/r/alter_copy.result | 2 +- mysql-test/suite/innodb/t/alter_copy.test | 2 +- storage/innobase/buf/buf0lru.cc | 9 ++++++++- storage/innobase/log/log0recv.cc | 18 ++++++++++++++++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/mysql-test/suite/innodb/r/alter_copy.result b/mysql-test/suite/innodb/r/alter_copy.result index 4d6d55f01cf..22916598083 100644 --- a/mysql-test/suite/innodb/r/alter_copy.result +++ b/mysql-test/suite/innodb/r/alter_copy.result @@ -51,7 +51,7 @@ ADD INDEX(a,b,d), ADD INDEX(a,d,b), ADD INDEX(b,c,d), ADD INDEX(b,d,c), ALGORITHM=COPY; connection default; SET DEBUG_SYNC='now WAIT_FOR hung'; -# restart: --innodb-force-recovery=3 +# restart: --innodb-force-recovery=3 --debug_dbug=+d,recv_ran_out_of_buffer disconnect hang; #sql-alter.frm #sql-alter.ibd diff --git a/mysql-test/suite/innodb/t/alter_copy.test b/mysql-test/suite/innodb/t/alter_copy.test index e67f6f9a66b..f321308ce26 100644 --- a/mysql-test/suite/innodb/t/alter_copy.test +++ b/mysql-test/suite/innodb/t/alter_copy.test @@ -57,7 +57,7 @@ ALTER TABLE t ADD INDEX(b,c,d,a),ADD INDEX(b,c,a,d),ADD INDEX(b,a,c,d),ADD INDEX connection default; SET DEBUG_SYNC='now WAIT_FOR hung'; let $shutdown_timeout=0; ---let $restart_parameters= --innodb-force-recovery=3 +--let $restart_parameters= --innodb-force-recovery=3 --debug_dbug="+d,recv_ran_out_of_buffer" --source include/restart_mysqld.inc disconnect hang; let $shutdown_timeout=; diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index f7a3543c493..6d83d448bd6 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -408,6 +408,11 @@ buf_block_t *buf_LRU_get_free_block(bool have_mutex) mysql_mutex_assert_owner(&buf_pool.mutex); goto got_mutex; } + DBUG_EXECUTE_IF("recv_ran_out_of_buffer", + if (recv_recovery_is_on() + && recv_sys.apply_log_recs) { + goto flush_lru; + }); mysql_mutex_lock(&buf_pool.mutex); got_mutex: buf_LRU_check_size_of_non_data_objects(); @@ -493,7 +498,9 @@ not_found: removing the block from buf_pool.page_hash and buf_pool.LRU is fairly involved (particularly in case of ROW_FORMAT=COMPRESSED pages). We can do that in a separate patch sometime in future. */ - +#ifndef DBUG_OFF +flush_lru: +#endif if (!buf_flush_LRU(innodb_lru_flush_size)) { MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT); ++flush_failures; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 4a6527962f6..eb2931adf0e 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2692,8 +2692,23 @@ void recv_sys_t::apply(bool last_batch) fil_system.extend_to_recv_size(); + /* Release the log_sys mutex in non-last batches of multi-batch + recovery mode and recv_sys.mutex before preallocating the + block because while preallocating the block which may initiate + log flush which requires log_sys mutex to acquire again, which + should be acquired before recv_sys.mutex in order to avoid + deadlocks. */ + mutex_exit(&mutex); + if (!last_batch) + mysql_mutex_unlock(&log_sys.mutex); + + mysql_mutex_assert_not_owner(&log_sys.mutex); buf_block_t *free_block= buf_LRU_get_free_block(false); + if (!last_batch) + mysql_mutex_lock(&log_sys.mutex); + mutex_enter(&mutex); + for (map::iterator p= pages.begin(); p != pages.end(); ) { const page_id_t page_id= p->first; @@ -2708,7 +2723,10 @@ void recv_sys_t::apply(bool last_batch) if (UNIV_LIKELY(!!recover_low(page_id, p, mtr, free_block))) { mutex_exit(&mutex); + if (!last_batch) mysql_mutex_unlock(&log_sys.mutex); + mysql_mutex_assert_not_owner(&log_sys.mutex); free_block= buf_LRU_get_free_block(false); + if (!last_batch) mysql_mutex_lock(&log_sys.mutex); mutex_enter(&mutex); break; } From 1a5c7552ea3a0233e7abff56a167c3a532c7da0a Mon Sep 17 00:00:00 2001 From: Thirunarayanan Balathandayuthapani Date: Tue, 14 Feb 2023 14:36:17 +0530 Subject: [PATCH 03/12] MDEV-30552 InnoDB recovery crashes when error handling scenario - InnoDB fails to reset the after_apply variable before applying the redo log in last batch during multi-batch recovery. --- .../suite/innodb/r/recovery_memory.result | 20 ++++++++++++++++++ .../suite/innodb/t/recovery_memory.test | 21 +++++++++++++++++++ storage/innobase/ibuf/ibuf0ibuf.cc | 4 ++++ storage/innobase/log/log0recv.cc | 5 +++++ 4 files changed, 50 insertions(+) diff --git a/mysql-test/suite/innodb/r/recovery_memory.result b/mysql-test/suite/innodb/r/recovery_memory.result index 4fa31009130..6faea097616 100644 --- a/mysql-test/suite/innodb/r/recovery_memory.result +++ b/mysql-test/suite/innodb/r/recovery_memory.result @@ -1,3 +1,7 @@ +call mtr.add_suppression("InnoDB: The change buffer is corrupted"); +call mtr.add_suppression("InnoDB: Plugin initialization aborted at srv0start.cc"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); CREATE TABLE t1(c TEXT, KEY(c(3072)))ENGINE=InnoDB; CREATE PROCEDURE dorepeat() LOOP @@ -10,3 +14,19 @@ CALL dorepeat(); connection default; # restart: --innodb_buffer_pool_size=5242880 DROP TABLE t1; +DROP PROCEDURE dorepeat; +# +# MDEV-30552 InnoDB recovery crashes when error +# handling scenario +# +SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; +CREATE TABLE t1(f1 INT NOT NULL)ENGINE=InnoDB; +INSERT INTO t1 SELECT * FROM seq_1_to_65536; +# restart: --innodb_buffer_pool_size=5242880 --debug_dbug=+d,ibuf_init_corrupt +# restart +SHOW CREATE TABLE t1; +Table Create Table +t1 CREATE TABLE `t1` ( + `f1` int(11) NOT NULL +) ENGINE=InnoDB DEFAULT CHARSET=latin1 COLLATE=latin1_swedish_ci +DROP TABLE t1; diff --git a/mysql-test/suite/innodb/t/recovery_memory.test b/mysql-test/suite/innodb/t/recovery_memory.test index d9afd52c499..e723ba25d36 100644 --- a/mysql-test/suite/innodb/t/recovery_memory.test +++ b/mysql-test/suite/innodb/t/recovery_memory.test @@ -1,5 +1,10 @@ --source include/have_innodb.inc --source include/big_test.inc +--source include/have_sequence.inc +call mtr.add_suppression("InnoDB: The change buffer is corrupted"); +call mtr.add_suppression("InnoDB: Plugin initialization aborted at srv0start.cc"); +call mtr.add_suppression("Plugin 'InnoDB' init function returned error"); +call mtr.add_suppression("Plugin 'InnoDB' registration as a STORAGE ENGINE failed."); CREATE TABLE t1(c TEXT, KEY(c(3072)))ENGINE=InnoDB; DELIMITER |; @@ -19,3 +24,19 @@ let $shutdown_timeout=0; let $restart_parameters=--innodb_buffer_pool_size=5242880; --source include/restart_mysqld.inc DROP TABLE t1; +DROP PROCEDURE dorepeat; + +--echo # +--echo # MDEV-30552 InnoDB recovery crashes when error +--echo # handling scenario +--echo # +SET DEBUG_DBUG="+d,ib_log_checkpoint_avoid_hard"; +CREATE TABLE t1(f1 INT NOT NULL)ENGINE=InnoDB; +INSERT INTO t1 SELECT * FROM seq_1_to_65536; +let $shutdown_timeout=0; +let $restart_parameters=--innodb_buffer_pool_size=5242880 --debug_dbug="+d,ibuf_init_corrupt"; +--source include/restart_mysqld.inc +let $restart_parameters=; +--source include/restart_mysqld.inc +SHOW CREATE TABLE t1; +DROP TABLE t1; diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc index d88c2b0027e..dff0ad57057 100644 --- a/storage/innobase/ibuf/ibuf0ibuf.cc +++ b/storage/innobase/ibuf/ibuf0ibuf.cc @@ -450,6 +450,10 @@ err_exit: root = buf_block_get_frame(block); } + DBUG_EXECUTE_IF("ibuf_init_corrupt", + err = DB_CORRUPTION; + goto err_exit;); + if (page_is_comp(root) || fil_page_get_type(root) != FIL_PAGE_INDEX || btr_page_get_index_id(root) != DICT_IBUF_ID_MIN) { err = DB_CORRUPTION; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index eb2931adf0e..9b4ceb4f03e 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -3642,6 +3642,11 @@ completed: mysql_mutex_unlock(&log_sys.mutex); return(DB_ERROR); } + + /* In case of multi-batch recovery, + redo log for the last batch is not + applied yet. */ + ut_d(recv_sys.after_apply = false); } } else { ut_ad(!rescan || recv_sys.pages.empty()); From 690fcfbd2954f9470047ce754c2a5c2fdfd15cf3 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Wed, 18 Jan 2023 16:16:34 +0000 Subject: [PATCH 04/12] Fix S3 engine Coverity hits Very minor hits found by Coverity for the S3 engine. --- storage/maria/ha_s3.cc | 2 +- storage/maria/s3_func.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/storage/maria/ha_s3.cc b/storage/maria/ha_s3.cc index 158fa8d0430..b75481b3fc2 100644 --- a/storage/maria/ha_s3.cc +++ b/storage/maria/ha_s3.cc @@ -233,7 +233,7 @@ ha_create_table_option s3_table_option_list[]= ha_s3::ha_s3(handlerton *hton, TABLE_SHARE *table_arg) - :ha_maria(hton, table_arg), in_alter_table(S3_NO_ALTER) + :ha_maria(hton, table_arg), in_alter_table(S3_NO_ALTER), open_args(NULL) { /* Remove things that S3 doesn't support */ int_table_flags&= ~(HA_BINLOG_ROW_CAPABLE | HA_BINLOG_STMT_CAPABLE | diff --git a/storage/maria/s3_func.c b/storage/maria/s3_func.c index 491a8e0a323..3d18ba8800d 100644 --- a/storage/maria/s3_func.c +++ b/storage/maria/s3_func.c @@ -351,7 +351,7 @@ int aria_copy_to_s3(ms3_st *s3_client, const char *aws_bucket, if (display) printf("Copying frm file %s\n", filename); - end= strmov(aws_path_end,"/frm"); + strmov(aws_path_end,"/frm"); convert_frm_to_s3_format(alloc_block); /* Note that frm is not compressed! */ @@ -1232,7 +1232,7 @@ static void convert_index_to_s3_format(uchar *header, ulong block_size, uchar *base_pos; uint base_offset; - memcpy(state.header.file_version, header, sizeof(state.header)); + memcpy(&state.header, header, sizeof(state.header)); base_offset= mi_uint2korr(state.header.base_pos); base_pos= header + base_offset; @@ -1251,7 +1251,7 @@ static void convert_index_to_disk_format(uchar *header) uchar *base_pos; uint base_offset; - memcpy(state.header.file_version, header, sizeof(state.header)); + memcpy(&state.header, header, sizeof(state.header)); base_offset= mi_uint2korr(state.header.base_pos); base_pos= header + base_offset; From 192427e37d2b066f9f681cc1b998b2efdc407c55 Mon Sep 17 00:00:00 2001 From: Monty Date: Wed, 15 Feb 2023 13:56:33 +0200 Subject: [PATCH 05/12] MDEV-30333 Wrong result with not_null_range_scan and LEFT JOIN with empty table There was a bug in JOIN::make_notnull_conds_for_range_scans() when clearing TABLE->tmp_set, which was used to mark fields that could not be null. This function was only used if 'not_null_range_scan=on' is set. The effect was that tmp_set contained a 'random value' and this caused the optimizer to think that some fields could not be null. FLUSH TABLES clears tmp_set and because of this things worked temporarily. Fixed by clearing tmp_set properly. --- mysql-test/main/empty_table.result | 58 ++++++++++++++++++++++++++++++ mysql-test/main/empty_table.test | 36 ++++++++++++++++++- sql/sql_select.cc | 6 ++-- 3 files changed, 95 insertions(+), 5 deletions(-) diff --git a/mysql-test/main/empty_table.result b/mysql-test/main/empty_table.result index 2bca3e792fa..90aec2eda3b 100644 --- a/mysql-test/main/empty_table.result +++ b/mysql-test/main/empty_table.result @@ -16,3 +16,61 @@ ERROR 42S02: Table 'test.t2' doesn't exist show status like "Empty_queries"; Variable_name Value Empty_queries 2 +# End of 4.1 tests +# +# MDEV-30333 Wrong result with not_null_range_scan and LEFT JOIN with empty table +# +set @save_optimizer_switch=@@optimizer_switch; +CREATE TABLE t1 (a INT, b INT) ENGINE=MyISAM; +INSERT INTO t1 (b) VALUES (1),(2); +CREATE TABLE t2 (c INT) ENGINE=MyISAM; +SET optimizer_switch= 'not_null_range_scan=off'; +explain extended SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 system NULL NULL NULL NULL 0 0.00 Const row not found +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using where; Using filesort +Warnings: +Note 1003 select `test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` is null order by `test`.`t1`.`b` +SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +b +1 +2 +SET optimizer_switch = 'not_null_range_scan=on'; +explain extended SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +id select_type table type possible_keys key key_len ref rows filtered Extra +1 SIMPLE t2 system NULL NULL NULL NULL 0 0.00 Const row not found +1 SIMPLE t1 ALL NULL NULL NULL NULL 2 100.00 Using where; Using filesort +Warnings: +Note 1003 select `test`.`t1`.`b` AS `b` from `test`.`t1` where `test`.`t1`.`a` is null order by `test`.`t1`.`b` +SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +b +1 +2 +flush tables; +SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +b +1 +2 +drop table t1,t2; +# Second test in MDEV-30333 +CREATE TABLE t1 (a int, b varchar(10)) ENGINE=MyISAM; +INSERT INTO t1 VALUES (69,'foo'),(71,'bar'); +CREATE TABLE t2 (c int) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1),(2); +CREATE TABLE t3 (d int, e int, KEY(e)) ENGINE=MyISAM; +SELECT * FROM t1 LEFT JOIN t2 LEFT JOIN t3 ON t3.e = t3.d ON 1; +a b c d e +69 foo 1 NULL NULL +71 bar 1 NULL NULL +69 foo 2 NULL NULL +71 bar 2 NULL NULL +SET optimizer_switch = 'not_null_range_scan=on'; +SELECT * FROM t1 LEFT JOIN t2 LEFT JOIN t3 ON t3.e = t3.d ON 1; +a b c d e +69 foo 1 NULL NULL +71 bar 1 NULL NULL +69 foo 2 NULL NULL +71 bar 2 NULL NULL +DROP TABLE t1, t2, t3; +set @@optimizer_switch=@save_optimizer_switch; +End of 10.5 tests diff --git a/mysql-test/main/empty_table.test b/mysql-test/main/empty_table.test index 754671868ba..a17b0c897d5 100644 --- a/mysql-test/main/empty_table.test +++ b/mysql-test/main/empty_table.test @@ -21,4 +21,38 @@ drop table t1; select * from t2; show status like "Empty_queries"; -# End of 4.1 tests +--echo # End of 4.1 tests + +--echo # +--echo # MDEV-30333 Wrong result with not_null_range_scan and LEFT JOIN with empty table +--echo # + +set @save_optimizer_switch=@@optimizer_switch; +CREATE TABLE t1 (a INT, b INT) ENGINE=MyISAM; +INSERT INTO t1 (b) VALUES (1),(2); +CREATE TABLE t2 (c INT) ENGINE=MyISAM; +SET optimizer_switch= 'not_null_range_scan=off'; # Default +explain extended SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +SET optimizer_switch = 'not_null_range_scan=on'; +explain extended SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +flush tables; +SELECT b FROM t1 LEFT JOIN t2 ON t2.c = a WHERE a IS NULL ORDER BY b; +drop table t1,t2; + +--echo # Second test in MDEV-30333 + +CREATE TABLE t1 (a int, b varchar(10)) ENGINE=MyISAM; +INSERT INTO t1 VALUES (69,'foo'),(71,'bar'); +CREATE TABLE t2 (c int) ENGINE=MyISAM; +INSERT INTO t2 VALUES (1),(2); +CREATE TABLE t3 (d int, e int, KEY(e)) ENGINE=MyISAM; +SELECT * FROM t1 LEFT JOIN t2 LEFT JOIN t3 ON t3.e = t3.d ON 1; +SET optimizer_switch = 'not_null_range_scan=on'; +SELECT * FROM t1 LEFT JOIN t2 LEFT JOIN t3 ON t3.e = t3.d ON 1; +DROP TABLE t1, t2, t3; +set @@optimizer_switch=@save_optimizer_switch; + +--echo End of 10.5 tests + diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 42711270f60..c28f104804f 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -29683,7 +29683,6 @@ void JOIN::make_notnull_conds_for_range_scans() { DBUG_ENTER("JOIN::make_notnull_conds_for_range_scans"); - if (impossible_where || !optimizer_flag(thd, OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN)) { @@ -29769,7 +29768,6 @@ bool build_notnull_conds_for_range_scans(JOIN *join, Item *cond, table_map allowed) { THD *thd= join->thd; - DBUG_ENTER("build_notnull_conds_for_range_scans"); for (JOIN_TAB *s= join->join_tab; @@ -29777,13 +29775,13 @@ bool build_notnull_conds_for_range_scans(JOIN *join, Item *cond, { /* Clear all needed bitmaps to mark found fields */ if ((allowed & s->table->map) && - !(s->table->map && join->const_table_map)) + !(s->table->map & join->const_table_map)) bitmap_clear_all(&s->table->tmp_set); } /* Find all null-rejected fields assuming that cond is null-rejected and - only formulas over tables from 'allowed' are to be taken into account + only formulas over tables from 'allowed' are to be taken into account */ if (cond->find_not_null_fields(allowed)) DBUG_RETURN(true); From e83ae66e4a08f5efcbc7d86d47603db6f1570009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Sat, 4 Feb 2023 12:10:31 +0200 Subject: [PATCH 06/12] Update comments to match new debug_sync implementation --- mysql-test/main/debug_sync.test | 11 ++++------- sql/debug_sync.cc | 7 ++++--- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/mysql-test/main/debug_sync.test b/mysql-test/main/debug_sync.test index 1dc3dc9c71e..6e75ba9624c 100644 --- a/mysql-test/main/debug_sync.test +++ b/mysql-test/main/debug_sync.test @@ -231,15 +231,12 @@ SHOW VARIABLES LIKE 'DEBUG_SYNC'; # immediately after setting of the DEBUG_SYNC variable. # So it is executed before the SET statement ends. # -# NOTE: There is only one global signal (say "signal post" or "flag mast"). -# A SIGNAL action writes its signal into it ("sets a flag"). -# The signal persists until explicitly overwritten. +# NOTE: There can be multiple active signals at the same time. +# A SIGNAL action appends its signal into signals set. +# The signal persists until waited on. # To avoid confusion for later tests, it is recommended to clear -# the signal by signalling "empty" ("setting the 'empty' flag"): -# SET DEBUG_SYNC= 'now SIGNAL empty'; -# Preferably you can reset the whole facility with: +# the signal set by running # SET DEBUG_SYNC= 'RESET'; -# The signal is then '' (really empty) which connot be done otherwise. # # diff --git a/sql/debug_sync.cc b/sql/debug_sync.cc index aa6956060b4..eac111d32d7 100644 --- a/sql/debug_sync.cc +++ b/sql/debug_sync.cc @@ -1344,7 +1344,8 @@ static bool debug_sync_eval_action(THD *thd, char *action_str, char *action_end) /* Try NO_CLEAR_EVENT. */ - if (!my_strcasecmp(system_charset_info, token, "NO_CLEAR_EVENT")) { + if (!my_strcasecmp(system_charset_info, token, "NO_CLEAR_EVENT")) + { action->clear_event= false; /* Get next token. If none follows, set action. */ if (!(ptr = debug_sync_token(&token, &token_length, ptr, action_end))) goto set_action; @@ -1634,8 +1635,8 @@ static void debug_sync_execute(THD *thd, st_debug_sync_action *action) /* - Wait until global signal string matches the wait_for string. - Interrupt when thread or query is killed or facility disabled. + Wait until the signal set contains the wait_for string. + Interrupt when thread or query is killed or facility is disabled. The facility can become disabled when some thread cannot get the required dynamic memory allocated. */ From d2b773d913cdcd35a727643bc16927527a5ee4bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Sat, 4 Feb 2023 12:11:15 +0200 Subject: [PATCH 07/12] Whitespace fix --- sql/sql_hset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/sql_hset.h b/sql/sql_hset.h index c9ffc8a560b..41573fb5f03 100644 --- a/sql/sql_hset.h +++ b/sql/sql_hset.h @@ -40,7 +40,7 @@ public: Hash_set(PSI_memory_key psi_key, CHARSET_INFO *charset, ulong default_array_elements, size_t key_offset, size_t key_length, my_hash_get_key get_key, - void (*free_element)(void*),uint flags) + void (*free_element)(void*), uint flags) { my_hash_init(psi_key, &m_hash, charset, default_array_elements, key_offset, key_length, get_key, free_element, flags); From 4afa3b64c4e5ca12d755124befc373e0f35ff1e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vicen=C8=9Biu=20Ciorbaru?= Date: Wed, 15 Feb 2023 16:20:25 +0200 Subject: [PATCH 08/12] MDEV-30324: Wrong result upon SELECT DISTINCT ... WITH TIES WITH TIES would not take effect if SELECT DISTINCT was used in a context where an INDEX is used to resolve the ORDER BY clause. WITH TIES relies on the `JOIN::order` to contain the non-constant fields to test the equality of ORDER BY fiels required for WITH TIES. The cause of the problem was a premature removal of the `JOIN::order` member during a DISTINCT optimization. This lead to WITH TIES code assuming ORDER BY only contained "constant" elements. Disable this optimization when WITH TIES is in effect. (side-note: the order by removal does not impact any current tests, thus it will be removed in a future version) Reviewed by: monty@mariadb.org --- mysql-test/main/fetch_first.result | 28 ++++++++++++++++++++++++++++ mysql-test/main/fetch_first.test | 19 +++++++++++++++++++ sql/sql_select.cc | 2 +- 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/mysql-test/main/fetch_first.result b/mysql-test/main/fetch_first.result index 69a0336d722..df182381d1c 100644 --- a/mysql-test/main/fetch_first.result +++ b/mysql-test/main/fetch_first.result @@ -1378,3 +1378,31 @@ a bar foo DROP TABLE t; +# +# MDEV-30324: Wrong result upon SELECT DISTINCT .. WITH TIES using index +# +CREATE TABLE t1 (a int, b char(3), KEY (a)); +INSERT INTO t1 VALUES (2,'foo'),(3,'bar'),(3,'bar'),(3,'zzz'); +EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 1 ROWS WITH TIES; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 5 NULL 1 Using temporary +SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 1 ROWS WITH TIES; +a b +2 foo +EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 2 ROWS WITH TIES; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 index NULL a 5 NULL 2 Using temporary +SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 2 ROWS WITH TIES; +a b +2 foo +3 bar +3 zzz +EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 3 ROWS WITH TIES; +id select_type table type possible_keys key key_len ref rows Extra +1 SIMPLE t1 ALL NULL NULL NULL NULL 4 Using temporary; Using filesort +SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 3 ROWS WITH TIES; +a b +2 foo +3 bar +3 zzz +DROP TABLE t1; diff --git a/mysql-test/main/fetch_first.test b/mysql-test/main/fetch_first.test index 62cdd717f81..98bbf1ca06b 100644 --- a/mysql-test/main/fetch_first.test +++ b/mysql-test/main/fetch_first.test @@ -1059,3 +1059,22 @@ SELECT a FROM t ORDER BY a FETCH FIRST 2 ROWS WITH TIES; # Cleanup DROP TABLE t; + +--echo # +--echo # MDEV-30324: Wrong result upon SELECT DISTINCT .. WITH TIES using index +--echo # +CREATE TABLE t1 (a int, b char(3), KEY (a)); +INSERT INTO t1 VALUES (2,'foo'),(3,'bar'),(3,'bar'),(3,'zzz'); + +EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 1 ROWS WITH TIES; +--sorted_result +SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 1 ROWS WITH TIES; +EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 2 ROWS WITH TIES; +--sorted_result +SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 2 ROWS WITH TIES; +EXPLAIN SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 3 ROWS WITH TIES; +--sorted_result +SELECT DISTINCT a, b FROM t1 ORDER BY a FETCH FIRST 3 ROWS WITH TIES; + +# Cleanup +DROP TABLE t1; diff --git a/sql/sql_select.cc b/sql/sql_select.cc index e0beda6ec61..57bf9363b47 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -4170,7 +4170,7 @@ JOIN::optimize_distinct() } /* Optimize "select distinct b from t1 order by key_part_1 limit #" */ - if (order && skip_sort_order) + if (order && skip_sort_order && !unit->lim.is_with_ties()) { /* Should already have been optimized away */ DBUG_ASSERT(ordered_index_usage == ordered_index_order_by); From 5300c0fb7632e7e49a22997297bf731e691d3c24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Wed, 15 Feb 2023 18:16:41 +0200 Subject: [PATCH 09/12] MDEV-30657 InnoDB: Not applying UNDO_APPEND due to corruption This almost completely reverts commit acd23da4c2363511aae7d984c24cc6847aa3f19c and retains a safe optimization: recv_sys_t::parse(): Remove any old redo log records for the truncated tablespace, to free up memory earlier. If recovery consists of multiple batches, then recv_sys_t::apply() will must invoke recv_sys_t::trim() again to avoid wrongly applying old log records to an already truncated undo tablespace. --- storage/innobase/include/log0recv.h | 12 +++++++++--- storage/innobase/log/log0recv.cc | 16 ++++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 73d5724f612..e4a8f0d25a2 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -266,9 +266,15 @@ private: @param lsn log sequence number of the shrink operation */ inline void trim(const page_id_t page_id, lsn_t lsn); - /** Truncated undo tablespace size for which truncate has been logged - (indexed by page_id_t::space() - srv_undo_space_id_start), or 0 */ - unsigned truncated_undo_spaces[127]; + /** Undo tablespaces for which truncate has been logged + (indexed by page_id_t::space() - srv_undo_space_id_start) */ + struct trunc + { + /** log sequence number of FILE_CREATE, or 0 if none */ + lsn_t lsn; + /** truncated size of the tablespace, or 0 if not truncated */ + unsigned pages; + } truncated_undo_spaces[127]; public: /** The contents of the doublewrite buffer */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 9b4ceb4f03e..c4217d7a1cb 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1952,7 +1952,8 @@ same_page: /* The entire undo tablespace will be reinitialized by innodb_undo_log_truncate=ON. Discard old log for all pages. */ trim({space_id, 0}, recovered_lsn); - truncated_undo_spaces[space_id - srv_undo_space_id_start]= page_no; + truncated_undo_spaces[space_id - srv_undo_space_id_start]= + { recovered_lsn, page_no }; if (undo_space_trunc) undo_space_trunc(space_id); #endif @@ -2677,15 +2678,22 @@ void recv_sys_t::apply(bool last_batch) for (auto id= srv_undo_tablespaces_open; id--;) { - if (unsigned pages= truncated_undo_spaces[id]) + const trunc& t= truncated_undo_spaces[id]; + if (t.lsn) { - if (fil_space_t *space= fil_space_get(id + srv_undo_space_id_start)) + /* The entire undo tablespace will be reinitialized by + innodb_undo_log_truncate=ON. Discard old log for all pages. + Even though we recv_sys_t::parse() already invoked trim(), + this will be needed in case recovery consists of multiple batches + (there was an invocation with !last_batch). */ + trim({id + srv_undo_space_id_start, 0}, t.lsn); + if (fil_space_t *space = fil_space_get(id + srv_undo_space_id_start)) { ut_ad(UT_LIST_GET_LEN(space->chain) == 1); fil_node_t *file= UT_LIST_GET_FIRST(space->chain); ut_ad(file->is_open()); os_file_truncate(file->name, file->handle, - os_offset_t{pages} << srv_page_size_shift, true); + os_offset_t{t.pages} << srv_page_size_shift, true); } } } From 9c15799462d6a2673a6158eb1717d222c07cced4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Feb 2023 08:28:14 +0200 Subject: [PATCH 10/12] MDEV-30397: MariaDB crash due to DB_FAIL reported for a corrupted page buf_read_page_low(): Map the buf_page_t::read_complete() return value DB_FAIL to DB_PAGE_CORRUPTED. The purpose of the DB_FAIL return value is to avoid error log noise when read-ahead brings in an unused page that is typically filled with NUL bytes. If a synchronous read is bringing in a corrupted page where the page frame does not contain the expected tablespace identifier and page number, that must be treated as an attempt to read a corrupted page. The correct error code for this is DB_PAGE_CORRUPTED. The error code DB_FAIL is not handled by row_mysql_handle_errors(). This was missed in commit 0b47c126e31cddda1e94588799599e138400bcf8 (MDEV-13542). --- storage/innobase/buf/buf0buf.cc | 6 +++--- storage/innobase/buf/buf0rea.cc | 3 +++ storage/innobase/include/buf0buf.h | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 644d8680484..b17c49ab751 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -3449,8 +3449,7 @@ or decrypt/decompress just failed. @retval DB_SUCCESS if page has been read and is not corrupted @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but -after decryption normal page checksum does not match. -@retval DB_TABLESPACE_DELETED if accessed tablespace is not found */ +after decryption normal page checksum does not match. */ static dberr_t buf_page_check_corrupt(buf_page_t *bpage, const fil_node_t &node) { @@ -3507,7 +3506,8 @@ static dberr_t buf_page_check_corrupt(buf_page_t *bpage, @param node data file @return whether the operation succeeded @retval DB_PAGE_CORRUPTED if the checksum fails -@retval DB_DECRYPTION_FAILED if the page cannot be decrypted */ +@retval DB_DECRYPTION_FAILED if the page cannot be decrypted +@retval DB_FAIL if the page contains the wrong ID */ dberr_t buf_page_t::read_complete(const fil_node_t &node) { const page_id_t expected_id{id()}; diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index 2f15fa62796..b20b105a4c4 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -331,6 +331,9 @@ nothing_read: /* The i/o was already completed in space->io() */ *err = bpage->read_complete(*fio.node); space->release(); + if (*err == DB_FAIL) { + *err = DB_PAGE_CORRUPTED; + } } return true; diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index d33c5513dc5..b4d8c8b76ac 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -773,7 +773,8 @@ public: @param node data file @return whether the operation succeeded @retval DB_PAGE_CORRUPTED if the checksum fails - @retval DB_DECRYPTION_FAILED if the page cannot be decrypted */ + @retval DB_DECRYPTION_FAILED if the page cannot be decrypted + @retval DB_FAIL if the page contains the wrong ID */ dberr_t read_complete(const fil_node_t &node); /** Note that a block is no longer dirty, while not removing From 54c0ac72e300acb4405529a827a8c02c4a5e5d15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Feb 2023 08:29:44 +0200 Subject: [PATCH 11/12] MDEV-30134 Assertion failed in buf_page_t::unfix() in buf_pool_t::watch_unset() buf_pool_t::watch_set(): Always buffer-fix a block if one was found, no matter if it is a watch sentinel or a buffer page. The type of the block descriptor will be rechecked in buf_page_t::watch_unset(). Do not expect the caller to acquire the page hash latch. Starting with commit bd5a6403cace36c6ed428cde62e35adcd3f7e7d0 it is safe to release buf_pool.mutex before acquiring a buf_pool.page_hash latch. buf_page_get_low(): Adjust to the changed buf_pool_t::watch_set(). This simplifies the logic and fixes a bug that was reproduced when using debug builds and the setting innodb_change_buffering_debug=1. --- storage/innobase/buf/buf0buf.cc | 71 ++++++++++++------------------ storage/innobase/include/buf0buf.h | 12 +++-- 2 files changed, 33 insertions(+), 50 deletions(-) diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index b17c49ab751..47cc915a11d 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -1951,28 +1951,22 @@ static void buf_relocate(buf_page_t *bpage, buf_page_t *dpage) buf_pool.page_hash.replace(chain, bpage, dpage); } -/** Register a watch for a page identifier. The caller must hold an -exclusive page hash latch. The *hash_lock may be released, -relocated, and reacquired. -@param id page identifier -@param chain hash table chain with exclusively held page_hash -@return a buffer pool block corresponding to id -@retval nullptr if the block was not present, and a watch was installed */ -inline buf_page_t *buf_pool_t::watch_set(const page_id_t id, - buf_pool_t::hash_chain &chain) +buf_page_t *buf_pool_t::watch_set(const page_id_t id, + buf_pool_t::hash_chain &chain) { ut_ad(&chain == &page_hash.cell_get(id.fold())); - ut_ad(page_hash.lock_get(chain).is_write_locked()); + page_hash.lock_get(chain).lock(); -retry: - if (buf_page_t *bpage= page_hash.get(id, chain)) + buf_page_t *bpage= page_hash.get(id, chain); + + if (bpage) { - if (!watch_is_sentinel(*bpage)) - /* The page was loaded meanwhile. */ - return bpage; - /* Add to an existing watch. */ +got_block: bpage->fix(); - return nullptr; + if (watch_is_sentinel(*bpage)) + bpage= nullptr; + page_hash.lock_get(chain).unlock(); + return bpage; } page_hash.lock_get(chain).unlock(); @@ -2001,25 +1995,23 @@ retry: w->set_state(buf_page_t::UNFIXED + 1); w->id_= id; - buf_page_t *bpage= page_hash.get(id, chain); + page_hash.lock_get(chain).lock(); + bpage= page_hash.get(id, chain); if (UNIV_LIKELY_NULL(bpage)) { w->set_state(buf_page_t::NOT_USED); - page_hash.lock_get(chain).lock(); mysql_mutex_unlock(&mutex); - goto retry; + goto got_block; } - page_hash.lock_get(chain).lock(); ut_ad(w->state() == buf_page_t::UNFIXED + 1); buf_pool.page_hash.append(chain, w); mysql_mutex_unlock(&mutex); + page_hash.lock_get(chain).unlock(); return nullptr; } ut_error; - mysql_mutex_unlock(&mutex); - return nullptr; } /** Stop watching whether a page has been read in. @@ -2467,16 +2459,13 @@ loop: case BUF_PEEK_IF_IN_POOL: return nullptr; case BUF_GET_IF_IN_POOL_OR_WATCH: - /* We cannot easily use a memory transaction here. */ - hash_lock.lock(); + /* Buffer-fixing inside watch_set() will prevent eviction */ block = reinterpret_cast (buf_pool.watch_set(page_id, chain)); - /* buffer-fixing will prevent eviction */ - state = block ? block->page.fix() : 0; - hash_lock.unlock(); if (block) { - goto got_block; + state = block->page.state(); + goto got_block_fixed; } return nullptr; @@ -2515,6 +2504,7 @@ loop: got_block: ut_ad(!block->page.in_zip_hash); state++; +got_block_fixed: ut_ad(state > buf_page_t::FREED); if (state > buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX) { @@ -2724,24 +2714,19 @@ re_evict: const bool evicted = buf_LRU_free_page(&block->page, true); space->release(); + if (!evicted) { + block->fix(); + } + + mysql_mutex_unlock(&buf_pool.mutex); + if (evicted) { - page_hash_latch& hash_lock - = buf_pool.page_hash.lock_get(chain); - hash_lock.lock(); - mysql_mutex_unlock(&buf_pool.mutex); - /* We may set the watch, as it would have - been set if the page were not in the - buffer pool in the first place. */ - block= reinterpret_cast( - mode == BUF_GET_IF_IN_POOL_OR_WATCH - ? buf_pool.watch_set(page_id, chain) - : buf_pool.page_hash.get(page_id, chain)); - hash_lock.unlock(); + if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { + buf_pool.watch_set(page_id, chain); + } return(NULL); } - block->fix(); - mysql_mutex_unlock(&buf_pool.mutex); buf_flush_sync(); state = block->page.state(); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index b4d8c8b76ac..2dccdda8a2f 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1470,14 +1470,12 @@ public: return !watch_is_sentinel(*page_hash.get(id, chain)); } - /** Register a watch for a page identifier. The caller must hold an - exclusive page hash latch. The *hash_lock may be released, - relocated, and reacquired. + /** Register a watch for a page identifier. @param id page identifier - @param chain hash table chain with exclusively held page_hash - @return a buffer pool block corresponding to id - @retval nullptr if the block was not present, and a watch was installed */ - inline buf_page_t *watch_set(const page_id_t id, hash_chain &chain); + @param chain page_hash.cell_get(id.fold()) + @return a buffer page corresponding to id + @retval nullptr if the block was not present in page_hash */ + buf_page_t *watch_set(const page_id_t id, hash_chain &chain); /** Stop watching whether a page has been read in. watch_set(id) must have returned nullptr before. From 201cfc33e671705dc0f452fc69a98e3a09de61eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Feb 2023 08:30:20 +0200 Subject: [PATCH 12/12] MDEV-30638 Deadlock between INSERT and InnoDB non-persistent statistics update This is a partial revert of commit 8b6a308e463f937eb8d2498b04967a222c83af90 (MDEV-29883) and a follow-up to the merge commit 394fc71f4fa8f8b1b6d24adfead0ec45121d271e (MDEV-24569). The latching order related to any operation that accesses the allocation metadata of an InnoDB index tree is as follows: 1. Acquire dict_index_t::lock in non-shared mode. 2. Acquire the index root page latch in non-shared mode. 3. Possibly acquire further index page latches. Unless an exclusive dict_index_t::lock is held, this must follow the root-to-leaf, left-to-right order. 4. Acquire a *non-shared* fil_space_t::latch. 5. Acquire latches on the allocation metadata pages. 6. Possibly allocate and write some pages, or free some pages. btr_get_size_and_reserved(), dict_stats_update_transient_for_index(), dict_stats_analyze_index(): Acquire an exclusive fil_space_t::latch in order to avoid a deadlock in fseg_n_reserved_pages() in case of concurrent access to multiple indexes sharing the same "inode page". fseg_page_is_allocated(): Acquire an exclusive fil_space_t::latch in order to avoid deadlocks. All callers are holding latches on a buffer pool page, or an index, or both. Before commit edbde4a11fd0b6437202f8019a79911441b6fb32 (MDEV-24167) a third mode was available that would not conflict with the shared fil_space_t::latch acquired by ha_innobase::info_low(), i_s_sys_tablespaces_fill_table(), or i_s_tablespaces_encryption_fill_table(). Because those calls should be rather rare, it makes sense to use the simple rw_lock with only shared and exclusive modes. fil_crypt_get_page_throttle(): Avoid invoking fseg_page_is_allocated() on an allocation bitmap page (which can never be freed), to avoid acquiring a shared latch on top of an exclusive one. mtr_t::s_lock_space(), MTR_MEMO_SPACE_S_LOCK: Remove. --- storage/innobase/dict/dict0defrag_bg.cc | 2 +- storage/innobase/dict/dict0stats.cc | 4 ++-- storage/innobase/fil/fil0crypt.cc | 5 +++-- storage/innobase/fsp/fsp0fsp.cc | 2 +- storage/innobase/include/mtr0mtr.h | 16 ++-------------- storage/innobase/include/mtr0types.h | 4 +--- storage/innobase/mtr/mtr0mtr.cc | 13 +++---------- 7 files changed, 13 insertions(+), 33 deletions(-) diff --git a/storage/innobase/dict/dict0defrag_bg.cc b/storage/innobase/dict/dict0defrag_bg.cc index ea2914e52dc..bec6da8e6af 100644 --- a/storage/innobase/dict/dict0defrag_bg.cc +++ b/storage/innobase/dict/dict0defrag_bg.cc @@ -314,7 +314,7 @@ btr_get_size_and_reserved( return ULINT_UNDEFINED; } - mtr->s_lock_space(index->table->space); + mtr->x_lock_space(index->table->space); ulint n = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF + root->page.frame, used, mtr); diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 0d630583daa..7bdccd899b8 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -1490,7 +1490,7 @@ invalid: goto invalid; } - mtr.s_lock_space(index->table->space); + mtr.x_lock_space(index->table->space); ulint dummy, size; index->stat_index_size @@ -2697,7 +2697,7 @@ empty_index: } uint16_t root_level = btr_page_get_level(root->page.frame); - mtr.s_lock_space(index->table->space); + mtr.x_lock_space(index->table->space); ulint dummy, size; result.index_size = fseg_n_reserved_pages(*root, PAGE_HEADER + PAGE_BTR_SEG_LEAF diff --git a/storage/innobase/fil/fil0crypt.cc b/storage/innobase/fil/fil0crypt.cc index 7410986c441..fe75b7d58fa 100644 --- a/storage/innobase/fil/fil0crypt.cc +++ b/storage/innobase/fil/fil0crypt.cc @@ -1677,8 +1677,9 @@ fil_crypt_get_page_throttle( return NULL; } - if (DB_SUCCESS_LOCKED_REC - != fseg_page_is_allocated(space, state->offset)) { + if (offset % (zip_size ? zip_size : srv_page_size) + && DB_SUCCESS_LOCKED_REC + != fseg_page_is_allocated(space, offset)) { /* page is already freed */ return NULL; } diff --git a/storage/innobase/fsp/fsp0fsp.cc b/storage/innobase/fsp/fsp0fsp.cc index e9f3106feb0..514083d35cc 100644 --- a/storage/innobase/fsp/fsp0fsp.cc +++ b/storage/innobase/fsp/fsp0fsp.cc @@ -2653,7 +2653,7 @@ dberr_t fseg_page_is_allocated(fil_space_t *space, unsigned page) mtr.start(); if (!space->is_owner()) - mtr.s_lock_space(space); + mtr.x_lock_space(space); if (page >= space->free_limit || page >= space->size_in_header); else if (const buf_block_t *b= diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 140cd3dc1b6..f3fe1841b2e 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -282,17 +282,6 @@ struct mtr_t { memo_push(lock, MTR_MEMO_SX_LOCK); } - /** Acquire a tablespace S-latch. - @param space tablespace */ - void s_lock_space(fil_space_t *space) - { - ut_ad(space->purpose == FIL_TYPE_TEMPORARY || - space->purpose == FIL_TYPE_IMPORT || - space->purpose == FIL_TYPE_TABLESPACE); - memo_push(space, MTR_MEMO_SPACE_S_LOCK); - space->s_lock(); - } - /** Acquire an exclusive tablespace latch. @param space tablespace */ void x_lock_space(fil_space_t *space); @@ -357,9 +346,8 @@ public: /** Check if we are holding tablespace latch @param space tablespace to search for - @param shared whether to look for shared latch, instead of exclusive @return whether space.latch is being held */ - bool memo_contains(const fil_space_t& space, bool shared= false) const + bool memo_contains(const fil_space_t& space) const MY_ATTRIBUTE((warn_unused_result)); #ifdef UNIV_DEBUG /** Check if we are holding an rw-latch in this mini-transaction @@ -412,7 +400,7 @@ public: break; case MTR_MEMO_MODIFY: case MTR_MEMO_S_LOCK: case MTR_MEMO_X_LOCK: case MTR_MEMO_SX_LOCK: - case MTR_MEMO_SPACE_X_LOCK: case MTR_MEMO_SPACE_S_LOCK: + case MTR_MEMO_SPACE_X_LOCK: ut_ad("invalid type" == 0); } #endif diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index 7acc255da36..465c20fe7d2 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -337,8 +337,6 @@ enum mtr_memo_type_t { MTR_MEMO_SX_LOCK = RW_SX_LATCH << 5, /** wr_lock() on fil_space_t::latch */ - MTR_MEMO_SPACE_X_LOCK = MTR_MEMO_SX_LOCK << 1, - /** rd_lock() on fil_space_t::latch */ - MTR_MEMO_SPACE_S_LOCK = MTR_MEMO_SX_LOCK << 2 + MTR_MEMO_SPACE_X_LOCK = MTR_MEMO_SX_LOCK << 1 }; #endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index 45a1424e572..2c004cb0aa6 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -55,9 +55,6 @@ void mtr_memo_slot_t::release() const static_cast(object)->set_committed_size(); static_cast(object)->x_unlock(); break; - case MTR_MEMO_SPACE_S_LOCK: - static_cast(object)->s_unlock(); - break; default: buf_page_t *bpage= static_cast(object); ut_d(const auto s=) @@ -912,18 +909,14 @@ bool mtr_t::have_u_or_x_latch(const buf_block_t &block) const /** Check if we are holding exclusive tablespace latch @param space tablespace to search for -@param shared whether to look for shared latch, instead of exclusive @return whether space.latch is being held */ -bool mtr_t::memo_contains(const fil_space_t& space, bool shared) const +bool mtr_t::memo_contains(const fil_space_t& space) const { - const mtr_memo_type_t type= shared - ? MTR_MEMO_SPACE_S_LOCK : MTR_MEMO_SPACE_X_LOCK; - for (const mtr_memo_slot_t &slot : m_memo) { - if (slot.object == &space && slot.type == type) + if (slot.object == &space && slot.type == MTR_MEMO_SPACE_X_LOCK) { - ut_ad(shared || space.is_owner()); + ut_ad(space.is_owner()); return true; } }