diff --git a/client/mysql.cc b/client/mysql.cc
index bfb93d60c5a..0b19a52771c 100644
--- a/client/mysql.cc
+++ b/client/mysql.cc
@@ -255,6 +255,7 @@ static my_bool column_types_flag;
 static my_bool preserve_comments= 0;
 static my_bool in_com_source, aborted= 0;
 static ulong opt_max_allowed_packet, opt_net_buffer_length;
+unsigned long quick_max_column_width= LONG_MAX;
 static uint verbose=0,opt_silent=0,opt_mysql_port=0, opt_local_infile=0;
 static uint my_end_arg;
 static char * opt_mysql_unix_port=0;
@@ -1821,6 +1822,10 @@ static struct my_option my_long_options[] =
    "Don't cache result, print it row by row. This may slow down the server "
    "if the output is suspended. Doesn't use history file.",
    &quick, &quick, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
+  {"quick-max-column-width", 0,
+   "Maximal field length limit in case of --quick", &quick_max_column_width,
+   &quick_max_column_width, 0, GET_ULONG, REQUIRED_ARG, LONG_MAX, 0, ULONG_MAX,
+   0, 1, 0},
   {"raw", 'r', "Write fields without conversion. Used with --batch.",
    &opt_raw_data, &opt_raw_data, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
   {"reconnect", 0, "Reconnect if the connection is lost.",
@@ -3877,7 +3882,7 @@ print_table_data(MYSQL_RES *result)
   {
     uint length= column_names ? field->name_length : 0;
     if (quick)
-      length= MY_MAX(length,field->length);
+      length= MY_MAX(length, MY_MIN(field->length, quick_max_column_width));
     else
       length= MY_MAX(length,field->max_length);
     if (length < 4 && !IS_NOT_NULL(field->flags))
diff --git a/debian/autobake-deb.sh b/debian/autobake-deb.sh
index 058f3fb73e7..3486473cf47 100755
--- a/debian/autobake-deb.sh
+++ b/debian/autobake-deb.sh
@@ -121,7 +121,7 @@ in
       replace_uring_with_aio
     fi
     ;&
-  "noble")
+  "noble"|"oracular")
     # mariadb-plugin-rocksdb s390x not supported by us (yet)
     # ubuntu doesn't support mips64el yet, so keep this just
     # in case something changes.
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc
index aff4b841348..f7e588f2038 100644
--- a/extra/mariabackup/xtrabackup.cc
+++ b/extra/mariabackup/xtrabackup.cc
@@ -2114,7 +2114,7 @@ static int prepare_export()
      IF_WIN("\"","") "\"%s\" --mysqld \"%s\""
      " --defaults-extra-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=."
      " --innodb --innodb-fast-shutdown=0 --loose-partition"
-     " --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu"
+     " --innodb-buffer-pool-size=%llu"
      " --console --skip-log-error --skip-log-bin --bootstrap %s< " BOOTSTRAP_FILENAME
      IF_WIN("\"",""),
      mariabackup_exe,
@@ -2128,7 +2128,7 @@ static int prepare_export()
      IF_WIN("\"","") "\"%s\" --mysqld"
      " --defaults-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=."
" --innodb --innodb-fast-shutdown=0 --loose-partition" - " --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu" + " --innodb-buffer-pool-size=%llu" " --console --log-error= --skip-log-bin --bootstrap %s< " BOOTSTRAP_FILENAME IF_WIN("\"",""), mariabackup_exe, diff --git a/mysql-test/main/client.result b/mysql-test/main/client.result new file mode 100644 index 00000000000..bc3b9f64e81 --- /dev/null +++ b/mysql-test/main/client.result @@ -0,0 +1,62 @@ +# +# MDEV-34704: Quick mode produces the bug for mariadb client +# +create table t1 (aaaaaaaaa char (5), aaaaa char (10), a char (127), b char(1)); +insert into t1 values ("X", "X", "X", "X"); +# --table --quick ++-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+ +| X | X | X | X | ++-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+ +# --table --quick --quick-max-column-width=0 ++-----------+-------+------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+-------+------+------+ +| X | X | X | X | ++-----------+-------+------+------+ +# --table --quick --quick-max-column-width=10 ++-----------+------------+------------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+------------+------------+------+ +| X | X | X | X | ++-----------+------------+------------+------+ +# --table --quick --quick-max-column-width=20 ++-----------+------------+----------------------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+------------+----------------------+------+ +| X | X | X | X | ++-----------+------------+----------------------+------+ +insert into t1 values ("01234", "0123456789", "01234567890123456789", "1"); +# --table --quick ++-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+ +| X | X | X | X | +| 01234 | 0123456789 | 01234567890123456789 | 1 | ++-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+ +# --table --quick --quick-max-column-width=0 ++-----------+-------+------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+-------+------+------+ +| X | X | X | X | +| 01234 | 0123456789 | 01234567890123456789 | 1 | ++-----------+-------+------+------+ +# --table --quick --quick-max-column-width=10 ++-----------+------------+------------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+------------+------------+------+ +| X | X | X | X | +| 01234 | 0123456789 | 01234567890123456789 | 1 | ++-----------+------------+------------+------+ +# --table --quick --quick-max-column-width=20 ++-----------+------------+----------------------+------+ +| aaaaaaaaa | aaaaa | a | b | ++-----------+------------+----------------------+------+ +| X | X | X | X | +| 01234 | 0123456789 | 01234567890123456789 | 1 | ++-----------+------------+----------------------+------+ +drop table t1; +# +# End of 10.7 tests 
+#
diff --git a/mysql-test/main/client.test b/mysql-test/main/client.test
new file mode 100644
index 00000000000..20df85f0807
--- /dev/null
+++ b/mysql-test/main/client.test
@@ -0,0 +1,46 @@
+--source include/not_embedded.inc
+
+--echo #
+--echo # MDEV-34704: Quick mode produces the bug for mariadb client
+--echo #
+
+
+create table t1 (aaaaaaaaa char (5), aaaaa char (10), a char (127), b char(1));
+insert into t1 values ("X", "X", "X", "X");
+
+
+--echo # --table --quick
+--exec echo "select * from test.t1;" | $MYSQL --table --quick 2>&1
+
+
+--echo # --table --quick --quick-max-column-width=0
+--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=0 2>&1
+
+
+--echo # --table --quick --quick-max-column-width=10
+--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=10 2>&1
+
+
+--echo # --table --quick --quick-max-column-width=20
+--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=20 2>&1
+
+insert into t1 values ("01234", "0123456789", "01234567890123456789", "1");
+
+--echo # --table --quick
+--exec echo "select * from test.t1;" | $MYSQL --table --quick 2>&1
+
+
+--echo # --table --quick --quick-max-column-width=0
+--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=0 2>&1
+
+--echo # --table --quick --quick-max-column-width=10
+--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=10 2>&1
+
+--echo # --table --quick --quick-max-column-width=20
+--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=20 2>&1
+
+drop table t1;
+
+--echo #
+--echo # End of 10.7 tests
+--echo #
diff --git a/mysql-test/main/item_types.result b/mysql-test/main/item_types.result
index 0193d33be6d..2b7e9bb6853 100644
--- a/mysql-test/main/item_types.result
+++ b/mysql-test/main/item_types.result
@@ -42,5 +42,24 @@ SELECT * FROM v WHERE f = '10.5.20';
 f
 drop view v;
 #
+# MDEV-34785: Assertion failure in Item_func_or_sum::do_build_clone
+# (Item_func_not_all)
+#
+CREATE VIEW t AS SELECT 0 AS a;
+SELECT * FROM t WHERE a=ALL (SELECT 0);
+a
+0
+DROP VIEW t;
+#
+# MDEV-34833: Assertion failure in Item_float::do_build_clone
+# (Item_static_float_func)
+#
+CREATE VIEW v1 (f,f2) AS SELECT connection_id(),pi();
+CREATE TABLE t1 AS SELECT 1;
+SELECT * FROM v1 JOIN t1 ON f=f2;
+f	f2	1
+DROP VIEW v1;
+DROP TABLE t1;
+#
 # End of 10.5 tests
 #
diff --git a/mysql-test/main/item_types.test b/mysql-test/main/item_types.test
index 2818ae582af..c8a915bf779 100644
--- a/mysql-test/main/item_types.test
+++ b/mysql-test/main/item_types.test
@@ -46,6 +46,27 @@ CREATE VIEW v AS SELECT version() AS f;
 SELECT * FROM v WHERE f = '10.5.20';
 drop view v;
 
+--echo #
+--echo # MDEV-34785: Assertion failure in Item_func_or_sum::do_build_clone
+--echo # (Item_func_not_all)
+--echo #
+
+CREATE VIEW t AS SELECT 0 AS a;
+SELECT * FROM t WHERE a=ALL (SELECT 0);
+DROP VIEW t;
+
+
+--echo #
+--echo # MDEV-34833: Assertion failure in Item_float::do_build_clone
+--echo # (Item_static_float_func)
+--echo #
+
+CREATE VIEW v1 (f,f2) AS SELECT connection_id(),pi();
+CREATE TABLE t1 AS SELECT 1;
+SELECT * FROM v1 JOIN t1 ON f=f2;
+DROP VIEW v1;
+DROP TABLE t1;
+
 --echo #
 --echo # End of 10.5 tests
 --echo #
diff --git a/mysql-test/suite/innodb/r/alter_copy_bulk,ON.rdiff b/mysql-test/suite/innodb/r/alter_copy_bulk,ON.rdiff
new file mode 100644
index 00000000000..091b1df219e
--- /dev/null
+++ b/mysql-test/suite/innodb/r/alter_copy_bulk,ON.rdiff
@@ -0,0 +1,11 @@
+--- alter_copy_bulk.result
++++ alter_copy_bulk.result
+@@ -4,7 +4,7 @@
+ INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
+ ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
+ ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
+-ERROR 23000: Duplicate entry 'aa' for key 'PRIMARY'
++ERROR 23000: Duplicate entry 'bb' for key 'PRIMARY'
+ INSERT INTO t1 VALUES(repeat('a', 200), 1);
+ ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
+ ERROR 23000: Duplicate entry '1' for key 'f2_2'
diff --git a/mysql-test/suite/innodb/r/alter_copy_bulk.result b/mysql-test/suite/innodb/r/alter_copy_bulk.result
index e34018bff9a..5ffde8ed63c 100644
--- a/mysql-test/suite/innodb/r/alter_copy_bulk.result
+++ b/mysql-test/suite/innodb/r/alter_copy_bulk.result
@@ -1,26 +1,50 @@
-SET @@alter_algorithm=COPY;
-Warnings:
-Warning 4200 The variable '@@alter_algorithm' is ignored. It only exists for compatibility with old installations and will be removed in a future release
 CREATE TABLE t1(f1 CHAR(200), f2 INT NOT NULL)engine=InnoDB;
 INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2;
-ALTER TABLE t1 FORCE;
+ALTER TABLE t1 ALGORITHM=COPY, FORCE;
 INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
-ALTER TABLE t1 ADD INDEX(f2);
-ALTER TABLE t1 ADD PRIMARY KEY(f1(2));
-ERROR 23000: Duplicate entry 'aaaaaaaa' for key 'PRIMARY'
+ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
+ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
+ERROR 23000: Duplicate entry 'aa' for key 'PRIMARY'
 INSERT INTO t1 VALUES(repeat('a', 200), 1);
-ALTER TABLE t1 ADD UNIQUE KEY(f2);
+ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
 ERROR 23000: Duplicate entry '1' for key 'f2_2'
 ALTER IGNORE TABLE t1 MODIFY f1 CHAR(200) NOT NULL;
 CREATE TABLE t2(f1 INT NOT NULL,
 FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB;
 INSERT INTO t2 VALUES(1);
-ALTER TABLE t2 FORCE;
+ALTER TABLE t2 ALGORITHM=COPY, FORCE;
 DROP TABLE t2, t1;
 CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB
 PARTITION BY HASH(f1) PARTITIONS 2;
 INSERT INTO t1 VALUES(1, 1);
 INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2;
-ALTER TABLE t1 FORCE;
+ALTER TABLE t1 ALGORITHM=COPY, FORCE;
 INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536;
-ALTER TABLE t1 ADD INDEX(f2);
+ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
 DROP TABLE t1;
+#
+# MDEV-34756 Validation of new foreign key skipped
+# if innodb_alter_copy_bulk=ON
+#
+CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY,
+f2 INT NOT NULL)ENGINE=InnoDB;
+CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,
+f2 INT NOT NULL)ENGINE=InnoDB;
+ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f1) REFERENCES t1(f1);
+affected rows: 0
+info: Records: 0  Duplicates: 0  Warnings: 0
+INSERT INTO t1 VALUES (1, 1);
+INSERT INTO t2 VALUES (1, 2);
+ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
+ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`#sql-alter`, CONSTRAINT `#sql-alter_ibfk_2` FOREIGN KEY (`f2`) REFERENCES `t1` (`f1`))
+INSERT INTO t1 VALUES(3, 1);
+SET STATEMENT foreign_key_checks=0 FOR
+ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
+affected rows: 1
+info: Records: 1  Duplicates: 0  Warnings: 0
+ALTER TABLE t1 ALGORITHM=COPY, FORCE;
+affected rows: 2
+info: Records: 2  Duplicates: 0  Warnings: 0
+ALTER TABLE t2 ALGORITHM=COPY, FORCE;
+affected rows: 1
+info: Records: 1  Duplicates: 0  Warnings: 0
+DROP TABLE t2, t1;
diff --git a/mysql-test/suite/innodb/r/log_file_size_online.result b/mysql-test/suite/innodb/r/log_file_size_online.result
index 1db2fdde576..e4c4e899995 100644
--- a/mysql-test/suite/innodb/r/log_file_size_online.result
+++ b/mysql-test/suite/innodb/r/log_file_size_online.result
@@ -19,8 +19,10 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
 Variable_name	Value
 innodb_log_file_size	4194304
 FOUND 1 /InnoDB: Resized log to 4\.000MiB/ in mysqld.1.err
-UPDATE t SET b='' WHERE a<10;
 SET GLOBAL innodb_log_file_size=5242880;
+connect con1,localhost,root;
+UPDATE t SET b='' WHERE a<10;
+connection default;
 SHOW VARIABLES LIKE 'innodb_log_file_size';
 Variable_name	Value
 innodb_log_file_size	5242880
@@ -28,6 +30,9 @@ SELECT global_value FROM information_schema.system_variables
 WHERE variable_name = 'innodb_log_file_size';
 global_value
 5242880
+connection con1;
+disconnect con1;
+connection default;
 # restart
 SELECT * FROM t WHERE a<10;
 a	b
@@ -40,6 +45,10 @@ a	b
 7	
 8	
 9	
+SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
+COUNT(*)	LENGTH(b)
+9	0
+19991	255
 SHOW VARIABLES LIKE 'innodb_log_file_size';
 Variable_name	Value
 innodb_log_file_size	5242880
diff --git a/mysql-test/suite/innodb/t/alter_copy_bulk.test b/mysql-test/suite/innodb/t/alter_copy_bulk.test
index 797724dcf2e..bc78661197c 100644
--- a/mysql-test/suite/innodb/t/alter_copy_bulk.test
+++ b/mysql-test/suite/innodb/t/alter_copy_bulk.test
@@ -1,26 +1,25 @@
 --source include/have_innodb.inc
 --source include/have_partition.inc
 --source include/have_sequence.inc
-SET @@alter_algorithm=COPY;
 
 CREATE TABLE t1(f1 CHAR(200), f2 INT NOT NULL)engine=InnoDB;
 INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2;
 # Buffer fits in the memory
-ALTER TABLE t1 FORCE;
+ALTER TABLE t1 ALGORITHM=COPY, FORCE;
 # Insert more entries
 INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
 # Alter should use temporary file for sorting
-ALTER TABLE t1 ADD INDEX(f2);
+ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
 # Error while buffering the insert operation
 --error ER_DUP_ENTRY
-ALTER TABLE t1 ADD PRIMARY KEY(f1(2));
+ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
 
 INSERT INTO t1 VALUES(repeat('a', 200), 1);
 # Error while applying the bulk insert operation
 --error ER_DUP_ENTRY
-ALTER TABLE t1 ADD UNIQUE KEY(f2);
+ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
 
 # Ignore shouldn't go through bulk operation
 ALTER IGNORE TABLE t1 MODIFY f1 CHAR(200) NOT NULL;
@@ -29,16 +28,41 @@ CREATE TABLE t2(f1 INT NOT NULL,
                 FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB;
 INSERT INTO t2 VALUES(1);
 # Bulk operation shouldn't happen because of foreign key constraints
-ALTER TABLE t2 FORCE;
+ALTER TABLE t2 ALGORITHM=COPY, FORCE;
 DROP TABLE t2, t1;
 
 CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB
 PARTITION BY HASH(f1) PARTITIONS 2;
 INSERT INTO t1 VALUES(1, 1);
 INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2;
 # Buffer fits in the memory
-ALTER TABLE t1 FORCE;
+ALTER TABLE t1 ALGORITHM=COPY, FORCE;
 # Insert more entries
 INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536;
 # Alter should use temporary file for sorting
-ALTER TABLE t1 ADD INDEX(f2);
+ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
 DROP TABLE t1;
+
+--echo #
+--echo # MDEV-34756 Validation of new foreign key skipped
+--echo # if innodb_alter_copy_bulk=ON
+--echo #
+CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY,
+                f2 INT NOT NULL)ENGINE=InnoDB;
+CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,
+                f2 INT NOT NULL)ENGINE=InnoDB;
+--enable_info
+ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f1) REFERENCES t1(f1);
+--disable_info
+INSERT INTO t1 VALUES (1, 1);
+INSERT INTO t2 VALUES (1, 2);
+--replace_regex /#sql-alter-[0-9a-f-]*/#sql-alter/
+--error ER_NO_REFERENCED_ROW_2
+ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
+INSERT INTO t1 VALUES(3, 1);
+--enable_info
+SET STATEMENT foreign_key_checks=0 FOR
+ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
+ALTER TABLE t1 ALGORITHM=COPY, FORCE;
+ALTER TABLE t2 ALGORITHM=COPY, FORCE;
+--disable_info
+DROP TABLE t2, t1;
diff --git a/mysql-test/suite/innodb/t/log_file_size_online.test b/mysql-test/suite/innodb/t/log_file_size_online.test
index 65551f13dbc..3b56144ca43 100644
--- a/mysql-test/suite/innodb/t/log_file_size_online.test
+++ b/mysql-test/suite/innodb/t/log_file_size_online.test
@@ -25,17 +25,28 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
 let SEARCH_PATTERN = InnoDB: Resized log to 4\\.000MiB;
 --source include/search_pattern_in_file.inc
 
-UPDATE t SET b='' WHERE a<10;
+send SET GLOBAL innodb_log_file_size=5242880;
-SET GLOBAL innodb_log_file_size=5242880;
+--connect con1,localhost,root
+send UPDATE t SET b='' WHERE a<10;
+
+--connection default
+reap;
 
 SHOW VARIABLES LIKE 'innodb_log_file_size';
 SELECT global_value FROM information_schema.system_variables
 WHERE variable_name = 'innodb_log_file_size';
+--connection con1
+reap;
+--disconnect con1
+--connection default
+
+--let $shutdown_timeout=0
 --let $restart_parameters=
 --source include/restart_mysqld.inc
 
 SELECT * FROM t WHERE a<10;
+SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
 SHOW VARIABLES LIKE 'innodb_log_file_size';
 let SEARCH_PATTERN = InnoDB: Resized log to 5\\.000MiB;
diff --git a/mysql-test/suite/mariabackup/slave_provision_nolock.test b/mysql-test/suite/mariabackup/slave_provision_nolock.test
index 874a32d6250..1746814d212 100644
--- a/mysql-test/suite/mariabackup/slave_provision_nolock.test
+++ b/mysql-test/suite/mariabackup/slave_provision_nolock.test
@@ -1,5 +1,7 @@
 --source include/have_innodb.inc
 --source include/have_log_bin.inc
+# Test does a lot of queries that take a lot of CPU under Valgrind.
+--source include/not_valgrind.inc
 
 call mtr.add_suppression("Can't init tc log");
 call mtr.add_suppression("Aborting");
diff --git a/mysql-test/suite/rpl/r/rpl_old_master.result b/mysql-test/suite/rpl/r/rpl_old_master.result
index 5e9d8a88a20..11da61e09d0 100644
--- a/mysql-test/suite/rpl/r/rpl_old_master.result
+++ b/mysql-test/suite/rpl/r/rpl_old_master.result
@@ -9,10 +9,7 @@ connection slave;
 SET @old_parallel= @@GLOBAL.slave_parallel_threads;
 SET GLOBAL slave_parallel_threads=10;
 CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4, master_use_gtid=no;
-FLUSH TABLES WITH READ LOCK;
 include/start_slave.inc
-include/wait_for_slave_param.inc [Seconds_Behind_Master]
-UNLOCK TABLES;
 connection master;
 CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
 INSERT INTO t2 VALUES (1);
diff --git a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
index dbea479bc12..3a113205193 100644
--- a/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
+++ b/mysql-test/suite/rpl/r/rpl_semi_sync_cond_var_per_thd.result
@@ -1,6 +1,7 @@
 include/master-slave.inc
 [connection master]
 connection master;
+call mtr.add_suppression("Got an error writing communication packets");
 call mtr.add_suppression("Got an error reading communication packets");
 call mtr.add_suppression("Could not read packet:.* vio_errno: 1158");
 call mtr.add_suppression("Could not write packet:.* vio_errno: 1160");
diff --git a/mysql-test/suite/rpl/t/rpl_create_drop_event.test b/mysql-test/suite/rpl/t/rpl_create_drop_event.test
index 96a7e82d6f7..79bb0ffec90 100644
--- a/mysql-test/suite/rpl/t/rpl_create_drop_event.test
+++ b/mysql-test/suite/rpl/t/rpl_create_drop_event.test
@@ -14,6 +14,12 @@ SET GLOBAL event_scheduler=on;
 let $wait_condition= SELECT count(*)>0 FROM t1;
 --source include/wait_condition.inc
 SET GLOBAL event_scheduler=off;
+# If the time rolls to the next whole second just at this point, a new event
+# run may be scheduled. Wait for this to disappear; otherwise we see occasional
+# test failures if the table gets dropped before the extra event run completes.
+# Expect 5 connections: default, master, master1, server_1, binlog dump thread
+--let $wait_condition= SELECT COUNT(*) = 5 FROM INFORMATION_SCHEMA.PROCESSLIST;
+--source include/wait_condition.inc
 
 SELECT DISTINCT a FROM t1;
 DELETE FROM t1;
diff --git a/mysql-test/suite/rpl/t/rpl_mdev6020.test b/mysql-test/suite/rpl/t/rpl_mdev6020.test
index 2d022abdd8b..f967f85df17 100644
--- a/mysql-test/suite/rpl/t/rpl_mdev6020.test
+++ b/mysql-test/suite/rpl/t/rpl_mdev6020.test
@@ -1,3 +1,5 @@
+# Test applies a large binlog, which takes long under Valgrind with little benefit.
+--source include/not_valgrind.inc
 --source include/have_innodb.inc
 --source include/have_partition.inc
 --source include/have_binlog_format_mixed_or_row.inc
diff --git a/mysql-test/suite/rpl/t/rpl_old_master.test b/mysql-test/suite/rpl/t/rpl_old_master.test
index 6faa8212d66..993ef977542 100644
--- a/mysql-test/suite/rpl/t/rpl_old_master.test
+++ b/mysql-test/suite/rpl/t/rpl_old_master.test
@@ -28,14 +28,7 @@ SET GLOBAL slave_parallel_threads=10;
 --replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
 eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4, master_use_gtid=no;
 
-# Block execution yet when the blocked query timestamp has been already accounted
-FLUSH TABLES WITH READ LOCK;
 --source include/start_slave.inc
---let $slave_param = Seconds_Behind_Master
---let $slave_param_value = 1
---let $slave_param_comparison= >=
---source include/wait_for_slave_param.inc
-UNLOCK TABLES;
 
 --connection master
 CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
diff --git a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
index 0567d8a5700..ba048cd5be9 100644
--- a/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync_cond_var_per_thd.test
@@ -25,6 +25,7 @@
 --source include/master-slave.inc
 
 --connection master
+call mtr.add_suppression("Got an error writing communication packets");
 call mtr.add_suppression("Got an error reading communication packets");
 call mtr.add_suppression("Could not read packet:.* vio_errno: 1158");
 call mtr.add_suppression("Could not write packet:.* vio_errno: 1160");
diff --git a/mysql-test/suite/rpl/t/rpl_start_stop_slave.test b/mysql-test/suite/rpl/t/rpl_start_stop_slave.test
index 23b25b1bf85..ce7d51ca43d 100644
--- a/mysql-test/suite/rpl/t/rpl_start_stop_slave.test
+++ b/mysql-test/suite/rpl/t/rpl_start_stop_slave.test
@@ -19,7 +19,17 @@
 --source include/master-slave.inc
 
 connection slave;
---let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
+--let $i= 100
+while ($i > 0) {
+  dec $i;
+  --let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
+  if ($connection_id) {
+    let $i= 0;
+  }
+  if ($i > 0) {
+    --sleep 0.1
+  }
+}
 
 if(!$connection_id)
 {
diff --git a/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result b/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result
index 442d44e7fb2..f5b01aa8016 100644
--- a/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result
+++ b/mysql-test/suite/sys_vars/r/innodb_purge_batch_size_basic.result
@@ -1,19 +1,19 @@
 SET @global_start_value = @@global.innodb_purge_batch_size;
 SELECT @global_start_value;
 @global_start_value
-1000
+127
 '#--------------------FN_DYNVARS_046_01------------------------#'
 SET @@global.innodb_purge_batch_size = 1;
 SET @@global.innodb_purge_batch_size = DEFAULT;
 SELECT @@global.innodb_purge_batch_size;
 @@global.innodb_purge_batch_size
-1000
+127
 '#---------------------FN_DYNVARS_046_02-------------------------#'
 SET innodb_purge_batch_size = 1;
 ERROR HY000: Variable 'innodb_purge_batch_size' is a GLOBAL variable and should be set with SET GLOBAL
 SELECT @@innodb_purge_batch_size;
 @@innodb_purge_batch_size
-1000
+127
 SELECT local.innodb_purge_batch_size;
 ERROR 42S02: Unknown table 'local' in field list
 SET global innodb_purge_batch_size = 1;
@@ -112,4 +112,4 @@ SELECT @@global.innodb_purge_batch_size;
 SET @@global.innodb_purge_batch_size = @global_start_value;
 SELECT @@global.innodb_purge_batch_size;
 @@global.innodb_purge_batch_size
-1000
+127
diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff
index 87958c58234..fe78cc6e01b 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff
+++ b/mysql-test/suite/sys_vars/r/sysvars_innodb,32bit.rdiff
@@ -221,7 +221,7 @@
  VARIABLE_SCOPE	GLOBAL
 -VARIABLE_TYPE	BIGINT UNSIGNED
 +VARIABLE_TYPE	INT UNSIGNED
- VARIABLE_COMMENT	How many pages to flush on LRU eviction
+ VARIABLE_COMMENT	Deprecated parameter with no effect
  NUMERIC_MIN_VALUE	1
 -NUMERIC_MAX_VALUE	18446744073709551615
 +NUMERIC_MAX_VALUE	4294967295
diff --git a/mysql-test/suite/sys_vars/r/sysvars_innodb.result b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
index 4673b385c24..129fc162c23 100644
--- a/mysql-test/suite/sys_vars/r/sysvars_innodb.result
+++ b/mysql-test/suite/sys_vars/r/sysvars_innodb.result
@@ -984,13 +984,13 @@ SESSION_VALUE	NULL
 DEFAULT_VALUE	32
 VARIABLE_SCOPE	GLOBAL
 VARIABLE_TYPE	BIGINT UNSIGNED
-VARIABLE_COMMENT	How many pages to flush on LRU eviction
+VARIABLE_COMMENT	Unused
 NUMERIC_MIN_VALUE	1
 NUMERIC_MAX_VALUE	18446744073709551615
 NUMERIC_BLOCK_SIZE	0
 ENUM_VALUE_LIST	NULL
 READ_ONLY	NO
-COMMAND_LINE_ARGUMENT	REQUIRED
+COMMAND_LINE_ARGUMENT	NULL
 VARIABLE_NAME	INNODB_LRU_SCAN_DEPTH
 SESSION_VALUE	NULL
 DEFAULT_VALUE	1536
@@ -1233,7 +1233,7 @@ READ_ONLY	NO
 COMMAND_LINE_ARGUMENT	OPTIONAL
 VARIABLE_NAME	INNODB_PURGE_BATCH_SIZE
 SESSION_VALUE	NULL
-DEFAULT_VALUE	1000
+DEFAULT_VALUE	127
 VARIABLE_SCOPE	GLOBAL
 VARIABLE_TYPE	BIGINT UNSIGNED
 VARIABLE_COMMENT	Number of UNDO log pages to purge in one batch from the history list
@@ -1254,7 +1254,7 @@ NUMERIC_MAX_VALUE	128
 NUMERIC_BLOCK_SIZE	0
 ENUM_VALUE_LIST	NULL
 READ_ONLY	NO
-COMMAND_LINE_ARGUMENT	OPTIONAL
+COMMAND_LINE_ARGUMENT	NULL
 VARIABLE_NAME	INNODB_PURGE_THREADS
 SESSION_VALUE	NULL
 DEFAULT_VALUE	4
diff --git a/mysys/crc32/crc32c_x86.cc b/mysys/crc32/crc32c_x86.cc
index 3ddddf1303c..fb5dc19f7a5 100644
--- a/mysys/crc32/crc32c_x86.cc
+++ b/mysys/crc32/crc32c_x86.cc
@@ -39,7 +39,7 @@ extern "C" unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size);
 
 constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
 constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U << 1;
-constexpr uint32_t cpuid_ecx_XSAVE= 1U << 26;
+constexpr uint32_t cpuid_ecx_AVX_AND_XSAVE= 1U << 28 | 1U << 27;
 
 static uint32_t cpuid_ecx()
 {
@@ -395,7 +395,7 @@ static bool os_have_avx512()
 
 static ATTRIBUTE_NOINLINE bool have_vpclmulqdq(uint32_t cpuid_ecx)
 {
-  if (!(cpuid_ecx & cpuid_ecx_XSAVE) || !os_have_avx512())
+  if ((~cpuid_ecx & cpuid_ecx_AVX_AND_XSAVE) || !os_have_avx512())
    return false;
 # ifdef _MSC_VER
   int regs[4];
diff --git a/sql/item.h b/sql/item.h
index 4de5686fb02..716e2a4e583 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -4769,6 +4769,8 @@ public:
   {
     return const_charset_converter(thd, tocs, true, func_name);
   }
+  Item *do_get_copy(THD *thd) const override
+  { return get_item_copy<Item_static_string_func>(thd, this); }
 };
 
 
@@ -4932,7 +4934,6 @@ public:
   }
   Item *do_get_copy(THD *thd) const override
   { return get_item_copy(thd, this); }
-  Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
 };
 
 
@@ -4947,7 +4948,6 @@ public:
   { }
   Item *do_get_copy(THD *thd) const override
   { return get_item_copy(thd, this); }
-  Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
 };
 
 
@@ -4964,7 +4964,6 @@ public:
   { }
   Item *do_get_copy(THD *thd) const override
   { return get_item_copy(thd, this); }
-  Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
 };
 
 
@@ -5003,7 +5002,6 @@ public:
   }
   Item *do_get_copy(THD *thd) const override
   { return get_item_copy(thd, this); }
-  Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
 };
 
 
@@ -5023,7 +5021,6 @@ public:
   }
   Item *do_get_copy(THD *thd) const override
   { return get_item_copy(thd, this); }
-  Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
 };
 
 
@@ -5187,7 +5184,6 @@ public:
   void print(String *str, enum_query_type query_type) override;
   Item *do_get_copy(THD *thd) const override
   { return get_item_copy(thd, this); }
-  Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
 };
 
 
diff --git a/sql/item_cmpfunc.h b/sql/item_cmpfunc.h
index 4ca70931fb3..edc5e8e0cf0 100644
--- a/sql/item_cmpfunc.h
+++ b/sql/item_cmpfunc.h
@@ -752,6 +752,8 @@ public:
   void set_sub_test(Item_maxmin_subselect *item) { test_sub_item= item; test_sum_item= 0;};
   bool empty_underlying_subquery();
   Item *neg_transformer(THD *thd) override;
+  Item *do_get_copy(THD *thd) const override
+  { return get_item_copy<Item_func_not_all>(thd, this); }
 };
 
 
diff --git a/sql/mysqld.cc b/sql/mysqld.cc
index e938e8f6cfa..e9fda7233eb 100644
--- a/sql/mysqld.cc
+++ b/sql/mysqld.cc
@@ -5429,7 +5429,9 @@ static int init_server_components()
     MARIADB_REMOVED_OPTION("innodb-log-compressed-pages"),
     MARIADB_REMOVED_OPTION("innodb-log-files-in-group"),
     MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"),
+    MARIADB_REMOVED_OPTION("innodb-lru-flush-size"),
     MARIADB_REMOVED_OPTION("innodb-page-cleaners"),
+    MARIADB_REMOVED_OPTION("innodb-purge-truncate-frequency"),
     MARIADB_REMOVED_OPTION("innodb-replication-delay"),
     MARIADB_REMOVED_OPTION("innodb-scrub-log"),
     MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"),
diff --git a/sql/net_serv.cc b/sql/net_serv.cc
index ed1cf61268d..7b16f3b90fb 100644
--- a/sql/net_serv.cc
+++ b/sql/net_serv.cc
@@ -713,7 +713,6 @@ net_real_write(NET *net,const uchar *packet, size_t len)
    {
      sql_print_warning("Could not write packet: fd: %lld  state: %d  "
                        "errno: %d  vio_errno: %d  length: %ld",
-                       MYF(ME_ERROR_LOG | ME_WARNING),
                        (longlong) vio_fd(net->vio), (int) net->vio->state,
                        vio_errno(net->vio), net->last_errno,
                        (ulong) (end-pos));
diff --git a/sql/rpl_parallel.cc b/sql/rpl_parallel.cc
index 29651a5bc9d..4add170544a 100644
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
@@ -1495,11 +1495,23 @@ handle_rpl_parallel_thread(void *arg)
           after mark_start_commit(), we have to unmark, which has at least
           a theoretical possibility of leaving a window where it looks like
           all transactions in a GCO have started committing, while in fact one
-          will need to rollback and retry. This is not supposed to be possible
-          (since there is a deadlock, at least one transaction should be
-          blocked from reaching commit), but this seems a fragile ensurance,
-          and there were historically a number of subtle bugs in this area.
+          will need to rollback and retry.
+
+          Normally this will not happen, since the kill is there to resolve a
+          deadlock that is preventing at least one transaction from proceeding.
+          One case it can happen is with InnoDB dict stats update, which can
+          temporarily cause transactions to block each other, but locks are
+          released immediately; they don't linger until commit. There could be
+          other similar cases; there were historically a number of subtle bugs
+          in this area.
+
+          But once we start the commit, we can expect that no new lock
+          conflicts will be introduced. So by handling any lingering deadlock
+          kill at this point just before mark_start_commit(), we should be
+          robust even towards spurious deadlock kills.
         */
+        if (rgi->killed_for_retry != rpl_group_info::RETRY_KILL_NONE)
+          wait_for_pending_deadlock_kill(thd, rgi);
         if (!thd->killed)
         {
           DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");
diff --git a/sql/rpl_rli.cc b/sql/rpl_rli.cc
index 499223f2c35..a8c00b67cac 100644
--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -2539,6 +2539,23 @@ rpl_group_info::unmark_start_commit()
 
   e= this->parallel_entry;
   mysql_mutex_lock(&e->LOCK_parallel_entry);
+  /*
+    Assert that we have not already wrongly completed this GCO and signalled
+    the next one to start, only to now unmark and make the signal invalid.
+    This is to catch problems like MDEV-34696.
+
+    The error injection rpl_parallel_simulate_temp_err_xid is used to test this
+    precise situation: that we handle it gracefully if it somehow occurs in a
+    release build. So disable the assert in this case.
+  */
+#ifndef DBUG_OFF
+  bool allow_unmark_after_complete= false;
+  DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_xid",
+                  allow_unmark_after_complete= true;);
+  DBUG_ASSERT(!gco->next_gco ||
+              gco->next_gco->wait_count > e->count_committing_event_groups ||
+              allow_unmark_after_complete);
+#endif
   --e->count_committing_event_groups;
   mysql_mutex_unlock(&e->LOCK_parallel_entry);
 }
diff --git a/sql/sql_select.cc b/sql/sql_select.cc
index 9f387df3869..49f0aa587cc 100644
--- a/sql/sql_select.cc
+++ b/sql/sql_select.cc
@@ -32197,7 +32197,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
       else
       {
         const KEY *ref_keyinfo= table->key_info + ref_key;
-        refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
+        refkey_rows_estimate=
+          (ha_rows)ref_keyinfo->actual_rec_per_key(tab->ref.key_parts - 1);
       }
       set_if_bigger(refkey_rows_estimate, 1);
     }
diff --git a/sql/sql_statistics.h b/sql/sql_statistics.h
index bd997588153..ad3bb9ae7b7 100644
--- a/sql/sql_statistics.h
+++ b/sql/sql_statistics.h
@@ -610,7 +610,7 @@ public:
 
   bool avg_frequency_is_inited() { return avg_frequency != NULL; }
 
-  double get_avg_frequency(uint i)
+  double get_avg_frequency(uint i) const
   {
     return (double) avg_frequency[i] / Scale_factor_avg_frequency;
   }
diff --git a/sql/structs.h b/sql/structs.h
index a2f64f296df..8b71e4cc311 100644
--- a/sql/structs.h
+++ b/sql/structs.h
@@ -173,7 +173,7 @@ typedef struct st_key {
   engine_option_value *option_list;
   ha_index_option_struct *option_struct;  /* structure with parsed options */
 
-  double actual_rec_per_key(uint i);
+  double actual_rec_per_key(uint i) const;
 } KEY;
 
diff --git a/sql/table.cc b/sql/table.cc
index 23810d9a45d..a50403da941 100644
--- a/sql/table.cc
+++ b/sql/table.cc
@@ -10322,7 +10322,7 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd)
 }
 
 
-double KEY::actual_rec_per_key(uint i)
+double KEY::actual_rec_per_key(uint i) const
 {
   if (rec_per_key == 0)
     return 0;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index bdedbd7bf40..d2ce1ac68ed 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -2573,6 +2573,51 @@ err_exit:
   return(FALSE);
 }
 
+buf_block_t* buf_pool_t::page_fix(const page_id_t id)
+{
+  ha_handler_stats *const stats= mariadb_stats;
+  buf_inc_get(stats);
+  auto& chain= page_hash.cell_get(id.fold());
+  page_hash_latch &hash_lock= page_hash.lock_get(chain);
+  for (;;)
+  {
+    hash_lock.lock_shared();
+    buf_page_t *b= page_hash.get(id, chain);
+    if (b)
+    {
+      uint32_t state= b->fix();
+      hash_lock.unlock_shared();
+      ut_ad(!b->in_zip_hash);
+      ut_ad(b->frame);
+      ut_ad(state >= buf_page_t::FREED);
+      if (state >= buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX)
+      {
+        b->lock.s_lock();
+        state= b->state();
+        ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX);
+        b->lock.s_unlock();
+      }
+      if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
+      {
+        /* The page was marked as freed or corrupted. */
+        b->unfix();
+        b= nullptr;
+      }
+      return reinterpret_cast<buf_block_t*>(b);
+    }
+
+    hash_lock.unlock_shared();
+    switch (buf_read_page(id, 0, chain)) {
+    default:
+      return nullptr;
+    case DB_SUCCESS:
+    case DB_SUCCESS_LOCKED_REC:
+      mariadb_increment_pages_read(stats);
+      buf_read_ahead_random(id, 0);
+    }
+  }
+}
+
 /** Low level function used to get access to a database page.
 @param[in]	page_id		page id
 @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index 5032b11d1d0..082e485b26f 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -39,9 +39,6 @@ Created 11/5/1995 Heikki Tuuri
 #include "srv0mon.h"
 #include "my_cpu.h"
 
-/** Flush this many pages in buf_LRU_get_free_block() */
-size_t innodb_lru_flush_size;
-
 /** The number of blocks from the LRU_old pointer onward, including
 the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
 of the whole LRU list length, except that the tolerance defined below
@@ -369,17 +366,13 @@ block to read in a page. Note that we only ever get a block from
 the free list. Even when we flush a page or find a page in LRU scan
 we put it to free list to be used.
 * iteration 0:
-  * get a block from the buf_pool.free list, success:done
+  * get a block from the buf_pool.free list
   * if buf_pool.try_LRU_scan is set
     * scan LRU up to 100 pages to free a clean block
     * success:retry the free list
-  * flush up to innodb_lru_flush_size LRU blocks to data files
-    (until UT_LIST_GET_GEN(buf_pool.free) < innodb_lru_scan_depth)
-    * on buf_page_write_complete() the blocks will put on buf_pool.free list
-    * success: retry the free list
+  * invoke buf_pool.page_cleaner_wakeup(true) and wait for its completion
 * subsequent iterations: same as iteration 0 except:
-  * scan whole LRU list
-  * scan LRU list even if buf_pool.try_LRU_scan is not set
+  * scan the entire LRU list
 
 @param get	how to allocate the block
 @return the free control block, in state BUF_BLOCK_MEMORY
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index f5b1c833664..78f09b78c4f 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -18501,6 +18501,7 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
       ib_senderrf(thd, IB_LOG_LEVEL_ERROR, ER_CANT_CREATE_HANDLER_FILE);
       break;
     case log_t::RESIZE_STARTED:
+      const lsn_t start{log_sys.resize_in_progress()};
       for (timespec abstime;;)
       {
         if (thd_kill_level(thd))
@@ -18511,13 +18512,30 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
 
         set_timespec(abstime, 5);
         mysql_mutex_lock(&buf_pool.flush_list_mutex);
-        const bool in_progress(buf_pool.get_oldest_modification(LSN_MAX) <
-                               log_sys.resize_in_progress());
-        if (in_progress)
+        lsn_t resizing= log_sys.resize_in_progress();
+        if (resizing > buf_pool.get_oldest_modification(0))
+        {
+          buf_pool.page_cleaner_wakeup(true);
          my_cond_timedwait(&buf_pool.done_flush_list,
                            &buf_pool.flush_list_mutex.m_mutex, &abstime);
+          resizing= log_sys.resize_in_progress();
+        }
         mysql_mutex_unlock(&buf_pool.flush_list_mutex);
-        if (!log_sys.resize_in_progress())
+        if (start > log_sys.get_lsn())
+        {
+          ut_ad(!log_sys.is_pmem());
+          /* The server is almost idle. Write dummy FILE_CHECKPOINT records
+          to ensure that the log resizing will complete. */
+          log_sys.latch.wr_lock(SRW_LOCK_CALL);
+          while (start > log_sys.get_lsn())
+          {
+            mtr_t mtr;
+            mtr.start();
+            mtr.commit_files(log_sys.last_checkpoint_lsn);
+          }
+          log_sys.latch.wr_unlock();
+        }
+        if (!resizing || resizing > start /* only wait for our resize */)
           break;
       }
     }
@@ -18904,7 +18922,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
   PLUGIN_VAR_OPCMDARG,
   "Number of UNDO log pages to purge in one batch from the history list",
   NULL, NULL,
-  1000,			/* Default setting */
+  127,			/* Default setting */
   1,			/* Minimum value */
   innodb_purge_batch_size_MAX, 0);
 
@@ -19167,11 +19185,6 @@ static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
   "How deep to scan LRU to keep it clean",
   NULL, NULL, 1536, 100, ~0UL, 0);
 
-static MYSQL_SYSVAR_SIZE_T(lru_flush_size, innodb_lru_flush_size,
-  PLUGIN_VAR_RQCMDARG,
-  "How many pages to flush on LRU eviction",
-  NULL, NULL, 32, 1, SIZE_T_MAX, 0);
-
 static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
   PLUGIN_VAR_OPCMDARG,
   "Set to 0 (don't flush neighbors from buffer pool),"
@@ -19435,14 +19448,21 @@ static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
   10 << 20, 10 << 20,
   1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);
 
-static ulong innodb_purge_rseg_truncate_frequency;
+static ulong innodb_purge_rseg_truncate_frequency= 128;
 
 static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
   innodb_purge_rseg_truncate_frequency,
-  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_DEPRECATED,
+  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_DEPRECATED | PLUGIN_VAR_NOCMDOPT,
   "Unused", NULL, NULL, 128, 1, 128, 0);
 
+static size_t innodb_lru_flush_size;
+
+static MYSQL_SYSVAR_SIZE_T(lru_flush_size, innodb_lru_flush_size,
+  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED | PLUGIN_VAR_NOCMDOPT,
+  "Unused",
+  NULL, NULL, 32, 1, SIZE_T_MAX, 0);
+
 static void innodb_undo_log_truncate_update(THD *thd, struct st_mysql_sys_var*,
                                             void*, const void *save)
 {
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index dc64054eb3e..c8c8269fe60 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -74,14 +74,10 @@ page_zip_des_t*
 btr_cur_get_page_zip(
 /*=================*/
 	btr_cur_t*	cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the page of a tree cursor.
+/** Returns the page of a tree cursor.
 @return pointer to page */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
-	btr_cur_t*	cursor);/*!< in: tree cursor */
+#define btr_cur_get_page(cursor) (cursor)->block()->page.frame
+
 /*********************************************************//**
 Returns the index of a cursor.
 @param cursor	b-tree cursor
diff --git a/storage/innobase/include/btr0cur.inl b/storage/innobase/include/btr0cur.inl
index 955cf34288e..5981b1465c9 100644
--- a/storage/innobase/include/btr0cur.inl
+++ b/storage/innobase/include/btr0cur.inl
@@ -48,18 +48,6 @@ btr_cur_get_page_zip(
 	return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
 }
 
-/*********************************************************//**
-Returns the page of a tree cursor.
-@return pointer to page */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
-	btr_cur_t*	cursor)	/*!< in: tree cursor */
-{
-	return(page_align(page_cur_get_rec(&(cursor->page_cur))));
-}
-
 /*********************************************************//**
 Positions a tree cursor at a given record. */
 UNIV_INLINE
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index e43b86508ed..f2736b9b218 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -1358,6 +1358,12 @@ public:
   }
 public:
 
+  /** Look up and buffer-fix a page.
+  @param id      page identifier
+  @return undo log page, buffer-fixed
+  @retval nullptr if the undo page was corrupted or freed */
+  buf_block_t *page_fix(const page_id_t id);
+
   /** @return whether the buffer pool contains a page
   @param page_id	page identifier
  @param chain	hash table chain for page_id.fold() */
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index c52fc05ce8f..a62930b65af 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -33,9 +33,6 @@ Created 11/5/1995 Heikki Tuuri
 struct trx_t;
 struct fil_space_t;
 
-/** Flush this many pages in buf_LRU_get_free_block() */
-extern size_t innodb_lru_flush_size;
-
 /*#######################################################################
 These are low-level functions
 #########################################################################*/
@@ -82,17 +79,13 @@ block to read in a page. Note that we only ever get a block from
 the free list. Even when we flush a page or find a page in LRU scan
 we put it to free list to be used.
 * iteration 0:
-  * get a block from the buf_pool.free list, success:done
+  * get a block from the buf_pool.free list
   * if buf_pool.try_LRU_scan is set
     * scan LRU up to 100 pages to free a clean block
     * success:retry the free list
-  * flush up to innodb_lru_flush_size LRU blocks to data files
-    (until UT_LIST_GET_GEN(buf_pool.free) < innodb_lru_scan_depth)
-    * on buf_page_write_complete() the blocks will put on buf_pool.free list
-    * success: retry the free list
+  * invoke buf_pool.page_cleaner_wakeup(true) and wait for its completion
 * subsequent iterations: same as iteration 0 except:
-  * scan whole LRU list
-  * scan LRU list even if buf_pool.try_LRU_scan is not set
+  * scan the entire LRU list
 
 @param get	how to allocate the block
 @return the free control block, in state BUF_BLOCK_MEMORY
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index dbd31670b14..21723c2610c 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -224,7 +224,7 @@ public:
   /** exclusive latch for checkpoint, shared for mtr_t::commit() to buf */
   alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock latch;
 
-  /** number of std::swap(buf, flush_buf) and writes from buf to log;
+  /** number of writes from buf or flush_buf to log;
   protected by latch.wr_lock() */
   ulint write_to_log;
 
@@ -232,8 +232,9 @@ public:
   lsn_t write_lsn;
 
   /** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
-  In write_buf(), buf and flush_buf are swapped */
+  In write_buf(), buf and flush_buf may be swapped */
   byte *flush_buf;
+
   /** set when there may be need to initiate a log checkpoint.
  This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
  std::atomic<bool> need_checkpoint;
@@ -372,9 +373,10 @@ public:
 private:
   /** Write resize_buf to resize_log.
-  @param length	the used length of resize_buf */
+  @param b	resize_buf or resize_flush_buf
+  @param length	the used length of b */
   ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
-  void resize_write_buf(size_t length) noexcept;
+  void resize_write_buf(const byte *b, size_t length) noexcept;
 public:
 
   /** Rename a log file after resizing.
@@ -506,13 +508,7 @@ public:
   @param d	destination
   @param s	string of bytes
   @param size	length of str, in bytes */
-  void append(byte *&d, const void *s, size_t size) noexcept
-  {
-    ut_ad(latch_have_any());
-    ut_ad(d + size <= buf + (is_pmem() ? file_size : buf_size));
-    memcpy(d, s, size);
-    d+= size;
-  }
+  static inline void append(byte *&d, const void *s, size_t size) noexcept;
 
   /** Set the log file format. */
   void set_latest_format(bool encrypted) noexcept
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index 279138acd79..5365f627720 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -31,14 +31,6 @@ Created 10/4/1994 Heikki Tuuri
 
 #ifdef UNIV_DEBUG
 /*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned.
-@return page */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
-	page_cur_t*	cur);	/*!< in: page cursor */
-/*********************************************************//**
 Gets pointer to the buffer block where the cursor is positioned.
 @return page */
 UNIV_INLINE
@@ -60,12 +52,12 @@ page_cur_get_page_zip(
 UNIV_INLINE
 rec_t *page_cur_get_rec(const page_cur_t *cur);
 #else /* UNIV_DEBUG */
-# define page_cur_get_page(cur)		page_align((cur)->rec)
 # define page_cur_get_block(cur)	(cur)->block
 # define page_cur_get_page_zip(cur)	buf_block_get_page_zip((cur)->block)
 # define page_cur_get_rec(cur)		(cur)->rec
 #endif /* UNIV_DEBUG */
-# define is_page_cur_get_page_zip(cur) is_buf_block_get_page_zip((cur)->block)
+#define page_cur_get_page(cur) page_cur_get_block(cur)->page.frame
+#define is_page_cur_get_page_zip(cur) is_buf_block_get_page_zip((cur)->block)
 
 /*********************************************************//**
 Sets the cursor object to point before the first user record on the page. */
diff --git a/storage/innobase/include/page0cur.inl b/storage/innobase/include/page0cur.inl
index a73c31a7bff..a3225b3b94b 100644
--- a/storage/innobase/include/page0cur.inl
+++ b/storage/innobase/include/page0cur.inl
@@ -25,18 +25,6 @@ Created 10/4/1994 Heikki Tuuri
 *************************************************************************/
 
 #ifdef UNIV_DEBUG
-/*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned.
-@return page */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
-	page_cur_t*	cur)	/*!< in: page cursor */
-{
-	return page_align(page_cur_get_rec(cur));
-}
-
 /*********************************************************//**
 Gets pointer to the buffer block where the cursor is positioned.
 @return page */
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index c60f390a092..6485e21e7fc 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -209,17 +209,6 @@ que_eval_sql(
 	const char*	sql,	/*!< in: SQL string */
 	trx_t*		trx);	/*!< in: trx */
 
-/**********************************************************************//**
-Round robin scheduler.
-@return a query thread of the graph moved to QUE_THR_RUNNING state, or
-NULL; the query thread should be executed by que_run_threads by the
-caller */
-que_thr_t*
-que_fork_scheduler_round_robin(
-/*===========================*/
-	que_fork_t*	fork,	/*!< in: a query fork */
-	que_thr_t*	thr);	/*!< in: current pos */
-
 /** Query thread states */
 enum que_thr_state_t {
 	/** in selects this means that the thread is at the end of its
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index 60f310e1b0f..2ddffa41af1 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -54,32 +54,47 @@ row_vers_impl_x_locked(
 	dict_index_t*	index,
 	const rec_offs*	offsets);
 
-/** Finds out if a version of the record, where the version >= the current
-purge_sys.view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE.
-@param[in]	also_curr	TRUE if also rec is included in the versions
-				to search; otherwise only versions prior
-				to it are searched
-@param[in]	rec		record in the clustered index; the caller
-				must have a latch on the page
-@param[in]	mtr		mtr holding the latch on rec; it will
-				also hold the latch on purge_view
-@param[in]	index		secondary index
-@param[in]	ientry		secondary index entry
-@param[in]	roll_ptr	roll_ptr for the purge record
-@param[in]	trx_id		transaction ID on the purging record
-@return TRUE if earlier version should have */
+/** Find out whether data tuple has missing data type
+for indexed virtual column.
+@param tuple	data tuple
+@param index	virtual index
+@return true if tuple has missing column type */
+bool dtuple_vcol_data_missing(const dtuple_t &tuple,
+                              const dict_index_t &index);
+/** build virtual column value from current cluster index record data
+@param[in,out]	row		the cluster index row in dtuple form
+@param[in]	clust_index	clustered index
+@param[in]	index		the secondary index
+@param[in]	heap		heap used to build virtual dtuple. */
 bool
-row_vers_old_has_index_entry(
-	bool		also_curr,
-	const rec_t*	rec,
-	mtr_t*		mtr,
+row_vers_build_clust_v_col(
+	dtuple_t*	row,
+	dict_index_t*	clust_index,
 	dict_index_t*	index,
-	const dtuple_t*	ientry,
+	mem_heap_t*	heap);
+/** Build a dtuple that contains virtual column data for current cluster index
+@param[in]	rec		cluster index rec
+@param[in]	clust_index	cluster index
+@param[in]	clust_offsets	cluster rec offset
+@param[in]	index		secondary index
+@param[in]	trx_id		transaction ID on the purging record,
+				or 0 if called outside purge
+@param[in]	roll_ptr	roll_ptr for the purge record
+@param[in,out]	heap		heap memory
+@param[in,out]	v_heap		heap memory to keep virtual column tuple
+@param[in,out]	mtr		mini-transaction
+@return dtuple containing virtual column data */
+dtuple_t*
+row_vers_build_cur_vrow(
+	const rec_t*	rec,
+	dict_index_t*	clust_index,
+	rec_offs**	clust_offsets,
+	dict_index_t*	index,
+	trx_id_t	trx_id,
 	roll_ptr_t	roll_ptr,
-	trx_id_t	trx_id);
+	mem_heap_t*	heap,
+	mem_heap_t*	v_heap,
+	mtr_t*		mtr);
 
 /*****************************************************************//**
 Constructs the version of a clustered index record which a consistent
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index affb1533e1d..e53dcc00b81 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -550,6 +550,15 @@ void srv_monitor_task(void*);
 
 void srv_master_callback(void*);
 
+/**
+  Fetches and executes tasks from the purge work queue,
+  until this queue is empty.
+  This is the main part of the purge worker task, but is also
+  executed in the coordinator.
+  @note needs current_thd to be set beforehand.
+*/
+void srv_purge_worker_task_low();
+
 } /* extern "C" */
 
 #ifdef UNIV_DEBUG
diff --git a/storage/innobase/include/srw_lock.h b/storage/innobase/include/srw_lock.h
index e65756097a8..10edc2ad0a0 100644
--- a/storage/innobase/include/srw_lock.h
+++ b/storage/innobase/include/srw_lock.h
@@ -280,6 +280,8 @@ public:
 #endif
   }
 
+  bool rd_u_upgrade_try() { return writer.wr_lock_try(); }
+
   void u_wr_upgrade()
   {
     DBUG_ASSERT(writer.is_locked());
@@ -294,6 +296,13 @@ public:
     readers.store(0, std::memory_order_release);
     /* Note: Any pending rd_lock() will not be woken up until u_unlock() */
   }
+  void u_rd_downgrade()
+  {
+    DBUG_ASSERT(writer.is_locked());
+    ut_d(uint32_t lk=) readers.fetch_add(1, std::memory_order_relaxed);
+    ut_ad(lk < WRITER);
+    u_unlock();
+  }
 
   void rd_unlock()
   {
diff --git a/storage/innobase/include/sux_lock.h b/storage/innobase/include/sux_lock.h
index f0ff2cde228..4d9a50b528e 100644
--- a/storage/innobase/include/sux_lock.h
+++ b/storage/innobase/include/sux_lock.h
@@ -198,6 +198,30 @@ public:
   /** Upgrade an update lock */
   inline void u_x_upgrade();
   inline void u_x_upgrade(const char *file, unsigned line);
+  /** @return whether a shared lock was upgraded to exclusive */
+  bool s_x_upgrade_try()
+  {
+    ut_ad(have_s());
+    ut_ad(!have_u_or_x());
+    if (!lock.rd_u_upgrade_try())
+      return false;
+    claim_ownership();
+    s_unlock();
+    lock.u_wr_upgrade();
+    recursive= RECURSIVE_X;
+    return true;
+  }
+  __attribute__((warn_unused_result))
+  /** @return whether the operation succeeded without waiting */
+  bool s_x_upgrade()
+  {
+    if (s_x_upgrade_try())
+      return true;
+    s_unlock();
+    x_lock();
+    return false;
+  }
+
   /** Downgrade a single exclusive lock to an update lock */
   void x_u_downgrade()
   {
@@ -206,6 +230,16 @@ public:
     recursive*= RECURSIVE_U;
     lock.wr_u_downgrade();
   }
+  /** Downgrade a single update lock to a shared lock */
+  void u_s_downgrade()
+  {
+    ut_ad(have_u_or_x());
+    ut_ad(recursive == RECURSIVE_U);
+    recursive= 0;
+    set_new_owner(0);
+    lock.u_rd_downgrade();
+    ut_d(s_lock_register());
+  }
 
   /** Acquire an exclusive lock or upgrade an update lock
   @return whether U locks were upgraded to X */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 1fb6cd68538..1dcc7845b96 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -149,10 +149,11 @@ public:
 private:
   /** number of pending stop() calls without resume() */
   Atomic_counter<uint32_t> m_paused;
-  /** number of stop_SYS() calls without resume_SYS() */
-  Atomic_counter<uint32_t> m_SYS_paused;
-  /** number of stop_FTS() calls without resume_FTS() */
-  Atomic_counter<uint32_t> m_FTS_paused;
+  /** PAUSED_SYS * number of stop_SYS() calls without resume_SYS() +
+  number of stop_FTS() calls without resume_FTS() */
+  Atomic_relaxed<uint32_t> m_FTS_paused;
+  /** The stop_SYS() multiplier in m_FTS_paused */
+  static constexpr const uint32_t PAUSED_SYS= 1U << 16;
 
   /** latch protecting end_view */
   alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock_low end_latch;
@@ -321,16 +322,21 @@ private:
   void wait_FTS(bool also_sys);
 public:
   /** Suspend purge in data dictionary tables */
-  void stop_SYS() { m_SYS_paused++; }
+  void stop_SYS()
+  {
+    ut_d(const auto p=) m_FTS_paused.fetch_add(PAUSED_SYS);
+    ut_ad(p < p + PAUSED_SYS);
+  }
   /** Resume purge in data dictionary tables */
   static void resume_SYS(void *);
 
   /** Pause purge during a DDL operation that could drop FTS_ tables. */
   void stop_FTS();
   /** Resume purge after stop_FTS(). */
-  void resume_FTS() { ut_d(const auto p=) m_FTS_paused--; ut_ad(p); }
+  void resume_FTS()
+  { ut_d(const auto p=) m_FTS_paused.fetch_sub(1); ut_ad(p & ~PAUSED_SYS); }
   /** @return whether stop_SYS() is in effect */
-  bool must_wait_FTS() const { return m_FTS_paused; }
+  bool must_wait_FTS() const { return m_FTS_paused & ~PAUSED_SYS; }
 private:
 
   /**
@@ -432,10 +438,17 @@ public:
 
   struct view_guard
   {
-    inline view_guard();
+    enum guard { END_VIEW= -1, PURGE= 0, VIEW= 1};
+    guard latch;
+    inline view_guard(guard latch);
     inline ~view_guard();
 
+    /** Fetch an undo log page.
+    @param id	page identifier
+    @param mtr	mini-transaction
+    @return reference to buffer page, possibly buffer-fixed in mtr */
+    inline const buf_block_t *get(const page_id_t id, mtr_t *mtr);
+
-    /** @return purge_sys.view */
+    /** @return purge_sys.view or purge_sys.end_view */
     inline const ReadViewBase &view() const;
   };
 
@@ -464,14 +477,39 @@ public:
 /** The global data structure coordinating a purge */
 extern purge_sys_t purge_sys;
 
-purge_sys_t::view_guard::view_guard()
-{ purge_sys.latch.rd_lock(SRW_LOCK_CALL); }
+purge_sys_t::view_guard::view_guard(purge_sys_t::view_guard::guard latch) :
+  latch(latch)
+{
+  switch (latch) {
+  case VIEW:
+    purge_sys.latch.rd_lock(SRW_LOCK_CALL);
+    break;
+  case END_VIEW:
+    purge_sys.end_latch.rd_lock();
+    break;
+  case PURGE:
+    /* the access is within a purge batch; purge_coordinator_task
+    will wait for all workers to complete before updating the views */
+    break;
+  }
+}
 
 purge_sys_t::view_guard::~view_guard()
-{ purge_sys.latch.rd_unlock(); }
+{
+  switch (latch) {
+  case VIEW:
+    purge_sys.latch.rd_unlock();
+    break;
+  case END_VIEW:
+    purge_sys.end_latch.rd_unlock();
+    break;
+  case PURGE:
+    break;
+  }
+}
 
 const ReadViewBase &purge_sys_t::view_guard::view() const
-{ return purge_sys.view; }
+{ return latch == END_VIEW ? purge_sys.end_view : purge_sys.view; }
purge_sys.end_view : purge_sys.view; } purge_sys_t::end_view_guard::end_view_guard() { purge_sys.end_latch.rd_lock(); } diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h index 3d9b18689a7..609dd530498 100644 --- a/storage/innobase/include/trx0rec.h +++ b/storage/innobase/include/trx0rec.h @@ -157,50 +157,44 @@ trx_undo_report_row_operation( /** TRX_UNDO_PREV_IN_PURGE tells trx_undo_prev_version_build() that it is being called purge view and we would like to get the purge record even it is in the purge view (in normal case, it will return without -fetching the purge record */ +fetching the purge record) */ static constexpr ulint TRX_UNDO_PREV_IN_PURGE = 1; /** This tells trx_undo_prev_version_build() to fetch the old value in the undo log (which is the after image for an update) */ static constexpr ulint TRX_UNDO_GET_OLD_V_VALUE = 2; -/** indicate a call from row_vers_old_has_index_entry() */ +/** indicate a call from row_undo_mod_sec_is_unsafe() */ static constexpr ulint TRX_UNDO_CHECK_PURGEABILITY = 4; +/** indicate a call from row_purge_is_unsafe() */ +static constexpr ulint TRX_UNDO_CHECK_PURGE_PAGES = 8; + /** Build a previous version of a clustered index record. The caller must hold a latch on the index page of the clustered index record. -@param rec version of a clustered index record -@param index clustered index -@param offsets rec_get_offsets(rec, index) -@param heap memory heap from which the memory needed is - allocated -@param old_vers previous version or NULL if rec is the - first inserted version, or if history data - has been deleted (an error), or if the purge - could have removed the version - though it has not yet done so -@param v_heap memory heap used to create vrow - dtuple if it is not yet created. This heap - diffs from "heap" above in that it could be - prebuilt->old_vers_heap for selection -@param vrow virtual column info, if any -@param v_status status determine if it is going into this - function by purge thread or not. - And if we read "after image" of undo log +@param rec version of a clustered index record +@param index clustered index +@param offsets rec_get_offsets(rec, index) +@param heap memory heap from which the memory needed is allocated +@param old_vers previous version, or NULL if rec is the first inserted + version, or if history data has been deleted (an error), + or if the purge could have removed the version though + it has not yet done so +@param mtr mini-transaction +@param v_status TRX_UNDO_PREV_IN_PURGE, ... +@param v_heap memory heap used to create vrow dtuple if it is not yet + created. This heap diffs from "heap" above in that it could be + prebuilt->old_vers_heap for selection +@param vrow virtual column info, if any @return error code @retval DB_SUCCESS if previous version was successfully built, or if it was an insert or the undo record refers to the table before rebuild @retval DB_MISSING_HISTORY if the history is missing */ -dberr_t -trx_undo_prev_version_build( - const rec_t *rec, - dict_index_t *index, - rec_offs *offsets, - mem_heap_t *heap, - rec_t **old_vers, - mem_heap_t *v_heap, - dtuple_t **vrow, - ulint v_status); +dberr_t trx_undo_prev_version_build(const rec_t *rec, dict_index_t *index, + rec_offs *offsets, mem_heap_t *heap, + rec_t **old_vers, mtr_t *mtr, + ulint v_status, + mem_heap_t *v_heap, dtuple_t **vrow); /** Read from an undo log record a non-virtual column value. 
@param ptr pointer to remaining part of the undo record diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 0acb85307bd..27eb62841a6 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -500,9 +500,8 @@ void lock_sys_t::close() requesting record lock are brute force (BF). If they are check is this BF-BF wait correct and if not report BF wait and assert. -@param[in] lock_rec other waiting record lock -@param[in] trx trx requesting conflicting record lock -@param[in] type_mode lock type mode of requesting trx +@param lock other waiting lock +@param trx transaction requesting conflicting lock */ static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx, const unsigned type_mode = LOCK_NONE) diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index b86b30ba5e0..b15819ea923 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -540,17 +540,14 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept resize_target= size; resize_buf= static_cast(ptr); resize_flush_buf= static_cast(ptr2); + start_lsn= get_lsn(); + if (is_pmem()) - { resize_log.close(); - start_lsn= get_lsn(); - } else - { - memcpy_aligned<16>(resize_buf, buf, (buf_free + 15) & ~15); start_lsn= first_lsn + - (~lsn_t{write_size - 1} & (write_lsn - first_lsn)); - } + (~lsn_t{write_size - 1} & + (lsn_t{write_size - 1} + start_lsn - first_lsn)); } resize_lsn.store(start_lsn, std::memory_order_relaxed); status= success ? RESIZE_STARTED : RESIZE_FAILED; @@ -804,19 +801,26 @@ inline void log_t::persist(lsn_t lsn) noexcept #endif ATTRIBUTE_COLD ATTRIBUTE_NOINLINE -/** Write resize_buf to resize_log. -@param length the used length of resize_buf */ -void log_t::resize_write_buf(size_t length) noexcept +void log_t::resize_write_buf(const byte *b, size_t length) noexcept { const size_t block_size_1= write_size - 1; + ut_ad(b == resize_buf || b == resize_flush_buf); ut_ad(!(resize_target & block_size_1)); ut_ad(!(length & block_size_1)); ut_ad(length > block_size_1); ut_ad(length <= resize_target); - const lsn_t resizing{resize_in_progress()}; - ut_ad(resizing <= write_lsn); - lsn_t offset= START_OFFSET + - ((write_lsn - resizing) & ~lsn_t{block_size_1}) % + + int64_t d= int64_t(write_lsn - resize_in_progress()); + if (UNIV_UNLIKELY(d <= 0)) + { + d&= ~int64_t(block_size_1); + if (int64_t(d + length) <= 0) + return; + length+= d; + b-= d; + d= 0; + } + lsn_t offset= START_OFFSET + (lsn_t(d) & ~lsn_t{block_size_1}) % (resize_target - START_OFFSET); if (UNIV_UNLIKELY(offset + length > resize_target)) @@ -828,7 +832,7 @@ void log_t::resize_write_buf(size_t length) noexcept } ut_a(os_file_write_func(IORequestWrite, "ib_logfile101", resize_log.m_file, - buf, offset, length) == DB_SUCCESS); + b, offset, length) == DB_SUCCESS); } /** Write buf to ib_logfile0. 
@@ -862,6 +866,7 @@ template inline lsn_t log_t::write_buf() noexcept ut_ad(write_size_1 >= 511); const byte *const write_buf{buf}; + const byte *const re_write_buf{resize_buf}; offset&= ~lsn_t{write_size_1}; if (length <= write_size_1) @@ -875,8 +880,8 @@ template inline lsn_t log_t::write_buf() noexcept #else # ifdef HAVE_valgrind MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - length); - if (UNIV_LIKELY_NULL(resize_buf)) - MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) - length); + if (UNIV_LIKELY_NULL(re_write_buf)) + MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - length); # endif buf[length]= 0; /* allow recovery to catch EOF faster */ #endif @@ -896,15 +901,15 @@ template inline lsn_t log_t::write_buf() noexcept the current LSN are generated. */ #ifdef HAVE_valgrind MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free); - if (UNIV_LIKELY_NULL(resize_buf)) - MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) - + if (UNIV_LIKELY_NULL(re_write_buf)) + MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - new_buf_free); #endif buf[length]= 0; /* allow recovery to catch EOF faster */ length&= ~write_size_1; memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15); - if (UNIV_LIKELY_NULL(resize_buf)) - memcpy_aligned<16>(resize_flush_buf, resize_buf + length, + if (UNIV_LIKELY_NULL(re_write_buf)) + memcpy_aligned<16>(resize_flush_buf, re_write_buf + length, (new_buf_free + 15) & ~15); length+= write_size_1 + 1; } @@ -923,8 +928,8 @@ template inline lsn_t log_t::write_buf() noexcept /* Do the write to the log file */ log_write_buf(write_buf, length, offset); - if (UNIV_LIKELY_NULL(resize_buf)) - resize_write_buf(length); + if (UNIV_LIKELY_NULL(re_write_buf)) + resize_write_buf(re_write_buf, length); write_lsn= lsn; if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index c54cca4a2d1..e3a6dced764 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -4531,7 +4531,7 @@ dberr_t recv_recovery_from_checkpoint_start() ut_ad(recv_sys.pages.empty()); if (log_sys.format == log_t::FORMAT_3_23) { -early_exit: +func_exit: log_sys.latch.wr_unlock(); return err; } @@ -4547,7 +4547,7 @@ read_only_recovery: sql_print_warning("InnoDB: innodb_read_only" " prevents crash recovery"); err = DB_READ_ONLY; - goto early_exit; + goto func_exit; } if (recv_sys.is_corrupt_log()) { sql_print_error("InnoDB: Log scan aborted at LSN " @@ -4585,7 +4585,7 @@ read_only_recovery: rescan, missing_tablespace); if (err != DB_SUCCESS) { - goto early_exit; + goto func_exit; } if (missing_tablespace) { @@ -4607,7 +4607,7 @@ read_only_recovery: rescan, missing_tablespace); if (err != DB_SUCCESS) { - goto early_exit; + goto func_exit; } } while (missing_tablespace); @@ -4666,7 +4666,7 @@ read_only_recovery: if (recv_sys.lsn < log_sys.next_checkpoint_lsn) { err_exit: err = DB_ERROR; - goto early_exit; + goto func_exit; } if (!srv_read_only_mode && log_sys.is_latest()) { @@ -4690,7 +4690,7 @@ err_exit: ut_ad("log parsing error" == 0); mysql_mutex_unlock(&recv_sys.mutex); err = DB_CORRUPTION; - goto early_exit; + goto func_exit; } recv_sys.apply_log_recs = true; ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE @@ -4698,9 +4698,9 @@ err_exit: if (srv_operation == SRV_OPERATION_NORMAL) { err = recv_rename_files(); } - mysql_mutex_unlock(&recv_sys.mutex); recv_lsn_checks_on = true; + mysql_mutex_unlock(&recv_sys.mutex); /* The 
database is now ready to start almost normal processing of user transactions: transaction rollbacks and the application of the log @@ -4710,8 +4710,7 @@ err_exit: err = DB_CORRUPTION; } - log_sys.latch.wr_unlock(); - return err; + goto func_exit; } bool recv_dblwr_t::validate_page(const page_id_t page_id, diff --git a/storage/innobase/mtr/mtr0mtr.cc b/storage/innobase/mtr/mtr0mtr.cc index b2977bc3aad..db75f973eab 100644 --- a/storage/innobase/mtr/mtr0mtr.cc +++ b/storage/innobase/mtr/mtr0mtr.cc @@ -1312,6 +1312,15 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len, } } +inline void log_t::append(byte *&d, const void *s, size_t size) noexcept +{ + ut_ad(log_sys.latch_have_any()); + ut_ad(d + size <= log_sys.buf + + (log_sys.is_pmem() ? log_sys.file_size : log_sys.buf_size)); + memcpy(d, s, size); + d+= size; +} + template std::pair mtr_t::finish_writer(mtr_t *mtr, size_t len) diff --git a/storage/innobase/que/que0que.cc b/storage/innobase/que/que0que.cc index d910ee2a881..5e1e0686c97 100644 --- a/storage/innobase/que/que0que.cc +++ b/storage/innobase/que/que0que.cc @@ -166,40 +166,6 @@ que_thr_init_command( thr->state = QUE_THR_RUNNING; } -/**********************************************************************//** -Round robin scheduler. -@return a query thread of the graph moved to QUE_THR_RUNNING state, or -NULL; the query thread should be executed by que_run_threads by the -caller */ -que_thr_t* -que_fork_scheduler_round_robin( -/*===========================*/ - que_fork_t* fork, /*!< in: a query fork */ - que_thr_t* thr) /*!< in: current pos */ -{ - fork->trx->mutex_lock(); - - /* If no current, start first available. */ - if (thr == NULL) { - thr = UT_LIST_GET_FIRST(fork->thrs); - } else { - thr = UT_LIST_GET_NEXT(thrs, thr); - } - - if (thr) { - - fork->state = QUE_FORK_ACTIVE; - - fork->last_sel_node = NULL; - ut_ad(thr->state == QUE_THR_COMPLETED); - que_thr_init_command(thr); - } - - fork->trx->mutex_unlock(); - - return(thr); -} - /**********************************************************************//** Starts execution of a command in a query fork. Picks a query thread which is not in the QUE_THR_RUNNING state and moves it to that state. If none diff --git a/storage/innobase/read/read0read.cc b/storage/innobase/read/read0read.cc index 97eda7dba32..46d58326edf 100644 --- a/storage/innobase/read/read0read.cc +++ b/storage/innobase/read/read0read.cc @@ -160,7 +160,7 @@ may be pointing to garbage (an undo log record discarded by purge), but it will never be dereferenced, because the purge view is older than any active transaction. 
-For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
+For details see: row_undo_mod_sec_is_unsafe() and row_purge_poss_sec()
 */
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 963ec1899c6..09e30db0d64 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -2773,10 +2773,16 @@ avoid_bulk:
 ut_ad(index->table->skip_alter_undo);
 ut_ad(!entry->is_metadata());
+
+ /* If a foreign key exists and foreign key checks are enabled,
+ then avoid using bulk insert for the copy algorithm */
 if (innodb_alter_copy_bulk
 && !index->table->is_temporary()
 && !index->table->versioned()
- && !index->table->has_spatial_index()) {
+ && !index->table->has_spatial_index()
+ && (!trx->check_foreigns
+ || (index->table->foreign_set.empty()
+ && index->table->referenced_set.empty()))) {
 ut_ad(page_is_empty(block->page.frame));
 /* This code path has been executed at the start
 of the alter operation. Consecutive
diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc
index 882e7c64550..6af18ad213c 100644
--- a/storage/innobase/row/row0log.cc
+++ b/storage/innobase/row/row0log.cc
@@ -3821,7 +3821,7 @@ UndorecApplier::get_old_rec(const dtuple_t &tuple, dict_index_t *index,
 if (is_same(roll_ptr))
 return version;
 trx_undo_prev_version_build(version, index, *offsets, heap, &prev_version,
- nullptr, nullptr, 0);
+ &mtr, 0, nullptr, nullptr);
 version= prev_version;
 }
 while (version);
@@ -3990,7 +3990,7 @@ void UndorecApplier::log_update(const dtuple_t &tuple,
 copy_rec= rec_copy(mem_heap_alloc(
 heap, rec_offs_size(offsets)),
 match_rec, offsets);
 trx_undo_prev_version_build(match_rec, clust_index, offsets, heap,
- &prev_version, nullptr, nullptr, 0);
+ &prev_version, &mtr, 0, nullptr, nullptr);
 prev_offsets= rec_get_offsets(prev_version, clust_index, prev_offsets,
 clust_index->n_core_fields,
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index adac8ecfb37..ee6360655cd 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -267,6 +267,448 @@ row_purge_remove_clust_if_poss(
 return(false);
 }
+/** Check whether the virtual column values of a secondary index entry
+match those of the current clustered index record, which are recreated
+from information stored in the undo log
+@param[in] rec record in the clustered index
+@param[in] icentry the index entry built from a clustered index row
+@param[in] clust_index clustered index
+@param[in] clust_offsets offsets on the clustered index record
+@param[in] index the secondary index
+@param[in] ientry the secondary index entry
+@param[in] roll_ptr the rollback pointer for the purging record
+@param[in] trx_id trx id for the purging record
+@param[in,out] mtr mini-transaction
+@param[in,out] vrow dtuple holding the virtual rows (if needed)
+@return true if matches, false otherwise */
+static
+bool
+row_purge_vc_matches_cluster(
+ const rec_t* rec,
+ const dtuple_t* icentry,
+ dict_index_t* clust_index,
+ rec_offs* clust_offsets,
+ dict_index_t* index,
+ const dtuple_t* ientry,
+ roll_ptr_t roll_ptr,
+ trx_id_t trx_id,
+ mtr_t* mtr,
+ dtuple_t** vrow)
+{
+ const rec_t* version;
+ rec_t* prev_version;
+ mem_heap_t* heap2;
+ mem_heap_t* heap = NULL;
+ mem_heap_t* tuple_heap;
+ ulint num_v = dict_table_get_n_v_cols(index->table);
+ bool compare[REC_MAX_N_FIELDS];
+ ulint n_fields = dtuple_get_n_fields(ientry);
+ ulint n_non_v_col = 0;
+ ulint n_cmp_v_col = 0;
+ const dfield_t* field1;
+ dfield_t* field2;
+ ulint i;
+
+ /* First compare non-virtual columns (primary
keys) */
+ ut_ad(index->n_fields == n_fields);
+ ut_ad(n_fields == dtuple_get_n_fields(icentry));
+ ut_ad(mtr->memo_contains_page_flagged(rec,
+ MTR_MEMO_PAGE_S_FIX
+ | MTR_MEMO_PAGE_X_FIX));
+
+ {
+ const dfield_t* a = ientry->fields;
+ const dfield_t* b = icentry->fields;
+
+ for (const dict_field_t *ifield = index->fields,
+ *const end = &index->fields[index->n_fields];
+ ifield != end; ifield++, a++, b++) {
+ if (!ifield->col->is_virtual()) {
+ if (cmp_dfield_dfield(a, b)) {
+ return false;
+ }
+ n_non_v_col++;
+ }
+ }
+ }
+
+ tuple_heap = mem_heap_create(1024);
+
+ ut_ad(n_fields > n_non_v_col);
+
+ *vrow = dtuple_create_with_vcol(tuple_heap, 0, num_v);
+ dtuple_init_v_fld(*vrow);
+
+ for (i = 0; i < num_v; i++) {
+ dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
+ = DATA_MISSING;
+ compare[i] = false;
+ }
+
+ version = rec;
+
+ while (n_cmp_v_col < n_fields - n_non_v_col) {
+ heap2 = heap;
+ heap = mem_heap_create(1024);
+ roll_ptr_t cur_roll_ptr = row_get_rec_roll_ptr(
+ version, clust_index, clust_offsets);
+
+ ut_ad(cur_roll_ptr != 0);
+ ut_ad(roll_ptr != 0);
+
+ trx_undo_prev_version_build(
+ version, clust_index, clust_offsets,
+ heap, &prev_version, mtr,
+ TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE,
+ nullptr, vrow);
+
+ if (heap2) {
+ mem_heap_free(heap2);
+ }
+
+ if (!prev_version) {
+ /* Versions end here */
+ goto func_exit;
+ }
+
+ clust_offsets = rec_get_offsets(prev_version, clust_index,
+ NULL,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ ulint entry_len = dict_index_get_n_fields(index);
+
+ for (i = 0; i < entry_len; i++) {
+ const dict_field_t* ind_field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col = ind_field->col;
+ field1 = dtuple_get_nth_field(ientry, i);
+
+ if (!col->is_virtual()) {
+ continue;
+ }
+
+ const dict_v_col_t* v_col
+ = reinterpret_cast<const dict_v_col_t*>(col);
+ field2
+ = dtuple_get_nth_v_field(*vrow, v_col->v_pos);
+
+ if ((dfield_get_type(field2)->mtype != DATA_MISSING)
+ && (!compare[v_col->v_pos])) {
+
+ if (ind_field->prefix_len != 0
+ && !dfield_is_null(field2)) {
+ field2->len = unsigned(
+ dtype_get_at_most_n_mbchars(
+ field2->type.prtype,
+ field2->type.mbminlen,
+ field2->type.mbmaxlen,
+ ind_field->prefix_len,
+ field2->len,
+ static_cast<char*>
+ (field2->data)));
+ }
+
+ /* The index field mismatch */
+ if (cmp_dfield_dfield(field2, field1)) {
+ mem_heap_free(tuple_heap);
+ mem_heap_free(heap);
+ return(false);
+ }
+
+ compare[v_col->v_pos] = true;
+ n_cmp_v_col++;
+ }
+ }
+
+ trx_id_t rec_trx_id = row_get_rec_trx_id(
+ prev_version, clust_index, clust_offsets);
+
+ if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
+ break;
+ }
+
+ version = prev_version;
+ }
+
+func_exit:
+ if (n_cmp_v_col == 0) {
+ *vrow = NULL;
+ }
+
+ mem_heap_free(tuple_heap);
+ mem_heap_free(heap);
+
+ /* FIXME: In the case where n_cmp_v_col is not the same as
+ n_fields - n_non_v_col, a callback is needed to compare the rest
+ of the columns. For the time being, we will need to return true */
+ return (true);
+}
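/* A minimal sketch, assuming a single-byte charset, of why the loop
above truncates field2 with dtype_get_at_most_n_mbchars() before
cmp_dfield_dfield(): a prefix index stores only the first prefix_len
characters, so the value rebuilt from undo must be cut to the same
length before it can be compared with the index entry. All names here
are assumptions; real code counts characters via mbminlen/mbmaxlen,
not bytes. */
#include <algorithm>
#include <cstddef>
#include <string>

static std::string prefix(const std::string &value, size_t prefix_len)
{
  return value.substr(0, std::min(value.size(), prefix_len));
}

static bool prefix_matches(const std::string &computed_virtual_value,
                           const std::string &indexed_field,
                           size_t prefix_len)
{
  return prefix(computed_virtual_value, prefix_len) == indexed_field;
}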
+
+/** @return whether two data tuples are equal */
+bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2)
+{
+ ut_ad(tuple1.magic_n == DATA_TUPLE_MAGIC_N);
+ ut_ad(tuple2.magic_n == DATA_TUPLE_MAGIC_N);
+ ut_ad(dtuple_check_typed(&tuple1));
+ ut_ad(dtuple_check_typed(&tuple2));
+ ut_ad(tuple1.n_fields == tuple2.n_fields);
+
+ for (ulint i= 0; i < tuple1.n_fields; i++)
+ if (cmp_dfield_dfield(&tuple1.fields[i], &tuple2.fields[i]))
+ return false;
+ return true;
+}
+
+/** Find out whether a version of the record, where the version >= the
+current purge_sys.view, should have ientry as its secondary index entry.
+We check whether there is any non-delete-marked version of the record
+where the trx id >= purge view, and the secondary index entry == ientry;
+exactly in this case we return true.
+@param node purge node
+@param index secondary index
+@param ientry secondary index entry
+@param mtr mini-transaction
+@return whether ientry cannot be purged */
+static bool row_purge_is_unsafe(const purge_node_t &node,
+ dict_index_t *index,
+ const dtuple_t *ientry, mtr_t *mtr)
+{
+ const rec_t* rec = btr_pcur_get_rec(&node.pcur);
+ roll_ptr_t roll_ptr = node.roll_ptr;
+ trx_id_t trx_id = node.trx_id;
+ const rec_t* version;
+ rec_t* prev_version;
+ dict_index_t* clust_index = node.pcur.index();
+ rec_offs* clust_offsets;
+ mem_heap_t* heap;
+ dtuple_t* row;
+ const dtuple_t* entry;
+ dtuple_t* vrow = NULL;
+ mem_heap_t* v_heap = NULL;
+ dtuple_t* cur_vrow = NULL;
+
+ ut_ad(index->table == clust_index->table);
+ heap = mem_heap_create(1024);
+ clust_offsets = rec_get_offsets(rec, clust_index, NULL,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ if (dict_index_has_virtual(index)) {
+ v_heap = mem_heap_create(100);
+ }
+
+ if (!rec_get_deleted_flag(rec, rec_offs_comp(clust_offsets))) {
+ row_ext_t* ext;
+
+ /* The top of the stack of versions is locked by the
+ mtr holding a latch on the page containing the
+ clustered index record. The bottom of the stack is
+ locked by the fact that the purge_sys.view must
+ 'overtake' any read view of an active transaction.
+ Thus, it is safe to fetch the prefixes for
+ externally stored columns. */
+ row = row_build(ROW_COPY_POINTERS, clust_index,
+ rec, clust_offsets,
+ NULL, NULL, NULL, &ext, heap);
+
+ if (dict_index_has_virtual(index)) {
+
+
+#ifdef DBUG_OFF
+# define dbug_v_purge false
+#else /* DBUG_OFF */
+ bool dbug_v_purge = false;
+#endif /* DBUG_OFF */
+
+ DBUG_EXECUTE_IF(
+ "ib_purge_virtual_index_callback",
+ dbug_v_purge = true;);
+
+ roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr(
+ rec, clust_index, clust_offsets);
+
+ /* if the row is newly inserted, then the virtual
+ columns need to be computed */
+ if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
+ || dbug_v_purge) {
+
+ if (!row_vers_build_clust_v_col(
+ row, clust_index, index, heap)) {
+ goto unsafe_to_purge;
+ }
+
+ entry = row_build_index_entry(
+ row, ext, index, heap);
+ if (entry && dtuple_coll_eq(*ientry, *entry)) {
+ goto unsafe_to_purge;
+ }
+ } else {
+ /* Build index entry out of row */
+ entry = row_build_index_entry(row, ext, index, heap);
+ /* entry could only be NULL if
+ the clustered index record is an uncommitted
+ inserted record whose BLOBs have not been
+ written yet. The secondary index record
+ can be safely removed, because it cannot
+ possibly refer to this incomplete
+ clustered index record.
(Insert would + always first be completed for the + clustered index record, then proceed to + secondary indexes.) */ + + if (entry && row_purge_vc_matches_cluster( + rec, entry, + clust_index, clust_offsets, + index, ientry, roll_ptr, + trx_id, mtr, &vrow)) { + goto unsafe_to_purge; + } + } + clust_offsets = rec_get_offsets(rec, clust_index, NULL, + clust_index + ->n_core_fields, + ULINT_UNDEFINED, &heap); + } else { + + entry = row_build_index_entry( + row, ext, index, heap); + + /* If entry == NULL, the record contains unset BLOB + pointers. This must be a freshly inserted record. If + this is called from + row_purge_remove_sec_if_poss_low(), the thread will + hold latches on the clustered index and the secondary + index. Because the insert works in three steps: + + (1) insert the record to clustered index + (2) store the BLOBs and update BLOB pointers + (3) insert records to secondary indexes + + the purge thread can safely ignore freshly inserted + records and delete the secondary index record. The + thread that inserted the new record will be inserting + the secondary index records. */ + + /* NOTE that we cannot do the comparison as binary + fields because the row is maybe being modified so that + the clustered index record has already been updated to + a different binary value in a char field, but the + collation identifies the old and new value anyway! */ + if (entry && dtuple_coll_eq(*ientry, *entry)) { +unsafe_to_purge: + mem_heap_free(heap); + + if (v_heap) { + mem_heap_free(v_heap); + } + return true; + } + } + } else if (dict_index_has_virtual(index)) { + /* The current cluster index record could be + deleted, but the previous version of it might not. We will + need to get the virtual column data from undo record + associated with current cluster index */ + + cur_vrow = row_vers_build_cur_vrow( + rec, clust_index, &clust_offsets, + index, trx_id, roll_ptr, heap, v_heap, mtr); + } + + version = rec; + + for (;;) { + mem_heap_t* heap2 = heap; + heap = mem_heap_create(1024); + vrow = NULL; + + trx_undo_prev_version_build(version, + clust_index, clust_offsets, + heap, &prev_version, mtr, + TRX_UNDO_CHECK_PURGE_PAGES, + nullptr, + dict_index_has_virtual(index) + ? &vrow : nullptr); + mem_heap_free(heap2); /* free version and clust_offsets */ + + if (!prev_version) { + /* Versions end here */ + mem_heap_free(heap); + + if (v_heap) { + mem_heap_free(v_heap); + } + + return false; + } + + clust_offsets = rec_get_offsets(prev_version, clust_index, + NULL, + clust_index->n_core_fields, + ULINT_UNDEFINED, &heap); + + if (dict_index_has_virtual(index)) { + if (vrow) { + if (dtuple_vcol_data_missing(*vrow, *index)) { + goto nochange_index; + } + /* Keep the virtual row info for the next + version, unless it is changed */ + mem_heap_empty(v_heap); + cur_vrow = dtuple_copy(vrow, v_heap); + dtuple_dup_v_fld(cur_vrow, v_heap); + } + + if (!cur_vrow) { + /* Nothing for this index has changed, + continue */ +nochange_index: + version = prev_version; + continue; + } + } + + if (!rec_get_deleted_flag(prev_version, + rec_offs_comp(clust_offsets))) { + row_ext_t* ext; + + /* The stack of versions is locked by mtr. + Thus, it is safe to fetch the prefixes for + externally stored columns. 
*/ + row = row_build(ROW_COPY_POINTERS, clust_index, + prev_version, clust_offsets, + NULL, NULL, NULL, &ext, heap); + + if (dict_index_has_virtual(index)) { + ut_ad(cur_vrow); + ut_ad(row->n_v_fields == cur_vrow->n_v_fields); + dtuple_copy_v_fields(row, cur_vrow); + } + + entry = row_build_index_entry(row, ext, index, heap); + + /* If entry == NULL, the record contains unset + BLOB pointers. This must be a freshly + inserted record that we can safely ignore. + For the justification, see the comments after + the previous row_build_index_entry() call. */ + + /* NOTE that we cannot do the comparison as binary + fields because maybe the secondary index record has + already been updated to a different binary value in + a char field, but the collation identifies the old + and new value anyway! */ + + if (entry && dtuple_coll_eq(*ientry, *entry)) { + goto unsafe_to_purge; + } + } + + version = prev_version; + } +} + /** Determines if it is possible to remove a secondary index entry. Removal is possible if the secondary index entry does not refer to any not delete marked version of a clustered index record where DB_TRX_ID @@ -280,67 +722,45 @@ would refer to. However, in that case, the user transaction would also re-insert the secondary index entry after purge has removed it and released the leaf page latch. -@param[in,out] node row purge node -@param[in] index secondary index -@param[in] entry secondary index entry -@param[in,out] sec_pcur secondary index cursor or NULL - if it is called for purge buffering - operation. -@param[in,out] sec_mtr mini-transaction which holds - secondary index entry or NULL if it is - called for purge buffering operation. -@param[in] is_tree true=pessimistic purge, - false=optimistic (leaf-page only) -@return true if the secondary index record can be purged */ -static -bool -row_purge_poss_sec( - purge_node_t* node, - dict_index_t* index, - const dtuple_t* entry, - btr_pcur_t* sec_pcur, - mtr_t* sec_mtr, - bool is_tree) +@param node row purge node +@param index secondary index +@param entry secondary index entry +@param mtr mini-transaction for looking up clustered index +@return whether the secondary index record can be purged */ +static bool row_purge_poss_sec(purge_node_t *node, dict_index_t *index, + const dtuple_t *entry, mtr_t *mtr) { - bool can_delete; - mtr_t mtr; + ut_ad(!index->is_clust()); + const auto savepoint= mtr->get_savepoint(); + bool can_delete= !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr); - ut_ad(!dict_index_is_clust(index)); + if (!can_delete) + { + ut_ad(node->pcur.pos_state == BTR_PCUR_IS_POSITIONED); + can_delete= !row_purge_is_unsafe(*node, index, entry, mtr); + node->pcur.pos_state = BTR_PCUR_WAS_POSITIONED; + node->pcur.latch_mode= BTR_NO_LATCHES; + } - mtr_start(&mtr); - - can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr) - || !row_vers_old_has_index_entry(true, - btr_pcur_get_rec(&node->pcur), - &mtr, index, entry, - node->roll_ptr, node->trx_id); - - /* Persistent cursor is closed if reposition fails. */ - if (node->found_clust) { - btr_pcur_commit_specify_mtr(&node->pcur, &mtr); - } else { - mtr.commit(); - } - - ut_ad(mtr.has_committed()); - - return can_delete; + mtr->rollback_to_savepoint(savepoint); + return can_delete; } -/*************************************************************** -Removes a secondary index entry if possible, by modifying the -index tree. Does not try to buffer the delete. 
-@return TRUE if success or if not found */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -ibool -row_purge_remove_sec_if_poss_tree( -/*==============================*/ - purge_node_t* node, /*!< in: row purge node */ - dict_index_t* index, /*!< in: index */ - const dtuple_t* entry) /*!< in: index entry */ +__attribute__((nonnull, warn_unused_result)) +/** Remove a secondary index entry if possible, by modifying the index tree. +@param node purge node +@param index secondary index +@param entry index entry +@param page_max_trx_id the PAGE_MAX_TRX_ID + when row_purge_remove_sec_if_poss_leaf() was invoked +@return whether the operation succeeded */ +static bool row_purge_remove_sec_if_poss_tree(purge_node_t *node, + dict_index_t *index, + const dtuple_t *entry, + trx_id_t page_max_trx_id) { btr_pcur_t pcur; - ibool success = TRUE; + bool success = true; dberr_t err; mtr_t mtr; @@ -371,7 +791,9 @@ row_purge_remove_sec_if_poss_tree( which cannot be purged yet, requires its existence. If some requires, we should do nothing. */ - if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) { + if (page_max_trx_id + == page_get_max_trx_id(btr_cur_get_page(&pcur.btr_cur)) + || row_purge_poss_sec(node, index, entry, &mtr)) { /* Remove the index record, which should have been marked for deletion. */ @@ -410,26 +832,23 @@ row_purge_remove_sec_if_poss_tree( func_exit: btr_pcur_close(&pcur); // FIXME: need this? mtr.commit(); - - return(success); + return success; } -/*************************************************************** -Removes a secondary index entry without modifying the index tree, -if possible. -@retval true if success or if not found -@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */ -static MY_ATTRIBUTE((nonnull, warn_unused_result)) -bool -row_purge_remove_sec_if_poss_leaf( -/*==============================*/ - purge_node_t* node, /*!< in: row purge node */ - dict_index_t* index, /*!< in: index */ - const dtuple_t* entry) /*!< in: index entry */ +__attribute__((nonnull, warn_unused_result)) +/** Remove a secondary index entry if possible, without modifying the tree. +@param node purge node +@param index secondary index +@param entry index entry +@return PAGE_MAX_TRX_ID for row_purge_remove_sec_if_poss_tree() +@retval 0 if success or if not found */ +static trx_id_t row_purge_remove_sec_if_poss_leaf(purge_node_t *node, + dict_index_t *index, + const dtuple_t *entry) { mtr_t mtr; btr_pcur_t pcur; - bool success = true; + trx_id_t page_max_trx_id = 0; log_free_check(); ut_ad(index->table == node->table); @@ -453,7 +872,7 @@ row_purge_remove_sec_if_poss_leaf( found: /* Before attempting to purge a record, check if it is safe to do so. */ - if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) { + if (row_purge_poss_sec(node, index, entry, &mtr)) { btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); /* Only delete-marked records should be purged. 
*/
@@ -494,8 +913,11 @@ found:
 }
 }
- success = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
- != DB_FAIL;
+ if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)
+ == DB_FAIL) {
+ page_max_trx_id = page_get_max_trx_id(
+ btr_cur_get_page(btr_cur));
+ }
 }
 }
@@ -503,7 +925,7 @@ func_exit:
 mtr.commit();
cleanup:
 btr_pcur_close(&pcur);
- return success;
+ return page_max_trx_id;
 }
/***********************************************************//**
@@ -516,38 +938,21 @@ row_purge_remove_sec_if_poss(
 dict_index_t* index, /*!< in: index */
 const dtuple_t* entry) /*!< in: index entry */
 {
- ibool success;
- ulint n_tries = 0;
+ if (UNIV_UNLIKELY(!entry))
+ /* The node->row must have lacked some fields of this index. This
+ is possible when the undo log record was written before this index
+ was created. */
+ return;
- /* fputs("Purge: Removing secondary record\n", stderr); */
-
- if (!entry) {
- /* The node->row must have lacked some fields of this
- index. This is possible when the undo log record was
- written before this index was created. */
- return;
- }
-
- if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
-
- return;
- }
-retry:
- success = row_purge_remove_sec_if_poss_tree(node, index, entry);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- n_tries++;
-
- std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- ut_a(success);
+ if (trx_id_t page_max_trx_id=
+ row_purge_remove_sec_if_poss_leaf(node, index, entry))
+ for (auto n_tries= BTR_CUR_RETRY_DELETE_N_TIMES;
+ !row_purge_remove_sec_if_poss_tree(node, index, entry,
+ page_max_trx_id);
+ std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME))
+ /* The delete operation may fail if we have little
+ file space left (if innodb_file_per_table=0?) */
+ ut_a(--n_tries);
 }
/***********************************************************//**
diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc
index 46bfb482223..970dce3c90d 100644
--- a/storage/innobase/row/row0sel.cc
+++ b/storage/innobase/row/row0sel.cc
@@ -6612,7 +6612,7 @@ rec_loop:
 err= trx_undo_prev_version_build(clust_rec, clust_index, clust_offsets,
 vers_heap, &old_vers,
- nullptr, nullptr, 0);
+ &mtr, 0, nullptr, nullptr);
 if (prev_heap)
 mem_heap_free(prev_heap);
 if (err != DB_SUCCESS)
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 38d19882de2..3493d624f1f 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -469,6 +469,146 @@ func_exit:
 return(err);
 }
+bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2);
+
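/* A minimal sketch, with toy types, of the version walk that both
row_purge_is_unsafe() above and row_undo_mod_sec_is_unsafe() below
perform: keep rebuilding older versions of the clustered index record
from the undo log, and if any non-delete-marked version would still
produce the same secondary index entry, that entry must not be
removed. All names here are assumed; only the standard library is
used. */
#include <optional>
#include <string>
#include <vector>

struct toy_version { bool delete_marked; std::string sec_entry; };

/* stand-in for trx_undo_prev_version_build() */
static std::optional<toy_version>
toy_prev_version(const std::vector<toy_version> &undo, size_t &pos)
{
  if (pos == 0)
    return std::nullopt;        /* versions end here */
  return undo[--pos];
}

static bool toy_sec_is_unsafe(const std::vector<toy_version> &undo,
                              size_t pos, const std::string &ientry)
{
  while (auto v= toy_prev_version(undo, pos))
    if (!v->delete_marked && v->sec_entry == ientry)
      return true;              /* an old version still needs ientry */
  return false;                 /* safe to remove the index entry */
}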
+/** Find out if an accessible version of a clustered index record
+corresponds to a secondary index entry.
+@param rec record in a latched clustered index page
+@param index secondary index
+@param ientry secondary index entry
+@param mtr mini-transaction
+@return whether an accessible non-delete-marked version of rec
+corresponds to ientry */
+static bool row_undo_mod_sec_is_unsafe(const rec_t *rec, dict_index_t *index,
+ const dtuple_t *ientry, mtr_t *mtr)
+{
+ const rec_t* version;
+ rec_t* prev_version;
+ dict_index_t* clust_index;
+ rec_offs* clust_offsets;
+ mem_heap_t* heap;
+ mem_heap_t* heap2;
+ dtuple_t* row;
+ const dtuple_t* entry;
+ ulint comp;
+ dtuple_t* vrow = NULL;
+ mem_heap_t* v_heap = NULL;
+ dtuple_t* cur_vrow = NULL;
+
+ clust_index = dict_table_get_first_index(index->table);
+
+ comp = page_rec_is_comp(rec);
+ ut_ad(!dict_table_is_comp(index->table) == !comp);
+ heap = mem_heap_create(1024);
+ clust_offsets = rec_get_offsets(rec, clust_index, NULL,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ if (dict_index_has_virtual(index)) {
+ v_heap = mem_heap_create(100);
+ /* The current clustered index record could be
+ deleted, but the previous version of it might not be. We will
+ need to get the virtual column data from the undo record
+ associated with the current clustered index record */
+
+ cur_vrow = row_vers_build_cur_vrow(
+ rec, clust_index, &clust_offsets,
+ index, 0, 0, heap, v_heap, mtr);
+ }
+
+ version = rec;
+
+ for (;;) {
+ heap2 = heap;
+ heap = mem_heap_create(1024);
+ vrow = NULL;
+
+ trx_undo_prev_version_build(version,
+ clust_index, clust_offsets,
+ heap, &prev_version,
+ mtr, TRX_UNDO_CHECK_PURGEABILITY,
+ nullptr,
+ dict_index_has_virtual(index)
+ ? &vrow : nullptr);
+ mem_heap_free(heap2); /* free version and clust_offsets */
+
+ if (!prev_version) {
+ break;
+ }
+
+ clust_offsets = rec_get_offsets(prev_version, clust_index,
+ NULL,
+ clust_index->n_core_fields,
+ ULINT_UNDEFINED, &heap);
+
+ if (dict_index_has_virtual(index)) {
+ if (vrow) {
+ if (dtuple_vcol_data_missing(*vrow, *index)) {
+ goto nochange_index;
+ }
+ /* Keep the virtual row info for the next
+ version, unless it is changed */
+ mem_heap_empty(v_heap);
+ cur_vrow = dtuple_copy(vrow, v_heap);
+ dtuple_dup_v_fld(cur_vrow, v_heap);
+ }
+
+ if (!cur_vrow) {
+ /* Nothing for this index has changed,
+ continue */
+nochange_index:
+ version = prev_version;
+ continue;
+ }
+ }
+
+ if (!rec_get_deleted_flag(prev_version, comp)) {
+ row_ext_t* ext;
+
+ /* The stack of versions is locked by mtr.
+ Thus, it is safe to fetch the prefixes for
+ externally stored columns. */
+ row = row_build(ROW_COPY_POINTERS, clust_index,
+ prev_version, clust_offsets,
+ NULL, NULL, NULL, &ext, heap);
+
+ if (dict_index_has_virtual(index)) {
+ ut_ad(cur_vrow);
+ ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
+ dtuple_copy_v_fields(row, cur_vrow);
+ }
+
+ entry = row_build_index_entry(row, ext, index, heap);
+
+ /* If entry == NULL, the record contains unset
+ BLOB pointers. This must be a freshly
+ inserted record that we can safely ignore.
+ For the justification, see the comments after
+ the previous row_build_index_entry() call. */
+
+ /* NOTE that we cannot do the comparison as binary
+ fields because maybe the secondary index record has
+ already been updated to a different binary value in
+ a char field, but the collation identifies the old
+ and new value anyway!
*/ + + if (entry && dtuple_coll_eq(*ientry, *entry)) { + break; + } + } + + version = prev_version; + } + + mem_heap_free(heap); + + if (v_heap) { + mem_heap_free(v_heap); + } + + return !!prev_version; +} + /***********************************************************//** Delete marks or removes a secondary index entry if found. @return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */ @@ -487,7 +627,6 @@ row_undo_mod_del_mark_or_remove_sec_low( btr_cur_t* btr_cur; dberr_t err = DB_SUCCESS; mtr_t mtr; - mtr_t mtr_vers; const bool modify_leaf = mode == BTR_MODIFY_LEAF; row_mtr_start(&mtr, index); @@ -543,17 +682,14 @@ found: which cannot be purged yet, requires its existence. If some requires, we should delete mark the record. */ - mtr_vers.start(); - - ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr_vers) == - btr_pcur_t::SAME_ALL); + ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr) == + btr_pcur_t::SAME_ALL); /* For temporary table, we can skip to check older version of clustered index entry, because there is no MVCC or purge. */ if (node->table->is_temporary() - || row_vers_old_has_index_entry( - false, btr_pcur_get_rec(&node->pcur), - &mtr_vers, index, entry, 0, 0)) { + || row_undo_mod_sec_is_unsafe( + btr_pcur_get_rec(&node->pcur), index, entry, &mtr)) { btr_rec_set_deleted(btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur), &mtr); } else { @@ -587,7 +723,9 @@ found: } } - btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers); + ut_ad(node->pcur.pos_state == BTR_PCUR_IS_POSITIONED); + node->pcur.pos_state = BTR_PCUR_WAS_POSITIONED; + node->pcur.latch_mode = BTR_NO_LATCHES; func_exit: btr_pcur_close(&pcur); diff --git a/storage/innobase/row/row0upd.cc b/storage/innobase/row/row0upd.cc index 76bd1eff9f1..ecf37fbf719 100644 --- a/storage/innobase/row/row0upd.cc +++ b/storage/innobase/row/row0upd.cc @@ -702,7 +702,7 @@ fetch; output: fetched length of the prefix @param[in,out] heap heap where to allocate @return BLOB prefix @retval NULL if the record is incomplete (should only happen -in row_vers_vc_matches_cluster() executed concurrently with another purge) */ +in row_purge_vc_matches_cluster() executed concurrently with another purge) */ static byte* row_upd_ext_fetch( diff --git a/storage/innobase/row/row0vers.cc b/storage/innobase/row/row0vers.cc index c3acf325f5c..896b7def41d 100644 --- a/storage/innobase/row/row0vers.cc +++ b/storage/innobase/row/row0vers.cc @@ -194,8 +194,8 @@ row_vers_impl_x_locked_low( trx_undo_prev_version_build( version, clust_index, clust_offsets, - heap, &prev_version, NULL, - dict_index_has_virtual(index) ? &vrow : NULL, 0); + heap, &prev_version, mtr, 0, NULL, + dict_index_has_virtual(index) ? &vrow : NULL); ut_d(trx->mutex_lock()); const bool committed = trx_state_eq( @@ -446,7 +446,6 @@ row_vers_impl_x_locked( @param[in] clust_index clustered index @param[in] index the secondary index @param[in] heap heap used to build virtual dtuple. 
*/ -static bool row_vers_build_clust_v_col( dtuple_t* row, @@ -490,26 +489,25 @@ row_vers_build_clust_v_col( } /** Build latest virtual column data from undo log -@param[in] in_purge whether this is the purge thread @param[in] rec clustered index record @param[in] clust_index clustered index @param[in,out] clust_offsets offsets on the clustered index record @param[in] index the secondary index +@param[in] trx_id transaction ID on the purging record, + or 0 if called outside purge @param[in] roll_ptr the rollback pointer for the purging record -@param[in] trx_id trx id for the purging record @param[in,out] v_heap heap used to build vrow @param[out] v_row dtuple holding the virtual rows @param[in,out] mtr mtr holding the latch on rec */ static void row_vers_build_cur_vrow_low( - bool in_purge, const rec_t* rec, dict_index_t* clust_index, rec_offs* clust_offsets, dict_index_t* index, - roll_ptr_t roll_ptr, trx_id_t trx_id, + roll_ptr_t roll_ptr, mem_heap_t* v_heap, dtuple_t** vrow, mtr_t* mtr) @@ -539,7 +537,7 @@ row_vers_build_cur_vrow_low( /* If this is called by purge thread, set TRX_UNDO_PREV_IN_PURGE bit to search the undo log until we hit the current undo log with roll_ptr */ - const ulint status = in_purge + const ulint status = trx_id ? TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE : TRX_UNDO_GET_OLD_V_VALUE; @@ -551,7 +549,7 @@ row_vers_build_cur_vrow_low( trx_undo_prev_version_build( version, clust_index, clust_offsets, - heap, &prev_version, NULL, vrow, status); + heap, &prev_version, mtr, status, nullptr, vrow); if (heap2) { mem_heap_free(heap2); @@ -603,212 +601,27 @@ row_vers_build_cur_vrow_low( mem_heap_free(heap); } -/** Check a virtual column value index secondary virtual index matches -that of current cluster index record, which is recreated from information -stored in undo log -@param[in] rec record in the clustered index -@param[in] icentry the index entry built from a cluster row -@param[in] clust_index cluster index -@param[in] clust_offsets offsets on the cluster record -@param[in] index the secondary index -@param[in] ientry the secondary index entry -@param[in] roll_ptr the rollback pointer for the purging record -@param[in] trx_id trx id for the purging record -@param[in,out] v_heap heap used to build virtual dtuple -@param[in,out] v_row dtuple holding the virtual rows (if needed) -@param[in] mtr mtr holding the latch on rec -@return true if matches, false otherwise */ -static -bool -row_vers_vc_matches_cluster( - const rec_t* rec, - const dtuple_t* icentry, - dict_index_t* clust_index, - rec_offs* clust_offsets, - dict_index_t* index, - const dtuple_t* ientry, - roll_ptr_t roll_ptr, - trx_id_t trx_id, - mem_heap_t* v_heap, - dtuple_t** vrow, - mtr_t* mtr) -{ - const rec_t* version; - rec_t* prev_version; - mem_heap_t* heap2; - mem_heap_t* heap = NULL; - mem_heap_t* tuple_heap; - ulint num_v = dict_table_get_n_v_cols(index->table); - bool compare[REC_MAX_N_FIELDS]; - ulint n_fields = dtuple_get_n_fields(ientry); - ulint n_non_v_col = 0; - ulint n_cmp_v_col = 0; - const dfield_t* field1; - dfield_t* field2; - ulint i; - - /* First compare non-virtual columns (primary keys) */ - ut_ad(index->n_fields == n_fields); - ut_ad(n_fields == dtuple_get_n_fields(icentry)); - ut_ad(mtr->memo_contains_page_flagged(rec, - MTR_MEMO_PAGE_S_FIX - | MTR_MEMO_PAGE_X_FIX)); - - { - const dfield_t* a = ientry->fields; - const dfield_t* b = icentry->fields; - - for (const dict_field_t *ifield = index->fields, - *const end = &index->fields[index->n_fields]; - ifield != end; ifield++, 
a++, b++) { - if (!ifield->col->is_virtual()) { - if (cmp_dfield_dfield(a, b)) { - return false; - } - n_non_v_col++; - } - } - } - - tuple_heap = mem_heap_create(1024); - - ut_ad(n_fields > n_non_v_col); - - *vrow = dtuple_create_with_vcol(v_heap ? v_heap : tuple_heap, 0, num_v); - dtuple_init_v_fld(*vrow); - - for (i = 0; i < num_v; i++) { - dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype - = DATA_MISSING; - compare[i] = false; - } - - version = rec; - - while (n_cmp_v_col < n_fields - n_non_v_col) { - heap2 = heap; - heap = mem_heap_create(1024); - roll_ptr_t cur_roll_ptr = row_get_rec_roll_ptr( - version, clust_index, clust_offsets); - - ut_ad(cur_roll_ptr != 0); - ut_ad(roll_ptr != 0); - - trx_undo_prev_version_build( - version, clust_index, clust_offsets, - heap, &prev_version, NULL, vrow, - TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE); - - if (heap2) { - mem_heap_free(heap2); - } - - if (!prev_version) { - /* Versions end here */ - goto func_exit; - } - - clust_offsets = rec_get_offsets(prev_version, clust_index, - NULL, - clust_index->n_core_fields, - ULINT_UNDEFINED, &heap); - - ulint entry_len = dict_index_get_n_fields(index); - - for (i = 0; i < entry_len; i++) { - const dict_field_t* ind_field - = dict_index_get_nth_field(index, i); - const dict_col_t* col = ind_field->col; - field1 = dtuple_get_nth_field(ientry, i); - - if (!col->is_virtual()) { - continue; - } - - const dict_v_col_t* v_col - = reinterpret_cast(col); - field2 - = dtuple_get_nth_v_field(*vrow, v_col->v_pos); - - if ((dfield_get_type(field2)->mtype != DATA_MISSING) - && (!compare[v_col->v_pos])) { - - if (ind_field->prefix_len != 0 - && !dfield_is_null(field2)) { - field2->len = unsigned( - dtype_get_at_most_n_mbchars( - field2->type.prtype, - field2->type.mbminlen, - field2->type.mbmaxlen, - ind_field->prefix_len, - field2->len, - static_cast - (field2->data))); - } - - /* The index field mismatch */ - if (v_heap - || cmp_dfield_dfield(field2, field1)) { - if (v_heap) { - dtuple_dup_v_fld(*vrow, v_heap); - } - - mem_heap_free(tuple_heap); - mem_heap_free(heap); - return(false); - } - - compare[v_col->v_pos] = true; - n_cmp_v_col++; - } - } - - trx_id_t rec_trx_id = row_get_rec_trx_id( - prev_version, clust_index, clust_offsets); - - if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) { - break; - } - - version = prev_version; - } - -func_exit: - if (n_cmp_v_col == 0) { - *vrow = NULL; - } - - mem_heap_free(tuple_heap); - mem_heap_free(heap); - - /* FIXME: In the case of n_cmp_v_col is not the same as - n_fields - n_non_v_col, callback is needed to compare the rest - columns. 
At the timebeing, we will need to return true */ - return (true); -} - /** Build a dtuple contains virtual column data for current cluster index @param[in] in_purge called by purge thread @param[in] rec cluster index rec @param[in] clust_index cluster index @param[in] clust_offsets cluster rec offset @param[in] index secondary index +@param[in] trx_id transaction ID on the purging record, + or 0 if called outside purge @param[in] roll_ptr roll_ptr for the purge record -@param[in] trx_id transaction ID on the purging record @param[in,out] heap heap memory -@param[in,out] v_heap heap memory to keep virtual colum dtuple -@param[in] mtr mtr holding the latch on rec +@param[in,out] v_heap heap memory to keep virtual column tuple +@param[in,out] mtr mini-transaction @return dtuple contains virtual column data */ -static dtuple_t* row_vers_build_cur_vrow( - bool in_purge, const rec_t* rec, dict_index_t* clust_index, rec_offs** clust_offsets, dict_index_t* index, - roll_ptr_t roll_ptr, trx_id_t trx_id, + roll_ptr_t roll_ptr, mem_heap_t* heap, mem_heap_t* v_heap, mtr_t* mtr) @@ -841,8 +654,8 @@ row_vers_build_cur_vrow( } else { /* Try to fetch virtual column data from undo log */ row_vers_build_cur_vrow_low( - in_purge, rec, clust_index, *clust_offsets, - index, roll_ptr, trx_id, v_heap, &cur_vrow, mtr); + rec, clust_index, *clust_offsets, + index, trx_id, roll_ptr, v_heap, &cur_vrow, mtr); } *clust_offsets = rec_get_offsets(rec, clust_index, NULL, @@ -851,312 +664,28 @@ row_vers_build_cur_vrow( return(cur_vrow); } -/** @return whether two data tuples are equal */ -static bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2) -{ - ut_ad(tuple1.magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(tuple2.magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(dtuple_check_typed(&tuple1)); - ut_ad(dtuple_check_typed(&tuple2)); - ut_ad(tuple1.n_fields == tuple2.n_fields); - - for (ulint i= 0; i < tuple1.n_fields; i++) - if (cmp_dfield_dfield(&tuple1.fields[i], &tuple2.fields[i])) - return false; - return true; -} - /** Find out whether data tuple has missing data type for indexed virtual column. @param tuple data tuple @param index virtual index @return true if tuple has missing column type */ -static bool dtuple_vcol_data_missing(const dtuple_t &tuple, - dict_index_t *index) +bool dtuple_vcol_data_missing(const dtuple_t &tuple, + const dict_index_t &index) { - for (ulint i= 0; i < index->n_uniq; i++) + for (ulint i= 0; i < index.n_uniq; i++) { - dict_col_t *col= index->fields[i].col; + dict_col_t *col= index.fields[i].col; if (!col->is_virtual()) continue; dict_v_col_t *vcol= reinterpret_cast(col); - for (ulint j= 0; j < index->table->n_v_cols; j++) - { - if (vcol == &index->table->v_cols[j] - && tuple.v_fields[j].type.mtype == DATA_MISSING) + for (ulint j= 0; j < index.table->n_v_cols; j++) + if (vcol == &index.table->v_cols[j] && + tuple.v_fields[j].type.mtype == DATA_MISSING) return true; - } } return false; } -/** Finds out if a version of the record, where the version >= the current -purge_sys.view, should have ientry as its secondary index entry. We check -if there is any not delete marked version of the record where the trx -id >= purge view, and the secondary index entry == ientry; exactly in -this case we return TRUE. 
-@param[in] also_curr TRUE if also rec is included in the versions - to search; otherwise only versions prior - to it are searched -@param[in] rec record in the clustered index; the caller - must have a latch on the page -@param[in] mtr mtr holding the latch on rec; it will - also hold the latch on purge_view -@param[in] index secondary index -@param[in] ientry secondary index entry -@param[in] roll_ptr roll_ptr for the purge record -@param[in] trx_id transaction ID on the purging record -@return TRUE if earlier version should have */ -bool -row_vers_old_has_index_entry( - bool also_curr, - const rec_t* rec, - mtr_t* mtr, - dict_index_t* index, - const dtuple_t* ientry, - roll_ptr_t roll_ptr, - trx_id_t trx_id) -{ - const rec_t* version; - rec_t* prev_version; - dict_index_t* clust_index; - rec_offs* clust_offsets; - mem_heap_t* heap; - mem_heap_t* heap2; - dtuple_t* row; - const dtuple_t* entry; - ulint comp; - dtuple_t* vrow = NULL; - mem_heap_t* v_heap = NULL; - dtuple_t* cur_vrow = NULL; - - ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX - | MTR_MEMO_PAGE_S_FIX)); - clust_index = dict_table_get_first_index(index->table); - - comp = page_rec_is_comp(rec); - ut_ad(!dict_table_is_comp(index->table) == !comp); - heap = mem_heap_create(1024); - clust_offsets = rec_get_offsets(rec, clust_index, NULL, - clust_index->n_core_fields, - ULINT_UNDEFINED, &heap); - - if (dict_index_has_virtual(index)) { - v_heap = mem_heap_create(100); - } - - DBUG_EXECUTE_IF("ib_purge_virtual_index_crash", - DBUG_SUICIDE();); - - if (also_curr && !rec_get_deleted_flag(rec, comp)) { - row_ext_t* ext; - - /* The top of the stack of versions is locked by the - mtr holding a latch on the page containing the - clustered index record. The bottom of the stack is - locked by the fact that the purge_sys.view must - 'overtake' any read view of an active transaction. - Thus, it is safe to fetch the prefixes for - externally stored columns. */ - row = row_build(ROW_COPY_POINTERS, clust_index, - rec, clust_offsets, - NULL, NULL, NULL, &ext, heap); - - if (dict_index_has_virtual(index)) { - - -#ifdef DBUG_OFF -# define dbug_v_purge false -#else /* DBUG_OFF */ - bool dbug_v_purge = false; -#endif /* DBUG_OFF */ - - DBUG_EXECUTE_IF( - "ib_purge_virtual_index_callback", - dbug_v_purge = true;); - - roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr( - rec, clust_index, clust_offsets); - - /* if the row is newly inserted, then the virtual - columns need to be computed */ - if (trx_undo_roll_ptr_is_insert(t_roll_ptr) - || dbug_v_purge) { - - if (!row_vers_build_clust_v_col( - row, clust_index, index, heap)) { - goto unsafe_to_purge; - } - - entry = row_build_index_entry( - row, ext, index, heap); - if (entry && dtuple_coll_eq(*ientry, *entry)) { - goto unsafe_to_purge; - } - } else { - /* Build index entry out of row */ - entry = row_build_index_entry(row, ext, index, heap); - /* entry could only be NULL if - the clustered index record is an uncommitted - inserted record whose BLOBs have not been - written yet. The secondary index record - can be safely removed, because it cannot - possibly refer to this incomplete - clustered index record. (Insert would - always first be completed for the - clustered index record, then proceed to - secondary indexes.) 
*/ - - if (entry && row_vers_vc_matches_cluster( - rec, entry, - clust_index, clust_offsets, - index, ientry, roll_ptr, - trx_id, NULL, &vrow, mtr)) { - goto unsafe_to_purge; - } - } - clust_offsets = rec_get_offsets(rec, clust_index, NULL, - clust_index - ->n_core_fields, - ULINT_UNDEFINED, &heap); - } else { - - entry = row_build_index_entry( - row, ext, index, heap); - - /* If entry == NULL, the record contains unset BLOB - pointers. This must be a freshly inserted record. If - this is called from - row_purge_remove_sec_if_poss_low(), the thread will - hold latches on the clustered index and the secondary - index. Because the insert works in three steps: - - (1) insert the record to clustered index - (2) store the BLOBs and update BLOB pointers - (3) insert records to secondary indexes - - the purge thread can safely ignore freshly inserted - records and delete the secondary index record. The - thread that inserted the new record will be inserting - the secondary index records. */ - - /* NOTE that we cannot do the comparison as binary - fields because the row is maybe being modified so that - the clustered index record has already been updated to - a different binary value in a char field, but the - collation identifies the old and new value anyway! */ - if (entry && dtuple_coll_eq(*ientry, *entry)) { -unsafe_to_purge: - mem_heap_free(heap); - - if (v_heap) { - mem_heap_free(v_heap); - } - return true; - } - } - } else if (dict_index_has_virtual(index)) { - /* The current cluster index record could be - deleted, but the previous version of it might not. We will - need to get the virtual column data from undo record - associated with current cluster index */ - - cur_vrow = row_vers_build_cur_vrow( - also_curr, rec, clust_index, &clust_offsets, - index, roll_ptr, trx_id, heap, v_heap, mtr); - } - - version = rec; - - for (;;) { - heap2 = heap; - heap = mem_heap_create(1024); - vrow = NULL; - - trx_undo_prev_version_build(version, - clust_index, clust_offsets, - heap, &prev_version, nullptr, - dict_index_has_virtual(index) - ? &vrow : nullptr, - TRX_UNDO_CHECK_PURGEABILITY); - mem_heap_free(heap2); /* free version and clust_offsets */ - - if (!prev_version) { - /* Versions end here */ - mem_heap_free(heap); - - if (v_heap) { - mem_heap_free(v_heap); - } - - return false; - } - - clust_offsets = rec_get_offsets(prev_version, clust_index, - NULL, - clust_index->n_core_fields, - ULINT_UNDEFINED, &heap); - - if (dict_index_has_virtual(index)) { - if (vrow) { - if (dtuple_vcol_data_missing(*vrow, index)) { - goto nochange_index; - } - /* Keep the virtual row info for the next - version, unless it is changed */ - mem_heap_empty(v_heap); - cur_vrow = dtuple_copy(vrow, v_heap); - dtuple_dup_v_fld(cur_vrow, v_heap); - } - - if (!cur_vrow) { - /* Nothing for this index has changed, - continue */ -nochange_index: - version = prev_version; - continue; - } - } - - if (!rec_get_deleted_flag(prev_version, comp)) { - row_ext_t* ext; - - /* The stack of versions is locked by mtr. - Thus, it is safe to fetch the prefixes for - externally stored columns. */ - row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, clust_offsets, - NULL, NULL, NULL, &ext, heap); - - if (dict_index_has_virtual(index)) { - ut_ad(cur_vrow); - ut_ad(row->n_v_fields == cur_vrow->n_v_fields); - dtuple_copy_v_fields(row, cur_vrow); - } - - entry = row_build_index_entry(row, ext, index, heap); - - /* If entry == NULL, the record contains unset - BLOB pointers. 
-			BLOB pointers. This must be a freshly
-			inserted record that we can safely ignore.
-			For the justification, see the comments after
-			the previous row_build_index_entry() call. */
-
-			/* NOTE that we cannot do the comparison as binary
-			fields because maybe the secondary index record has
-			already been updated to a different binary value in
-			a char field, but the collation identifies the old
-			and new value anyway! */
-
-			if (entry && dtuple_coll_eq(*ientry, *entry)) {
-				goto unsafe_to_purge;
-			}
-		}
-
-		version = prev_version;
-	}
-}
-
 /*****************************************************************//**
 Constructs the version of a clustered index record which a consistent
 read should see. We assume that the trx id stored in rec is such that
@@ -1223,7 +752,7 @@ row_vers_build_for_consistent_read(
 
 		err = trx_undo_prev_version_build(
 			version, index, *offsets, heap,
-			&prev_version, NULL, vrow, 0);
+			&prev_version, mtr, 0, NULL, vrow);
 
 		if (prev_heap != NULL) {
 			mem_heap_free(prev_heap);
@@ -1385,8 +914,8 @@ committed_version_trx:
 		heap = mem_heap_create(1024);
 
 		if (trx_undo_prev_version_build(version, index, *offsets, heap,
-						&prev_version, in_heap, vrow,
-						0) != DB_SUCCESS) {
+						&prev_version, mtr, 0,
+						in_heap, vrow) != DB_SUCCESS) {
 			mem_heap_free(heap);
 			heap = heap2;
 			heap2 = NULL;
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index dc5d3122b1d..a051867a1bd 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -1138,10 +1138,9 @@ bool purge_sys_t::running()
 
 void purge_sys_t::stop_FTS()
 {
-  latch.rd_lock(SRW_LOCK_CALL);
-  m_FTS_paused++;
-  latch.rd_unlock();
-  while (m_active)
+  ut_d(const auto paused=) m_FTS_paused.fetch_add(1);
+  ut_ad((paused + 1) & ~PAUSED_SYS);
+  while (m_active.load(std::memory_order_acquire))
     std::this_thread::sleep_for(std::chrono::seconds(1));
 }
 
@@ -1175,8 +1174,8 @@ void purge_sys_t::stop()
 /** Resume purge in data dictionary tables */
 void purge_sys_t::resume_SYS(void *)
 {
-  ut_d(auto paused=) purge_sys.m_SYS_paused--;
-  ut_ad(paused);
+  ut_d(auto paused=) purge_sys.m_FTS_paused.fetch_sub(PAUSED_SYS);
+  ut_ad(paused >= PAUSED_SYS);
 }
 
 /** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
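
The stop_FTS()/resume_SYS() hunks above fold the former latch-protected m_FTS_paused/m_SYS_paused pair into a single lock-free atomic word: FTS pauses are counted in the low bits and a dictionary (SYS) pause is accounted for in units of PAUSED_SYS. A minimal standalone sketch of the pattern follows; the value of PAUSED_SYS and the field split are assumptions purely for illustration (the real constant is declared in purge_sys_t, outside this diff), and where the patch masks with ~PAUSED_SYS the sketch uses PAUSED_SYS - 1, which is equivalent only under the layout assumed here.

  #include <atomic>
  #include <chrono>
  #include <cstdint>
  #include <thread>

  struct pause_word
  {
    // Assumed layout: FTS pauses in the low 16 bits, SYS pauses counted
    // in multiples of PAUSED_SYS above them (illustrative values only).
    static constexpr uint32_t PAUSED_SYS= 1U << 16;
    std::atomic<uint32_t> w{0};

    void stop_FTS() { w.fetch_add(1); }            // no latch needed
    void resume_FTS() { w.fetch_sub(1); }
    void stop_SYS() { w.fetch_add(PAUSED_SYS); }
    void resume_SYS() { w.fetch_sub(PAUSED_SYS); }

    // Poll until every relevant pause is resumed, like wait_FTS() below.
    void wait(bool also_sys) const
    {
      const uint32_t mask= also_sys ? ~0U : PAUSED_SYS - 1;
      while (w.load(std::memory_order_acquire) & mask)
        std::this_thread::sleep_for(std::chrono::milliseconds(10));
    }
  };
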
@@ -1346,7 +1345,6 @@ static bool srv_purge_should_exit(size_t old_history_size)
 
 /*********************************************************************//**
 Fetch and execute a task from the work queue.
-@param [in,out]	slot	purge worker thread slot
 @return true if a task was executed */
 static bool srv_task_execute()
 {
@@ -1487,6 +1485,13 @@ static void release_thd(THD *thd, void *ctx)
   set_current_thd(0);
 }
 
+void srv_purge_worker_task_low()
+{
+  ut_ad(current_thd);
+  while (srv_task_execute())
+    ut_ad(purge_sys.running());
+}
+
 static void purge_worker_callback(void*)
 {
   ut_ad(!current_thd);
@@ -1494,8 +1499,7 @@ static void purge_worker_callback(void*)
   ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
   void *ctx;
   THD *thd= acquire_thd(&ctx);
-  while (srv_task_execute())
-    ut_ad(purge_sys.running());
+  srv_purge_worker_task_low();
   release_thd(thd,ctx);
 }
 
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 85c6dfdb5c1..3b7149528e6 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -774,26 +774,18 @@ not_free:
 
 buf_block_t *purge_sys_t::get_page(page_id_t id)
 {
+  ut_ad(!recv_sys.recovery_on);
+
   buf_block_t*& undo_page= pages[id];
 
-  if (undo_page)
-    return undo_page;
-
-  mtr_t mtr;
-  mtr.start();
-  undo_page=
-    buf_page_get_gen(id, 0, RW_S_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, &mtr);
-
-  if (UNIV_LIKELY(undo_page != nullptr))
+  if (!undo_page)
   {
-    undo_page->fix();
-    mtr.commit();
-    return undo_page;
+    undo_page= buf_pool.page_fix(id); // batch_cleanup() will unfix()
+    if (!undo_page)
+      pages.erase(id);
   }
 
-  mtr.commit();
-  pages.erase(id);
-  return nullptr;
+  return undo_page;
 }
 
 bool purge_sys_t::rseg_get_next_history_log()
@@ -1062,15 +1054,8 @@ static void trx_purge_close_tables(purge_node_t *node, THD *thd)
 
 void purge_sys_t::wait_FTS(bool also_sys)
 {
-  bool paused;
-  do
-  {
-    latch.wr_lock(SRW_LOCK_CALL);
-    paused= m_FTS_paused || (also_sys && m_SYS_paused);
-    latch.wr_unlock();
+  for (const uint32_t mask= also_sys ? ~0U : ~PAUSED_SYS; m_FTS_paused & mask;)
     std::this_thread::sleep_for(std::chrono::milliseconds(10));
-  }
-  while (paused);
 }
 
 __attribute__((nonnull))
@@ -1211,123 +1196,108 @@ dict_table_t *purge_sys_t::close_and_reopen(table_id_t id, THD *thd,
 
 /** Run a purge batch.
 @param n_purge_threads	number of purge threads
+@param thd		purge coordinator thread handle
+@param n_work_items	number of work items (currently tables) to process
 @return new purge_sys.head */
-static purge_sys_t::iterator
-trx_purge_attach_undo_recs(ulint n_purge_threads, THD *thd)
+static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd,
+                                                        ulint *n_work_items)
 {
-	que_thr_t*	thr;
-	ulint		i;
+  que_thr_t *thr;
+  purge_sys_t::iterator head= purge_sys.tail;
 
-	ut_a(n_purge_threads > 0);
-	ut_a(UT_LIST_GET_LEN(purge_sys.query->thrs) >= n_purge_threads);
+  /* Fetch and parse the UNDO records. The UNDO records are added
+  to a per purge node vector. */
+  thr= nullptr;
 
-	purge_sys_t::iterator head = purge_sys.tail;
+  std::unordered_map<table_id_t, purge_node_t*>
+    table_id_map(TRX_PURGE_TABLE_BUCKETS);
+  purge_sys.m_active= true;
+
+  MDL_context *const mdl_context=
+    static_cast<MDL_context*>(thd_mdl_context(thd));
+  ut_ad(mdl_context);
+
+  const size_t max_pages=
+    std::min(buf_pool.curr_size * 3 / 4, size_t{srv_purge_batch_size});
+
+  while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown)
+  {
+    /* Track the max {trx_id, undo_no} for truncating the
+    UNDO logs once we have purged the records. */
+
+    if (head <= purge_sys.tail)
+      head= purge_sys.tail;
+
+    /* Fetch the next record, and advance the purge_sys.tail. */
+    trx_purge_rec_t purge_rec= purge_sys.fetch_next_rec();
+
+    if (!purge_rec.undo_rec)
+    {
+      if (!purge_rec.roll_ptr)
+        break;
+      ut_ad(purge_rec.roll_ptr == 1);
+      continue;
+    }
+
+    table_id_t table_id= trx_undo_rec_get_table_id(purge_rec.undo_rec);
+
+    purge_node_t *&table_node= table_id_map[table_id];
+    if (table_node)
+      ut_ad(!table_node->in_progress);
+    if (!table_node)
+    {
+      std::pair<dict_table_t*, MDL_ticket*> p;
+      p.first= trx_purge_table_open(table_id, mdl_context, &p.second);
+      if (p.first == reinterpret_cast<dict_table_t*>(-1))
+        p.first= purge_sys.close_and_reopen(table_id, thd, &p.second);
+
+      if (!thr || !(thr= UT_LIST_GET_NEXT(thrs, thr)))
+        thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
+      ++*n_work_items;
+      table_node= static_cast<purge_node_t*>(thr->child);
+
+      ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE);
+      ut_d(auto pair=) table_node->tables.emplace(table_id, p);
+      ut_ad(pair.second);
+      if (p.first)
+        goto enqueue;
+    }
+    else if (table_node->tables[table_id].first)
+    {
+    enqueue:
+      table_node->undo_recs.push(purge_rec);
+      ut_ad(!table_node->in_progress);
+    }
+
+    if (purge_sys.n_pages_handled() >= max_pages)
+      break;
+  }
+
+  purge_sys.m_active= false;
 
 #ifdef UNIV_DEBUG
-	i = 0;
-	/* Debug code to validate some pre-requisites and reset done flag. */
-	for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
-	     thr != NULL && i < n_purge_threads;
-	     thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
+  thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
+  for (ulint i= 0; thr && i < *n_work_items;
+       i++, thr= UT_LIST_GET_NEXT(thrs, thr))
+  {
+    purge_node_t *node= static_cast<purge_node_t*>(thr->child);
+    ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
+    ut_ad(!node->in_progress);
+    node->in_progress= true;
+  }
 
-		purge_node_t*	node;
-
-		/* Get the purge node. */
-		node = (purge_node_t*) thr->child;
-
-		ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
-		ut_ad(node->undo_recs.empty());
-		ut_ad(!node->in_progress);
-		ut_d(node->in_progress = true);
-	}
-
-	/* There should never be fewer nodes than threads, the inverse
-	however is allowed because we only use purge threads as needed. */
-	ut_ad(i == n_purge_threads);
+  for (; thr; thr= UT_LIST_GET_NEXT(thrs, thr))
+  {
+    purge_node_t *node= static_cast<purge_node_t*>(thr->child);
+    ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
+    ut_ad(!node->in_progress);
+    ut_ad(node->undo_recs.empty());
+  }
 #endif
 
-	/* Fetch and parse the UNDO records. The UNDO records are added
-	to a per purge node vector. */
-	thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
+  ut_ad(head <= purge_sys.tail);
 
-	ut_ad(head <= purge_sys.tail);
-
-	i = 0;
-
-	std::unordered_map<table_id_t, purge_node_t*>
-		table_id_map(TRX_PURGE_TABLE_BUCKETS);
-	purge_sys.m_active = true;
-
-	MDL_context* const mdl_context
-		= static_cast<MDL_context*>(thd_mdl_context(thd));
-	ut_ad(mdl_context);
-
-	const size_t max_pages = std::min(buf_pool.curr_size * 3 / 4,
-					  size_t{srv_purge_batch_size});
-
-	while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) {
-		/* Track the max {trx_id, undo_no} for truncating the
-		UNDO logs once we have purged the records. */
-
-		if (head <= purge_sys.tail) {
-			head = purge_sys.tail;
-		}
-
-		/* Fetch the next record, and advance the purge_sys.tail. */
-		trx_purge_rec_t purge_rec = purge_sys.fetch_next_rec();
-
-		if (!purge_rec.undo_rec) {
-			if (!purge_rec.roll_ptr) {
-				break;
-			}
-			ut_ad(purge_rec.roll_ptr == 1);
-			continue;
-		}
-
-		table_id_t table_id = trx_undo_rec_get_table_id(
-			purge_rec.undo_rec);
-
-		purge_node_t*& table_node = table_id_map[table_id];
-
-		if (!table_node) {
-			std::pair<dict_table_t*, MDL_ticket*> p;
-			p.first = trx_purge_table_open(table_id, mdl_context,
-						       &p.second);
-			if (p.first == reinterpret_cast<dict_table_t*>(-1)) {
-				p.first = purge_sys.close_and_reopen(
-					table_id, thd, &p.second);
-			}
-
-			thr = UT_LIST_GET_NEXT(thrs, thr);
-
-			if (!(++i % n_purge_threads)) {
-				thr = UT_LIST_GET_FIRST(
-					purge_sys.query->thrs);
-			}
-
-			table_node = static_cast<purge_node_t*>(thr->child);
-			ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE);
-			ut_d(auto i=)
-			table_node->tables.emplace(table_id, p);
-			ut_ad(i.second);
-			if (p.first) {
-				goto enqueue;
-			}
-		} else if (table_node->tables[table_id].first) {
-enqueue:
-			table_node->undo_recs.push(purge_rec);
-		}
-
-		if (purge_sys.n_pages_handled() >= max_pages) {
-			break;
-		}
-	}
-
-	purge_sys.m_active = false;
-
-	ut_ad(head <= purge_sys.tail);
-
-	return head;
+  return head;
 }
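
The rewritten trx_purge_attach_undo_recs() above routes every fetched undo record to a per-table work item, so all records of one table are processed by one purge node, and a node is only allocated when its table first appears in the batch. A self-contained sketch of that routing, with placeholder types standing in for the InnoDB ones:

  #include <cstdint>
  #include <queue>
  #include <unordered_map>

  using table_id_t= uint64_t;
  struct undo_rec { table_id_t table_id; /* parsed record */ };
  struct work_item { std::queue<undo_rec> recs; };

  // fetch is a stand-in for purge_sys.fetch_next_rec(); it returns false
  // when no more history is purgeable.
  template <typename Fetch>
  std::unordered_map<table_id_t, work_item> attach(Fetch fetch,
                                                   size_t max_recs)
  {
    std::unordered_map<table_id_t, work_item> items;
    for (size_t n= 0; n < max_recs; n++)
    {
      undo_rec rec;
      if (!fetch(rec))
        break;
      // operator[] creates the table's work item on first sight
      items[rec.table_id].recs.push(rec);
    }
    return items;
  }
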
 
 extern tpool::waitable_task purge_worker_task;
 
@@ -1385,68 +1355,89 @@ Run a purge batch.
 @return number of undo log pages handled in the batch */
 TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size)
 {
-	ut_ad(n_tasks > 0);
+  ut_ad(n_tasks > 0);
 
-	purge_sys.clone_oldest_view();
+  purge_sys.clone_oldest_view();
 
-#ifdef UNIV_DEBUG
-	if (srv_purge_view_update_only_debug) {
-		return(0);
-	}
-#endif /* UNIV_DEBUG */
+  ut_d(if (srv_purge_view_update_only_debug) return 0);
 
-	THD* const thd = current_thd;
+  THD *const thd= current_thd;
 
-	/* Fetch the UNDO recs that need to be purged. */
-	const purge_sys_t::iterator head
-		= trx_purge_attach_undo_recs(n_tasks, thd);
-	const size_t n_pages = purge_sys.n_pages_handled();
+  /* Fetch the UNDO recs that need to be purged. */
+  ulint n_work= 0;
+  const purge_sys_t::iterator head= trx_purge_attach_undo_recs(thd, &n_work);
+  const size_t n_pages= purge_sys.n_pages_handled();
 
-	{
-		ulint	delay = n_pages ? srv_max_purge_lag : 0;
-		if (UNIV_UNLIKELY(delay)) {
-			if (delay >= history_size) {
-no_throttle:
-				delay = 0;
-			} else if (const ulint max_delay =
-				   srv_max_purge_lag_delay) {
-				delay = std::min(max_delay,
-						 10000 * history_size / delay
-						 - 5000);
-			} else {
-				goto no_throttle;
-			}
-		}
-		srv_dml_needed_delay = delay;
-	}
+  {
+    ulint delay= n_pages ? srv_max_purge_lag : 0;
+    if (UNIV_UNLIKELY(delay))
+    {
+      if (delay >= history_size)
+no_throttle:
+        delay= 0;
+      else if (const ulint max_delay= srv_max_purge_lag_delay)
+        delay= std::min(max_delay, 10000 * history_size / delay - 5000);
+      else
+        goto no_throttle;
+    }
+    srv_dml_needed_delay= delay;
+  }
 
-	que_thr_t* thr = nullptr;
+  ut_ad(n_tasks);
+  que_thr_t *thr= nullptr;
 
-	/* Submit tasks to workers queue if using multi-threaded purge. */
-	for (ulint i = n_tasks; --i; ) {
-		thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
-		ut_a(thr);
-		srv_que_task_enqueue_low(thr);
-		srv_thread_pool->submit_task(&purge_worker_task);
-	}
+  if (n_work)
+  {
+    for (auto i= n_work; i--; )
+    {
+      if (!thr)
+        thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
+      else
+        thr= UT_LIST_GET_NEXT(thrs, thr);
 
-	thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
+      if (!thr)
+        break;
 
-	que_run_threads(thr);
+      ut_ad(thr->state == QUE_THR_COMPLETED);
+      thr->state= QUE_THR_RUNNING;
+      thr->run_node= thr;
+      thr->prev_node= thr->common.parent;
+      purge_sys.query->state= QUE_FORK_ACTIVE;
+      purge_sys.query->last_sel_node= nullptr;
+      srv_que_task_enqueue_low(thr);
+    }
 
-	trx_purge_wait_for_workers_to_complete();
+    /*
+      To reduce context switches we submit at most n_tasks-1 worker tasks
+      (we may use fewer tasks if there is not enough work).
 
-	for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs); thr;
-	     thr = UT_LIST_GET_NEXT(thrs, thr)) {
-		purge_node_t* node = static_cast<purge_node_t*>(thr->child);
-		trx_purge_close_tables(node, thd);
-		node->tables.clear();
-	}
+      The coordinator does a worker's job itself, instead of waiting and
+      sitting idle, and then waits for all the others to finish.
 
-	purge_sys.batch_cleanup(head);
+      This also means that if innodb_purge_threads=1, the coordinator does
+      all the work alone.
+    */
+    const ulint workers{std::min(n_work, n_tasks) - 1};
+    for (ulint i= 0; i < workers; i++)
+      srv_thread_pool->submit_task(&purge_worker_task);
+    srv_purge_worker_task_low();
 
-	MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
-	MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages);
+    if (workers)
+      trx_purge_wait_for_workers_to_complete();
 
-	return n_pages;
+    for (thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr && n_work--;
+         thr= UT_LIST_GET_NEXT(thrs, thr))
+    {
+      purge_node_t *node= static_cast<purge_node_t*>(thr->child);
+      trx_purge_close_tables(node, thd);
+      node->tables.clear();
+    }
+  }
+
+  purge_sys.batch_cleanup(head);
+
+  MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
+  MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages);
+
+  return n_pages;
 }
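
The scheduling block above no longer parks the coordinator in que_run_threads() while workers drain the queue: with n_work work items it submits at most min(n_work, n_tasks) - 1 background tasks and runs the worker loop itself before waiting. A generic sketch of this coordinator-participates pattern, where submit, worker and wait_for_workers stand in for the thread-pool submission, srv_purge_worker_task_low() and trx_purge_wait_for_workers_to_complete():

  #include <algorithm>
  #include <cstddef>

  template <typename Submit, typename Worker, typename Wait>
  void run_batch(size_t n_work, size_t n_tasks,
                 Submit submit, Worker worker, Wait wait_for_workers)
  {
    if (!n_work)
      return;                  // nothing attached, nothing to run
    const size_t workers= std::min(n_work, n_tasks) - 1;
    for (size_t i= 0; i < workers; i++)
      submit();                // background helpers, possibly zero
    worker();                  // the coordinator works instead of idling
    if (workers)
      wait_for_workers();      // then joins the helpers
  }
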
 
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index cf6c050e623..b933e5cd3a8 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -2045,170 +2045,128 @@ err_exit:
 
 /*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
 
-/** Copy an undo record to heap.
-@param[in]	roll_ptr	roll pointer to a record that exists
-@param[in,out]	heap		memory heap where copied */
-static
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-	roll_ptr_t	roll_ptr,
-	mem_heap_t*	heap)
+static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
+                                     rec_offs *offsets, mem_heap_t *heap,
+                                     rec_t **old_vers, mem_heap_t *v_heap,
+                                     dtuple_t **vrow, ulint v_status,
+                                     const trx_undo_rec_t *undo_rec);
+
+inline const buf_block_t *
+purge_sys_t::view_guard::get(const page_id_t id, mtr_t *mtr)
 {
-	ulint		rseg_id;
-	uint32_t	page_no;
-	uint16_t	offset;
-	bool		is_insert;
-	mtr_t		mtr;
-
-	trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
-				 &offset);
-	ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
-	ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
-	trx_rseg_t *rseg= &trx_sys.rseg_array[rseg_id];
-	ut_ad(rseg->is_persistent());
-
-	mtr.start();
-
-	trx_undo_rec_t *undo_rec= nullptr;
-	if (buf_block_t* undo_page=
-	    buf_page_get(page_id_t(rseg->space->id, page_no), 0, RW_S_LATCH, &mtr))
+  buf_block_t *block;
+  ut_ad(mtr->is_active());
+  if (!latch)
   {
-		buf_page_make_young_if_needed(&undo_page->page);
-		undo_rec= undo_page->page.frame + offset;
-		const size_t end= mach_read_from_2(undo_rec);
-		if (UNIV_UNLIKELY(end <= offset ||
-				  end >= srv_page_size - FIL_PAGE_DATA_END))
-			undo_rec= nullptr;
-		else
+    decltype(purge_sys.pages)::const_iterator i= purge_sys.pages.find(id);
+    if (i != purge_sys.pages.end())
     {
-			size_t len{end - offset};
-			undo_rec=
-			  static_cast<trx_undo_rec_t*>(mem_heap_dup(heap, undo_rec, len));
-			mach_write_to_2(undo_rec, len);
+      block= i->second;
+      ut_ad(block);
+      return block;
     }
   }
-
-	mtr.commit();
-	return undo_rec;
-}
-
-/** Copy an undo record to heap, to check if a secondary index record
-can be safely purged.
-@param trx_id	DB_TRX_ID corresponding to roll_ptr
-@param name	table name
-@param roll_ptr	DB_ROLL_PTR pointing to the undo log record
-@param heap	memory heap for allocation
-@return copy of the record
-@retval nullptr if the version is visible to purge_sys.view */
-static trx_undo_rec_t *trx_undo_get_rec_if_purgeable(trx_id_t trx_id,
-                                                     const table_name_t &name,
-                                                     roll_ptr_t roll_ptr,
-                                                     mem_heap_t* heap)
-{
+  block= buf_pool.page_fix(id);
+  if (block)
   {
-    purge_sys_t::view_guard check;
-    if (!check.view().changes_visible(trx_id))
-      return trx_undo_get_undo_rec_low(roll_ptr, heap);
+    mtr->memo_push(block, MTR_MEMO_BUF_FIX);
+    if (latch)
+      /* In MVCC operations (outside purge tasks), we will refresh the
+      buf_pool.LRU position. In purge, we expect the page to be freed
+      soon, at the end of the current batch. */
+      buf_page_make_young_if_needed(&block->page);
  }
-  return nullptr;
-}
-
-/** Copy an undo record to heap.
-@param trx_id	DB_TRX_ID corresponding to roll_ptr
-@param name	table name
-@param roll_ptr	DB_ROLL_PTR pointing to the undo log record
-@param heap	memory heap for allocation
-@return copy of the record
-@retval nullptr if the undo log is not available */
-static trx_undo_rec_t *trx_undo_get_undo_rec(trx_id_t trx_id,
-                                             const table_name_t &name,
-                                             roll_ptr_t roll_ptr,
-                                             mem_heap_t *heap)
-{
-  {
-    purge_sys_t::end_view_guard check;
-    if (!check.view().changes_visible(trx_id))
-      return trx_undo_get_undo_rec_low(roll_ptr, heap);
-  }
-  return nullptr;
+  return block;
 }
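
view_guard::get() above first consults purge_sys.pages, the map of pages that purge_sys_t::get_page() buffer-fixed earlier in the batch, and only falls back to buf_pool.page_fix(). A reduced sketch of that lookup-then-fix shape (page and page_fix are placeholders for buf_block_t and the buf_pool call):

  #include <cstdint>
  #include <unordered_map>

  struct page;   // placeholder for buf_block_t

  struct page_cache
  {
    // id -> page kept buffer-fixed until the end of the current batch
    std::unordered_map<uint64_t, page*> pages;

    template <typename Fix>   // Fix: uint64_t -> page*, pins or fails
    page* get(uint64_t id, Fix page_fix)
    {
      if (auto i= pages.find(id); i != pages.end())
        return i->second;     // already fixed by an earlier step
      if (page *p= page_fix(id))
        return pages.emplace(id, p).first->second;
      return nullptr;         // the page no longer exists
    }
  };
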
 
 /** Build a previous version of a clustered index record. The
 caller must hold a latch on the index page of the clustered index record.
-@param	rec		version of a clustered index record
-@param	index		clustered index
-@param	offsets		rec_get_offsets(rec, index)
-@param	heap		memory heap from which the memory needed is
-			allocated
-@param	old_vers	previous version or NULL if rec is the
-			first inserted version, or if history data
-			has been deleted (an error), or if the purge
-			could have removed the version
-			though it has not yet done so
-@param	v_heap		memory heap used to create vrow
-			dtuple if it is not yet created. This heap
-			diffs from "heap" above in that it could be
-			prebuilt->old_vers_heap for selection
-@param	v_row		virtual column info, if any
-@param	v_status	status determine if it is going into this
-			function by purge thread or not.
-			And if we read "after image" of undo log
-@param	undo_block	undo log block which was cached during
-			online dml apply or nullptr
+@param	rec	version of a clustered index record
+@param	index	clustered index
+@param	offsets	rec_get_offsets(rec, index)
+@param	heap	memory heap from which the memory needed is allocated
+@param	old_vers	previous version, or NULL if rec is the first inserted
+		version, or if history data has been deleted (an error),
+		or if the purge could have removed the version though
+		it has not yet done so
+@param	mtr	mini-transaction
+@param	v_status	TRX_UNDO_PREV_IN_PURGE, ...
+@param	v_heap	memory heap used to create vrow dtuple if it is not yet
+	created. This heap differs from "heap" above in that it could be
+	prebuilt->old_vers_heap for selection
+@param	vrow	virtual column info, if any
 @return error code
 @retval DB_SUCCESS if previous version was successfully built,
 or if it was an insert or the undo record refers to the table before rebuild
 @retval DB_MISSING_HISTORY if the history is missing */
 TRANSACTIONAL_TARGET
-dberr_t
-trx_undo_prev_version_build(
-	const rec_t	*rec,
-	dict_index_t	*index,
-	rec_offs	*offsets,
-	mem_heap_t	*heap,
-	rec_t		**old_vers,
-	mem_heap_t	*v_heap,
-	dtuple_t	**vrow,
-	ulint		v_status)
+dberr_t trx_undo_prev_version_build(const rec_t *rec, dict_index_t *index,
+                                    rec_offs *offsets, mem_heap_t *heap,
+                                    rec_t **old_vers, mtr_t *mtr,
+                                    ulint v_status,
+                                    mem_heap_t *v_heap, dtuple_t **vrow)
 {
-	dtuple_t*	entry;
-	trx_id_t	rec_trx_id;
-	undo_no_t	undo_no;
-	table_id_t	table_id;
-	trx_id_t	trx_id;
-	roll_ptr_t	roll_ptr;
-	upd_t*		update;
-	byte		type;
-	byte		info_bits;
-	byte		cmpl_info;
-	bool		dummy_extern;
-	byte*		buf;
+  ut_ad(!index->table->is_temporary());
+  ut_ad(rec_offs_validate(rec, index, offsets));
 
-	ut_ad(!index->table->is_temporary());
-	ut_ad(rec_offs_validate(rec, index, offsets));
+  const roll_ptr_t roll_ptr= row_get_rec_roll_ptr(rec, index, offsets);
+  *old_vers= nullptr;
 
-	roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
+  if (trx_undo_roll_ptr_is_insert(roll_ptr))
+    /* The record rec is the first inserted version */
+    return DB_SUCCESS;
 
-	*old_vers = NULL;
+  ut_ad(roll_ptr < 1ULL << 55);
+  ut_ad(uint16_t(roll_ptr) >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
+  ut_ad(uint32_t(roll_ptr >> 16) >= FSP_FIRST_INODE_PAGE_NO);
 
-	if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-		/* The record rec is the first inserted version */
-		return DB_SUCCESS;
-	}
+  const trx_id_t rec_trx_id= row_get_rec_trx_id(rec, index, offsets);
 
-	mariadb_increment_undo_records_read();
-	rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
+  ut_ad(!index->table->skip_alter_undo);
 
-	ut_ad(!index->table->skip_alter_undo);
+  mariadb_increment_undo_records_read();
+  const auto savepoint= mtr->get_savepoint();
+  dberr_t err= DB_MISSING_HISTORY;
+
+  purge_sys_t::view_guard check{v_status == TRX_UNDO_CHECK_PURGE_PAGES
+                                ? purge_sys_t::view_guard::PURGE
+                                : v_status == TRX_UNDO_CHECK_PURGEABILITY
+                                ? purge_sys_t::view_guard::VIEW
+                                : purge_sys_t::view_guard::END_VIEW};
+  if (!check.view().changes_visible(rec_trx_id))
+  {
+    trx_undo_rec_t *undo_rec= nullptr;
+    static_assert(ROLL_PTR_RSEG_ID_POS == 48, "");
+    static_assert(ROLL_PTR_PAGE_POS == 16, "");
+    if (const buf_block_t *undo_page=
+        check.get(page_id_t{trx_sys.rseg_array[(roll_ptr >> 48) & 0x7f].
+                            space->id,
+                            uint32_t(roll_ptr >> 16)}, mtr))
+    {
+      static_assert(ROLL_PTR_BYTE_POS == 0, "");
+      const uint16_t offset{uint16_t(roll_ptr)};
+      undo_rec= undo_page->page.frame + offset;
+      const size_t end= mach_read_from_2(undo_rec);
+      if (UNIV_UNLIKELY(end > offset &&
+                        end < srv_page_size - FIL_PAGE_DATA_END))
+        err= trx_undo_prev_version(rec, index, offsets, heap,
+                                   old_vers, v_heap, vrow, v_status,
+                                   undo_rec);
+    }
+  }
 
-	trx_undo_rec_t*	undo_rec = v_status == TRX_UNDO_CHECK_PURGEABILITY
-		? trx_undo_get_rec_if_purgeable(rec_trx_id, index->table->name,
-						roll_ptr, heap)
-		: trx_undo_get_undo_rec(rec_trx_id, index->table->name,
-					roll_ptr, heap);
-	if (!undo_rec) {
-		return DB_MISSING_HISTORY;
-	}
+  mtr->rollback_to_savepoint(savepoint);
+  return err;
+}
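
trx_undo_prev_version_build() now decodes DB_ROLL_PTR inline rather than calling trx_undo_decode_roll_ptr(): per the shifts and static_asserts above, a 7-byte roll pointer carries the insert flag in bit 55, the rollback segment id in bits 48-54, the undo page number in bits 16-47 and the byte offset within that page in bits 0-15. A standalone decode of that layout:

  #include <cstdint>

  struct roll_ptr_fields
  {
    bool is_insert;    // bit 55: points into an insert_undo log
    uint8_t rseg_id;   // bits 48..54: rollback segment (0..127)
    uint32_t page_no;  // bits 16..47: undo log page number
    uint16_t offset;   // bits 0..15: record offset within the page
  };

  inline roll_ptr_fields decode_roll_ptr(uint64_t roll_ptr)
  {
    return { (roll_ptr >> 55) != 0,
             uint8_t((roll_ptr >> 48) & 0x7f),
             uint32_t(roll_ptr >> 16),
             uint16_t(roll_ptr) };
  }
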
 
+static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
+                                     rec_offs *offsets, mem_heap_t *heap,
+                                     rec_t **old_vers, mem_heap_t *v_heap,
+                                     dtuple_t **vrow, ulint v_status,
+                                     const trx_undo_rec_t *undo_rec)
+{
+  byte type, cmpl_info;
+  bool dummy_extern;
+  undo_no_t undo_no;
+  table_id_t table_id;
 	const byte *ptr =
 		trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
 				      &dummy_extern, &undo_no, &table_id);
@@ -2220,6 +2178,10 @@
 		return DB_SUCCESS;
 	}
 
+	trx_id_t	trx_id;
+	roll_ptr_t	roll_ptr;
+	byte		info_bits;
+
 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
 					       &info_bits);
 
@@ -2247,10 +2209,12 @@
 
 	ptr = trx_undo_rec_skip_row_ref(ptr, index);
 
+	upd_t*	update;
 	ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
 					     roll_ptr, info_bits,
 					     heap, &update);
 	ut_a(ptr);
 
+	byte*	buf;
 	if (row_upd_changes_field_size_or_external(index, offsets, update)) {
 		/* We should confirm the existence of disowned external data,
@@ -2276,9 +2240,10 @@
 	those fields that update updates to become externally stored
 	fields. Store the info: */
 
-	entry = row_rec_to_index_entry(rec, index, offsets, heap);
+	dtuple_t*	entry = row_rec_to_index_entry(rec, index, offsets,
+						       heap);
 	/* The page containing the clustered index record
-	corresponding to entry is latched in mtr. Thus the
+	corresponding to entry is latched. Thus the
 	following call is safe. */
 	if (!row_upd_index_replace_new_col_vals(entry, *index, update,
 						heap)) {
diff --git a/storage/innobase/unittest/innodb_sync-t.cc b/storage/innobase/unittest/innodb_sync-t.cc
index d0289086b24..5ad726d8429 100644
--- a/storage/innobase/unittest/innodb_sync-t.cc
+++ b/storage/innobase/unittest/innodb_sync-t.cc
@@ -92,6 +92,25 @@ static void test_ssux_lock()
       ssux.wr_u_downgrade();
       ssux.u_unlock();
     }
+
+    for (auto j= M_ROUNDS; j--; )
+    {
+      ssux.rd_lock();
+      assert(!critical);
+      if (ssux.rd_u_upgrade_try())
+      {
+        assert(!critical);
+        ssux.rd_unlock();
+        ssux.u_wr_upgrade();
+        assert(!critical);
+        critical= true;
+        critical= false;
+        ssux.wr_u_downgrade();
+        ssux.u_rd_downgrade();
+      }
+      assert(!critical);
+      ssux.rd_unlock();
+    }
   }
 }
 
@@ -129,6 +148,14 @@ static void test_sux_lock()
       critical= false;
       sux.x_u_downgrade();
       sux.u_unlock();
+      sux.s_lock();
+      std::ignore= sux.s_x_upgrade();
+      assert(!critical);
+      sux.x_lock();
+      critical= true;
+      sux.x_unlock();
+      critical= false;
+      sux.x_unlock();
     }
   }
 }
diff --git a/storage/maria/ma_bitmap.c b/storage/maria/ma_bitmap.c
index 11add3b546f..af2b42a4ec1 100644
--- a/storage/maria/ma_bitmap.c
+++ b/storage/maria/ma_bitmap.c
@@ -3077,21 +3077,25 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
 int _ma_bitmap_create_first(MARIA_SHARE *share)
 {
   uint block_size= share->bitmap.block_size;
+  size_t error;
   File file= share->bitmap.file.file;
-  uchar marker[CRC_SIZE];
+  uchar *temp_buff;
+
+  if (!(temp_buff= (uchar*) my_alloca(block_size)))
+    return 1;
+  bzero(temp_buff, block_size);
 
   /*
     Next write operation of the page will write correct CRC
     if it is needed
   */
-  int4store(marker, MARIA_NO_CRC_BITMAP_PAGE);
+  int4store(temp_buff + block_size - CRC_SIZE, MARIA_NO_CRC_BITMAP_PAGE);
 
-  if (mysql_file_chsize(file, block_size - sizeof(marker),
-                        0, MYF(MY_WME)) > 0 ||
-      my_pwrite(file, marker, sizeof(marker),
-                block_size - sizeof(marker),
-                MYF(MY_NABP | MY_WME)))
+  error= my_pwrite(file, temp_buff, block_size, 0, MYF(MY_NABP | MY_WME));
+  my_afree(temp_buff);
+  if (error)
     return 1;
+
+  share->state.state.data_file_length= block_size;
   _ma_bitmap_delete_all(share);
   return 0;
diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c
index 7c57ddeb192..d0e05df1a1f 100644
--- a/storage/maria/ma_check.c
+++ b/storage/maria/ma_check.c
@@ -420,6 +420,8 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
     /* We cannot check file sizes for S3 */
     DBUG_RETURN(0);
   }
+  /* We should never come here with internal temporary tables */
+  DBUG_ASSERT(!share->internal_table);
 
   if (!(param->testflag & T_SILENT))
     puts("- check file-size");
@@ -715,6 +717,8 @@ static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
   MARIA_PAGE ma_page;
   DBUG_ENTER("chk_index_down");
 
+  DBUG_ASSERT(!share->internal_table);
+
   /* Key blocks must lay within the key file length entirely. */
   if (page + keyinfo->block_length > share->state.state.key_file_length)
   {
@@ -2467,7 +2471,16 @@ static int initialize_variables_for_repair(HA_CHECK *param,
     return 1;
 
   /* calculate max_records */
-  sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
+  if (!share->internal_table)
+  {
+    /* Get the real file size */
+    sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
+  }
+  else
+  {
+    /* For internal temporary files we use the logical file length */
+    sort_info->filelength= share->state.state.data_file_length;
+  }
 
   param->max_progress= sort_info->filelength;
   if ((param->testflag & T_CREATE_MISSING_KEYS) ||
@@ -2865,7 +2878,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
     {
       fputs(" \r",stdout); fflush(stdout);
     }
-    if (mysql_file_chsize(share->kfile.file,
+    if (!share->internal_table &&
+        mysql_file_chsize(share->kfile.file,
                           share->state.state.key_file_length, 0, MYF(0)) > 0)
     {
       _ma_check_print_warning(param,
@@ -4184,7 +4198,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
   if (param->testflag & T_CALC_CHECKSUM)
     share->state.state.checksum=param->glob_crc;
 
-  if (mysql_file_chsize(share->kfile.file,
+  if (!share->internal_table &&
+      mysql_file_chsize(share->kfile.file,
                         share->state.state.key_file_length, 0, MYF(0)) > 0)
     _ma_check_print_warning(param,
                             "Can't change size of indexfile, error: %d",
@@ -4733,7 +4748,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
   if (param->testflag & T_CALC_CHECKSUM)
     share->state.state.checksum=param->glob_crc;
 
-  if (mysql_file_chsize(share->kfile.file,
+  if (!share->internal_table &&
+      mysql_file_chsize(share->kfile.file,
                         share->state.state.key_file_length, 0, MYF(0)) > 0)
     _ma_check_print_warning(param,
                             "Can't change size of indexfile, error: %d",
@@ -6145,6 +6161,8 @@ int maria_test_if_almost_full(MARIA_HA *info)
 {
   MARIA_SHARE *share= info->s;
 
+  DBUG_ASSERT(!share->internal_table);
+
   if (share->options & HA_OPTION_COMPRESS_RECORD)
     return 0;
   return mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,
diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c
index 15f3fb34223..aeb3e7608d4 100644
--- a/storage/maria/ma_delete_all.c
+++ b/storage/maria/ma_delete_all.c
@@ -129,11 +129,17 @@ int maria_delete_all_rows(MARIA_HA *info)
     _ma_unmap_file(info);
 #endif
 
-  if (_ma_flush_table_files(info, MARIA_FLUSH_DATA|MARIA_FLUSH_INDEX,
+  if (share->internal_table)
+    /*
+      Avoid truncating internal temporary tables, as this can have a big
+      performance overhead when done for mysql_handle_single_derived()
+ */; + else if (_ma_flush_table_files(info, MARIA_FLUSH_DATA|MARIA_FLUSH_INDEX, FLUSH_IGNORE_CHANGED, FLUSH_IGNORE_CHANGED) || - mysql_file_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) > 0 || - mysql_file_chsize(share->kfile.file, share->base.keystart, 0, - MYF(MY_WME)) > 0) + mysql_file_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) > 0 || + mysql_file_chsize(share->kfile.file, share->base.keystart, 0, + MYF(MY_WME)) > 0) goto err; if (info->s->tracked) diff --git a/storage/spider/mysql-test/spider/feature/r/engine_defined_attributes.result b/storage/spider/mysql-test/spider/feature/r/engine_defined_attributes.result index f3d15ce9472..9a1b11e19ec 100644 --- a/storage/spider/mysql-test/spider/feature/r/engine_defined_attributes.result +++ b/storage/spider/mysql-test/spider/feature/r/engine_defined_attributes.result @@ -237,7 +237,7 @@ set session spider_suppress_comment_ignored_warning=0; CREATE TABLE tbl_a (a INT) ENGINE=Spider DEFAULT CHARSET=utf8 REMOTE_TABLE=t CONNECTION="srv s_2_1"; Warnings: -Warning 12529 The table or partition COMMENT or CONNECTION string 'srv s_2_1' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified +Warning 12529 The table or partition COMMENT or CONNECTION string 'srv s_2_1' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified drop table tbl_a; CREATE TABLE tbl_a ( a INT, @@ -411,7 +411,7 @@ PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2" Warnings: Warning 138 Spider table params in COMMENT or CONNECTION strings have been deprecated and will be removed in a future release. Please use table options instead. Warning 138 Spider table params in COMMENT or CONNECTION strings have been deprecated and will be removed in a future release. Please use table options instead. 
-Warning	12529	The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
+Warning	12529	The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
 INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
 ERROR HY000: Remote table 'auto_test_remote2.tbl_a#P#p2' is not found
 DROP TABLE tbl_a;
@@ -427,9 +427,9 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_1"',
 PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
 );
 Warnings:
-Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_1"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
-Warning	12529	The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
-Warning	12529	The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
+Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_1"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
+Warning	12529	The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
+Warning	12529	The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
 INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
 ERROR HY000: Unable to connect to foreign data source: localhost
 DROP TABLE tbl_a;
@@ -447,10 +447,10 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_2"',
 PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
 );
 Warnings:
-Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
+Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
 INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
 Warnings:
-Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
+Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
 connection child2_1;
 SELECT * FROM tbl_a;
 a b
@@ -477,7 +477,7 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_2" read_only_mode "0"',
 PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2" READ_ONLY=NO
 );
 Warnings:
-Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_2" read_only_mode "0"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
+Warning	12529	The table or partition COMMENT or CONNECTION string 'srv "s_2_2" read_only_mode "0"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
'srv "s_2_2" read_only_mode "0"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"); ERROR HY000: Table 'auto_test_local.tbl_a' is read only INSERT INTO tbl_a VALUES (3, "ccc"), (4, "ddd"); @@ -500,7 +500,7 @@ PRIMARY KEY(a) ) ENGINE=Spider DEFAULT CHARSET=utf8 REMOTE_SERVER="s_2_1" COMMENT='tbl "tbl_b"' REMOTE_TABLE="tbl_a"; Warnings: -Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_b"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified +Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_b"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified select table_name, server, tgt_table_name from mysql.spider_tables; table_name server tgt_table_name tbl_a s_2_1 tbl_a diff --git a/storage/spider/spd_conn.cc b/storage/spider/spd_conn.cc index 40dcb5e8629..ff289c220c1 100644 --- a/storage/spider/spd_conn.cc +++ b/storage/spider/spd_conn.cc @@ -402,7 +402,7 @@ SPIDER_CONN *spider_create_conn( char *tmp_name, *tmp_host, *tmp_username, *tmp_password, *tmp_socket; char *tmp_wrapper, *tmp_db, *tmp_ssl_ca, *tmp_ssl_capath, *tmp_ssl_cert; char *tmp_ssl_cipher, *tmp_ssl_key, *tmp_default_file, *tmp_default_group; - char *tmp_dsn, *tmp_filedsn, *tmp_driver; + char *tmp_dsn, *tmp_filedsn, *tmp_driver, *tmp_odbc_conn_str; DBUG_ENTER("spider_create_conn"); if (unlikely(!UTC)) @@ -454,6 +454,8 @@ SPIDER_CONN *spider_create_conn( (uint) (share->tgt_filedsns_lengths[link_idx] + 1), &tmp_driver, (uint) (share->tgt_drivers_lengths[link_idx] + 1), + &tmp_odbc_conn_str, + (uint) (share->tgt_odbc_conn_str_length + 1), &need_mon, (uint) (sizeof(int)), NullS)) ) { @@ -529,6 +531,10 @@ SPIDER_CONN *spider_create_conn( spider_memcpy_or_null(&conn->tgt_driver, tmp_driver, share->tgt_drivers[link_idx], &conn->tgt_driver_length, share->tgt_drivers_lengths[link_idx]); + spider_memcpy_or_null(&conn->tgt_odbc_conn_str, tmp_odbc_conn_str, + share->tgt_odbc_conn_str, + &conn->tgt_odbc_conn_str_length, + share->tgt_odbc_conn_str_length); conn->tgt_port = share->tgt_ports[link_idx]; conn->tgt_ssl_vsc = share->tgt_ssl_vscs[link_idx]; conn->dbton_id = share->sql_dbton_ids[link_idx]; diff --git a/storage/spider/spd_db_conn.cc b/storage/spider/spd_db_conn.cc index abb7f187de0..b1ec856e3fb 100644 --- a/storage/spider/spd_db_conn.cc +++ b/storage/spider/spd_db_conn.cc @@ -1485,6 +1485,7 @@ int spider_db_append_key_hint( if (str->reserve( hint_str_len - 2 + SPIDER_SQL_INDEX_USE_LEN + SPIDER_SQL_OPEN_PAREN_LEN + SPIDER_SQL_CLOSE_PAREN_LEN)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); hint_str += 2; str->q_append(SPIDER_SQL_INDEX_USE_STR, SPIDER_SQL_INDEX_USE_LEN); str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN); @@ -1497,10 +1498,11 @@ int spider_db_append_key_hint( if (str->reserve( hint_str_len - 3 + SPIDER_SQL_INDEX_IGNORE_LEN + SPIDER_SQL_OPEN_PAREN_LEN + SPIDER_SQL_CLOSE_PAREN_LEN)) + DBUG_RETURN(HA_ERR_OUT_OF_MEM); hint_str += 3; str->q_append(SPIDER_SQL_INDEX_IGNORE_STR, SPIDER_SQL_INDEX_IGNORE_LEN); str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN); - str->q_append(hint_str, hint_str_len - 2); + str->q_append(hint_str, hint_str_len - 3); str->q_append(SPIDER_SQL_CLOSE_PAREN_STR, SPIDER_SQL_CLOSE_PAREN_LEN); } else if (str->reserve(hint_str_len + SPIDER_SQL_SPACE_LEN)) DBUG_RETURN(HA_ERR_OUT_OF_MEM); diff --git 
diff --git a/storage/spider/spd_db_mysql.cc b/storage/spider/spd_db_mysql.cc
index a45408c4408..59517190914 100644
--- a/storage/spider/spd_db_mysql.cc
+++ b/storage/spider/spd_db_mysql.cc
@@ -7624,8 +7624,8 @@ int spider_mbase_share::convert_key_hint_str()
          roop_count < (int) table_share->keys; roop_count++, tmp_key_hint++)
     {
       tmp_key_hint->length(0);
-      if (tmp_key_hint->append(spider_share->key_hint->ptr(),
-        spider_share->key_hint->length(), system_charset_info))
+      if (tmp_key_hint->append(spider_share->key_hint[roop_count].ptr(),
+        spider_share->key_hint[roop_count].length(), system_charset_info))
         DBUG_RETURN(HA_ERR_OUT_OF_MEM);
     }
   } else {
diff --git a/storage/spider/spd_err.h b/storage/spider/spd_err.h
index 67fcd0d5d9c..e4e74d6d5ba 100644
--- a/storage/spider/spd_err.h
+++ b/storage/spider/spd_err.h
@@ -73,7 +73,7 @@
 #define ER_SPIDER_INVALID_TABLE_OPTION_NUM 12528
 #define ER_SPIDER_INVALID_TABLE_OPTION_STR "The table option %s=%s is invalid"
 #define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_NUM 12529
-#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_STR "The table or partition COMMENT or CONNECTION string '%s' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified"
+#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_STR "The table or partition COMMENT or CONNECTION string '%s' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified"
 #define ER_SPIDER_CANT_USE_BOTH_INNER_XA_AND_SNAPSHOT_NUM 12601
 #define ER_SPIDER_CANT_USE_BOTH_INNER_XA_AND_SNAPSHOT_STR "Can't use both spider_use_consistent_snapshot = 1 and spider_internal_xa = 1"
diff --git a/storage/spider/spd_include.h b/storage/spider/spd_include.h
index d8fde166cc3..3a6a714cca9 100644
--- a/storage/spider/spd_include.h
+++ b/storage/spider/spd_include.h
@@ -678,6 +678,7 @@ typedef struct st_spider_conn
   char *tgt_dsn;
   char *tgt_filedsn;
   char *tgt_driver;
+  char *tgt_odbc_conn_str;
   long tgt_port;
   long tgt_ssl_vsc;
@@ -697,6 +698,7 @@ typedef struct st_spider_conn
   uint tgt_dsn_length;
   uint tgt_filedsn_length;
   uint tgt_driver_length;
+  uint tgt_odbc_conn_str_length;
   uint dbton_id;
   volatile
@@ -1195,6 +1197,7 @@ typedef struct st_spider_share
   char **tgt_dsns;
   char **tgt_filedsns;
   char **tgt_drivers;
+  char *tgt_odbc_conn_str;
   char **static_link_ids;
   char **tgt_pk_names;
   char **tgt_sequence_names;
@@ -1237,6 +1240,7 @@ typedef struct st_spider_share
   uint *tgt_dsns_lengths;
   uint *tgt_filedsns_lengths;
   uint *tgt_drivers_lengths;
+  uint tgt_odbc_conn_str_length;
   uint *static_link_ids_lengths;
   uint *tgt_pk_names_lengths;
   uint *tgt_sequence_names_lengths;
diff --git a/storage/spider/spd_table.cc b/storage/spider/spd_table.cc
index e101acdc684..647a2d1ccc0 100644
--- a/storage/spider/spd_table.cc
+++ b/storage/spider/spd_table.cc
@@ -860,6 +860,8 @@ int spider_free_share_alloc(
     }
     spider_free(spider_current_trx, share->tgt_drivers, MYF(0));
   }
+  if (share->tgt_odbc_conn_str)
+    spider_free(spider_current_trx, share->tgt_odbc_conn_str, MYF(0));
   if (share->tgt_pk_names)
   {
     for (roop_count = 0; roop_count < (int) share->tgt_pk_names_length;
@@ -2483,9 +2485,6 @@ int st_spider_param_string_parse::fail(bool restore_delim)
 /*
   Parse connection information specified by COMMENT, CONNECT, or
   engine-defined options.
-
-  TODO: Deprecate the connection specification by COMMENT and CONNECT,
-  and then solely utilize engine-defined options.
 */
 int spider_parse_connect_info(
   SPIDER_SHARE *share,
@@ -2512,6 +2511,7 @@ int spider_parse_connect_info(
   DBUG_PRINT("info",("spider s->path=%s", table_share->path.str));
   DBUG_PRINT("info",
     ("spider s->normalized_path=%s", table_share->normalized_path.str));
+  parse.error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
   spider_get_partition_info(share->table_name, share->table_name_length,
                             table_share, part_info, &part_elem, &sub_elem);
   /* Find the correct table options, depending on if we are parsing a
@@ -2600,8 +2600,11 @@ int spider_parse_connect_info(
     goto error_alloc_conn_string;
   }
   DBUG_ASSERT(error_num_1 == 0);
-  /* If the connect string is explicitly ignored for parsing, or if
-  any option is specified, skip the parsing. */
+  /*
+    If the COMMENT or CONNECTION string is explicitly ignored for
+    table param parsing, or if any option is specified, skip the
+    parsing.
+  */
   if (spider_param_ignore_comments(current_thd) || option_specified)
   {
     if (!spider_param_suppress_comment_ignored_warning(current_thd))
@@ -2621,7 +2624,6 @@ int spider_parse_connect_info(
         "and will be removed in a future release. "
         "Please use table options instead.");
     start_param = connect_string;
-    parse.error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
     while (*start_param != '\0')
     {
       if (parse.locate_param_def(start_param))