Merge 11.4 into 11.6

This commit is contained in:
Marko Mäkelä 2024-09-04 10:38:25 +03:00
commit a5b80531fb
78 changed files with 1736 additions and 1273 deletions

View file

@ -255,6 +255,7 @@ static my_bool column_types_flag;
static my_bool preserve_comments= 0;
static my_bool in_com_source, aborted= 0;
static ulong opt_max_allowed_packet, opt_net_buffer_length;
unsigned long quick_max_column_width= LONG_MAX;
static uint verbose=0,opt_silent=0,opt_mysql_port=0, opt_local_infile=0;
static uint my_end_arg;
static char * opt_mysql_unix_port=0;
@ -1821,6 +1822,10 @@ static struct my_option my_long_options[] =
"Don't cache result, print it row by row. This may slow down the server "
"if the output is suspended. Doesn't use history file.",
&quick, &quick, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"quick-max-column-width", 0,
"Maximal field length limit in case of --quick", &quick_max_column_width,
&quick_max_column_width, 0, GET_ULONG, REQUIRED_ARG, LONG_MAX, 0, ULONG_MAX,
0, 1, 0},
{"raw", 'r', "Write fields without conversion. Used with --batch.",
&opt_raw_data, &opt_raw_data, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"reconnect", 0, "Reconnect if the connection is lost.",
@ -3877,7 +3882,7 @@ print_table_data(MYSQL_RES *result)
{
uint length= column_names ? field->name_length : 0;
if (quick)
length= MY_MAX(length,field->length);
length= MY_MAX(length, MY_MIN(field->length, quick_max_column_width));
else
length= MY_MAX(length,field->max_length);
if (length < 4 && !IS_NOT_NULL(field->flags))

View file

@ -121,7 +121,7 @@ in
replace_uring_with_aio
fi
;&
"noble")
"noble"|"oracular")
# mariadb-plugin-rocksdb s390x not supported by us (yet)
# ubuntu doesn't support mips64el yet, so keep this just
# in case something changes.

View file

@ -2114,7 +2114,7 @@ static int prepare_export()
IF_WIN("\"","") "\"%s\" --mysqld \"%s\""
" --defaults-extra-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=."
" --innodb --innodb-fast-shutdown=0 --loose-partition"
" --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu"
" --innodb-buffer-pool-size=%llu"
" --console --skip-log-error --skip-log-bin --bootstrap %s< "
BOOTSTRAP_FILENAME IF_WIN("\"",""),
mariabackup_exe,
@ -2128,7 +2128,7 @@ static int prepare_export()
IF_WIN("\"","") "\"%s\" --mysqld"
" --defaults-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=."
" --innodb --innodb-fast-shutdown=0 --loose-partition"
" --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu"
" --innodb-buffer-pool-size=%llu"
" --console --log-error= --skip-log-bin --bootstrap %s< "
BOOTSTRAP_FILENAME IF_WIN("\"",""),
mariabackup_exe,

View file

@ -0,0 +1,62 @@
#
# MDEV-34704: Quick mode produces the bug for mariadb client
#
create table t1 (aaaaaaaaa char (5), aaaaa char (10), a char (127), b char(1));
insert into t1 values ("X", "X", "X", "X");
# --table --quick
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| X | X | X | X |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
# --table --quick --quick-max-column-width=0
+-----------+-------+------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+-------+------+------+
| X | X | X | X |
+-----------+-------+------+------+
# --table --quick --quick-max-column-width=10
+-----------+------------+------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+------------+------+
| X | X | X | X |
+-----------+------------+------------+------+
# --table --quick --quick-max-column-width=20
+-----------+------------+----------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+----------------------+------+
| X | X | X | X |
+-----------+------------+----------------------+------+
insert into t1 values ("01234", "0123456789", "01234567890123456789", "1");
# --table --quick
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
# --table --quick --quick-max-column-width=0
+-----------+-------+------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+-------+------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+-------+------+------+
# --table --quick --quick-max-column-width=10
+-----------+------------+------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+------------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+------------+------------+------+
# --table --quick --quick-max-column-width=20
+-----------+------------+----------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+----------------------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+------------+----------------------+------+
drop table t1;
#
# End of 10.7 tests
#

View file

@ -0,0 +1,46 @@
--source include/not_embedded.inc
--echo #
--echo # MDEV-34704: Quick mode produces the bug for mariadb client
--echo #
create table t1 (aaaaaaaaa char (5), aaaaa char (10), a char (127), b char(1));
insert into t1 values ("X", "X", "X", "X");
--echo # --table --quick
--exec echo "select * from test.t1;" | $MYSQL --table --quick 2>&1
--echo # --table --quick --quick-max-column-width=0
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=0 2>&1
--echo # --table --quick --quick-max-column-width=10
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=10 2>&1
--echo # --table --quick --quick-max-column-width=20
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=20 2>&1
insert into t1 values ("01234", "0123456789", "01234567890123456789", "1");
--echo # --table --quick
--exec echo "select * from test.t1;" | $MYSQL --table --quick 2>&1
--echo # --table --quick --quick-max-column-width=0
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=0 2>&1
--echo # --table --quick --quick-max-column-width=10
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=10 2>&1
--echo # --table --quick --quick-max-column-width=20
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=20 2>&1
drop table t1;
--echo #
--echo # End of 10.7 tests
--echo #

View file

@ -42,5 +42,24 @@ SELECT * FROM v WHERE f = '10.5.20';
f
drop view v;
#
# MDEV-34785: Assertion failure in Item_func_or_sum::do_build_clone
# (Item_func_not_all)
#
CREATE VIEW t AS SELECT 0 AS a;
SELECT * FROM t WHERE a=ALL (SELECT 0);
a
0
DROP VIEW t;
#
# MDEV-34833: Assertion failure in Item_float::do_build_clone
# (Item_static_float_func)
#
CREATE VIEW v1 (f,f2) AS SELECT connection_id(),pi();
CREATE TABLE t1 AS SELECT 1;
SELECT * FROM v1 JOIN t1 ON f=f2;
f f2 1
DROP VIEW v1;
DROP TABLE t1;
#
# End of 10.5 tests
#

View file

@ -46,6 +46,27 @@ CREATE VIEW v AS SELECT version() AS f;
SELECT * FROM v WHERE f = '10.5.20';
drop view v;
--echo #
--echo # MDEV-34785: Assertion failure in Item_func_or_sum::do_build_clone
--echo # (Item_func_not_all)
--echo #
CREATE VIEW t AS SELECT 0 AS a;
SELECT * FROM t WHERE a=ALL (SELECT 0);
DROP VIEW t;
--echo #
--echo # MDEV-34833: Assertion failure in Item_float::do_build_clone
--echo # (Item_static_float_func)
--echo #
CREATE VIEW v1 (f,f2) AS SELECT connection_id(),pi();
CREATE TABLE t1 AS SELECT 1;
SELECT * FROM v1 JOIN t1 ON f=f2;
DROP VIEW v1;
DROP TABLE t1;
--echo #
--echo # End of 10.5 tests
--echo #

View file

@ -0,0 +1,11 @@
--- alter_copy_bulk.result
+++ alter_copy_bulk.result
@@ -4,7 +4,7 @@
INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
-ERROR 23000: Duplicate entry 'aa' for key 'PRIMARY'
+ERROR 23000: Duplicate entry 'bb' for key 'PRIMARY'
INSERT INTO t1 VALUES(repeat('a', 200), 1);
ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
ERROR 23000: Duplicate entry '1' for key 'f2_2'

View file

@ -1,26 +1,50 @@
SET @@alter_algorithm=COPY;
Warnings:
Warning 4200 The variable '@@alter_algorithm' is ignored. It only exists for compatibility with old installations and will be removed in a future release
CREATE TABLE t1(f1 CHAR(200), f2 INT NOT NULL)engine=InnoDB;
INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2;
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ADD PRIMARY KEY(f1(2));
ERROR 23000: Duplicate entry 'aaaaaaaa' for key 'PRIMARY'
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
ERROR 23000: Duplicate entry 'aa' for key 'PRIMARY'
INSERT INTO t1 VALUES(repeat('a', 200), 1);
ALTER TABLE t1 ADD UNIQUE KEY(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
ERROR 23000: Duplicate entry '1' for key 'f2_2'
ALTER IGNORE TABLE t1 MODIFY f1 CHAR(200) NOT NULL;
CREATE TABLE t2(f1 INT NOT NULL,
FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB;
INSERT INTO t2 VALUES(1);
ALTER TABLE t2 FORCE;
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
DROP TABLE t2, t1;
CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB PARTITION BY HASH(f1) PARTITIONS 2;
INSERT INTO t1 VALUES(1, 1);
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2;
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536;
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
DROP TABLE t1;
#
# MDEV-34756 Validation of new foreign key skipped
# if innodb_alter_copy_bulk=ON
#
CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f1) REFERENCES t1(f1);
affected rows: 0
info: Records: 0 Duplicates: 0 Warnings: 0
INSERT INTO t1 VALUES (1, 1);
INSERT INTO t2 VALUES (1, 2);
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`#sql-alter`, CONSTRAINT `#sql-alter_ibfk_2` FOREIGN KEY (`f2`) REFERENCES `t1` (`f1`))
INSERT INTO t1 VALUES(3, 1);
SET STATEMENT foreign_key_checks=0 FOR
ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
affected rows: 1
info: Records: 1 Duplicates: 0 Warnings: 0
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
affected rows: 2
info: Records: 2 Duplicates: 0 Warnings: 0
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
affected rows: 1
info: Records: 1 Duplicates: 0 Warnings: 0
DROP TABLE t2, t1;

View file

@ -19,8 +19,10 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 4194304
FOUND 1 /InnoDB: Resized log to 4\.000MiB/ in mysqld.1.err
UPDATE t SET b='' WHERE a<10;
SET GLOBAL innodb_log_file_size=5242880;
connect con1,localhost,root;
UPDATE t SET b='' WHERE a<10;
connection default;
SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 5242880
@ -28,6 +30,9 @@ SELECT global_value FROM information_schema.system_variables
WHERE variable_name = 'innodb_log_file_size';
global_value
5242880
connection con1;
disconnect con1;
connection default;
# restart
SELECT * FROM t WHERE a<10;
a b
@ -40,6 +45,10 @@ a b
7
8
9
SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
COUNT(*) LENGTH(b)
9 0
19991 255
SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 5242880

View file

@ -1,26 +1,25 @@
--source include/have_innodb.inc
--source include/have_partition.inc
--source include/have_sequence.inc
SET @@alter_algorithm=COPY;
CREATE TABLE t1(f1 CHAR(200), f2 INT NOT NULL)engine=InnoDB;
INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2;
# Buffer fits in the memory
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
# Insert more entries
INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
# Alter should use temporary file for sorting
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
# Error while buffering the insert operation
--error ER_DUP_ENTRY
ALTER TABLE t1 ADD PRIMARY KEY(f1(2));
ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
INSERT INTO t1 VALUES(repeat('a', 200), 1);
# Error while applying the bulk insert operation
--error ER_DUP_ENTRY
ALTER TABLE t1 ADD UNIQUE KEY(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
# Ignore shouldn't go through bulk operation
ALTER IGNORE TABLE t1 MODIFY f1 CHAR(200) NOT NULL;
@ -29,16 +28,41 @@ CREATE TABLE t2(f1 INT NOT NULL,
FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB;
INSERT INTO t2 VALUES(1);
# Bulk operation shouldn't happen because of foreign key constraints
ALTER TABLE t2 FORCE;
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
DROP TABLE t2, t1;
CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB PARTITION BY HASH(f1) PARTITIONS 2;
INSERT INTO t1 VALUES(1, 1);
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2;
# Buffer fits in the memory
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
# Insert more entries
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536;
# Alter should use temporary file for sorting
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
DROP TABLE t1;
--echo #
--echo # MDEV-34756 Validation of new foreign key skipped
--echo # if innodb_alter_copy_bulk=ON
--echo #
CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
--enable_info
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f1) REFERENCES t1(f1);
--disable_info
INSERT INTO t1 VALUES (1, 1);
INSERT INTO t2 VALUES (1, 2);
--replace_regex /#sql-alter-[0-9a-f-]*/#sql-alter/
--error ER_NO_REFERENCED_ROW_2
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
INSERT INTO t1 VALUES(3, 1);
--enable_info
SET STATEMENT foreign_key_checks=0 FOR
ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
--disable_info
DROP TABLE t2, t1;

View file

@ -25,17 +25,28 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
let SEARCH_PATTERN = InnoDB: Resized log to 4\\.000MiB;
--source include/search_pattern_in_file.inc
UPDATE t SET b='' WHERE a<10;
send SET GLOBAL innodb_log_file_size=5242880;
SET GLOBAL innodb_log_file_size=5242880;
--connect con1,localhost,root
send UPDATE t SET b='' WHERE a<10;
--connection default
reap;
SHOW VARIABLES LIKE 'innodb_log_file_size';
SELECT global_value FROM information_schema.system_variables
WHERE variable_name = 'innodb_log_file_size';
--connection con1
reap;
--disconnect con1
--connection default
--let $shutdown_timeout=0
--let $restart_parameters=
--source include/restart_mysqld.inc
SELECT * FROM t WHERE a<10;
SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
SHOW VARIABLES LIKE 'innodb_log_file_size';
let SEARCH_PATTERN = InnoDB: Resized log to 5\\.000MiB;

View file

@ -1,5 +1,7 @@
--source include/have_innodb.inc
--source include/have_log_bin.inc
# Test does a lot of queries that take a lot of CPU under Valgrind.
--source include/not_valgrind.inc
call mtr.add_suppression("Can't init tc log");
call mtr.add_suppression("Aborting");

View file

@ -9,10 +9,7 @@ connection slave;
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=10;
CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4, master_use_gtid=no;
FLUSH TABLES WITH READ LOCK;
include/start_slave.inc
include/wait_for_slave_param.inc [Seconds_Behind_Master]
UNLOCK TABLES;
connection master;
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1);

View file

@ -1,6 +1,7 @@
include/master-slave.inc
[connection master]
connection master;
call mtr.add_suppression("Got an error writing communication packets");
call mtr.add_suppression("Got an error reading communication packets");
call mtr.add_suppression("Could not read packet:.* vio_errno: 1158");
call mtr.add_suppression("Could not write packet:.* vio_errno: 1160");

View file

@ -14,6 +14,12 @@ SET GLOBAL event_scheduler=on;
let $wait_condition= SELECT count(*)>0 FROM t1;
--source include/wait_condition.inc
SET GLOBAL event_scheduler=off;
# If the time rolls to the next whole second just at this point, a new event
# run may be scheduled. Wait for this to disappear, otherwise we see occasional
# test failures if the table gets dropped before the extra event run completes.
# Expect 5 connections: default, master, master1, server_1, binlog dump thread
--let $wait_condition= SELECT COUNT(*) = 5 FROM INFORMATION_SCHEMA.PROCESSLIST;
--source include/wait_condition.inc
SELECT DISTINCT a FROM t1;
DELETE FROM t1;

View file

@ -1,3 +1,5 @@
# Test applies a large binlog, takes long under Valgrind with little benefit.
--source include/not_valgrind.inc
--source include/have_innodb.inc
--source include/have_partition.inc
--source include/have_binlog_format_mixed_or_row.inc

View file

@ -28,14 +28,7 @@ SET GLOBAL slave_parallel_threads=10;
--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4, master_use_gtid=no;
# Block execution yet when the blocked query timestamp has been already accounted
FLUSH TABLES WITH READ LOCK;
--source include/start_slave.inc
--let $slave_param = Seconds_Behind_Master
--let $slave_param_value = 1
--let $slave_param_comparison= >=
--source include/wait_for_slave_param.inc
UNLOCK TABLES;
--connection master
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;

View file

@ -25,6 +25,7 @@
--source include/master-slave.inc
--connection master
call mtr.add_suppression("Got an error writing communication packets");
call mtr.add_suppression("Got an error reading communication packets");
call mtr.add_suppression("Could not read packet:.* vio_errno: 1158");
call mtr.add_suppression("Could not write packet:.* vio_errno: 1160");

View file

@ -19,7 +19,17 @@
--source include/master-slave.inc
connection slave;
--let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
--let $i= 100
while ($i > 0) {
dec $i;
--let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
if ($connection_id) {
let $i= 0;
}
if ($i > 0) {
--sleep 0.1
}
}
if(!$connection_id)
{

View file

@ -1,19 +1,19 @@
SET @global_start_value = @@global.innodb_purge_batch_size;
SELECT @global_start_value;
@global_start_value
1000
127
'#--------------------FN_DYNVARS_046_01------------------------#'
SET @@global.innodb_purge_batch_size = 1;
SET @@global.innodb_purge_batch_size = DEFAULT;
SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size
1000
127
'#---------------------FN_DYNVARS_046_02-------------------------#'
SET innodb_purge_batch_size = 1;
ERROR HY000: Variable 'innodb_purge_batch_size' is a GLOBAL variable and should be set with SET GLOBAL
SELECT @@innodb_purge_batch_size;
@@innodb_purge_batch_size
1000
127
SELECT local.innodb_purge_batch_size;
ERROR 42S02: Unknown table 'local' in field list
SET global innodb_purge_batch_size = 1;
@ -112,4 +112,4 @@ SELECT @@global.innodb_purge_batch_size;
SET @@global.innodb_purge_batch_size = @global_start_value;
SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size
1000
127

View file

@ -221,7 +221,7 @@
VARIABLE_SCOPE GLOBAL
-VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT How many pages to flush on LRU eviction
VARIABLE_COMMENT Deprecated parameter with no effect
NUMERIC_MIN_VALUE 1
-NUMERIC_MAX_VALUE 18446744073709551615
+NUMERIC_MAX_VALUE 4294967295

View file

@ -984,13 +984,13 @@ SESSION_VALUE NULL
DEFAULT_VALUE 32
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT How many pages to flush on LRU eviction
VARIABLE_COMMENT Unused
NUMERIC_MIN_VALUE 1
NUMERIC_MAX_VALUE 18446744073709551615
NUMERIC_BLOCK_SIZE 0
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
COMMAND_LINE_ARGUMENT NULL
VARIABLE_NAME INNODB_LRU_SCAN_DEPTH
SESSION_VALUE NULL
DEFAULT_VALUE 1536
@ -1233,7 +1233,7 @@ READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_PURGE_BATCH_SIZE
SESSION_VALUE NULL
DEFAULT_VALUE 1000
DEFAULT_VALUE 127
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list
@ -1254,7 +1254,7 @@ NUMERIC_MAX_VALUE 128
NUMERIC_BLOCK_SIZE 0
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
COMMAND_LINE_ARGUMENT NULL
VARIABLE_NAME INNODB_PURGE_THREADS
SESSION_VALUE NULL
DEFAULT_VALUE 4

View file

@ -39,7 +39,7 @@ extern "C" unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size);
constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U << 1;
constexpr uint32_t cpuid_ecx_XSAVE= 1U << 26;
constexpr uint32_t cpuid_ecx_AVX_AND_XSAVE= 1U << 28 | 1U << 27;
static uint32_t cpuid_ecx()
{
@ -395,7 +395,7 @@ static bool os_have_avx512()
static ATTRIBUTE_NOINLINE bool have_vpclmulqdq(uint32_t cpuid_ecx)
{
if (!(cpuid_ecx & cpuid_ecx_XSAVE) || !os_have_avx512())
if ((~cpuid_ecx & cpuid_ecx_AVX_AND_XSAVE) || !os_have_avx512())
return false;
# ifdef _MSC_VER
int regs[4];

View file

@ -4769,6 +4769,8 @@ public:
{
return const_charset_converter(thd, tocs, true, func_name);
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_static_float_func>(thd, this); }
};
@ -4932,7 +4934,6 @@ public:
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_string_with_introducer>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -4947,7 +4948,6 @@ public:
{ }
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_string_sys>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -4964,7 +4964,6 @@ public:
{ }
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_string_ascii>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -5003,7 +5002,6 @@ public:
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_static_string_func>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -5023,7 +5021,6 @@ public:
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_partition_func_safe_string>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -5187,7 +5184,6 @@ public:
void print(String *str, enum_query_type query_type) override;
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_bin_string>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};

View file

@ -752,6 +752,8 @@ public:
void set_sub_test(Item_maxmin_subselect *item) { test_sub_item= item; test_sum_item= 0;};
bool empty_underlying_subquery();
Item *neg_transformer(THD *thd) override;
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_func_not_all>(thd, this); }
};

View file

@ -5429,7 +5429,9 @@ static int init_server_components()
MARIADB_REMOVED_OPTION("innodb-log-compressed-pages"),
MARIADB_REMOVED_OPTION("innodb-log-files-in-group"),
MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"),
MARIADB_REMOVED_OPTION("innodb-lru-flush-size"),
MARIADB_REMOVED_OPTION("innodb-page-cleaners"),
MARIADB_REMOVED_OPTION("innodb-purge-truncate-frequency"),
MARIADB_REMOVED_OPTION("innodb-replication-delay"),
MARIADB_REMOVED_OPTION("innodb-scrub-log"),
MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"),

View file

@ -713,7 +713,6 @@ net_real_write(NET *net,const uchar *packet, size_t len)
{
sql_print_warning("Could not write packet: fd: %lld state: %d "
"errno: %d vio_errno: %d length: %ld",
MYF(ME_ERROR_LOG | ME_WARNING),
(longlong) vio_fd(net->vio), (int) net->vio->state,
vio_errno(net->vio), net->last_errno,
(ulong) (end-pos));

View file

@ -1495,11 +1495,23 @@ handle_rpl_parallel_thread(void *arg)
after mark_start_commit(), we have to unmark, which has at least a
theoretical possibility of leaving a window where it looks like all
transactions in a GCO have started committing, while in fact one
will need to rollback and retry. This is not supposed to be possible
(since there is a deadlock, at least one transaction should be
blocked from reaching commit), but this seems a fragile ensurance,
and there were historically a number of subtle bugs in this area.
will need to rollback and retry.
Normally this will not happen, since the kill is there to resolve a
deadlock that is preventing at least one transaction from proceeding.
One case it can happen is with InnoDB dict stats update, which can
temporarily cause transactions to block each other, but locks are
released immediately, they don't linger until commit. There could be
other similar cases, there were historically a number of subtle bugs
in this area.
But once we start the commit, we can expect that no new lock
conflicts will be introduced. So by handling any lingering deadlock
kill at this point just before mark_start_commit(), we should be
robust even towards spurious deadlock kills.
*/
if (rgi->killed_for_retry != rpl_group_info::RETRY_KILL_NONE)
wait_for_pending_deadlock_kill(thd, rgi);
if (!thd->killed)
{
DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");

View file

@ -2539,6 +2539,23 @@ rpl_group_info::unmark_start_commit()
e= this->parallel_entry;
mysql_mutex_lock(&e->LOCK_parallel_entry);
/*
Assert that we have not already wrongly completed this GCO and signalled
the next one to start, only to now unmark and make the signal invalid.
This is to catch problems like MDEV-34696.
The error inject rpl_parallel_simulate_temp_err_xid is used to test this
precise situation, that we handle it gracefully if it somehow occurs in a
release build. So disable the assert in this case.
*/
#ifndef DBUG_OFF
bool allow_unmark_after_complete= false;
DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_xid",
allow_unmark_after_complete= true;);
DBUG_ASSERT(!gco->next_gco ||
gco->next_gco->wait_count > e->count_committing_event_groups ||
allow_unmark_after_complete);
#endif
--e->count_committing_event_groups;
mysql_mutex_unlock(&e->LOCK_parallel_entry);
}

View file

@ -32197,7 +32197,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
else
{
const KEY *ref_keyinfo= table->key_info + ref_key;
refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
refkey_rows_estimate=
(ha_rows)ref_keyinfo->actual_rec_per_key(tab->ref.key_parts - 1);
}
set_if_bigger(refkey_rows_estimate, 1);
}

View file

@ -610,7 +610,7 @@ public:
bool avg_frequency_is_inited() { return avg_frequency != NULL; }
double get_avg_frequency(uint i)
double get_avg_frequency(uint i) const
{
return (double) avg_frequency[i] / Scale_factor_avg_frequency;
}

View file

@ -173,7 +173,7 @@ typedef struct st_key {
engine_option_value *option_list;
ha_index_option_struct *option_struct; /* structure with parsed options */
double actual_rec_per_key(uint i);
double actual_rec_per_key(uint i) const;
} KEY;

View file

@ -10322,7 +10322,7 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd)
}
double KEY::actual_rec_per_key(uint i)
double KEY::actual_rec_per_key(uint i) const
{
if (rec_per_key == 0)
return 0;

View file

@ -2573,6 +2573,51 @@ err_exit:
return(FALSE);
}
/** Look up and buffer-fix a page in the buffer pool, reading it from disk
if it is not already resident.
@param id  tablespace page identifier to look up
@return the buffer-fixed block, or nullptr if the page was marked as
freed/corrupted or the read failed
NOTE(review): appears to assume an uncompressed page (ut_ad(b->frame));
confirm callers never pass ROW_FORMAT=COMPRESSED-only pages. */
buf_block_t* buf_pool_t::page_fix(const page_id_t id)
{
ha_handler_stats *const stats= mariadb_stats;
buf_inc_get(stats);
/* Locate the page_hash cell and its latch for this page id. */
auto& chain= page_hash.cell_get(id.fold());
page_hash_latch &hash_lock= page_hash.lock_get(chain);
for (;;)
{
hash_lock.lock_shared();
buf_page_t *b= page_hash.get(id, chain);
if (b)
{
/* Buffer-fix before releasing the hash latch, so the block
cannot be evicted while we examine its state. */
uint32_t state= b->fix();
hash_lock.unlock_shared();
ut_ad(!b->in_zip_hash);
ut_ad(b->frame);
ut_ad(state >= buf_page_t::FREED);
if (state >= buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX)
{
/* A read into the block is in progress; acquiring the page latch
in shared mode waits for the read to complete. */
b->lock.s_lock();
state= b->state();
ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX);
b->lock.s_unlock();
}
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
{
/* The page was marked as freed or corrupted. */
b->unfix();
b= nullptr;
}
return reinterpret_cast<buf_block_t*>(b);
}
hash_lock.unlock_shared();
/* Page is not resident: issue a synchronous read, then retry the
hash lookup on success. */
switch (buf_read_page(id, 0, chain)) {
default:
/* The read failed (e.g. page does not exist or is corrupted). */
return nullptr;
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
mariadb_increment_pages_read(stats);
/* Trigger random read-ahead around the requested page. */
buf_read_ahead_random(id, 0);
}
}
}
/** Low level function used to get access to a database page.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0

View file

@ -39,9 +39,6 @@ Created 11/5/1995 Heikki Tuuri
#include "srv0mon.h"
#include "my_cpu.h"
/** Flush this many pages in buf_LRU_get_free_block() */
size_t innodb_lru_flush_size;
/** The number of blocks from the LRU_old pointer onward, including
the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
of the whole LRU list length, except that the tolerance defined below
@ -369,17 +366,13 @@ block to read in a page. Note that we only ever get a block from
the free list. Even when we flush a page or find a page in LRU scan
we put it to free list to be used.
* iteration 0:
* get a block from the buf_pool.free list, success:done
* get a block from the buf_pool.free list
* if buf_pool.try_LRU_scan is set
* scan LRU up to 100 pages to free a clean block
* success:retry the free list
* flush up to innodb_lru_flush_size LRU blocks to data files
(until UT_LIST_GET_GEN(buf_pool.free) < innodb_lru_scan_depth)
* on buf_page_write_complete() the blocks will put on buf_pool.free list
* success: retry the free list
* invoke buf_pool.page_cleaner_wakeup(true) and wait its completion
* subsequent iterations: same as iteration 0 except:
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
* scan the entire LRU list
@param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY

View file

@ -18501,6 +18501,7 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
ib_senderrf(thd, IB_LOG_LEVEL_ERROR, ER_CANT_CREATE_HANDLER_FILE);
break;
case log_t::RESIZE_STARTED:
const lsn_t start{log_sys.resize_in_progress()};
for (timespec abstime;;)
{
if (thd_kill_level(thd))
@ -18511,13 +18512,30 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
set_timespec(abstime, 5);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
const bool in_progress(buf_pool.get_oldest_modification(LSN_MAX) <
log_sys.resize_in_progress());
if (in_progress)
lsn_t resizing= log_sys.resize_in_progress();
if (resizing > buf_pool.get_oldest_modification(0))
{
buf_pool.page_cleaner_wakeup(true);
my_cond_timedwait(&buf_pool.done_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
resizing= log_sys.resize_in_progress();
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (!log_sys.resize_in_progress())
if (start > log_sys.get_lsn())
{
ut_ad(!log_sys.is_pmem());
/* The server is almost idle. Write dummy FILE_CHECKPOINT records
to ensure that the log resizing will complete. */
log_sys.latch.wr_lock(SRW_LOCK_CALL);
while (start > log_sys.get_lsn())
{
mtr_t mtr;
mtr.start();
mtr.commit_files(log_sys.last_checkpoint_lsn);
}
log_sys.latch.wr_unlock();
}
if (!resizing || resizing > start /* only wait for our resize */)
break;
}
}
@ -18904,7 +18922,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
"Number of UNDO log pages to purge in one batch from the history list",
NULL, NULL,
1000, /* Default setting */
127, /* Default setting */
1, /* Minimum value */
innodb_purge_batch_size_MAX, 0);
@ -19167,11 +19185,6 @@ static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
"How deep to scan LRU to keep it clean",
NULL, NULL, 1536, 100, ~0UL, 0);
static MYSQL_SYSVAR_SIZE_T(lru_flush_size, innodb_lru_flush_size,
PLUGIN_VAR_RQCMDARG,
"How many pages to flush on LRU eviction",
NULL, NULL, 32, 1, SIZE_T_MAX, 0);
static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
PLUGIN_VAR_OPCMDARG,
"Set to 0 (don't flush neighbors from buffer pool),"
@ -19435,14 +19448,21 @@ static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
10 << 20, 10 << 20,
1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);
static ulong innodb_purge_rseg_truncate_frequency;
static ulong innodb_purge_rseg_truncate_frequency= 128;
static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
innodb_purge_rseg_truncate_frequency,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_DEPRECATED,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_DEPRECATED | PLUGIN_VAR_NOCMDOPT,
"Unused",
NULL, NULL, 128, 1, 128, 0);
static size_t innodb_lru_flush_size;
static MYSQL_SYSVAR_SIZE_T(lru_flush_size, innodb_lru_flush_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED | PLUGIN_VAR_NOCMDOPT,
"Unused",
NULL, NULL, 32, 1, SIZE_T_MAX, 0);
static void innodb_undo_log_truncate_update(THD *thd, struct st_mysql_sys_var*,
void*, const void *save)
{

View file

@ -74,14 +74,10 @@ page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the page of a tree cursor.
/** Returns the page of a tree cursor.
@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
btr_cur_t* cursor);/*!< in: tree cursor */
#define btr_cur_get_page(cursor) (cursor)->block()->page.frame
/*********************************************************//**
Returns the index of a cursor.
@param cursor b-tree cursor

View file

@ -48,18 +48,6 @@ btr_cur_get_page_zip(
return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
}
/*********************************************************//**
Returns the page of a tree cursor.
@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
	btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	/* The record that the B-tree cursor points to lies within the
	page frame; align the record pointer down to the page start. */
	const rec_t*	rec = page_cur_get_rec(&cursor->page_cur);

	return(page_align(rec));
}
/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE

View file

@ -1358,6 +1358,12 @@ public:
}
public:
/** Look up and buffer-fix a page.
@param id page identifier
@return undo log page, buffer-fixed
@retval nullptr if the undo page was corrupted or freed */
buf_block_t *page_fix(const page_id_t id);
/** @return whether the buffer pool contains a page
@param page_id page identifier
@param chain hash table chain for page_id.fold() */

View file

@ -33,9 +33,6 @@ Created 11/5/1995 Heikki Tuuri
struct trx_t;
struct fil_space_t;
/** Flush this many pages in buf_LRU_get_free_block() */
extern size_t innodb_lru_flush_size;
/*#######################################################################
These are low-level functions
#########################################################################*/
@ -82,17 +79,13 @@ block to read in a page. Note that we only ever get a block from
the free list. Even when we flush a page or find a page in LRU scan
we put it to free list to be used.
* iteration 0:
* get a block from the buf_pool.free list, success:done
* get a block from the buf_pool.free list
* if buf_pool.try_LRU_scan is set
* scan LRU up to 100 pages to free a clean block
* success:retry the free list
* flush up to innodb_lru_flush_size LRU blocks to data files
(until UT_LIST_GET_GEN(buf_pool.free) < innodb_lru_scan_depth)
* on buf_page_write_complete() the blocks will be put on the buf_pool.free list
* success: retry the free list
* invoke buf_pool.page_cleaner_wakeup(true) and wait for its completion
* subsequent iterations: same as iteration 0 except:
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
* scan the entire LRU list
@param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY

View file

@ -224,7 +224,7 @@ public:
/** exclusive latch for checkpoint, shared for mtr_t::commit() to buf */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock latch;
/** number of std::swap(buf, flush_buf) and writes from buf to log;
/** number of writes from buf or flush_buf to log;
protected by latch.wr_lock() */
ulint write_to_log;
@ -232,8 +232,9 @@ public:
lsn_t write_lsn;
/** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
In write_buf(), buf and flush_buf are swapped */
In write_buf(), buf and flush_buf may be swapped */
byte *flush_buf;
/** set when there may be need to initiate a log checkpoint.
This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
std::atomic<bool> need_checkpoint;
@ -372,9 +373,10 @@ public:
private:
/** Write resize_buf to resize_log.
@param length the used length of resize_buf */
@param b resize_buf or resize_flush_buf
@param length the used length of b */
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
void resize_write_buf(size_t length) noexcept;
void resize_write_buf(const byte *b, size_t length) noexcept;
public:
/** Rename a log file after resizing.
@ -506,13 +508,7 @@ public:
@param d destination
@param s string of bytes
@param size length of str, in bytes */
void append(byte *&d, const void *s, size_t size) noexcept
{
ut_ad(latch_have_any());
ut_ad(d + size <= buf + (is_pmem() ? file_size : buf_size));
memcpy(d, s, size);
d+= size;
}
static inline void append(byte *&d, const void *s, size_t size) noexcept;
/** Set the log file format. */
void set_latest_format(bool encrypted) noexcept

View file

@ -31,14 +31,6 @@ Created 10/4/1994 Heikki Tuuri
#ifdef UNIV_DEBUG
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
/*==============*/
page_cur_t* cur); /*!< in: page cursor */
/*********************************************************//**
Gets pointer to the buffer block where the cursor is positioned.
@return page */
UNIV_INLINE
@ -60,12 +52,12 @@ page_cur_get_page_zip(
UNIV_INLINE
rec_t *page_cur_get_rec(const page_cur_t *cur);
#else /* UNIV_DEBUG */
# define page_cur_get_page(cur) page_align((cur)->rec)
# define page_cur_get_block(cur) (cur)->block
# define page_cur_get_page_zip(cur) buf_block_get_page_zip((cur)->block)
# define page_cur_get_rec(cur) (cur)->rec
#endif /* UNIV_DEBUG */
# define is_page_cur_get_page_zip(cur) is_buf_block_get_page_zip((cur)->block)
#define page_cur_get_page(cur) page_cur_get_block(cur)->page.frame
#define is_page_cur_get_page_zip(cur) is_buf_block_get_page_zip((cur)->block)
/*********************************************************//**
Sets the cursor object to point before the first user record
on the page. */

View file

@ -25,18 +25,6 @@ Created 10/4/1994 Heikki Tuuri
*************************************************************************/
#ifdef UNIV_DEBUG
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
/*==============*/
	page_cur_t*	cur)	/*!< in: page cursor */
{
	/* The cursor record resides inside a page frame; align the
	record pointer down to the start of that frame. */
	const rec_t*	rec = page_cur_get_rec(cur);

	return(page_align(rec));
}
/*********************************************************//**
Gets pointer to the buffer block where the cursor is positioned.
@return page */

View file

@ -209,17 +209,6 @@ que_eval_sql(
const char* sql, /*!< in: SQL string */
trx_t* trx); /*!< in: trx */
/**********************************************************************//**
Round robin scheduler.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
que_thr_t*
que_fork_scheduler_round_robin(
/*===========================*/
que_fork_t* fork, /*!< in: a query fork */
que_thr_t* thr); /*!< in: current pos */
/** Query thread states */
enum que_thr_state_t {
/** in selects this means that the thread is at the end of its

View file

@ -54,32 +54,47 @@ row_vers_impl_x_locked(
dict_index_t* index,
const rec_offs* offsets);
/** Finds out if a version of the record, where the version >= the current
purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param[in] also_curr TRUE if also rec is included in the versions
to search; otherwise only versions prior
to it are searched
@param[in] rec record in the clustered index; the caller
must have a latch on the page
@param[in] mtr mtr holding the latch on rec; it will
also hold the latch on purge_view
@param[in] index secondary index
@param[in] ientry secondary index entry
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@return TRUE if earlier version should have */
/** Find out whether data tuple has missing data type
for indexed virtual column.
@param tuple data tuple
@param index virtual index
@return true if tuple has missing column type */
bool dtuple_vcol_data_missing(const dtuple_t &tuple,
const dict_index_t &index);
/** build virtual column value from current cluster index record data
@param[in,out] row the cluster index row in dtuple form
@param[in] clust_index clustered index
@param[in] index the secondary index
@param[in] heap heap used to build virtual dtuple. */
bool
row_vers_old_has_index_entry(
bool also_curr,
const rec_t* rec,
mtr_t* mtr,
row_vers_build_clust_v_col(
dtuple_t* row,
dict_index_t* clust_index,
dict_index_t* index,
const dtuple_t* ientry,
mem_heap_t* heap);
/** Build a dtuple contains virtual column data for current cluster index
@param[in] rec cluster index rec
@param[in] clust_index cluster index
@param[in] clust_offsets cluster rec offset
@param[in] index secondary index
@param[in] trx_id transaction ID on the purging record,
or 0 if called outside purge
@param[in] roll_ptr roll_ptr for the purge record
@param[in,out] heap heap memory
@param[in,out] v_heap heap memory to keep virtual column tuple
@param[in,out] mtr mini-transaction
@return dtuple contains virtual column data */
dtuple_t*
row_vers_build_cur_vrow(
const rec_t* rec,
dict_index_t* clust_index,
rec_offs** clust_offsets,
dict_index_t* index,
trx_id_t trx_id,
roll_ptr_t roll_ptr,
trx_id_t trx_id);
mem_heap_t* heap,
mem_heap_t* v_heap,
mtr_t* mtr);
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent

View file

@ -550,6 +550,15 @@ void srv_monitor_task(void*);
void srv_master_callback(void*);
/**
Fetches and executes tasks from the purge work queue,
until this queue is empty.
This is main part of purge worker task, but also
executed in coordinator.
@note needs current_thd to be set beforehand.
*/
void srv_purge_worker_task_low();
} /* extern "C" */
#ifdef UNIV_DEBUG

View file

@ -280,6 +280,8 @@ public:
#endif
}
bool rd_u_upgrade_try() { return writer.wr_lock_try(); }
void u_wr_upgrade()
{
DBUG_ASSERT(writer.is_locked());
@ -294,6 +296,13 @@ public:
readers.store(0, std::memory_order_release);
/* Note: Any pending rd_lock() will not be woken up until u_unlock() */
}
  /** Downgrade an update lock to a shared lock: register one more
  reader before releasing the update lock, so that the lock is never
  observed as fully released during the transition. */
  void u_rd_downgrade()
  {
    DBUG_ASSERT(writer.is_locked());
    /* Become a reader first; relaxed ordering suffices because the
    update lock is still held at this point. */
    ut_d(uint32_t lk=) readers.fetch_add(1, std::memory_order_relaxed);
    /* The reader count must not collide with the WRITER flag bit. */
    ut_ad(lk < WRITER);
    u_unlock();
  }
void rd_unlock()
{

View file

@ -198,6 +198,30 @@ public:
/** Upgrade an update lock */
inline void u_x_upgrade();
inline void u_x_upgrade(const char *file, unsigned line);
  /** Try to upgrade a shared lock to exclusive without waiting.
  On failure the shared lock remains held; on success the caller
  holds a recursive exclusive lock.
  @return whether a shared lock was upgraded to exclusive */
  bool s_x_upgrade_try()
  {
    ut_ad(have_s());
    ut_ad(!have_u_or_x());
    /* First try to acquire the update (writer) slot; if that fails,
    keep the shared lock and report failure. */
    if (!lock.rd_u_upgrade_try())
      return false;
    claim_ownership();
    /* Drop our reader reference, then complete the U -> X upgrade. */
    s_unlock();
    lock.u_wr_upgrade();
    recursive= RECURSIVE_X;
    return true;
  }
  __attribute__((warn_unused_result))
  /** Upgrade a shared lock to exclusive, waiting if necessary.
  NOTE: if this returns false, the latch was completely released and
  then re-acquired in exclusive mode; any state observed under the
  shared lock may have changed in between.
  @return whether the operation succeeded without waiting */
  bool s_x_upgrade()
  {
    if (s_x_upgrade_try())
      return true;
    /* Could not upgrade atomically: release and re-acquire. */
    s_unlock();
    x_lock();
    return false;
  }
/** Downgrade a single exclusive lock to an update lock */
void x_u_downgrade()
{
@ -206,6 +230,16 @@ public:
recursive*= RECURSIVE_U;
lock.wr_u_downgrade();
}
  /** Downgrade a single (non-recursive) update lock to a shared lock.
  The caller must hold exactly one update lock. */
  void u_s_downgrade()
  {
    ut_ad(have_u_or_x());
    ut_ad(recursive == RECURSIVE_U);
    /* Clear the recursion count and ownership before handing the
    latch over to shared mode. */
    recursive= 0;
    set_new_owner(0);
    lock.u_rd_downgrade();
    /* Debug builds track this thread as a shared-lock holder. */
    ut_d(s_lock_register());
  }
/** Acquire an exclusive lock or upgrade an update lock
@return whether U locks were upgraded to X */

View file

@ -149,10 +149,11 @@ public:
private:
/** number of pending stop() calls without resume() */
Atomic_counter<uint32_t> m_paused;
/** number of stop_SYS() calls without resume_SYS() */
Atomic_counter<uint32_t> m_SYS_paused;
/** number of stop_FTS() calls without resume_FTS() */
Atomic_counter<uint32_t> m_FTS_paused;
/** PAUSED_SYS * number of stop_SYS() calls without resume_SYS() +
number of stop_FTS() calls without resume_FTS() */
Atomic_relaxed<uint32_t> m_FTS_paused;
/** The stop_SYS() multiplier in m_FTS_paused */
static constexpr const uint32_t PAUSED_SYS= 1U << 16;
/** latch protecting end_view */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock_low end_latch;
@ -321,16 +322,21 @@ private:
void wait_FTS(bool also_sys);
public:
/** Suspend purge in data dictionary tables */
void stop_SYS() { m_SYS_paused++; }
  /** Suspend purge in data dictionary tables.
  The stop_SYS() count is kept in the upper bits of m_FTS_paused
  (in multiples of PAUSED_SYS), sharing one atomic word with the
  stop_FTS() count. */
  void stop_SYS()
  {
    ut_d(const auto p=) m_FTS_paused.fetch_add(PAUSED_SYS);
    /* Debug check: the PAUSED_SYS counter must not wrap around. */
    ut_ad(p < p + PAUSED_SYS);
  }
/** Resume purge in data dictionary tables */
static void resume_SYS(void *);
/** Pause purge during a DDL operation that could drop FTS_ tables. */
void stop_FTS();
/** Resume purge after stop_FTS(). */
void resume_FTS() { ut_d(const auto p=) m_FTS_paused--; ut_ad(p); }
void resume_FTS()
{ ut_d(const auto p=) m_FTS_paused.fetch_sub(1); ut_ad(p & ~PAUSED_SYS); }
/** @return whether stop_SYS() is in effect */
bool must_wait_FTS() const { return m_FTS_paused; }
bool must_wait_FTS() const { return m_FTS_paused & ~PAUSED_SYS; }
private:
/**
@ -432,10 +438,17 @@ public:
struct view_guard
{
inline view_guard();
enum guard { END_VIEW= -1, PURGE= 0, VIEW= 1};
guard latch;
inline view_guard(guard latch);
inline ~view_guard();
/** Fetch an undo log page.
@param id page identifier
@param mtr mini-transaction
@return reference to buffer page, possibly buffer-fixed in mtr */
inline const buf_block_t *get(const page_id_t id, mtr_t *mtr);
/** @return purge_sys.view */
/** @return purge_sys.view or purge_sys.end_view */
inline const ReadViewBase &view() const;
};
@ -464,14 +477,39 @@ public:
/** The global data structure coordinating a purge */
extern purge_sys_t purge_sys;
purge_sys_t::view_guard::view_guard()
{ purge_sys.latch.rd_lock(SRW_LOCK_CALL); }
/** Acquire the latch that protects the read view to be accessed.
@param latch VIEW (shared purge_sys.latch protecting purge_sys.view),
END_VIEW (shared purge_sys.end_latch protecting purge_sys.end_view),
or PURGE (no latch needed; see comment below) */
purge_sys_t::view_guard::view_guard(purge_sys_t::view_guard::guard latch) :
  latch(latch)
{
  switch (latch) {
  case VIEW:
    purge_sys.latch.rd_lock(SRW_LOCK_CALL);
    break;
  case END_VIEW:
    purge_sys.end_latch.rd_lock();
    break;
  case PURGE:
    /* the access is within a purge batch; purge_coordinator_task
    will wait for all workers to complete before updating the views */
    break;
  }
}
purge_sys_t::view_guard::~view_guard()
{ purge_sys.latch.rd_unlock(); }
{
switch (latch) {
case VIEW:
purge_sys.latch.rd_unlock();
break;
case END_VIEW:
purge_sys.end_latch.rd_unlock();
break;
case PURGE:
break;
}
}
const ReadViewBase &purge_sys_t::view_guard::view() const
{ return purge_sys.view; }
{ return latch == END_VIEW ? purge_sys.end_view : purge_sys.view; }
/** Acquire a shared latch protecting purge_sys.end_view. */
purge_sys_t::end_view_guard::end_view_guard()
{ purge_sys.end_latch.rd_lock(); }

View file

@ -157,50 +157,44 @@ trx_undo_report_row_operation(
/** TRX_UNDO_PREV_IN_PURGE tells trx_undo_prev_version_build() that it
is being called purge view and we would like to get the purge record
even if it is in the purge view (in the normal case, it will return without
fetching the purge record */
fetching the purge record) */
static constexpr ulint TRX_UNDO_PREV_IN_PURGE = 1;
/** This tells trx_undo_prev_version_build() to fetch the old value in
the undo log (which is the after image for an update) */
static constexpr ulint TRX_UNDO_GET_OLD_V_VALUE = 2;
/** indicate a call from row_vers_old_has_index_entry() */
/** indicate a call from row_undo_mod_sec_is_unsafe() */
static constexpr ulint TRX_UNDO_CHECK_PURGEABILITY = 4;
/** indicate a call from row_purge_is_unsafe() */
static constexpr ulint TRX_UNDO_CHECK_PURGE_PAGES = 8;
/** Build a previous version of a clustered index record. The caller
must hold a latch on the index page of the clustered index record.
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is
allocated
@param old_vers previous version or NULL if rec is the
first inserted version, or if history data
has been deleted (an error), or if the purge
could have removed the version
though it has not yet done so
@param v_heap memory heap used to create vrow
dtuple if it is not yet created. This heap
diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param vrow virtual column info, if any
@param v_status status determine if it is going into this
function by purge thread or not.
And if we read "after image" of undo log
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is allocated
@param old_vers previous version, or NULL if rec is the first inserted
version, or if history data has been deleted (an error),
or if the purge could have removed the version though
it has not yet done so
@param mtr mini-transaction
@param v_status TRX_UNDO_PREV_IN_PURGE, ...
@param v_heap memory heap used to create vrow dtuple if it is not yet
created. This heap diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param vrow virtual column info, if any
@return error code
@retval DB_SUCCESS if previous version was successfully built,
or if it was an insert or the undo record refers to the table before rebuild
@retval DB_MISSING_HISTORY if the history is missing */
dberr_t
trx_undo_prev_version_build(
const rec_t *rec,
dict_index_t *index,
rec_offs *offsets,
mem_heap_t *heap,
rec_t **old_vers,
mem_heap_t *v_heap,
dtuple_t **vrow,
ulint v_status);
dberr_t trx_undo_prev_version_build(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mtr_t *mtr,
ulint v_status,
mem_heap_t *v_heap, dtuple_t **vrow);
/** Read from an undo log record a non-virtual column value.
@param ptr pointer to remaining part of the undo record

View file

@ -500,9 +500,8 @@ void lock_sys_t::close()
requesting record lock are brute force (BF). If they are check is
this BF-BF wait correct and if not report BF wait and assert.
@param[in] lock_rec other waiting record lock
@param[in] trx trx requesting conflicting record lock
@param[in] type_mode lock type mode of requesting trx
@param lock other waiting lock
@param trx transaction requesting conflicting lock
*/
static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx,
const unsigned type_mode = LOCK_NONE)

View file

@ -540,17 +540,14 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
resize_target= size;
resize_buf= static_cast<byte*>(ptr);
resize_flush_buf= static_cast<byte*>(ptr2);
start_lsn= get_lsn();
if (is_pmem())
{
resize_log.close();
start_lsn= get_lsn();
}
else
{
memcpy_aligned<16>(resize_buf, buf, (buf_free + 15) & ~15);
start_lsn= first_lsn +
(~lsn_t{write_size - 1} & (write_lsn - first_lsn));
}
(~lsn_t{write_size - 1} &
(lsn_t{write_size - 1} + start_lsn - first_lsn));
}
resize_lsn.store(start_lsn, std::memory_order_relaxed);
status= success ? RESIZE_STARTED : RESIZE_FAILED;
@ -804,19 +801,26 @@ inline void log_t::persist(lsn_t lsn) noexcept
#endif
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
/** Write resize_buf to resize_log.
@param length the used length of resize_buf */
void log_t::resize_write_buf(size_t length) noexcept
void log_t::resize_write_buf(const byte *b, size_t length) noexcept
{
const size_t block_size_1= write_size - 1;
ut_ad(b == resize_buf || b == resize_flush_buf);
ut_ad(!(resize_target & block_size_1));
ut_ad(!(length & block_size_1));
ut_ad(length > block_size_1);
ut_ad(length <= resize_target);
const lsn_t resizing{resize_in_progress()};
ut_ad(resizing <= write_lsn);
lsn_t offset= START_OFFSET +
((write_lsn - resizing) & ~lsn_t{block_size_1}) %
int64_t d= int64_t(write_lsn - resize_in_progress());
if (UNIV_UNLIKELY(d <= 0))
{
d&= ~int64_t(block_size_1);
if (int64_t(d + length) <= 0)
return;
length+= d;
b-= d;
d= 0;
}
lsn_t offset= START_OFFSET + (lsn_t(d) & ~lsn_t{block_size_1}) %
(resize_target - START_OFFSET);
if (UNIV_UNLIKELY(offset + length > resize_target))
@ -828,7 +832,7 @@ void log_t::resize_write_buf(size_t length) noexcept
}
ut_a(os_file_write_func(IORequestWrite, "ib_logfile101", resize_log.m_file,
buf, offset, length) == DB_SUCCESS);
b, offset, length) == DB_SUCCESS);
}
/** Write buf to ib_logfile0.
@ -862,6 +866,7 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
ut_ad(write_size_1 >= 511);
const byte *const write_buf{buf};
const byte *const re_write_buf{resize_buf};
offset&= ~lsn_t{write_size_1};
if (length <= write_size_1)
@ -875,8 +880,8 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
#else
# ifdef HAVE_valgrind
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - length);
if (UNIV_LIKELY_NULL(resize_buf))
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) - length);
if (UNIV_LIKELY_NULL(re_write_buf))
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - length);
# endif
buf[length]= 0; /* allow recovery to catch EOF faster */
#endif
@ -896,15 +901,15 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
the current LSN are generated. */
#ifdef HAVE_valgrind
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free);
if (UNIV_LIKELY_NULL(resize_buf))
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) -
if (UNIV_LIKELY_NULL(re_write_buf))
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) -
new_buf_free);
#endif
buf[length]= 0; /* allow recovery to catch EOF faster */
length&= ~write_size_1;
memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15);
if (UNIV_LIKELY_NULL(resize_buf))
memcpy_aligned<16>(resize_flush_buf, resize_buf + length,
if (UNIV_LIKELY_NULL(re_write_buf))
memcpy_aligned<16>(resize_flush_buf, re_write_buf + length,
(new_buf_free + 15) & ~15);
length+= write_size_1 + 1;
}
@ -923,8 +928,8 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
/* Do the write to the log file */
log_write_buf(write_buf, length, offset);
if (UNIV_LIKELY_NULL(resize_buf))
resize_write_buf(length);
if (UNIV_LIKELY_NULL(re_write_buf))
resize_write_buf(re_write_buf, length);
write_lsn= lsn;
if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED))

View file

@ -4531,7 +4531,7 @@ dberr_t recv_recovery_from_checkpoint_start()
ut_ad(recv_sys.pages.empty());
if (log_sys.format == log_t::FORMAT_3_23) {
early_exit:
func_exit:
log_sys.latch.wr_unlock();
return err;
}
@ -4547,7 +4547,7 @@ read_only_recovery:
sql_print_warning("InnoDB: innodb_read_only"
" prevents crash recovery");
err = DB_READ_ONLY;
goto early_exit;
goto func_exit;
}
if (recv_sys.is_corrupt_log()) {
sql_print_error("InnoDB: Log scan aborted at LSN "
@ -4585,7 +4585,7 @@ read_only_recovery:
rescan, missing_tablespace);
if (err != DB_SUCCESS) {
goto early_exit;
goto func_exit;
}
if (missing_tablespace) {
@ -4607,7 +4607,7 @@ read_only_recovery:
rescan, missing_tablespace);
if (err != DB_SUCCESS) {
goto early_exit;
goto func_exit;
}
} while (missing_tablespace);
@ -4666,7 +4666,7 @@ read_only_recovery:
if (recv_sys.lsn < log_sys.next_checkpoint_lsn) {
err_exit:
err = DB_ERROR;
goto early_exit;
goto func_exit;
}
if (!srv_read_only_mode && log_sys.is_latest()) {
@ -4690,7 +4690,7 @@ err_exit:
ut_ad("log parsing error" == 0);
mysql_mutex_unlock(&recv_sys.mutex);
err = DB_CORRUPTION;
goto early_exit;
goto func_exit;
}
recv_sys.apply_log_recs = true;
ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE
@ -4698,9 +4698,9 @@ err_exit:
if (srv_operation == SRV_OPERATION_NORMAL) {
err = recv_rename_files();
}
mysql_mutex_unlock(&recv_sys.mutex);
recv_lsn_checks_on = true;
mysql_mutex_unlock(&recv_sys.mutex);
/* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log
@ -4710,8 +4710,7 @@ err_exit:
err = DB_CORRUPTION;
}
log_sys.latch.wr_unlock();
return err;
goto func_exit;
}
bool recv_dblwr_t::validate_page(const page_id_t page_id,

View file

@ -1312,6 +1312,15 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len,
}
}
/** Copy a string of bytes to the log buffer and advance the write
position past the copied data.
@param d    in/out: current write position within log_sys.buf
@param s    source bytes to append
@param size number of bytes to copy */
inline void log_t::append(byte *&d, const void *s, size_t size) noexcept
{
  /* The caller must hold the log latch (in any mode). */
  ut_ad(log_sys.latch_have_any());
  /* The copy must not overrun the log buffer; on PMEM the buffer
  spans the whole file, otherwise it is buf_size bytes. */
  ut_ad(d + size <= log_sys.buf +
        (log_sys.is_pmem() ? log_sys.file_size : log_sys.buf_size));
  memcpy(d, s, size);
  d+= size;
}
template<bool spin,bool pmem>
std::pair<lsn_t,mtr_t::page_flush_ahead>
mtr_t::finish_writer(mtr_t *mtr, size_t len)

View file

@ -166,40 +166,6 @@ que_thr_init_command(
thr->state = QUE_THR_RUNNING;
}
/**********************************************************************//**
Round robin scheduler.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
que_thr_t*
que_fork_scheduler_round_robin(
/*===========================*/
	que_fork_t*	fork,	/*!< in: a query fork */
	que_thr_t*	thr)	/*!< in: current pos */
{
	trx_t*		trx = fork->trx;
	que_thr_t*	next;

	trx->mutex_lock();

	/* Advance to the next query thread of the fork; when there is
	no current position, start from the first available thread. */
	next = (thr == NULL)
		? UT_LIST_GET_FIRST(fork->thrs)
		: UT_LIST_GET_NEXT(thrs, thr);

	if (next != NULL) {
		/* Activate the fork and prepare the chosen thread for
		execution by que_run_threads(). */
		fork->state = QUE_FORK_ACTIVE;
		fork->last_sel_node = NULL;

		ut_ad(next->state == QUE_THR_COMPLETED);
		que_thr_init_command(next);
	}

	trx->mutex_unlock();

	return(next);
}
/**********************************************************************//**
Starts execution of a command in a query fork. Picks a query thread which
is not in the QUE_THR_RUNNING state and moves it to that state. If none

View file

@ -160,7 +160,7 @@ may be pointing to garbage (an undo log record discarded by purge),
but it will never be dereferenced, because the purge view is older
than any active transaction.
For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
For details see: row_undo_mod_sec_is_unsafe() and row_purge_poss_sec()
*/

View file

@ -2773,10 +2773,16 @@ avoid_bulk:
ut_ad(index->table->skip_alter_undo);
ut_ad(!entry->is_metadata());
/* If foreign key exist and foreign key is enabled
then avoid using bulk insert for copy algorithm */
if (innodb_alter_copy_bulk
&& !index->table->is_temporary()
&& !index->table->versioned()
&& !index->table->has_spatial_index()) {
&& !index->table->has_spatial_index()
&& (!trx->check_foreigns
|| (index->table->foreign_set.empty()
&& index->table->referenced_set.empty()))) {
ut_ad(page_is_empty(block->page.frame));
/* This code path has been executed at the
start of the alter operation. Consecutive

View file

@ -3821,7 +3821,7 @@ UndorecApplier::get_old_rec(const dtuple_t &tuple, dict_index_t *index,
if (is_same(roll_ptr))
return version;
trx_undo_prev_version_build(version, index, *offsets, heap, &prev_version,
nullptr, nullptr, 0);
&mtr, 0, nullptr, nullptr);
version= prev_version;
}
while (version);
@ -3990,7 +3990,7 @@ void UndorecApplier::log_update(const dtuple_t &tuple,
copy_rec= rec_copy(mem_heap_alloc(
heap, rec_offs_size(offsets)), match_rec, offsets);
trx_undo_prev_version_build(match_rec, clust_index, offsets, heap,
&prev_version, nullptr, nullptr, 0);
&prev_version, &mtr, 0, nullptr, nullptr);
prev_offsets= rec_get_offsets(prev_version, clust_index, prev_offsets,
clust_index->n_core_fields,

View file

@ -267,6 +267,448 @@ row_purge_remove_clust_if_poss(
return(false);
}
/** Check a virtual column value index secondary virtual index matches
that of current cluster index record, which is recreated from information
stored in undo log
@param[in] rec record in the clustered index
@param[in] icentry the index entry built from a cluster row
@param[in] clust_index cluster index
@param[in] clust_offsets offsets on the cluster record
@param[in] index the secondary index
@param[in] ientry the secondary index entry
@param[in] roll_ptr the rollback pointer for the purging record
@param[in] trx_id trx id for the purging record
@param[in,out] mtr mini-transaction
@param[in,out] v_row dtuple holding the virtual rows (if needed)
@return true if matches, false otherwise */
static
bool
row_purge_vc_matches_cluster(
const rec_t* rec,
const dtuple_t* icentry,
dict_index_t* clust_index,
rec_offs* clust_offsets,
dict_index_t* index,
const dtuple_t* ientry,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
mtr_t* mtr,
dtuple_t** vrow)
{
const rec_t* version;
rec_t* prev_version;
mem_heap_t* heap2;
mem_heap_t* heap = NULL;
mem_heap_t* tuple_heap;
ulint num_v = dict_table_get_n_v_cols(index->table);
bool compare[REC_MAX_N_FIELDS];
ulint n_fields = dtuple_get_n_fields(ientry);
ulint n_non_v_col = 0;
ulint n_cmp_v_col = 0;
const dfield_t* field1;
dfield_t* field2;
ulint i;
/* First compare non-virtual columns (primary keys) */
ut_ad(index->n_fields == n_fields);
ut_ad(n_fields == dtuple_get_n_fields(icentry));
ut_ad(mtr->memo_contains_page_flagged(rec,
MTR_MEMO_PAGE_S_FIX
| MTR_MEMO_PAGE_X_FIX));
{
const dfield_t* a = ientry->fields;
const dfield_t* b = icentry->fields;
for (const dict_field_t *ifield = index->fields,
*const end = &index->fields[index->n_fields];
ifield != end; ifield++, a++, b++) {
if (!ifield->col->is_virtual()) {
if (cmp_dfield_dfield(a, b)) {
return false;
}
n_non_v_col++;
}
}
}
tuple_heap = mem_heap_create(1024);
ut_ad(n_fields > n_non_v_col);
*vrow = dtuple_create_with_vcol(tuple_heap, 0, num_v);
dtuple_init_v_fld(*vrow);
for (i = 0; i < num_v; i++) {
dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
= DATA_MISSING;
compare[i] = false;
}
version = rec;
while (n_cmp_v_col < n_fields - n_non_v_col) {
heap2 = heap;
heap = mem_heap_create(1024);
roll_ptr_t cur_roll_ptr = row_get_rec_roll_ptr(
version, clust_index, clust_offsets);
ut_ad(cur_roll_ptr != 0);
ut_ad(roll_ptr != 0);
trx_undo_prev_version_build(
version, clust_index, clust_offsets,
heap, &prev_version, mtr,
TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE,
nullptr, vrow);
if (heap2) {
mem_heap_free(heap2);
}
if (!prev_version) {
/* Versions end here */
goto func_exit;
}
clust_offsets = rec_get_offsets(prev_version, clust_index,
NULL,
clust_index->n_core_fields,
ULINT_UNDEFINED, &heap);
ulint entry_len = dict_index_get_n_fields(index);
for (i = 0; i < entry_len; i++) {
const dict_field_t* ind_field
= dict_index_get_nth_field(index, i);
const dict_col_t* col = ind_field->col;
field1 = dtuple_get_nth_field(ientry, i);
if (!col->is_virtual()) {
continue;
}
const dict_v_col_t* v_col
= reinterpret_cast<const dict_v_col_t*>(col);
field2
= dtuple_get_nth_v_field(*vrow, v_col->v_pos);
if ((dfield_get_type(field2)->mtype != DATA_MISSING)
&& (!compare[v_col->v_pos])) {
if (ind_field->prefix_len != 0
&& !dfield_is_null(field2)) {
field2->len = unsigned(
dtype_get_at_most_n_mbchars(
field2->type.prtype,
field2->type.mbminlen,
field2->type.mbmaxlen,
ind_field->prefix_len,
field2->len,
static_cast<char*>
(field2->data)));
}
/* The index field mismatch */
if (cmp_dfield_dfield(field2, field1)) {
mem_heap_free(tuple_heap);
mem_heap_free(heap);
return(false);
}
compare[v_col->v_pos] = true;
n_cmp_v_col++;
}
}
trx_id_t rec_trx_id = row_get_rec_trx_id(
prev_version, clust_index, clust_offsets);
if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
break;
}
version = prev_version;
}
func_exit:
if (n_cmp_v_col == 0) {
*vrow = NULL;
}
mem_heap_free(tuple_heap);
mem_heap_free(heap);
	/* FIXME: If n_cmp_v_col is not the same as
	n_fields - n_non_v_col, a callback is needed to compare the
	remaining columns. For the time being, we return true */
return (true);
}
/** @return whether two data tuples are equal */
bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2)
{
ut_ad(tuple1.magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(tuple2.magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(dtuple_check_typed(&tuple1));
ut_ad(dtuple_check_typed(&tuple2));
ut_ad(tuple1.n_fields == tuple2.n_fields);
for (ulint i= 0; i < tuple1.n_fields; i++)
if (cmp_dfield_dfield(&tuple1.fields[i], &tuple2.fields[i]))
return false;
return true;
}
/** Finds out if a version of the record, where the version >= the current
purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param node    purge node
@param index   secondary index
@param ientry  secondary index entry
@param mtr     mini-transaction
@return whether ientry cannot be purged */
static bool row_purge_is_unsafe(const purge_node_t &node,
				dict_index_t *index,
				const dtuple_t *ientry, mtr_t *mtr)
{
	/* Clustered index record that node.pcur is positioned on. */
	const rec_t*	rec = btr_pcur_get_rec(&node.pcur);
	/* Roll pointer and transaction id of the record being purged;
	used by row_purge_vc_matches_cluster() to bound the undo walk. */
	roll_ptr_t	roll_ptr = node.roll_ptr;
	trx_id_t	trx_id = node.trx_id;
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index = node.pcur.index();
	rec_offs*	clust_offsets;
	mem_heap_t*	heap;
	dtuple_t*	row;
	const dtuple_t*	entry;
	dtuple_t*	vrow = NULL;
	/* Separate heap for virtual-column data, so that it can outlive
	the per-version heap that is recreated on each loop iteration. */
	mem_heap_t*	v_heap = NULL;
	/* Virtual column values that apply to the version currently
	being examined. */
	dtuple_t*	cur_vrow = NULL;

	ut_ad(index->table == clust_index->table);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					clust_index->n_core_fields,
					ULINT_UNDEFINED, &heap);

	if (dict_index_has_virtual(index)) {
		v_heap = mem_heap_create(100);
	}

	/* Phase 1: check the current (latest) clustered index record. */
	if (!rec_get_deleted_flag(rec, rec_offs_comp(clust_offsets))) {
		row_ext_t*	ext;

		/* The top of the stack of versions is locked by the
		mtr holding a latch on the page containing the
		clustered index record. The bottom of the stack is
		locked by the fact that the purge_sys.view must
		'overtake' any read view of an active transaction.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index,
				rec, clust_offsets,
				NULL, NULL, NULL, &ext, heap);

		if (dict_index_has_virtual(index)) {

#ifdef DBUG_OFF
# define dbug_v_purge false
#else /* DBUG_OFF */
			bool	dbug_v_purge = false;
#endif /* DBUG_OFF */

			DBUG_EXECUTE_IF(
				"ib_purge_virtual_index_callback",
				dbug_v_purge = true;);

			roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr(
				rec, clust_index, clust_offsets);

			/* if the row is newly inserted, then the virtual
			columns need to be computed */
			if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
			    || dbug_v_purge) {

				/* If the computation fails, err on the
				side of caution and keep the entry. */
				if (!row_vers_build_clust_v_col(
					    row, clust_index, index, heap)) {
					goto unsafe_to_purge;
				}

				entry = row_build_index_entry(
					row, ext, index, heap);
				if (entry && dtuple_coll_eq(*ientry, *entry)) {
					goto unsafe_to_purge;
				}
			} else {
				/* Build index entry out of row */
				entry = row_build_index_entry(row, ext, index, heap);
				/* entry could only be NULL if
				the clustered index record is an uncommitted
				inserted record whose BLOBs have not been
				written yet. The secondary index record
				can be safely removed, because it cannot
				possibly refer to this incomplete
				clustered index record. (Insert would
				always first be completed for the
				clustered index record, then proceed to
				secondary indexes.) */

				if (entry && row_purge_vc_matches_cluster(
					    rec, entry,
					    clust_index, clust_offsets,
					    index, ientry, roll_ptr,
					    trx_id, mtr, &vrow)) {
					goto unsafe_to_purge;
				}
			}
			/* Recompute the offsets: the heap may have been
			modified while building the virtual columns. */
			clust_offsets = rec_get_offsets(rec, clust_index, NULL,
							clust_index
							->n_core_fields,
							ULINT_UNDEFINED, &heap);
		} else {

			entry = row_build_index_entry(
				row, ext, index, heap);

			/* If entry == NULL, the record contains unset BLOB
			pointers. This must be a freshly inserted record. If
			this is called from
			row_purge_remove_sec_if_poss_low(), the thread will
			hold latches on the clustered index and the secondary
			index. Because the insert works in three steps:

				(1) insert the record to clustered index
				(2) store the BLOBs and update BLOB pointers
				(3) insert records to secondary indexes

			the purge thread can safely ignore freshly inserted
			records and delete the secondary index record. The
			thread that inserted the new record will be inserting
			the secondary index records. */

			/* NOTE that we cannot do the comparison as binary
			fields because the row is maybe being modified so that
			the clustered index record has already been updated to
			a different binary value in a char field, but the
			collation identifies the old and new value anyway! */
			if (entry && dtuple_coll_eq(*ientry, *entry)) {
unsafe_to_purge:
				mem_heap_free(heap);

				if (v_heap) {
					mem_heap_free(v_heap);
				}
				return true;
			}
		}
	} else if (dict_index_has_virtual(index)) {
		/* The current cluster index record could be
		deleted, but the previous version of it might not. We will
		need to get the virtual column data from undo record
		associated with current cluster index */
		cur_vrow = row_vers_build_cur_vrow(
			rec, clust_index, &clust_offsets,
			index, trx_id, roll_ptr, heap, v_heap, mtr);
	}

	/* Phase 2: walk older versions via the undo log until the
	versions end or a matching non-delete-marked version is found. */
	version = rec;

	for (;;) {
		mem_heap_t*	heap2 = heap;
		heap = mem_heap_create(1024);
		vrow = NULL;

		trx_undo_prev_version_build(version,
					    clust_index, clust_offsets,
					    heap, &prev_version, mtr,
					    TRX_UNDO_CHECK_PURGE_PAGES,
					    nullptr,
					    dict_index_has_virtual(index)
					    ? &vrow : nullptr);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (!prev_version) {
			/* Versions end here */
			mem_heap_free(heap);

			if (v_heap) {
				mem_heap_free(v_heap);
			}

			/* No version requires ientry: safe to purge. */
			return false;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						clust_index->n_core_fields,
						ULINT_UNDEFINED, &heap);

		if (dict_index_has_virtual(index)) {
			if (vrow) {
				/* If the undo log lacks data for some
				indexed virtual column, this version
				cannot match; skip the comparison. */
				if (dtuple_vcol_data_missing(*vrow, *index)) {
					goto nochange_index;
				}
				/* Keep the virtual row info for the next
				version, unless it is changed */
				mem_heap_empty(v_heap);
				cur_vrow = dtuple_copy(vrow, v_heap);
				dtuple_dup_v_fld(cur_vrow, v_heap);
			}

			if (!cur_vrow) {
				/* Nothing for this index has changed,
				continue */
nochange_index:
				version = prev_version;
				continue;
			}
		}

		if (!rec_get_deleted_flag(prev_version,
					  rec_offs_comp(clust_offsets))) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, NULL, NULL, &ext, heap);

			if (dict_index_has_virtual(index)) {
				ut_ad(cur_vrow);
				ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
				dtuple_copy_v_fields(row, cur_vrow);
			}

			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers.  This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && dtuple_coll_eq(*ientry, *entry)) {
				goto unsafe_to_purge;
			}
		}

		version = prev_version;
	}
}
/** Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
@ -280,67 +722,45 @@ would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@param[in,out] node row purge node
@param[in] index secondary index
@param[in] entry secondary index entry
@param[in,out] sec_pcur secondary index cursor or NULL
if it is called for purge buffering
operation.
@param[in,out] sec_mtr mini-transaction which holds
secondary index entry or NULL if it is
called for purge buffering operation.
@param[in] is_tree true=pessimistic purge,
false=optimistic (leaf-page only)
@return true if the secondary index record can be purged */
static
bool
row_purge_poss_sec(
purge_node_t* node,
dict_index_t* index,
const dtuple_t* entry,
btr_pcur_t* sec_pcur,
mtr_t* sec_mtr,
bool is_tree)
@param node row purge node
@param index secondary index
@param entry secondary index entry
@param mtr mini-transaction for looking up clustered index
@return whether the secondary index record can be purged */
static bool row_purge_poss_sec(purge_node_t *node, dict_index_t *index,
const dtuple_t *entry, mtr_t *mtr)
{
bool can_delete;
mtr_t mtr;
ut_ad(!index->is_clust());
const auto savepoint= mtr->get_savepoint();
bool can_delete= !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr);
ut_ad(!dict_index_is_clust(index));
if (!can_delete)
{
ut_ad(node->pcur.pos_state == BTR_PCUR_IS_POSITIONED);
can_delete= !row_purge_is_unsafe(*node, index, entry, mtr);
node->pcur.pos_state = BTR_PCUR_WAS_POSITIONED;
node->pcur.latch_mode= BTR_NO_LATCHES;
}
mtr_start(&mtr);
can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
|| !row_vers_old_has_index_entry(true,
btr_pcur_get_rec(&node->pcur),
&mtr, index, entry,
node->roll_ptr, node->trx_id);
/* Persistent cursor is closed if reposition fails. */
if (node->found_clust) {
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
} else {
mtr.commit();
}
ut_ad(mtr.has_committed());
return can_delete;
mtr->rollback_to_savepoint(savepoint);
return can_delete;
}
/***************************************************************
Removes a secondary index entry if possible, by modifying the
index tree. Does not try to buffer the delete.
@return TRUE if success or if not found */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
row_purge_remove_sec_if_poss_tree(
/*==============================*/
purge_node_t* node, /*!< in: row purge node */
dict_index_t* index, /*!< in: index */
const dtuple_t* entry) /*!< in: index entry */
__attribute__((nonnull, warn_unused_result))
/** Remove a secondary index entry if possible, by modifying the index tree.
@param node purge node
@param index secondary index
@param entry index entry
@param page_max_trx_id the PAGE_MAX_TRX_ID
when row_purge_remove_sec_if_poss_leaf() was invoked
@return whether the operation succeeded */
static bool row_purge_remove_sec_if_poss_tree(purge_node_t *node,
dict_index_t *index,
const dtuple_t *entry,
trx_id_t page_max_trx_id)
{
btr_pcur_t pcur;
ibool success = TRUE;
bool success = true;
dberr_t err;
mtr_t mtr;
@ -371,7 +791,9 @@ row_purge_remove_sec_if_poss_tree(
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
if (page_max_trx_id
== page_get_max_trx_id(btr_cur_get_page(&pcur.btr_cur))
|| row_purge_poss_sec(node, index, entry, &mtr)) {
/* Remove the index record, which should have been
marked for deletion. */
@ -410,26 +832,23 @@ row_purge_remove_sec_if_poss_tree(
func_exit:
btr_pcur_close(&pcur); // FIXME: need this?
mtr.commit();
return(success);
return success;
}
/***************************************************************
Removes a secondary index entry without modifying the index tree,
if possible.
@retval true if success or if not found
@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
row_purge_remove_sec_if_poss_leaf(
/*==============================*/
purge_node_t* node, /*!< in: row purge node */
dict_index_t* index, /*!< in: index */
const dtuple_t* entry) /*!< in: index entry */
__attribute__((nonnull, warn_unused_result))
/** Remove a secondary index entry if possible, without modifying the tree.
@param node purge node
@param index secondary index
@param entry index entry
@return PAGE_MAX_TRX_ID for row_purge_remove_sec_if_poss_tree()
@retval 0 if success or if not found */
static trx_id_t row_purge_remove_sec_if_poss_leaf(purge_node_t *node,
dict_index_t *index,
const dtuple_t *entry)
{
mtr_t mtr;
btr_pcur_t pcur;
bool success = true;
trx_id_t page_max_trx_id = 0;
log_free_check();
ut_ad(index->table == node->table);
@ -453,7 +872,7 @@ row_purge_remove_sec_if_poss_leaf(
found:
/* Before attempting to purge a record, check
if it is safe to do so. */
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) {
if (row_purge_poss_sec(node, index, entry, &mtr)) {
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
/* Only delete-marked records should be purged. */
@ -494,8 +913,11 @@ found:
}
}
success = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
!= DB_FAIL;
if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)
== DB_FAIL) {
page_max_trx_id = page_get_max_trx_id(
btr_cur_get_page(btr_cur));
}
}
}
@ -503,7 +925,7 @@ func_exit:
mtr.commit();
cleanup:
btr_pcur_close(&pcur);
return success;
return page_max_trx_id;
}
/***********************************************************//**
@ -516,38 +938,21 @@ row_purge_remove_sec_if_poss(
dict_index_t* index, /*!< in: index */
const dtuple_t* entry) /*!< in: index entry */
{
ibool success;
ulint n_tries = 0;
if (UNIV_UNLIKELY(!entry))
/* The node->row must have lacked some fields of this index. This
is possible when the undo log record was written before this index
was created. */
return;
/* fputs("Purge: Removing secondary record\n", stderr); */
if (!entry) {
/* The node->row must have lacked some fields of this
index. This is possible when the undo log record was
written before this index was created. */
return;
}
if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
return;
}
retry:
success = row_purge_remove_sec_if_poss_tree(node, index, entry);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
and restart with more file space */
if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
n_tries++;
std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME);
goto retry;
}
ut_a(success);
if (trx_id_t page_max_trx_id=
row_purge_remove_sec_if_poss_leaf(node, index, entry))
for (auto n_tries= BTR_CUR_RETRY_DELETE_N_TIMES;
!row_purge_remove_sec_if_poss_tree(node, index, entry,
page_max_trx_id);
std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME))
/* The delete operation may fail if we have little
file space left (if innodb_file_per_table=0?) */
ut_a(--n_tries);
}
/***********************************************************//**

View file

@ -6612,7 +6612,7 @@ rec_loop:
err= trx_undo_prev_version_build(clust_rec,
clust_index, clust_offsets,
vers_heap, &old_vers,
nullptr, nullptr, 0);
&mtr, 0, nullptr, nullptr);
if (prev_heap)
mem_heap_free(prev_heap);
if (err != DB_SUCCESS)

View file

@ -469,6 +469,146 @@ func_exit:
return(err);
}
bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2);
/** Find out if an accessible version of a clustered index record
corresponds to a secondary index entry.
@param rec record in a latched clustered index page
@param index secondary index
@param ientry secondary index entry
@param mtr mini-transaction
@return whether an accessible non-delete-marked version of rec
corresponds to ientry */
static bool row_undo_mod_sec_is_unsafe(const rec_t *rec, dict_index_t *index,
                                       const dtuple_t *ientry, mtr_t *mtr)
{
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index;
	rec_offs*	clust_offsets;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	const dtuple_t*	entry;
	/* ROW_FORMAT flag of the clustered index page (compact or not);
	needed for interpreting the delete-mark of older versions. */
	ulint		comp;
	dtuple_t*	vrow = NULL;
	/* Separate heap for virtual-column data, so that it survives
	the per-version heap recreated on each loop iteration. */
	mem_heap_t*	v_heap = NULL;
	/* Virtual column values applying to the version being examined. */
	dtuple_t*	cur_vrow = NULL;

	clust_index = dict_table_get_first_index(index->table);

	comp = page_rec_is_comp(rec);
	ut_ad(!dict_table_is_comp(index->table) == !comp);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					clust_index->n_core_fields,
					ULINT_UNDEFINED, &heap);

	if (dict_index_has_virtual(index)) {
		v_heap = mem_heap_create(100);
		/* The current cluster index record could be
		deleted, but the previous version of it might not. We will
		need to get the virtual column data from undo record
		associated with current cluster index */
		cur_vrow = row_vers_build_cur_vrow(
			rec, clust_index, &clust_offsets,
			index, 0, 0, heap, v_heap, mtr);
	}

	/* Walk older versions of rec through the undo log.  The walk
	ends when no previous version exists (safe) or a
	non-delete-marked version matches ientry (unsafe). */
	version = rec;

	for (;;) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		vrow = NULL;

		trx_undo_prev_version_build(version,
					    clust_index, clust_offsets,
					    heap, &prev_version,
					    mtr, TRX_UNDO_CHECK_PURGEABILITY,
					    nullptr,
					    dict_index_has_virtual(index)
					    ? &vrow : nullptr);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (!prev_version) {
			/* Versions end here; prev_version == NULL also
			encodes the "safe" return value below. */
			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						clust_index->n_core_fields,
						ULINT_UNDEFINED, &heap);

		if (dict_index_has_virtual(index)) {
			if (vrow) {
				/* A missing indexed virtual column value
				means this version cannot be compared. */
				if (dtuple_vcol_data_missing(*vrow, *index)) {
					goto nochange_index;
				}
				/* Keep the virtual row info for the next
				version, unless it is changed */
				mem_heap_empty(v_heap);
				cur_vrow = dtuple_copy(vrow, v_heap);
				dtuple_dup_v_fld(cur_vrow, v_heap);
			}

			if (!cur_vrow) {
				/* Nothing for this index has changed,
				continue */
nochange_index:
				version = prev_version;
				continue;
			}
		}

		if (!rec_get_deleted_flag(prev_version, comp)) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, NULL, NULL, &ext, heap);

			if (dict_index_has_virtual(index)) {
				ut_ad(cur_vrow);
				ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
				dtuple_copy_v_fields(row, cur_vrow);
			}

			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers.  This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && dtuple_coll_eq(*ientry, *entry)) {
				/* A version matches: unsafe to remove. */
				break;
			}
		}

		version = prev_version;
	}

	mem_heap_free(heap);

	if (v_heap) {
		mem_heap_free(v_heap);
	}

	/* prev_version != NULL iff the loop broke on a match. */
	return !!prev_version;
}
/***********************************************************//**
Delete marks or removes a secondary index entry if found.
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
@ -487,7 +627,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
btr_cur_t* btr_cur;
dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
row_mtr_start(&mtr, index);
@ -543,17 +682,14 @@ found:
which cannot be purged yet, requires its existence. If some requires,
we should delete mark the record. */
mtr_vers.start();
ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr_vers) ==
btr_pcur_t::SAME_ALL);
ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr) ==
btr_pcur_t::SAME_ALL);
/* For temporary table, we can skip to check older version of
clustered index entry, because there is no MVCC or purge. */
if (node->table->is_temporary()
|| row_vers_old_has_index_entry(
false, btr_pcur_get_rec(&node->pcur),
&mtr_vers, index, entry, 0, 0)) {
|| row_undo_mod_sec_is_unsafe(
btr_pcur_get_rec(&node->pcur), index, entry, &mtr)) {
btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur),
btr_cur_get_rec(btr_cur), &mtr);
} else {
@ -587,7 +723,9 @@ found:
}
}
btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
ut_ad(node->pcur.pos_state == BTR_PCUR_IS_POSITIONED);
node->pcur.pos_state = BTR_PCUR_WAS_POSITIONED;
node->pcur.latch_mode = BTR_NO_LATCHES;
func_exit:
btr_pcur_close(&pcur);

View file

@ -702,7 +702,7 @@ fetch; output: fetched length of the prefix
@param[in,out] heap heap where to allocate
@return BLOB prefix
@retval NULL if the record is incomplete (should only happen
in row_vers_vc_matches_cluster() executed concurrently with another purge) */
in row_purge_vc_matches_cluster() executed concurrently with another purge) */
static
byte*
row_upd_ext_fetch(

View file

@ -194,8 +194,8 @@ row_vers_impl_x_locked_low(
trx_undo_prev_version_build(
version, clust_index, clust_offsets,
heap, &prev_version, NULL,
dict_index_has_virtual(index) ? &vrow : NULL, 0);
heap, &prev_version, mtr, 0, NULL,
dict_index_has_virtual(index) ? &vrow : NULL);
ut_d(trx->mutex_lock());
const bool committed = trx_state_eq(
@ -446,7 +446,6 @@ row_vers_impl_x_locked(
@param[in] clust_index clustered index
@param[in] index the secondary index
@param[in] heap heap used to build virtual dtuple. */
static
bool
row_vers_build_clust_v_col(
dtuple_t* row,
@ -490,26 +489,25 @@ row_vers_build_clust_v_col(
}
/** Build latest virtual column data from undo log
@param[in] in_purge whether this is the purge thread
@param[in] rec clustered index record
@param[in] clust_index clustered index
@param[in,out] clust_offsets offsets on the clustered index record
@param[in] index the secondary index
@param[in] trx_id transaction ID on the purging record,
or 0 if called outside purge
@param[in] roll_ptr the rollback pointer for the purging record
@param[in] trx_id trx id for the purging record
@param[in,out] v_heap heap used to build vrow
@param[out] v_row dtuple holding the virtual rows
@param[in,out] mtr mtr holding the latch on rec */
static
void
row_vers_build_cur_vrow_low(
bool in_purge,
const rec_t* rec,
dict_index_t* clust_index,
rec_offs* clust_offsets,
dict_index_t* index,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
roll_ptr_t roll_ptr,
mem_heap_t* v_heap,
dtuple_t** vrow,
mtr_t* mtr)
@ -539,7 +537,7 @@ row_vers_build_cur_vrow_low(
/* If this is called by purge thread, set TRX_UNDO_PREV_IN_PURGE
bit to search the undo log until we hit the current undo log with
roll_ptr */
const ulint status = in_purge
const ulint status = trx_id
? TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE
: TRX_UNDO_GET_OLD_V_VALUE;
@ -551,7 +549,7 @@ row_vers_build_cur_vrow_low(
trx_undo_prev_version_build(
version, clust_index, clust_offsets,
heap, &prev_version, NULL, vrow, status);
heap, &prev_version, mtr, status, nullptr, vrow);
if (heap2) {
mem_heap_free(heap2);
@ -603,212 +601,27 @@ row_vers_build_cur_vrow_low(
mem_heap_free(heap);
}
/** Check a virtual column value index secondary virtual index matches
that of current cluster index record, which is recreated from information
stored in undo log
@param[in]	rec		record in the clustered index
@param[in]	icentry		the index entry built from a cluster row
@param[in]	clust_index	cluster index
@param[in]	clust_offsets	offsets on the cluster record
@param[in]	index		the secondary index
@param[in]	ientry		the secondary index entry
@param[in]	roll_ptr	the rollback pointer for the purging record
@param[in]	trx_id		trx id for the purging record
@param[in,out]	v_heap		heap used to build virtual dtuple
@param[in,out]	vrow		dtuple holding the virtual rows (if needed)
@param[in]	mtr		mtr holding the latch on rec
@return true if matches, false otherwise */
static
bool
row_vers_vc_matches_cluster(
	const rec_t*	rec,
	const dtuple_t*	icentry,
	dict_index_t*	clust_index,
	rec_offs*	clust_offsets,
	dict_index_t*	index,
	const dtuple_t*	ientry,
	roll_ptr_t	roll_ptr,
	trx_id_t	trx_id,
	mem_heap_t*	v_heap,
	dtuple_t**	vrow,
	mtr_t*		mtr)
{
	const rec_t*	version;
	rec_t*		prev_version;
	mem_heap_t*	heap2;
	mem_heap_t*	heap = NULL;
	mem_heap_t*	tuple_heap;
	ulint		num_v = dict_table_get_n_v_cols(index->table);
	/* Tracks which indexed virtual columns have already been
	compared, so each one is compared at most once. */
	bool		compare[REC_MAX_N_FIELDS];
	ulint		n_fields = dtuple_get_n_fields(ientry);
	ulint		n_non_v_col = 0;
	ulint		n_cmp_v_col = 0;
	const dfield_t*	field1;
	dfield_t*	field2;
	ulint		i;

	/* First compare non-virtual columns (primary keys) */
	ut_ad(index->n_fields == n_fields);
	ut_ad(n_fields == dtuple_get_n_fields(icentry));
	ut_ad(mtr->memo_contains_page_flagged(rec,
					      MTR_MEMO_PAGE_S_FIX
					      | MTR_MEMO_PAGE_X_FIX));

	{
		const dfield_t* a = ientry->fields;
		const dfield_t* b = icentry->fields;

		for (const dict_field_t *ifield = index->fields,
			     *const end = &index->fields[index->n_fields];
		     ifield != end; ifield++, a++, b++) {
			if (!ifield->col->is_virtual()) {
				if (cmp_dfield_dfield(a, b)) {
					/* Non-virtual key part differs:
					no match. */
					return false;
				}
				n_non_v_col++;
			}
		}
	}

	tuple_heap = mem_heap_create(1024);

	ut_ad(n_fields > n_non_v_col);

	/* Allocate *vrow from v_heap if the caller wants to keep it
	beyond this call; otherwise from the local tuple_heap. */
	*vrow = dtuple_create_with_vcol(v_heap ? v_heap : tuple_heap, 0, num_v);
	dtuple_init_v_fld(*vrow);

	for (i = 0; i < num_v; i++) {
		dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
			 = DATA_MISSING;
		compare[i] = false;
	}

	/* Walk older versions through the undo log until all indexed
	virtual columns have been compared. */
	version = rec;

	while (n_cmp_v_col < n_fields - n_non_v_col) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		roll_ptr_t	cur_roll_ptr = row_get_rec_roll_ptr(
			version, clust_index, clust_offsets);

		ut_ad(cur_roll_ptr != 0);
		ut_ad(roll_ptr != 0);

		trx_undo_prev_version_build(
			version, clust_index, clust_offsets,
			heap, &prev_version, NULL, vrow,
			TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE);

		if (heap2) {
			mem_heap_free(heap2);
		}

		if (!prev_version) {
			/* Versions end here */
			goto func_exit;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						clust_index->n_core_fields,
						ULINT_UNDEFINED, &heap);

		ulint	entry_len = dict_index_get_n_fields(index);

		for (i = 0; i < entry_len; i++) {
			const dict_field_t*	ind_field
				 = dict_index_get_nth_field(index, i);
			const dict_col_t*	col = ind_field->col;
			field1 = dtuple_get_nth_field(ientry, i);

			if (!col->is_virtual()) {
				continue;
			}

			const dict_v_col_t*	v_col
				= reinterpret_cast<const dict_v_col_t*>(col);
			field2
				= dtuple_get_nth_v_field(*vrow, v_col->v_pos);

			if ((dfield_get_type(field2)->mtype != DATA_MISSING)
			    && (!compare[v_col->v_pos])) {

				/* For a prefix index, truncate the fetched
				value to the index prefix before comparing. */
				if (ind_field->prefix_len != 0
				    && !dfield_is_null(field2)) {
					field2->len = unsigned(
						dtype_get_at_most_n_mbchars(
							field2->type.prtype,
							field2->type.mbminlen,
							field2->type.mbmaxlen,
							ind_field->prefix_len,
							field2->len,
							static_cast<char*>
							(field2->data)));
				}

				/* The index field mismatch */
				if (v_heap
				    || cmp_dfield_dfield(field2, field1)) {
					if (v_heap) {
						dtuple_dup_v_fld(*vrow, v_heap);
					}

					mem_heap_free(tuple_heap);
					mem_heap_free(heap);
					return(false);
				}

				compare[v_col->v_pos] = true;
				n_cmp_v_col++;
			}
		}

		trx_id_t	rec_trx_id = row_get_rec_trx_id(
			prev_version, clust_index, clust_offsets);

		/* Stop once we have walked past the purging record. */
		if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
			break;
		}

		version = prev_version;
	}

func_exit:
	if (n_cmp_v_col == 0) {
		*vrow = NULL;
	}

	mem_heap_free(tuple_heap);
	mem_heap_free(heap);

	/* FIXME: If n_cmp_v_col is not the same as
	n_fields - n_non_v_col, a callback is needed to compare the
	remaining columns. For the time being, we return true */
	return (true);
}
/** Build a dtuple contains virtual column data for current cluster index
@param[in] in_purge called by purge thread
@param[in] rec cluster index rec
@param[in] clust_index cluster index
@param[in] clust_offsets cluster rec offset
@param[in] index secondary index
@param[in] trx_id transaction ID on the purging record,
or 0 if called outside purge
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@param[in,out] heap heap memory
@param[in,out] v_heap heap memory to keep virtual colum dtuple
@param[in] mtr mtr holding the latch on rec
@param[in,out] v_heap heap memory to keep virtual column tuple
@param[in,out] mtr mini-transaction
@return dtuple contains virtual column data */
static
dtuple_t*
row_vers_build_cur_vrow(
bool in_purge,
const rec_t* rec,
dict_index_t* clust_index,
rec_offs** clust_offsets,
dict_index_t* index,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
roll_ptr_t roll_ptr,
mem_heap_t* heap,
mem_heap_t* v_heap,
mtr_t* mtr)
@ -841,8 +654,8 @@ row_vers_build_cur_vrow(
} else {
/* Try to fetch virtual column data from undo log */
row_vers_build_cur_vrow_low(
in_purge, rec, clust_index, *clust_offsets,
index, roll_ptr, trx_id, v_heap, &cur_vrow, mtr);
rec, clust_index, *clust_offsets,
index, trx_id, roll_ptr, v_heap, &cur_vrow, mtr);
}
*clust_offsets = rec_get_offsets(rec, clust_index, NULL,
@ -851,312 +664,28 @@ row_vers_build_cur_vrow(
return(cur_vrow);
}
/** Compare two data tuples field by field using the collation-aware
comparison function cmp_dfield_dfield().
@param tuple1  first data tuple
@param tuple2  second data tuple; must have the same field count as tuple1
@return whether two data tuples are equal */
static bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2)
{
  ut_ad(tuple1.magic_n == DATA_TUPLE_MAGIC_N);
  ut_ad(tuple2.magic_n == DATA_TUPLE_MAGIC_N);
  ut_ad(dtuple_check_typed(&tuple1));
  ut_ad(dtuple_check_typed(&tuple2));
  ut_ad(tuple1.n_fields == tuple2.n_fields);

  for (ulint i= 0; i < tuple1.n_fields; i++)
    if (cmp_dfield_dfield(&tuple1.fields[i], &tuple2.fields[i]))
      return false;
  return true;
}
/** Find out whether data tuple has missing data type
for indexed virtual column.
@param tuple data tuple
@param index virtual index
@return true if tuple has missing column type */
static bool dtuple_vcol_data_missing(const dtuple_t &tuple,
dict_index_t *index)
bool dtuple_vcol_data_missing(const dtuple_t &tuple,
const dict_index_t &index)
{
for (ulint i= 0; i < index->n_uniq; i++)
for (ulint i= 0; i < index.n_uniq; i++)
{
dict_col_t *col= index->fields[i].col;
dict_col_t *col= index.fields[i].col;
if (!col->is_virtual())
continue;
dict_v_col_t *vcol= reinterpret_cast<dict_v_col_t*>(col);
for (ulint j= 0; j < index->table->n_v_cols; j++)
{
if (vcol == &index->table->v_cols[j]
&& tuple.v_fields[j].type.mtype == DATA_MISSING)
for (ulint j= 0; j < index.table->n_v_cols; j++)
if (vcol == &index.table->v_cols[j] &&
tuple.v_fields[j].type.mtype == DATA_MISSING)
return true;
}
}
return false;
}
/** Finds out if a version of the record, where the version >= the current
purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param[in] also_curr TRUE if also rec is included in the versions
to search; otherwise only versions prior
to it are searched
@param[in] rec record in the clustered index; the caller
must have a latch on the page
@param[in] mtr mtr holding the latch on rec; it will
also hold the latch on purge_view
@param[in] index secondary index
@param[in] ientry secondary index entry
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@return TRUE if earlier version should have */
bool
row_vers_old_has_index_entry(
bool also_curr,
const rec_t* rec,
mtr_t* mtr,
dict_index_t* index,
const dtuple_t* ientry,
roll_ptr_t roll_ptr,
trx_id_t trx_id)
{
const rec_t* version;
rec_t* prev_version;
dict_index_t* clust_index;
rec_offs* clust_offsets;
mem_heap_t* heap;
mem_heap_t* heap2;
dtuple_t* row;
const dtuple_t* entry;
ulint comp;
dtuple_t* vrow = NULL;
mem_heap_t* v_heap = NULL;
dtuple_t* cur_vrow = NULL;
ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_S_FIX));
clust_index = dict_table_get_first_index(index->table);
comp = page_rec_is_comp(rec);
ut_ad(!dict_table_is_comp(index->table) == !comp);
heap = mem_heap_create(1024);
clust_offsets = rec_get_offsets(rec, clust_index, NULL,
clust_index->n_core_fields,
ULINT_UNDEFINED, &heap);
if (dict_index_has_virtual(index)) {
v_heap = mem_heap_create(100);
}
DBUG_EXECUTE_IF("ib_purge_virtual_index_crash",
DBUG_SUICIDE(););
if (also_curr && !rec_get_deleted_flag(rec, comp)) {
row_ext_t* ext;
/* The top of the stack of versions is locked by the
mtr holding a latch on the page containing the
clustered index record. The bottom of the stack is
locked by the fact that the purge_sys.view must
'overtake' any read view of an active transaction.
Thus, it is safe to fetch the prefixes for
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
rec, clust_offsets,
NULL, NULL, NULL, &ext, heap);
if (dict_index_has_virtual(index)) {
#ifdef DBUG_OFF
# define dbug_v_purge false
#else /* DBUG_OFF */
bool dbug_v_purge = false;
#endif /* DBUG_OFF */
DBUG_EXECUTE_IF(
"ib_purge_virtual_index_callback",
dbug_v_purge = true;);
roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr(
rec, clust_index, clust_offsets);
/* if the row is newly inserted, then the virtual
columns need to be computed */
if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
|| dbug_v_purge) {
if (!row_vers_build_clust_v_col(
row, clust_index, index, heap)) {
goto unsafe_to_purge;
}
entry = row_build_index_entry(
row, ext, index, heap);
if (entry && dtuple_coll_eq(*ientry, *entry)) {
goto unsafe_to_purge;
}
} else {
/* Build index entry out of row */
entry = row_build_index_entry(row, ext, index, heap);
/* entry could only be NULL if
the clustered index record is an uncommitted
inserted record whose BLOBs have not been
written yet. The secondary index record
can be safely removed, because it cannot
possibly refer to this incomplete
clustered index record. (Insert would
always first be completed for the
clustered index record, then proceed to
secondary indexes.) */
if (entry && row_vers_vc_matches_cluster(
rec, entry,
clust_index, clust_offsets,
index, ientry, roll_ptr,
trx_id, NULL, &vrow, mtr)) {
goto unsafe_to_purge;
}
}
clust_offsets = rec_get_offsets(rec, clust_index, NULL,
clust_index
->n_core_fields,
ULINT_UNDEFINED, &heap);
} else {
entry = row_build_index_entry(
row, ext, index, heap);
/* If entry == NULL, the record contains unset BLOB
pointers. This must be a freshly inserted record. If
this is called from
row_purge_remove_sec_if_poss_low(), the thread will
hold latches on the clustered index and the secondary
index. Because the insert works in three steps:
(1) insert the record to clustered index
(2) store the BLOBs and update BLOB pointers
(3) insert records to secondary indexes
the purge thread can safely ignore freshly inserted
records and delete the secondary index record. The
thread that inserted the new record will be inserting
the secondary index records. */
/* NOTE that we cannot do the comparison as binary
fields because the row is maybe being modified so that
the clustered index record has already been updated to
a different binary value in a char field, but the
collation identifies the old and new value anyway! */
if (entry && dtuple_coll_eq(*ientry, *entry)) {
unsafe_to_purge:
mem_heap_free(heap);
if (v_heap) {
mem_heap_free(v_heap);
}
return true;
}
}
} else if (dict_index_has_virtual(index)) {
/* The current cluster index record could be
deleted, but the previous version of it might not. We will
need to get the virtual column data from undo record
associated with current cluster index */
cur_vrow = row_vers_build_cur_vrow(
also_curr, rec, clust_index, &clust_offsets,
index, roll_ptr, trx_id, heap, v_heap, mtr);
}
version = rec;
for (;;) {
heap2 = heap;
heap = mem_heap_create(1024);
vrow = NULL;
trx_undo_prev_version_build(version,
clust_index, clust_offsets,
heap, &prev_version, nullptr,
dict_index_has_virtual(index)
? &vrow : nullptr,
TRX_UNDO_CHECK_PURGEABILITY);
mem_heap_free(heap2); /* free version and clust_offsets */
if (!prev_version) {
/* Versions end here */
mem_heap_free(heap);
if (v_heap) {
mem_heap_free(v_heap);
}
return false;
}
clust_offsets = rec_get_offsets(prev_version, clust_index,
NULL,
clust_index->n_core_fields,
ULINT_UNDEFINED, &heap);
if (dict_index_has_virtual(index)) {
if (vrow) {
if (dtuple_vcol_data_missing(*vrow, index)) {
goto nochange_index;
}
/* Keep the virtual row info for the next
version, unless it is changed */
mem_heap_empty(v_heap);
cur_vrow = dtuple_copy(vrow, v_heap);
dtuple_dup_v_fld(cur_vrow, v_heap);
}
if (!cur_vrow) {
/* Nothing for this index has changed,
continue */
nochange_index:
version = prev_version;
continue;
}
}
if (!rec_get_deleted_flag(prev_version, comp)) {
row_ext_t* ext;
/* The stack of versions is locked by mtr.
Thus, it is safe to fetch the prefixes for
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
prev_version, clust_offsets,
NULL, NULL, NULL, &ext, heap);
if (dict_index_has_virtual(index)) {
ut_ad(cur_vrow);
ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
dtuple_copy_v_fields(row, cur_vrow);
}
entry = row_build_index_entry(row, ext, index, heap);
/* If entry == NULL, the record contains unset
BLOB pointers. This must be a freshly
inserted record that we can safely ignore.
For the justification, see the comments after
the previous row_build_index_entry() call. */
/* NOTE that we cannot do the comparison as binary
fields because maybe the secondary index record has
already been updated to a different binary value in
a char field, but the collation identifies the old
and new value anyway! */
if (entry && dtuple_coll_eq(*ientry, *entry)) {
goto unsafe_to_purge;
}
}
version = prev_version;
}
}
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
@ -1223,7 +752,7 @@ row_vers_build_for_consistent_read(
err = trx_undo_prev_version_build(
version, index, *offsets, heap,
&prev_version, NULL, vrow, 0);
&prev_version, mtr, 0, NULL, vrow);
if (prev_heap != NULL) {
mem_heap_free(prev_heap);
@ -1385,8 +914,8 @@ committed_version_trx:
heap = mem_heap_create(1024);
if (trx_undo_prev_version_build(version, index, *offsets, heap,
&prev_version, in_heap, vrow,
0) != DB_SUCCESS) {
&prev_version, mtr, 0,
in_heap, vrow) != DB_SUCCESS) {
mem_heap_free(heap);
heap = heap2;
heap2 = NULL;

View file

@ -1138,10 +1138,9 @@ bool purge_sys_t::running()
void purge_sys_t::stop_FTS()
{
latch.rd_lock(SRW_LOCK_CALL);
m_FTS_paused++;
latch.rd_unlock();
while (m_active)
ut_d(const auto paused=) m_FTS_paused.fetch_add(1);
ut_ad((paused + 1) & ~PAUSED_SYS);
while (m_active.load(std::memory_order_acquire))
std::this_thread::sleep_for(std::chrono::seconds(1));
}
@ -1175,8 +1174,8 @@ void purge_sys_t::stop()
/** Resume purge in data dictionary tables */
void purge_sys_t::resume_SYS(void *)
{
ut_d(auto paused=) purge_sys.m_SYS_paused--;
ut_ad(paused);
ut_d(auto paused=) purge_sys.m_FTS_paused.fetch_sub(PAUSED_SYS);
ut_ad(paused >= PAUSED_SYS);
}
/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
@ -1346,7 +1345,6 @@ static bool srv_purge_should_exit(size_t old_history_size)
/*********************************************************************//**
Fetch and execute a task from the work queue.
@param [in,out] slot purge worker thread slot
@return true if a task was executed */
static bool srv_task_execute()
{
@ -1487,6 +1485,13 @@ static void release_thd(THD *thd, void *ctx)
set_current_thd(0);
}
void srv_purge_worker_task_low()
{
ut_ad(current_thd);
while (srv_task_execute())
ut_ad(purge_sys.running());
}
static void purge_worker_callback(void*)
{
ut_ad(!current_thd);
@ -1494,8 +1499,7 @@ static void purge_worker_callback(void*)
ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
void *ctx;
THD *thd= acquire_thd(&ctx);
while (srv_task_execute())
ut_ad(purge_sys.running());
srv_purge_worker_task_low();
release_thd(thd,ctx);
}

View file

@ -774,26 +774,18 @@ not_free:
buf_block_t *purge_sys_t::get_page(page_id_t id)
{
ut_ad(!recv_sys.recovery_on);
buf_block_t*& undo_page= pages[id];
if (undo_page)
return undo_page;
mtr_t mtr;
mtr.start();
undo_page=
buf_page_get_gen(id, 0, RW_S_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, &mtr);
if (UNIV_LIKELY(undo_page != nullptr))
if (!undo_page)
{
undo_page->fix();
mtr.commit();
return undo_page;
undo_page= buf_pool.page_fix(id); // batch_cleanup() will unfix()
if (!undo_page)
pages.erase(id);
}
mtr.commit();
pages.erase(id);
return nullptr;
return undo_page;
}
bool purge_sys_t::rseg_get_next_history_log()
@ -1062,15 +1054,8 @@ static void trx_purge_close_tables(purge_node_t *node, THD *thd)
void purge_sys_t::wait_FTS(bool also_sys)
{
bool paused;
do
{
latch.wr_lock(SRW_LOCK_CALL);
paused= m_FTS_paused || (also_sys && m_SYS_paused);
latch.wr_unlock();
for (const uint32_t mask= also_sys ? ~0U : ~PAUSED_SYS; m_FTS_paused & mask;)
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
while (paused);
}
__attribute__((nonnull))
@ -1211,123 +1196,108 @@ dict_table_t *purge_sys_t::close_and_reopen(table_id_t id, THD *thd,
/** Run a purge batch.
@param n_purge_threads number of purge threads
@param thd purge coordinator thread handle
@param n_work_items number of work items (currently tables) to process
@return new purge_sys.head */
static purge_sys_t::iterator
trx_purge_attach_undo_recs(ulint n_purge_threads, THD *thd)
static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd,
ulint *n_work_items)
{
que_thr_t* thr;
ulint i;
que_thr_t *thr;
purge_sys_t::iterator head= purge_sys.tail;
ut_a(n_purge_threads > 0);
ut_a(UT_LIST_GET_LEN(purge_sys.query->thrs) >= n_purge_threads);
/* Fetch and parse the UNDO records. The UNDO records are added
to a per purge node vector. */
thr= nullptr;
purge_sys_t::iterator head = purge_sys.tail;
std::unordered_map<table_id_t, purge_node_t *>
table_id_map(TRX_PURGE_TABLE_BUCKETS);
purge_sys.m_active= true;
MDL_context *const mdl_context=
static_cast<MDL_context*>(thd_mdl_context(thd));
ut_ad(mdl_context);
const size_t max_pages=
std::min(buf_pool.curr_size * 3 / 4, size_t{srv_purge_batch_size});
while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown)
{
/* Track the max {trx_id, undo_no} for truncating the
UNDO logs once we have purged the records. */
if (head <= purge_sys.tail)
head= purge_sys.tail;
/* Fetch the next record, and advance the purge_sys.tail. */
trx_purge_rec_t purge_rec= purge_sys.fetch_next_rec();
if (!purge_rec.undo_rec)
{
if (!purge_rec.roll_ptr)
break;
ut_ad(purge_rec.roll_ptr == 1);
continue;
}
table_id_t table_id= trx_undo_rec_get_table_id(purge_rec.undo_rec);
purge_node_t *&table_node= table_id_map[table_id];
if (table_node)
ut_ad(!table_node->in_progress);
if (!table_node)
{
std::pair<dict_table_t *, MDL_ticket *> p;
p.first= trx_purge_table_open(table_id, mdl_context, &p.second);
if (p.first == reinterpret_cast<dict_table_t *>(-1))
p.first= purge_sys.close_and_reopen(table_id, thd, &p.second);
if (!thr || !(thr= UT_LIST_GET_NEXT(thrs, thr)))
thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
++*n_work_items;
table_node= static_cast<purge_node_t *>(thr->child);
ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE);
ut_d(auto pair=) table_node->tables.emplace(table_id, p);
ut_ad(pair.second);
if (p.first)
goto enqueue;
}
else if (table_node->tables[table_id].first)
{
enqueue:
table_node->undo_recs.push(purge_rec);
ut_ad(!table_node->in_progress);
}
if (purge_sys.n_pages_handled() >= max_pages)
break;
}
purge_sys.m_active= false;
#ifdef UNIV_DEBUG
i = 0;
/* Debug code to validate some pre-requisites and reset done flag. */
for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
thr != NULL && i < n_purge_threads;
thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
for (ulint i= 0; thr && i < *n_work_items;
i++, thr= UT_LIST_GET_NEXT(thrs, thr))
{
purge_node_t *node= static_cast<purge_node_t*>(thr->child);
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
ut_ad(!node->in_progress);
node->in_progress= true;
}
purge_node_t* node;
/* Get the purge node. */
node = (purge_node_t*) thr->child;
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
ut_ad(node->undo_recs.empty());
ut_ad(!node->in_progress);
ut_d(node->in_progress = true);
}
/* There should never be fewer nodes than threads, the inverse
however is allowed because we only use purge threads as needed. */
ut_ad(i == n_purge_threads);
for (; thr; thr= UT_LIST_GET_NEXT(thrs, thr))
{
purge_node_t *node= static_cast<purge_node_t*>(thr->child);
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
ut_ad(!node->in_progress);
ut_ad(node->undo_recs.empty());
}
#endif
/* Fetch and parse the UNDO records. The UNDO records are added
to a per purge node vector. */
thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
ut_ad(head <= purge_sys.tail);
ut_ad(head <= purge_sys.tail);
i = 0;
std::unordered_map<table_id_t, purge_node_t*>
table_id_map(TRX_PURGE_TABLE_BUCKETS);
purge_sys.m_active = true;
MDL_context* const mdl_context
= static_cast<MDL_context*>(thd_mdl_context(thd));
ut_ad(mdl_context);
const size_t max_pages = std::min(buf_pool.curr_size * 3 / 4,
size_t{srv_purge_batch_size});
while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) {
/* Track the max {trx_id, undo_no} for truncating the
UNDO logs once we have purged the records. */
if (head <= purge_sys.tail) {
head = purge_sys.tail;
}
/* Fetch the next record, and advance the purge_sys.tail. */
trx_purge_rec_t purge_rec = purge_sys.fetch_next_rec();
if (!purge_rec.undo_rec) {
if (!purge_rec.roll_ptr) {
break;
}
ut_ad(purge_rec.roll_ptr == 1);
continue;
}
table_id_t table_id = trx_undo_rec_get_table_id(
purge_rec.undo_rec);
purge_node_t*& table_node = table_id_map[table_id];
if (!table_node) {
std::pair<dict_table_t*,MDL_ticket*> p;
p.first = trx_purge_table_open(table_id, mdl_context,
&p.second);
if (p.first == reinterpret_cast<dict_table_t*>(-1)) {
p.first = purge_sys.close_and_reopen(
table_id, thd, &p.second);
}
thr = UT_LIST_GET_NEXT(thrs, thr);
if (!(++i % n_purge_threads)) {
thr = UT_LIST_GET_FIRST(
purge_sys.query->thrs);
}
table_node = static_cast<purge_node_t*>(thr->child);
ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE);
ut_d(auto i=)
table_node->tables.emplace(table_id, p);
ut_ad(i.second);
if (p.first) {
goto enqueue;
}
} else if (table_node->tables[table_id].first) {
enqueue:
table_node->undo_recs.push(purge_rec);
}
if (purge_sys.n_pages_handled() >= max_pages) {
break;
}
}
purge_sys.m_active = false;
ut_ad(head <= purge_sys.tail);
return head;
return head;
}
extern tpool::waitable_task purge_worker_task;
@ -1385,68 +1355,89 @@ Run a purge batch.
@return number of undo log pages handled in the batch */
TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size)
{
ut_ad(n_tasks > 0);
ut_ad(n_tasks > 0);
purge_sys.clone_oldest_view();
purge_sys.clone_oldest_view();
#ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) {
return(0);
}
#endif /* UNIV_DEBUG */
ut_d(if (srv_purge_view_update_only_debug) return 0);
THD* const thd = current_thd;
THD *const thd= current_thd;
/* Fetch the UNDO recs that need to be purged. */
const purge_sys_t::iterator head
= trx_purge_attach_undo_recs(n_tasks, thd);
const size_t n_pages = purge_sys.n_pages_handled();
/* Fetch the UNDO recs that need to be purged. */
ulint n_work= 0;
const purge_sys_t::iterator head= trx_purge_attach_undo_recs(thd, &n_work);
const size_t n_pages= purge_sys.n_pages_handled();
{
ulint delay = n_pages ? srv_max_purge_lag : 0;
if (UNIV_UNLIKELY(delay)) {
if (delay >= history_size) {
no_throttle:
delay = 0;
} else if (const ulint max_delay =
srv_max_purge_lag_delay) {
delay = std::min(max_delay,
10000 * history_size / delay
- 5000);
} else {
goto no_throttle;
}
}
srv_dml_needed_delay = delay;
}
{
ulint delay= n_pages ? srv_max_purge_lag : 0;
if (UNIV_UNLIKELY(delay))
{
if (delay >= history_size)
no_throttle:
delay= 0;
else if (const ulint max_delay= srv_max_purge_lag_delay)
delay= std::min(max_delay, 10000 * history_size / delay - 5000);
else
goto no_throttle;
}
srv_dml_needed_delay= delay;
}
que_thr_t* thr = nullptr;
ut_ad(n_tasks);
que_thr_t *thr= nullptr;
/* Submit tasks to workers queue if using multi-threaded purge. */
for (ulint i = n_tasks; --i; ) {
thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
ut_a(thr);
srv_que_task_enqueue_low(thr);
srv_thread_pool->submit_task(&purge_worker_task);
}
if (n_work)
{
for (auto i= n_work; i--; )
{
if (!thr)
thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
else
thr= UT_LIST_GET_NEXT(thrs, thr);
thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
if (!thr)
break;
que_run_threads(thr);
ut_ad(thr->state == QUE_THR_COMPLETED);
thr->state= QUE_THR_RUNNING;
thr->run_node= thr;
thr->prev_node= thr->common.parent;
purge_sys.query->state= QUE_FORK_ACTIVE;
purge_sys.query->last_sel_node= nullptr;
srv_que_task_enqueue_low(thr);
}
trx_purge_wait_for_workers_to_complete();
/*
To reduce context switches we only submit at most n_tasks-1 worker task.
(we can use less tasks, if there is not enough work)
for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs); thr;
thr = UT_LIST_GET_NEXT(thrs, thr)) {
purge_node_t* node = static_cast<purge_node_t*>(thr->child);
trx_purge_close_tables(node, thd);
node->tables.clear();
}
The coordinator does worker's job, instead of waiting and sitting idle,
then waits for all others to finish.
purge_sys.batch_cleanup(head);
This also means if innodb_purge_threads=1, the coordinator does all
the work alone.
*/
const ulint workers{std::min(n_work, n_tasks) - 1};
for (ulint i= 0; i < workers; i++)
srv_thread_pool->submit_task(&purge_worker_task);
srv_purge_worker_task_low();
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages);
if (workers)
trx_purge_wait_for_workers_to_complete();
return n_pages;
for (thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr && n_work--;
thr= UT_LIST_GET_NEXT(thrs, thr))
{
purge_node_t *node= static_cast<purge_node_t*>(thr->child);
trx_purge_close_tables(node, thd);
node->tables.clear();
}
}
purge_sys.batch_cleanup(head);
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages);
return n_pages;
}

View file

@ -2045,170 +2045,128 @@ err_exit:
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
/** Copy an undo record to heap.
@param[in] roll_ptr roll pointer to a record that exists
@param[in,out] heap memory heap where copied */
static
trx_undo_rec_t*
trx_undo_get_undo_rec_low(
roll_ptr_t roll_ptr,
mem_heap_t* heap)
static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mem_heap_t *v_heap,
dtuple_t **vrow, ulint v_status,
const trx_undo_rec_t *undo_rec);
inline const buf_block_t *
purge_sys_t::view_guard::get(const page_id_t id, mtr_t *mtr)
{
ulint rseg_id;
uint32_t page_no;
uint16_t offset;
bool is_insert;
mtr_t mtr;
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no, &offset);
ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
trx_rseg_t *rseg= &trx_sys.rseg_array[rseg_id];
ut_ad(rseg->is_persistent());
mtr.start();
trx_undo_rec_t *undo_rec= nullptr;
if (buf_block_t* undo_page=
buf_page_get(page_id_t(rseg->space->id, page_no), 0, RW_S_LATCH, &mtr))
buf_block_t *block;
ut_ad(mtr->is_active());
if (!latch)
{
buf_page_make_young_if_needed(&undo_page->page);
undo_rec= undo_page->page.frame + offset;
const size_t end= mach_read_from_2(undo_rec);
if (UNIV_UNLIKELY(end <= offset ||
end >= srv_page_size - FIL_PAGE_DATA_END))
undo_rec= nullptr;
else
decltype(purge_sys.pages)::const_iterator i= purge_sys.pages.find(id);
if (i != purge_sys.pages.end())
{
size_t len{end - offset};
undo_rec=
static_cast<trx_undo_rec_t*>(mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(undo_rec, len);
block= i->second;
ut_ad(block);
return block;
}
}
mtr.commit();
return undo_rec;
}
/** Copy an undo record to heap, to check if a secondary index record
can be safely purged.
@param trx_id DB_TRX_ID corresponding to roll_ptr
@param name table name
@param roll_ptr DB_ROLL_PTR pointing to the undo log record
@param heap memory heap for allocation
@return copy of the record
@retval nullptr if the version is visible to purge_sys.view */
static trx_undo_rec_t *trx_undo_get_rec_if_purgeable(trx_id_t trx_id,
const table_name_t &name,
roll_ptr_t roll_ptr,
mem_heap_t* heap)
{
block= buf_pool.page_fix(id);
if (block)
{
purge_sys_t::view_guard check;
if (!check.view().changes_visible(trx_id))
return trx_undo_get_undo_rec_low(roll_ptr, heap);
mtr->memo_push(block, MTR_MEMO_BUF_FIX);
if (latch)
/* In MVCC operations (outside purge tasks), we will refresh the
buf_pool.LRU position. In purge, we expect the page to be freed
soon, at the end of the current batch. */
buf_page_make_young_if_needed(&block->page);
}
return nullptr;
}
/** Copy an undo record to heap.
@param trx_id DB_TRX_ID corresponding to roll_ptr
@param name table name
@param roll_ptr DB_ROLL_PTR pointing to the undo log record
@param heap memory heap for allocation
@return copy of the record
@retval nullptr if the undo log is not available */
static trx_undo_rec_t *trx_undo_get_undo_rec(trx_id_t trx_id,
const table_name_t &name,
roll_ptr_t roll_ptr,
mem_heap_t *heap)
{
{
purge_sys_t::end_view_guard check;
if (!check.view().changes_visible(trx_id))
return trx_undo_get_undo_rec_low(roll_ptr, heap);
}
return nullptr;
return block;
}
/** Build a previous version of a clustered index record. The caller
must hold a latch on the index page of the clustered index record.
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is
allocated
@param old_vers previous version or NULL if rec is the
first inserted version, or if history data
has been deleted (an error), or if the purge
could have removed the version
though it has not yet done so
@param v_heap memory heap used to create vrow
dtuple if it is not yet created. This heap
diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param v_row virtual column info, if any
@param v_status status determine if it is going into this
function by purge thread or not.
And if we read "after image" of undo log
@param undo_block undo log block which was cached during
online dml apply or nullptr
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is allocated
@param old_vers previous version, or NULL if rec is the first inserted
version, or if history data has been deleted (an error),
or if the purge could have removed the version though
it has not yet done so
@param mtr mini-transaction
@param v_status TRX_UNDO_PREV_IN_PURGE, ...
@param v_heap memory heap used to create vrow dtuple if it is not yet
created. This heap diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param vrow virtual column info, if any
@return error code
@retval DB_SUCCESS if previous version was successfully built,
or if it was an insert or the undo record refers to the table before rebuild
@retval DB_MISSING_HISTORY if the history is missing */
TRANSACTIONAL_TARGET
dberr_t
trx_undo_prev_version_build(
const rec_t *rec,
dict_index_t *index,
rec_offs *offsets,
mem_heap_t *heap,
rec_t **old_vers,
mem_heap_t *v_heap,
dtuple_t **vrow,
ulint v_status)
dberr_t trx_undo_prev_version_build(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mtr_t *mtr,
ulint v_status,
mem_heap_t *v_heap, dtuple_t **vrow)
{
dtuple_t* entry;
trx_id_t rec_trx_id;
undo_no_t undo_no;
table_id_t table_id;
trx_id_t trx_id;
roll_ptr_t roll_ptr;
upd_t* update;
byte type;
byte info_bits;
byte cmpl_info;
bool dummy_extern;
byte* buf;
ut_ad(!index->table->is_temporary());
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!index->table->is_temporary());
ut_ad(rec_offs_validate(rec, index, offsets));
const roll_ptr_t roll_ptr= row_get_rec_roll_ptr(rec, index, offsets);
*old_vers= nullptr;
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
if (trx_undo_roll_ptr_is_insert(roll_ptr))
/* The record rec is the first inserted version */
return DB_SUCCESS;
*old_vers = NULL;
ut_ad(roll_ptr < 1ULL << 55);
ut_ad(uint16_t(roll_ptr) >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
ut_ad(uint32_t(roll_ptr >> 16) >= FSP_FIRST_INODE_PAGE_NO);
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
/* The record rec is the first inserted version */
return DB_SUCCESS;
}
const trx_id_t rec_trx_id= row_get_rec_trx_id(rec, index, offsets);
mariadb_increment_undo_records_read();
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
ut_ad(!index->table->skip_alter_undo);
ut_ad(!index->table->skip_alter_undo);
mariadb_increment_undo_records_read();
const auto savepoint= mtr->get_savepoint();
dberr_t err= DB_MISSING_HISTORY;
purge_sys_t::view_guard check{v_status == TRX_UNDO_CHECK_PURGE_PAGES
? purge_sys_t::view_guard::PURGE
: v_status == TRX_UNDO_CHECK_PURGEABILITY
? purge_sys_t::view_guard::VIEW
: purge_sys_t::view_guard::END_VIEW};
if (!check.view().changes_visible(rec_trx_id))
{
trx_undo_rec_t *undo_rec= nullptr;
static_assert(ROLL_PTR_RSEG_ID_POS == 48, "");
static_assert(ROLL_PTR_PAGE_POS == 16, "");
if (const buf_block_t *undo_page=
check.get(page_id_t{trx_sys.rseg_array[(roll_ptr >> 48) & 0x7f].
space->id,
uint32_t(roll_ptr >> 16)}, mtr))
{
static_assert(ROLL_PTR_BYTE_POS == 0, "");
const uint16_t offset{uint16_t(roll_ptr)};
undo_rec= undo_page->page.frame + offset;
const size_t end= mach_read_from_2(undo_rec);
if (UNIV_UNLIKELY(end > offset &&
end < srv_page_size - FIL_PAGE_DATA_END))
err= trx_undo_prev_version(rec, index, offsets, heap,
old_vers, v_heap, vrow, v_status, undo_rec);
}
}
trx_undo_rec_t* undo_rec = v_status == TRX_UNDO_CHECK_PURGEABILITY
? trx_undo_get_rec_if_purgeable(rec_trx_id, index->table->name,
roll_ptr, heap)
: trx_undo_get_undo_rec(rec_trx_id, index->table->name,
roll_ptr, heap);
if (!undo_rec) {
return DB_MISSING_HISTORY;
}
mtr->rollback_to_savepoint(savepoint);
return err;
}
static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mem_heap_t *v_heap,
dtuple_t **vrow, ulint v_status,
const trx_undo_rec_t *undo_rec)
{
byte type, cmpl_info;
bool dummy_extern;
undo_no_t undo_no;
table_id_t table_id;
const byte *ptr =
trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
@ -2220,6 +2178,10 @@ trx_undo_prev_version_build(
return DB_SUCCESS;
}
trx_id_t trx_id;
roll_ptr_t roll_ptr;
byte info_bits;
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
@ -2247,10 +2209,12 @@ trx_undo_prev_version_build(
ptr = trx_undo_rec_skip_row_ref(ptr, index);
upd_t* update;
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
roll_ptr, info_bits,
heap, &update);
ut_a(ptr);
byte* buf;
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
/* We should confirm the existence of disowned external data,
@ -2276,9 +2240,10 @@ trx_undo_prev_version_build(
those fields that update updates to become externally stored
fields. Store the info: */
entry = row_rec_to_index_entry(rec, index, offsets, heap);
dtuple_t* entry = row_rec_to_index_entry(rec, index, offsets,
heap);
/* The page containing the clustered index record
corresponding to entry is latched in mtr. Thus the
corresponding to entry is latched. Thus the
following call is safe. */
if (!row_upd_index_replace_new_col_vals(entry, *index, update,
heap)) {

View file

@ -92,6 +92,25 @@ static void test_ssux_lock()
ssux.wr_u_downgrade();
ssux.u_unlock();
}
for (auto j= M_ROUNDS; j--; )
{
ssux.rd_lock();
assert(!critical);
if (ssux.rd_u_upgrade_try())
{
assert(!critical);
ssux.rd_unlock();
ssux.u_wr_upgrade();
assert(!critical);
critical= true;
critical= false;
ssux.wr_u_downgrade();
ssux.u_rd_downgrade();
}
assert(!critical);
ssux.rd_unlock();
}
}
}
@ -129,6 +148,14 @@ static void test_sux_lock()
critical= false;
sux.x_u_downgrade();
sux.u_unlock();
sux.s_lock();
std::ignore= sux.s_x_upgrade();
assert(!critical);
sux.x_lock();
critical= true;
sux.x_unlock();
critical= false;
sux.x_unlock();
}
}
}

View file

@ -3077,21 +3077,25 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
int _ma_bitmap_create_first(MARIA_SHARE *share)
{
uint block_size= share->bitmap.block_size;
size_t error;
File file= share->bitmap.file.file;
uchar marker[CRC_SIZE];
uchar *temp_buff;
if (!(temp_buff= (uchar*) my_alloca(block_size)))
return 1;
bzero(temp_buff, block_size);
/*
Next write operation of the page will write correct CRC
if it is needed
*/
int4store(marker, MARIA_NO_CRC_BITMAP_PAGE);
int4store(temp_buff + block_size - CRC_SIZE, MARIA_NO_CRC_BITMAP_PAGE);
if (mysql_file_chsize(file, block_size - sizeof(marker),
0, MYF(MY_WME)) > 0 ||
my_pwrite(file, marker, sizeof(marker),
block_size - sizeof(marker),
MYF(MY_NABP | MY_WME)))
error= my_pwrite(file, temp_buff, block_size, 0, MYF(MY_NABP | MY_WME));
my_afree(temp_buff);
if (error)
return 1;
share->state.state.data_file_length= block_size;
_ma_bitmap_delete_all(share);
return 0;

View file

@ -420,6 +420,8 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
/* We cannot check file sizes for S3 */
DBUG_RETURN(0);
}
/* We should never come here with internal temporary tables */
DBUG_ASSERT(!share->internal_table);
if (!(param->testflag & T_SILENT))
puts("- check file-size");
@ -715,6 +717,8 @@ static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
MARIA_PAGE ma_page;
DBUG_ENTER("chk_index_down");
DBUG_ASSERT(!share->internal_table);
/* Key blocks must lay within the key file length entirely. */
if (page + keyinfo->block_length > share->state.state.key_file_length)
{
@ -2467,7 +2471,16 @@ static int initialize_variables_for_repair(HA_CHECK *param,
return 1;
/* calculate max_records */
sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
if (!share->internal_table)
{
/* Get real file size */
sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
}
else
{
/* For internal temporary files we are using the logical file length */
sort_info->filelength= share->state.state.data_file_length;
}
param->max_progress= sort_info->filelength;
if ((param->testflag & T_CREATE_MISSING_KEYS) ||
@ -2865,7 +2878,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
{
fputs(" \r",stdout); fflush(stdout);
}
if (mysql_file_chsize(share->kfile.file,
if (!share->internal_table &&
mysql_file_chsize(share->kfile.file,
share->state.state.key_file_length, 0, MYF(0)) > 0)
{
_ma_check_print_warning(param,
@ -4184,7 +4198,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
if (param->testflag & T_CALC_CHECKSUM)
share->state.state.checksum=param->glob_crc;
if (mysql_file_chsize(share->kfile.file,
if (!share->internal_table &&
mysql_file_chsize(share->kfile.file,
share->state.state.key_file_length, 0, MYF(0)) > 0)
_ma_check_print_warning(param,
"Can't change size of indexfile, error: %d",
@ -4733,7 +4748,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
if (param->testflag & T_CALC_CHECKSUM)
share->state.state.checksum=param->glob_crc;
if (mysql_file_chsize(share->kfile.file,
if (!share->internal_table &&
mysql_file_chsize(share->kfile.file,
share->state.state.key_file_length, 0, MYF(0)) > 0)
_ma_check_print_warning(param,
"Can't change size of indexfile, error: %d",
@ -6145,6 +6161,8 @@ int maria_test_if_almost_full(MARIA_HA *info)
{
MARIA_SHARE *share= info->s;
DBUG_ASSERT(!share->internal_table);
if (share->options & HA_OPTION_COMPRESS_RECORD)
return 0;
return mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,

View file

@ -129,11 +129,17 @@ int maria_delete_all_rows(MARIA_HA *info)
_ma_unmap_file(info);
#endif
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA|MARIA_FLUSH_INDEX,
if (share->internal_table)
/*
Avoid truncate of internal temporary tables as this can have a big
performance overhead when called by mysql_handle_single_derived()
tables in MariaDB as part of split materialization.
*/;
else if (_ma_flush_table_files(info, MARIA_FLUSH_DATA|MARIA_FLUSH_INDEX,
FLUSH_IGNORE_CHANGED, FLUSH_IGNORE_CHANGED) ||
mysql_file_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) > 0 ||
mysql_file_chsize(share->kfile.file, share->base.keystart, 0,
MYF(MY_WME)) > 0)
mysql_file_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) > 0 ||
mysql_file_chsize(share->kfile.file, share->base.keystart, 0,
MYF(MY_WME)) > 0)
goto err;
if (info->s->tracked)

View file

@ -237,7 +237,7 @@ set session spider_suppress_comment_ignored_warning=0;
CREATE TABLE tbl_a (a INT) ENGINE=Spider DEFAULT CHARSET=utf8
REMOTE_TABLE=t CONNECTION="srv s_2_1";
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv s_2_1' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv s_2_1' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
drop table tbl_a;
CREATE TABLE tbl_a (
a INT,
@ -411,7 +411,7 @@ PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
Warnings:
Warning 138 Spider table params in COMMENT or CONNECTION strings have been deprecated and will be removed in a future release. Please use table options instead.
Warning 138 Spider table params in COMMENT or CONNECTION strings have been deprecated and will be removed in a future release. Please use table options instead.
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
ERROR HY000: Remote table 'auto_test_remote2.tbl_a#P#p2' is not found
DROP TABLE tbl_a;
@ -427,9 +427,9 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_1"',
PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
);
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_1"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_1"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
ERROR HY000: Unable to connect to foreign data source: localhost
DROP TABLE tbl_a;
@ -447,10 +447,10 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_2"',
PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
);
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
connection child2_1;
SELECT * FROM tbl_a;
a b
@ -477,7 +477,7 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_2" read_only_mode "0"',
PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2" READ_ONLY=NO
);
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2" read_only_mode "0"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2" read_only_mode "0"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb");
ERROR HY000: Table 'auto_test_local.tbl_a' is read only
INSERT INTO tbl_a VALUES (3, "ccc"), (4, "ddd");
@ -500,7 +500,7 @@ PRIMARY KEY(a)
) ENGINE=Spider DEFAULT CHARSET=utf8
REMOTE_SERVER="s_2_1" COMMENT='tbl "tbl_b"' REMOTE_TABLE="tbl_a";
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_b"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_b"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
select table_name, server, tgt_table_name from mysql.spider_tables;
table_name server tgt_table_name
tbl_a s_2_1 tbl_a

View file

@ -402,7 +402,7 @@ SPIDER_CONN *spider_create_conn(
char *tmp_name, *tmp_host, *tmp_username, *tmp_password, *tmp_socket;
char *tmp_wrapper, *tmp_db, *tmp_ssl_ca, *tmp_ssl_capath, *tmp_ssl_cert;
char *tmp_ssl_cipher, *tmp_ssl_key, *tmp_default_file, *tmp_default_group;
char *tmp_dsn, *tmp_filedsn, *tmp_driver;
char *tmp_dsn, *tmp_filedsn, *tmp_driver, *tmp_odbc_conn_str;
DBUG_ENTER("spider_create_conn");
if (unlikely(!UTC))
@ -454,6 +454,8 @@ SPIDER_CONN *spider_create_conn(
(uint) (share->tgt_filedsns_lengths[link_idx] + 1),
&tmp_driver,
(uint) (share->tgt_drivers_lengths[link_idx] + 1),
&tmp_odbc_conn_str,
(uint) (share->tgt_odbc_conn_str_length + 1),
&need_mon, (uint) (sizeof(int)),
NullS))
) {
@ -529,6 +531,10 @@ SPIDER_CONN *spider_create_conn(
spider_memcpy_or_null(&conn->tgt_driver, tmp_driver, share->tgt_drivers[link_idx],
&conn->tgt_driver_length,
share->tgt_drivers_lengths[link_idx]);
spider_memcpy_or_null(&conn->tgt_odbc_conn_str, tmp_odbc_conn_str,
share->tgt_odbc_conn_str,
&conn->tgt_odbc_conn_str_length,
share->tgt_odbc_conn_str_length);
conn->tgt_port = share->tgt_ports[link_idx];
conn->tgt_ssl_vsc = share->tgt_ssl_vscs[link_idx];
conn->dbton_id = share->sql_dbton_ids[link_idx];

View file

@ -1485,6 +1485,7 @@ int spider_db_append_key_hint(
if (str->reserve(
hint_str_len - 2 + SPIDER_SQL_INDEX_USE_LEN +
SPIDER_SQL_OPEN_PAREN_LEN + SPIDER_SQL_CLOSE_PAREN_LEN))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
hint_str += 2;
str->q_append(SPIDER_SQL_INDEX_USE_STR, SPIDER_SQL_INDEX_USE_LEN);
str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
@ -1497,10 +1498,11 @@ int spider_db_append_key_hint(
if (str->reserve(
hint_str_len - 3 + SPIDER_SQL_INDEX_IGNORE_LEN +
SPIDER_SQL_OPEN_PAREN_LEN + SPIDER_SQL_CLOSE_PAREN_LEN))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
hint_str += 3;
str->q_append(SPIDER_SQL_INDEX_IGNORE_STR, SPIDER_SQL_INDEX_IGNORE_LEN);
str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
str->q_append(hint_str, hint_str_len - 2);
str->q_append(hint_str, hint_str_len - 3);
str->q_append(SPIDER_SQL_CLOSE_PAREN_STR, SPIDER_SQL_CLOSE_PAREN_LEN);
} else if (str->reserve(hint_str_len + SPIDER_SQL_SPACE_LEN))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);

View file

@ -7624,8 +7624,8 @@ int spider_mbase_share::convert_key_hint_str()
roop_count < (int) table_share->keys; roop_count++, tmp_key_hint++)
{
tmp_key_hint->length(0);
if (tmp_key_hint->append(spider_share->key_hint->ptr(),
spider_share->key_hint->length(), system_charset_info))
if (tmp_key_hint->append(spider_share->key_hint[roop_count].ptr(),
spider_share->key_hint[roop_count].length(), system_charset_info))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
} else {

View file

@ -73,7 +73,7 @@
#define ER_SPIDER_INVALID_TABLE_OPTION_NUM 12528
#define ER_SPIDER_INVALID_TABLE_OPTION_STR "The table option %s=%s is invalid"
#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_NUM 12529
#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_STR "The table or partition COMMENT or CONNECTION string '%s' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified"
#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_STR "The table or partition COMMENT or CONNECTION string '%s' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified"
#define ER_SPIDER_CANT_USE_BOTH_INNER_XA_AND_SNAPSHOT_NUM 12601
#define ER_SPIDER_CANT_USE_BOTH_INNER_XA_AND_SNAPSHOT_STR "Can't use both spider_use_consistent_snapshot = 1 and spider_internal_xa = 1"

View file

@ -678,6 +678,7 @@ typedef struct st_spider_conn
char *tgt_dsn;
char *tgt_filedsn;
char *tgt_driver;
char *tgt_odbc_conn_str;
long tgt_port;
long tgt_ssl_vsc;
@ -697,6 +698,7 @@ typedef struct st_spider_conn
uint tgt_dsn_length;
uint tgt_filedsn_length;
uint tgt_driver_length;
uint tgt_odbc_conn_str_length;
uint dbton_id;
volatile
@ -1195,6 +1197,7 @@ typedef struct st_spider_share
char **tgt_dsns;
char **tgt_filedsns;
char **tgt_drivers;
char *tgt_odbc_conn_str;
char **static_link_ids;
char **tgt_pk_names;
char **tgt_sequence_names;
@ -1237,6 +1240,7 @@ typedef struct st_spider_share
uint *tgt_dsns_lengths;
uint *tgt_filedsns_lengths;
uint *tgt_drivers_lengths;
uint tgt_odbc_conn_str_length;
uint *static_link_ids_lengths;
uint *tgt_pk_names_lengths;
uint *tgt_sequence_names_lengths;

View file

@ -860,6 +860,8 @@ int spider_free_share_alloc(
}
spider_free(spider_current_trx, share->tgt_drivers, MYF(0));
}
if (share->tgt_odbc_conn_str)
spider_free(spider_current_trx, share->tgt_odbc_conn_str, MYF(0));
if (share->tgt_pk_names)
{
for (roop_count = 0; roop_count < (int) share->tgt_pk_names_length;
@ -2483,9 +2485,6 @@ int st_spider_param_string_parse::fail(bool restore_delim)
/*
Parse connection information specified by COMMENT, CONNECT, or engine-defined
options.
TODO: Deprecate the connection specification by COMMENT and CONNECT,
and then solely utilize engine-defined options.
*/
int spider_parse_connect_info(
SPIDER_SHARE *share,
@ -2512,6 +2511,7 @@ int spider_parse_connect_info(
DBUG_PRINT("info",("spider s->path=%s", table_share->path.str));
DBUG_PRINT("info",
("spider s->normalized_path=%s", table_share->normalized_path.str));
parse.error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
spider_get_partition_info(share->table_name, share->table_name_length,
table_share, part_info, &part_elem, &sub_elem);
/* Find the correct table options, depending on if we are parsing a
@ -2600,8 +2600,11 @@ int spider_parse_connect_info(
goto error_alloc_conn_string;
}
DBUG_ASSERT(error_num_1 == 0);
/* If the connect string is explicitly ignored for parsing, or if
any option is specified, skip the parsing. */
/*
If the COMMENT or CONNECTION string is explicitly ignored for
table param parsing, or if any option is specified, skip the
parsing.
*/
if (spider_param_ignore_comments(current_thd) || option_specified)
{
if (!spider_param_suppress_comment_ignored_warning(current_thd))
@ -2621,7 +2624,6 @@ int spider_parse_connect_info(
"and will be removed in a future release. "
"Please use table options instead.");
start_param = connect_string;
parse.error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
while (*start_param != '\0')
{
if (parse.locate_param_def(start_param))