Merge 11.4 into 11.6

This commit is contained in:
Marko Mäkelä 2024-09-04 10:38:25 +03:00
commit a5b80531fb
78 changed files with 1736 additions and 1273 deletions

View file

@ -255,6 +255,7 @@ static my_bool column_types_flag;
static my_bool preserve_comments= 0;
static my_bool in_com_source, aborted= 0;
static ulong opt_max_allowed_packet, opt_net_buffer_length;
unsigned long quick_max_column_width= LONG_MAX;
static uint verbose=0,opt_silent=0,opt_mysql_port=0, opt_local_infile=0;
static uint my_end_arg;
static char * opt_mysql_unix_port=0;
@ -1821,6 +1822,10 @@ static struct my_option my_long_options[] =
"Don't cache result, print it row by row. This may slow down the server "
"if the output is suspended. Doesn't use history file.",
&quick, &quick, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"quick-max-column-width", 0,
"Maximal field length limit in case of --quick", &quick_max_column_width,
&quick_max_column_width, 0, GET_ULONG, REQUIRED_ARG, LONG_MAX, 0, ULONG_MAX,
0, 1, 0},
{"raw", 'r', "Write fields without conversion. Used with --batch.",
&opt_raw_data, &opt_raw_data, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0},
{"reconnect", 0, "Reconnect if the connection is lost.",
@ -3877,7 +3882,7 @@ print_table_data(MYSQL_RES *result)
{
uint length= column_names ? field->name_length : 0;
if (quick)
length= MY_MAX(length,field->length);
length= MY_MAX(length, MY_MIN(field->length, quick_max_column_width));
else
length= MY_MAX(length,field->max_length);
if (length < 4 && !IS_NOT_NULL(field->flags))

View file

@ -121,7 +121,7 @@ in
replace_uring_with_aio
fi
;&
"noble")
"noble"|"oracular")
# mariadb-plugin-rocksdb s390x not supported by us (yet)
# ubuntu doesn't support mips64el yet, so keep this just
# in case something changes.

View file

@ -2114,7 +2114,7 @@ static int prepare_export()
IF_WIN("\"","") "\"%s\" --mysqld \"%s\""
" --defaults-extra-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=."
" --innodb --innodb-fast-shutdown=0 --loose-partition"
" --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu"
" --innodb-buffer-pool-size=%llu"
" --console --skip-log-error --skip-log-bin --bootstrap %s< "
BOOTSTRAP_FILENAME IF_WIN("\"",""),
mariabackup_exe,
@ -2128,7 +2128,7 @@ static int prepare_export()
IF_WIN("\"","") "\"%s\" --mysqld"
" --defaults-file=./backup-my.cnf --defaults-group-suffix=%s --datadir=."
" --innodb --innodb-fast-shutdown=0 --loose-partition"
" --innodb_purge_rseg_truncate_frequency=1 --innodb-buffer-pool-size=%llu"
" --innodb-buffer-pool-size=%llu"
" --console --log-error= --skip-log-bin --bootstrap %s< "
BOOTSTRAP_FILENAME IF_WIN("\"",""),
mariabackup_exe,

View file

@ -0,0 +1,62 @@
#
# MDEV-34704: Quick mode produces the bug for mariadb client
#
create table t1 (aaaaaaaaa char (5), aaaaa char (10), a char (127), b char(1));
insert into t1 values ("X", "X", "X", "X");
# --table --quick
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| X | X | X | X |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
# --table --quick --quick-max-column-width=0
+-----------+-------+------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+-------+------+------+
| X | X | X | X |
+-----------+-------+------+------+
# --table --quick --quick-max-column-width=10
+-----------+------------+------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+------------+------+
| X | X | X | X |
+-----------+------------+------------+------+
# --table --quick --quick-max-column-width=20
+-----------+------------+----------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+----------------------+------+
| X | X | X | X |
+-----------+------------+----------------------+------+
insert into t1 values ("01234", "0123456789", "01234567890123456789", "1");
# --table --quick
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+------------+---------------------------------------------------------------------------------------------------------------------------------+------+
# --table --quick --quick-max-column-width=0
+-----------+-------+------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+-------+------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+-------+------+------+
# --table --quick --quick-max-column-width=10
+-----------+------------+------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+------------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+------------+------------+------+
# --table --quick --quick-max-column-width=20
+-----------+------------+----------------------+------+
| aaaaaaaaa | aaaaa | a | b |
+-----------+------------+----------------------+------+
| X | X | X | X |
| 01234 | 0123456789 | 01234567890123456789 | 1 |
+-----------+------------+----------------------+------+
drop table t1;
#
# End of 10.7 tests
#

View file

@ -0,0 +1,46 @@
--source include/not_embedded.inc
--echo #
--echo # MDEV-34704: Quick mode produces the bug for mariadb client
--echo #
create table t1 (aaaaaaaaa char (5), aaaaa char (10), a char (127), b char(1));
insert into t1 values ("X", "X", "X", "X");
--echo # --table --quick
--exec echo "select * from test.t1;" | $MYSQL --table --quick 2>&1
--echo # --table --quick --quick-max-column-width=0
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=0 2>&1
--echo # --table --quick --quick-max-column-width=10
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=10 2>&1
--echo # --table --quick --quick-max-column-width=20
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=20 2>&1
insert into t1 values ("01234", "0123456789", "01234567890123456789", "1");
--echo # --table --quick
--exec echo "select * from test.t1;" | $MYSQL --table --quick 2>&1
--echo # --table --quick --quick-max-column-width=0
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=0 2>&1
--echo # --table --quick --quick-max-column-width=10
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=10 2>&1
--echo # --table --quick --quick-max-column-width=20
--exec echo "select * from test.t1;" | $MYSQL --table --quick --quick-max-column-width=20 2>&1
drop table t1;
--echo #
--echo # End of 10.7 tests
--echo #

View file

@ -42,5 +42,24 @@ SELECT * FROM v WHERE f = '10.5.20';
f
drop view v;
#
# MDEV-34785: Assertion failure in Item_func_or_sum::do_build_clone
# (Item_func_not_all)
#
CREATE VIEW t AS SELECT 0 AS a;
SELECT * FROM t WHERE a=ALL (SELECT 0);
a
0
DROP VIEW t;
#
# MDEV-34833: Assertion failure in Item_float::do_build_clone
# (Item_static_float_func)
#
CREATE VIEW v1 (f,f2) AS SELECT connection_id(),pi();
CREATE TABLE t1 AS SELECT 1;
SELECT * FROM v1 JOIN t1 ON f=f2;
f f2 1
DROP VIEW v1;
DROP TABLE t1;
#
# End of 10.5 tests
#

View file

@ -46,6 +46,27 @@ CREATE VIEW v AS SELECT version() AS f;
SELECT * FROM v WHERE f = '10.5.20';
drop view v;
--echo #
--echo # MDEV-34785: Assertion failure in Item_func_or_sum::do_build_clone
--echo # (Item_func_not_all)
--echo #
CREATE VIEW t AS SELECT 0 AS a;
SELECT * FROM t WHERE a=ALL (SELECT 0);
DROP VIEW t;
--echo #
--echo # MDEV-34833: Assertion failure in Item_float::do_build_clone
--echo # (Item_static_float_func)
--echo #
CREATE VIEW v1 (f,f2) AS SELECT connection_id(),pi();
CREATE TABLE t1 AS SELECT 1;
SELECT * FROM v1 JOIN t1 ON f=f2;
DROP VIEW v1;
DROP TABLE t1;
--echo #
--echo # End of 10.5 tests
--echo #

View file

@ -0,0 +1,11 @@
--- alter_copy_bulk.result
+++ alter_copy_bulk.result
@@ -4,7 +4,7 @@
INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
-ERROR 23000: Duplicate entry 'aa' for key 'PRIMARY'
+ERROR 23000: Duplicate entry 'bb' for key 'PRIMARY'
INSERT INTO t1 VALUES(repeat('a', 200), 1);
ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
ERROR 23000: Duplicate entry '1' for key 'f2_2'

View file

@ -1,26 +1,50 @@
SET @@alter_algorithm=COPY;
Warnings:
Warning 4200 The variable '@@alter_algorithm' is ignored. It only exists for compatibility with old installations and will be removed in a future release
CREATE TABLE t1(f1 CHAR(200), f2 INT NOT NULL)engine=InnoDB;
INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2;
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ADD PRIMARY KEY(f1(2));
ERROR 23000: Duplicate entry 'aaaaaaaa' for key 'PRIMARY'
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
ERROR 23000: Duplicate entry 'aa' for key 'PRIMARY'
INSERT INTO t1 VALUES(repeat('a', 200), 1);
ALTER TABLE t1 ADD UNIQUE KEY(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
ERROR 23000: Duplicate entry '1' for key 'f2_2'
ALTER IGNORE TABLE t1 MODIFY f1 CHAR(200) NOT NULL;
CREATE TABLE t2(f1 INT NOT NULL,
FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB;
INSERT INTO t2 VALUES(1);
ALTER TABLE t2 FORCE;
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
DROP TABLE t2, t1;
CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB PARTITION BY HASH(f1) PARTITIONS 2;
INSERT INTO t1 VALUES(1, 1);
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2;
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536;
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
DROP TABLE t1;
#
# MDEV-34756 Validation of new foreign key skipped
# if innodb_alter_copy_bulk=ON
#
CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f1) REFERENCES t1(f1);
affected rows: 0
info: Records: 0 Duplicates: 0 Warnings: 0
INSERT INTO t1 VALUES (1, 1);
INSERT INTO t2 VALUES (1, 2);
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
ERROR 23000: Cannot add or update a child row: a foreign key constraint fails (`test`.`#sql-alter`, CONSTRAINT `#sql-alter_ibfk_2` FOREIGN KEY (`f2`) REFERENCES `t1` (`f1`))
INSERT INTO t1 VALUES(3, 1);
SET STATEMENT foreign_key_checks=0 FOR
ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
affected rows: 1
info: Records: 1 Duplicates: 0 Warnings: 0
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
affected rows: 2
info: Records: 2 Duplicates: 0 Warnings: 0
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
affected rows: 1
info: Records: 1 Duplicates: 0 Warnings: 0
DROP TABLE t2, t1;

View file

@ -19,8 +19,10 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 4194304
FOUND 1 /InnoDB: Resized log to 4\.000MiB/ in mysqld.1.err
UPDATE t SET b='' WHERE a<10;
SET GLOBAL innodb_log_file_size=5242880;
connect con1,localhost,root;
UPDATE t SET b='' WHERE a<10;
connection default;
SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 5242880
@ -28,6 +30,9 @@ SELECT global_value FROM information_schema.system_variables
WHERE variable_name = 'innodb_log_file_size';
global_value
5242880
connection con1;
disconnect con1;
connection default;
# restart
SELECT * FROM t WHERE a<10;
a b
@ -40,6 +45,10 @@ a b
7
8
9
SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
COUNT(*) LENGTH(b)
9 0
19991 255
SHOW VARIABLES LIKE 'innodb_log_file_size';
Variable_name Value
innodb_log_file_size 5242880

View file

@ -1,26 +1,25 @@
--source include/have_innodb.inc
--source include/have_partition.inc
--source include/have_sequence.inc
SET @@alter_algorithm=COPY;
CREATE TABLE t1(f1 CHAR(200), f2 INT NOT NULL)engine=InnoDB;
INSERT INTO t1 SELECT repeat('a', 200), seq FROM seq_1_to_2;
# Buffer fits in the memory
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
# Insert more entries
INSERT INTO t1 SELECT repeat('b', 200), seq FROM seq_3_to_65536;
# Alter should use temporary file for sorting
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
# Error while buffering the insert operation
--error ER_DUP_ENTRY
ALTER TABLE t1 ADD PRIMARY KEY(f1(2));
ALTER TABLE t1 ALGORITHM=COPY, ADD PRIMARY KEY(f1(2));
INSERT INTO t1 VALUES(repeat('a', 200), 1);
# Error while applying the bulk insert operation
--error ER_DUP_ENTRY
ALTER TABLE t1 ADD UNIQUE KEY(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD UNIQUE KEY(f2);
# Ignore shouldn't go through bulk operation
ALTER IGNORE TABLE t1 MODIFY f1 CHAR(200) NOT NULL;
@ -29,16 +28,41 @@ CREATE TABLE t2(f1 INT NOT NULL,
FOREIGN KEY(f1) REFERENCES t1(f2))ENGINE=InnoDB;
INSERT INTO t2 VALUES(1);
# Bulk operation shouldn't happen because of foreign key constraints
ALTER TABLE t2 FORCE;
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
DROP TABLE t2, t1;
CREATE TABLE t1 (f1 INT, f2 INT) ENGINE=InnoDB PARTITION BY HASH(f1) PARTITIONS 2;
INSERT INTO t1 VALUES(1, 1);
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_1_to_2;
# Buffer fits in the memory
ALTER TABLE t1 FORCE;
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
# Insert more entries
INSERT INTO t1 SELECT seq, seq * 2 FROM seq_3_to_65536;
# Alter should use temporary file for sorting
ALTER TABLE t1 ADD INDEX(f2);
ALTER TABLE t1 ALGORITHM=COPY, ADD INDEX(f2);
DROP TABLE t1;
--echo #
--echo # MDEV-34756 Validation of new foreign key skipped
--echo # if innodb_alter_copy_bulk=ON
--echo #
CREATE TABLE t1(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
CREATE TABLE t2(f1 INT NOT NULL PRIMARY KEY,
f2 INT NOT NULL)ENGINE=InnoDB;
--enable_info
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f1) REFERENCES t1(f1);
--disable_info
INSERT INTO t1 VALUES (1, 1);
INSERT INTO t2 VALUES (1, 2);
--replace_regex /#sql-alter-[0-9a-f-]*/#sql-alter/
--error ER_NO_REFERENCED_ROW_2
ALTER TABLE t2 ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
INSERT INTO t1 VALUES(3, 1);
--enable_info
SET STATEMENT foreign_key_checks=0 FOR
ALTER TABLE t2 ALGORITHM=COPY, ADD CONSTRAINT FOREIGN KEY(f2) REFERENCES t1(f1);
ALTER TABLE t1 ALGORITHM=COPY, FORCE;
ALTER TABLE t2 ALGORITHM=COPY, FORCE;
--disable_info
DROP TABLE t2, t1;

View file

@ -25,17 +25,28 @@ SHOW VARIABLES LIKE 'innodb_log_file_size';
let SEARCH_PATTERN = InnoDB: Resized log to 4\\.000MiB;
--source include/search_pattern_in_file.inc
UPDATE t SET b='' WHERE a<10;
send SET GLOBAL innodb_log_file_size=5242880;
SET GLOBAL innodb_log_file_size=5242880;
--connect con1,localhost,root
send UPDATE t SET b='' WHERE a<10;
--connection default
reap;
SHOW VARIABLES LIKE 'innodb_log_file_size';
SELECT global_value FROM information_schema.system_variables
WHERE variable_name = 'innodb_log_file_size';
--connection con1
reap;
--disconnect con1
--connection default
--let $shutdown_timeout=0
--let $restart_parameters=
--source include/restart_mysqld.inc
SELECT * FROM t WHERE a<10;
SELECT COUNT(*),LENGTH(b) FROM t GROUP BY b;
SHOW VARIABLES LIKE 'innodb_log_file_size';
let SEARCH_PATTERN = InnoDB: Resized log to 5\\.000MiB;

View file

@ -1,5 +1,7 @@
--source include/have_innodb.inc
--source include/have_log_bin.inc
# Test does a lot of queries that take a lot of CPU under Valgrind.
--source include/not_valgrind.inc
call mtr.add_suppression("Can't init tc log");
call mtr.add_suppression("Aborting");

View file

@ -9,10 +9,7 @@ connection slave;
SET @old_parallel= @@GLOBAL.slave_parallel_threads;
SET GLOBAL slave_parallel_threads=10;
CHANGE MASTER TO master_host='127.0.0.1', master_port=SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4, master_use_gtid=no;
FLUSH TABLES WITH READ LOCK;
include/start_slave.inc
include/wait_for_slave_param.inc [Seconds_Behind_Master]
UNLOCK TABLES;
connection master;
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1);

View file

@ -1,6 +1,7 @@
include/master-slave.inc
[connection master]
connection master;
call mtr.add_suppression("Got an error writing communication packets");
call mtr.add_suppression("Got an error reading communication packets");
call mtr.add_suppression("Could not read packet:.* vio_errno: 1158");
call mtr.add_suppression("Could not write packet:.* vio_errno: 1160");

View file

@ -14,6 +14,12 @@ SET GLOBAL event_scheduler=on;
let $wait_condition= SELECT count(*)>0 FROM t1;
--source include/wait_condition.inc
SET GLOBAL event_scheduler=off;
# If the time rolls to the next whole second just at this point, a new event
# run may be scheduled. Wait for this to disappear, otherwise we see occasional
# test failures if the table gets dropped before the extra event run completes.
# Expect 5 connections: default, master, master1, server_1, binlog dump thread
--let $wait_condition= SELECT COUNT(*) = 5 FROM INFORMATION_SCHEMA.PROCESSLIST;
--source include/wait_condition.inc
SELECT DISTINCT a FROM t1;
DELETE FROM t1;

View file

@ -1,3 +1,5 @@
# Test applies a large binlog, takes long under Valgrind with little benefit.
--source include/not_valgrind.inc
--source include/have_innodb.inc
--source include/have_partition.inc
--source include/have_binlog_format_mixed_or_row.inc

View file

@ -28,14 +28,7 @@ SET GLOBAL slave_parallel_threads=10;
--replace_result $SERVER_MYPORT_1 SERVER_MYPORT_1
eval CHANGE MASTER TO master_host='127.0.0.1', master_port=$SERVER_MYPORT_1, master_user='root', master_log_file='master-bin.000001', master_log_pos=4, master_use_gtid=no;
# Block execution yet when the blocked query timestamp has been already accounted
FLUSH TABLES WITH READ LOCK;
--source include/start_slave.inc
--let $slave_param = Seconds_Behind_Master
--let $slave_param_value = 1
--let $slave_param_comparison= >=
--source include/wait_for_slave_param.inc
UNLOCK TABLES;
--connection master
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;

View file

@ -25,6 +25,7 @@
--source include/master-slave.inc
--connection master
call mtr.add_suppression("Got an error writing communication packets");
call mtr.add_suppression("Got an error reading communication packets");
call mtr.add_suppression("Could not read packet:.* vio_errno: 1158");
call mtr.add_suppression("Could not write packet:.* vio_errno: 1160");

View file

@ -19,7 +19,17 @@
--source include/master-slave.inc
connection slave;
--let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
--let $i= 100
while ($i > 0) {
dec $i;
--let $connection_id=`SELECT id FROM information_schema.processlist where state LIKE 'Waiting for master to send event'`
if ($connection_id) {
let $i= 0;
}
if ($i > 0) {
--sleep 0.1
}
}
if(!$connection_id)
{

View file

@ -1,19 +1,19 @@
SET @global_start_value = @@global.innodb_purge_batch_size;
SELECT @global_start_value;
@global_start_value
1000
127
'#--------------------FN_DYNVARS_046_01------------------------#'
SET @@global.innodb_purge_batch_size = 1;
SET @@global.innodb_purge_batch_size = DEFAULT;
SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size
1000
127
'#---------------------FN_DYNVARS_046_02-------------------------#'
SET innodb_purge_batch_size = 1;
ERROR HY000: Variable 'innodb_purge_batch_size' is a GLOBAL variable and should be set with SET GLOBAL
SELECT @@innodb_purge_batch_size;
@@innodb_purge_batch_size
1000
127
SELECT local.innodb_purge_batch_size;
ERROR 42S02: Unknown table 'local' in field list
SET global innodb_purge_batch_size = 1;
@ -112,4 +112,4 @@ SELECT @@global.innodb_purge_batch_size;
SET @@global.innodb_purge_batch_size = @global_start_value;
SELECT @@global.innodb_purge_batch_size;
@@global.innodb_purge_batch_size
1000
127

View file

@ -221,7 +221,7 @@
VARIABLE_SCOPE GLOBAL
-VARIABLE_TYPE BIGINT UNSIGNED
+VARIABLE_TYPE INT UNSIGNED
VARIABLE_COMMENT How many pages to flush on LRU eviction
VARIABLE_COMMENT Deprecated parameter with no effect
NUMERIC_MIN_VALUE 1
-NUMERIC_MAX_VALUE 18446744073709551615
+NUMERIC_MAX_VALUE 4294967295

View file

@ -984,13 +984,13 @@ SESSION_VALUE NULL
DEFAULT_VALUE 32
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT How many pages to flush on LRU eviction
VARIABLE_COMMENT Unused
NUMERIC_MIN_VALUE 1
NUMERIC_MAX_VALUE 18446744073709551615
NUMERIC_BLOCK_SIZE 0
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
COMMAND_LINE_ARGUMENT NULL
VARIABLE_NAME INNODB_LRU_SCAN_DEPTH
SESSION_VALUE NULL
DEFAULT_VALUE 1536
@ -1233,7 +1233,7 @@ READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
VARIABLE_NAME INNODB_PURGE_BATCH_SIZE
SESSION_VALUE NULL
DEFAULT_VALUE 1000
DEFAULT_VALUE 127
VARIABLE_SCOPE GLOBAL
VARIABLE_TYPE BIGINT UNSIGNED
VARIABLE_COMMENT Number of UNDO log pages to purge in one batch from the history list
@ -1254,7 +1254,7 @@ NUMERIC_MAX_VALUE 128
NUMERIC_BLOCK_SIZE 0
ENUM_VALUE_LIST NULL
READ_ONLY NO
COMMAND_LINE_ARGUMENT OPTIONAL
COMMAND_LINE_ARGUMENT NULL
VARIABLE_NAME INNODB_PURGE_THREADS
SESSION_VALUE NULL
DEFAULT_VALUE 4

View file

@ -39,7 +39,7 @@ extern "C" unsigned crc32c_sse42(unsigned crc, const void* buf, size_t size);
constexpr uint32_t cpuid_ecx_SSE42= 1U << 20;
constexpr uint32_t cpuid_ecx_SSE42_AND_PCLMUL= cpuid_ecx_SSE42 | 1U << 1;
constexpr uint32_t cpuid_ecx_XSAVE= 1U << 26;
constexpr uint32_t cpuid_ecx_AVX_AND_XSAVE= 1U << 28 | 1U << 27;
static uint32_t cpuid_ecx()
{
@ -395,7 +395,7 @@ static bool os_have_avx512()
static ATTRIBUTE_NOINLINE bool have_vpclmulqdq(uint32_t cpuid_ecx)
{
if (!(cpuid_ecx & cpuid_ecx_XSAVE) || !os_have_avx512())
if ((~cpuid_ecx & cpuid_ecx_AVX_AND_XSAVE) || !os_have_avx512())
return false;
# ifdef _MSC_VER
int regs[4];

View file

@ -4769,6 +4769,8 @@ public:
{
return const_charset_converter(thd, tocs, true, func_name);
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_static_float_func>(thd, this); }
};
@ -4932,7 +4934,6 @@ public:
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_string_with_introducer>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -4947,7 +4948,6 @@ public:
{ }
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_string_sys>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -4964,7 +4964,6 @@ public:
{ }
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_string_ascii>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -5003,7 +5002,6 @@ public:
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_static_string_func>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -5023,7 +5021,6 @@ public:
}
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_partition_func_safe_string>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};
@ -5187,7 +5184,6 @@ public:
void print(String *str, enum_query_type query_type) override;
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_bin_string>(thd, this); }
Item *do_build_clone(THD *thd) const override { return get_copy(thd); }
};

View file

@ -752,6 +752,8 @@ public:
void set_sub_test(Item_maxmin_subselect *item) { test_sub_item= item; test_sum_item= 0;};
bool empty_underlying_subquery();
Item *neg_transformer(THD *thd) override;
Item *do_get_copy(THD *thd) const override
{ return get_item_copy<Item_func_not_all>(thd, this); }
};

View file

@ -5429,7 +5429,9 @@ static int init_server_components()
MARIADB_REMOVED_OPTION("innodb-log-compressed-pages"),
MARIADB_REMOVED_OPTION("innodb-log-files-in-group"),
MARIADB_REMOVED_OPTION("innodb-log-optimize-ddl"),
MARIADB_REMOVED_OPTION("innodb-lru-flush-size"),
MARIADB_REMOVED_OPTION("innodb-page-cleaners"),
MARIADB_REMOVED_OPTION("innodb-purge-truncate-frequency"),
MARIADB_REMOVED_OPTION("innodb-replication-delay"),
MARIADB_REMOVED_OPTION("innodb-scrub-log"),
MARIADB_REMOVED_OPTION("innodb-scrub-log-speed"),

View file

@ -713,7 +713,6 @@ net_real_write(NET *net,const uchar *packet, size_t len)
{
sql_print_warning("Could not write packet: fd: %lld state: %d "
"errno: %d vio_errno: %d length: %ld",
MYF(ME_ERROR_LOG | ME_WARNING),
(longlong) vio_fd(net->vio), (int) net->vio->state,
vio_errno(net->vio), net->last_errno,
(ulong) (end-pos));

View file

@ -1495,11 +1495,23 @@ handle_rpl_parallel_thread(void *arg)
after mark_start_commit(), we have to unmark, which has at least a
theoretical possibility of leaving a window where it looks like all
transactions in a GCO have started committing, while in fact one
will need to rollback and retry. This is not supposed to be possible
(since there is a deadlock, at least one transaction should be
blocked from reaching commit), but this seems a fragile ensurance,
and there were historically a number of subtle bugs in this area.
will need to rollback and retry.
Normally this will not happen, since the kill is there to resolve a
deadlock that is preventing at least one transaction from proceeding.
One case it can happen is with InnoDB dict stats update, which can
temporarily cause transactions to block each other, but locks are
released immediately, they don't linger until commit. There could be
other similar cases, there were historically a number of subtle bugs
in this area.
But once we start the commit, we can expect that no new lock
conflicts will be introduced. So by handling any lingering deadlock
kill at this point just before mark_start_commit(), we should be
robust even towards spurious deadlock kills.
*/
if (rgi->killed_for_retry != rpl_group_info::RETRY_KILL_NONE)
wait_for_pending_deadlock_kill(thd, rgi);
if (!thd->killed)
{
DEBUG_SYNC(thd, "rpl_parallel_before_mark_start_commit");

View file

@ -2539,6 +2539,23 @@ rpl_group_info::unmark_start_commit()
e= this->parallel_entry;
mysql_mutex_lock(&e->LOCK_parallel_entry);
/*
Assert that we have not already wrongly completed this GCO and signalled
the next one to start, only to now unmark and make the signal invalid.
This is to catch problems like MDEV-34696.
The error inject rpl_parallel_simulate_temp_err_xid is used to test this
precise situation, that we handle it gracefully if it somehow occurs in a
release build. So disable the assert in this case.
*/
#ifndef DBUG_OFF
bool allow_unmark_after_complete= false;
DBUG_EXECUTE_IF("rpl_parallel_simulate_temp_err_xid",
allow_unmark_after_complete= true;);
DBUG_ASSERT(!gco->next_gco ||
gco->next_gco->wait_count > e->count_committing_event_groups ||
allow_unmark_after_complete);
#endif
--e->count_committing_event_groups;
mysql_mutex_unlock(&e->LOCK_parallel_entry);
}

View file

@ -32197,7 +32197,8 @@ test_if_cheaper_ordering(const JOIN_TAB *tab, ORDER *order, TABLE *table,
else
{
const KEY *ref_keyinfo= table->key_info + ref_key;
refkey_rows_estimate= ref_keyinfo->rec_per_key[tab->ref.key_parts - 1];
refkey_rows_estimate=
(ha_rows)ref_keyinfo->actual_rec_per_key(tab->ref.key_parts - 1);
}
set_if_bigger(refkey_rows_estimate, 1);
}

View file

@ -610,7 +610,7 @@ public:
bool avg_frequency_is_inited() { return avg_frequency != NULL; }
double get_avg_frequency(uint i)
double get_avg_frequency(uint i) const
{
return (double) avg_frequency[i] / Scale_factor_avg_frequency;
}

View file

@ -173,7 +173,7 @@ typedef struct st_key {
engine_option_value *option_list;
ha_index_option_struct *option_struct; /* structure with parsed options */
double actual_rec_per_key(uint i);
double actual_rec_per_key(uint i) const;
} KEY;

View file

@ -10322,7 +10322,7 @@ uint TABLE_SHARE::actual_n_key_parts(THD *thd)
}
double KEY::actual_rec_per_key(uint i)
double KEY::actual_rec_per_key(uint i) const
{
if (rec_per_key == 0)
return 0;

View file

@ -2573,6 +2573,51 @@ err_exit:
return(FALSE);
}
/** Look up and buffer-fix a page in the buffer pool, reading it from disk
if it is not already resident.
@param id  tablespace page identifier to look up
@return the buffer-fixed block, or nullptr if the page was marked as
freed/corrupted or the read failed
NOTE(review): appears to assume an uncompressed page (ut_ad(b->frame));
confirm callers never pass ROW_FORMAT=COMPRESSED-only pages. */
buf_block_t* buf_pool_t::page_fix(const page_id_t id)
{
ha_handler_stats *const stats= mariadb_stats;
buf_inc_get(stats);
/* Locate the page_hash cell and its latch for this page id. */
auto& chain= page_hash.cell_get(id.fold());
page_hash_latch &hash_lock= page_hash.lock_get(chain);
for (;;)
{
hash_lock.lock_shared();
buf_page_t *b= page_hash.get(id, chain);
if (b)
{
/* Buffer-fix before releasing the hash latch, so the block
cannot be evicted while we examine its state. */
uint32_t state= b->fix();
hash_lock.unlock_shared();
ut_ad(!b->in_zip_hash);
ut_ad(b->frame);
ut_ad(state >= buf_page_t::FREED);
if (state >= buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX)
{
/* A read into the block is in progress; acquiring the page latch
in shared mode waits for the read to complete. */
b->lock.s_lock();
state= b->state();
ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX);
b->lock.s_unlock();
}
if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
{
/* The page was marked as freed or corrupted. */
b->unfix();
b= nullptr;
}
return reinterpret_cast<buf_block_t*>(b);
}
hash_lock.unlock_shared();
/* Page is not resident: issue a synchronous read, then retry the
hash lookup on success. */
switch (buf_read_page(id, 0, chain)) {
default:
/* The read failed (e.g. page does not exist or is corrupted). */
return nullptr;
case DB_SUCCESS:
case DB_SUCCESS_LOCKED_REC:
mariadb_increment_pages_read(stats);
/* Trigger random read-ahead around the requested page. */
buf_read_ahead_random(id, 0);
}
}
}
/** Low level function used to get access to a database page.
@param[in] page_id page id
@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0

View file

@ -39,9 +39,6 @@ Created 11/5/1995 Heikki Tuuri
#include "srv0mon.h"
#include "my_cpu.h"
/** Flush this many pages in buf_LRU_get_free_block() */
size_t innodb_lru_flush_size;
/** The number of blocks from the LRU_old pointer onward, including
the block pointed to, must be buf_pool.LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
of the whole LRU list length, except that the tolerance defined below
@ -369,17 +366,13 @@ block to read in a page. Note that we only ever get a block from
the free list. Even when we flush a page or find a page in LRU scan
we put it to free list to be used.
* iteration 0:
* get a block from the buf_pool.free list, success:done
* get a block from the buf_pool.free list
* if buf_pool.try_LRU_scan is set
* scan LRU up to 100 pages to free a clean block
* success:retry the free list
* flush up to innodb_lru_flush_size LRU blocks to data files
(until UT_LIST_GET_GEN(buf_pool.free) < innodb_lru_scan_depth)
* on buf_page_write_complete() the blocks will put on buf_pool.free list
* success: retry the free list
* invoke buf_pool.page_cleaner_wakeup(true) and wait its completion
* subsequent iterations: same as iteration 0 except:
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
* scan the entire LRU list
@param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY

View file

@ -18501,6 +18501,7 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
ib_senderrf(thd, IB_LOG_LEVEL_ERROR, ER_CANT_CREATE_HANDLER_FILE);
break;
case log_t::RESIZE_STARTED:
const lsn_t start{log_sys.resize_in_progress()};
for (timespec abstime;;)
{
if (thd_kill_level(thd))
@ -18511,13 +18512,30 @@ static void innodb_log_file_size_update(THD *thd, st_mysql_sys_var*,
set_timespec(abstime, 5);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
const bool in_progress(buf_pool.get_oldest_modification(LSN_MAX) <
log_sys.resize_in_progress());
if (in_progress)
lsn_t resizing= log_sys.resize_in_progress();
if (resizing > buf_pool.get_oldest_modification(0))
{
buf_pool.page_cleaner_wakeup(true);
my_cond_timedwait(&buf_pool.done_flush_list,
&buf_pool.flush_list_mutex.m_mutex, &abstime);
resizing= log_sys.resize_in_progress();
}
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (!log_sys.resize_in_progress())
if (start > log_sys.get_lsn())
{
ut_ad(!log_sys.is_pmem());
/* The server is almost idle. Write dummy FILE_CHECKPOINT records
to ensure that the log resizing will complete. */
log_sys.latch.wr_lock(SRW_LOCK_CALL);
while (start > log_sys.get_lsn())
{
mtr_t mtr;
mtr.start();
mtr.commit_files(log_sys.last_checkpoint_lsn);
}
log_sys.latch.wr_unlock();
}
if (!resizing || resizing > start /* only wait for our resize */)
break;
}
}
@ -18904,7 +18922,7 @@ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
"Number of UNDO log pages to purge in one batch from the history list",
NULL, NULL,
1000, /* Default setting */
127, /* Default setting */
1, /* Minimum value */
innodb_purge_batch_size_MAX, 0);
@ -19167,11 +19185,6 @@ static MYSQL_SYSVAR_ULONG(lru_scan_depth, srv_LRU_scan_depth,
"How deep to scan LRU to keep it clean",
NULL, NULL, 1536, 100, ~0UL, 0);
static MYSQL_SYSVAR_SIZE_T(lru_flush_size, innodb_lru_flush_size,
PLUGIN_VAR_RQCMDARG,
"How many pages to flush on LRU eviction",
NULL, NULL, 32, 1, SIZE_T_MAX, 0);
static MYSQL_SYSVAR_ULONG(flush_neighbors, srv_flush_neighbors,
PLUGIN_VAR_OPCMDARG,
"Set to 0 (don't flush neighbors from buffer pool),"
@ -19435,14 +19448,21 @@ static MYSQL_SYSVAR_ULONGLONG(max_undo_log_size, srv_max_undo_log_size,
10 << 20, 10 << 20,
1ULL << (32 + UNIV_PAGE_SIZE_SHIFT_MAX), 0);
static ulong innodb_purge_rseg_truncate_frequency;
static ulong innodb_purge_rseg_truncate_frequency= 128;
static MYSQL_SYSVAR_ULONG(purge_rseg_truncate_frequency,
innodb_purge_rseg_truncate_frequency,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_DEPRECATED,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_DEPRECATED | PLUGIN_VAR_NOCMDOPT,
"Unused",
NULL, NULL, 128, 1, 128, 0);
static size_t innodb_lru_flush_size;
static MYSQL_SYSVAR_SIZE_T(lru_flush_size, innodb_lru_flush_size,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_DEPRECATED | PLUGIN_VAR_NOCMDOPT,
"Unused",
NULL, NULL, 32, 1, SIZE_T_MAX, 0);
static void innodb_undo_log_truncate_update(THD *thd, struct st_mysql_sys_var*,
void*, const void *save)
{

View file

@ -74,14 +74,10 @@ page_zip_des_t*
btr_cur_get_page_zip(
/*=================*/
btr_cur_t* cursor);/*!< in: tree cursor */
/*********************************************************//**
Returns the page of a tree cursor.
/** Returns the page of a tree cursor.
@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
btr_cur_t* cursor);/*!< in: tree cursor */
#define btr_cur_get_page(cursor) (cursor)->block()->page.frame
/*********************************************************//**
Returns the index of a cursor.
@param cursor b-tree cursor

View file

@ -48,18 +48,6 @@ btr_cur_get_page_zip(
return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
}
/*********************************************************//**
Returns the page of a tree cursor.
@return pointer to page */
UNIV_INLINE
page_t*
btr_cur_get_page(
/*=============*/
	btr_cur_t*	cursor)	/*!< in: tree cursor */
{
	/* The record that the B-tree cursor points to lies within the
	page frame; align the record pointer down to the page start. */
	const rec_t*	rec = page_cur_get_rec(&cursor->page_cur);

	return(page_align(rec));
}
/*********************************************************//**
Positions a tree cursor at a given record. */
UNIV_INLINE

View file

@ -1358,6 +1358,12 @@ public:
}
public:
/** Look up and buffer-fix a page.
@param id page identifier
@return undo log page, buffer-fixed
@retval nullptr if the undo page was corrupted or freed */
buf_block_t *page_fix(const page_id_t id);
/** @return whether the buffer pool contains a page
@param page_id page identifier
@param chain hash table chain for page_id.fold() */

View file

@ -33,9 +33,6 @@ Created 11/5/1995 Heikki Tuuri
struct trx_t;
struct fil_space_t;
/** Flush this many pages in buf_LRU_get_free_block() */
extern size_t innodb_lru_flush_size;
/*#######################################################################
These are low-level functions
#########################################################################*/
@ -82,17 +79,13 @@ block to read in a page. Note that we only ever get a block from
the free list. Even when we flush a page or find a page in LRU scan
we put it to free list to be used.
* iteration 0:
* get a block from the buf_pool.free list, success:done
* get a block from the buf_pool.free list
* if buf_pool.try_LRU_scan is set
* scan LRU up to 100 pages to free a clean block
* success:retry the free list
* flush up to innodb_lru_flush_size LRU blocks to data files
(until UT_LIST_GET_GEN(buf_pool.free) < innodb_lru_scan_depth)
* on buf_page_write_complete() the blocks will be put on the buf_pool.free list
* success: retry the free list
* invoke buf_pool.page_cleaner_wakeup(true) and wait for its completion
* subsequent iterations: same as iteration 0 except:
* scan whole LRU list
* scan LRU list even if buf_pool.try_LRU_scan is not set
* scan the entire LRU list
@param get how to allocate the block
@return the free control block, in state BUF_BLOCK_MEMORY

View file

@ -224,7 +224,7 @@ public:
/** exclusive latch for checkpoint, shared for mtr_t::commit() to buf */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock latch;
/** number of std::swap(buf, flush_buf) and writes from buf to log;
/** number of writes from buf or flush_buf to log;
protected by latch.wr_lock() */
ulint write_to_log;
@ -232,8 +232,9 @@ public:
lsn_t write_lsn;
/** buffer for writing data to ib_logfile0, or nullptr if is_pmem()
In write_buf(), buf and flush_buf are swapped */
In write_buf(), buf and flush_buf may be swapped */
byte *flush_buf;
/** set when there may be need to initiate a log checkpoint.
This must hold if lsn - last_checkpoint_lsn > max_checkpoint_age. */
std::atomic<bool> need_checkpoint;
@ -372,9 +373,10 @@ public:
private:
/** Write resize_buf to resize_log.
@param length the used length of resize_buf */
@param b resize_buf or resize_flush_buf
@param length the used length of b */
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
void resize_write_buf(size_t length) noexcept;
void resize_write_buf(const byte *b, size_t length) noexcept;
public:
/** Rename a log file after resizing.
@ -506,13 +508,7 @@ public:
@param d destination
@param s string of bytes
@param size length of str, in bytes */
void append(byte *&d, const void *s, size_t size) noexcept
{
ut_ad(latch_have_any());
ut_ad(d + size <= buf + (is_pmem() ? file_size : buf_size));
memcpy(d, s, size);
d+= size;
}
static inline void append(byte *&d, const void *s, size_t size) noexcept;
/** Set the log file format. */
void set_latest_format(bool encrypted) noexcept

View file

@ -31,14 +31,6 @@ Created 10/4/1994 Heikki Tuuri
#ifdef UNIV_DEBUG
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
/*==============*/
page_cur_t* cur); /*!< in: page cursor */
/*********************************************************//**
Gets pointer to the buffer block where the cursor is positioned.
@return page */
UNIV_INLINE
@ -60,12 +52,12 @@ page_cur_get_page_zip(
UNIV_INLINE
rec_t *page_cur_get_rec(const page_cur_t *cur);
#else /* UNIV_DEBUG */
# define page_cur_get_page(cur) page_align((cur)->rec)
# define page_cur_get_block(cur) (cur)->block
# define page_cur_get_page_zip(cur) buf_block_get_page_zip((cur)->block)
# define page_cur_get_rec(cur) (cur)->rec
#endif /* UNIV_DEBUG */
# define is_page_cur_get_page_zip(cur) is_buf_block_get_page_zip((cur)->block)
#define page_cur_get_page(cur) page_cur_get_block(cur)->page.frame
#define is_page_cur_get_page_zip(cur) is_buf_block_get_page_zip((cur)->block)
/*********************************************************//**
Sets the cursor object to point before the first user record
on the page. */

View file

@ -25,18 +25,6 @@ Created 10/4/1994 Heikki Tuuri
*************************************************************************/
#ifdef UNIV_DEBUG
/*********************************************************//**
Gets pointer to the page frame where the cursor is positioned.
@return page */
UNIV_INLINE
page_t*
page_cur_get_page(
/*==============*/
	page_cur_t*	cur)	/*!< in: page cursor */
{
	/* The cursor record resides inside a page frame; align the
	record pointer down to the start of that frame. */
	const rec_t*	rec = page_cur_get_rec(cur);

	return(page_align(rec));
}
/*********************************************************//**
Gets pointer to the buffer block where the cursor is positioned.
@return page */

View file

@ -209,17 +209,6 @@ que_eval_sql(
const char* sql, /*!< in: SQL string */
trx_t* trx); /*!< in: trx */
/**********************************************************************//**
Round robin scheduler.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
que_thr_t*
que_fork_scheduler_round_robin(
/*===========================*/
que_fork_t* fork, /*!< in: a query fork */
que_thr_t* thr); /*!< in: current pos */
/** Query thread states */
enum que_thr_state_t {
/** in selects this means that the thread is at the end of its

View file

@ -54,32 +54,47 @@ row_vers_impl_x_locked(
dict_index_t* index,
const rec_offs* offsets);
/** Finds out if a version of the record, where the version >= the current
purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param[in] also_curr TRUE if also rec is included in the versions
to search; otherwise only versions prior
to it are searched
@param[in] rec record in the clustered index; the caller
must have a latch on the page
@param[in] mtr mtr holding the latch on rec; it will
also hold the latch on purge_view
@param[in] index secondary index
@param[in] ientry secondary index entry
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@return TRUE if earlier version should have */
/** Find out whether data tuple has missing data type
for indexed virtual column.
@param tuple data tuple
@param index virtual index
@return true if tuple has missing column type */
bool dtuple_vcol_data_missing(const dtuple_t &tuple,
const dict_index_t &index);
/** build virtual column value from current cluster index record data
@param[in,out] row the cluster index row in dtuple form
@param[in] clust_index clustered index
@param[in] index the secondary index
@param[in] heap heap used to build virtual dtuple. */
bool
row_vers_old_has_index_entry(
bool also_curr,
const rec_t* rec,
mtr_t* mtr,
row_vers_build_clust_v_col(
dtuple_t* row,
dict_index_t* clust_index,
dict_index_t* index,
const dtuple_t* ientry,
mem_heap_t* heap);
/** Build a dtuple contains virtual column data for current cluster index
@param[in] rec cluster index rec
@param[in] clust_index cluster index
@param[in] clust_offsets cluster rec offset
@param[in] index secondary index
@param[in] trx_id transaction ID on the purging record,
or 0 if called outside purge
@param[in] roll_ptr roll_ptr for the purge record
@param[in,out] heap heap memory
@param[in,out] v_heap heap memory to keep virtual column tuple
@param[in,out] mtr mini-transaction
@return dtuple contains virtual column data */
dtuple_t*
row_vers_build_cur_vrow(
const rec_t* rec,
dict_index_t* clust_index,
rec_offs** clust_offsets,
dict_index_t* index,
trx_id_t trx_id,
roll_ptr_t roll_ptr,
trx_id_t trx_id);
mem_heap_t* heap,
mem_heap_t* v_heap,
mtr_t* mtr);
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent

View file

@ -550,6 +550,15 @@ void srv_monitor_task(void*);
void srv_master_callback(void*);
/**
Fetches and executes tasks from the purge work queue,
until this queue is empty.
This is main part of purge worker task, but also
executed in coordinator.
@note needs current_thd to be set beforehand.
*/
void srv_purge_worker_task_low();
} /* extern "C" */
#ifdef UNIV_DEBUG

View file

@ -280,6 +280,8 @@ public:
#endif
}
bool rd_u_upgrade_try() { return writer.wr_lock_try(); }
void u_wr_upgrade()
{
DBUG_ASSERT(writer.is_locked());
@ -294,6 +296,13 @@ public:
readers.store(0, std::memory_order_release);
/* Note: Any pending rd_lock() will not be woken up until u_unlock() */
}
  /** Downgrade an update lock to a shared lock: register one more
  reader before releasing the update lock, so that the lock is never
  observed as fully released during the transition. */
  void u_rd_downgrade()
  {
    DBUG_ASSERT(writer.is_locked());
    /* Become a reader first; relaxed ordering suffices because the
    update lock is still held at this point. */
    ut_d(uint32_t lk=) readers.fetch_add(1, std::memory_order_relaxed);
    /* The reader count must not collide with the WRITER flag bit. */
    ut_ad(lk < WRITER);
    u_unlock();
  }
void rd_unlock()
{

View file

@ -198,6 +198,30 @@ public:
/** Upgrade an update lock */
inline void u_x_upgrade();
inline void u_x_upgrade(const char *file, unsigned line);
  /** Try to upgrade a shared lock to exclusive without waiting.
  On failure the shared lock remains held; on success the caller
  holds a recursive exclusive lock.
  @return whether a shared lock was upgraded to exclusive */
  bool s_x_upgrade_try()
  {
    ut_ad(have_s());
    ut_ad(!have_u_or_x());
    /* First try to acquire the update (writer) slot; if that fails,
    keep the shared lock and report failure. */
    if (!lock.rd_u_upgrade_try())
      return false;
    claim_ownership();
    /* Drop our reader reference, then complete the U -> X upgrade. */
    s_unlock();
    lock.u_wr_upgrade();
    recursive= RECURSIVE_X;
    return true;
  }
  __attribute__((warn_unused_result))
  /** Upgrade a shared lock to exclusive, waiting if necessary.
  NOTE: if this returns false, the latch was completely released and
  then re-acquired in exclusive mode; any state observed under the
  shared lock may have changed in between.
  @return whether the operation succeeded without waiting */
  bool s_x_upgrade()
  {
    if (s_x_upgrade_try())
      return true;
    /* Could not upgrade atomically: release and re-acquire. */
    s_unlock();
    x_lock();
    return false;
  }
/** Downgrade a single exclusive lock to an update lock */
void x_u_downgrade()
{
@ -206,6 +230,16 @@ public:
recursive*= RECURSIVE_U;
lock.wr_u_downgrade();
}
  /** Downgrade a single (non-recursive) update lock to a shared lock.
  The caller must hold exactly one update lock. */
  void u_s_downgrade()
  {
    ut_ad(have_u_or_x());
    ut_ad(recursive == RECURSIVE_U);
    /* Clear the recursion count and ownership before handing the
    latch over to shared mode. */
    recursive= 0;
    set_new_owner(0);
    lock.u_rd_downgrade();
    /* Debug builds track this thread as a shared-lock holder. */
    ut_d(s_lock_register());
  }
/** Acquire an exclusive lock or upgrade an update lock
@return whether U locks were upgraded to X */

View file

@ -149,10 +149,11 @@ public:
private:
/** number of pending stop() calls without resume() */
Atomic_counter<uint32_t> m_paused;
/** number of stop_SYS() calls without resume_SYS() */
Atomic_counter<uint32_t> m_SYS_paused;
/** number of stop_FTS() calls without resume_FTS() */
Atomic_counter<uint32_t> m_FTS_paused;
/** PAUSED_SYS * number of stop_SYS() calls without resume_SYS() +
number of stop_FTS() calls without resume_FTS() */
Atomic_relaxed<uint32_t> m_FTS_paused;
/** The stop_SYS() multiplier in m_FTS_paused */
static constexpr const uint32_t PAUSED_SYS= 1U << 16;
/** latch protecting end_view */
alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock_low end_latch;
@ -321,16 +322,21 @@ private:
void wait_FTS(bool also_sys);
public:
/** Suspend purge in data dictionary tables */
void stop_SYS() { m_SYS_paused++; }
  /** Suspend purge in data dictionary tables.
  The stop_SYS() count is kept in the upper bits of m_FTS_paused
  (in multiples of PAUSED_SYS), sharing one atomic word with the
  stop_FTS() count. */
  void stop_SYS()
  {
    ut_d(const auto p=) m_FTS_paused.fetch_add(PAUSED_SYS);
    /* Debug check: the PAUSED_SYS counter must not wrap around. */
    ut_ad(p < p + PAUSED_SYS);
  }
/** Resume purge in data dictionary tables */
static void resume_SYS(void *);
/** Pause purge during a DDL operation that could drop FTS_ tables. */
void stop_FTS();
/** Resume purge after stop_FTS(). */
void resume_FTS() { ut_d(const auto p=) m_FTS_paused--; ut_ad(p); }
void resume_FTS()
{ ut_d(const auto p=) m_FTS_paused.fetch_sub(1); ut_ad(p & ~PAUSED_SYS); }
/** @return whether stop_SYS() is in effect */
bool must_wait_FTS() const { return m_FTS_paused; }
bool must_wait_FTS() const { return m_FTS_paused & ~PAUSED_SYS; }
private:
/**
@ -432,10 +438,17 @@ public:
struct view_guard
{
inline view_guard();
enum guard { END_VIEW= -1, PURGE= 0, VIEW= 1};
guard latch;
inline view_guard(guard latch);
inline ~view_guard();
/** Fetch an undo log page.
@param id page identifier
@param mtr mini-transaction
@return reference to buffer page, possibly buffer-fixed in mtr */
inline const buf_block_t *get(const page_id_t id, mtr_t *mtr);
/** @return purge_sys.view */
/** @return purge_sys.view or purge_sys.end_view */
inline const ReadViewBase &view() const;
};
@ -464,14 +477,39 @@ public:
/** The global data structure coordinating a purge */
extern purge_sys_t purge_sys;
purge_sys_t::view_guard::view_guard()
{ purge_sys.latch.rd_lock(SRW_LOCK_CALL); }
/** Acquire the latch that protects the read view to be accessed.
@param latch VIEW (shared purge_sys.latch protecting purge_sys.view),
END_VIEW (shared purge_sys.end_latch protecting purge_sys.end_view),
or PURGE (no latch needed; see comment below) */
purge_sys_t::view_guard::view_guard(purge_sys_t::view_guard::guard latch) :
  latch(latch)
{
  switch (latch) {
  case VIEW:
    purge_sys.latch.rd_lock(SRW_LOCK_CALL);
    break;
  case END_VIEW:
    purge_sys.end_latch.rd_lock();
    break;
  case PURGE:
    /* the access is within a purge batch; purge_coordinator_task
    will wait for all workers to complete before updating the views */
    break;
  }
}
purge_sys_t::view_guard::~view_guard()
{ purge_sys.latch.rd_unlock(); }
{
switch (latch) {
case VIEW:
purge_sys.latch.rd_unlock();
break;
case END_VIEW:
purge_sys.end_latch.rd_unlock();
break;
case PURGE:
break;
}
}
const ReadViewBase &purge_sys_t::view_guard::view() const
{ return purge_sys.view; }
{ return latch == END_VIEW ? purge_sys.end_view : purge_sys.view; }
/** Acquire a shared latch protecting purge_sys.end_view. */
purge_sys_t::end_view_guard::end_view_guard()
{ purge_sys.end_latch.rd_lock(); }

View file

@ -157,50 +157,44 @@ trx_undo_report_row_operation(
/** TRX_UNDO_PREV_IN_PURGE tells trx_undo_prev_version_build() that it
is being called purge view and we would like to get the purge record
even if it is in the purge view (in the normal case, it will return without
fetching the purge record */
fetching the purge record) */
static constexpr ulint TRX_UNDO_PREV_IN_PURGE = 1;
/** This tells trx_undo_prev_version_build() to fetch the old value in
the undo log (which is the after image for an update) */
static constexpr ulint TRX_UNDO_GET_OLD_V_VALUE = 2;
/** indicate a call from row_vers_old_has_index_entry() */
/** indicate a call from row_undo_mod_sec_is_unsafe() */
static constexpr ulint TRX_UNDO_CHECK_PURGEABILITY = 4;
/** indicate a call from row_purge_is_unsafe() */
static constexpr ulint TRX_UNDO_CHECK_PURGE_PAGES = 8;
/** Build a previous version of a clustered index record. The caller
must hold a latch on the index page of the clustered index record.
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is
allocated
@param old_vers previous version or NULL if rec is the
first inserted version, or if history data
has been deleted (an error), or if the purge
could have removed the version
though it has not yet done so
@param v_heap memory heap used to create vrow
dtuple if it is not yet created. This heap
diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param vrow virtual column info, if any
@param v_status status determine if it is going into this
function by purge thread or not.
And if we read "after image" of undo log
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is allocated
@param old_vers previous version, or NULL if rec is the first inserted
version, or if history data has been deleted (an error),
or if the purge could have removed the version though
it has not yet done so
@param mtr mini-transaction
@param v_status TRX_UNDO_PREV_IN_PURGE, ...
@param v_heap memory heap used to create vrow dtuple if it is not yet
created. This heap diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param vrow virtual column info, if any
@return error code
@retval DB_SUCCESS if previous version was successfully built,
or if it was an insert or the undo record refers to the table before rebuild
@retval DB_MISSING_HISTORY if the history is missing */
dberr_t
trx_undo_prev_version_build(
const rec_t *rec,
dict_index_t *index,
rec_offs *offsets,
mem_heap_t *heap,
rec_t **old_vers,
mem_heap_t *v_heap,
dtuple_t **vrow,
ulint v_status);
dberr_t trx_undo_prev_version_build(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mtr_t *mtr,
ulint v_status,
mem_heap_t *v_heap, dtuple_t **vrow);
/** Read from an undo log record a non-virtual column value.
@param ptr pointer to remaining part of the undo record

View file

@ -500,9 +500,8 @@ void lock_sys_t::close()
requesting record lock are brute force (BF). If they are check is
this BF-BF wait correct and if not report BF wait and assert.
@param[in] lock_rec other waiting record lock
@param[in] trx trx requesting conflicting record lock
@param[in] type_mode lock type mode of requesting trx
@param lock other waiting lock
@param trx transaction requesting conflicting lock
*/
static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx,
const unsigned type_mode = LOCK_NONE)

View file

@ -540,17 +540,14 @@ log_t::resize_start_status log_t::resize_start(os_offset_t size) noexcept
resize_target= size;
resize_buf= static_cast<byte*>(ptr);
resize_flush_buf= static_cast<byte*>(ptr2);
start_lsn= get_lsn();
if (is_pmem())
{
resize_log.close();
start_lsn= get_lsn();
}
else
{
memcpy_aligned<16>(resize_buf, buf, (buf_free + 15) & ~15);
start_lsn= first_lsn +
(~lsn_t{write_size - 1} & (write_lsn - first_lsn));
}
(~lsn_t{write_size - 1} &
(lsn_t{write_size - 1} + start_lsn - first_lsn));
}
resize_lsn.store(start_lsn, std::memory_order_relaxed);
status= success ? RESIZE_STARTED : RESIZE_FAILED;
@ -804,19 +801,26 @@ inline void log_t::persist(lsn_t lsn) noexcept
#endif
ATTRIBUTE_COLD ATTRIBUTE_NOINLINE
/** Write resize_buf to resize_log.
@param length the used length of resize_buf */
void log_t::resize_write_buf(size_t length) noexcept
void log_t::resize_write_buf(const byte *b, size_t length) noexcept
{
const size_t block_size_1= write_size - 1;
ut_ad(b == resize_buf || b == resize_flush_buf);
ut_ad(!(resize_target & block_size_1));
ut_ad(!(length & block_size_1));
ut_ad(length > block_size_1);
ut_ad(length <= resize_target);
const lsn_t resizing{resize_in_progress()};
ut_ad(resizing <= write_lsn);
lsn_t offset= START_OFFSET +
((write_lsn - resizing) & ~lsn_t{block_size_1}) %
int64_t d= int64_t(write_lsn - resize_in_progress());
if (UNIV_UNLIKELY(d <= 0))
{
d&= ~int64_t(block_size_1);
if (int64_t(d + length) <= 0)
return;
length+= d;
b-= d;
d= 0;
}
lsn_t offset= START_OFFSET + (lsn_t(d) & ~lsn_t{block_size_1}) %
(resize_target - START_OFFSET);
if (UNIV_UNLIKELY(offset + length > resize_target))
@ -828,7 +832,7 @@ void log_t::resize_write_buf(size_t length) noexcept
}
ut_a(os_file_write_func(IORequestWrite, "ib_logfile101", resize_log.m_file,
buf, offset, length) == DB_SUCCESS);
b, offset, length) == DB_SUCCESS);
}
/** Write buf to ib_logfile0.
@ -862,6 +866,7 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
ut_ad(write_size_1 >= 511);
const byte *const write_buf{buf};
const byte *const re_write_buf{resize_buf};
offset&= ~lsn_t{write_size_1};
if (length <= write_size_1)
@ -875,8 +880,8 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
#else
# ifdef HAVE_valgrind
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - length);
if (UNIV_LIKELY_NULL(resize_buf))
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) - length);
if (UNIV_LIKELY_NULL(re_write_buf))
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) - length);
# endif
buf[length]= 0; /* allow recovery to catch EOF faster */
#endif
@ -896,15 +901,15 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
the current LSN are generated. */
#ifdef HAVE_valgrind
MEM_MAKE_DEFINED(buf + length, (write_size_1 + 1) - new_buf_free);
if (UNIV_LIKELY_NULL(resize_buf))
MEM_MAKE_DEFINED(resize_buf + length, (write_size_1 + 1) -
if (UNIV_LIKELY_NULL(re_write_buf))
MEM_MAKE_DEFINED(re_write_buf + length, (write_size_1 + 1) -
new_buf_free);
#endif
buf[length]= 0; /* allow recovery to catch EOF faster */
length&= ~write_size_1;
memcpy_aligned<16>(flush_buf, buf + length, (new_buf_free + 15) & ~15);
if (UNIV_LIKELY_NULL(resize_buf))
memcpy_aligned<16>(resize_flush_buf, resize_buf + length,
if (UNIV_LIKELY_NULL(re_write_buf))
memcpy_aligned<16>(resize_flush_buf, re_write_buf + length,
(new_buf_free + 15) & ~15);
length+= write_size_1 + 1;
}
@ -923,8 +928,8 @@ template<bool release_latch> inline lsn_t log_t::write_buf() noexcept
/* Do the write to the log file */
log_write_buf(write_buf, length, offset);
if (UNIV_LIKELY_NULL(resize_buf))
resize_write_buf(length);
if (UNIV_LIKELY_NULL(re_write_buf))
resize_write_buf(re_write_buf, length);
write_lsn= lsn;
if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED))

View file

@ -4531,7 +4531,7 @@ dberr_t recv_recovery_from_checkpoint_start()
ut_ad(recv_sys.pages.empty());
if (log_sys.format == log_t::FORMAT_3_23) {
early_exit:
func_exit:
log_sys.latch.wr_unlock();
return err;
}
@ -4547,7 +4547,7 @@ read_only_recovery:
sql_print_warning("InnoDB: innodb_read_only"
" prevents crash recovery");
err = DB_READ_ONLY;
goto early_exit;
goto func_exit;
}
if (recv_sys.is_corrupt_log()) {
sql_print_error("InnoDB: Log scan aborted at LSN "
@ -4585,7 +4585,7 @@ read_only_recovery:
rescan, missing_tablespace);
if (err != DB_SUCCESS) {
goto early_exit;
goto func_exit;
}
if (missing_tablespace) {
@ -4607,7 +4607,7 @@ read_only_recovery:
rescan, missing_tablespace);
if (err != DB_SUCCESS) {
goto early_exit;
goto func_exit;
}
} while (missing_tablespace);
@ -4666,7 +4666,7 @@ read_only_recovery:
if (recv_sys.lsn < log_sys.next_checkpoint_lsn) {
err_exit:
err = DB_ERROR;
goto early_exit;
goto func_exit;
}
if (!srv_read_only_mode && log_sys.is_latest()) {
@ -4690,7 +4690,7 @@ err_exit:
ut_ad("log parsing error" == 0);
mysql_mutex_unlock(&recv_sys.mutex);
err = DB_CORRUPTION;
goto early_exit;
goto func_exit;
}
recv_sys.apply_log_recs = true;
ut_d(recv_no_log_write = srv_operation == SRV_OPERATION_RESTORE
@ -4698,9 +4698,9 @@ err_exit:
if (srv_operation == SRV_OPERATION_NORMAL) {
err = recv_rename_files();
}
mysql_mutex_unlock(&recv_sys.mutex);
recv_lsn_checks_on = true;
mysql_mutex_unlock(&recv_sys.mutex);
/* The database is now ready to start almost normal processing of user
transactions: transaction rollbacks and the application of the log
@ -4710,8 +4710,7 @@ err_exit:
err = DB_CORRUPTION;
}
log_sys.latch.wr_unlock();
return err;
goto func_exit;
}
bool recv_dblwr_t::validate_page(const page_id_t page_id,

View file

@ -1312,6 +1312,15 @@ inline void log_t::resize_write(lsn_t lsn, const byte *end, size_t len,
}
}
/** Copy a string of bytes to the log buffer and advance the write
position past the copied data.
@param d    in/out: current write position within log_sys.buf
@param s    source bytes to append
@param size number of bytes to copy */
inline void log_t::append(byte *&d, const void *s, size_t size) noexcept
{
  /* The caller must hold the log latch (in any mode). */
  ut_ad(log_sys.latch_have_any());
  /* The copy must not overrun the log buffer; on PMEM the buffer
  spans the whole file, otherwise it is buf_size bytes. */
  ut_ad(d + size <= log_sys.buf +
        (log_sys.is_pmem() ? log_sys.file_size : log_sys.buf_size));
  memcpy(d, s, size);
  d+= size;
}
template<bool spin,bool pmem>
std::pair<lsn_t,mtr_t::page_flush_ahead>
mtr_t::finish_writer(mtr_t *mtr, size_t len)

View file

@ -166,40 +166,6 @@ que_thr_init_command(
thr->state = QUE_THR_RUNNING;
}
/**********************************************************************//**
Round robin scheduler.
@return a query thread of the graph moved to QUE_THR_RUNNING state, or
NULL; the query thread should be executed by que_run_threads by the
caller */
que_thr_t*
que_fork_scheduler_round_robin(
/*===========================*/
	que_fork_t*	fork,	/*!< in: a query fork */
	que_thr_t*	thr)	/*!< in: current pos */
{
	trx_t*		trx = fork->trx;
	que_thr_t*	next;

	trx->mutex_lock();

	/* Advance to the next query thread of the fork; when there is
	no current position, start from the first available thread. */
	next = (thr == NULL)
		? UT_LIST_GET_FIRST(fork->thrs)
		: UT_LIST_GET_NEXT(thrs, thr);

	if (next != NULL) {
		/* Activate the fork and prepare the chosen thread for
		execution by que_run_threads(). */
		fork->state = QUE_FORK_ACTIVE;
		fork->last_sel_node = NULL;

		ut_ad(next->state == QUE_THR_COMPLETED);
		que_thr_init_command(next);
	}

	trx->mutex_unlock();

	return(next);
}
/**********************************************************************//**
Starts execution of a command in a query fork. Picks a query thread which
is not in the QUE_THR_RUNNING state and moves it to that state. If none

View file

@ -160,7 +160,7 @@ may be pointing to garbage (an undo log record discarded by purge),
but it will never be dereferenced, because the purge view is older
than any active transaction.
For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
For details see: row_undo_mod_sec_is_unsafe() and row_purge_poss_sec()
*/

View file

@ -2773,10 +2773,16 @@ avoid_bulk:
ut_ad(index->table->skip_alter_undo);
ut_ad(!entry->is_metadata());
/* If foreign key exist and foreign key is enabled
then avoid using bulk insert for copy algorithm */
if (innodb_alter_copy_bulk
&& !index->table->is_temporary()
&& !index->table->versioned()
&& !index->table->has_spatial_index()) {
&& !index->table->has_spatial_index()
&& (!trx->check_foreigns
|| (index->table->foreign_set.empty()
&& index->table->referenced_set.empty()))) {
ut_ad(page_is_empty(block->page.frame));
/* This code path has been executed at the
start of the alter operation. Consecutive

View file

@ -3821,7 +3821,7 @@ UndorecApplier::get_old_rec(const dtuple_t &tuple, dict_index_t *index,
if (is_same(roll_ptr))
return version;
trx_undo_prev_version_build(version, index, *offsets, heap, &prev_version,
nullptr, nullptr, 0);
&mtr, 0, nullptr, nullptr);
version= prev_version;
}
while (version);
@ -3990,7 +3990,7 @@ void UndorecApplier::log_update(const dtuple_t &tuple,
copy_rec= rec_copy(mem_heap_alloc(
heap, rec_offs_size(offsets)), match_rec, offsets);
trx_undo_prev_version_build(match_rec, clust_index, offsets, heap,
&prev_version, nullptr, nullptr, 0);
&prev_version, &mtr, 0, nullptr, nullptr);
prev_offsets= rec_get_offsets(prev_version, clust_index, prev_offsets,
clust_index->n_core_fields,

View file

@ -267,6 +267,448 @@ row_purge_remove_clust_if_poss(
return(false);
}
/** Check a virtual column value index secondary virtual index matches
that of current cluster index record, which is recreated from information
stored in undo log
@param[in] rec record in the clustered index
@param[in] icentry the index entry built from a cluster row
@param[in] clust_index cluster index
@param[in] clust_offsets offsets on the cluster record
@param[in] index the secondary index
@param[in] ientry the secondary index entry
@param[in] roll_ptr the rollback pointer for the purging record
@param[in] trx_id trx id for the purging record
@param[in,out] mtr mini-transaction
@param[in,out] v_row dtuple holding the virtual rows (if needed)
@return true if matches, false otherwise */
static
bool
row_purge_vc_matches_cluster(
const rec_t* rec,
const dtuple_t* icentry,
dict_index_t* clust_index,
rec_offs* clust_offsets,
dict_index_t* index,
const dtuple_t* ientry,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
mtr_t* mtr,
dtuple_t** vrow)
{
const rec_t* version;
rec_t* prev_version;
mem_heap_t* heap2;
mem_heap_t* heap = NULL;
mem_heap_t* tuple_heap;
ulint num_v = dict_table_get_n_v_cols(index->table);
bool compare[REC_MAX_N_FIELDS];
ulint n_fields = dtuple_get_n_fields(ientry);
ulint n_non_v_col = 0;
ulint n_cmp_v_col = 0;
const dfield_t* field1;
dfield_t* field2;
ulint i;
/* First compare non-virtual columns (primary keys) */
ut_ad(index->n_fields == n_fields);
ut_ad(n_fields == dtuple_get_n_fields(icentry));
ut_ad(mtr->memo_contains_page_flagged(rec,
MTR_MEMO_PAGE_S_FIX
| MTR_MEMO_PAGE_X_FIX));
{
const dfield_t* a = ientry->fields;
const dfield_t* b = icentry->fields;
for (const dict_field_t *ifield = index->fields,
*const end = &index->fields[index->n_fields];
ifield != end; ifield++, a++, b++) {
if (!ifield->col->is_virtual()) {
if (cmp_dfield_dfield(a, b)) {
return false;
}
n_non_v_col++;
}
}
}
tuple_heap = mem_heap_create(1024);
ut_ad(n_fields > n_non_v_col);
*vrow = dtuple_create_with_vcol(tuple_heap, 0, num_v);
dtuple_init_v_fld(*vrow);
for (i = 0; i < num_v; i++) {
dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
= DATA_MISSING;
compare[i] = false;
}
version = rec;
while (n_cmp_v_col < n_fields - n_non_v_col) {
heap2 = heap;
heap = mem_heap_create(1024);
roll_ptr_t cur_roll_ptr = row_get_rec_roll_ptr(
version, clust_index, clust_offsets);
ut_ad(cur_roll_ptr != 0);
ut_ad(roll_ptr != 0);
trx_undo_prev_version_build(
version, clust_index, clust_offsets,
heap, &prev_version, mtr,
TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE,
nullptr, vrow);
if (heap2) {
mem_heap_free(heap2);
}
if (!prev_version) {
/* Versions end here */
goto func_exit;
}
clust_offsets = rec_get_offsets(prev_version, clust_index,
NULL,
clust_index->n_core_fields,
ULINT_UNDEFINED, &heap);
ulint entry_len = dict_index_get_n_fields(index);
for (i = 0; i < entry_len; i++) {
const dict_field_t* ind_field
= dict_index_get_nth_field(index, i);
const dict_col_t* col = ind_field->col;
field1 = dtuple_get_nth_field(ientry, i);
if (!col->is_virtual()) {
continue;
}
const dict_v_col_t* v_col
= reinterpret_cast<const dict_v_col_t*>(col);
field2
= dtuple_get_nth_v_field(*vrow, v_col->v_pos);
if ((dfield_get_type(field2)->mtype != DATA_MISSING)
&& (!compare[v_col->v_pos])) {
if (ind_field->prefix_len != 0
&& !dfield_is_null(field2)) {
field2->len = unsigned(
dtype_get_at_most_n_mbchars(
field2->type.prtype,
field2->type.mbminlen,
field2->type.mbmaxlen,
ind_field->prefix_len,
field2->len,
static_cast<char*>
(field2->data)));
}
/* The index field mismatch */
if (cmp_dfield_dfield(field2, field1)) {
mem_heap_free(tuple_heap);
mem_heap_free(heap);
return(false);
}
compare[v_col->v_pos] = true;
n_cmp_v_col++;
}
}
trx_id_t rec_trx_id = row_get_rec_trx_id(
prev_version, clust_index, clust_offsets);
if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
break;
}
version = prev_version;
}
func_exit:
if (n_cmp_v_col == 0) {
*vrow = NULL;
}
mem_heap_free(tuple_heap);
mem_heap_free(heap);
	/* FIXME: If n_cmp_v_col is not the same as
	n_fields - n_non_v_col, a callback is needed to compare the
	remaining columns. For the time being, we return true */
return (true);
}
/** @return whether two data tuples are equal */
bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2)
{
ut_ad(tuple1.magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(tuple2.magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(dtuple_check_typed(&tuple1));
ut_ad(dtuple_check_typed(&tuple2));
ut_ad(tuple1.n_fields == tuple2.n_fields);
for (ulint i= 0; i < tuple1.n_fields; i++)
if (cmp_dfield_dfield(&tuple1.fields[i], &tuple2.fields[i]))
return false;
return true;
}
/** Finds out if a version of the record, where the version >= the current
purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param node    purge node
@param index   secondary index
@param ientry  secondary index entry
@param mtr     mini-transaction
@return whether ientry cannot be purged */
static bool row_purge_is_unsafe(const purge_node_t &node,
				dict_index_t *index,
				const dtuple_t *ientry, mtr_t *mtr)
{
	/* Clustered index record that node.pcur is positioned on. */
	const rec_t*	rec = btr_pcur_get_rec(&node.pcur);
	/* Roll pointer and transaction id of the record being purged;
	used by row_purge_vc_matches_cluster() to bound the undo walk. */
	roll_ptr_t	roll_ptr = node.roll_ptr;
	trx_id_t	trx_id = node.trx_id;
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index = node.pcur.index();
	rec_offs*	clust_offsets;
	mem_heap_t*	heap;
	dtuple_t*	row;
	const dtuple_t*	entry;
	dtuple_t*	vrow = NULL;
	/* Separate heap for virtual-column data, so that it can outlive
	the per-version heap that is recreated on each loop iteration. */
	mem_heap_t*	v_heap = NULL;
	/* Virtual column values that apply to the version currently
	being examined. */
	dtuple_t*	cur_vrow = NULL;

	ut_ad(index->table == clust_index->table);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					clust_index->n_core_fields,
					ULINT_UNDEFINED, &heap);

	if (dict_index_has_virtual(index)) {
		v_heap = mem_heap_create(100);
	}

	/* Phase 1: check the current (latest) clustered index record. */
	if (!rec_get_deleted_flag(rec, rec_offs_comp(clust_offsets))) {
		row_ext_t*	ext;

		/* The top of the stack of versions is locked by the
		mtr holding a latch on the page containing the
		clustered index record. The bottom of the stack is
		locked by the fact that the purge_sys.view must
		'overtake' any read view of an active transaction.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index,
				rec, clust_offsets,
				NULL, NULL, NULL, &ext, heap);

		if (dict_index_has_virtual(index)) {

#ifdef DBUG_OFF
# define dbug_v_purge false
#else /* DBUG_OFF */
			bool	dbug_v_purge = false;
#endif /* DBUG_OFF */

			DBUG_EXECUTE_IF(
				"ib_purge_virtual_index_callback",
				dbug_v_purge = true;);

			roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr(
				rec, clust_index, clust_offsets);

			/* if the row is newly inserted, then the virtual
			columns need to be computed */
			if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
			    || dbug_v_purge) {

				/* If the computation fails, err on the
				side of caution and keep the entry. */
				if (!row_vers_build_clust_v_col(
					    row, clust_index, index, heap)) {
					goto unsafe_to_purge;
				}

				entry = row_build_index_entry(
					row, ext, index, heap);
				if (entry && dtuple_coll_eq(*ientry, *entry)) {
					goto unsafe_to_purge;
				}
			} else {
				/* Build index entry out of row */
				entry = row_build_index_entry(row, ext, index, heap);
				/* entry could only be NULL if
				the clustered index record is an uncommitted
				inserted record whose BLOBs have not been
				written yet. The secondary index record
				can be safely removed, because it cannot
				possibly refer to this incomplete
				clustered index record. (Insert would
				always first be completed for the
				clustered index record, then proceed to
				secondary indexes.) */

				if (entry && row_purge_vc_matches_cluster(
					    rec, entry,
					    clust_index, clust_offsets,
					    index, ientry, roll_ptr,
					    trx_id, mtr, &vrow)) {
					goto unsafe_to_purge;
				}
			}
			/* Recompute the offsets: the heap may have been
			modified while building the virtual columns. */
			clust_offsets = rec_get_offsets(rec, clust_index, NULL,
							clust_index
							->n_core_fields,
							ULINT_UNDEFINED, &heap);
		} else {

			entry = row_build_index_entry(
				row, ext, index, heap);

			/* If entry == NULL, the record contains unset BLOB
			pointers. This must be a freshly inserted record. If
			this is called from
			row_purge_remove_sec_if_poss_low(), the thread will
			hold latches on the clustered index and the secondary
			index. Because the insert works in three steps:

				(1) insert the record to clustered index
				(2) store the BLOBs and update BLOB pointers
				(3) insert records to secondary indexes

			the purge thread can safely ignore freshly inserted
			records and delete the secondary index record. The
			thread that inserted the new record will be inserting
			the secondary index records. */

			/* NOTE that we cannot do the comparison as binary
			fields because the row is maybe being modified so that
			the clustered index record has already been updated to
			a different binary value in a char field, but the
			collation identifies the old and new value anyway! */
			if (entry && dtuple_coll_eq(*ientry, *entry)) {
unsafe_to_purge:
				mem_heap_free(heap);

				if (v_heap) {
					mem_heap_free(v_heap);
				}
				return true;
			}
		}
	} else if (dict_index_has_virtual(index)) {
		/* The current cluster index record could be
		deleted, but the previous version of it might not. We will
		need to get the virtual column data from undo record
		associated with current cluster index */
		cur_vrow = row_vers_build_cur_vrow(
			rec, clust_index, &clust_offsets,
			index, trx_id, roll_ptr, heap, v_heap, mtr);
	}

	/* Phase 2: walk older versions via the undo log until the
	versions end or a matching non-delete-marked version is found. */
	version = rec;

	for (;;) {
		mem_heap_t*	heap2 = heap;
		heap = mem_heap_create(1024);
		vrow = NULL;

		trx_undo_prev_version_build(version,
					    clust_index, clust_offsets,
					    heap, &prev_version, mtr,
					    TRX_UNDO_CHECK_PURGE_PAGES,
					    nullptr,
					    dict_index_has_virtual(index)
					    ? &vrow : nullptr);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (!prev_version) {
			/* Versions end here */
			mem_heap_free(heap);

			if (v_heap) {
				mem_heap_free(v_heap);
			}

			/* No version requires ientry: safe to purge. */
			return false;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						clust_index->n_core_fields,
						ULINT_UNDEFINED, &heap);

		if (dict_index_has_virtual(index)) {
			if (vrow) {
				/* If the undo log lacks data for some
				indexed virtual column, this version
				cannot match; skip the comparison. */
				if (dtuple_vcol_data_missing(*vrow, *index)) {
					goto nochange_index;
				}
				/* Keep the virtual row info for the next
				version, unless it is changed */
				mem_heap_empty(v_heap);
				cur_vrow = dtuple_copy(vrow, v_heap);
				dtuple_dup_v_fld(cur_vrow, v_heap);
			}

			if (!cur_vrow) {
				/* Nothing for this index has changed,
				continue */
nochange_index:
				version = prev_version;
				continue;
			}
		}

		if (!rec_get_deleted_flag(prev_version,
					  rec_offs_comp(clust_offsets))) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, NULL, NULL, &ext, heap);

			if (dict_index_has_virtual(index)) {
				ut_ad(cur_vrow);
				ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
				dtuple_copy_v_fields(row, cur_vrow);
			}

			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers.  This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && dtuple_coll_eq(*ientry, *entry)) {
				goto unsafe_to_purge;
			}
		}

		version = prev_version;
	}
}
/** Determines if it is possible to remove a secondary index entry.
Removal is possible if the secondary index entry does not refer to any
not delete marked version of a clustered index record where DB_TRX_ID
@ -280,67 +722,45 @@ would refer to.
However, in that case, the user transaction would also re-insert the
secondary index entry after purge has removed it and released the leaf
page latch.
@param[in,out] node row purge node
@param[in] index secondary index
@param[in] entry secondary index entry
@param[in,out] sec_pcur secondary index cursor or NULL
if it is called for purge buffering
operation.
@param[in,out] sec_mtr mini-transaction which holds
secondary index entry or NULL if it is
called for purge buffering operation.
@param[in] is_tree true=pessimistic purge,
false=optimistic (leaf-page only)
@return true if the secondary index record can be purged */
static
bool
row_purge_poss_sec(
purge_node_t* node,
dict_index_t* index,
const dtuple_t* entry,
btr_pcur_t* sec_pcur,
mtr_t* sec_mtr,
bool is_tree)
@param node row purge node
@param index secondary index
@param entry secondary index entry
@param mtr mini-transaction for looking up clustered index
@return whether the secondary index record can be purged */
static bool row_purge_poss_sec(purge_node_t *node, dict_index_t *index,
const dtuple_t *entry, mtr_t *mtr)
{
bool can_delete;
mtr_t mtr;
ut_ad(!index->is_clust());
const auto savepoint= mtr->get_savepoint();
bool can_delete= !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr);
ut_ad(!dict_index_is_clust(index));
if (!can_delete)
{
ut_ad(node->pcur.pos_state == BTR_PCUR_IS_POSITIONED);
can_delete= !row_purge_is_unsafe(*node, index, entry, mtr);
node->pcur.pos_state = BTR_PCUR_WAS_POSITIONED;
node->pcur.latch_mode= BTR_NO_LATCHES;
}
mtr_start(&mtr);
can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
|| !row_vers_old_has_index_entry(true,
btr_pcur_get_rec(&node->pcur),
&mtr, index, entry,
node->roll_ptr, node->trx_id);
/* Persistent cursor is closed if reposition fails. */
if (node->found_clust) {
btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
} else {
mtr.commit();
}
ut_ad(mtr.has_committed());
return can_delete;
mtr->rollback_to_savepoint(savepoint);
return can_delete;
}
/***************************************************************
Removes a secondary index entry if possible, by modifying the
index tree. Does not try to buffer the delete.
@return TRUE if success or if not found */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
ibool
row_purge_remove_sec_if_poss_tree(
/*==============================*/
purge_node_t* node, /*!< in: row purge node */
dict_index_t* index, /*!< in: index */
const dtuple_t* entry) /*!< in: index entry */
__attribute__((nonnull, warn_unused_result))
/** Remove a secondary index entry if possible, by modifying the index tree.
@param node purge node
@param index secondary index
@param entry index entry
@param page_max_trx_id the PAGE_MAX_TRX_ID
when row_purge_remove_sec_if_poss_leaf() was invoked
@return whether the operation succeeded */
static bool row_purge_remove_sec_if_poss_tree(purge_node_t *node,
dict_index_t *index,
const dtuple_t *entry,
trx_id_t page_max_trx_id)
{
btr_pcur_t pcur;
ibool success = TRUE;
bool success = true;
dberr_t err;
mtr_t mtr;
@ -371,7 +791,9 @@ row_purge_remove_sec_if_poss_tree(
which cannot be purged yet, requires its existence. If some requires,
we should do nothing. */
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, true)) {
if (page_max_trx_id
== page_get_max_trx_id(btr_cur_get_page(&pcur.btr_cur))
|| row_purge_poss_sec(node, index, entry, &mtr)) {
/* Remove the index record, which should have been
marked for deletion. */
@ -410,26 +832,23 @@ row_purge_remove_sec_if_poss_tree(
func_exit:
btr_pcur_close(&pcur); // FIXME: need this?
mtr.commit();
return(success);
return success;
}
/***************************************************************
Removes a secondary index entry without modifying the index tree,
if possible.
@retval true if success or if not found
@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
bool
row_purge_remove_sec_if_poss_leaf(
/*==============================*/
purge_node_t* node, /*!< in: row purge node */
dict_index_t* index, /*!< in: index */
const dtuple_t* entry) /*!< in: index entry */
__attribute__((nonnull, warn_unused_result))
/** Remove a secondary index entry if possible, without modifying the tree.
@param node purge node
@param index secondary index
@param entry index entry
@return PAGE_MAX_TRX_ID for row_purge_remove_sec_if_poss_tree()
@retval 0 if success or if not found */
static trx_id_t row_purge_remove_sec_if_poss_leaf(purge_node_t *node,
dict_index_t *index,
const dtuple_t *entry)
{
mtr_t mtr;
btr_pcur_t pcur;
bool success = true;
trx_id_t page_max_trx_id = 0;
log_free_check();
ut_ad(index->table == node->table);
@ -453,7 +872,7 @@ row_purge_remove_sec_if_poss_leaf(
found:
/* Before attempting to purge a record, check
if it is safe to do so. */
if (row_purge_poss_sec(node, index, entry, &pcur, &mtr, false)) {
if (row_purge_poss_sec(node, index, entry, &mtr)) {
btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
/* Only delete-marked records should be purged. */
@ -494,8 +913,11 @@ found:
}
}
success = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
!= DB_FAIL;
if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)
== DB_FAIL) {
page_max_trx_id = page_get_max_trx_id(
btr_cur_get_page(btr_cur));
}
}
}
@ -503,7 +925,7 @@ func_exit:
mtr.commit();
cleanup:
btr_pcur_close(&pcur);
return success;
return page_max_trx_id;
}
/***********************************************************//**
@ -516,38 +938,21 @@ row_purge_remove_sec_if_poss(
dict_index_t* index, /*!< in: index */
const dtuple_t* entry) /*!< in: index entry */
{
ibool success;
ulint n_tries = 0;
if (UNIV_UNLIKELY(!entry))
/* The node->row must have lacked some fields of this index. This
is possible when the undo log record was written before this index
was created. */
return;
/* fputs("Purge: Removing secondary record\n", stderr); */
if (!entry) {
/* The node->row must have lacked some fields of this
index. This is possible when the undo log record was
written before this index was created. */
return;
}
if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
return;
}
retry:
success = row_purge_remove_sec_if_poss_tree(node, index, entry);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
and restart with more file space */
if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
n_tries++;
std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME);
goto retry;
}
ut_a(success);
if (trx_id_t page_max_trx_id=
row_purge_remove_sec_if_poss_leaf(node, index, entry))
for (auto n_tries= BTR_CUR_RETRY_DELETE_N_TIMES;
!row_purge_remove_sec_if_poss_tree(node, index, entry,
page_max_trx_id);
std::this_thread::sleep_for(BTR_CUR_RETRY_SLEEP_TIME))
/* The delete operation may fail if we have little
file space left (if innodb_file_per_table=0?) */
ut_a(--n_tries);
}
/***********************************************************//**

View file

@ -6612,7 +6612,7 @@ rec_loop:
err= trx_undo_prev_version_build(clust_rec,
clust_index, clust_offsets,
vers_heap, &old_vers,
nullptr, nullptr, 0);
&mtr, 0, nullptr, nullptr);
if (prev_heap)
mem_heap_free(prev_heap);
if (err != DB_SUCCESS)

View file

@ -469,6 +469,146 @@ func_exit:
return(err);
}
bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2);
/** Find out if an accessible version of a clustered index record
corresponds to a secondary index entry.
@param rec record in a latched clustered index page
@param index secondary index
@param ientry secondary index entry
@param mtr mini-transaction
@return whether an accessible non-delete-marked version of rec
corresponds to ientry */
static bool row_undo_mod_sec_is_unsafe(const rec_t *rec, dict_index_t *index,
                                       const dtuple_t *ientry, mtr_t *mtr)
{
	const rec_t*	version;
	rec_t*		prev_version;
	dict_index_t*	clust_index;
	rec_offs*	clust_offsets;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	const dtuple_t*	entry;
	/* ROW_FORMAT flag of the clustered index page (compact or not);
	needed for interpreting the delete-mark of older versions. */
	ulint		comp;
	dtuple_t*	vrow = NULL;
	/* Separate heap for virtual-column data, so that it survives
	the per-version heap recreated on each loop iteration. */
	mem_heap_t*	v_heap = NULL;
	/* Virtual column values applying to the version being examined. */
	dtuple_t*	cur_vrow = NULL;

	clust_index = dict_table_get_first_index(index->table);

	comp = page_rec_is_comp(rec);
	ut_ad(!dict_table_is_comp(index->table) == !comp);
	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(rec, clust_index, NULL,
					clust_index->n_core_fields,
					ULINT_UNDEFINED, &heap);

	if (dict_index_has_virtual(index)) {
		v_heap = mem_heap_create(100);
		/* The current cluster index record could be
		deleted, but the previous version of it might not. We will
		need to get the virtual column data from undo record
		associated with current cluster index */
		cur_vrow = row_vers_build_cur_vrow(
			rec, clust_index, &clust_offsets,
			index, 0, 0, heap, v_heap, mtr);
	}

	/* Walk older versions of rec through the undo log.  The walk
	ends when no previous version exists (safe) or a
	non-delete-marked version matches ientry (unsafe). */
	version = rec;

	for (;;) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		vrow = NULL;

		trx_undo_prev_version_build(version,
					    clust_index, clust_offsets,
					    heap, &prev_version,
					    mtr, TRX_UNDO_CHECK_PURGEABILITY,
					    nullptr,
					    dict_index_has_virtual(index)
					    ? &vrow : nullptr);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (!prev_version) {
			/* Versions end here; prev_version == NULL also
			encodes the "safe" return value below. */
			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						clust_index->n_core_fields,
						ULINT_UNDEFINED, &heap);

		if (dict_index_has_virtual(index)) {
			if (vrow) {
				/* A missing indexed virtual column value
				means this version cannot be compared. */
				if (dtuple_vcol_data_missing(*vrow, *index)) {
					goto nochange_index;
				}
				/* Keep the virtual row info for the next
				version, unless it is changed */
				mem_heap_empty(v_heap);
				cur_vrow = dtuple_copy(vrow, v_heap);
				dtuple_dup_v_fld(cur_vrow, v_heap);
			}

			if (!cur_vrow) {
				/* Nothing for this index has changed,
				continue */
nochange_index:
				version = prev_version;
				continue;
			}
		}

		if (!rec_get_deleted_flag(prev_version, comp)) {
			row_ext_t*	ext;

			/* The stack of versions is locked by mtr.
			Thus, it is safe to fetch the prefixes for
			externally stored columns. */
			row = row_build(ROW_COPY_POINTERS, clust_index,
					prev_version, clust_offsets,
					NULL, NULL, NULL, &ext, heap);

			if (dict_index_has_virtual(index)) {
				ut_ad(cur_vrow);
				ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
				dtuple_copy_v_fields(row, cur_vrow);
			}

			entry = row_build_index_entry(row, ext, index, heap);

			/* If entry == NULL, the record contains unset
			BLOB pointers.  This must be a freshly
			inserted record that we can safely ignore.
			For the justification, see the comments after
			the previous row_build_index_entry() call. */

			/* NOTE that we cannot do the comparison as binary
			fields because maybe the secondary index record has
			already been updated to a different binary value in
			a char field, but the collation identifies the old
			and new value anyway! */

			if (entry && dtuple_coll_eq(*ientry, *entry)) {
				/* A version matches: unsafe to remove. */
				break;
			}
		}

		version = prev_version;
	}

	mem_heap_free(heap);

	if (v_heap) {
		mem_heap_free(v_heap);
	}

	/* prev_version != NULL iff the loop broke on a match. */
	return !!prev_version;
}
/***********************************************************//**
Delete marks or removes a secondary index entry if found.
@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
@ -487,7 +627,6 @@ row_undo_mod_del_mark_or_remove_sec_low(
btr_cur_t* btr_cur;
dberr_t err = DB_SUCCESS;
mtr_t mtr;
mtr_t mtr_vers;
const bool modify_leaf = mode == BTR_MODIFY_LEAF;
row_mtr_start(&mtr, index);
@ -543,17 +682,14 @@ found:
which cannot be purged yet, requires its existence. If some requires,
we should delete mark the record. */
mtr_vers.start();
ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr_vers) ==
btr_pcur_t::SAME_ALL);
ut_a(node->pcur.restore_position(BTR_SEARCH_LEAF, &mtr) ==
btr_pcur_t::SAME_ALL);
/* For temporary table, we can skip to check older version of
clustered index entry, because there is no MVCC or purge. */
if (node->table->is_temporary()
|| row_vers_old_has_index_entry(
false, btr_pcur_get_rec(&node->pcur),
&mtr_vers, index, entry, 0, 0)) {
|| row_undo_mod_sec_is_unsafe(
btr_pcur_get_rec(&node->pcur), index, entry, &mtr)) {
btr_rec_set_deleted<true>(btr_cur_get_block(btr_cur),
btr_cur_get_rec(btr_cur), &mtr);
} else {
@ -587,7 +723,9 @@ found:
}
}
btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
ut_ad(node->pcur.pos_state == BTR_PCUR_IS_POSITIONED);
node->pcur.pos_state = BTR_PCUR_WAS_POSITIONED;
node->pcur.latch_mode = BTR_NO_LATCHES;
func_exit:
btr_pcur_close(&pcur);

View file

@ -702,7 +702,7 @@ fetch; output: fetched length of the prefix
@param[in,out] heap heap where to allocate
@return BLOB prefix
@retval NULL if the record is incomplete (should only happen
in row_vers_vc_matches_cluster() executed concurrently with another purge) */
in row_purge_vc_matches_cluster() executed concurrently with another purge) */
static
byte*
row_upd_ext_fetch(

View file

@ -194,8 +194,8 @@ row_vers_impl_x_locked_low(
trx_undo_prev_version_build(
version, clust_index, clust_offsets,
heap, &prev_version, NULL,
dict_index_has_virtual(index) ? &vrow : NULL, 0);
heap, &prev_version, mtr, 0, NULL,
dict_index_has_virtual(index) ? &vrow : NULL);
ut_d(trx->mutex_lock());
const bool committed = trx_state_eq(
@ -446,7 +446,6 @@ row_vers_impl_x_locked(
@param[in] clust_index clustered index
@param[in] index the secondary index
@param[in] heap heap used to build virtual dtuple. */
static
bool
row_vers_build_clust_v_col(
dtuple_t* row,
@ -490,26 +489,25 @@ row_vers_build_clust_v_col(
}
/** Build latest virtual column data from undo log
@param[in] in_purge whether this is the purge thread
@param[in] rec clustered index record
@param[in] clust_index clustered index
@param[in,out] clust_offsets offsets on the clustered index record
@param[in] index the secondary index
@param[in] trx_id transaction ID on the purging record,
or 0 if called outside purge
@param[in] roll_ptr the rollback pointer for the purging record
@param[in] trx_id trx id for the purging record
@param[in,out] v_heap heap used to build vrow
@param[out] v_row dtuple holding the virtual rows
@param[in,out] mtr mtr holding the latch on rec */
static
void
row_vers_build_cur_vrow_low(
bool in_purge,
const rec_t* rec,
dict_index_t* clust_index,
rec_offs* clust_offsets,
dict_index_t* index,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
roll_ptr_t roll_ptr,
mem_heap_t* v_heap,
dtuple_t** vrow,
mtr_t* mtr)
@ -539,7 +537,7 @@ row_vers_build_cur_vrow_low(
/* If this is called by purge thread, set TRX_UNDO_PREV_IN_PURGE
bit to search the undo log until we hit the current undo log with
roll_ptr */
const ulint status = in_purge
const ulint status = trx_id
? TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE
: TRX_UNDO_GET_OLD_V_VALUE;
@ -551,7 +549,7 @@ row_vers_build_cur_vrow_low(
trx_undo_prev_version_build(
version, clust_index, clust_offsets,
heap, &prev_version, NULL, vrow, status);
heap, &prev_version, mtr, status, nullptr, vrow);
if (heap2) {
mem_heap_free(heap2);
@ -603,212 +601,27 @@ row_vers_build_cur_vrow_low(
mem_heap_free(heap);
}
/** Check a virtual column value index secondary virtual index matches
that of current cluster index record, which is recreated from information
stored in undo log
@param[in]	rec		record in the clustered index
@param[in]	icentry		the index entry built from a cluster row
@param[in]	clust_index	cluster index
@param[in]	clust_offsets	offsets on the cluster record
@param[in]	index		the secondary index
@param[in]	ientry		the secondary index entry
@param[in]	roll_ptr	the rollback pointer for the purging record
@param[in]	trx_id		trx id for the purging record
@param[in,out]	v_heap		heap used to build virtual dtuple
@param[in,out]	vrow		dtuple holding the virtual rows (if needed)
@param[in]	mtr		mtr holding the latch on rec
@return true if matches, false otherwise */
static
bool
row_vers_vc_matches_cluster(
	const rec_t*	rec,
	const dtuple_t*	icentry,
	dict_index_t*	clust_index,
	rec_offs*	clust_offsets,
	dict_index_t*	index,
	const dtuple_t*	ientry,
	roll_ptr_t	roll_ptr,
	trx_id_t	trx_id,
	mem_heap_t*	v_heap,
	dtuple_t**	vrow,
	mtr_t*		mtr)
{
	const rec_t*	version;
	rec_t*		prev_version;
	mem_heap_t*	heap2;
	mem_heap_t*	heap = NULL;
	mem_heap_t*	tuple_heap;
	ulint		num_v = dict_table_get_n_v_cols(index->table);
	/* Tracks which indexed virtual columns have already been
	compared, so each one is compared at most once. */
	bool		compare[REC_MAX_N_FIELDS];
	ulint		n_fields = dtuple_get_n_fields(ientry);
	ulint		n_non_v_col = 0;
	ulint		n_cmp_v_col = 0;
	const dfield_t*	field1;
	dfield_t*	field2;
	ulint		i;

	/* First compare non-virtual columns (primary keys) */
	ut_ad(index->n_fields == n_fields);
	ut_ad(n_fields == dtuple_get_n_fields(icentry));
	ut_ad(mtr->memo_contains_page_flagged(rec,
					      MTR_MEMO_PAGE_S_FIX
					      | MTR_MEMO_PAGE_X_FIX));

	{
		const dfield_t* a = ientry->fields;
		const dfield_t* b = icentry->fields;

		for (const dict_field_t *ifield = index->fields,
			     *const end = &index->fields[index->n_fields];
		     ifield != end; ifield++, a++, b++) {
			if (!ifield->col->is_virtual()) {
				if (cmp_dfield_dfield(a, b)) {
					/* Non-virtual key part differs:
					no match. */
					return false;
				}
				n_non_v_col++;
			}
		}
	}

	tuple_heap = mem_heap_create(1024);

	ut_ad(n_fields > n_non_v_col);

	/* Allocate *vrow from v_heap if the caller wants to keep it
	beyond this call; otherwise from the local tuple_heap. */
	*vrow = dtuple_create_with_vcol(v_heap ? v_heap : tuple_heap, 0, num_v);
	dtuple_init_v_fld(*vrow);

	for (i = 0; i < num_v; i++) {
		dfield_get_type(dtuple_get_nth_v_field(*vrow, i))->mtype
			 = DATA_MISSING;
		compare[i] = false;
	}

	/* Walk older versions through the undo log until all indexed
	virtual columns have been compared. */
	version = rec;

	while (n_cmp_v_col < n_fields - n_non_v_col) {
		heap2 = heap;
		heap = mem_heap_create(1024);
		roll_ptr_t	cur_roll_ptr = row_get_rec_roll_ptr(
			version, clust_index, clust_offsets);

		ut_ad(cur_roll_ptr != 0);
		ut_ad(roll_ptr != 0);

		trx_undo_prev_version_build(
			version, clust_index, clust_offsets,
			heap, &prev_version, NULL, vrow,
			TRX_UNDO_PREV_IN_PURGE | TRX_UNDO_GET_OLD_V_VALUE);

		if (heap2) {
			mem_heap_free(heap2);
		}

		if (!prev_version) {
			/* Versions end here */
			goto func_exit;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL,
						clust_index->n_core_fields,
						ULINT_UNDEFINED, &heap);

		ulint	entry_len = dict_index_get_n_fields(index);

		for (i = 0; i < entry_len; i++) {
			const dict_field_t*	ind_field
				 = dict_index_get_nth_field(index, i);
			const dict_col_t*	col = ind_field->col;
			field1 = dtuple_get_nth_field(ientry, i);

			if (!col->is_virtual()) {
				continue;
			}

			const dict_v_col_t*	v_col
				= reinterpret_cast<const dict_v_col_t*>(col);
			field2
				= dtuple_get_nth_v_field(*vrow, v_col->v_pos);

			if ((dfield_get_type(field2)->mtype != DATA_MISSING)
			    && (!compare[v_col->v_pos])) {

				/* For a prefix index, truncate the fetched
				value to the index prefix before comparing. */
				if (ind_field->prefix_len != 0
				    && !dfield_is_null(field2)) {
					field2->len = unsigned(
						dtype_get_at_most_n_mbchars(
							field2->type.prtype,
							field2->type.mbminlen,
							field2->type.mbmaxlen,
							ind_field->prefix_len,
							field2->len,
							static_cast<char*>
							(field2->data)));
				}

				/* The index field mismatch */
				if (v_heap
				    || cmp_dfield_dfield(field2, field1)) {
					if (v_heap) {
						dtuple_dup_v_fld(*vrow, v_heap);
					}

					mem_heap_free(tuple_heap);
					mem_heap_free(heap);
					return(false);
				}

				compare[v_col->v_pos] = true;
				n_cmp_v_col++;
			}
		}

		trx_id_t	rec_trx_id = row_get_rec_trx_id(
			prev_version, clust_index, clust_offsets);

		/* Stop once we have walked past the purging record. */
		if (rec_trx_id < trx_id || roll_ptr == cur_roll_ptr) {
			break;
		}

		version = prev_version;
	}

func_exit:
	if (n_cmp_v_col == 0) {
		*vrow = NULL;
	}

	mem_heap_free(tuple_heap);
	mem_heap_free(heap);

	/* FIXME: If n_cmp_v_col is not the same as
	n_fields - n_non_v_col, a callback is needed to compare the
	remaining columns. For the time being, we return true */
	return (true);
}
/** Build a dtuple contains virtual column data for current cluster index
@param[in] in_purge called by purge thread
@param[in] rec cluster index rec
@param[in] clust_index cluster index
@param[in] clust_offsets cluster rec offset
@param[in] index secondary index
@param[in] trx_id transaction ID on the purging record,
or 0 if called outside purge
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@param[in,out] heap heap memory
@param[in,out] v_heap heap memory to keep virtual colum dtuple
@param[in] mtr mtr holding the latch on rec
@param[in,out] v_heap heap memory to keep virtual column tuple
@param[in,out] mtr mini-transaction
@return dtuple contains virtual column data */
static
dtuple_t*
row_vers_build_cur_vrow(
bool in_purge,
const rec_t* rec,
dict_index_t* clust_index,
rec_offs** clust_offsets,
dict_index_t* index,
roll_ptr_t roll_ptr,
trx_id_t trx_id,
roll_ptr_t roll_ptr,
mem_heap_t* heap,
mem_heap_t* v_heap,
mtr_t* mtr)
@ -841,8 +654,8 @@ row_vers_build_cur_vrow(
} else {
/* Try to fetch virtual column data from undo log */
row_vers_build_cur_vrow_low(
in_purge, rec, clust_index, *clust_offsets,
index, roll_ptr, trx_id, v_heap, &cur_vrow, mtr);
rec, clust_index, *clust_offsets,
index, trx_id, roll_ptr, v_heap, &cur_vrow, mtr);
}
*clust_offsets = rec_get_offsets(rec, clust_index, NULL,
@ -851,312 +664,28 @@ row_vers_build_cur_vrow(
return(cur_vrow);
}
/** Compare two data tuples field by field using the collation-aware
comparison function cmp_dfield_dfield().
@param tuple1  first data tuple
@param tuple2  second data tuple; must have the same field count as tuple1
@return whether two data tuples are equal */
static bool dtuple_coll_eq(const dtuple_t &tuple1, const dtuple_t &tuple2)
{
  ut_ad(tuple1.magic_n == DATA_TUPLE_MAGIC_N);
  ut_ad(tuple2.magic_n == DATA_TUPLE_MAGIC_N);
  ut_ad(dtuple_check_typed(&tuple1));
  ut_ad(dtuple_check_typed(&tuple2));
  ut_ad(tuple1.n_fields == tuple2.n_fields);

  for (ulint i= 0; i < tuple1.n_fields; i++)
    if (cmp_dfield_dfield(&tuple1.fields[i], &tuple2.fields[i]))
      return false;
  return true;
}
/** Find out whether data tuple has missing data type
for indexed virtual column.
@param tuple data tuple
@param index virtual index
@return true if tuple has missing column type */
static bool dtuple_vcol_data_missing(const dtuple_t &tuple,
dict_index_t *index)
bool dtuple_vcol_data_missing(const dtuple_t &tuple,
const dict_index_t &index)
{
for (ulint i= 0; i < index->n_uniq; i++)
for (ulint i= 0; i < index.n_uniq; i++)
{
dict_col_t *col= index->fields[i].col;
dict_col_t *col= index.fields[i].col;
if (!col->is_virtual())
continue;
dict_v_col_t *vcol= reinterpret_cast<dict_v_col_t*>(col);
for (ulint j= 0; j < index->table->n_v_cols; j++)
{
if (vcol == &index->table->v_cols[j]
&& tuple.v_fields[j].type.mtype == DATA_MISSING)
for (ulint j= 0; j < index.table->n_v_cols; j++)
if (vcol == &index.table->v_cols[j] &&
tuple.v_fields[j].type.mtype == DATA_MISSING)
return true;
}
}
return false;
}
/** Finds out if a version of the record, where the version >= the current
purge_sys.view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
id >= purge view, and the secondary index entry == ientry; exactly in
this case we return TRUE.
@param[in] also_curr TRUE if also rec is included in the versions
to search; otherwise only versions prior
to it are searched
@param[in] rec record in the clustered index; the caller
must have a latch on the page
@param[in] mtr mtr holding the latch on rec; it will
also hold the latch on purge_view
@param[in] index secondary index
@param[in] ientry secondary index entry
@param[in] roll_ptr roll_ptr for the purge record
@param[in] trx_id transaction ID on the purging record
@return TRUE if earlier version should have */
bool
row_vers_old_has_index_entry(
bool also_curr,
const rec_t* rec,
mtr_t* mtr,
dict_index_t* index,
const dtuple_t* ientry,
roll_ptr_t roll_ptr,
trx_id_t trx_id)
{
const rec_t* version;
rec_t* prev_version;
dict_index_t* clust_index;
rec_offs* clust_offsets;
mem_heap_t* heap;
mem_heap_t* heap2;
dtuple_t* row;
const dtuple_t* entry;
ulint comp;
dtuple_t* vrow = NULL;
mem_heap_t* v_heap = NULL;
dtuple_t* cur_vrow = NULL;
ut_ad(mtr->memo_contains_page_flagged(rec, MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_S_FIX));
clust_index = dict_table_get_first_index(index->table);
comp = page_rec_is_comp(rec);
ut_ad(!dict_table_is_comp(index->table) == !comp);
heap = mem_heap_create(1024);
clust_offsets = rec_get_offsets(rec, clust_index, NULL,
clust_index->n_core_fields,
ULINT_UNDEFINED, &heap);
if (dict_index_has_virtual(index)) {
v_heap = mem_heap_create(100);
}
DBUG_EXECUTE_IF("ib_purge_virtual_index_crash",
DBUG_SUICIDE(););
if (also_curr && !rec_get_deleted_flag(rec, comp)) {
row_ext_t* ext;
/* The top of the stack of versions is locked by the
mtr holding a latch on the page containing the
clustered index record. The bottom of the stack is
locked by the fact that the purge_sys.view must
'overtake' any read view of an active transaction.
Thus, it is safe to fetch the prefixes for
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
rec, clust_offsets,
NULL, NULL, NULL, &ext, heap);
if (dict_index_has_virtual(index)) {
#ifdef DBUG_OFF
# define dbug_v_purge false
#else /* DBUG_OFF */
bool dbug_v_purge = false;
#endif /* DBUG_OFF */
DBUG_EXECUTE_IF(
"ib_purge_virtual_index_callback",
dbug_v_purge = true;);
roll_ptr_t t_roll_ptr = row_get_rec_roll_ptr(
rec, clust_index, clust_offsets);
/* if the row is newly inserted, then the virtual
columns need to be computed */
if (trx_undo_roll_ptr_is_insert(t_roll_ptr)
|| dbug_v_purge) {
if (!row_vers_build_clust_v_col(
row, clust_index, index, heap)) {
goto unsafe_to_purge;
}
entry = row_build_index_entry(
row, ext, index, heap);
if (entry && dtuple_coll_eq(*ientry, *entry)) {
goto unsafe_to_purge;
}
} else {
/* Build index entry out of row */
entry = row_build_index_entry(row, ext, index, heap);
/* entry could only be NULL if
the clustered index record is an uncommitted
inserted record whose BLOBs have not been
written yet. The secondary index record
can be safely removed, because it cannot
possibly refer to this incomplete
clustered index record. (Insert would
always first be completed for the
clustered index record, then proceed to
secondary indexes.) */
if (entry && row_vers_vc_matches_cluster(
rec, entry,
clust_index, clust_offsets,
index, ientry, roll_ptr,
trx_id, NULL, &vrow, mtr)) {
goto unsafe_to_purge;
}
}
clust_offsets = rec_get_offsets(rec, clust_index, NULL,
clust_index
->n_core_fields,
ULINT_UNDEFINED, &heap);
} else {
entry = row_build_index_entry(
row, ext, index, heap);
/* If entry == NULL, the record contains unset BLOB
pointers. This must be a freshly inserted record. If
this is called from
row_purge_remove_sec_if_poss_low(), the thread will
hold latches on the clustered index and the secondary
index. Because the insert works in three steps:
(1) insert the record to clustered index
(2) store the BLOBs and update BLOB pointers
(3) insert records to secondary indexes
the purge thread can safely ignore freshly inserted
records and delete the secondary index record. The
thread that inserted the new record will be inserting
the secondary index records. */
/* NOTE that we cannot do the comparison as binary
fields because the row is maybe being modified so that
the clustered index record has already been updated to
a different binary value in a char field, but the
collation identifies the old and new value anyway! */
if (entry && dtuple_coll_eq(*ientry, *entry)) {
unsafe_to_purge:
mem_heap_free(heap);
if (v_heap) {
mem_heap_free(v_heap);
}
return true;
}
}
} else if (dict_index_has_virtual(index)) {
/* The current cluster index record could be
deleted, but the previous version of it might not. We will
need to get the virtual column data from undo record
associated with current cluster index */
cur_vrow = row_vers_build_cur_vrow(
also_curr, rec, clust_index, &clust_offsets,
index, roll_ptr, trx_id, heap, v_heap, mtr);
}
version = rec;
for (;;) {
heap2 = heap;
heap = mem_heap_create(1024);
vrow = NULL;
trx_undo_prev_version_build(version,
clust_index, clust_offsets,
heap, &prev_version, nullptr,
dict_index_has_virtual(index)
? &vrow : nullptr,
TRX_UNDO_CHECK_PURGEABILITY);
mem_heap_free(heap2); /* free version and clust_offsets */
if (!prev_version) {
/* Versions end here */
mem_heap_free(heap);
if (v_heap) {
mem_heap_free(v_heap);
}
return false;
}
clust_offsets = rec_get_offsets(prev_version, clust_index,
NULL,
clust_index->n_core_fields,
ULINT_UNDEFINED, &heap);
if (dict_index_has_virtual(index)) {
if (vrow) {
if (dtuple_vcol_data_missing(*vrow, index)) {
goto nochange_index;
}
/* Keep the virtual row info for the next
version, unless it is changed */
mem_heap_empty(v_heap);
cur_vrow = dtuple_copy(vrow, v_heap);
dtuple_dup_v_fld(cur_vrow, v_heap);
}
if (!cur_vrow) {
/* Nothing for this index has changed,
continue */
nochange_index:
version = prev_version;
continue;
}
}
if (!rec_get_deleted_flag(prev_version, comp)) {
row_ext_t* ext;
/* The stack of versions is locked by mtr.
Thus, it is safe to fetch the prefixes for
externally stored columns. */
row = row_build(ROW_COPY_POINTERS, clust_index,
prev_version, clust_offsets,
NULL, NULL, NULL, &ext, heap);
if (dict_index_has_virtual(index)) {
ut_ad(cur_vrow);
ut_ad(row->n_v_fields == cur_vrow->n_v_fields);
dtuple_copy_v_fields(row, cur_vrow);
}
entry = row_build_index_entry(row, ext, index, heap);
/* If entry == NULL, the record contains unset
BLOB pointers. This must be a freshly
inserted record that we can safely ignore.
For the justification, see the comments after
the previous row_build_index_entry() call. */
/* NOTE that we cannot do the comparison as binary
fields because maybe the secondary index record has
already been updated to a different binary value in
a char field, but the collation identifies the old
and new value anyway! */
if (entry && dtuple_coll_eq(*ientry, *entry)) {
goto unsafe_to_purge;
}
}
version = prev_version;
}
}
/*****************************************************************//**
Constructs the version of a clustered index record which a consistent
read should see. We assume that the trx id stored in rec is such that
@ -1223,7 +752,7 @@ row_vers_build_for_consistent_read(
err = trx_undo_prev_version_build(
version, index, *offsets, heap,
&prev_version, NULL, vrow, 0);
&prev_version, mtr, 0, NULL, vrow);
if (prev_heap != NULL) {
mem_heap_free(prev_heap);
@ -1385,8 +914,8 @@ committed_version_trx:
heap = mem_heap_create(1024);
if (trx_undo_prev_version_build(version, index, *offsets, heap,
&prev_version, in_heap, vrow,
0) != DB_SUCCESS) {
&prev_version, mtr, 0,
in_heap, vrow) != DB_SUCCESS) {
mem_heap_free(heap);
heap = heap2;
heap2 = NULL;

View file

@ -1138,10 +1138,9 @@ bool purge_sys_t::running()
void purge_sys_t::stop_FTS()
{
latch.rd_lock(SRW_LOCK_CALL);
m_FTS_paused++;
latch.rd_unlock();
while (m_active)
ut_d(const auto paused=) m_FTS_paused.fetch_add(1);
ut_ad((paused + 1) & ~PAUSED_SYS);
while (m_active.load(std::memory_order_acquire))
std::this_thread::sleep_for(std::chrono::seconds(1));
}
@ -1175,8 +1174,8 @@ void purge_sys_t::stop()
/** Resume purge in data dictionary tables */
void purge_sys_t::resume_SYS(void *)
{
ut_d(auto paused=) purge_sys.m_SYS_paused--;
ut_ad(paused);
ut_d(auto paused=) purge_sys.m_FTS_paused.fetch_sub(PAUSED_SYS);
ut_ad(paused >= PAUSED_SYS);
}
/** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
@ -1346,7 +1345,6 @@ static bool srv_purge_should_exit(size_t old_history_size)
/*********************************************************************//**
Fetch and execute a task from the work queue.
@param [in,out] slot purge worker thread slot
@return true if a task was executed */
static bool srv_task_execute()
{
@ -1487,6 +1485,13 @@ static void release_thd(THD *thd, void *ctx)
set_current_thd(0);
}
void srv_purge_worker_task_low()
{
ut_ad(current_thd);
while (srv_task_execute())
ut_ad(purge_sys.running());
}
static void purge_worker_callback(void*)
{
ut_ad(!current_thd);
@ -1494,8 +1499,7 @@ static void purge_worker_callback(void*)
ut_ad(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
void *ctx;
THD *thd= acquire_thd(&ctx);
while (srv_task_execute())
ut_ad(purge_sys.running());
srv_purge_worker_task_low();
release_thd(thd,ctx);
}

View file

@ -774,26 +774,18 @@ not_free:
buf_block_t *purge_sys_t::get_page(page_id_t id)
{
ut_ad(!recv_sys.recovery_on);
buf_block_t*& undo_page= pages[id];
if (undo_page)
return undo_page;
mtr_t mtr;
mtr.start();
undo_page=
buf_page_get_gen(id, 0, RW_S_LATCH, nullptr, BUF_GET_POSSIBLY_FREED, &mtr);
if (UNIV_LIKELY(undo_page != nullptr))
if (!undo_page)
{
undo_page->fix();
mtr.commit();
return undo_page;
undo_page= buf_pool.page_fix(id); // batch_cleanup() will unfix()
if (!undo_page)
pages.erase(id);
}
mtr.commit();
pages.erase(id);
return nullptr;
return undo_page;
}
bool purge_sys_t::rseg_get_next_history_log()
@ -1062,15 +1054,8 @@ static void trx_purge_close_tables(purge_node_t *node, THD *thd)
void purge_sys_t::wait_FTS(bool also_sys)
{
bool paused;
do
{
latch.wr_lock(SRW_LOCK_CALL);
paused= m_FTS_paused || (also_sys && m_SYS_paused);
latch.wr_unlock();
for (const uint32_t mask= also_sys ? ~0U : ~PAUSED_SYS; m_FTS_paused & mask;)
std::this_thread::sleep_for(std::chrono::milliseconds(10));
}
while (paused);
}
__attribute__((nonnull))
@ -1211,123 +1196,108 @@ dict_table_t *purge_sys_t::close_and_reopen(table_id_t id, THD *thd,
/** Run a purge batch.
@param n_purge_threads number of purge threads
@param thd purge coordinator thread handle
@param n_work_items number of work items (currently tables) to process
@return new purge_sys.head */
static purge_sys_t::iterator
trx_purge_attach_undo_recs(ulint n_purge_threads, THD *thd)
static purge_sys_t::iterator trx_purge_attach_undo_recs(THD *thd,
ulint *n_work_items)
{
que_thr_t* thr;
ulint i;
que_thr_t *thr;
purge_sys_t::iterator head= purge_sys.tail;
ut_a(n_purge_threads > 0);
ut_a(UT_LIST_GET_LEN(purge_sys.query->thrs) >= n_purge_threads);
/* Fetch and parse the UNDO records. The UNDO records are added
to a per purge node vector. */
thr= nullptr;
purge_sys_t::iterator head = purge_sys.tail;
std::unordered_map<table_id_t, purge_node_t *>
table_id_map(TRX_PURGE_TABLE_BUCKETS);
purge_sys.m_active= true;
MDL_context *const mdl_context=
static_cast<MDL_context*>(thd_mdl_context(thd));
ut_ad(mdl_context);
const size_t max_pages=
std::min(buf_pool.curr_size * 3 / 4, size_t{srv_purge_batch_size});
while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown)
{
/* Track the max {trx_id, undo_no} for truncating the
UNDO logs once we have purged the records. */
if (head <= purge_sys.tail)
head= purge_sys.tail;
/* Fetch the next record, and advance the purge_sys.tail. */
trx_purge_rec_t purge_rec= purge_sys.fetch_next_rec();
if (!purge_rec.undo_rec)
{
if (!purge_rec.roll_ptr)
break;
ut_ad(purge_rec.roll_ptr == 1);
continue;
}
table_id_t table_id= trx_undo_rec_get_table_id(purge_rec.undo_rec);
purge_node_t *&table_node= table_id_map[table_id];
if (table_node)
ut_ad(!table_node->in_progress);
if (!table_node)
{
std::pair<dict_table_t *, MDL_ticket *> p;
p.first= trx_purge_table_open(table_id, mdl_context, &p.second);
if (p.first == reinterpret_cast<dict_table_t *>(-1))
p.first= purge_sys.close_and_reopen(table_id, thd, &p.second);
if (!thr || !(thr= UT_LIST_GET_NEXT(thrs, thr)))
thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
++*n_work_items;
table_node= static_cast<purge_node_t *>(thr->child);
ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE);
ut_d(auto pair=) table_node->tables.emplace(table_id, p);
ut_ad(pair.second);
if (p.first)
goto enqueue;
}
else if (table_node->tables[table_id].first)
{
enqueue:
table_node->undo_recs.push(purge_rec);
ut_ad(!table_node->in_progress);
}
if (purge_sys.n_pages_handled() >= max_pages)
break;
}
purge_sys.m_active= false;
#ifdef UNIV_DEBUG
i = 0;
/* Debug code to validate some pre-requisites and reset done flag. */
for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
thr != NULL && i < n_purge_threads;
thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
for (ulint i= 0; thr && i < *n_work_items;
i++, thr= UT_LIST_GET_NEXT(thrs, thr))
{
purge_node_t *node= static_cast<purge_node_t*>(thr->child);
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
ut_ad(!node->in_progress);
node->in_progress= true;
}
purge_node_t* node;
/* Get the purge node. */
node = (purge_node_t*) thr->child;
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
ut_ad(node->undo_recs.empty());
ut_ad(!node->in_progress);
ut_d(node->in_progress = true);
}
/* There should never be fewer nodes than threads, the inverse
however is allowed because we only use purge threads as needed. */
ut_ad(i == n_purge_threads);
for (; thr; thr= UT_LIST_GET_NEXT(thrs, thr))
{
purge_node_t *node= static_cast<purge_node_t*>(thr->child);
ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
ut_ad(!node->in_progress);
ut_ad(node->undo_recs.empty());
}
#endif
/* Fetch and parse the UNDO records. The UNDO records are added
to a per purge node vector. */
thr = UT_LIST_GET_FIRST(purge_sys.query->thrs);
ut_ad(head <= purge_sys.tail);
ut_ad(head <= purge_sys.tail);
i = 0;
std::unordered_map<table_id_t, purge_node_t*>
table_id_map(TRX_PURGE_TABLE_BUCKETS);
purge_sys.m_active = true;
MDL_context* const mdl_context
= static_cast<MDL_context*>(thd_mdl_context(thd));
ut_ad(mdl_context);
const size_t max_pages = std::min(buf_pool.curr_size * 3 / 4,
size_t{srv_purge_batch_size});
while (UNIV_LIKELY(srv_undo_sources) || !srv_fast_shutdown) {
/* Track the max {trx_id, undo_no} for truncating the
UNDO logs once we have purged the records. */
if (head <= purge_sys.tail) {
head = purge_sys.tail;
}
/* Fetch the next record, and advance the purge_sys.tail. */
trx_purge_rec_t purge_rec = purge_sys.fetch_next_rec();
if (!purge_rec.undo_rec) {
if (!purge_rec.roll_ptr) {
break;
}
ut_ad(purge_rec.roll_ptr == 1);
continue;
}
table_id_t table_id = trx_undo_rec_get_table_id(
purge_rec.undo_rec);
purge_node_t*& table_node = table_id_map[table_id];
if (!table_node) {
std::pair<dict_table_t*,MDL_ticket*> p;
p.first = trx_purge_table_open(table_id, mdl_context,
&p.second);
if (p.first == reinterpret_cast<dict_table_t*>(-1)) {
p.first = purge_sys.close_and_reopen(
table_id, thd, &p.second);
}
thr = UT_LIST_GET_NEXT(thrs, thr);
if (!(++i % n_purge_threads)) {
thr = UT_LIST_GET_FIRST(
purge_sys.query->thrs);
}
table_node = static_cast<purge_node_t*>(thr->child);
ut_a(que_node_get_type(table_node) == QUE_NODE_PURGE);
ut_d(auto i=)
table_node->tables.emplace(table_id, p);
ut_ad(i.second);
if (p.first) {
goto enqueue;
}
} else if (table_node->tables[table_id].first) {
enqueue:
table_node->undo_recs.push(purge_rec);
}
if (purge_sys.n_pages_handled() >= max_pages) {
break;
}
}
purge_sys.m_active = false;
ut_ad(head <= purge_sys.tail);
return head;
return head;
}
extern tpool::waitable_task purge_worker_task;
@ -1385,68 +1355,89 @@ Run a purge batch.
@return number of undo log pages handled in the batch */
TRANSACTIONAL_TARGET ulint trx_purge(ulint n_tasks, ulint history_size)
{
ut_ad(n_tasks > 0);
ut_ad(n_tasks > 0);
purge_sys.clone_oldest_view();
purge_sys.clone_oldest_view();
#ifdef UNIV_DEBUG
if (srv_purge_view_update_only_debug) {
return(0);
}
#endif /* UNIV_DEBUG */
ut_d(if (srv_purge_view_update_only_debug) return 0);
THD* const thd = current_thd;
THD *const thd= current_thd;
/* Fetch the UNDO recs that need to be purged. */
const purge_sys_t::iterator head
= trx_purge_attach_undo_recs(n_tasks, thd);
const size_t n_pages = purge_sys.n_pages_handled();
/* Fetch the UNDO recs that need to be purged. */
ulint n_work= 0;
const purge_sys_t::iterator head= trx_purge_attach_undo_recs(thd, &n_work);
const size_t n_pages= purge_sys.n_pages_handled();
{
ulint delay = n_pages ? srv_max_purge_lag : 0;
if (UNIV_UNLIKELY(delay)) {
if (delay >= history_size) {
no_throttle:
delay = 0;
} else if (const ulint max_delay =
srv_max_purge_lag_delay) {
delay = std::min(max_delay,
10000 * history_size / delay
- 5000);
} else {
goto no_throttle;
}
}
srv_dml_needed_delay = delay;
}
{
ulint delay= n_pages ? srv_max_purge_lag : 0;
if (UNIV_UNLIKELY(delay))
{
if (delay >= history_size)
no_throttle:
delay= 0;
else if (const ulint max_delay= srv_max_purge_lag_delay)
delay= std::min(max_delay, 10000 * history_size / delay - 5000);
else
goto no_throttle;
}
srv_dml_needed_delay= delay;
}
que_thr_t* thr = nullptr;
ut_ad(n_tasks);
que_thr_t *thr= nullptr;
/* Submit tasks to workers queue if using multi-threaded purge. */
for (ulint i = n_tasks; --i; ) {
thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
ut_a(thr);
srv_que_task_enqueue_low(thr);
srv_thread_pool->submit_task(&purge_worker_task);
}
if (n_work)
{
for (auto i= n_work; i--; )
{
if (!thr)
thr= UT_LIST_GET_FIRST(purge_sys.query->thrs);
else
thr= UT_LIST_GET_NEXT(thrs, thr);
thr = que_fork_scheduler_round_robin(purge_sys.query, thr);
if (!thr)
break;
que_run_threads(thr);
ut_ad(thr->state == QUE_THR_COMPLETED);
thr->state= QUE_THR_RUNNING;
thr->run_node= thr;
thr->prev_node= thr->common.parent;
purge_sys.query->state= QUE_FORK_ACTIVE;
purge_sys.query->last_sel_node= nullptr;
srv_que_task_enqueue_low(thr);
}
trx_purge_wait_for_workers_to_complete();
/*
To reduce context switches we only submit at most n_tasks-1 worker task.
(we can use less tasks, if there is not enough work)
for (thr = UT_LIST_GET_FIRST(purge_sys.query->thrs); thr;
thr = UT_LIST_GET_NEXT(thrs, thr)) {
purge_node_t* node = static_cast<purge_node_t*>(thr->child);
trx_purge_close_tables(node, thd);
node->tables.clear();
}
The coordinator does worker's job, instead of waiting and sitting idle,
then waits for all others to finish.
purge_sys.batch_cleanup(head);
This also means if innodb_purge_threads=1, the coordinator does all
the work alone.
*/
const ulint workers{std::min(n_work, n_tasks) - 1};
for (ulint i= 0; i < workers; i++)
srv_thread_pool->submit_task(&purge_worker_task);
srv_purge_worker_task_low();
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages);
if (workers)
trx_purge_wait_for_workers_to_complete();
return n_pages;
for (thr= UT_LIST_GET_FIRST(purge_sys.query->thrs); thr && n_work--;
thr= UT_LIST_GET_NEXT(thrs, thr))
{
purge_node_t *node= static_cast<purge_node_t*>(thr->child);
trx_purge_close_tables(node, thd);
node->tables.clear();
}
}
purge_sys.batch_cleanup(head);
MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages);
return n_pages;
}

View file

@ -2045,170 +2045,128 @@ err_exit:
/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
/** Copy an undo record to heap.
@param[in] roll_ptr roll pointer to a record that exists
@param[in,out] heap memory heap where copied */
static
trx_undo_rec_t*
trx_undo_get_undo_rec_low(
roll_ptr_t roll_ptr,
mem_heap_t* heap)
static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mem_heap_t *v_heap,
dtuple_t **vrow, ulint v_status,
const trx_undo_rec_t *undo_rec);
inline const buf_block_t *
purge_sys_t::view_guard::get(const page_id_t id, mtr_t *mtr)
{
ulint rseg_id;
uint32_t page_no;
uint16_t offset;
bool is_insert;
mtr_t mtr;
trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no, &offset);
ut_ad(page_no > FSP_FIRST_INODE_PAGE_NO);
ut_ad(offset >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
trx_rseg_t *rseg= &trx_sys.rseg_array[rseg_id];
ut_ad(rseg->is_persistent());
mtr.start();
trx_undo_rec_t *undo_rec= nullptr;
if (buf_block_t* undo_page=
buf_page_get(page_id_t(rseg->space->id, page_no), 0, RW_S_LATCH, &mtr))
buf_block_t *block;
ut_ad(mtr->is_active());
if (!latch)
{
buf_page_make_young_if_needed(&undo_page->page);
undo_rec= undo_page->page.frame + offset;
const size_t end= mach_read_from_2(undo_rec);
if (UNIV_UNLIKELY(end <= offset ||
end >= srv_page_size - FIL_PAGE_DATA_END))
undo_rec= nullptr;
else
decltype(purge_sys.pages)::const_iterator i= purge_sys.pages.find(id);
if (i != purge_sys.pages.end())
{
size_t len{end - offset};
undo_rec=
static_cast<trx_undo_rec_t*>(mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(undo_rec, len);
block= i->second;
ut_ad(block);
return block;
}
}
mtr.commit();
return undo_rec;
}
/** Copy an undo record to heap, to check if a secondary index record
can be safely purged.
@param trx_id DB_TRX_ID corresponding to roll_ptr
@param name table name
@param roll_ptr DB_ROLL_PTR pointing to the undo log record
@param heap memory heap for allocation
@return copy of the record
@retval nullptr if the version is visible to purge_sys.view */
static trx_undo_rec_t *trx_undo_get_rec_if_purgeable(trx_id_t trx_id,
const table_name_t &name,
roll_ptr_t roll_ptr,
mem_heap_t* heap)
{
block= buf_pool.page_fix(id);
if (block)
{
purge_sys_t::view_guard check;
if (!check.view().changes_visible(trx_id))
return trx_undo_get_undo_rec_low(roll_ptr, heap);
mtr->memo_push(block, MTR_MEMO_BUF_FIX);
if (latch)
/* In MVCC operations (outside purge tasks), we will refresh the
buf_pool.LRU position. In purge, we expect the page to be freed
soon, at the end of the current batch. */
buf_page_make_young_if_needed(&block->page);
}
return nullptr;
}
/** Copy an undo record to heap.
@param trx_id DB_TRX_ID corresponding to roll_ptr
@param name table name
@param roll_ptr DB_ROLL_PTR pointing to the undo log record
@param heap memory heap for allocation
@return copy of the record
@retval nullptr if the undo log is not available */
static trx_undo_rec_t *trx_undo_get_undo_rec(trx_id_t trx_id,
const table_name_t &name,
roll_ptr_t roll_ptr,
mem_heap_t *heap)
{
{
purge_sys_t::end_view_guard check;
if (!check.view().changes_visible(trx_id))
return trx_undo_get_undo_rec_low(roll_ptr, heap);
}
return nullptr;
return block;
}
/** Build a previous version of a clustered index record. The caller
must hold a latch on the index page of the clustered index record.
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is
allocated
@param old_vers previous version or NULL if rec is the
first inserted version, or if history data
has been deleted (an error), or if the purge
could have removed the version
though it has not yet done so
@param v_heap memory heap used to create vrow
dtuple if it is not yet created. This heap
diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param v_row virtual column info, if any
@param v_status status determine if it is going into this
function by purge thread or not.
And if we read "after image" of undo log
@param undo_block undo log block which was cached during
online dml apply or nullptr
@param rec version of a clustered index record
@param index clustered index
@param offsets rec_get_offsets(rec, index)
@param heap memory heap from which the memory needed is allocated
@param old_vers previous version, or NULL if rec is the first inserted
version, or if history data has been deleted (an error),
or if the purge could have removed the version though
it has not yet done so
@param mtr mini-transaction
@param v_status TRX_UNDO_PREV_IN_PURGE, ...
@param v_heap memory heap used to create vrow dtuple if it is not yet
created. This heap diffs from "heap" above in that it could be
prebuilt->old_vers_heap for selection
@param vrow virtual column info, if any
@return error code
@retval DB_SUCCESS if previous version was successfully built,
or if it was an insert or the undo record refers to the table before rebuild
@retval DB_MISSING_HISTORY if the history is missing */
TRANSACTIONAL_TARGET
dberr_t
trx_undo_prev_version_build(
const rec_t *rec,
dict_index_t *index,
rec_offs *offsets,
mem_heap_t *heap,
rec_t **old_vers,
mem_heap_t *v_heap,
dtuple_t **vrow,
ulint v_status)
dberr_t trx_undo_prev_version_build(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mtr_t *mtr,
ulint v_status,
mem_heap_t *v_heap, dtuple_t **vrow)
{
dtuple_t* entry;
trx_id_t rec_trx_id;
undo_no_t undo_no;
table_id_t table_id;
trx_id_t trx_id;
roll_ptr_t roll_ptr;
upd_t* update;
byte type;
byte info_bits;
byte cmpl_info;
bool dummy_extern;
byte* buf;
ut_ad(!index->table->is_temporary());
ut_ad(rec_offs_validate(rec, index, offsets));
ut_ad(!index->table->is_temporary());
ut_ad(rec_offs_validate(rec, index, offsets));
const roll_ptr_t roll_ptr= row_get_rec_roll_ptr(rec, index, offsets);
*old_vers= nullptr;
roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
if (trx_undo_roll_ptr_is_insert(roll_ptr))
/* The record rec is the first inserted version */
return DB_SUCCESS;
*old_vers = NULL;
ut_ad(roll_ptr < 1ULL << 55);
ut_ad(uint16_t(roll_ptr) >= TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
ut_ad(uint32_t(roll_ptr >> 16) >= FSP_FIRST_INODE_PAGE_NO);
if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
/* The record rec is the first inserted version */
return DB_SUCCESS;
}
const trx_id_t rec_trx_id= row_get_rec_trx_id(rec, index, offsets);
mariadb_increment_undo_records_read();
rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
ut_ad(!index->table->skip_alter_undo);
ut_ad(!index->table->skip_alter_undo);
mariadb_increment_undo_records_read();
const auto savepoint= mtr->get_savepoint();
dberr_t err= DB_MISSING_HISTORY;
purge_sys_t::view_guard check{v_status == TRX_UNDO_CHECK_PURGE_PAGES
? purge_sys_t::view_guard::PURGE
: v_status == TRX_UNDO_CHECK_PURGEABILITY
? purge_sys_t::view_guard::VIEW
: purge_sys_t::view_guard::END_VIEW};
if (!check.view().changes_visible(rec_trx_id))
{
trx_undo_rec_t *undo_rec= nullptr;
static_assert(ROLL_PTR_RSEG_ID_POS == 48, "");
static_assert(ROLL_PTR_PAGE_POS == 16, "");
if (const buf_block_t *undo_page=
check.get(page_id_t{trx_sys.rseg_array[(roll_ptr >> 48) & 0x7f].
space->id,
uint32_t(roll_ptr >> 16)}, mtr))
{
static_assert(ROLL_PTR_BYTE_POS == 0, "");
const uint16_t offset{uint16_t(roll_ptr)};
undo_rec= undo_page->page.frame + offset;
const size_t end= mach_read_from_2(undo_rec);
if (UNIV_UNLIKELY(end > offset &&
end < srv_page_size - FIL_PAGE_DATA_END))
err= trx_undo_prev_version(rec, index, offsets, heap,
old_vers, v_heap, vrow, v_status, undo_rec);
}
}
trx_undo_rec_t* undo_rec = v_status == TRX_UNDO_CHECK_PURGEABILITY
? trx_undo_get_rec_if_purgeable(rec_trx_id, index->table->name,
roll_ptr, heap)
: trx_undo_get_undo_rec(rec_trx_id, index->table->name,
roll_ptr, heap);
if (!undo_rec) {
return DB_MISSING_HISTORY;
}
mtr->rollback_to_savepoint(savepoint);
return err;
}
static dberr_t trx_undo_prev_version(const rec_t *rec, dict_index_t *index,
rec_offs *offsets, mem_heap_t *heap,
rec_t **old_vers, mem_heap_t *v_heap,
dtuple_t **vrow, ulint v_status,
const trx_undo_rec_t *undo_rec)
{
byte type, cmpl_info;
bool dummy_extern;
undo_no_t undo_no;
table_id_t table_id;
const byte *ptr =
trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
@ -2220,6 +2178,10 @@ trx_undo_prev_version_build(
return DB_SUCCESS;
}
trx_id_t trx_id;
roll_ptr_t roll_ptr;
byte info_bits;
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
@ -2247,10 +2209,12 @@ trx_undo_prev_version_build(
ptr = trx_undo_rec_skip_row_ref(ptr, index);
upd_t* update;
ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
roll_ptr, info_bits,
heap, &update);
ut_a(ptr);
byte* buf;
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
/* We should confirm the existence of disowned external data,
@ -2276,9 +2240,10 @@ trx_undo_prev_version_build(
those fields that update updates to become externally stored
fields. Store the info: */
entry = row_rec_to_index_entry(rec, index, offsets, heap);
dtuple_t* entry = row_rec_to_index_entry(rec, index, offsets,
heap);
/* The page containing the clustered index record
corresponding to entry is latched in mtr. Thus the
corresponding to entry is latched. Thus the
following call is safe. */
if (!row_upd_index_replace_new_col_vals(entry, *index, update,
heap)) {

View file

@ -92,6 +92,25 @@ static void test_ssux_lock()
ssux.wr_u_downgrade();
ssux.u_unlock();
}
for (auto j= M_ROUNDS; j--; )
{
ssux.rd_lock();
assert(!critical);
if (ssux.rd_u_upgrade_try())
{
assert(!critical);
ssux.rd_unlock();
ssux.u_wr_upgrade();
assert(!critical);
critical= true;
critical= false;
ssux.wr_u_downgrade();
ssux.u_rd_downgrade();
}
assert(!critical);
ssux.rd_unlock();
}
}
}
@ -129,6 +148,14 @@ static void test_sux_lock()
critical= false;
sux.x_u_downgrade();
sux.u_unlock();
sux.s_lock();
std::ignore= sux.s_x_upgrade();
assert(!critical);
sux.x_lock();
critical= true;
sux.x_unlock();
critical= false;
sux.x_unlock();
}
}
}

View file

@ -3077,21 +3077,25 @@ my_bool _ma_check_if_right_bitmap_type(MARIA_HA *info,
int _ma_bitmap_create_first(MARIA_SHARE *share)
{
uint block_size= share->bitmap.block_size;
size_t error;
File file= share->bitmap.file.file;
uchar marker[CRC_SIZE];
uchar *temp_buff;
if (!(temp_buff= (uchar*) my_alloca(block_size)))
return 1;
bzero(temp_buff, block_size);
/*
Next write operation of the page will write correct CRC
if it is needed
*/
int4store(marker, MARIA_NO_CRC_BITMAP_PAGE);
int4store(temp_buff + block_size - CRC_SIZE, MARIA_NO_CRC_BITMAP_PAGE);
if (mysql_file_chsize(file, block_size - sizeof(marker),
0, MYF(MY_WME)) > 0 ||
my_pwrite(file, marker, sizeof(marker),
block_size - sizeof(marker),
MYF(MY_NABP | MY_WME)))
error= my_pwrite(file, temp_buff, block_size, 0, MYF(MY_NABP | MY_WME));
my_afree(temp_buff);
if (error)
return 1;
share->state.state.data_file_length= block_size;
_ma_bitmap_delete_all(share);
return 0;

View file

@ -420,6 +420,8 @@ int maria_chk_size(HA_CHECK *param, register MARIA_HA *info)
/* We cannot check file sizes for S3 */
DBUG_RETURN(0);
}
/* We should never come here with internal temporary tables */
DBUG_ASSERT(!share->internal_table);
if (!(param->testflag & T_SILENT))
puts("- check file-size");
@ -715,6 +717,8 @@ static int chk_index_down(HA_CHECK *param, MARIA_HA *info,
MARIA_PAGE ma_page;
DBUG_ENTER("chk_index_down");
DBUG_ASSERT(!share->internal_table);
/* Key blocks must lay within the key file length entirely. */
if (page + keyinfo->block_length > share->state.state.key_file_length)
{
@ -2467,7 +2471,16 @@ static int initialize_variables_for_repair(HA_CHECK *param,
return 1;
/* calculate max_records */
sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
if (!share->internal_table)
{
/* Get real file size */
sort_info->filelength= my_seek(info->dfile.file, 0L, MY_SEEK_END, MYF(0));
}
else
{
/* For internal temporary files we are using the logical file length */
sort_info->filelength= share->state.state.data_file_length;
}
param->max_progress= sort_info->filelength;
if ((param->testflag & T_CREATE_MISSING_KEYS) ||
@ -2865,7 +2878,8 @@ int maria_repair(HA_CHECK *param, register MARIA_HA *info,
{
fputs(" \r",stdout); fflush(stdout);
}
if (mysql_file_chsize(share->kfile.file,
if (!share->internal_table &&
mysql_file_chsize(share->kfile.file,
share->state.state.key_file_length, 0, MYF(0)) > 0)
{
_ma_check_print_warning(param,
@ -4184,7 +4198,8 @@ int maria_repair_by_sort(HA_CHECK *param, register MARIA_HA *info,
if (param->testflag & T_CALC_CHECKSUM)
share->state.state.checksum=param->glob_crc;
if (mysql_file_chsize(share->kfile.file,
if (!share->internal_table &&
mysql_file_chsize(share->kfile.file,
share->state.state.key_file_length, 0, MYF(0)) > 0)
_ma_check_print_warning(param,
"Can't change size of indexfile, error: %d",
@ -4733,7 +4748,8 @@ int maria_repair_parallel(HA_CHECK *param, register MARIA_HA *info,
if (param->testflag & T_CALC_CHECKSUM)
share->state.state.checksum=param->glob_crc;
if (mysql_file_chsize(share->kfile.file,
if (!share->internal_table &&
mysql_file_chsize(share->kfile.file,
share->state.state.key_file_length, 0, MYF(0)) > 0)
_ma_check_print_warning(param,
"Can't change size of indexfile, error: %d",
@ -6145,6 +6161,8 @@ int maria_test_if_almost_full(MARIA_HA *info)
{
MARIA_SHARE *share= info->s;
DBUG_ASSERT(!share->internal_table);
if (share->options & HA_OPTION_COMPRESS_RECORD)
return 0;
return mysql_file_seek(share->kfile.file, 0L, MY_SEEK_END,

View file

@ -129,11 +129,17 @@ int maria_delete_all_rows(MARIA_HA *info)
_ma_unmap_file(info);
#endif
if (_ma_flush_table_files(info, MARIA_FLUSH_DATA|MARIA_FLUSH_INDEX,
if (share->internal_table)
/*
Avoid truncate of internal temporary tables as this can have a big
performance overhead when called by mysql_handle_single_derived()
tables in MariaDB as part of split materialization.
*/;
else if (_ma_flush_table_files(info, MARIA_FLUSH_DATA|MARIA_FLUSH_INDEX,
FLUSH_IGNORE_CHANGED, FLUSH_IGNORE_CHANGED) ||
mysql_file_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) > 0 ||
mysql_file_chsize(share->kfile.file, share->base.keystart, 0,
MYF(MY_WME)) > 0)
mysql_file_chsize(info->dfile.file, 0, 0, MYF(MY_WME)) > 0 ||
mysql_file_chsize(share->kfile.file, share->base.keystart, 0,
MYF(MY_WME)) > 0)
goto err;
if (info->s->tracked)

View file

@ -237,7 +237,7 @@ set session spider_suppress_comment_ignored_warning=0;
CREATE TABLE tbl_a (a INT) ENGINE=Spider DEFAULT CHARSET=utf8
REMOTE_TABLE=t CONNECTION="srv s_2_1";
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv s_2_1' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv s_2_1' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
drop table tbl_a;
CREATE TABLE tbl_a (
a INT,
@ -411,7 +411,7 @@ PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
Warnings:
Warning 138 Spider table params in COMMENT or CONNECTION strings have been deprecated and will be removed in a future release. Please use table options instead.
Warning 138 Spider table params in COMMENT or CONNECTION strings have been deprecated and will be removed in a future release. Please use table options instead.
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
ERROR HY000: Remote table 'auto_test_remote2.tbl_a#P#p2' is not found
DROP TABLE tbl_a;
@ -427,9 +427,9 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_1"',
PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
);
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_1"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_1"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_a"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
ERROR HY000: Unable to connect to foreign data source: localhost
DROP TABLE tbl_a;
@ -447,10 +447,10 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_2"',
PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2"
);
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb"), (3, "ccc"), (4, "ddd");
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
connection child2_1;
SELECT * FROM tbl_a;
a b
@ -477,7 +477,7 @@ PARTITION p1 VALUES LESS THAN (3) COMMENT='srv "s_2_2" read_only_mode "0"',
PARTITION p2 VALUES LESS THAN MAXVALUE REMOTE_SERVER="s_2_2" READ_ONLY=NO
);
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2" read_only_mode "0"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'srv "s_2_2" read_only_mode "0"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
INSERT INTO tbl_a VALUES (1, "aaa"), (2, "bbb");
ERROR HY000: Table 'auto_test_local.tbl_a' is read only
INSERT INTO tbl_a VALUES (3, "ccc"), (4, "ddd");
@ -500,7 +500,7 @@ PRIMARY KEY(a)
) ENGINE=Spider DEFAULT CHARSET=utf8
REMOTE_SERVER="s_2_1" COMMENT='tbl "tbl_b"' REMOTE_TABLE="tbl_a";
Warnings:
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_b"' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified
Warning 12529 The table or partition COMMENT or CONNECTION string 'tbl "tbl_b"' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified
select table_name, server, tgt_table_name from mysql.spider_tables;
table_name server tgt_table_name
tbl_a s_2_1 tbl_a

View file

@ -402,7 +402,7 @@ SPIDER_CONN *spider_create_conn(
char *tmp_name, *tmp_host, *tmp_username, *tmp_password, *tmp_socket;
char *tmp_wrapper, *tmp_db, *tmp_ssl_ca, *tmp_ssl_capath, *tmp_ssl_cert;
char *tmp_ssl_cipher, *tmp_ssl_key, *tmp_default_file, *tmp_default_group;
char *tmp_dsn, *tmp_filedsn, *tmp_driver;
char *tmp_dsn, *tmp_filedsn, *tmp_driver, *tmp_odbc_conn_str;
DBUG_ENTER("spider_create_conn");
if (unlikely(!UTC))
@ -454,6 +454,8 @@ SPIDER_CONN *spider_create_conn(
(uint) (share->tgt_filedsns_lengths[link_idx] + 1),
&tmp_driver,
(uint) (share->tgt_drivers_lengths[link_idx] + 1),
&tmp_odbc_conn_str,
(uint) (share->tgt_odbc_conn_str_length + 1),
&need_mon, (uint) (sizeof(int)),
NullS))
) {
@ -529,6 +531,10 @@ SPIDER_CONN *spider_create_conn(
spider_memcpy_or_null(&conn->tgt_driver, tmp_driver, share->tgt_drivers[link_idx],
&conn->tgt_driver_length,
share->tgt_drivers_lengths[link_idx]);
spider_memcpy_or_null(&conn->tgt_odbc_conn_str, tmp_odbc_conn_str,
share->tgt_odbc_conn_str,
&conn->tgt_odbc_conn_str_length,
share->tgt_odbc_conn_str_length);
conn->tgt_port = share->tgt_ports[link_idx];
conn->tgt_ssl_vsc = share->tgt_ssl_vscs[link_idx];
conn->dbton_id = share->sql_dbton_ids[link_idx];

View file

@ -1485,6 +1485,7 @@ int spider_db_append_key_hint(
if (str->reserve(
hint_str_len - 2 + SPIDER_SQL_INDEX_USE_LEN +
SPIDER_SQL_OPEN_PAREN_LEN + SPIDER_SQL_CLOSE_PAREN_LEN))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
hint_str += 2;
str->q_append(SPIDER_SQL_INDEX_USE_STR, SPIDER_SQL_INDEX_USE_LEN);
str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
@ -1497,10 +1498,11 @@ int spider_db_append_key_hint(
if (str->reserve(
hint_str_len - 3 + SPIDER_SQL_INDEX_IGNORE_LEN +
SPIDER_SQL_OPEN_PAREN_LEN + SPIDER_SQL_CLOSE_PAREN_LEN))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
hint_str += 3;
str->q_append(SPIDER_SQL_INDEX_IGNORE_STR, SPIDER_SQL_INDEX_IGNORE_LEN);
str->q_append(SPIDER_SQL_OPEN_PAREN_STR, SPIDER_SQL_OPEN_PAREN_LEN);
str->q_append(hint_str, hint_str_len - 2);
str->q_append(hint_str, hint_str_len - 3);
str->q_append(SPIDER_SQL_CLOSE_PAREN_STR, SPIDER_SQL_CLOSE_PAREN_LEN);
} else if (str->reserve(hint_str_len + SPIDER_SQL_SPACE_LEN))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);

View file

@ -7624,8 +7624,8 @@ int spider_mbase_share::convert_key_hint_str()
roop_count < (int) table_share->keys; roop_count++, tmp_key_hint++)
{
tmp_key_hint->length(0);
if (tmp_key_hint->append(spider_share->key_hint->ptr(),
spider_share->key_hint->length(), system_charset_info))
if (tmp_key_hint->append(spider_share->key_hint[roop_count].ptr(),
spider_share->key_hint[roop_count].length(), system_charset_info))
DBUG_RETURN(HA_ERR_OUT_OF_MEM);
}
} else {

View file

@ -73,7 +73,7 @@
#define ER_SPIDER_INVALID_TABLE_OPTION_NUM 12528
#define ER_SPIDER_INVALID_TABLE_OPTION_STR "The table option %s=%s is invalid"
#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_NUM 12529
#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_STR "The table or partition COMMENT or CONNECTION string '%s' is not used as connection info because spider_ignore_comment is 1 or at least one table option has been specified"
#define ER_SPIDER_COMMENT_CONNECTION_IGNORED_BY_TABLE_OPTIONS_STR "The table or partition COMMENT or CONNECTION string '%s' is not parsed for table params because spider_ignore_comment is 1 or at least one table option has been specified"
#define ER_SPIDER_CANT_USE_BOTH_INNER_XA_AND_SNAPSHOT_NUM 12601
#define ER_SPIDER_CANT_USE_BOTH_INNER_XA_AND_SNAPSHOT_STR "Can't use both spider_use_consistent_snapshot = 1 and spider_internal_xa = 1"

View file

@ -678,6 +678,7 @@ typedef struct st_spider_conn
char *tgt_dsn;
char *tgt_filedsn;
char *tgt_driver;
char *tgt_odbc_conn_str;
long tgt_port;
long tgt_ssl_vsc;
@ -697,6 +698,7 @@ typedef struct st_spider_conn
uint tgt_dsn_length;
uint tgt_filedsn_length;
uint tgt_driver_length;
uint tgt_odbc_conn_str_length;
uint dbton_id;
volatile
@ -1195,6 +1197,7 @@ typedef struct st_spider_share
char **tgt_dsns;
char **tgt_filedsns;
char **tgt_drivers;
char *tgt_odbc_conn_str;
char **static_link_ids;
char **tgt_pk_names;
char **tgt_sequence_names;
@ -1237,6 +1240,7 @@ typedef struct st_spider_share
uint *tgt_dsns_lengths;
uint *tgt_filedsns_lengths;
uint *tgt_drivers_lengths;
uint tgt_odbc_conn_str_length;
uint *static_link_ids_lengths;
uint *tgt_pk_names_lengths;
uint *tgt_sequence_names_lengths;

View file

@ -860,6 +860,8 @@ int spider_free_share_alloc(
}
spider_free(spider_current_trx, share->tgt_drivers, MYF(0));
}
if (share->tgt_odbc_conn_str)
spider_free(spider_current_trx, share->tgt_odbc_conn_str, MYF(0));
if (share->tgt_pk_names)
{
for (roop_count = 0; roop_count < (int) share->tgt_pk_names_length;
@ -2483,9 +2485,6 @@ int st_spider_param_string_parse::fail(bool restore_delim)
/*
Parse connection information specified by COMMENT, CONNECT, or engine-defined
options.
TODO: Deprecate the connection specification by COMMENT and CONNECT,
and then solely utilize engine-defined options.
*/
int spider_parse_connect_info(
SPIDER_SHARE *share,
@ -2512,6 +2511,7 @@ int spider_parse_connect_info(
DBUG_PRINT("info",("spider s->path=%s", table_share->path.str));
DBUG_PRINT("info",
("spider s->normalized_path=%s", table_share->normalized_path.str));
parse.error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
spider_get_partition_info(share->table_name, share->table_name_length,
table_share, part_info, &part_elem, &sub_elem);
/* Find the correct table options, depending on if we are parsing a
@ -2600,8 +2600,11 @@ int spider_parse_connect_info(
goto error_alloc_conn_string;
}
DBUG_ASSERT(error_num_1 == 0);
/* If the connect string is explicitly ignored for parsing, or if
any option is specified, skip the parsing. */
/*
If the COMMENT or CONNECTION string is explicitly ignored for
table param parsing, or if any option is specified, skip the
parsing.
*/
if (spider_param_ignore_comments(current_thd) || option_specified)
{
if (!spider_param_suppress_comment_ignored_warning(current_thd))
@ -2621,7 +2624,6 @@ int spider_parse_connect_info(
"and will be removed in a future release. "
"Please use table options instead.");
start_param = connect_string;
parse.error_num = ER_SPIDER_INVALID_CONNECT_INFO_NUM;
while (*start_param != '\0')
{
if (parse.locate_param_def(start_param))