mariadb/mysql-test/main/derived_split_innodb.test
Galina Shalygina 4bc1860eb4 MDEV-23878 Wrong result with semi-join and splittable derived table
Due to this bug a wrong result might be expected from queries with
an IN subquery predicate in the WHERE clause and a derived table in the
FROM clause to which split optimization could be applied.

The function JOIN::fix_all_splittings_in_plan() used the value of the
bitmap JOIN::sjm_lookup_tables() such as it had been left after the
search for the best plan for the select containing the splittable
derived table. That value could not be guaranteed to be correct. So the
recalculation of this bitmap is needed to exclude the plans with key
accesses from SJM lookup tables.

Approved by Igor Babaev <igor@maridb.com>
2024-05-07 12:21:35 +02:00

548 lines
13 KiB
Text

--source include/have_innodb.inc
--source include/default_optimizer_switch.inc
--source include/have_sequence.inc
--echo #
--echo # MDEV-16917: do not use splitting for derived with join cache
--echo #
CREATE TABLE t1 (
n1 int(10) NOT NULL,
n2 int(10) NOT NULL,
c1 char(1) NOT NULL,
KEY c1 (c1),
KEY n1_c1_n2 (n1,c1,n2)
) ENGINE=InnoDB;
INSERT INTO t1 VALUES (0, 2, 'a'), (1, 3, 'a');
insert into t1 select seq+1,seq+2,'c' from seq_1_to_1000;
ANALYZE TABLE t1;
Let $q=
SELECT t1.n1 FROM t1, (SELECT n1, n2 FROM t1 WHERE c1 = 'a' GROUP BY n1) as t
WHERE t.n1 = t1.n1 AND t.n2 = t1.n2 AND c1 = 'a' GROUP BY n1;
eval EXPLAIN $q;
eval $q;
DROP TABLE t1;
--echo #
--echo # MDEV-17211: splittable materialized derived joining 3 tables with
--echo # GROUP BY list containing fields from 2 of them
--echo #
CREATE TABLE t1 (
id1 int, i1 int, id2 int,
PRIMARY KEY (id1), KEY (i1), KEY (id2)
) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1,1,1);
CREATE TABLE t2 (id2 int, i2 int) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1, 1);
CREATE TABLE t3 (id3 int, i3 int, PRIMARY KEY (id3)) ENGINE=InnoDB;
INSERT INTO t3 VALUES (1,1);
let $q=
SELECT id3
FROM (SELECT t3.id3, t2.i2, t1.id2 FROM t3,t1,t2
WHERE t3.i3=t1.id1 AND t2.id2=t1.id2
GROUP BY t3.id3, t1.id2) AS t,
t2
WHERE t2.id2=t.id2;
eval EXPLAIN $q;
eval $q;
DROP TABLE t1,t2,t3;
--echo #
--echo # Bug mdev-17381: equi-join of derived table with join_cache_level=4
--echo #
CREATE TABLE t1 (
id int NOT NULL,
amount decimal DEFAULT NULL,
PRIMARY KEY (id)
) ENGINE=INNODB;
CREATE TABLE t2 (
id int NOT NULL,
name varchar(50) DEFAULT NULL,
PRIMARY KEY (id)
) ENGINE=INNODB;
INSERT INTO t1 VALUES
(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000),
(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000);
INSERT INTO t2 VALUES
(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL),
(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL);
set join_cache_level=4;
let $q=
SELECT t2.id,t2.name,t.total_amt
FROM t2
LEFT JOIN
(SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t
ON t2.id=t.id
WHERE t2.id < 3;
eval $q;
eval EXPLAIN $q;
set join_cache_level=default;
DROP TABLE t1,t2;
--echo #
--echo # Bug mdev-18467: join of grouping view and a base table as inner operand
--echo # of left join with on condition containing impossible range
--echo #
create table t1 (f1 int, f2 int, key(f2)) engine=InnoDB;
insert into t1 values (3,33), (7,77), (1,11);
create table t2 (f1 int, f2 int, primary key (f1)) engine=InnoDB;
insert into t2 values (3,33), (9,99), (1,11);
create view v1 as
select f1, max(f2) as f2 from t2 group by f1;
let $q=
select t.f2
from t1
left join
(v1 join t1 as t on v1.f1=t.f1 and t.f2 = null)
on t1.f1=t.f1;
eval $q;
eval explain $q;
eval set statement optimizer_switch='split_materialized=off' for explain $q;
drop view v1;
drop table t1,t2;
--echo #
--echo # MDEV-23723: Crash when test_if_skip_sort_order() is checked for derived table subject to split
--echo #
CREATE TABLE t1 (a INT, b INT, KEY (a), KEY (a,b)) ENGINE=InnoDB;
CREATE TABLE t2 (c INT, KEY (c)) ENGINE=InnoDB;
SELECT * FROM t1 t1a JOIN t1 t1b;
INSERT INTO t2 VALUES (1),(2);
INSERT INTO t1 VALUES (1,2),(3,4),(5,6),(7,8),(9,10),(11,12);
let $query=
EXPLAIN
SELECT *
FROM
t1 JOIN
(SELECT t1.a, t1.b FROM t1, t2 WHERE t1.b = t2.c GROUP BY t1.a, t1.b) as dt
WHERE
t1.a = dt.a;
eval set statement optimizer_switch='split_materialized=off' for $query;
eval set statement optimizer_switch='split_materialized=on' for $query;
DROP TABLE t1, t2;
--echo #
--echo # Bug mdev-25714: usage non-splitting covering index is cheaper than
--echo # usage of the best splitting index for one group
--echo #
create table t1 (
id int not null, itemid int not null, index idx (itemid)
) engine=innodb;
insert into t1 values (1, 2), (2,2), (4,2), (4,2), (0,3), (3,3);
create table t2 (id int not null) engine=innodb;
insert into t2 values (2);
create table t3 (
id int not null, itemid int not null, userid int not null, primary key (id),
index idx1 (userid, itemid), index idx2 (itemid)
) engine innodb;
insert into t3 values (1,1,1), (2,1,1), (3,2,1), (4,2,1), (5,3,1);
set use_stat_tables='never';
set optimizer_use_condition_selectivity=1;
analyze table t1,t2,t3;
let $q=
select t1.id, t1.itemid, dt.id, t2.id
from t1,
(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
t2
where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
set optimizer_switch='split_materialized=on';
eval explain $q;
eval $q;
set optimizer_switch='split_materialized=off';
eval explain $q;
eval $q;
drop table t1,t2,t3;
set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo #
--echo # MDEV-26337: subquery with groupby and ROLLUP returns incorrect results
--echo # (The testcase is taken from testcase for MDEV-13389 due to it being
--echo # much smaller)
--echo #
create table t3 (a int, b int, c char(127), index idx_b(b)) engine=myisam;
insert into t3 values
(8,11,'aa'), (5,15,'cc'), (1,14,'bb'), (2,12,'aa'), (7,17,'cc'),
(7,18,'aa'), (2,11,'aa'), (7,10,'bb'), (3,11,'dd'), (4,12,'ee'),
(5,14,'dd'), (9,12,'ee');
create table t4 (a int, b int, c char(127), index idx(a,c)) engine=myisam;
insert into t4 values
(7,10,'cc'), (1,20,'aa'), (2,23,'bb'), (7,18,'cc'), (1,30,'bb'),
(4,71,'xx'), (3,15,'aa'), (7,82,'aa'), (8,12,'dd'), (4,15,'aa'),
(11,33,'yy'), (10,42,'zz'), (4,53,'xx'), (10,17,'yy'), (7,12,'cc'),
(8,20,'dd'), (7,32,'bb'), (1,50,'aa'), (3,40,'bb'), (3,77,'aa');
insert into t4 select a+10, b+10, concat(c,'f') from t4;
analyze table t3,t4;
--echo # This should use a plan with LATERAL DERIVED:
explain select t3.a,t3.c,t.max,t.min
from t3 join
(select a, c, max(b) max, min(b) min from t4 group by a,c) t
on t3.a=t.a and t3.c=t.c
where t3.b > 15;
--echo # ... and if one adds WITH ROLLUP, then LATERAL DERIVED is no longer used:
explain select t3.a,t3.c,t.max,t.min
from t3 join
(select a, c, max(b) max, min(b) min from t4 group by a,c with rollup) t
on t3.a=t.a and t3.c=t.c
where t3.b > 15;
drop table t3, t4;
--echo # End of 10.3 tests
--source include/have_sequence.inc
--echo #
--echo # MDEV-26301: Split optimization refills temporary table too many times
--echo #
# 5 values
create table t1(a int, b int);
insert into t1 select seq,seq from seq_1_to_5;
# 5 value groups of size 2 each
create table t2(a int, b int, key(a));
insert into t2
select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
# 5 value groups of size 3 each
create table t3(a int, b int, key(a));
insert into t3
select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
analyze table t1,t2,t3 persistent for all;
explain
select * from
(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
# Now, create tables for Groups.
create table t10 (
grp_id int,
col1 int,
key(grp_id)
);
# 100 groups of 100 values each
insert into t10
select
A.seq,
B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
# and X10 multiplier
create table t11 (
col1 int,
col2 int
);
insert into t11
select A.seq, A.seq from seq_1_to_10 A;
analyze table t10,t11 persistent for all;
let $q1=
select * from
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id) T on T.grp_id=t1.b;
eval
explain $q1;
--echo # The important part in the below output is:
--echo # "lateral": 1,
--echo # "query_block": {
--echo # "select_id": 2,
--echo # "r_loops": 5, <-- must be 5, not 30.
--source include/analyze-format.inc
eval
analyze format=json $q1;
create table t21 (pk int primary key);
insert into t21 values (1),(2),(3);
create table t22 (pk int primary key);
insert into t22 values (1),(2),(3);
# Same as above but throw in a couple of const tables.
explain
select * from
t21, t22,
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id) T on T.grp_id=t1.b
where
t21.pk=1 and t22.pk=2;
explain
select * from
t21,
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from
t22 join t10 left join t11 on t11.col1=t10.col1
where
t22.pk=1
group by grp_id) T on T.grp_id=t1.b
where
t21.pk=1;
# And also add a non-const table
create table t5 (
pk int primary key
);
insert into t5 select seq from seq_1_to_1000;
explain
select * from
t21,
(
(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from
t22 join t10 left join t11 on t11.col1=t10.col1
where
t22.pk=1
group by grp_id) T on T.grp_id=t1.b
where
t21.pk=1;
drop table t1,t2,t3,t5, t10, t11, t21, t22;
# 5 values
create table t1(a int, b int);
insert into t1 select seq,seq from seq_1_to_5;
# 5 value groups of size 2 each
create table t2(a int, b int, key(a));
insert into t2
select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
# 5 value groups of size 3 each
create table t3(a int, b int, key(a));
insert into t3
select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
analyze table t1,t2,t3 persistent for all;
create table t10 (
grp_id int,
col1 int,
key(grp_id)
);
# 100 groups of 100 values each
insert into t10
select
A.seq,
B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
# and X10 multiplier
create table t11 (
col1 int,
col2 int
);
insert into t11
select A.seq, A.seq from seq_1_to_10 A;
analyze table t10,t11 persistent for all;
let $q=
select *
from
(
(t1 left join t2 on t2.a=t1.b)
left join
t3
on t3.a=t1.b
)
left join
(
select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id
)dt
on dt.grp_id=t1.b;
eval explain $q;
eval $q;
set join_cache_level=4;
eval explain $q;
eval $q;
set join_cache_level=default;
drop index a on t2;
drop index a on t3;
eval explain $q;
eval $q;
drop table t1,t2,t3;
drop table t10, t11;
--echo #
--echo # MDEV-31194: Server crash or assertion failure with join_cache_level=4
--echo # (a followup to the above bug, MDEV-26301)
--echo #
CREATE TABLE t1 (a INT);
INSERT INTO t1 VALUES (3),(4);
CREATE TABLE t2 (id INT PRIMARY KEY) ENGINE=Aria;
INSERT INTO t2 VALUES (1),(2);
set @tmp1= @@optimizer_switch, @tmp2= @@join_cache_level;
set
optimizer_switch= 'derived_with_keys=off',
join_cache_level= 4;
SELECT t1.* FROM t1 JOIN (SELECT id, COUNT(*) FROM t2 GROUP BY id) sq ON sq.id= t1.a;
set optimizer_switch= @tmp1, join_cache_level= @tmp2;
# Cleanup
DROP TABLE t1, t2;
--echo #
--echo # MDEV-31403: Server crashes in st_join_table::choose_best_splitting (still)
--echo #
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES
(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15);
CREATE TABLE t2 (b INT) ENGINE=InnoDB;
INSERT INTO t2 VALUES (100),(200);
CREATE TABLE t3 (c INT, d INT, KEY(c)) ENGINE=InnoDB;
INSERT INTO t3 VALUES (1,1),(2,2);
CREATE VIEW v AS SELECT c, d FROM t3 GROUP BY c, d;
SELECT * FROM t1 JOIN t2 WHERE (t1.a, t2.b) IN (SELECT * FROM v);
# Cleanup
DROP VIEW v;
DROP TABLE t1, t2, t3;
--echo #
--echo # MDEV-31279 Crash when lateral derived is guaranteed to return no rows
--echo #
CREATE TABLE t1 (a CHAR(1)) ENGINE=MyISAM;
INSERT INTO t1 VALUES ('1'),('2');
CREATE TABLE t2 (b INT, KEY(b)) ENGINE=MyISAM;
ALTER TABLE t2 DISABLE KEYS;
INSERT INTO t2 VALUES (1),(2),(3);
ALTER TABLE t2 ENABLE KEYS;
CREATE TABLE t3 (c INT) ENGINE=MyISAM;
INSERT INTO t3 (c) SELECT seq FROM seq_1_to_101;
SELECT * FROM t1 WHERE t1.a IN (SELECT b FROM
(SELECT t2.b FROM t2 WHERE NOT EXISTS (SELECT 1 FROM t3) GROUP BY b) sq);
DROP TABLE t1, t2, t3;
--echo #
--echo # MDEV-23878: Wrong result with semi-join and splittable derived table
--echo #
CREATE TABLE t1 (
groupId int,
id int unsigned,
PRIMARY KEY (groupId, id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO t1 VALUES
(8,1),(8,2),(8,3),(8,4),(8,5),(8,6),(8,7),(8,8),(8,9),(8,10),
(8,11),(8,12),(8,13),(8,14),(8,15),(8,16),(8,17),(8,18),(8,19);
let $query=
SELECT COUNT(*) AS cnt FROM t1
JOIN
(
SELECT groupId, id
FROM t1
WHERE id IN (1,2,3,4,5,6)
GROUP BY groupId, id
) AS t2
USING (groupId, id)
WHERE id IN (1,2,3,4,5,6,7,8);
let $tvc_conversion_threshold =
set statement in_predicate_conversion_threshold=2 for;
eval $tvc_conversion_threshold $query;
eval $tvc_conversion_threshold EXPLAIN $query;
let $no_split_materialized_loosescan=
set statement optimizer_switch='split_materialized=off, loosescan=off' for;
# Correct result with split materializied optimization disabled
eval $no_split_materialized_loosescan
$tvc_conversion_threshold
$query;
DROP TABLE t1;
--echo # End of 10.4 tests