mariadb/mysql-test/main/derived_split_innodb.test

507 lines
12 KiB
Text
Raw Normal View History

--source include/have_innodb.inc
2019-09-02 13:06:56 +02:00
--source include/default_optimizer_switch.inc
Updated optimizer costs in multi_range_read_info_const() and sql_select.cc - multi_range_read_info_const now uses the new records_in_range interface - Added handler::avg_io_cost() - Don't calculate avg_io_cost() in get_sweep_read_cost if avg_io_cost is not 1.0. In this case we trust the avg_io_cost() from the handler. - Changed test_quick_select to use TIME_FOR_COMPARE instead of TIME_FOR_COMPARE_IDX to align this with the rest of the code. - Fixed bug when using test_if_cheaper_ordering where we didn't use keyread if index was changed - Fixed a bug where we didn't use index only read when using order-by-index - Added keyread_time() to HEAP. The default keyread_time() was optimized for blocks and not suitable for HEAP. The effect was the HEAP prefered table scans over ranges for btree indexes. - Fixed get_sweep_read_cost() for HEAP tables - Ensure that range and ref have same cost for simple ranges Added a small cost (MULTI_RANGE_READ_SETUP_COST) to ranges to ensure we favior ref for range for simple queries. - Fixed that matching_candidates_in_table() uses same number of records as the rest of the optimizer - Added avg_io_cost() to JT_EQ_REF cost. This helps calculate the cost for HEAP and temporary tables better. A few tests changed because of this. - heap::read_time() and heap::keyread_time() adjusted to not add +1. This was to ensure that handler::keyread_time() doesn't give higher cost for heap tables than for normal tables. One effect of this is that heap and derived tables stored in heap will prefer key access as this is now regarded as cheap. - Changed cost for index read in sql_select.cc to match multi_range_read_info_const(). All index cost calculation is now done trough one function. - 'ref' will now use quick_cost for keys if it exists. This is done so that for '=' ranges, 'ref' is prefered over 'range'. - scan_time() now takes avg_io_costs() into account - get_delayed_table_estimates() uses block_size and avg_io_cost() - Removed default argument to test_if_order_by_key(); simplifies code
2020-02-28 11:59:30 +01:00
--source include/have_sequence.inc
--echo #
--echo # MDEV-16917: do not use splitting for derived with join cache
--echo #
CREATE TABLE t1 (
n1 int(10) NOT NULL,
n2 int(10) NOT NULL,
c1 char(1) NOT NULL,
KEY c1 (c1),
KEY n1_c1_n2 (n1,c1,n2)
) ENGINE=InnoDB;
INSERT INTO t1 VALUES (0, 2, 'a'), (1, 3, 'a');
Updated optimizer costs in multi_range_read_info_const() and sql_select.cc - multi_range_read_info_const now uses the new records_in_range interface - Added handler::avg_io_cost() - Don't calculate avg_io_cost() in get_sweep_read_cost if avg_io_cost is not 1.0. In this case we trust the avg_io_cost() from the handler. - Changed test_quick_select to use TIME_FOR_COMPARE instead of TIME_FOR_COMPARE_IDX to align this with the rest of the code. - Fixed bug when using test_if_cheaper_ordering where we didn't use keyread if index was changed - Fixed a bug where we didn't use index only read when using order-by-index - Added keyread_time() to HEAP. The default keyread_time() was optimized for blocks and not suitable for HEAP. The effect was the HEAP prefered table scans over ranges for btree indexes. - Fixed get_sweep_read_cost() for HEAP tables - Ensure that range and ref have same cost for simple ranges Added a small cost (MULTI_RANGE_READ_SETUP_COST) to ranges to ensure we favior ref for range for simple queries. - Fixed that matching_candidates_in_table() uses same number of records as the rest of the optimizer - Added avg_io_cost() to JT_EQ_REF cost. This helps calculate the cost for HEAP and temporary tables better. A few tests changed because of this. - heap::read_time() and heap::keyread_time() adjusted to not add +1. This was to ensure that handler::keyread_time() doesn't give higher cost for heap tables than for normal tables. One effect of this is that heap and derived tables stored in heap will prefer key access as this is now regarded as cheap. - Changed cost for index read in sql_select.cc to match multi_range_read_info_const(). All index cost calculation is now done trough one function. - 'ref' will now use quick_cost for keys if it exists. This is done so that for '=' ranges, 'ref' is prefered over 'range'. - scan_time() now takes avg_io_costs() into account - get_delayed_table_estimates() uses block_size and avg_io_cost() - Removed default argument to test_if_order_by_key(); simplifies code
2020-02-28 11:59:30 +01:00
insert into t1 select seq+1,seq+2,'c' from seq_1_to_1000;
ANALYZE TABLE t1;
Let $q=
SELECT t1.n1 FROM t1, (SELECT n1, n2 FROM t1 WHERE c1 = 'a' GROUP BY n1) as t
WHERE t.n1 = t1.n1 AND t.n2 = t1.n2 AND c1 = 'a' GROUP BY n1;
eval EXPLAIN $q;
eval $q;
DROP TABLE t1;
--echo #
--echo # MDEV-17211: splittable materialized derived joining 3 tables with
--echo # GROUP BY list containing fields from 2 of them
--echo #
CREATE TABLE t1 (
id1 int, i1 int, id2 int,
PRIMARY KEY (id1), KEY (i1), KEY (id2)
) ENGINE=InnoDB;
INSERT INTO t1 VALUES (1,1,1);
CREATE TABLE t2 (id2 int, i2 int) ENGINE=InnoDB;
INSERT INTO t2 VALUES (1, 1);
CREATE TABLE t3 (id3 int, i3 int, PRIMARY KEY (id3)) ENGINE=InnoDB;
INSERT INTO t3 VALUES (1,1);
let $q=
SELECT id3
FROM (SELECT t3.id3, t2.i2, t1.id2 FROM t3,t1,t2
WHERE t3.i3=t1.id1 AND t2.id2=t1.id2
GROUP BY t3.id3, t1.id2) AS t,
t2
WHERE t2.id2=t.id2;
eval EXPLAIN $q;
eval $q;
DROP TABLE t1,t2,t3;
--echo #
--echo # Bug mdev-17381: equi-join of derived table with join_cache_level=4
--echo #
CREATE TABLE t1 (
id int NOT NULL,
amount decimal DEFAULT NULL,
PRIMARY KEY (id)
) ENGINE=INNODB;
CREATE TABLE t2 (
id int NOT NULL,
name varchar(50) DEFAULT NULL,
PRIMARY KEY (id)
) ENGINE=INNODB;
INSERT INTO t1 VALUES
(1, 10.0000), (2, 20.0000), (3, 30.0000), (4, 40.0000),
(5, NULL), (6, NULL), (7, 70.0000), (8, 80.0000);
INSERT INTO t2 VALUES
(1,'A'), (2,'B'), (3,'C'), (4,'D'), (5, NULL), (6, NULL),
(7,'E'), (8,'F'), (9,'G'), (10,'H'), (11, NULL), (12, NULL);
set join_cache_level=4;
let $q=
SELECT t2.id,t2.name,t.total_amt
FROM t2
LEFT JOIN
(SELECT id, sum(amount) total_amt FROM t1 GROUP BY id) AS t
ON t2.id=t.id
WHERE t2.id < 3;
eval $q;
eval EXPLAIN $q;
set join_cache_level=default;
DROP TABLE t1,t2;
--echo #
--echo # Bug mdev-18467: join of grouping view and a base table as inner operand
--echo # of left join with on condition containing impossible range
--echo #
create table t1 (f1 int, f2 int, key(f2)) engine=InnoDB;
insert into t1 values (3,33), (7,77), (1,11);
create table t2 (f1 int, f2 int, primary key (f1)) engine=InnoDB;
insert into t2 values (3,33), (9,99), (1,11);
create view v1 as
select f1, max(f2) as f2 from t2 group by f1;
let $q=
select t.f2
from t1
left join
(v1 join t1 as t on v1.f1=t.f1 and t.f2 = null)
on t1.f1=t.f1;
eval $q;
eval explain $q;
eval set statement optimizer_switch='split_materialized=off' for explain $q;
drop view v1;
drop table t1,t2;
--echo #
--echo # MDEV-23723: Crash when test_if_skip_sort_order() is checked for derived table subject to split
--echo #
CREATE TABLE t1 (a INT, b INT, KEY (a), KEY (a,b)) ENGINE=InnoDB;
CREATE TABLE t2 (c INT, KEY (c)) ENGINE=InnoDB;
SELECT * FROM t1 t1a JOIN t1 t1b;
INSERT INTO t2 VALUES (1),(2);
INSERT INTO t1 VALUES (1,2),(3,4),(5,6),(7,8),(9,10),(11,12);
let $query=
EXPLAIN
SELECT *
FROM
t1 JOIN
(SELECT t1.a, t1.b FROM t1, t2 WHERE t1.b = t2.c GROUP BY t1.a, t1.b) as dt
WHERE
t1.a = dt.a;
eval set statement optimizer_switch='split_materialized=off' for $query;
eval set statement optimizer_switch='split_materialized=on' for $query;
DROP TABLE t1, t2;
--echo #
--echo # Bug mdev-25714: usage non-splitting covering index is cheaper than
--echo # usage of the best splitting index for one group
--echo #
create table t1 (
id int not null, itemid int not null, index idx (itemid)
) engine=innodb;
insert into t1 values (1, 2), (2,2), (4,2), (4,2), (0,3), (3,3);
create table t2 (id int not null) engine=innodb;
insert into t2 values (2);
create table t3 (
id int not null, itemid int not null, userid int not null, primary key (id),
index idx1 (userid, itemid), index idx2 (itemid)
) engine innodb;
insert into t3 values (1,1,1), (2,1,1), (3,2,1), (4,2,1), (5,3,1);
set use_stat_tables='never';
set optimizer_use_condition_selectivity=1;
analyze table t1,t2,t3;
let $q=
select t1.id, t1.itemid, dt.id, t2.id
from t1,
(select itemid, max(id) as id from t3 where userid = 1 group by itemid) dt,
t2
where t1.id = dt.id and t1.itemid = dt.itemid and t2.id=t1.itemid;
set optimizer_switch='split_materialized=on';
eval explain $q;
eval $q;
set optimizer_switch='split_materialized=off';
eval explain $q;
eval $q;
drop table t1,t2,t3;
set optimizer_switch='split_materialized=default';
set use_stat_tables=default;
set optimizer_use_condition_selectivity=default;
--echo #
--echo # MDEV-26337: subquery with groupby and ROLLUP returns incorrect results
--echo # (The testcase is taken from testcase for MDEV-13389 due to it being
--echo # much smaller)
--echo #
create table t3 (a int, b int, c char(127), index idx_b(b)) engine=myisam;
insert into t3 values
(8,11,'aa'), (5,15,'cc'), (1,14,'bb'), (2,12,'aa'), (7,17,'cc'),
(7,18,'aa'), (2,11,'aa'), (7,10,'bb'), (3,11,'dd'), (4,12,'ee'),
(5,14,'dd'), (9,12,'ee');
create table t4 (a int, b int, c char(127), index idx(a,c)) engine=myisam;
insert into t4 values
(7,10,'cc'), (1,20,'aa'), (2,23,'bb'), (7,18,'cc'), (1,30,'bb'),
(4,71,'xx'), (3,15,'aa'), (7,82,'aa'), (8,12,'dd'), (4,15,'aa'),
(11,33,'yy'), (10,42,'zz'), (4,53,'xx'), (10,17,'yy'), (7,12,'cc'),
(8,20,'dd'), (7,32,'bb'), (1,50,'aa'), (3,40,'bb'), (3,77,'aa');
insert into t4 select a+10, b+10, concat(c,'f') from t4;
analyze table t3,t4;
--echo # This should use a plan with LATERAL DERIVED:
explain select t3.a,t3.c,t.max,t.min
from t3 join
(select a, c, max(b) max, min(b) min from t4 group by a,c) t
on t3.a=t.a and t3.c=t.c
where t3.b > 15;
--echo # ... and if one adds WITH ROLLUP, then LATERAL DERIVED is no longer used:
explain select t3.a,t3.c,t.max,t.min
from t3 join
(select a, c, max(b) max, min(b) min from t4 group by a,c with rollup) t
on t3.a=t.a and t3.c=t.c
where t3.b > 15;
drop table t3, t4;
--echo # End of 10.3 tests
--source include/have_sequence.inc
--echo #
--echo # MDEV-26301: Split optimization refills temporary table too many times
--echo #
# 5 values
create table t1(a int, b int);
insert into t1 select seq,seq from seq_1_to_5;
# 5 value groups of size 2 each
create table t2(a int, b int, key(a));
insert into t2
select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
# 5 value groups of size 3 each
create table t3(a int, b int, key(a));
insert into t3
select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
analyze table t1,t2,t3 persistent for all;
explain
select * from
(t1 left join t2 on t2.a=t1.b) left join t3 on t3.a=t1.b;
# Now, create tables for Groups.
create table t10 (
grp_id int,
col1 int,
key(grp_id)
);
# 100 groups of 100 values each
insert into t10
select
A.seq,
B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
# and X10 multiplier
create table t11 (
col1 int,
col2 int
);
insert into t11
select A.seq, A.seq from seq_1_to_10 A;
analyze table t10,t11 persistent for all;
let $q1=
select * from
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id) T on T.grp_id=t1.b;
eval
explain $q1;
--echo # The important part in the below output is:
--echo # "lateral": 1,
--echo # "query_block": {
--echo # "select_id": 2,
--echo # "r_loops": 5, <-- must be 5, not 30.
--source include/analyze-format.inc
eval
analyze format=json $q1;
create table t21 (pk int primary key);
insert into t21 values (1),(2),(3);
create table t22 (pk int primary key);
insert into t22 values (1),(2),(3);
# Same as above but throw in a couple of const tables.
explain
select * from
t21, t22,
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id) T on T.grp_id=t1.b
where
t21.pk=1 and t22.pk=2;
explain
select * from
t21,
(
(t1 left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from
t22 join t10 left join t11 on t11.col1=t10.col1
where
t22.pk=1
group by grp_id) T on T.grp_id=t1.b
where
t21.pk=1;
# And also add a non-const table
create table t5 (
pk int primary key
);
insert into t5 select seq from seq_1_to_1000;
explain
select * from
t21,
(
(((t1 join t5 on t5.pk=t1.b)) left join t2 on t2.a=t1.b)
left join t3 on t3.a=t1.b
) left join (select grp_id, count(*)
from
t22 join t10 left join t11 on t11.col1=t10.col1
where
t22.pk=1
group by grp_id) T on T.grp_id=t1.b
where
t21.pk=1;
drop table t1,t2,t3,t5, t10, t11, t21, t22;
# 5 values
create table t1(a int, b int);
insert into t1 select seq,seq from seq_1_to_5;
# 5 value groups of size 2 each
create table t2(a int, b int, key(a));
insert into t2
select A.seq,B.seq from seq_1_to_25 A, seq_1_to_2 B;
# 5 value groups of size 3 each
create table t3(a int, b int, key(a));
insert into t3
select A.seq,B.seq from seq_1_to_5 A, seq_1_to_3 B;
analyze table t1,t2,t3 persistent for all;
create table t10 (
grp_id int,
col1 int,
key(grp_id)
);
# 100 groups of 100 values each
insert into t10
select
A.seq,
B.seq
from
seq_1_to_100 A,
seq_1_to_100 B;
# and X10 multiplier
create table t11 (
col1 int,
col2 int
);
insert into t11
select A.seq, A.seq from seq_1_to_10 A;
analyze table t10,t11 persistent for all;
let $q=
select *
from
(
(t1 left join t2 on t2.a=t1.b)
left join
t3
on t3.a=t1.b
)
left join
(
select grp_id, count(*)
from t10 left join t11 on t11.col1=t10.col1
group by grp_id
)dt
on dt.grp_id=t1.b;
eval explain $q;
eval $q;
set join_cache_level=4;
eval explain $q;
eval $q;
set join_cache_level=default;
drop index a on t2;
drop index a on t3;
eval explain $q;
eval $q;
drop table t1,t2,t3;
drop table t10, t11;
--echo #
--echo # MDEV-31194: Server crash or assertion failure with join_cache_level=4
--echo # (a followup to the above bug, MDEV-26301)
--echo #
CREATE TABLE t1 (a INT);
INSERT INTO t1 VALUES (3),(4);
CREATE TABLE t2 (id INT PRIMARY KEY) ENGINE=Aria;
INSERT INTO t2 VALUES (1),(2);
set @tmp1= @@optimizer_switch, @tmp2= @@join_cache_level;
set
optimizer_switch= 'derived_with_keys=off',
join_cache_level= 4;
SELECT t1.* FROM t1 JOIN (SELECT id, COUNT(*) FROM t2 GROUP BY id) sq ON sq.id= t1.a;
set optimizer_switch= @tmp1, join_cache_level= @tmp2;
# Cleanup
DROP TABLE t1, t2;
--echo #
--echo # MDEV-31403: Server crashes in st_join_table::choose_best_splitting (still)
--echo #
CREATE TABLE t1 (a INT) ENGINE=InnoDB;
INSERT INTO t1 VALUES
(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15);
CREATE TABLE t2 (b INT) ENGINE=InnoDB;
INSERT INTO t2 VALUES (100),(200);
CREATE TABLE t3 (c INT, d INT, KEY(c)) ENGINE=InnoDB;
INSERT INTO t3 VALUES (1,1),(2,2);
CREATE VIEW v AS SELECT c, d FROM t3 GROUP BY c, d;
SELECT * FROM t1 JOIN t2 WHERE (t1.a, t2.b) IN (SELECT * FROM v);
# Cleanup
DROP VIEW v;
DROP TABLE t1, t2, t3;
--echo #
--echo # MDEV-31279 Crash when lateral derived is guaranteed to return no rows
--echo #
CREATE TABLE t1 (a CHAR(1)) ENGINE=MyISAM;
INSERT INTO t1 VALUES ('1'),('2');
CREATE TABLE t2 (b INT, KEY(b)) ENGINE=MyISAM;
ALTER TABLE t2 DISABLE KEYS;
INSERT INTO t2 VALUES (1),(2),(3);
ALTER TABLE t2 ENABLE KEYS;
CREATE TABLE t3 (c INT) ENGINE=MyISAM;
INSERT INTO t3 (c) SELECT seq FROM seq_1_to_101;
SELECT * FROM t1 WHERE t1.a IN (SELECT b FROM
(SELECT t2.b FROM t2 WHERE NOT EXISTS (SELECT 1 FROM t3) GROUP BY b) sq);
DROP TABLE t1, t2, t3;
--echo # End of 10.4 tests