mariadb/mysql-test/main/subselect_sj_nonmerged.result
Monty b3c74bdc1f Improve pruning in greedy_search by sorting tables during search
MDEV-28073 Slow query performance in MariaDB when using many tables

The faster we can find a good query plan, the more options we have for
finding and pruning (ignoring) bad plans.

This patch adds sorting of plans to best_extension_by_limited_search().
The plans, from best_access_path() are sorted according to the numbers
of found rows.  This allows us to faster find 'good tables' and we are
thus able to eliminate 'bad plans' faster.

One side effect of this patch is that if two tables have equal cost,
the table that which was used earlier in the query is preferred.
This allows users to improve plans by reordering eq_ref tables in the
order they would like them to be uses.

Result changes caused by the patch:
- Traces are different as now we print the cost for using tables before
  we start considering them in the plan.
- Table order are changed for some plans. In most cases this is because
  the plans are equal and tables are in this case sorted according to
  their usage in the original query.
- A few plans was changed as the optimizer was able to find a better
  plan (that was pruned by the original code).

Other things:

- Added a new statistic variable: "optimizer_join_prefixes_check_calls",
  which counts number of calls to best_extension_by_limited_search().
  This can be used to check the prune efficiency in greedy_search().
- Added variable "JOIN_TAB::embedded_dependent" to be able to handle
  XX IN (SELECT..) in the greedy_optimizer.  The idea is that we
  should prune a table if any of the tables in embedded_dependent is
  not yet read.
- When using many tables in a query, there will be some additional
  memory usage as we need to pre-allocate table of
  table_count*table_count*sizeof(POSITION) objects (POSITION is 312
  bytes for now) to hold the pre-calculated best_access_path()
  information.  This memory usage is offset by the expected
  performance improvement when using many tables in a query.
- Removed the code from an earlier patch to keep the table order in
  join->best_ref in the original order.  This is not needed anymore as we
  are now sorting the tables for each best_extension_by_limited_search()
  call.
2022-07-26 22:27:28 +07:00

122 lines
5.6 KiB
Text

drop table if exists t0, t1, t2, t3, t4;
set @save_optimizer_switch=@@optimizer_switch;
set optimizer_switch='semijoin=on,materialization=on';
set optimizer_switch='mrr=on,mrr_sort_keys=on,index_condition_pushdown=on';
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1 as select * from t0;
# The following should use full scan on <subquery2> and it must scan 1 row:
explain select * from t0 where a in (select max(a) from t1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 1
1 PRIMARY t0 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 10
select * from t0 where a in (select max(a) from t1);
a
9
insert into t1 values (11);
select * from t0 where a in (select max(a) from t1);
a
delete from t1 where a=11;
insert into t0 values (NULL);
select * from t0 where a in (select max(a) from t1);
a
9
delete from t0 where a is NULL;
delete from t1;
select * from t0 where a in (select max(a) from t1);
a
insert into t0 values (NULL);
select * from t0 where a in (select max(a) from t1);
a
delete from t0 where a is NULL;
drop table t1;
create table t1 (a int, b int);
insert into t1 select a,a from t0;
create table t2 as select * from t1 where a<5;
create table t3 as select (A.a + 10*B.a) as a from t0 A, t0 B;
alter table t3 add primary key(a);
# The following should have do a full scan on <subquery2> and scan 5 rows
# (despite that subquery's join output estimate is 50 rows)
explain select * from t3 where a in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5
1 PRIMARY t3 eq_ref PRIMARY PRIMARY 8 <subquery2>.max(t2.a) 1 Using where; Using index
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
# Compare to this which really will have 50 record combinations:
explain select * from t3 where a in (select max(t2.a) from t1, t2 group by t2.b, t1.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 index PRIMARY PRIMARY 8 NULL 100 Using index
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t3.a 1 Using where
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
SET @save_optimizer_switch=@@optimizer_switch;
SET optimizer_switch='outer_join_with_cache=off';
# Outer joins also work:
explain select * from t3
where a in (select max(t2.a) from t1 left join t2 on t1.a=t2.a group by t2.b, t1.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t3 index PRIMARY PRIMARY 8 NULL 100 Using index
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 test.t3.a 1 Using where
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using temporary
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using where
SET optimizer_switch=@save_optimizer_switch;
create table t4 (a int, b int, filler char(20), unique key(a,b));
insert into t4 select A.a + 10*B.a, A.a + 10*B.a, 'filler' from t0 A, t0 B;
explain select * from t0, t4 where
t4.b=t0.a and t4.a in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5
1 PRIMARY t0 ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
1 PRIMARY t4 eq_ref a a 10 <subquery2>.max(t2.a),test.t0.a 1
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
insert into t4 select 100 + (B.a *100 + A.a), 100 + (B.a*100 + A.a), 'filler' from t4 A, t0 B;
explain select * from t4 where
t4.a in (select max(t2.a) from t1, t2 group by t2.b) and
t4.b in (select max(t2.a) from t1, t2 group by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 5
1 PRIMARY <subquery3> ALL distinct_key NULL NULL NULL 5 Using join buffer (flat, BNL join)
1 PRIMARY t4 eq_ref a a 10 <subquery2>.max(t2.a),<subquery3>.max(t2.a) 1
3 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
3 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 5 Using temporary
2 MATERIALIZED t1 ALL NULL NULL NULL NULL 10 Using join buffer (flat, BNL join)
drop table t1,t2,t3,t4;
drop table t0;
#
# BUG#780359: Crash with get_fanout_with_deps in maria-5.3-mwl90
#
CREATE TABLE t1 (f1 int);
INSERT INTO t1 VALUES (2),(2);
CREATE TABLE t2 (f3 int);
INSERT INTO t2 VALUES (2),(2);
SELECT *
FROM t1
WHERE ( f1 ) IN (
SELECT t2.f3
FROM t2
WHERE t2.f3 = 97
AND t2.f3 = 50
GROUP BY 1
);
f1
DROP TABLE t1, t2;
#
# BUG#727183: WL#90 does not trigger with non-comma joins
#
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int, key(a));
insert into t1 select A.a + 10*B.a + 100*C.a from t0 A, t0 B, t0 C;
# The following must use non-merged SJ-Materialization:
explain select * from t1 X join t0 Y on X.a < Y.a where X.a in (select max(a) from t0);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL 1
1 PRIMARY X ref a a 5 <subquery2>.max(a) 1 Using index
1 PRIMARY Y ALL NULL NULL NULL NULL 10 Using where; Using join buffer (flat, BNL join)
2 MATERIALIZED t0 ALL NULL NULL NULL NULL 10
drop table t0, t1;
set optimizer_switch=@save_optimizer_switch;