MDEV-19134: EXISTS() slower if ORDER BY is defined

Step #2: "[ORDER BY ...] LIMIT n" should not prevent EXISTS-to-IN
conversion, as long as
- the LIMIT clause doesn't have OFFSET
- the LIMIT is not "LIMIT 0".
This commit is contained in:
Sergei Petrunia 2019-05-10 12:47:44 +03:00
parent b1f828a82a
commit d448cfc92a
4 changed files with 82 additions and 20 deletions

View file

@ -2543,12 +2543,38 @@ create table t1(a int, b int);
insert into t1 select
A.a + B.a*10, A.a + B.a*10 from t0 A, t0 B;
create table t2 as select * from t1;
# This will not be able to convert to semi-join but will not require filesort:
# This will be converted to semi-join:
explain
select * from t1 where exists (select * from t2 where t2.a=t1.a order by t2.b);
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL # Using where
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL # Using where
1 PRIMARY t1 ALL NULL NULL NULL NULL 100
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 100
# query with a non-zero constant LIMIT is converted to semi-join, too:
explain
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b limit 2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 100
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 100
# Zero LIMIT should prevent the conversion (but it is not visible atm
# due to MDEV-19429)
explain
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b limit 0);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 100
1 PRIMARY <subquery2> eq_ref distinct_key distinct_key 4 func 1
2 MATERIALIZED t2 ALL NULL NULL NULL NULL 100
# LIMIT+OFFSET prevents the conversion:
explain
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b limit 2,3);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 100 Using where
2 DEPENDENT SUBQUERY t2 ALL NULL NULL NULL NULL 100 Using where; Using filesort
# This will be merged and converted into a semi-join:
explain
select * from t1 where t1.a in (select t2.a from t2 order by t2.b);

View file

@ -2088,14 +2088,31 @@ insert into t1 select
create table t2 as select * from t1;
--echo # This will not be able to convert to semi-join but will not require filesort:
--replace_column 9 #
--echo # This will be converted to semi-join:
explain
select * from t1 where exists (select * from t2 where t2.a=t1.a order by t2.b);
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b);
--echo # query with a non-zero constant LIMIT is converted to semi-join, too:
explain
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b limit 2);
--echo # Zero LIMIT should prevent the conversion (but it is not visible atm
--echo # due to MDEV-19429)
explain
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b limit 0);
--echo # LIMIT+OFFSET prevents the conversion:
explain
select * from t1
where exists (select * from t2 where t2.a=t1.a order by t2.b limit 2,3);
--echo # This will be merged and converted into a semi-join:
explain
select * from t1 where t1.a in (select t2.a from t2 order by t2.b);
drop table t0, t1, t2;

View file

@ -311,7 +311,7 @@ EXPLAIN SELECT 1 FROM t1 WHERE NOT EXISTS
(SELECT 1 FROM t2 WHERE d = (SELECT d FROM t2 WHERE a >= 1) ORDER BY d);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 ALL NULL NULL NULL NULL 1 Using where
2 DEPENDENT SUBQUERY t2 eq_ref PRIMARY,d PRIMARY 1 func 1 Using where
2 DEPENDENT SUBQUERY t2 unique_subquery PRIMARY,d PRIMARY 1 func 1 Using where
3 DEPENDENT SUBQUERY t2 index NULL d 2 NULL 1 Using index
DROP TABLE t2;
CREATE TABLE t2 (b INT, c INT, UNIQUE KEY (b), UNIQUE KEY (b, c )) ENGINE=INNODB;

View file

@ -2906,7 +2906,6 @@ bool Item_exists_subselect::exists2in_processor(void *opt_arg)
!upper_not->is_top_level_item())) ||
first_select->is_part_of_union() ||
first_select->group_list.elements ||
first_select->order_list.elements ||
join->having ||
first_select->with_sum_func ||
!first_select->leaf_tables.elements||
@ -2914,8 +2913,31 @@ bool Item_exists_subselect::exists2in_processor(void *opt_arg)
with_recursive_reference)
DBUG_RETURN(FALSE);
DBUG_ASSERT(first_select->order_list.elements == 0 &&
first_select->group_list.elements == 0 &&
/*
EXISTS-to-IN coversion and ORDER BY ... LIMIT clause:
- "[ORDER BY ...] LIMIT n" clause with a non-zero n does not affect
the result of the EXISTS(...) predicate, and so we can discard
it during the conversion.
- "[ORDER BY ...] LIMIT m, n" can turn a non-empty resultset into empty
one, so it affects tthe EXISTS(...) result and cannot be discarded.
Disallow exists-to-in conversion if
(1). three is a LIMIT which is not a basic constant
(1a) or is a "LIMIT 0" (see MDEV-19429)
(2). there is an OFFSET clause
*/
if ((first_select->select_limit && // (1)
(!first_select->select_limit->basic_const_item() || // (1)
first_select->select_limit->val_uint() == 0)) || // (1a)
first_select->offset_limit) // (2)
{
DBUG_RETURN(FALSE);
}
/* Disallow the conversion if offset + limit exists */
DBUG_ASSERT(first_select->group_list.elements == 0 &&
first_select->having == NULL);
if (find_inner_outer_equalities(&join->conds, eqs))
@ -2946,9 +2968,6 @@ bool Item_exists_subselect::exists2in_processor(void *opt_arg)
if ((uint)eqs.elements() > (first_select->item_list.elements +
first_select->select_n_reserved))
goto out;
/* It is simple query */
DBUG_ASSERT(first_select->join->all_fields.elements ==
first_select->item_list.elements);
arena= thd->activate_stmt_arena_if_needed(&backup);