BUG#17379 Wrong reuse of E(#rows(range)) as E(#rows(ref(const))):
Re-work best_access_path() and find_best() to reuse E(#rows(range access)) as
E(#rows(ref[_or_null](const) access)) only when it is appropriate.
[This is the final cumulative patch]

mysql-test/r/select.result:
  BUG#17379: Testcase
mysql-test/r/subselect.result:
  BUG#17379: Updated test results
mysql-test/t/select.test:
  BUG#17379: Testcase
sql/opt_range.cc:
  BUG#17379: Make the range optimizer also return TABLE::quick_n_ranges
  together with the other TABLE::quick_* members
sql/sql_select.cc:
  BUG#17379: Re-work best_access_path() to reuse E(#rows(range access)) as
  E(#rows(ref[_or_null](const) access)) only when it is appropriate
sql/table.h:
  BUG#17379: Add TABLE::quick_n_ranges, filled in by the range optimizer
  together with the other TABLE::quick_* members
This commit is contained in:
parent 588082712a
commit 3fa6432b09

6 changed files with 154 additions and 24 deletions
mysql-test/r/select.result

@@ -3411,3 +3411,19 @@ SELECT * FROM t1;
 i
 255
 DROP TABLE t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, b int, c int, e int, primary key(a,b,c));
+insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C;
+analyze table t2;
+Table Op Msg_type Msg_text
+test.t2 analyze status OK
+select 'In next EXPLAIN, B.rows must be exactly 10:' Z;
+Z
+In next EXPLAIN, B.rows must be exactly 10:
+explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5
+and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE A range PRIMARY PRIMARY 12 NULL 3 Using where
+1 SIMPLE B ref PRIMARY PRIMARY 8 const,test.A.e 10
+drop table t1, t2;
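For context (not part of the commit): the required estimate of 10 for table B follows directly from the statistics of t2, while the estimate of 3 belongs to the range access on A. Against the t1/t2 setup above (run before the final drop), this can be checked with:

select count(*) AS total_rows,
       count(distinct a, b) AS distinct_ab_prefixes,
       count(*) / count(distinct a, b) AS rows_per_ref_lookup
from t2;
# 1000 rows over 100 distinct (a,b) prefixes => about 10 rows per ref lookup on (a,b)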
mysql-test/r/subselect.result

@@ -1480,7 +1480,7 @@ Note 1003 select `test`.`t1`.`s1` AS `s1`,not(<in_optimizer>(`test`.`t1`.`s1`,<e
 explain extended select s1, s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2') from t1;
 id select_type table type possible_keys key key_len ref rows Extra
 1 PRIMARY t1 index NULL s1 6 NULL 3 Using index
-2 DEPENDENT SUBQUERY t2 index_subquery s1 s1 6 func 1 Using index; Using where
+2 DEPENDENT SUBQUERY t2 index_subquery s1 s1 6 func 2 Using index; Using where
 Warnings:
 Note 1003 select `test`.`t1`.`s1` AS `s1`,not(<in_optimizer>(`test`.`t1`.`s1`,<exists>(<index_lookup>(<cache>(`test`.`t1`.`s1`) in t2 on s1 checking NULL where (`test`.`t2`.`s1` < _latin1'a2'))))) AS `s1 NOT IN (SELECT s1 FROM t2 WHERE s1 < 'a2')` from `test`.`t1`
 drop table t1,t2;
mysql-test/t/select.test

@@ -2886,3 +2886,16 @@ SELECT * FROM t1;
 UPDATE t1 SET i = i - 1;
 SELECT * FROM t1;
 DROP TABLE t1;
+
+# BUG#17379
+
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, b int, c int, e int, primary key(a,b,c));
+insert into t2 select A.a, B.a, C.a, C.a from t1 A, t1 B, t1 C;
+analyze table t2;
+select 'In next EXPLAIN, B.rows must be exactly 10:' Z;
+
+explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5
+and B.a=5 and B.b=A.e and (B.b =1 or B.b = 3 or B.b=5);
+drop table t1, t2;
sql/opt_range.cc

@@ -347,8 +347,10 @@ typedef struct st_qsel_param {
   uint *imerge_cost_buff;      /* buffer for index_merge cost estimates */
   uint imerge_cost_buff_size;  /* size of the buffer */

   /* TRUE if last checked tree->key can be used for ROR-scan */
   bool is_ror_scan;
+  /* Number of ranges in the last checked tree->key */
+  uint n_ranges;
 } PARAM;

 class TABLE_READ_PLAN;
@@ -5297,6 +5299,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
                              param->table->file->primary_key_is_clustered());
     param->is_ror_scan= !cpk_scan;
   }
+  param->n_ranges= 0;

   records=check_quick_keys(param,idx,tree,param->min_key,0,param->max_key,0);
   if (records != HA_POS_ERROR)
@@ -5304,7 +5307,7 @@ check_quick_select(PARAM *param,uint idx,SEL_ARG *tree)
     param->table->quick_keys.set_bit(key);
     param->table->quick_rows[key]=records;
     param->table->quick_key_parts[key]=param->max_key_part+1;
-
+    param->table->quick_n_ranges[key]= param->n_ranges;
     if (cpk_scan)
       param->is_ror_scan= TRUE;
   }
@@ -5440,7 +5443,10 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
                 HA_NOSAME &&
       min_key_length == max_key_length &&
       !memcmp(param->min_key,param->max_key,min_key_length))
+  {
     tmp=1;                              // Max one record
+    param->n_ranges++;
+  }
   else
   {
     if (param->is_ror_scan)
@@ -5460,6 +5466,7 @@ check_quick_keys(PARAM *param,uint idx,SEL_ARG *key_tree,
           is_key_scan_ror(param, keynr, key_tree->part + 1)))
       param->is_ror_scan= FALSE;
   }
+  param->n_ranges++;

   if (tmp_min_flag & GEOM_FLAG)
   {
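As an illustration of what the new counter records (not part of the commit; table t and index k1 are made-up names), the same index can receive very different interval counts from the range optimizer:

create table t (a int, b int, c int, key k1 (a,b));
insert into t values (5,1,1),(5,3,1),(5,5,1),(5,7,1),(4,1,1);
analyze table t;
# one single-point interval over (a,b): quick_n_ranges[k1] would be 1
explain select * from t where a=5 and b=5;
# three single-point intervals over (a,b): quick_n_ranges[k1] would be 3
explain select * from t where a=5 and (b=1 or b=3 or b=5);
drop table t;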
sql/sql_select.cc

@@ -3335,7 +3335,10 @@ best_access_path(JOIN *join,
       uint key= keyuse->key;
       KEY *keyinfo= table->key_info+key;
       bool ft_key= (keyuse->keypart == FT_KEYPART);
-      uint found_ref_or_null= 0;
+      /* Bitmap of keyparts where the ref access is over 'keypart=const': */
+      key_part_map const_part= 0;
+      /* The or-null keypart in ref-or-null access: */
+      key_part_map ref_or_null_part= 0;

       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
@@ -3347,12 +3350,14 @@ best_access_path(JOIN *join,
       do
       {
         if (!(remaining_tables & keyuse->used_tables) &&
-            !(found_ref_or_null & keyuse->optimize))
+            !(ref_or_null_part && (keyuse->optimize &
+                                   KEY_OPTIMIZE_REF_OR_NULL)))
         {
           found_part|= keyuse->keypart_map;
-          double tmp= prev_record_reads(join,
-                                        (found_ref |
-                                         keyuse->used_tables));
+          if (!(keyuse->used_tables & ~join->const_table_map))
+            const_part|= keyuse->keypart_map;
+          double tmp= prev_record_reads(join, (found_ref |
+                                               keyuse->used_tables));
           if (tmp < best_prev_record_reads)
           {
             best_part_found_ref= keyuse->used_tables;
@@ -3364,8 +3369,8 @@ best_access_path(JOIN *join,
             If there is one 'key_column IS NULL' expression, we can
             use this ref_or_null optimisation of this field
           */
-          found_ref_or_null|= (keyuse->optimize &
-                               KEY_OPTIMIZE_REF_OR_NULL);
+          if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
+            ref_or_null_part |= keyuse->keypart_map;
         }
         keyuse++;
       } while (keyuse->table == table && keyuse->key == key &&
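To make the new bitmaps concrete, a sketch (not part of the commit; ta, tb and k1 are made-up names): in the query below, keypart a of k1 is compared to a constant and so contributes to const_part, keypart b is bound to another table's column and so only appears in found_ref, and the "c=7 OR c IS NULL" pair is what sets the keypart-c bit in ref_or_null_part.

create table ta (x int);
insert into ta values (1),(2),(3);
create table tb (a int, b int, c int, key k1 (a,b,c));
insert into tb values (5,1,7),(5,1,NULL),(5,2,3),(4,3,7);
# a ref[or_null] candidate on k1: a=const, b=ref, (c=const OR c IS NULL)
explain select * from ta, tb
where tb.a=5 and tb.b=ta.x and (tb.c=7 or tb.c is null);
drop table ta, tb;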
@@ -3401,7 +3406,7 @@ best_access_path(JOIN *join,
         Check if we found full key
       */
       if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
-          !found_ref_or_null)
+          !ref_or_null_part)
       {                                         /* use eq key */
         max_key_part= (uint) ~0;
         if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
@@ -3413,6 +3418,23 @@ best_access_path(JOIN *join,
         {
           if (!found_ref)
           {                                     /* We found a const key */
+            /*
+              ReuseRangeEstimateForRef-1:
+              We get here if we've found a ref(const) (c_i are constants):
+                "(keypart1=c1) AND ... AND (keypartN=cN)"   [ref_const_cond]
+
+              If the range optimizer was able to construct a "range" access
+              on this index, then its condition "quick_cond" was equivalent
+              to ref_const_cond (*), and we can re-use E(#rows) from the
+              range optimizer.
+
+              Proof of (*): By the properties of the range and ref optimizers,
+              quick_cond will be equal to or tighter than ref_const_cond.
+              ref_const_cond already covers the "smallest" possible interval -
+              a singlepoint interval over all keyparts. Therefore,
+              quick_cond is equivalent to ref_const_cond (if it was an
+              empty interval we wouldn't have got here).
+            */
             if (table->quick_keys.is_set(key))
               records= (double) table->quick_rows[key];
             else
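A minimal sketch of the ReuseRangeEstimateForRef-1 case (not part of the commit; ten, t3 and k1 are made-up names): every keypart of a non-unique key is compared to a constant, so the range optimizer's single-point interval covers exactly the rows the ref(const) lookup will read, and table->quick_rows[key] can be reused as the ref estimate.

create table ten (a int);
insert into ten values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t3 (a int, b int, c int, key k1 (a,b));
insert into t3 select A.a, B.a, C.a from ten A, ten B, ten C;
analyze table t3;
# ref(const,const) over both keyparts of k1; the range interval for
# (a=5, b=5) describes the same ~10 rows, so its E(#rows) is safe to reuse
explain select * from t3 where a=5 and b=5;
drop table ten, t3;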
@@ -3433,6 +3455,23 @@ best_access_path(JOIN *join,
               if (records < 2.0)
                 records=2.0;       /* Can't be as good as a unique */
             }
+            /*
+              ReuseRangeEstimateForRef-2: We get here if we could not reuse
+              E(#rows) from the range optimizer. Make another try:
+
+              If the range optimizer produced E(#rows) for a prefix of the
+              ref access we're considering, and that E(#rows) is lower than
+              our current estimate, make an adjustment. The criterion for
+              when we can make the adjustment is a special case of the
+              criteria used in ReuseRangeEstimateForRef-3.
+            */
+            if (table->quick_keys.is_set(key) &&
+                const_part & (1 << table->quick_key_parts[key]) &&
+                table->quick_n_ranges[key] == 1 &&
+                records > (double) table->quick_rows[key])
+            {
+              records= (double) table->quick_rows[key];
+            }
           }
           /* Limit the number of matched rows */
           tmp= records;
@@ -3461,12 +3500,50 @@ best_access_path(JOIN *join,
       {
         max_key_part= max_part_bit(found_part);
         /*
-          Check if quick_range could determinate how many rows we
-          will match
+          ReuseRangeEstimateForRef-3:
+          We're now considering a ref[or_null] access via
+          (t.keypart1=e1 AND ... AND t.keypartK=eK)  [ OR
+          (same-as-above but with one cond replaced
+           with "t.keypart_i IS NULL")]  (**)
+
+          Try re-using E(#rows) from the "range" optimizer:
+          We can do so if the "range" optimizer used the same intervals as
+          in (**). The intervals used by the range optimizer may not be
+          available at this point (as the "range" access might have chosen
+          to create a quick select over another index), so we can't compare
+          them to (**). We'll make indirect judgements instead.
+          The sufficient conditions for re-use are:
+          (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
+               this is not satisfied we have no way to know which ranges
+               will be actually scanned by 'ref' until we execute the
+               join)
+          (C2) max #key parts in 'range' access == K == max_key_part (this
+               is apparently a necessary requirement)
+
+          We also have a property that "range optimizer produces equal or
+          tighter set of scan intervals than ref(const) optimizer". Each
+          of the intervals in (**) is a "tightest possible" interval when
+          one limits itself to using keyparts 1..K (which we do because of
+          (C2)). From here it follows that range access used either one or
+          both of the (I1) and (I2) intervals:
+
+           (t.keypart1=c1 AND ... AND t.keypartK=eK)  (I1)
+           (same-as-above but with one cond replaced
+            with "t.keypart_i IS NULL")               (I2)
+
+          The remaining part is to exclude the situation where the range
+          optimizer used one interval while we're considering
+          ref-or-null and looking for an estimate for two intervals. This
+          is done by the last limitation:
+
+          (C3) "range optimizer used (have ref_or_null?2:1) intervals"
         */
-        if (table->quick_keys.is_set(key) &&
-            table->quick_key_parts[key] == max_key_part)
+        if (table->quick_keys.is_set(key) && !found_ref &&          //(C1)
+            table->quick_key_parts[key] == max_key_part &&          //(C2)
+            table->quick_n_ranges[key] == 1+test(ref_or_null_part)) //(C3)
         {
           tmp= records= (double) table->quick_rows[key];
         }
         else
         {
           /* Check if we have statistic about the distribution */
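A sketch of condition (C3) for the ref-or-null case (not part of the commit; t and k1 are made-up names): for the condition below the range optimizer builds exactly two intervals over (a,b) - one for (a=5, b=3) and one for (a=5, b IS NULL) - which is the 1+test(ref_or_null_part) count required before its estimate may be reused for a ref_or_null access.

create table t (a int, b int, c int, key k1 (a,b));
insert into t values (5,3,1),(5,NULL,2),(5,7,3),(1,1,1);
analyze table t;
explain select * from t where a=5 and (b=3 or b is null);
drop table t;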
@@ -3510,21 +3587,37 @@ best_access_path(JOIN *join,
           }
           records = (ulong) tmp;
         }
-        /*
-          If quick_select was used on a part of this key, we know
-          the maximum number of rows that the key can match.
-        */
-        if (table->quick_keys.is_set(key) &&
-            table->quick_key_parts[key] <= max_key_part &&
-            records > (double) table->quick_rows[key])
-          tmp= records= (double) table->quick_rows[key];
-        else if (found_ref_or_null)
+
+        if (ref_or_null_part)
         {
           /* We need to do two key searches to find key */
           tmp *= 2.0;
           records *= 2.0;
         }
+
+        /*
+          ReuseRangeEstimateForRef-4: We get here if we could not reuse
+          E(#rows) from the range optimizer. Make another try:
+
+          If the range optimizer produced E(#rows) for a prefix of the ref
+          access we're considering, and that E(#rows) is lower than our
+          current estimate, make the adjustment.
+
+          The decision whether we can re-use the estimate from the range
+          optimizer is the same as in ReuseRangeEstimateForRef-3,
+          applied to the first table->quick_key_parts[key] key parts.
+        */
+        if (table->quick_keys.is_set(key) &&
+            table->quick_key_parts[key] <= max_key_part &&
+            const_part & (1 << table->quick_key_parts[key]) &&
+            table->quick_n_ranges[key] == 1 + test(ref_or_null_part &
+                                                   const_part) &&
+            records > (double) table->quick_rows[key])
+        {
+          tmp= records= (double) table->quick_rows[key];
+        }
       }
+
       /* Limit the number of matched rows */
       set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
       if (table->used_keys.is_set(key))
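Table B of the new test case (see the mysql-test hunks above) exercises exactly this region: its ref access uses only the (a,b) prefix of PRIMARY, b is bound to A.e (so (C1) fails), and the range estimate for B was built from three intervals (so the interval-count checks fail as well), which is why B.rows must stay at the statistics-based value of 10. With the t1/t2 setup from the test, before its final drop:

explain select * from t2 A, t2 B where A.a=5 and A.b=5 and A.C<5
and B.a=5 and B.b=A.e and (B.b=1 or B.b=3 or B.b=5);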
sql/table.h

@@ -221,6 +221,7 @@ struct st_table {
   ha_rows       quick_rows[MAX_KEY];
   key_part_map  const_key_parts[MAX_KEY];
   uint          quick_key_parts[MAX_KEY];
+  uint          quick_n_ranges[MAX_KEY];

   /*
     If this table has TIMESTAMP field with auto-set property (pointed by