mirror of
https://github.com/MariaDB/server.git
synced 2025-01-18 04:53:01 +01:00
Refactoring: Factor out common code from find_best() and best_access_path(): make
find_best() call best_access_path().
This commit is contained in:
parent
f6c95ee81a
commit
d268cf6db1
1 changed files with 5 additions and 338 deletions
|
@ -4327,344 +4327,11 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
|
|||
if ((rest_tables & real_table_bit) && !(rest_tables & s->dependent) &&
|
||||
(!idx|| !check_interleaving_with_nj(join->positions[idx-1].table, s)))
|
||||
{
|
||||
double best,best_time,records;
|
||||
best=best_time=records=DBL_MAX;
|
||||
KEYUSE *best_key=0;
|
||||
uint best_max_key_part=0;
|
||||
my_bool found_constraint= 0;
|
||||
|
||||
if (s->keyuse)
|
||||
{ /* Use key if possible */
|
||||
TABLE *table=s->table;
|
||||
KEYUSE *keyuse,*start_key=0;
|
||||
double best_records=DBL_MAX;
|
||||
uint max_key_part=0;
|
||||
|
||||
/* Test how we can use keys */
|
||||
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
|
||||
for (keyuse=s->keyuse ; keyuse->table == table ;)
|
||||
{
|
||||
key_part_map found_part=0;
|
||||
table_map found_ref=0;
|
||||
uint key=keyuse->key;
|
||||
KEY *keyinfo=table->key_info+key;
|
||||
bool ft_key=(keyuse->keypart == FT_KEYPART);
|
||||
uint found_ref_or_null= 0;
|
||||
|
||||
/* Calculate how many key segments of the current key we can use */
|
||||
start_key=keyuse;
|
||||
do
|
||||
{
|
||||
uint keypart=keyuse->keypart;
|
||||
table_map best_part_found_ref= 0;
|
||||
double best_prev_record_reads= DBL_MAX;
|
||||
do
|
||||
{
|
||||
if (!(rest_tables & keyuse->used_tables) &&
|
||||
!(found_ref_or_null & keyuse->optimize))
|
||||
{
|
||||
found_part|=keyuse->keypart_map;
|
||||
double tmp= prev_record_reads(join,
|
||||
(found_ref |
|
||||
keyuse->used_tables));
|
||||
if (tmp < best_prev_record_reads)
|
||||
{
|
||||
best_part_found_ref= keyuse->used_tables;
|
||||
best_prev_record_reads= tmp;
|
||||
}
|
||||
if (rec > keyuse->ref_table_rows)
|
||||
rec= keyuse->ref_table_rows;
|
||||
/*
|
||||
If there is one 'key_column IS NULL' expression, we can
|
||||
use this ref_or_null optimisation of this field
|
||||
*/
|
||||
found_ref_or_null|= (keyuse->optimize &
|
||||
KEY_OPTIMIZE_REF_OR_NULL);
|
||||
}
|
||||
keyuse++;
|
||||
} while (keyuse->table == table && keyuse->key == key &&
|
||||
keyuse->keypart == keypart);
|
||||
found_ref|= best_part_found_ref;
|
||||
} while (keyuse->table == table && keyuse->key == key);
|
||||
|
||||
/*
|
||||
Assume that that each key matches a proportional part of table.
|
||||
*/
|
||||
if (!found_part && !ft_key)
|
||||
continue; // Nothing usable found
|
||||
if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
|
||||
rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
|
||||
|
||||
/*
|
||||
ft-keys require special treatment
|
||||
*/
|
||||
if (ft_key)
|
||||
{
|
||||
/*
|
||||
Really, there should be records=0.0 (yes!)
|
||||
but 1.0 would be probably safer
|
||||
*/
|
||||
tmp=prev_record_reads(join,found_ref);
|
||||
records=1.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
found_constraint= 1;
|
||||
/*
|
||||
Check if we found full key
|
||||
*/
|
||||
if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
|
||||
!found_ref_or_null)
|
||||
{ /* use eq key */
|
||||
max_key_part= (uint) ~0;
|
||||
if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY |
|
||||
HA_END_SPACE_KEY)) == HA_NOSAME)
|
||||
{
|
||||
tmp=prev_record_reads(join,found_ref);
|
||||
records=1.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!found_ref)
|
||||
{ // We found a const key
|
||||
if (table->quick_keys.is_set(key))
|
||||
records= (double) table->quick_rows[key];
|
||||
else
|
||||
{
|
||||
/* quick_range couldn't use key! */
|
||||
records= (double) s->records/rec;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!(records=keyinfo->rec_per_key[keyinfo->key_parts-1]))
|
||||
{ // Prefere longer keys
|
||||
records=
|
||||
((double) s->records / (double) rec *
|
||||
(1.0 +
|
||||
((double) (table->s->max_key_length-keyinfo->key_length) /
|
||||
(double) table->s->max_key_length)));
|
||||
if (records < 2.0)
|
||||
records=2.0; // Can't be as good as a unique
|
||||
}
|
||||
}
|
||||
/* Limit the number of matched rows */
|
||||
tmp= records;
|
||||
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
|
||||
if (table->used_keys.is_set(key))
|
||||
{
|
||||
/* we can use only index tree */
|
||||
uint keys_per_block= table->file->block_size/2/
|
||||
(keyinfo->key_length+table->file->ref_length)+1;
|
||||
tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
|
||||
}
|
||||
else
|
||||
tmp=record_count*min(tmp,s->worst_seeks);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
Use as much key-parts as possible and a uniq key is better
|
||||
than a not unique key
|
||||
Set tmp to (previous record count) * (records / combination)
|
||||
*/
|
||||
if ((found_part & 1) &&
|
||||
(!(table->file->index_flags(key,0,0) & HA_ONLY_WHOLE_INDEX) ||
|
||||
found_part == PREV_BITS(uint,keyinfo->key_parts)))
|
||||
{
|
||||
max_key_part=max_part_bit(found_part);
|
||||
/*
|
||||
Check if quick_range could determinate how many rows we
|
||||
will match
|
||||
*/
|
||||
if (table->quick_keys.is_set(key) &&
|
||||
table->quick_key_parts[key] == max_key_part)
|
||||
tmp=records= (double) table->quick_rows[key];
|
||||
else
|
||||
{
|
||||
/* Check if we have statistic about the distribution */
|
||||
if ((records=keyinfo->rec_per_key[max_key_part-1]))
|
||||
tmp=records;
|
||||
else
|
||||
{
|
||||
/*
|
||||
Assume that the first key part matches 1% of the file
|
||||
and that the whole key matches 10 (duplicates) or 1
|
||||
(unique) records.
|
||||
Assume also that more key matches proportionally more
|
||||
records
|
||||
This gives the formula:
|
||||
records= (x * (b-a) + a*c-b)/(c-1)
|
||||
|
||||
b = records matched by whole key
|
||||
a = records matched by first key part (10% of all records?)
|
||||
c = number of key parts in key
|
||||
x = used key parts (1 <= x <= c)
|
||||
*/
|
||||
double rec_per_key;
|
||||
rec_per_key= keyinfo->rec_per_key[keyinfo->key_parts-1] ?
|
||||
(double) keyinfo->rec_per_key[keyinfo->key_parts-1] :
|
||||
(double) s->records/rec+1;
|
||||
if (!s->records)
|
||||
tmp=0;
|
||||
else if (rec_per_key/(double) s->records >= 0.01)
|
||||
tmp=rec_per_key;
|
||||
else
|
||||
{
|
||||
double a=s->records*0.01;
|
||||
tmp=(max_key_part * (rec_per_key - a) +
|
||||
a*keyinfo->key_parts - rec_per_key)/
|
||||
(keyinfo->key_parts-1);
|
||||
set_if_bigger(tmp,1.0);
|
||||
}
|
||||
records=(ulong) tmp;
|
||||
}
|
||||
/*
|
||||
If quick_select was used on a part of this key, we know
|
||||
the maximum number of rows that the key can match.
|
||||
*/
|
||||
if (table->quick_keys.is_set(key) &&
|
||||
table->quick_key_parts[key] <= max_key_part &&
|
||||
records > (double) table->quick_rows[key])
|
||||
tmp= records= (double) table->quick_rows[key];
|
||||
else if (found_ref_or_null)
|
||||
{
|
||||
/* We need to do two key searches to find key */
|
||||
tmp*= 2.0;
|
||||
records*= 2.0;
|
||||
}
|
||||
}
|
||||
/* Limit the number of matched rows */
|
||||
set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
|
||||
if (table->used_keys.is_set(key))
|
||||
{
|
||||
/* we can use only index tree */
|
||||
uint keys_per_block= table->file->block_size/2/
|
||||
(keyinfo->key_length+table->file->ref_length)+1;
|
||||
tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
|
||||
}
|
||||
else
|
||||
tmp=record_count*min(tmp,s->worst_seeks);
|
||||
}
|
||||
else
|
||||
tmp=best_time; // Do nothing
|
||||
}
|
||||
} /* not ft_key */
|
||||
if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
|
||||
{
|
||||
best_time=tmp + records/(double) TIME_FOR_COMPARE;
|
||||
best=tmp;
|
||||
best_records=records;
|
||||
best_key=start_key;
|
||||
best_max_key_part=max_key_part;
|
||||
}
|
||||
}
|
||||
records=best_records;
|
||||
}
|
||||
|
||||
/*
|
||||
Don't test table scan if it can't be better.
|
||||
Prefer key lookup if we would use the same key for scanning.
|
||||
|
||||
Don't do a table scan on InnoDB tables, if we can read the used
|
||||
parts of the row from any of the used index.
|
||||
This is because table scans uses index and we would not win
|
||||
anything by using a table scan.
|
||||
(see comment in best_access_path() for more details on the below
|
||||
condition)
|
||||
*/
|
||||
if ((records >= s->found_records || best > s->read_time) &&
|
||||
!(s->quick && best_key && s->quick->index == best_key->key &&
|
||||
best_max_key_part >= s->table->quick_key_parts[best_key->key]) &&
|
||||
!((s->table->file->table_flags() & HA_TABLE_SCAN_ON_INDEX) &&
|
||||
! s->table->used_keys.is_clear_all() && best_key) &&
|
||||
!(s->table->force_index && best_key && !s->quick))
|
||||
{ // Check full join
|
||||
ha_rows rnd_records= s->found_records;
|
||||
/*
|
||||
If there is a restriction on the table, assume that 25% of the
|
||||
rows can be skipped on next part.
|
||||
This is to force tables that this table depends on before this
|
||||
table
|
||||
*/
|
||||
if (found_constraint)
|
||||
rnd_records-= rnd_records/4;
|
||||
|
||||
/*
|
||||
Range optimizer never proposes a RANGE if it isn't better
|
||||
than FULL: so if RANGE is present, it's always preferred to FULL.
|
||||
Here we estimate its cost.
|
||||
*/
|
||||
if (s->quick)
|
||||
{
|
||||
/*
|
||||
For each record we:
|
||||
- read record range through 'quick'
|
||||
- skip rows which does not satisfy WHERE constraints
|
||||
*/
|
||||
tmp= record_count *
|
||||
(s->quick->read_time +
|
||||
(s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Estimate cost of reading table. */
|
||||
tmp= s->table->file->scan_time();
|
||||
if (s->table->map & join->outer_join) // Can't use join cache
|
||||
{
|
||||
/*
|
||||
For each record we have to:
|
||||
- read the whole table record
|
||||
- skip rows which does not satisfy join condition
|
||||
*/
|
||||
tmp= record_count *
|
||||
(tmp +
|
||||
(s->records - rnd_records)/(double) TIME_FOR_COMPARE);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* We read the table as many times as join buffer becomes full. */
|
||||
tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
|
||||
record_count /
|
||||
(double) thd->variables.join_buff_size));
|
||||
/*
|
||||
We don't make full cartesian product between rows in the scanned
|
||||
table and existing records because we skip all rows from the
|
||||
scanned table, which does not satisfy join condition when
|
||||
we read the table (see flush_cached_records for details). Here we
|
||||
take into account cost to read and skip these records.
|
||||
*/
|
||||
tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
We estimate the cost of evaluating WHERE clause for found records
|
||||
as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
|
||||
tmp give us total cost of using TABLE SCAN
|
||||
*/
|
||||
if (best == DBL_MAX ||
|
||||
(tmp + record_count/(double) TIME_FOR_COMPARE*rnd_records <
|
||||
best + record_count/(double) TIME_FOR_COMPARE*records))
|
||||
{
|
||||
/*
|
||||
If the table has a range (s->quick is set) make_join_select()
|
||||
will ensure that this will be used
|
||||
*/
|
||||
best=tmp;
|
||||
records= rows2double(rnd_records);
|
||||
best_key=0;
|
||||
}
|
||||
}
|
||||
join->positions[idx].records_read= records;
|
||||
join->positions[idx].key=best_key;
|
||||
join->positions[idx].table= s;
|
||||
if (!best_key && idx == join->const_tables &&
|
||||
s->table == join->sort_by_table &&
|
||||
join->unit->select_limit_cnt >= records)
|
||||
join->sort_by_table= (TABLE*) 1; // Must use temporary table
|
||||
|
||||
double records, best;
|
||||
best_access_path(join, s, thd, rest_tables, idx, record_count,
|
||||
read_time);
|
||||
records= join->positions[idx].records_read;
|
||||
best= join->positions[idx].read_time;
|
||||
/*
|
||||
Go to the next level only if there hasn't been a better key on
|
||||
this level! This will cut down the search for a lot simple cases!
|
||||
|
|
Loading…
Reference in a new issue