Precise read time estimates for index_merge/Unique
parent 211bc24063
commit 7dbdedcb72
13 changed files with 606 additions and 212 deletions
@@ -1,4 +1,4 @@
drop table if exists t0, t1, t2, t3;
drop table if exists t0, t1, t2, t3,t4;
create table t0
(
key1 int not null,
@@ -3,7 +3,7 @@
#

--disable_warnings
drop table if exists t0, t1, t2, t3;
drop table if exists t0, t1, t2, t3,t4;
--enable_warnings

# Create and fill a table with simple keys
@@ -88,9 +88,9 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
#endif
FILESORT_INFO table_sort;
/*
don't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT. work with a copy of it and put it back at the
end when index_merge select has finished with it.
Don't use table->sort in filesort as it is also used by
QUICK_INDEX_MERGE_SELECT. Work with a copy and put it back at the end
when index_merge select has finished with it.
*/
memcpy(&table_sort, &table->sort, sizeof(FILESORT_INFO));
table->sort.io_cache= NULL;
@@ -452,7 +452,7 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
if (quick_select)
{
/*
index_merge quick select uses table->sort when retrieving rows, so free
index_merge quick select uses table->sort when retrieving rows, so free
resources it has allocated.
*/
end_read_record(&read_record_info);
@@ -167,7 +167,7 @@ class ha_berkeley: public handler
longlong get_auto_increment();
void print_error(int error, myf errflag);
uint8 table_cache_type() { return HA_CACHE_TBL_TRANSACT; }
bool primary_key_is_clustered_covering() { return true; }
bool primary_key_is_clustered() { return true; }
};

extern bool berkeley_skip, berkeley_shared_data;
@@ -2003,7 +2003,8 @@ build_template(
update field->query_id so that the formula
thd->query_id == field->query_id did not work. */

ibool index_contains_field = dict_index_contains_col_or_prefix(index, i);
ibool index_contains_field=
dict_index_contains_col_or_prefix(index, i);

if (templ_type == ROW_MYSQL_REC_FIELDS &&
((prebuilt->read_just_key && !index_contains_field) ||
@@ -187,7 +187,7 @@ class ha_innobase: public handler
void init_table_handle_for_HANDLER();
longlong get_auto_increment();
uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; }
bool primary_key_is_clustered_covering() { return true; }
bool primary_key_is_clustered() { return true; }
};

extern bool innodb_skip;
@@ -378,10 +378,10 @@ public:

/*
RETURN
true primary key (if there is one) is clustered key covering all fields
true Primary key (if there is one) is clustered key covering all fields
false otherwise
*/
virtual bool primary_key_is_clustered_covering() { return false; }
virtual bool primary_key_is_clustered() { return false; }
};

/* Some extern variables used with handlers */
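For context on how this flag is meant to be used: the base handler above reports no clustered primary key and engines such as InnoDB and BDB override it, which is what lets index_merge treat a primary key range scan specially. A minimal standalone sketch of the pattern (toy class names, not the real handler hierarchy):

#include <cstdio>

// Illustrative stand-ins only; not the real handler class hierarchy.
class toy_handler
{
public:
  // Default: the engine has no clustered primary key.
  virtual bool primary_key_is_clustered() { return false; }
  virtual ~toy_handler() {}
};

// Engines with a clustered PK (InnoDB, BDB in this patch) override the flag.
class toy_clustered_pk_handler: public toy_handler
{
public:
  bool primary_key_is_clustered() { return true; }
};

int main()
{
  toy_handler plain;
  toy_clustered_pk_handler clustered;
  // index_merge uses this flag to keep a PK range scan out of the Unique
  // and run it as a separate clustered scan instead.
  printf("%d %d\n", plain.primary_key_is_clustered(),
                    clustered.primary_key_is_clustered());   // prints: 0 1
  return 0;
}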
@@ -118,6 +118,26 @@ extern CHARSET_INFO *national_charset_info, *table_alias_charset;
*/
#define TIME_FOR_COMPARE 5 // 5 compares == one read

/*
Number of comparisons of table rowids equivalent to reading one row from a
table.
*/
#define TIME_FOR_COMPARE_ROWID (TIME_FOR_COMPARE*2)

/*
For sequential disk seeks the cost formula is:
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST * #blocks_to_skip

The cost of average seek
DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK =1.0.
*/
#define DISK_SEEK_BASE_COST ((double)0.5)

#define BLOCKS_IN_AVG_SEEK 128

#define DISK_SEEK_PROP_COST ((double)0.5/BLOCKS_IN_AVG_SEEK)
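A quick standalone sanity check of the calibration above (illustrative snippet, not part of the patch): skipping BLOCKS_IN_AVG_SEEK blocks costs exactly one average seek, while a short skip costs little more than the base cost.

#include <cstdio>

// Mirror of the constants above, in a standalone program.
static const double DISK_SEEK_BASE_COST= 0.5;
static const int    BLOCKS_IN_AVG_SEEK = 128;
static const double DISK_SEEK_PROP_COST= 0.5/BLOCKS_IN_AVG_SEEK;

int main()
{
  // 0.5 + (0.5/128)*128 = 1.0: one "average" seek.
  printf("%g\n", DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*BLOCKS_IN_AVG_SEEK);
  // A short 8-block skip is much cheaper: 0.5 + (0.5/128)*8 ~= 0.53
  printf("%g\n", DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*8);
  return 0;
}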

/*
Number of rows in a reference table when referred through a not unique key.
This value is only used when we don't know anything about the key
sql/opt_range.cc (493 lines changed)
@@ -307,12 +307,18 @@ static ha_rows check_quick_keys(PARAM *param,uint index,SEL_ARG *key_tree,

QUICK_RANGE_SELECT *get_quick_select(PARAM *param,uint index,
SEL_ARG *key_tree, MEM_ROOT *alloc = NULL);
static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
key_map& needed_reg, TABLE *head,
static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
key_map& needed_reg,
bool index_read_can_be_used,
double* read_time,
ha_rows* records,
double *read_time,
ha_rows *records,
SEL_ARG*** key_to_read);
static int get_index_merge_params(PARAM *param, key_map& needed_reg,
SEL_IMERGE *imerge, double *read_time,
ha_rows* imerge_rows);
inline double get_index_only_read_time(PARAM* param, ha_rows records,
int keynr);

#ifndef DBUG_OFF
static void print_quick_sel_imerge(QUICK_INDEX_MERGE_SELECT *quick,
const key_map *needed_reg);
@@ -453,7 +459,7 @@ int SEL_IMERGE::or_sel_tree_with_checks(PARAM *param, SEL_TREE *new_tree)
}
}

/* new tree cannot be combined with any of existing trees */
/* New tree cannot be combined with any of existing trees. */
return or_sel_tree(param, new_tree);
}

@@ -483,7 +489,6 @@ int SEL_IMERGE::or_sel_imerge_with_checks(PARAM *param, SEL_IMERGE* imerge)

/*
Perform AND operation on two index_merge lists and store result in *im1.

*/

inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
@@ -503,18 +508,16 @@ inline void imerge_list_and_list(List<SEL_IMERGE> *im1, List<SEL_IMERGE> *im2)
i.e. all conjuncts except the first one are currently dropped.
This is done to avoid producing N*K ways to do index_merge.

If (a_1||b_1) produce a condition that is always true, NULL is
returned and index_merge is discarded. (while it is actually
possible to try harder).
If (a_1||b_1) produce a condition that is always true, NULL is returned
and index_merge is discarded (while it is actually possible to try
harder).

As a consequence of this, choice of keys to do index_merge
read may depend on the order of conditions in WHERE part of
the query.
As a consequence of this, choice of keys to do index_merge read may depend
on the order of conditions in WHERE part of the query.

RETURN
0 OK, result is stored in *im1
0 OK, result is stored in *im1
other Error, both passed lists are unusable

*/

int imerge_list_or_list(PARAM *param,
@@ -533,7 +536,7 @@ int imerge_list_or_list(PARAM *param,
Perform OR operation on index_merge list and key tree.

RETURN
0 OK, result is stored in *im1
0 OK, result is stored in *im1.
other Error

*/
@@ -685,10 +688,10 @@ bool
QUICK_INDEX_MERGE_SELECT::push_quick_back(QUICK_RANGE_SELECT *quick_sel_range)
{
/*
Save quick_select that does scan on clustered covering primary key as
it will be processed separately
Save quick_select that does scan on clustered primary key as it will be
processed separately.
*/
if (head->file->primary_key_is_clustered_covering() &&
if (head->file->primary_key_is_clustered() &&
quick_sel_range->index == head->primary_key)
pk_quick_select= quick_sel_range;
else
@@ -1001,7 +1004,7 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
ha_rows found_records;
double found_read_time= read_time;

if (!get_quick_select_params(tree, param, needed_reg, head, true,
if (!get_quick_select_params(tree, &param, needed_reg, true,
&found_read_time, &found_records,
&best_key))
{
@@ -1021,120 +1024,57 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
}

/*
btw, tree type SEL_TREE::INDEX_MERGE was not introduced
intentionally
Btw, tree type SEL_TREE::INDEX_MERGE was not introduced
intentionally.
*/

/* if no range select could be built, try using index_merge */
/* If no range select could be built, try using index_merge. */
if (!quick && !tree->merges.is_empty())
{
DBUG_PRINT("info",("No range reads possible,"
" trying to construct index_merge"));
SEL_IMERGE *imerge;
SEL_IMERGE *min_imerge= NULL;
double min_imerge_cost= DBL_MAX;
double min_imerge_read_time;
ha_rows min_imerge_records;

if (!head->used_keys.is_clear_all())
{
int key_for_use= find_shortest_key(head, &head->used_keys);
ha_rows total_table_records= (0 == head->file->records)? 1 :
head->file->records;
read_time = get_index_only_read_time(&param, total_table_records,
key_for_use);
DBUG_PRINT("info",
("'all' scan will be using key %d, read time %g",
key_for_use, read_time));
}

min_imerge_read_time=read_time;
/*
Ok, read_time contains best 'all' read time.
Now look for index_merge with cost < read_time
*/
List_iterator_fast<SEL_IMERGE> it(tree->merges);
/* find index_merge with minimal cost */
while ((imerge= it++))
{
bool imerge_failed= false;
double imerge_cost= 0;
ha_rows imerge_total_records= 0;
double tree_read_time;
ha_rows tree_records;
imerge->best_keys=
(SEL_ARG***)alloc_root(&alloc,
(imerge->trees_next - imerge->trees)*
sizeof(void*));
/*
It may be possible to use different keys for index_merge, e.g for
queries like
...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
We assume we get the best index_merge if we choose the best key
read inside each of the conjuncts.
*/
for (SEL_TREE **ptree= imerge->trees;
ptree != imerge->trees_next;
ptree++)
{
tree_read_time= read_time;
if (get_quick_select_params(*ptree, param, needed_reg, head,
false,
&tree_read_time, &tree_records,
&(imerge->best_keys[ptree -
imerge->trees])))
imerge_failed= true;
imerge_cost += tree_read_time;
imerge_total_records += tree_records;
}

if (!imerge_failed)
{
imerge_total_records= min(imerge_total_records,
head->file->records);
imerge_cost += imerge_total_records / TIME_FOR_COMPARE;
if (imerge_cost < min_imerge_cost)
{
min_imerge= imerge;
min_imerge_cost= imerge_cost;
min_imerge_records= imerge_total_records;
}
}
if (!get_index_merge_params(&param, needed_reg, imerge,
&min_imerge_read_time,
&min_imerge_records))
min_imerge= imerge;
}

if (!min_imerge)
goto end_free;

records= min_imerge_records;
/*
Ok, got minimal index merge, *min_imerge, with cost min_imerge_cost
Compare its cost with "all" scan cost (or "all+using index" if
it is possible) and choose the best.
*/

if (!head->used_keys.is_clear_all())
{
/* check if "ALL" +"using index" read would be faster */
int key_for_use= find_shortest_key(head, &head->used_keys);
ha_rows total_table_records= (0 == head->file->records)? 1 :
head->file->records;
uint keys_per_block= (head->file->block_size/2/
(head->key_info[key_for_use].key_length+
head->file->ref_length) + 1);
double all_index_scan_read_time= ((double)(total_table_records+
keys_per_block-1)/
(double) keys_per_block);

DBUG_PRINT("info",
("'all' scan will be using key %d, read time %g",
key_for_use, all_index_scan_read_time));
if (all_index_scan_read_time < min_imerge_cost)
{
DBUG_PRINT("info",
("index merge would be slower, "
"will do full 'index' scan"));
goto end_free;
}
}
else
{
/* check if "ALL" would be faster */
if (read_time < min_imerge_cost)
{
DBUG_PRINT("info",
("index merge would be slower, "
"will do full table scan"));
goto end_free;
}
}

/* Ok, using index_merge is the best option, so construct it. */
if (!(quick= quick_imerge= new QUICK_INDEX_MERGE_SELECT(thd, head)))
goto end_free;

quick->records= min_imerge_records;
quick->read_time= min_imerge_cost;
quick->read_time= min_imerge_read_time;

my_pthread_setspecific_ptr(THR_MALLOC, &quick_imerge->alloc);
@@ -1152,10 +1092,10 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use,
&quick_imerge->alloc)))
{
new_quick->records= min_imerge_records;
new_quick->read_time= min_imerge_cost;
new_quick->read_time= min_imerge_read_time;
/*
QUICK_RANGE_SELECT::QUICK_RANGE_SELECT leaves THR_MALLOC
pointing to its allocator, restore it back
pointing to its allocator, restore it back.
*/
quick_imerge->last_quick_select= new_quick;
@@ -1214,15 +1154,265 @@ end:

/*
Calculate quick range select read time, # of records, and best key to use
without constructing QUICK_RANGE_SELECT object.
Calculate index merge cost and save parameters for its construction.

SYNOPSIS
get_index_merge_params()
param in parameter with structure.
needed_reg in/out needed_reg from this SQL_SELECT
imerge in index_merge description structure
read_time in/out in: cost of an existing way to read a table
out: cost of index merge
imerge_rows out pessimistic estimate of # of rows to be retrieved

RETURN
0 Cost of this index_merge is less than passed *read_time,
*imerge_rows and *read_time contain new index_merge parameters.
1 Cost of this index_merge is more than *read_time,
*imerge_rows and *read_time are not modified.
-1 error

NOTES
index_merge_cost =
cost(index_reads) + (see #1)
cost(rowid_to_row_scan) + (see #2)
cost(unique_use) (see #3)

1. cost(index_reads) =SUM_i(cost(index_read_i))
For non-CPK scans,
cost(index_read_i) = {cost of ordinary 'index only' scan}
For CPK scan,
cost(index_read_i) = {cost of non-'index only' scan}

2. cost(rowid_to_row_scan)
If table PK is clustered then
cost(rowid_to_row_scan) =
{cost of ordinary clustered PK scan with n_ranges=n_rows}

Otherwise, we use the following model to calculate costs:
We need to retrieve n_rows rows from file that occupies n_blocks blocks.
We assume that offsets of rows we need are independent variates with
uniform distribution in [0..max_file_offset] range.

We'll denote a block as "busy" if it contains row(s) we need to retrieve
and "empty" if it doesn't contain rows we need.

Probability that a block is empty is (1 - 1/n_blocks)^n_rows (this
applies to any block in file). Let x_i be a variate taking value 1 if
block #i is empty and 0 otherwise.

Then E(x_i) = (1 - 1/n_blocks)^n_rows;

E(n_empty_blocks) = E(sum(x_i)) = sum(E(x_i)) =
= n_blocks * ((1 - 1/n_blocks)^n_rows) =
~= n_blocks * exp(-n_rows/n_blocks).

E(n_busy_blocks) = n_blocks*(1 - (1 - 1/n_blocks)^n_rows) =
~= n_blocks * (1 - exp(-n_rows/n_blocks)).

Average size of "hole" between neighbor non-empty blocks is
E(hole_size) = n_blocks/E(n_busy_blocks).

The total cost of reading all needed blocks in one "sweep" is:

E(n_busy_blocks)*
(DISK_SEEK_BASE_COST + DISK_SEEK_PROP_COST*n_blocks/E(n_busy_blocks)).

3. Cost of Unique use is calculated in Unique::get_use_cost function.
*/

static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
key_map& needed_reg, TABLE *head,
static int get_index_merge_params(PARAM *param, key_map& needed_reg,
SEL_IMERGE *imerge, double *read_time,
ha_rows* imerge_rows)
{
double imerge_cost= 0.0; /* cost of this index_merge */
bool imerge_too_expensive= false;
double tree_read_time;
ha_rows tree_records;
bool pk_is_clustered= param->table->file->primary_key_is_clustered();
bool have_cpk_scan;
ha_rows records_for_unique= 0;
ha_rows cpk_records= 0;

DBUG_ENTER("get_index_merge_params");

/* allocate structs to save construction info */
imerge->best_keys=
(SEL_ARG***)alloc_root(param->mem_root,
(imerge->trees_next - imerge->trees)*
sizeof(void*));
/*
PHASE 1: get the best keys to use for this index_merge
*/

/*
It may be possible to use different keys for index_merge scans,
e.g. for query like
...WHERE (key1 < c2 AND key2 < c2) OR (key3 < c3 AND key4 < c4)
we have to make choice between key1 and key2 for one scan and
between key3,key4 for another.
We assume we'll get the best way if we choose the best key read
inside each of the conjuncts. Comparison is done without 'using index'.
*/
for (SEL_TREE **ptree= imerge->trees;
ptree != imerge->trees_next;
ptree++)
{
SEL_ARG **tree_best_key;
uint keynr;

tree_read_time= *read_time;
if (get_quick_select_params(*ptree, param, needed_reg, false,
&tree_read_time, &tree_records,
&tree_best_key))
{
/*
Non-'index only' range scan on one of the index_merge keys is more
expensive than other available option. The entire index_merge will be
more expensive then, too. We continue here only to update SQL_SELECT
members.
*/
imerge_too_expensive= true;
}

if (imerge_too_expensive)
continue;

imerge->best_keys[ptree - imerge->trees]= tree_best_key;
keynr= param->real_keynr[(tree_best_key-(*ptree)->keys)];

if (pk_is_clustered && keynr == param->table->primary_key)
{
/* This is a Clustered PK scan, it will be done without 'index only' */
imerge_cost += tree_read_time;
have_cpk_scan= true;
cpk_records= tree_records;
}
else
{
/* Non-CPK scan, calculate time to do it using 'index only' */
imerge_cost += get_index_only_read_time(param, tree_records,keynr);
records_for_unique += tree_records;
}
}

if (imerge_too_expensive)
DBUG_RETURN(1);

if ((imerge_cost > *read_time) ||
((records_for_unique + cpk_records) >= param->table->file->records) &&
*read_time != DBL_MAX)
{
/* Bail out if it is obvious that index_merge would be more expensive */
DBUG_RETURN(1);
}

if (have_cpk_scan)
{
/*
Add one ROWID comparison for each row retrieved on non-CPK scan.
(it is done in QUICK_RANGE_SELECT::row_in_ranges)
*/
imerge_cost += records_for_unique / TIME_FOR_COMPARE_ROWID;
}

/* PHASE 2: Calculate cost(rowid_to_row_scan) */
if (pk_is_clustered)
{
imerge_cost +=
param->table->file->read_time(param->table->primary_key,
records_for_unique,
records_for_unique)
+ rows2double(records_for_unique) / TIME_FOR_COMPARE;
}
else
{
double n_blocks=
ceil((double)(longlong)param->table->file->data_file_length / IO_SIZE);
/* Don't ceil the following as it is an estimate */
double busy_blocks=
n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, records_for_unique));

JOIN *join= param->thd->lex->select_lex.join;
if (!join || join->tables == 1)
{
imerge_cost += busy_blocks*(DISK_SEEK_BASE_COST +
DISK_SEEK_PROP_COST*n_blocks/busy_blocks);
}
else
{
/*
It can be a join with source table being non-last table, so assume
that disk seeks are random here.
(TODO it is possible to determine if this *is* a last table in 'index
checked for each record'-type join)
*/
imerge_cost += busy_blocks;
}
}

/* PHASE 3: Add Unique operations cost */
imerge_cost += Unique::get_use_cost(param->mem_root, records_for_unique,
param->table->file->ref_length,
param->thd->variables.sortbuff_size);
if (imerge_cost < *read_time)
{
*read_time= imerge_cost;
records_for_unique += cpk_records;
*imerge_rows= min(records_for_unique, param->table->file->records);
DBUG_RETURN(0);
}
DBUG_RETURN(1);
}
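The non-clustered branch of PHASE 2 above implements the sweep model described in the preceding comment. For readers who want to plug in numbers outside the server, here is a standalone restatement; the DISK_SEEK_* values mirror the defines added to mysql_priv.h in this commit, and the row/block counts are made-up examples:

#include <cmath>
#include <cstdio>

static const double DISK_SEEK_BASE_COST= 0.5;
static const double DISK_SEEK_PROP_COST= 0.5/128;

// Expected cost, in seek units, of one left-to-right "sweep" that fetches
// n_rows rows scattered uniformly over a data file of n_blocks blocks.
double sweep_cost(double n_rows, double n_blocks)
{
  // E(n_busy_blocks) = n_blocks * (1 - (1 - 1/n_blocks)^n_rows)
  double busy_blocks= n_blocks * (1.0 - pow(1.0 - 1.0/n_blocks, n_rows));
  // one seek per busy block; the proportional part covers the average hole
  return busy_blocks * (DISK_SEEK_BASE_COST +
                        DISK_SEEK_PROP_COST * n_blocks / busy_blocks);
}

int main()
{
  printf("%.0f\n", sweep_cost(1000.0, 100000.0));   // few rows, many blocks
  printf("%.0f\n", sweep_cost(50000.0, 100000.0));  // denser: cheaper per row
  return 0;
}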

/*
Calculate cost of 'index only' scan for given index and number of records.
(We can resolve this by only reading through this key.)

SYNOPSIS
get_index_only_read_time()
param parameters structure
records #of records to read
keynr key to read

NOTES
It is assumed that we will read through the whole key range and that all
key blocks are half full (normally things are much better).
*/

inline double get_index_only_read_time(PARAM* param, ha_rows records, int keynr)
{
double read_time;
uint keys_per_block= (param->table->file->block_size/2/
(param->table->key_info[keynr].key_length+
param->table->file->ref_length) + 1);
read_time=((double) (records+keys_per_block-1)/
(double) keys_per_block);
return read_time;
}
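To make the half-full-block assumption concrete, a standalone restatement with made-up sizes (block, key and rowid lengths are assumptions, not values from the patch):

#include <cstdio>

// Standalone restatement of the estimate above.
double index_only_read_time(double records, unsigned block_size,
                            unsigned key_length, unsigned ref_length)
{
  // half-full index blocks => block_size/2 usable bytes per block
  unsigned keys_per_block= block_size/2/(key_length + ref_length) + 1;
  return (records + keys_per_block - 1) / (double) keys_per_block;
}

int main()
{
  // e.g. 1024-byte blocks, 12-byte key parts, 4-byte row references:
  printf("%.0f\n", index_only_read_time(10000, 1024, 12, 4)); // ~304 blocks
  return 0;
}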

/*
Calculate quick range select read time, # of records, and best key to use
without constructing QUICK_RANGE_SELECT object.
SYNOPSIS
get_quick_select_params
tree in make range select for this SEL_TREE
param in parameters from test_quick_select
needed_reg in/out other table data needed by this quick_select
index_read_can_be_used if false, assume that 'index only' option is not
available.
read_time out read time estimate
records out # of records estimate
key_to_read out SEL_ARG to be used for creating quick select
*/

static int get_quick_select_params(SEL_TREE *tree, PARAM *param,
key_map& needed_reg,
bool index_read_can_be_used,
double* read_time, ha_rows* records,
SEL_ARG*** key_to_read)
double *read_time, ha_rows *records,
SEL_ARG ***key_to_read)
{
int idx;
int result = 1;
@@ -1233,7 +1423,7 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
*/
SEL_ARG **key,**end;

for (idx= 0,key=tree->keys, end=key+param.keys ;
for (idx= 0,key=tree->keys, end=key+param->keys ;
key != end ;
key++,idx++)
{
@@ -1241,16 +1431,18 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
double found_read_time;
if (*key)
{
uint keynr= param.real_keynr[idx];
uint keynr= param->real_keynr[idx];
if ((*key)->type == SEL_ARG::MAYBE_KEY ||
(*key)->maybe_flag)
needed_reg.set_bit(keynr);

bool read_index_only= index_read_can_be_used? head->used_keys.is_set(keynr): false;
found_records=check_quick_select(&param, idx, *key);
bool read_index_only= index_read_can_be_used?
param->table->used_keys.is_set(keynr): false;
found_records=check_quick_select(param, idx, *key);

if (found_records != HA_POS_ERROR && found_records > 2 &&
read_index_only &&
(head->file->index_flags(keynr) & HA_KEY_READ_ONLY))
(param->table->file->index_flags(keynr) & HA_KEY_READ_ONLY))
{
/*
We can resolve this by only reading through this key.
@@ -1258,21 +1450,17 @@ static int get_quick_select_params(SEL_TREE *tree, PARAM& param,
and that all key blocks are half full (normally things are
much better).
*/
uint keys_per_block= (head->file->block_size/2/
(head->key_info[keynr].key_length+
head->file->ref_length) + 1);
found_read_time=((double) (found_records+keys_per_block-1)/
(double) keys_per_block);
found_read_time=get_index_only_read_time(param, found_records, keynr);
}
else
found_read_time= (head->file->read_time(keynr,
param.range_count,
found_read_time= (param->table->file->read_time(keynr,
param->range_count,
found_records)+
(double) found_records / TIME_FOR_COMPARE);
if (*read_time > found_read_time && found_records != HA_POS_ERROR)
{
*read_time= found_read_time;
*records= found_records;
*records= found_records;
*key_to_read= key;
result = 0;
}
@@ -3118,8 +3306,8 @@ err:
/*
Fetch all row ids into unique.

If table has a clustered primary key(PK) that contains all rows (bdb and
innodb currently) and one of the index_merge scans is a scan on primary key,
If table has a clustered primary key that covers all rows (true for bdb
and innodb currently) and one of the index_merge scans is a scan on PK,
then
primary key scan rowids are not put into Unique and also
rows that will be retrieved by PK scan are not put into Unique
@@ -3134,15 +3322,15 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
int result;
DBUG_ENTER("QUICK_INDEX_MERGE_SELECT::prepare_unique");

/* we're going to just read rowids. */
/* We're going to just read rowids. */
head->file->extra(HA_EXTRA_KEYREAD);

/*
Make innodb retrieve all PK member fields, so
* ha_innobase::position (which uses them) call works.
* we filter out rows retrieved by CCPK.
* We can filter out rows that will be retrieved by clustered PK.
(This also creates a deficiency - it is possible that we will retrieve
parts of key that are not used by current query at all)
parts of key that are not used by current query at all.)
*/
head->file->extra(HA_EXTRA_RETRIEVE_ALL_COLS);
@@ -3170,22 +3358,15 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()

if (result)
{
/*
table read error (including HA_ERR_END_OF_FILE on last quick select
in index_merge)
*/
if (result != HA_ERR_END_OF_FILE)
{
DBUG_RETURN(result);
}
else
break;
break;
}

if (thd->killed)
DBUG_RETURN(1);

/* skip row if it will be retrieved by clustered covering PK scan */
/* skip row if it will be retrieved by clustered PK scan */
if (pk_quick_select && pk_quick_select->row_in_ranges())
continue;
@@ -3207,14 +3388,16 @@ int QUICK_INDEX_MERGE_SELECT::prepare_unique()
DBUG_RETURN(result);
}

/*
Get next row for index_merge.
NOTES
The rows are read from
1. rowids stored in Unique.
2. QUICK_RANGE_SELECT with clustered primary key (if any).
the sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
The rows are read from
1. rowids stored in Unique.
2. QUICK_RANGE_SELECT with clustered primary key (if any).
The sets of rows retrieved in 1) and 2) are guaranteed to be disjoint.
*/

int QUICK_INDEX_MERGE_SELECT::get_next()
{
int result;
@@ -3228,8 +3411,8 @@ int QUICK_INDEX_MERGE_SELECT::get_next()
if (result == -1)
{
result= HA_ERR_END_OF_FILE;
/* All rows from Unique have been retrieved, do a CCPK scan */
end_read_record(&read_record);
/* All rows from Unique have been retrieved, do a clustered PK scan */
if(pk_quick_select)
{
doing_pk_scan= true;
@@ -3275,8 +3458,9 @@ int QUICK_RANGE_SELECT::get_next()
if (!cur_range)
range= *(cur_range= (QUICK_RANGE**)ranges.buffer);
else
range= (cur_range == ((QUICK_RANGE**)ranges.buffer + ranges.elements - 1))?
NULL: *(++cur_range);
range=
(cur_range == ((QUICK_RANGE**)ranges.buffer + ranges.elements - 1))?
NULL: *(++cur_range);

if (!range)
DBUG_RETURN(HA_ERR_END_OF_FILE); // All ranges used
@@ -3371,16 +3555,17 @@ int QUICK_RANGE_SELECT::cmp_next(QUICK_RANGE *range_arg)

/*
Check if current row will be retrieved by this QUICK_RANGE_SELECT
(this is used to filter out CCPK scan rows in index_merge).

NOTES
It is assumed that currently a scan is being done on another index
which reads all necessary parts of the index that is scanned by this
quick select.

The implementation does a binary search on sorted array of disjoint
ranges, without taking size of range into account.

This function is used to filter out clustered PK scan rows in
index_merge quick select.

RETURN
true if current row will be retrieved by this quick select
false if not
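A minimal standalone sketch of the binary search described above, assuming the ranges are kept sorted and disjoint (toy types, not the server's QUICK_RANGE structures):

#include <cstdio>
#include <vector>

struct toy_range { int min_key, max_key; };   // inclusive endpoints

// Binary search over sorted, disjoint ranges: true if key falls inside one.
bool row_in_ranges(const std::vector<toy_range>& ranges, int key)
{
  int lo= 0, hi= (int) ranges.size();
  while (lo < hi)
  {
    int mid= lo + (hi - lo) / 2;
    if (key < ranges[mid].min_key)
      hi= mid;                 // key lies left of this range
    else if (key > ranges[mid].max_key)
      lo= mid + 1;             // key lies right of this range
    else
      return true;             // min_key <= key <= max_key
  }
  return false;
}

int main()
{
  std::vector<toy_range> pk_ranges= { {10, 20}, {35, 35}, {50, 60} };
  printf("%d %d\n", row_in_ranges(pk_ranges, 36),
                    row_in_ranges(pk_ranges, 55));   // prints: 0 1
  return 0;
}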

@@ -118,11 +118,13 @@ public:
protected:
friend void print_quick_sel_range(QUICK_RANGE_SELECT *quick,
const key_map* needed_reg);
friend QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref);
friend
QUICK_RANGE_SELECT *get_quick_select_for_ref(THD *thd, TABLE *table,
struct st_table_ref *ref);
friend bool get_quick_keys(struct st_qsel_param *param,
QUICK_RANGE_SELECT *quick,KEY_PART *key,
SEL_ARG *key_tree,char *min_key,uint min_key_flag,
SEL_ARG *key_tree,
char *min_key, uint min_key_flag,
char *max_key, uint max_key_flag);
friend QUICK_RANGE_SELECT *get_quick_select(struct st_qsel_param*,uint idx,
SEL_ARG *key_tree,
@@ -160,58 +162,62 @@ public:


/*
QUICK_INDEX_MERGE_SELECT - index_merge acces method quick select.
QUICK_INDEX_MERGE_SELECT - index_merge access method quick select.

QUICK_INDEX_MERGE_SELECT uses
* QUICK_RANGE_SELECTs to get rows
* Unique class to remove duplicate rows
QUICK_INDEX_MERGE_SELECT uses
* QUICK_RANGE_SELECTs to get rows
* Unique class to remove duplicate rows

INDEX MERGE OPTIMIZER
Current implementation doesn't detect all cases where index_merge could be
used, in particular:
* index_merge will never be used if range scan is possible (even if range
scan is more expensive)
INDEX MERGE OPTIMIZER
Current implementation doesn't detect all cases where index_merge could
be used, in particular:
* index_merge will never be used if range scan is possible (even if
range scan is more expensive)

* index_merge+'using index' is not supported (this the consequence of the
above restriction)
* index_merge+'using index' is not supported (this is the consequence of
the above restriction)

* If WHERE part contains complex nested AND and OR conditions, some ways to
retrieve rows using index_merge will not be considered. The choice of
read plan may depend on the order of conjuncts/disjuncts in WHERE part of
the query, see comments near SEL_IMERGE::or_sel_tree_with_checks and
imerge_list_or_list function for details.
* If WHERE part contains complex nested AND and OR conditions, some ways
to retrieve rows using index_merge will not be considered. The choice
of read plan may depend on the order of conjuncts/disjuncts in WHERE
part of the query, see comments near imerge_list_or_list and
SEL_IMERGE::or_sel_tree_with_checks functions for details.

* there is no "index_merge_ref" method (but index_merge on non-first table
in join is possible with 'range checked for each record').
* There is no "index_merge_ref" method (but index_merge on non-first
table in join is possible with 'range checked for each record').

See comments around SEL_IMERGE class and test_quick_select for more details.
See comments around SEL_IMERGE class and test_quick_select for more
details.

ROW RETRIEVAL ALGORITHM
ROW RETRIEVAL ALGORITHM

index_merge uses Unique class for duplicates removal. Index merge takes
advantage of clustered covering primary key (CCPK) if the table has one.
The algorithm is as follows:
index_merge uses Unique class for duplicates removal. index_merge takes
advantage of Clustered Primary Key (CPK) if the table has one.
The index_merge algorithm consists of two phases:

prepare() //implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique
{
activate 'index only';
while(retrieve next row for non-CCPK scan)
Phase 1 (implemented in QUICK_INDEX_MERGE_SELECT::prepare_unique):
prepare()
{
if (there is a CCPK scan and row will be retrieved by it)
skip this row;
else
put rowid into Unique;
activate 'index only';
while(retrieve next row for non-CPK scan)
{
if (there is a CPK scan and row will be retrieved by it)
skip this row;
else
put its rowid into Unique;
}
deactivate 'index only';
}
deactivate 'index only';
}

fetch() //implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next calls
{
retrieve all rows from row pointers stored in Unique;
free Unique;
retrieve all rows for CCPK scan;
}

Phase 2 (implemented as sequence of QUICK_INDEX_MERGE_SELECT::get_next
calls):

fetch()
{
retrieve all rows from row pointers stored in Unique;
free Unique;
retrieve all rows for CPK scan;
}
*/

class QUICK_INDEX_MERGE_SELECT : public QUICK_SELECT_I
@@ -239,10 +245,10 @@ public:
/* last element in quick_selects list */
QUICK_RANGE_SELECT* last_quick_select;

/* quick select that uses Covering Clustered Primary Key (NULL if none) */
/* quick select that uses clustered primary key (NULL if none) */
QUICK_RANGE_SELECT* pk_quick_select;

/* true if this select is currently doing a CCPK scan */
/* true if this select is currently doing a clustered PK scan */
bool doing_pk_scan;

Unique *unique;
@@ -98,7 +98,6 @@ void init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
}
}
else if (select && select->quick)
//&& (select->quick->get_type() != QUICK_SELECT_I::QS_TYPE_INDEX_MERGE))
{
DBUG_PRINT("info",("using rr_quick"));
info->read_record=rr_quick;
@@ -1233,7 +1233,8 @@ public:
}

bool get(TABLE *table);

static double get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
ulong max_in_memory_size);
friend int unique_write_to_file(gptr key, element_count count, Unique *unique);
friend int unique_write_to_ptrs(gptr key, element_count count, Unique *unique);
};
sql/uniques.cc (182 lines changed)
@@ -63,12 +63,194 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
comp_func_fixed_arg);
/* If the following fails the next add will also fail */
my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16);
/*
If you change the following, change it in get_max_elements function, too.
*/
max_elements= max_in_memory_size / ALIGN_SIZE(sizeof(TREE_ELEMENT)+size);
open_cached_file(&file, mysql_tmpdir,TEMP_PREFIX, DISK_BUFFER_SIZE,
MYF(MY_WME));
}
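A rough worked example of what max_elements means in practice (all sizes below are assumptions for illustration, not the real struct sizes):

#include <cstdio>

int main()
{
  // Assumed sizes for illustration only:
  unsigned long max_in_memory_size= 256*1024;  // memory given to Unique
  unsigned tree_element_size= 24;              // per-element tree overhead
  unsigned rowid_size= 8;                      // 'size' passed to Unique

  unsigned long max_elements= max_in_memory_size /
                              (tree_element_size + rowid_size);
  printf("%lu\n", max_elements);   // 8192 rowids per in-memory tree
  return 0;
}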

#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

#define M_E (exp(1))

inline double log2_n_fact(double x)
{
return (2 * ( ((x)+1) * log(((x)+1)/M_E) + log(2*M_PI*((x)+1))/2 ) / log(2));
}
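Despite its name, log2_n_fact(x) evaluates 2*log2((x+1)!) via Stirling's approximation, which is exactly the comparison count used later in Unique::get_use_cost. A standalone cross-check against the exact sum (illustrative only, not part of the patch):

#include <cmath>
#include <cstdio>

static const double E_CONST = 2.718281828459045;
static const double PI_CONST= 3.141592653589793;

// Same formula as log2_n_fact above: 2*log2((x+1)!) via Stirling.
double log2_n_fact(double x)
{
  return 2 * ((x+1) * log((x+1)/E_CONST) + log(2*PI_CONST*(x+1))/2) / log(2.0);
}

int main()
{
  // Exact value of 2*sum(log2(k)) for k = 2..N+1, compared to the estimate.
  double n= 1000, exact= 0;
  for (int k= 2; k <= (int) n + 1; k++)
    exact+= 2 * log2((double) k);
  printf("exact=%.1f stirling=%.1f\n", exact, log2_n_fact(n));
  return 0;
}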

/*
Calculate cost of merge_buffers call.

NOTE
See comment near Unique::get_use_cost for cost formula derivation.
*/
static double get_merge_buffers_cost(uint* buff_sizes, uint elem_size,
int last, int f,int t)
{
uint sum= 0;
for (int i=f; i <= t; i++)
sum+= buff_sizes[i];
buff_sizes[last]= sum;

int n_buffers= t - f + 1;
double buf_length= sum*elem_size;

return (((double)buf_length/(n_buffers+1)) / IO_SIZE) * 2 * n_buffers +
buf_length * log(n_buffers) / (TIME_FOR_COMPARE_ROWID * log(2.0));
}

/*
Calculate cost of merging buffers into one in Unique::get, i.e. calculate
how long (in terms of disk seeks) the two calls
merge_many_buffs(...);
merge_buffers(...);
will take.

SYNOPSIS
get_merge_many_buffs_cost()
alloc memory pool to use
maxbuffer # of full buffers.
max_n_elems # of elements in first maxbuffer buffers.
last_n_elems # of elements in last buffer.
elem_size size of buffer element.

NOTES
It is assumed that maxbuffer+1 buffers are merged, first maxbuffer buffers
contain max_n_elems each, last buffer contains last_n_elems elements.

The current implementation does a dumb simulation of merge_many_buffs
actions.

RETURN
>=0 Cost of merge in disk seeks.
<0 Out of memory.
*/
static double get_merge_many_buffs_cost(MEM_ROOT *alloc,
uint maxbuffer, uint max_n_elems,
uint last_n_elems, int elem_size)
{
register int i;
double total_cost= 0.0;
int lastbuff;
uint* buff_sizes;

if (!(buff_sizes= (uint*)alloc_root(alloc, sizeof(uint) * (maxbuffer + 1))))
return -1.0;
for(i = 0; i < (int)maxbuffer; i++)
buff_sizes[i]= max_n_elems;

buff_sizes[maxbuffer]= last_n_elems;

if (maxbuffer >= MERGEBUFF2)
{
/* Simulate merge_many_buff */
while (maxbuffer >= MERGEBUFF2)
{
lastbuff=0;
for (i = 0; i <= (int) maxbuffer - MERGEBUFF*3/2; i += MERGEBUFF)
total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
lastbuff++, i, i+MERGEBUFF-1);

total_cost += get_merge_buffers_cost(buff_sizes, elem_size,
lastbuff++, i, maxbuffer);
maxbuffer= (uint)lastbuff-1;
}
}

/* Simulate final merge_buff call. */
total_cost += get_merge_buffers_cost(buff_sizes, elem_size, 0, 0,
maxbuffer);
return total_cost;
}
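The simulation above mirrors how merge_many_buff repeatedly merges buffers in groups until few enough remain for one final merge. A toy pass-counter with assumed constants (fan-in 7 and threshold 15, the usual MERGEBUFF/MERGEBUFF2 values; treat them as assumptions here, and note that the real loop sizes its last group differently):

#include <cstdio>

int main()
{
  const int fan_in = 7;       // assumed merge fan-in (MERGEBUFF-like)
  const int stop_at= 15;      // assumed threshold (MERGEBUFF2-like)
  int buffers= 100;           // e.g. 100 full trees spilled to disk
  int passes= 0;

  while (buffers >= stop_at)  // same shape as the while-loop above
  {
    // each pass merges groups of roughly fan_in buffers into one
    buffers= (buffers + fan_in - 1) / fan_in;
    passes++;
  }
  printf("%d buffers left after %d passes, then one final merge\n",
         buffers, passes);    // prints: 3 buffers left after 2 passes, ...
  return 0;
}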

/*
Calculate cost of using Unique for processing nkeys elements of size
key_size using max_in_memory_size memory.

RETURN
Use cost as # of disk seeks.

NOTES
cost(using_unique) =
cost(create_trees) + (see #1)
cost(merge) + (see #2)
cost(read_result) (see #3)

1. Cost of trees creation
For each Unique::put operation there will be 2*log2(n+1) element
comparisons, where n runs from 1 to tree_size (we assume that all added
elements are different). Together this gives:

n_compares = 2*(log2(2) + log2(3) + ... + log2(N+1)) = 2*log2((N+1)!) =

= 2*ln((N+1)!) / ln(2) = {using Stirling formula} =

= 2*( (N+1)*ln((N+1)/e) + (1/2)*ln(2*pi*(N+1)) ) / ln(2).

then cost(tree_creation) = n_compares*ROWID_COMPARE_COST;

Total cost of creating trees:
(n_trees - 1)*max_size_tree_cost + non_max_size_tree_cost.

2. Cost of merging.
If only one tree is created by Unique no merging will be necessary.
Otherwise, we model execution of merge_many_buff function and count
#of merges. (The reason behind this is that number of buffers is small,
while size of buffers is big and we don't want to lose precision with
O(x)-style formula)

3. If only one tree is created by Unique no disk io will happen.
Otherwise, ceil(key_len*n_keys) disk seeks are necessary. We assume
these will be random seeks.
*/

double Unique::get_use_cost(MEM_ROOT *alloc, uint nkeys, uint key_size,
ulong max_in_memory_size)
{
ulong max_elements_in_tree;
ulong last_tree_elems;
int n_full_trees; /* number of trees in unique - 1 */
double result;

max_elements_in_tree= max_in_memory_size /
ALIGN_SIZE(sizeof(TREE_ELEMENT)+key_size);
n_full_trees= nkeys / max_elements_in_tree;
last_tree_elems= nkeys % max_elements_in_tree;

/* Calculate cost of creating trees */
result= log2_n_fact(last_tree_elems);
if (n_full_trees)
result+= n_full_trees * log2_n_fact(max_elements_in_tree);
result /= TIME_FOR_COMPARE_ROWID;

/* Calculate cost of merging */
if (!n_full_trees)
return result;

/* There is more than one tree and merging is necessary. */
/* Add cost of writing all trees to disk. */
result += n_full_trees * ceil(key_size*max_elements_in_tree / IO_SIZE);
result += ceil(key_size*last_tree_elems / IO_SIZE);

/* Cost of merge */
result += get_merge_many_buffs_cost(alloc, n_full_trees,
max_elements_in_tree,
last_tree_elems, key_size);
/*
Add cost of reading the resulting sequence, assuming there were no
duplicate elements.
*/
result += ceil((double)key_size*nkeys/IO_SIZE);

return result;
}

Unique::~Unique()
{
close_cached_file(&file);