mirror of
https://github.com/MariaDB/server.git
synced 2026-05-16 20:07:13 +02:00
MWL#90: Non-merged semi-joins
- Take into account that grouping or aggregates decrease join output cardinality. (First code, can't make use of index statistics yet)
This commit is contained in:
parent
ea43df7685
commit
524e6aad73
5 changed files with 220 additions and 20 deletions
|
|
@ -475,10 +475,10 @@ bool Item_subselect::exec()
|
|||
return (res);
|
||||
}
|
||||
|
||||
int Item_subselect::optimize()
|
||||
int Item_subselect::optimize(double *out_rows, double *cost)
|
||||
{
|
||||
int res;
|
||||
res= engine->optimize();
|
||||
res= engine->optimize(out_rows, cost);
|
||||
return res;
|
||||
}
|
||||
|
||||
|
|
@ -4085,16 +4085,218 @@ void subselect_hash_sj_engine::cleanup()
|
|||
result->cleanup(); /* Resets the temp table as well. */
|
||||
}
|
||||
|
||||
JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const);
|
||||
JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab);
|
||||
|
||||
int subselect_hash_sj_engine::optimize()
|
||||
/*
|
||||
Get fanout produced by tables specified in the table_map
|
||||
*/
|
||||
|
||||
double get_fanout_with_deps(JOIN *join, table_map tset)
|
||||
{
|
||||
/* First, recursively get all tables we depend on */
|
||||
table_map deps_to_check= tset;
|
||||
table_map checked_deps= 0;
|
||||
table_map further_deps;
|
||||
do
|
||||
{
|
||||
further_deps= 0;
|
||||
Table_map_iterator tm_it(deps_to_check);
|
||||
int tableno;
|
||||
while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
|
||||
{
|
||||
/* get tableno's dependency tables that are not in needed_set */
|
||||
further_deps |= join->map2table[tableno]->ref.depend_map & ~checked_deps;
|
||||
}
|
||||
|
||||
checked_deps |= deps_to_check;
|
||||
deps_to_check= further_deps;
|
||||
} while (further_deps != 0);
|
||||
|
||||
|
||||
/* Now, walk the join order and calculate the fanout */
|
||||
double fanout= 1;
|
||||
for (JOIN_TAB *tab= first_top_level_tab(join, WITHOUT_CONST_TABLES); tab;
|
||||
tab= next_top_level_tab(join, tab))
|
||||
{
|
||||
fanout *= (tab->records_read && !tab->emb_sj_nest) ?
|
||||
rows2double(tab->records_read) : 1;
|
||||
}
|
||||
return fanout;
|
||||
}
|
||||
|
||||
|
||||
#if 0
/*
  Work-in-progress (disabled): look at GROUP BY columns and find out which
  indexes could be used to bring the group count estimate down.

  For every GROUP BY item that is a plain field, the keys that start with the
  field (key_start) and the keys that merely contain it (part_of_key minus
  key_start) are recorded per table. Tables referred to by any non-field
  GROUP BY item are collected in non_key_used and are excluded from the
  second pass, which is still only a skeleton.
*/
void check_out_index_stats(JOIN *join)
{
  ORDER *order;

  /*
    First, collect the keys that we can use in each table.
    We can use a key if
    - all tables refer to it.
  */
  key_map key_start_use[MAX_TABLES];
  key_map key_infix_use[MAX_TABLES];
  table_map key_used=0;
  table_map non_key_used= 0;

  bzero(&key_start_use, sizeof(key_start_use)); //psergey-todo: safe initialization!
  bzero(&key_infix_use, sizeof(key_infix_use));

  for (order= join->group_list; order; order= order->next)
  {
    Item *item= order->item[0];

    if (item->real_type() == Item::FIELD_ITEM)
    {
      if (item->used_tables() & OUTER_REF_TABLE_BIT)
        continue; /* outside references are like constants for us */

      Field *field= ((Item_field*)item->real_item())->field;
      uint table_no= field->table->tablenr;
      table_map table_bit= table_map(1) << table_no;

      /* Bitwise test: has this table already been used by a non-field item? */
      if (!(non_key_used & table_bit) &&
          !field->part_of_key.is_clear_all())
      {
        key_map infix_map= field->part_of_key;
        infix_map.subtract(field->key_start);
        key_start_use[table_no].merge(field->key_start);
        key_infix_use[table_no].merge(infix_map);
        /* key_used is a table bitmap, so set the table's bit, not its number */
        key_used |= table_bit;
      }
      continue;
    }
    /*
      Note: the below will cause clauses like GROUP BY YEAR(date) not to be
      handled.
    */
    non_key_used |= item->used_tables();
  }

  Table_map_iterator tm_it(key_used & ~non_key_used);
  int tableno;
  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
  {
    /* Walk the keys collected for this particular table */
    key_map::iterator key_it(key_start_use[tableno]);
    int keyno;
    while ((keyno = key_it.next_bit()) != key_map::iterator::BITMAP_END)
    {
      for (order= join->group_list; order; order= order->next)
      {
        Item *item= order->item[0];
        if (item->used_tables() & (table_map(1) << tableno))
        {
          DBUG_ASSERT(item->real_type() == Item::FIELD_ITEM);
        }
      }
      /*
      if (continuation)
      {
        walk through list and find which key parts are occupied;
        // note that the above can't be made any faster.
      }
      else
        use rec_per_key[0];

      find out the cardinality.
      check if cardinality decreases if we use it;
      */
    }
  }
}
#endif
|
||||
|
||||
|
||||
/*
  Estimate the number of rows a join produces after GROUP BY is applied.

  @param join  Join with a GROUP BY list (join->group_list)

  @return
    HA_POS_ERROR (converted to double) if any GROUP BY item has
    RAND_TABLE_BIT in its used_tables(); otherwise the fanout, as computed by
    get_fanout_with_deps(), of the tables referred to from the GROUP BY list.
*/
double get_post_group_estimate(JOIN* join)
{
  table_map tables_in_group_list= table_map(0);

  /* Find out which tables are used in GROUP BY list */
  for (ORDER *order= join->group_list; order; order= order->next)
  {
    Item *item= order->item[0];
    if (item->used_tables() & RAND_TABLE_BIT)
      return HA_POS_ERROR; // TODO: change to join-output-estimate

    tables_in_group_list|= item->used_tables();
  }
  tables_in_group_list &= ~PSEUDO_TABLE_BITS;

  /*
    Use join fanouts to calculate the max. number of records in the group-list
  */
  /* Indexed by table number below, hence sized by MAX_TABLES (not MAX_KEY) */
  double fanout_rows[MAX_TABLES];
  bzero(&fanout_rows, sizeof(fanout_rows));
  double out_rows;

  out_rows= get_fanout_with_deps(join, tables_in_group_list);

  /*
    Also generate max. number of records for each of the tables mentioned
    in the group-list. We'll use that as a baseline number that we'll try to
    reduce by using
     - #table-records
     - index statistics.

    The per-table numbers are only collected here; nothing reads them yet.
  */
  Table_map_iterator tm_it(tables_in_group_list);
  int tableno;
  while ((tableno = tm_it.next_bit()) != Table_map_iterator::BITMAP_END)
  {
    fanout_rows[tableno]= get_fanout_with_deps(join, table_map(1) << tableno);
  }

  /*
    Try to bring down estimates using index statistics.
  */
  //check_out_index_stats(join);
  return out_rows;
}
|
||||
|
||||
|
||||
int subselect_hash_sj_engine::optimize(double *out_rows, double *cost)
|
||||
{
|
||||
int res;
|
||||
DBUG_ENTER("subselect_hash_sj_engine::optimize");
|
||||
SELECT_LEX *save_select= thd->lex->current_select;
|
||||
thd->lex->current_select= materialize_join->select_lex;
|
||||
res= materialize_join->optimize();
|
||||
thd->lex->current_select= save_select;
|
||||
JOIN *join= materialize_join;
|
||||
|
||||
return res;
|
||||
thd->lex->current_select= join->select_lex;
|
||||
res= join->optimize();
|
||||
|
||||
/* Calculate #rows and cost of join execution */
|
||||
get_partial_join_cost(join, join->table_count - join->const_tables,
|
||||
cost, out_rows);
|
||||
|
||||
/*
|
||||
Adjust join output cardinality. There can be these cases:
|
||||
- Have no GROUP BY and no aggregate funcs: we won't get into this
|
||||
function because such join will be processed as a merged semi-join
|
||||
(TODO: does it really mean we don't need to handle such cases here at
|
||||
all? put ASSERT)
|
||||
- Have no GROUP BY but have aggregate funcs: output is 1 record.
|
||||
- Have GROUP BY and have (or not) aggregate funcs: need to adjust output
|
||||
cardinality.
|
||||
*/
|
||||
thd->lex->current_select= save_select;
|
||||
if (!join->group_list && !join->group_optimized_away &&
|
||||
join->tmp_table_param.sum_func_count)
|
||||
{
|
||||
DBUG_PRINT("info",("Materialized join will have only 1 row (has "
|
||||
"aggregates but not GROUP BY"));
|
||||
*out_rows= 1;
|
||||
}
|
||||
|
||||
/* Now with grouping */
|
||||
if (join->group_list)
|
||||
{
|
||||
DBUG_PRINT("info",("Materialized join has grouping, trying to estimate"));
|
||||
double output_rows= get_post_group_estimate(materialize_join);
|
||||
DBUG_PRINT("info",("Got value of %g", output_rows));
|
||||
*out_rows= output_rows;
|
||||
}
|
||||
|
||||
DBUG_RETURN(res);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -147,7 +147,7 @@ public:
|
|||
bool mark_as_dependent(THD *thd, st_select_lex *select, Item *item);
|
||||
void fix_after_pullout(st_select_lex *new_parent, Item **ref);
|
||||
void recalc_used_tables(st_select_lex *new_parent, bool after_pullout);
|
||||
virtual int optimize();
|
||||
virtual int optimize(double *out_rows, double *cost);
|
||||
virtual bool exec();
|
||||
virtual void fix_length_and_dec();
|
||||
table_map used_tables() const;
|
||||
|
|
@ -534,7 +534,7 @@ public:
|
|||
THD * get_thd() { return thd; }
|
||||
virtual int prepare()= 0;
|
||||
virtual void fix_length_and_dec(Item_cache** row)= 0;
|
||||
virtual int optimize() { DBUG_ASSERT(0); return 0; }
|
||||
virtual int optimize(double *out_rows, double *cost) { DBUG_ASSERT(0); return 0; }
|
||||
/*
|
||||
Execute the engine
|
||||
|
||||
|
|
@ -804,7 +804,7 @@ public:
|
|||
bool init_runtime();
|
||||
void cleanup();
|
||||
int prepare() { return 0; } /* Override virtual function in base class. */
|
||||
int optimize();
|
||||
int optimize(double *out_rows, double *cost);
|
||||
int exec();
|
||||
virtual void print(String *str, enum_query_type query_type);
|
||||
uint cols()
|
||||
|
|
|
|||
|
|
@ -825,21 +825,16 @@ void get_delayed_table_estimates(TABLE *table,
|
|||
double *startup_cost)
|
||||
{
|
||||
Item_in_subselect *item= table->pos_in_table_list->jtbm_subselect;
|
||||
item->optimize();
|
||||
double rows;
|
||||
double read_time;
|
||||
|
||||
item->optimize(&rows, &read_time);
|
||||
|
||||
DBUG_ASSERT(item->engine->engine_type() ==
|
||||
subselect_engine::HASH_SJ_ENGINE);
|
||||
|
||||
subselect_hash_sj_engine *hash_sj_engine=
|
||||
((subselect_hash_sj_engine*)item->engine);
|
||||
JOIN *join= hash_sj_engine->materialize_join;
|
||||
|
||||
double rows;
|
||||
double read_time;
|
||||
|
||||
/* Calculate #rows and cost of join execution */
|
||||
get_partial_join_cost(join, join->table_count - join->const_tables,
|
||||
&read_time, &rows);
|
||||
|
||||
*out_rows= (ha_rows)rows;
|
||||
*startup_cost= read_time;
|
||||
|
|
|
|||
|
|
@ -6489,7 +6489,7 @@ JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables with_const
|
|||
/*
  Get the next top-level join tab after 'tab'.

  @param join  The join being walked
  @param tab   Current position in the walk

  @return
    The tab produced by next_breadth_first_tab(), or NULL when that function
    returns NULL (end of the walk) or returns a tab that has bush_root_tab
    set, i.e. one that is not at the top level.
*/
JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
{
  tab= next_breadth_first_tab(join, tab);
  /* tab is NULL at the end of the walk, so check it before dereferencing */
  if (tab && tab->bush_root_tab)
    tab= NULL;
  return tab;
}
|
||||
|
|
@ -9262,6 +9262,8 @@ void JOIN::cleanup(bool full)
|
|||
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
|
||||
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
|
||||
@endcode
|
||||
|
||||
TODO: this function checks ORDER::used, which can only have a value of 0.
|
||||
*/
|
||||
|
||||
static bool
|
||||
|
|
|
|||
|
|
@ -66,7 +66,8 @@ typedef struct st_order {
|
|||
bool counter_used; /* parameter was counter of columns */
|
||||
Field *field; /* If tmp-table group */
|
||||
char *buff; /* If tmp-table group */
|
||||
table_map used, depend_map;
|
||||
table_map used; /* NOTE: the below is only set to 0 but is still used by eq_ref_table */
|
||||
table_map depend_map;
|
||||
} ORDER;
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue