MWL#89: Cost-based choice between Materialization and IN->EXISTS transformation

1. Changed the lazy optimization for subqueries that can be
   materialized into bottom-up optimization during the optimization of
   the main query.

   The main change is implemented by the method
   Item_in_subselect::setup_engine.
  
   All other changes were required to correct problems resulting from
   changing the order of optimization. Most of these problems followed
   the same pattern - there are some shared structures between a
   subquery and its parent query. Depending on which one is optimized
   first (parent or child query), these shared structures may get
   different values, thus resulting in an inconsistent query plan.

2. Changed the code-generation for subquery materialization to be
   performed in runtime memory for each (re)execution, instead of in
   statement memory (once per prepared statement).
   - Item_in_subselect::setup_engine() no longer creates materialization
     related objects in statement memory.
   - Merged subselect_hash_sj_engine::init_permanent and
     subselect_hash_sj_engine::init_runtime into
     subselect_hash_sj_engine::init, which is called for each
     (re)execution.
   - Fixed deletion of the temp table accordingly.


mysql-test/r/subselect_mat.result:
  Adjusted changed EXPLAIN because of earlier optimization of subqueries.
This commit is contained in:
unknown 2010-07-16 13:52:02 +03:00
parent ceb5468fd8
commit 875bd20a73
6 changed files with 146 additions and 121 deletions

View file

@ -1139,7 +1139,7 @@ insert into t1 values (5);
explain select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2 group by b1);
min(a1)
set @@optimizer_switch='default,materialization=off';
@ -1153,7 +1153,7 @@ set @@optimizer_switch='default,semijoin=off';
explain select min(a1) from t1 where 7 in (select b1 from t2);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY NULL NULL NULL NULL NULL NULL NULL Select tables optimized away
2 SUBQUERY t2 system NULL NULL NULL NULL 0 const row not found
2 SUBQUERY NULL NULL NULL NULL NULL NULL NULL no matching row in const table
select min(a1) from t1 where 7 in (select b1 from t2);
min(a1)
set @@optimizer_switch='default,materialization=off';

View file

@ -166,6 +166,7 @@ void Item_in_subselect::cleanup()
/**
  Destroy the subselect item.

  Deletes the engine object this item currently points to, then resets the
  pointer to NULL so that no stale address remains in the member after the
  engine has been freed.
*/
Item_subselect::~Item_subselect()
{
delete engine;
engine= NULL;
}
Item_subselect::trans_res
@ -2220,73 +2221,73 @@ void Item_in_subselect::update_used_tables()
bool Item_in_subselect::setup_engine()
{
subselect_hash_sj_engine *new_engine= NULL;
bool res= FALSE;
subselect_hash_sj_engine *mat_engine= NULL;
subselect_single_select_engine *select_engine;
DBUG_ENTER("Item_in_subselect::setup_engine");
if (engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE)
{
/* Create/initialize objects in permanent memory. */
subselect_single_select_engine *old_engine;
Query_arena *arena= thd->stmt_arena, backup;
old_engine= (subselect_single_select_engine*) engine;
if (arena->is_conventional())
arena= 0;
else
thd->set_n_backup_active_arena(arena, &backup);
if (!(new_engine= new subselect_hash_sj_engine(thd, this,
old_engine)) ||
new_engine->init_permanent(unit->get_unit_column_types()))
{
Item_subselect::trans_res trans_res;
/*
If for some reason we cannot use materialization for this IN predicate,
delete all materialization-related objects, and apply the IN=>EXISTS
transformation.
*/
delete new_engine;
new_engine= NULL;
exec_method= NOT_TRANSFORMED;
if (left_expr->cols() == 1)
trans_res= single_value_in_to_exists_transformer(old_engine->join,
&eq_creator);
else
trans_res= row_value_in_to_exists_transformer(old_engine->join);
res= (trans_res != Item_subselect::RES_OK);
}
if (new_engine)
engine= new_engine;
if (arena)
thd->restore_active_arena(arena, &backup);
}
else
{
DBUG_ASSERT(engine->engine_type() == subselect_engine::HASH_SJ_ENGINE);
new_engine= (subselect_hash_sj_engine*) engine;
}
/* Initializations done in runtime memory, repeated for each execution. */
if (new_engine)
SELECT_LEX *save_select= thd->lex->current_select;
thd->lex->current_select= get_select_lex();
int res= thd->lex->current_select->join->optimize();
thd->lex->current_select= save_select;
if (res)
DBUG_RETURN(TRUE);
/*
The select_engine (that executes transformed IN=>EXISTS subselects) is
pre-created at parse time, and is stored in statement memory (preserved
across PS executions).
*/
DBUG_ASSERT(engine->engine_type() == subselect_engine::SINGLE_SELECT_ENGINE);
select_engine= (subselect_single_select_engine*) engine;
/* Create/initialize execution objects. */
if (!(mat_engine= new subselect_hash_sj_engine(thd, this, select_engine)))
DBUG_RETURN(TRUE);
if (mat_engine->init(&select_engine->join->fields_list))
{
Item_subselect::trans_res trans_res;
/*
Reset the LIMIT 1 set in Item_exists_subselect::fix_length_and_dec.
TODO:
Currently we set the subquery LIMIT to infinity, and this is correct
because we forbid at parse time LIMIT inside IN subqueries (see
Item_in_subselect::test_limit). However, once we allow this, here
we should set the correct limit if given in the query.
If for some reason we cannot use materialization for this IN predicate,
delete all materialization-related objects, and apply the IN=>EXISTS
transformation.
*/
unit->global_parameters->select_limit= NULL;
if ((res= new_engine->init_runtime()))
DBUG_RETURN(res);
delete mat_engine;
mat_engine= NULL;
exec_method= NOT_TRANSFORMED;
if (left_expr->cols() == 1)
trans_res= single_value_in_to_exists_transformer(select_engine->join,
&eq_creator);
else
trans_res= row_value_in_to_exists_transformer(select_engine->join);
/*
The IN=>EXISTS transformation above injects new predicates into the
WHERE and HAVING clauses. Since the subquery was already optimized,
below we force its reoptimization with the new injected conditions
by the first call to subselect_single_select_engine::exec().
This is the only case of lazy subquery optimization in the server.
*/
DBUG_ASSERT(select_engine->join->optimized);
select_engine->join->optimized= false;
DBUG_RETURN(trans_res != Item_subselect::RES_OK);
}
DBUG_RETURN(res);
/*
Reset the "LIMIT 1" set in Item_exists_subselect::fix_length_and_dec.
TODO:
Currently we set the subquery LIMIT to infinity, and this is correct
because we forbid at parse time LIMIT inside IN subqueries (see
Item_in_subselect::test_limit). However, once we allow this, here
we should set the correct limit if given in the query.
*/
unit->global_parameters->select_limit= NULL;
engine= mat_engine;
DBUG_RETURN(FALSE);
}
@ -3787,13 +3788,14 @@ bitmap_init_memroot(MY_BITMAP *map, uint n_bits, MEM_ROOT *mem_root)
@retval FALSE otherwise
*/
bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
bool subselect_hash_sj_engine::init(List<Item> *tmp_columns)
{
select_union *result_sink;
/* Options to create_tmp_table. */
ulonglong tmp_create_options= thd->options | TMP_TABLE_ALL_COLUMNS;
/* | TMP_TABLE_FORCE_MYISAM; TIMOUR: force MYISAM */
DBUG_ENTER("subselect_hash_sj_engine::init_permanent");
DBUG_ENTER("subselect_hash_sj_engine::init");
if (bitmap_init_memroot(&non_null_key_parts, tmp_columns->elements,
thd->mem_root) ||
@ -3822,15 +3824,16 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
DBUG_RETURN(TRUE);
}
*/
if (!(result= new select_materialize_with_stats))
if (!(result_sink= new select_materialize_with_stats))
DBUG_RETURN(TRUE);
result_sink->get_tmp_table_param()->materialized_subquery= true;
if (result_sink->create_result_table(thd, tmp_columns, TRUE,
tmp_create_options,
"materialized subselect", TRUE))
DBUG_RETURN(TRUE);
if (((select_union*) result)->create_result_table(
thd, tmp_columns, TRUE, tmp_create_options,
"materialized subselect", TRUE))
DBUG_RETURN(TRUE);
tmp_table= ((select_union*) result)->table;
tmp_table= result_sink->table;
result= result_sink;
/*
If the subquery has blobs, or the total key length is bigger than
@ -3867,6 +3870,17 @@ bool subselect_hash_sj_engine::init_permanent(List<Item> *tmp_columns)
!(lookup_engine= make_unique_engine()))
DBUG_RETURN(TRUE);
/*
Repeat name resolution for 'cond' since cond is not part of any
clause of the query, and it is not 'fixed' during JOIN::prepare.
*/
if (semi_join_conds && !semi_join_conds->fixed &&
semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
DBUG_RETURN(TRUE);
/* Let our engine reuse this query plan for materialization. */
materialize_join= materialize_engine->join;
materialize_join->change_result(result);
DBUG_RETURN(FALSE);
}
@ -3957,8 +3971,6 @@ subselect_hash_sj_engine::make_unique_engine()
Item_iterator_row it(item_in->left_expr);
/* The only index on the temporary table. */
KEY *tmp_key= tmp_table->key_info;
/* Number of keyparts in tmp_key. */
uint tmp_key_parts= tmp_key->key_parts;
JOIN_TAB *tab;
DBUG_ENTER("subselect_hash_sj_engine::make_unique_engine");
@ -3981,35 +3993,6 @@ subselect_hash_sj_engine::make_unique_engine()
}
/**
Initialize members of the engine that need to be re-initialized at each
execution.
@retval TRUE if a memory allocation error occurred
@retval FALSE if success
*/
bool subselect_hash_sj_engine::init_runtime()
{
/*
Create and optimize the JOIN that will be used to materialize
the subquery if not yet created.
*/
materialize_engine->prepare();
/*
Repeat name resolution for 'cond' since cond is not part of any
clause of the query, and it is not 'fixed' during JOIN::prepare.
*/
if (semi_join_conds && !semi_join_conds->fixed &&
semi_join_conds->fix_fields(thd, (Item**)&semi_join_conds))
return TRUE;
/* Let our engine reuse this query plan for materialization. */
materialize_join= materialize_engine->join;
materialize_join->change_result(result);
return FALSE;
}
subselect_hash_sj_engine::~subselect_hash_sj_engine()
{
delete lookup_engine;
@ -4019,6 +4002,16 @@ subselect_hash_sj_engine::~subselect_hash_sj_engine()
}
/**
  Prepare this engine for execution.

  The hash semi-join engine does no preparation of its own here; it forwards
  the call to the wrapped materialize_engine.

  @return the value returned by materialize_engine->prepare()
*/
int subselect_hash_sj_engine::prepare()
{
/*
Create and optimize the JOIN that will be used to materialize
the subquery if not yet created.
*/
return materialize_engine->prepare();
}
/**
Cleanup performed after each PS execution.
@ -4036,6 +4029,12 @@ void subselect_hash_sj_engine::cleanup()
count_null_only_columns= 0;
strategy= UNDEFINED;
materialize_engine->cleanup();
/*
Restore the original Item_in_subselect engine. This engine is created once
at parse time and stored across executions, while all other materialization
related engines are created and chosen for each execution.
*/
((Item_in_subselect *) item)->engine= materialize_engine;
if (lookup_engine_type == TABLE_SCAN_ENGINE ||
lookup_engine_type == ROWID_MERGE_ENGINE)
{
@ -4052,6 +4051,9 @@ void subselect_hash_sj_engine::cleanup()
DBUG_ASSERT(lookup_engine->engine_type() == UNIQUESUBQUERY_ENGINE);
lookup_engine->cleanup();
result->cleanup(); /* Resets the temp table as well. */
DBUG_ASSERT(tmp_table);
free_tmp_table(thd, tmp_table);
tmp_table= NULL;
}
@ -4080,9 +4082,8 @@ int subselect_hash_sj_engine::exec()
the subquery predicate.
*/
thd->lex->current_select= materialize_engine->select_lex;
if ((res= materialize_join->optimize()))
goto err; /* purecov: inspected */
DBUG_ASSERT(!is_materialized); /* We should materialize only once. */
/* The subquery should be optimized, and materialized only once. */
DBUG_ASSERT(materialize_join->optimized && !is_materialized);
materialize_join->exec();
if ((res= test(materialize_join->error || thd->is_fatal_error)))
goto err;

View file

@ -817,10 +817,9 @@ public:
}
~subselect_hash_sj_engine();
bool init_permanent(List<Item> *tmp_columns);
bool init_runtime();
bool init(List<Item> *tmp_columns);
void cleanup();
int prepare() { return 0; } /* Override virtual function in base class. */
int prepare();
int exec();
virtual void print(String *str, enum_query_type query_type);
uint cols()

View file

@ -3052,6 +3052,7 @@ void TMP_TABLE_PARAM::init()
table_charset= 0;
precomputed_group_by= 0;
bit_fields_as_long= 0;
materialized_subquery= 0;
skip_create_table= 0;
DBUG_VOID_RETURN;
}

View file

@ -2852,6 +2852,8 @@ public:
uint convert_blob_length;
CHARSET_INFO *table_charset;
bool schema_table;
/* TRUE if the temp table is created for subquery materialization. */
bool materialized_subquery;
/*
True if GROUP BY and its aggregate functions are already computed
by a table access method (e.g. by loose index scan). In this case
@ -2875,8 +2877,8 @@ public:
TMP_TABLE_PARAM()
:copy_field(0), group_parts(0),
group_length(0), group_null_parts(0), convert_blob_length(0),
schema_table(0), precomputed_group_by(0), force_copy_fields(0),
bit_fields_as_long(0), skip_create_table(0)
schema_table(0), materialized_subquery(0), precomputed_group_by(0),
force_copy_fields(0), bit_fields_as_long(0), skip_create_table(0)
{}
~TMP_TABLE_PARAM()
{
@ -2905,6 +2907,7 @@ public:
bool send_data(List<Item> &items);
bool send_eof();
bool flush();
TMP_TABLE_PARAM *get_tmp_table_param() { return &tmp_table_param; }
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
@ -2969,7 +2972,7 @@ protected:
ha_rows count_rows;
public:
select_materialize_with_stats() {}
select_materialize_with_stats() { tmp_table_param.init(); }
virtual bool create_result_table(THD *thd, List<Item> *column_types,
bool is_distinct, ulonglong options,
const char *alias, bool bit_fields_as_long);

View file

@ -2586,14 +2586,13 @@ err:
Setup for execution all subqueries of a query, for which the optimizer
chose hash semi-join.
@details Iterate over all subqueries of the query, and if they are under an
IN predicate, and the optimizer chose to compute it via hash semi-join:
- try to initialize all data structures needed for the materialized execution
of the IN predicate,
- if this fails, then perform the IN=>EXISTS transformation which was
previously blocked during JOIN::prepare.
This method is part of the "code generation" query processing phase.
@details Iterate over all immediate child subqueries of the query, and if
they are under an IN predicate, and the optimizer chose to compute it via
materialization:
- optimize each subquery,
- choose an optimal execution strategy for the IN predicate - either
materialization, or an IN=>EXISTS transformation with an appropriate
engine.
This phase must be called after substitute_for_best_equal_field() because
that function may replace items with other items from a multiple equality,
@ -7925,7 +7924,7 @@ bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
use that information instead.
*/
cur_ref_buff + null_count,
null_count ? key_buff : 0,
null_count ? cur_ref_buff : 0,
cur_key_part->length, items[i], value);
cur_ref_buff+= cur_key_part->store_length;
}
@ -11408,10 +11407,30 @@ create_tmp_table(THD *thd,TMP_TABLE_PARAM *param,List<Item> &fields,
{
if (thd->is_fatal_error)
goto err; // Got OOM
continue; // Some kindf of const item
continue; // Some kind of const item
}
if (type == Item::SUM_FUNC_ITEM)
((Item_sum *) item)->result_field= new_field;
{
Item_sum *agg_item= (Item_sum *) item;
/*
Update the result field only if it has never been set, or if the
created temporary table is not to be used for subquery
materialization.
The reason is that for subqueries that require materialization as part
of their plan, we create the 'external' temporary table needed for IN
execution, after the 'internal' temporary table needed for grouping.
Since both the external and the internal temporary tables are created
for the same list of SELECT fields of the subquery, setting
'result_field' for each invocation of create_tmp_table overrides the
previous value of 'result_field'.
The condition below prevents the creation of the external temp table
to override the 'result_field' that was set for the internal temp table.
*/
if (!agg_item->result_field || !param->materialized_subquery)
agg_item->result_field= new_field;
}
tmp_from_field++;
reclength+=new_field->pack_length();
if (!(new_field->flags & NOT_NULL_FLAG))
@ -19240,6 +19259,8 @@ bool JOIN::change_result(select_result *res)
{
DBUG_ENTER("JOIN::change_result");
result= res;
if (tmp_join)
tmp_join->result= res;
if (!procedure && (result->prepare(fields_list, select_lex->master_unit()) ||
result->prepare2()))
{