Semi-join optimizations code cleanup:

- Break down POSITION/advance_sj_state() into four classes 
  representing potential semi-join strategies.

- Treat all strategies uniformly. (Before, DuplicateWeedout
  was special as it was the catch-all strategy. Now, we're
  still relying on it to be the catch-all, but are able to
  function, e.g., with firstmatch=on,duplicate_weedout=off.)

- Update test results (checked)
This commit is contained in:
Sergey Petrunya 2011-11-23 04:25:52 +04:00
parent 7f746fbe74
commit 694ce95557
6 changed files with 780 additions and 489 deletions

View file

@ -323,8 +323,8 @@ WHERE Language='English' AND Percentage > 10 AND
t2.Population > 100000);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range Population,Country Population 4 NULL 1 Using index condition; Rowid-ordered scan; Start temporary
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; End temporary
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; End temporary
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where
set optimizer_switch=@bug35674_save_optimizer_switch;
DROP TABLE t1,t2,t3;
CREATE TABLE t1 (

View file

@ -332,8 +332,8 @@ WHERE Language='English' AND Percentage > 10 AND
t2.Population > 100000);
id select_type table type possible_keys key key_len ref rows Extra
1 PRIMARY t1 range Population,Country Population 4 NULL 1 Using index condition; Rowid-ordered scan; Start temporary
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; End temporary; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
1 PRIMARY t2 eq_ref PRIMARY,Population PRIMARY 3 test.t1.Country 1 Using where; End temporary; Using join buffer (flat, BKA join); Key-ordered Rowid-ordered scan
1 PRIMARY t3 eq_ref PRIMARY,Percentage PRIMARY 33 test.t1.Country,const 1 Using index condition; Using where; Using join buffer (incremental, BKA join); Key-ordered Rowid-ordered scan
set optimizer_switch=@bug35674_save_optimizer_switch;
DROP TABLE t1,t2,t3;
CREATE TABLE t1 (

File diff suppressed because it is too large Load diff

View file

@ -263,8 +263,8 @@ public:
{
pos->records_read= best_loose_scan_records;
pos->key= best_loose_scan_start_key;
pos->loosescan_key= best_loose_scan_key;
pos->loosescan_parts= best_max_loose_keypart + 1;
pos->loosescan_picker.loosescan_key= best_loose_scan_key;
pos->loosescan_picker.loosescan_parts= best_max_loose_keypart + 1;
pos->use_join_buffer= FALSE;
pos->table= tab;
// todo need ref_depend_map ?
@ -277,8 +277,7 @@ public:
};
void advance_sj_state(JOIN *join, const table_map remaining_tables,
const JOIN_TAB *new_join_tab, uint idx,
void advance_sj_state(JOIN *join, const table_map remaining_tables, uint idx,
double *current_record_count, double *current_read_time,
POSITION *loose_scan_pos);
void restore_prev_sj_state(const table_map remaining_tables,

View file

@ -85,7 +85,7 @@ static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const
static bool find_best(JOIN *join,table_map rest_tables,uint index,
double record_count,double read_time);
static uint cache_record_length(JOIN *join,uint index);
static bool get_best_combination(JOIN *join);
bool get_best_combination(JOIN *join);
static store_key *get_store_key(THD *thd,
KEYUSE *keyuse, table_map used_tables,
KEY_PART_INFO *key_part, uchar *key_buff,
@ -4883,7 +4883,7 @@ void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
join->positions[idx].records_read=1.0; /* This is a const table */
join->positions[idx].ref_depend_map= 0;
join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
// join->positions[idx].loosescan_key= MAX_KEY; /* Not a LooseScan */
join->positions[idx].sj_strategy= SJ_OPT_NONE;
join->positions[idx].use_join_buffer= FALSE;
@ -5533,7 +5533,7 @@ best_access_path(JOIN *join,
pos->key= best_key;
pos->table= s;
pos->ref_depend_map= best_ref_depends_map;
pos->loosescan_key= MAX_KEY;
pos->loosescan_picker.loosescan_key= MAX_KEY;
pos->use_join_buffer= best_uses_jbuf;
loose_scan_opt.save_to_position(s, loose_scan_pos);
@ -5840,7 +5840,7 @@ optimize_straight_join(JOIN *join, table_map join_tables)
/* compute the cost of the new plan extended with 's' */
record_count*= join->positions[idx].records_read;
read_time+= join->positions[idx].read_time;
advance_sj_state(join, join_tables, s, idx, &record_count, &read_time,
advance_sj_state(join, join_tables, idx, &record_count, &read_time,
&loose_scan_pos);
join_tables&= ~(s->table->map);
@ -6356,7 +6356,7 @@ best_extension_by_limited_search(JOIN *join,
current_record_count= record_count * position->records_read;
current_read_time= read_time + position->read_time;
advance_sj_state(join, remaining_tables, s, idx, &current_record_count,
advance_sj_state(join, remaining_tables, idx, &current_record_count,
&current_read_time, &loose_scan_pos);
/* Expand only partial plans with lower cost than the best QEP so far */
@ -6513,7 +6513,7 @@ find_best(JOIN *join,table_map rest_tables,uint idx,double record_count,
*/
double current_record_count=record_count*records;
double current_read_time=read_time+best;
advance_sj_state(join, rest_tables, s, idx, &current_record_count,
advance_sj_state(join, rest_tables, idx, &current_record_count,
&current_read_time, &loose_scan_pos);
if (best_record_count > current_record_count ||
@ -7013,7 +7013,7 @@ static Item * const null_ptr= NULL;
TRUE Out of memory
*/
static bool
bool
get_best_combination(JOIN *join)
{
uint tablenr;
@ -7091,13 +7091,6 @@ get_best_combination(JOIN *join)
*j= *join->best_positions[tablenr].table;
#if 0
/* SJ-Materialization is represented with join tab ranges */
if (j->sj_strategy == SJ_OPT_MATERIALIZE ||
j->sj_strategy == SJ_OPT_MATERIALIZE)
j->sj_strategy= SJ_OPT_NONE;
#endif
j->bush_root_tab= sjm_nest_root;
form=join->table[tablenr]=j->table;
@ -7120,7 +7113,7 @@ get_best_combination(JOIN *join)
(join->best_positions[tablenr].sj_strategy == SJ_OPT_LOOSE_SCAN))
{
j->type=JT_ALL;
j->index= join->best_positions[tablenr].loosescan_key;
j->index= join->best_positions[tablenr].loosescan_picker.loosescan_key;
if (tablenr != join->const_tables)
join->full_join=1;
}

View file

@ -158,6 +158,17 @@ enum enum_nested_loop_state
};
/*
  Possible sj_strategy values.

  The numeric values are spelled out explicitly. Code that tests
  "strategy >= SJ_OPT_MATERIALIZE" depends on the two materialization
  strategies having the highest values, so keep them last.
*/
enum sj_strategy_enum
{
  SJ_OPT_NONE             = 0,  /* no semi-join strategy */
  SJ_OPT_DUPS_WEEDOUT     = 1,  /* Duplicate Weedout */
  SJ_OPT_LOOSE_SCAN       = 2,  /* LooseScan */
  SJ_OPT_FIRST_MATCH      = 3,  /* FirstMatch */
  SJ_OPT_MATERIALIZE      = 4,  /* SJ-Materialization */
  SJ_OPT_MATERIALIZE_SCAN = 5   /* SJ-Materialization-Scan */
};
/* Values for JOIN_TAB::packed_info */
#define TAB_INFO_HAVE_VALUE 1
#define TAB_INFO_USING_INDEX 2
@ -374,7 +385,7 @@ typedef struct st_join_table {
POSITION::sj_strategy field. This field is set up by the
fix_semijoin_strategies_for_picked_join_order.
*/
uint sj_strategy;
enum sj_strategy_enum sj_strategy;
uint n_sj_tables;
@ -496,66 +507,126 @@ enum_nested_loop_state
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
bool end_of_records);
/* psergey */
/**
Information about a position of table within a join order. Used in join
optimization.
*/
typedef struct st_position
struct st_position;
class Semi_join_strategy_picker
{
/*
The "fanout": number of output rows that will be produced (after
pushed down selection condition is applied) per each row combination of
previous tables.
*/
double records_read;
public:
/* Called when starting to build a new join prefix */
virtual void set_empty() = 0;
/*
Cost accessing the table in course of the entire complete join execution,
i.e. cost of one access method use (e.g. 'range' or 'ref' scan ) times
number the access method will be invoked.
Update internal state after another table has been added to the join
prefix
*/
double read_time;
JOIN_TAB *table;
/*
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
*/
KEYUSE *key;
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
bool use_join_buffer;
virtual void set_from_prev(struct st_position *prev) = 0;
virtual bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos) = 0;
virtual void mark_used() = 0;
virtual ~Semi_join_strategy_picker() {}
};
/*
  Duplicate Weedout strategy optimization state.

  Implements the Semi_join_strategy_picker interface for the Duplicate
  Weedout strategy. The private members are read directly by
  fix_semijoin_strategies_for_picked_join_order(), which is a friend.
*/
class Duplicate_weedout_picker : public Semi_join_strategy_picker
{
  /* The first table that the strategy will need to handle */
  uint first_dupsweedout_table;
  /*
    Tables that we will need to have in the prefix to do the weedout step
    (all inner and all outer that the involved semi-joins are correlated with)
  */
  table_map dupsweedout_tables;
  /*
    These form a stack of partial join order costs and output sizes.
    NOTE(review): members with the same names are also declared in
    st_position (POSITION); confirm these two are meant to live here too.
  */
  COST_VECT prefix_cost;
  double prefix_record_count;
  /* Set to TRUE by mark_used() and reset to FALSE by set_empty() */
  bool is_used;
public:
  /*
    Reset the state: no tables to weed out, no first table chosen
    (first_dupsweedout_table == MAX_TABLES), strategy not marked as used.
  */
  void set_empty()
  {
    dupsweedout_tables= 0;
    first_dupsweedout_table= MAX_TABLES;
    is_used= FALSE;
  }
  /* Overrides Semi_join_strategy_picker::set_from_prev() */
  void set_from_prev(struct st_position *prev);
  /*
    Overrides Semi_join_strategy_picker::check_qep(). The parameter list is
    kept identical to the base-class declaration.
  */
  bool check_qep(JOIN *join,
                 uint idx,
                 table_map remaining_tables,
                 const JOIN_TAB *new_join_tab,
                 double *record_count,
                 double *read_time,
                 table_map *handled_fanout,
                 sj_strategy_enum *strategy,
                 struct st_position *loose_scan_pos);
  /* Record that this strategy has been marked as used */
  void mark_used() { is_used= TRUE; }
  friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class Firstmatch_picker : public Semi_join_strategy_picker
{
/*
Current optimization state: Semi-join strategy to be used for this
and preceding join tables.
Join optimizer sets this for the *last* join_tab in the
duplicate-generating range. That is, in order to interpret this field,
one needs to traverse join->[best_]positions array from right to left.
When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
field (depending on the strategy) tells how many preceding positions
this applies to. The values of covered_preceding_positions->sj_strategy
must be ignored.
Index of the first inner table that we intend to handle with this
strategy
*/
uint sj_strategy;
uint first_firstmatch_table;
/*
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
are covered by the specified semi-join strategy
Tables that were not in the join prefix when we've started considering
FirstMatch strategy.
*/
uint n_sj_tables;
table_map first_firstmatch_rtbl;
/*
Tables that need to be in the prefix before we can calculate the cost
of using FirstMatch strategy.
*/
table_map firstmatch_need_tables;
/* LooseScan strategy members */
bool is_used;
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
public:
void set_empty()
{
invalidate_firstmatch_prefix();
is_used= FALSE;
}
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class LooseScan_picker : public Semi_join_strategy_picker
{
/* The first (i.e. driving) table we're doing loose scan for */
uint first_loosescan_table;
/*
@ -573,36 +644,46 @@ typedef struct st_position
uint loosescan_key; // final (one for strategy instance )
uint loosescan_parts; /* Number of keyparts to be kept distinct */
/* FirstMatch strategy */
/*
Index of the first inner table that we intend to handle with this
strategy
*/
uint first_firstmatch_table;
/*
Tables that were not in the join prefix when we've started considering
FirstMatch strategy.
*/
table_map first_firstmatch_rtbl;
/*
Tables that need to be in the prefix before we can calculate the cost
of using FirstMatch strategy.
*/
table_map firstmatch_need_tables;
bool is_used;
public:
void set_empty()
{
first_loosescan_table= MAX_TABLES;
is_used= FALSE;
}
bool in_firstmatch_prefix() { return (first_firstmatch_table != MAX_TABLES); }
void invalidate_firstmatch_prefix() { first_firstmatch_table= MAX_TABLES; }
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
/* Duplicate Weedout strategy */
/* The first table that the strategy will need to handle */
uint first_dupsweedout_table;
/*
Tables that we will need to have in the prefix to do the weedout step
(all inner and all outer that the involved semi-joins are correlated with)
*/
table_map dupsweedout_tables;
friend class Loose_scan_opt;
friend void best_access_path(JOIN *join,
JOIN_TAB *s,
table_map remaining_tables,
uint idx,
bool disable_jbuf,
double record_count,
struct st_position *pos,
struct st_position *loose_scan_pos);
friend bool get_best_combination(JOIN *join);
friend int setup_semijoin_dups_elimination(JOIN *join, ulonglong options,
uint no_jbuf_after);
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
class Sj_materialization_picker : public Semi_join_strategy_picker
{
bool is_used;
/* SJ-Materialization-Scan strategy */
/* The last inner table (valid once we're after it) */
uint sjm_scan_last_inner;
/*
@ -612,9 +693,101 @@ typedef struct st_position
*/
table_map sjm_scan_need_tables;
table_map prefix_dups_producing_tables;
} POSITION;
public:
void set_empty()
{
sjm_scan_need_tables= 0;
LINT_INIT(sjm_scan_last_inner);
is_used= FALSE;
}
void set_from_prev(struct st_position *prev);
bool check_qep(JOIN *join,
uint idx,
table_map remaining_tables,
const JOIN_TAB *new_join_tab,
double *record_count,
double *read_time,
table_map *handled_fanout,
sj_strategy_enum *strategy,
struct st_position *loose_scan_pos);
void mark_used() { is_used= TRUE; }
friend void fix_semijoin_strategies_for_picked_join_order(JOIN *join);
};
/**
Information about the position of a table within a join order. Used in join
optimization.
In addition to the access-method data for the table itself, each position
embeds one picker object per semi-join strategy class (see the last four
members).
*/
typedef struct st_position
{
/* The table that's put into join order */
JOIN_TAB *table;
/*
The "fanout": number of output rows that will be produced (after
pushed down selection condition is applied) per each row combination of
previous tables.
*/
double records_read;
/*
Cost of accessing the table in the course of the entire join execution,
i.e. cost of one access method use (e.g. 'range' or 'ref' scan) times
the number of times the access method will be invoked.
*/
double read_time;
/* Cumulative cost and record count for the join prefix */
COST_VECT prefix_cost;
double prefix_record_count;
/*
NULL - 'index' or 'range' or 'index_merge' or 'ALL' access is used.
Other - [eq_]ref[_or_null] access is used. Pointer to {t.keypart1 = expr}
*/
KEYUSE *key;
/* If ref-based access is used: bitmap of tables this table depends on */
table_map ref_depend_map;
/*
TRUE <=> join buffering will be used. At the moment this is based on
*very* imprecise guesses made in best_access_path().
*/
bool use_join_buffer;
/*
Current optimization state: Semi-join strategy to be used for this
and preceding join tables.
Join optimizer sets this for the *last* join_tab in the
duplicate-generating range. That is, in order to interpret this field,
one needs to traverse join->[best_]positions array from right to left.
When you see a join table with sj_strategy!= SJ_OPT_NONE, some other
field (depending on the strategy) tells how many preceding positions
this applies to. The values of covered_preceding_positions->sj_strategy
must be ignored.
*/
enum sj_strategy_enum sj_strategy;
/*
Valid only after fix_semijoin_strategies_for_picked_join_order() call:
if sj_strategy!=SJ_OPT_NONE, this is the number of subsequent tables that
are covered by the specified semi-join strategy
*/
uint n_sj_tables;
/*
NOTE(review): presumably the bitmap of duplicate-producing tables in the
join prefix that ends at this position - confirm against
advance_sj_state().
*/
table_map prefix_dups_producing_tables;
/*
NOTE(review): judging by the name, a bitmap of semi-join inner tables
that are already covered by another semi-join strategy - confirm.
*/
table_map inner_tables_handled_with_other_sjs;
/*
Per-strategy semi-join optimization state: one picker object for each of
the Duplicate Weedout, FirstMatch, LooseScan and SJ-Materialization
strategy classes.
*/
Duplicate_weedout_picker dups_weedout_picker;
Firstmatch_picker firstmatch_picker;
LooseScan_picker loosescan_picker;
Sj_materialization_picker sjmat_picker;
} POSITION;
typedef struct st_rollup
{
@ -626,18 +799,6 @@ typedef struct st_rollup
} ROLLUP;
#define SJ_OPT_NONE 0
#define SJ_OPT_DUPS_WEEDOUT 1
#define SJ_OPT_LOOSE_SCAN 2
#define SJ_OPT_FIRST_MATCH 3
#define SJ_OPT_MATERIALIZE 4
#define SJ_OPT_MATERIALIZE_SCAN 5
/*
  Tell whether a semi-join strategy code denotes one of the materialization
  strategies, i.e. whether it is SJ_OPT_MATERIALIZE or any higher value.

  @param strategy  semi-join strategy code (an SJ_OPT_* value)
  @return TRUE if strategy >= SJ_OPT_MATERIALIZE, FALSE otherwise
*/
inline bool sj_is_materialize_strategy(uint strategy)
{
  const bool at_or_above_materialize= (strategy >= SJ_OPT_MATERIALIZE);
  return at_or_above_materialize;
}
class JOIN_TAB_RANGE: public Sql_alloc
{
public:
@ -808,7 +969,7 @@ public:
they produce.
*/
table_map cur_dups_producing_tables;
/* We also maintain a stack of join optimization states in join->positions[] */
/******* Join optimization state members end *******/
/*