mirror of
https://github.com/MariaDB/server.git
synced 2025-01-16 12:02:42 +01:00
d6add9a03d
MDEV-33407 Parser support for vector indexes The syntax is create table t1 (... vector index (v) ...); limitation: * v is a binary string and NOT NULL * only one vector index per table * temporary tables are not supported MDEV-33404 Engine-independent indexes: subtable method added support for so-called "high level indexes", they are not visible to the storage engine, implemented on the sql level. For every such an index in a table, say, t1, the server implicitly creates a second table named, like, t1#i#05 (where "05" is the index number in t1). This table has a fixed structure, no frm, not accessible directly, doesn't go into the table cache, needs no MDLs. MDEV-33406 basic optimizer support for k-NN searches for a query like SELECT ... ORDER BY func() optimizer will use item_func->part_of_sortkey() to decide what keys can be used to resolve ORDER BY.
34339 lines
1.1 MiB
34339 lines
1.1 MiB
/* Copyright (c) 2000, 2016, Oracle and/or its affiliates.
|
|
Copyright (c) 2009, 2024, MariaDB Corporation.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; version 2 of the License.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
|
|
|
|
/**
|
|
@file
|
|
|
|
@brief
|
|
mysql_select and join optimization
|
|
|
|
|
|
@defgroup Query_Optimizer Query Optimizer
|
|
@{
|
|
*/
|
|
|
|
#include "mariadb.h"
|
|
#include "sql_priv.h"
|
|
#include "unireg.h"
|
|
#include "sql_select.h"
|
|
#include "sql_cache.h" // query_cache_*
|
|
#include "sql_table.h" // primary_key_name
|
|
#include "probes_mysql.h"
|
|
#include "key.h" // key_copy, key_cmp, key_cmp_if_same
|
|
#include "lock.h" // mysql_unlock_some_tables,
|
|
// mysql_unlock_read_tables
|
|
#include "sql_show.h" // append_identifier
|
|
#include "sql_base.h" // setup_wild, setup_fields, fill_record
|
|
#include "sql_parse.h" // check_stack_overrun
|
|
#include "sql_partition.h" // make_used_partitions_str
|
|
#include "sql_test.h" // print_where, print_keyuse_array,
|
|
// print_sjm, print_plan, TEST_join
|
|
#include "records.h" // init_read_record, end_read_record
|
|
#include "filesort.h" // filesort_free_buffers
|
|
#include "filesort_utils.h" // get_qsort_sort_cost
|
|
#include "sql_union.h" // mysql_union
|
|
#include "opt_subselect.h"
|
|
#include "sql_derived.h"
|
|
#include "sql_statistics.h"
|
|
#include "sql_cte.h"
|
|
#include "sql_window.h"
|
|
#include "tztime.h"
|
|
|
|
#include "debug_sync.h" // DEBUG_SYNC
|
|
#include <m_ctype.h>
|
|
#include <my_bit.h>
|
|
#include <hash.h>
|
|
#include <ft_global.h>
|
|
#include "sys_vars_shared.h"
|
|
#include "sp_head.h"
|
|
#include "sp_rcontext.h"
|
|
#include "rowid_filter.h"
|
|
#include "select_handler.h"
|
|
#include "my_json_writer.h"
|
|
#include "opt_trace.h"
|
|
#include "derived_handler.h"
|
|
#include "create_tmp_table.h"
|
|
#include "optimizer_defaults.h"
|
|
#include "derived_handler.h"
|
|
|
|
/*
|
|
A key part number that means we're using a fulltext scan.
|
|
|
|
In order not to confuse it with regular equalities, we need to pick
|
|
a number that's greater than MAX_REF_PARTS.
|
|
|
|
Hash Join code stores field->field_index in KEYUSE::keypart, so the
|
|
number needs to be bigger than MAX_FIELDS, also.
|
|
|
|
CAUTION: sql_test.cc has its own definition of FT_KEYPART.
|
|
*/
|
|
#define FT_KEYPART (MAX_FIELDS+10)
|
|
|
|
/*
|
|
We assume that when we do hash join, only 10 % rows in the hash will
|
|
match the current found row.
|
|
*/
|
|
#define HASH_FANOUT 0.1
|
|
|
|
/*
|
|
The following is used to check that A <= B, but with some margin as the
|
|
calculation is done slightly differently (mathematically correct, but
|
|
double calculations are not exact).
|
|
This is only used when comparing read rows and output rows, which
|
|
means that we can assume that both values are >= 0 and B cannot be notably
|
|
smaller than A.
|
|
*/
|
|
|
|
#define crash_if_first_double_is_bigger(A,B) DBUG_ASSERT(((A) == 0.0 && (B) == 0.0) || (A)/(B) < 1.0000001)
|
|
|
|
/*
  Convert a double to ha_rows, saturating at HA_ROWS_MAX when the value is
  too large to be represented. The argument may be evaluated twice, so it
  must not have side effects.
  NOTE(review): negative or NaN input is not handled by this macro - confirm
  that callers never pass such values.
*/
#define double_to_rows(A) ((A) >= ((double)HA_ROWS_MAX) ? HA_ROWS_MAX : (ha_rows) (A))
|
|
|
|
/*
  Convert a double to ulonglong, saturating at ULONGLONG_MAX when the value
  is too large to be represented. The argument may be evaluated twice, so it
  must not have side effects.
  NOTE(review): negative or NaN input is not handled by this macro - confirm
  that callers never pass such values.
*/
#define double_to_ulonglong(A) ((A) >= ((double)ULONGLONG_MAX) ? ULONGLONG_MAX : (ulonglong) (A))
|
|
|
|
/* Used to ensure that costs are calculate the same way */
|
|
inline bool compare_cost(double a, double b)
|
|
{
|
|
DBUG_ASSERT(a >= 0.0 && b >= 0.0);
|
|
return (a >= b - b/10000000.0 && a <= b+b/10000000.0);
|
|
}
|
|
|
|
/*
  Return a/b expressed as a percentage, or 0.0 when b is zero
  (avoids a division by zero).
*/
inline double safe_filtered(double a, double b)
{
  if (b == 0)
    return 0.0;
  return a/b*100.0;
}
|
|
|
|
/*
  Printable names of the join types.
  NOTE(review): presumably indexed by the join type enum whose first value
  is JT_UNKNOWN - the order of the entries must not be changed.
*/
const char *join_type_str[]=
{
  "UNKNOWN",
  "system",
  "const",
  "eq_ref",
  "ref",
  "MAYBE_REF",
  "ALL",
  "range",
  "index",
  "fulltext",
  "ref_or_null",
  "unique_subquery",
  "index_subquery",
  "index_merge",
  "hash_ALL",
  "hash_range",
  "hash_index",
  "hash_index_merge"
};
|
|
|
|
/*
  Column-identifier constant holding the name "group_key".
  NOTE(review): presumably the name given to the grouping key of internal
  temporary tables - confirm at the use sites.
*/
static const Lex_ident_column group_key= "group_key"_Lex_ident_column;
|
|
/*
  Column-identifier constant holding the name "distinct_key".
  NOTE(review): presumably the name given to the DISTINCT key of internal
  temporary tables - confirm at the use sites.
*/
static const Lex_ident_column distinct_key= "distinct_key"_Lex_ident_column;
|
|
|
|
struct st_sargable_param;
|
|
|
|
static bool make_join_statistics(JOIN *join, List<TABLE_LIST> &leaves,
|
|
DYNAMIC_ARRAY *keyuse);
|
|
static bool update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,
|
|
JOIN_TAB *join_tab,
|
|
uint tables, COND *conds,
|
|
table_map table_map, SELECT_LEX *select_lex,
|
|
SARGABLE_PARAM **sargables);
|
|
static int sort_keyuse(KEYUSE *a,KEYUSE *b);
|
|
static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables);
|
|
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j, KEYUSE *org_keyuse,
|
|
bool allow_full_scan, table_map used_tables);
|
|
static bool get_quick_record_count(THD *thd, SQL_SELECT *select,
|
|
TABLE *table,
|
|
const key_map *keys,ha_rows limit,
|
|
ha_rows *quick_count);
|
|
static void optimize_straight_join(JOIN *join, table_map join_tables);
|
|
static bool greedy_search(JOIN *join, table_map remaining_tables,
|
|
uint depth, uint use_cond_selectivity);
|
|
|
|
enum enum_best_search {
|
|
SEARCH_ABORT= -2,
|
|
SEARCH_ERROR= -1,
|
|
SEARCH_OK= 0,
|
|
SEARCH_FOUND_EDGE=1
|
|
};
|
|
|
|
static enum_best_search
|
|
best_extension_by_limited_search(JOIN *join,
|
|
table_map remaining_tables,
|
|
uint idx, double record_count,
|
|
double read_time, uint depth,
|
|
uint use_cond_selectivity,
|
|
table_map *processed_eq_ref_tables);
|
|
static uint determine_search_depth(JOIN* join);
|
|
C_MODE_START
|
|
static int join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2);
|
|
static int join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2);
|
|
static int join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void *ptr2);
|
|
C_MODE_END
|
|
static uint cache_record_length(JOIN *join,uint index);
|
|
static store_key *get_store_key(THD *thd,
|
|
KEYUSE *keyuse, table_map used_tables,
|
|
KEY_PART_INFO *key_part, uchar *key_buff,
|
|
uint maybe_null);
|
|
static bool make_outerjoin_info(JOIN *join);
|
|
static Item*
|
|
make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables,
|
|
table_map sjm_tables, bool inside_or_clause);
|
|
static bool make_join_select(JOIN *join,SQL_SELECT *select,COND *item);
|
|
static void revise_cache_usage(JOIN_TAB *join_tab);
|
|
static bool make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after);
|
|
static bool only_eq_ref_tables(JOIN *join, ORDER *order, table_map tables);
|
|
static void update_depend_map(JOIN *join);
|
|
static void update_depend_map_for_order(JOIN *join, ORDER *order);
|
|
static ORDER *remove_const(JOIN *join,ORDER *first_order,COND *cond,
|
|
bool change_list, bool *simple_order);
|
|
static int return_zero_rows(JOIN *join, select_result *res,
|
|
List<TABLE_LIST> *tables,
|
|
List<Item> *fields, bool send_row,
|
|
ulonglong select_options, const char *info,
|
|
Item *having, List<Item> *all_fields);
|
|
static COND *build_equal_items(JOIN *join, COND *cond,
|
|
COND_EQUAL *inherited,
|
|
List<TABLE_LIST> *join_list,
|
|
bool ignore_on_conds,
|
|
COND_EQUAL **cond_equal_ref,
|
|
bool link_equal_fields= FALSE);
|
|
static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab,
|
|
COND *cond,
|
|
COND_EQUAL *cond_equal,
|
|
void *table_join_idx,
|
|
bool do_substitution);
|
|
static COND *simplify_joins(JOIN *join, List<TABLE_LIST> *join_list,
|
|
COND *conds, bool top, bool in_sj);
|
|
static bool check_interleaving_with_nj(JOIN_TAB *next);
|
|
static void restore_prev_nj_state(JOIN_TAB *last);
|
|
static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list);
|
|
static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
|
|
uint first_unused);
|
|
|
|
static COND *optimize_cond(JOIN *join, COND *conds,
|
|
List<TABLE_LIST> *join_list,
|
|
bool ignore_on_conds,
|
|
Item::cond_result *cond_value,
|
|
COND_EQUAL **cond_equal,
|
|
int flags= 0);
|
|
bool const_expression_in_where(COND *conds,Item *item, Item **comp_item);
|
|
static int do_select(JOIN *join, Procedure *procedure);
|
|
|
|
static enum_nested_loop_state evaluate_join_record(JOIN *, JOIN_TAB *, int);
|
|
static enum_nested_loop_state
|
|
evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab);
|
|
static enum_nested_loop_state
|
|
end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
static enum_nested_loop_state
|
|
end_write(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
static enum_nested_loop_state
|
|
end_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
static enum_nested_loop_state
|
|
end_unique_update(JOIN *join, JOIN_TAB *join_tab, bool end_of_records);
|
|
|
|
static int join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos);
|
|
static int join_read_system(JOIN_TAB *tab);
|
|
static int join_read_const(JOIN_TAB *tab);
|
|
static int join_read_key(JOIN_TAB *tab);
|
|
static void join_read_key_unlock_row(st_join_table *tab);
|
|
static void join_const_unlock_row(JOIN_TAB *tab);
|
|
static int join_read_always_key(JOIN_TAB *tab);
|
|
static int join_read_last_key(JOIN_TAB *tab);
|
|
static int join_no_more_records(READ_RECORD *info);
|
|
static int join_read_next(READ_RECORD *info);
|
|
static int join_hlindex_read_next(READ_RECORD *info);
|
|
static int join_init_quick_read_record(JOIN_TAB *tab);
|
|
static quick_select_return test_if_quick_select(JOIN_TAB *tab);
|
|
static int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab);
|
|
static int join_read_first(JOIN_TAB *tab);
|
|
static int join_read_next_same(READ_RECORD *info);
|
|
static int join_read_last(JOIN_TAB *tab);
|
|
static int join_read_prev_same(READ_RECORD *info);
|
|
static int join_read_prev(READ_RECORD *info);
|
|
static int join_ft_read_first(JOIN_TAB *tab);
|
|
static int join_ft_read_next(READ_RECORD *info);
|
|
int join_read_always_key_or_null(JOIN_TAB *tab);
|
|
int join_read_next_same_or_null(READ_RECORD *info);
|
|
static COND *make_cond_for_table(THD *thd, Item *cond,table_map table,
|
|
table_map used_table,
|
|
int join_tab_idx_arg,
|
|
bool exclude_expensive_cond,
|
|
bool retain_ref_cond);
|
|
static COND *make_cond_for_table_from_pred(THD *thd, Item *root_cond,
|
|
Item *cond,
|
|
table_map tables,
|
|
table_map used_table,
|
|
int join_tab_idx_arg,
|
|
bool exclude_expensive_cond,
|
|
bool retain_ref_cond,
|
|
bool is_top_and_level);
|
|
|
|
static Item* part_of_refkey(TABLE *form,Field *field);
|
|
static bool test_if_cheaper_ordering(bool in_join_optimizer,
|
|
const JOIN_TAB *tab,
|
|
ORDER *order, TABLE *table,
|
|
key_map usable_keys, int key,
|
|
ha_rows select_limit,
|
|
int *new_key, int *new_key_direction,
|
|
ha_rows *new_select_limit,
|
|
double *new_read_time,
|
|
uint *new_used_key_parts= NULL,
|
|
uint *saved_best_key_parts= NULL);
|
|
static int test_if_order_by_key(JOIN *, ORDER *, TABLE *, uint, uint *);
|
|
static bool test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,
|
|
ha_rows select_limit, bool no_changes,
|
|
const key_map *map,
|
|
bool *fatal_error);
|
|
static bool list_contains_unique_index(TABLE *table,
|
|
bool (*find_func) (Field *, void *), void *data);
|
|
static bool find_field_in_item_list (Field *field, void *data);
|
|
static bool find_field_in_order_list (Field *field, void *data);
|
|
int create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort);
|
|
static int remove_dup_with_compare(THD *thd, TABLE *entry, Field **field,
|
|
SORT_FIELD *sortorder, ulong keylength,
|
|
Item *having);
|
|
static int remove_dup_with_hash_index(THD *thd,TABLE *table,
|
|
uint field_count, Field **first_field,
|
|
SORT_FIELD *sortorder,
|
|
ulong key_length,Item *having);
|
|
static bool cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref);
|
|
static bool setup_new_fields(THD *thd, List<Item> &fields,
|
|
List<Item> &all_fields, ORDER *new_order);
|
|
static ORDER *create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
ORDER *order, List<Item> &fields,
|
|
List<Item> &all_fields,
|
|
bool *all_order_by_fields_used);
|
|
static bool test_if_subpart(ORDER *group_by, ORDER *order_by);
|
|
static TABLE *get_sort_by_table(ORDER *a,ORDER *b,List<TABLE_LIST> &tables,
|
|
table_map const_tables);
|
|
static void calc_group_buffer(JOIN *join, ORDER *group);
|
|
static bool make_group_fields(JOIN *main_join, JOIN *curr_join);
|
|
static bool alloc_group_fields(JOIN *join, ORDER *group);
|
|
static bool alloc_order_fields(JOIN *join, ORDER *group,
|
|
uint max_number_of_elements);
|
|
// Create list for use with a temporary table
|
|
static bool change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &new_list1,
|
|
List<Item> &new_list2,
|
|
uint elements, List<Item> &items);
|
|
// Create list for use with a temporary table
|
|
static bool change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &new_list1,
|
|
List<Item> &new_list2,
|
|
uint elements, List<Item> &items);
|
|
static void init_tmptable_sum_functions(Item_sum **func);
|
|
static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
|
|
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
|
|
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
|
|
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
|
|
static bool init_sum_functions(Item_sum **func, Item_sum **end);
|
|
static bool update_sum_func(Item_sum **func);
|
|
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
|
|
bool distinct, const char *message=NullS);
|
|
static void add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab);
|
|
static uint make_join_orderinfo(JOIN *join);
|
|
static bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array);
|
|
|
|
Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
|
|
bool *inherited_fl);
|
|
JOIN_TAB *first_depth_first_tab(JOIN* join);
|
|
JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab);
|
|
|
|
static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
|
|
uint n_top_tabs_count, JOIN_TAB *tab);
|
|
static bool find_order_in_list(THD *, Ref_ptr_array, TABLE_LIST *, ORDER *,
|
|
List<Item> &, List<Item> &, bool, bool, bool);
|
|
|
|
static double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|
table_map rem_tables,
|
|
double *records_out);
|
|
void set_postjoin_aggr_write_func(JOIN_TAB *tab);
|
|
|
|
static Item **get_sargable_cond(JOIN *join, TABLE *table);
|
|
|
|
bool is_eq_cond_injected_for_split_opt(Item_func_eq *eq_item);
|
|
|
|
void print_list_item(String *str, List_item *list,
|
|
enum_query_type query_type);
|
|
|
|
static
|
|
bool build_notnull_conds_for_range_scans(JOIN *join, COND *cond,
|
|
table_map allowed);
|
|
static
|
|
void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
|
|
TABLE_LIST *nest_tbl);
|
|
static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex);
|
|
static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit, Item *cond);
|
|
static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
|
|
Item *cond);
|
|
static double prev_record_reads(const POSITION *positions, uint idx,
|
|
table_map found_ref, double record_count,
|
|
double *same_keys);
|
|
|
|
static
|
|
bool join_limit_shortcut_is_applicable(const JOIN *join);
|
|
POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost);
|
|
|
|
static bool find_indexes_matching_order(JOIN *, TABLE *, ORDER *, key_map *);
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
/*
|
|
SHOW EXPLAIN testing: wait for, and serve n_calls APC requests.
|
|
*/
|
|
void dbug_serve_apcs(THD *thd, int n_calls)
|
|
{
|
|
const char *save_proc_info= thd->proc_info;
|
|
|
|
/* Busy-wait for n_calls APC requests to arrive and be processed */
|
|
int n_apcs= thd->apc_target.n_calls_processed + n_calls;
|
|
while (thd->apc_target.n_calls_processed < n_apcs)
|
|
{
|
|
/* This is so that mysqltest knows we're ready to serve requests: */
|
|
thd_proc_info(thd, "show_explain_trap");
|
|
my_sleep(30000);
|
|
thd_proc_info(thd, save_proc_info);
|
|
if (unlikely(thd->check_killed(1)))
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Debugging: check if @name=value, comparing as integer
|
|
|
|
Intended usage:
|
|
|
|
DBUG_EXECUTE_IF("show_explain_probe_2",
|
|
if (dbug_user_var_equals_int(thd, "select_id", select_id))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
|
|
*/
|
|
|
|
bool dbug_user_var_equals_int(THD *thd, const char *name, int value)
|
|
{
|
|
user_var_entry *var;
|
|
LEX_CSTRING varname= { name, strlen(name)};
|
|
if ((var= get_variable(&thd->user_vars, &varname, FALSE)))
|
|
{
|
|
bool null_value;
|
|
longlong var_value= var->val_int(&null_value);
|
|
if (!null_value && var_value == value)
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
/*
|
|
Debugging : check if @name= value, comparing as string
|
|
|
|
Intended usage :
|
|
|
|
DBUG_EXECUTE_IF("log_slow_statement_end",
|
|
if (dbug_user_var_equals_str(thd, "show_explain_probe_query",
|
|
thd->query()))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
*/
|
|
|
|
bool dbug_user_var_equals_str(THD *thd, const char *name, const char* value)
|
|
{
|
|
user_var_entry *var;
|
|
LEX_CSTRING varname= {name, strlen(name)};
|
|
if ((var= get_variable(&thd->user_vars, &varname, FALSE)))
|
|
{
|
|
bool null_value;
|
|
String str;
|
|
auto var_value= var->val_str(&null_value, &str, 10)->ptr();
|
|
if (!null_value && !strncmp(var_value, value, strlen(value)))
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
#endif /* DBUG_OFF */
|
|
|
|
/*
|
|
Intialize POSITION structure.
|
|
*/
|
|
|
|
POSITION::POSITION()
|
|
{
|
|
table= 0;
|
|
records_read= cond_selectivity= read_time= records_out= records_init= 0.0;
|
|
prefix_record_count= 0.0;
|
|
key= 0;
|
|
forced_index= 0;
|
|
use_join_buffer= 0;
|
|
firstmatch_with_join_buf= false;
|
|
sj_strategy= SJ_OPT_NONE;
|
|
n_sj_tables= 0;
|
|
spl_plan= 0;
|
|
range_rowid_filter_info= 0;
|
|
ref_depend_map= dups_producing_tables= 0;
|
|
inner_tables_handled_with_other_sjs= 0;
|
|
type= JT_UNKNOWN;
|
|
key_dependent= 0;
|
|
dups_weedout_picker.set_empty();
|
|
firstmatch_picker.set_empty();
|
|
loosescan_picker.set_empty();
|
|
sjmat_picker.set_empty();
|
|
}
|
|
|
|
|
|
/*
  Put a JOIN object into its initial, not yet optimized state.

  @param thd_arg             Thread handle; stored in 'thd', and its current
                             'lock' is copied into the JOIN
  @param fields_arg          Select list; copied into all_fields and
                             fields_list
  @param select_options_arg  Select options; stored in 'select_options' and
                             used to derive 'select_distinct'
  @param result_arg          Stored in 'result'

  All other members are reset to zero / NULL / their "not set" value.
*/
void JOIN::init(THD *thd_arg, List<Item> &fields_arg,
                ulonglong select_options_arg, select_result *result_arg)
{
  join_tab= 0;
  table= 0;
  table_count= 0;
  top_join_tab_count= 0;
  const_tables= 0;
  const_table_map= found_const_table_map= not_usable_rowid_map= 0;
  aggr_tables= 0;
  eliminated_tables= 0;
  join_list= 0;
  implicit_grouping= FALSE;
  sort_and_group= 0;
  first_record= 0;
  do_send_rows= 1;
  duplicate_rows= send_records= 0;
  found_records= accepted_rows= 0;
  fetch_limit= HA_POS_ERROR;
  thd= thd_arg;
  sum_funcs= sum_funcs2= 0;
  procedure= 0;
  having= tmp_having= having_history= 0;
  having_is_correlated= false;
  group_list_for_estimates= 0;
  select_options= select_options_arg;
  result= result_arg;
  lock= thd_arg->lock;
  select_lex= 0; //for safety
  /* Must come after select_options has been assigned above */
  select_distinct= MY_TEST(select_options & SELECT_DISTINCT);
  no_order= 0;
  simple_order= 0;
  simple_group= 0;
  ordered_index_usage= ordered_index_void;
  need_distinct= 0;
  skip_sort_order= 0;
  with_two_phase_optimization= 0;
  save_qep= 0;
  spl_opt_info= 0;
  ext_keyuses_for_splitting= 0;
  need_tmp= 0;
  hidden_group_fields= 0; /*safety*/
  error= 0;
  select= 0;
  return_tab= 0;
  ref_ptrs.reset();
  items0.reset();
  items1.reset();
  items2.reset();
  items3.reset();
  zero_result_cause= 0;
  optimization_state= JOIN::NOT_OPTIMIZED;
  have_query_plan= QEP_NOT_PRESENT_YET;
  initialized= 0;
  cleaned= 0;
  cond_equal= 0;
  having_equal= 0;
  exec_const_cond= 0;
  group_optimized_away= 0;
  no_rows_in_result_called= 0;
  positions= best_positions= 0;
  pushdown_query= 0;
  original_join_tab= 0;
  explain= NULL;
  tmp_table_keep_current_rowid= 0;
  allowed_top_level_tables= 0;

  all_fields= fields_arg;
  if (&fields_list != &fields_arg)      /* Avoid valgrind-warning */
    fields_list= fields_arg;
  non_agg_fields.empty();
  bzero((char*) &keyuse,sizeof(keyuse));
  having_value= Item::COND_UNDEF;
  tmp_table_param.init();
  /* Set after init() so that the value is not overwritten by it */
  tmp_table_param.end_write_records= HA_POS_ERROR;
  rollup.state= ROLLUP::STATE_NONE;

  no_const_tables= FALSE;
  first_select= sub_select;
  group_sent= 0;

  outer_ref_cond= pseudo_bits_cond= NULL;
  in_to_exists_where= NULL;
  in_to_exists_having= NULL;
  emb_sjm_nest= NULL;
  sjm_lookup_tables= 0;
  sjm_scan_tables= 0;
  is_orig_degenerated= false;
  with_ties_order_count= 0;
  prepared= false;
  sql_cmd_dml= NULL;
}
|
|
|
|
|
|
/*
  Write the "table_dependencies" array to the optimizer trace.

  For each of the table_count elements of join_tabs one object is written
  containing the table name, the "row_may_be_null" flag, the lowest bit set
  in the table's map ("map_bit") and the list of bits set in the
  'dependent' map of the JOIN_TAB ("depends_on_map_bits").

  The trace must already be started for this thread.
*/
static void trace_table_dependencies(THD *thd,
                                     JOIN_TAB *join_tabs, uint table_count)
{
  DBUG_ASSERT(thd->trace_started());
  Json_writer_object trace_wrapper(thd);
  Json_writer_array trace_dep(thd, "table_dependencies");

  JOIN_TAB *const tabs_end= join_tabs + table_count;
  for (JOIN_TAB *tab= join_tabs; tab != tabs_end; tab++)
  {
    TABLE_LIST *table_ref= tab->tab_list;
    Json_writer_object trace_one_table(thd);
    trace_one_table.add_table_name(tab);
    trace_one_table.add("row_may_be_null",
                        (bool)table_ref->table->maybe_null);

    const table_map tab_bits= table_ref->get_map();
    DBUG_ASSERT(tab_bits < (1ULL << table_count));

    /* Locate the lowest bit set in the table's map, if there is one */
    uint bit_no= 0;
    while (bit_no < table_count && !(tab_bits & (1ULL << bit_no)))
      bit_no++;
    if (bit_no < table_count)
      trace_one_table.add("map_bit", bit_no);

    Json_writer_array depends_on(thd, "depends_on_map_bits");
    Table_map_iterator it(tab->dependent);
    for (uint dep_bit= it++;
         dep_bit != Table_map_iterator::BITMAP_END;
         dep_bit= it++)
      depends_on.add(static_cast<longlong>(dep_bit));
  }
}
|
|
|
|
|
|
/**
|
|
This handles SELECT with and without UNION.
|
|
*/
|
|
|
|
bool handle_select(THD *thd, LEX *lex, select_result *result,
|
|
ulonglong setup_tables_done_option)
|
|
{
|
|
bool res;
|
|
SELECT_LEX *select_lex= lex->first_select_lex();
|
|
DBUG_ENTER("handle_select");
|
|
MYSQL_SELECT_START(thd->query());
|
|
|
|
if (select_lex->master_unit()->is_unit_op() ||
|
|
select_lex->master_unit()->fake_select_lex)
|
|
res= mysql_union(thd, lex, result, &lex->unit, setup_tables_done_option);
|
|
else
|
|
{
|
|
SELECT_LEX_UNIT *unit= &lex->unit;
|
|
unit->set_limit(unit->global_parameters());
|
|
/*
|
|
'options' of mysql_select will be set in JOIN, as far as JOIN for
|
|
every PS/SP execution new, we will not need reset this flag if
|
|
setup_tables_done_option changed for next rexecution
|
|
*/
|
|
res= mysql_select(thd,
|
|
select_lex->table_list.first,
|
|
select_lex->item_list,
|
|
select_lex->where,
|
|
select_lex->order_list.elements +
|
|
select_lex->group_list.elements,
|
|
select_lex->order_list.first,
|
|
select_lex->group_list.first,
|
|
select_lex->having,
|
|
lex->proc_list.first,
|
|
select_lex->options | thd->variables.option_bits |
|
|
setup_tables_done_option,
|
|
result, unit, select_lex);
|
|
}
|
|
DBUG_PRINT("info",("res: %d is_error(): %d", res,
|
|
thd->is_error()));
|
|
res|= thd->is_error();
|
|
if (unlikely(res))
|
|
result->abort_result_set();
|
|
if (unlikely(thd->killed == ABORT_QUERY && !thd->no_errors))
|
|
{
|
|
/*
|
|
If LIMIT ROWS EXAMINED interrupted query execution, issue a warning,
|
|
continue with normal processing and produce an incomplete query result.
|
|
*/
|
|
bool saved_abort_on_warning= thd->abort_on_warning;
|
|
thd->abort_on_warning= false;
|
|
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
|
|
ER_QUERY_RESULT_INCOMPLETE,
|
|
ER_THD(thd, ER_QUERY_RESULT_INCOMPLETE),
|
|
"LIMIT ROWS EXAMINED",
|
|
thd->lex->limit_rows_examined->val_uint());
|
|
thd->abort_on_warning= saved_abort_on_warning;
|
|
thd->reset_killed();
|
|
}
|
|
/* Disable LIMIT ROWS EXAMINED after query execution. */
|
|
thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
|
|
|
|
MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
|
|
/**
|
|
Fix fields referenced from inner selects.
|
|
|
|
@param thd Thread handle
|
|
@param all_fields List of all fields used in select
|
|
@param select Current select
|
|
@param ref_pointer_array Array of references to Items used in current select
|
|
@param group_list GROUP BY list (is NULL by default)
|
|
|
|
@details
|
|
The function serves 3 purposes
|
|
|
|
- adds fields referenced from inner query blocks to the current select list
|
|
|
|
- Decides which class to use to reference the items (Item_ref or
|
|
Item_direct_ref)
|
|
|
|
- fixes references (Item_ref objects) to these fields.
|
|
|
|
If a field isn't already on the select list and the ref_pointer_array
|
|
is provided then it is added to the all_fields list and the pointer to
|
|
it is saved in the ref_pointer_array.
|
|
|
|
The class to access the outer field is determined by the following rules:
|
|
|
|
-#. If the outer field isn't used under an aggregate function then the
|
|
Item_ref class should be used.
|
|
|
|
-#. If the outer field is used under an aggregate function and this
|
|
function is, in turn, aggregated in the query block where the outer
|
|
field was resolved or some query nested therein, then the
|
|
Item_direct_ref class should be used. Also it should be used if we are
|
|
grouping by a subquery that references this outer field.
|
|
|
|
The resolution is done here and not at the fix_fields() stage as
|
|
it can be done only after aggregate functions are fixed and pulled up to
|
|
selects where they are to be aggregated.
|
|
|
|
When the class is chosen it substitutes the original field in the
|
|
Item_outer_ref object.
|
|
|
|
After this we proceed with fixing references (Item_outer_ref objects) to
|
|
this field from inner subqueries.
|
|
|
|
@return Status
|
|
@retval true An error occurred.
|
|
@retval false OK.
|
|
*/
|
|
|
|
bool
|
|
fix_inner_refs(THD *thd, List<Item> &all_fields, SELECT_LEX *select,
|
|
Ref_ptr_array ref_pointer_array)
|
|
{
|
|
Item_outer_ref *ref;
|
|
|
|
/*
|
|
Mark the references from the inner_refs_list that are occurred in
|
|
the group by expressions. Those references will contain direct
|
|
references to the referred fields. The markers are set in
|
|
the found_in_group_by field of the references from the list.
|
|
*/
|
|
List_iterator_fast <Item_outer_ref> ref_it(select->inner_refs_list);
|
|
for (ORDER *group= select->join->group_list; group; group= group->next)
|
|
{
|
|
(*group->item)->walk(&Item::check_inner_refs_processor, TRUE, &ref_it);
|
|
}
|
|
|
|
while ((ref= ref_it++))
|
|
{
|
|
bool direct_ref= false;
|
|
Item *item= ref->outer_ref;
|
|
Item **item_ref= ref->ref;
|
|
Item_ref *new_ref;
|
|
/*
|
|
TODO: this field item already might be present in the select list.
|
|
In this case instead of adding new field item we could use an
|
|
existing one. The change will lead to less operations for copying fields,
|
|
smaller temporary tables and less data passed through filesort.
|
|
*/
|
|
if (!ref_pointer_array.is_null() && !ref->found_in_select_list)
|
|
{
|
|
int el= all_fields.elements;
|
|
ref_pointer_array[el]= item;
|
|
/* Add the field item to the select list of the current select. */
|
|
all_fields.push_front(item, thd->mem_root);
|
|
/*
|
|
If it's needed reset each Item_ref item that refers this field with
|
|
a new reference taken from ref_pointer_array.
|
|
*/
|
|
item_ref= &ref_pointer_array[el];
|
|
}
|
|
|
|
if (ref->in_sum_func)
|
|
{
|
|
Item_sum *sum_func;
|
|
if (ref->in_sum_func->nest_level > select->nest_level)
|
|
direct_ref= TRUE;
|
|
else
|
|
{
|
|
for (sum_func= ref->in_sum_func; sum_func &&
|
|
sum_func->aggr_level >= select->nest_level;
|
|
sum_func= sum_func->in_sum_func)
|
|
{
|
|
if (sum_func->aggr_level == select->nest_level)
|
|
{
|
|
direct_ref= TRUE;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if (ref->found_in_group_by)
|
|
direct_ref= TRUE;
|
|
|
|
new_ref= direct_ref ?
|
|
new (thd->mem_root) Item_direct_ref(thd, ref->context, item_ref, ref->table_name,
|
|
ref->field_name, ref->alias_name_used) :
|
|
new (thd->mem_root) Item_ref(thd, ref->context, item_ref, ref->table_name,
|
|
ref->field_name, ref->alias_name_used);
|
|
if (!new_ref)
|
|
return TRUE;
|
|
ref->outer_ref= new_ref;
|
|
ref->ref= &ref->outer_ref;
|
|
|
|
if (ref->fix_fields_if_needed(thd, 0))
|
|
return TRUE;
|
|
thd->lex->used_tables|= item->used_tables();
|
|
thd->lex->current_select->select_list_tables|= item->used_tables();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
The following clauses are redundant for subqueries:
|
|
|
|
DISTINCT
|
|
GROUP BY if there are no aggregate functions and no HAVING
|
|
clause
|
|
|
|
Because redundant clauses are removed both from JOIN and
|
|
select_lex, the removal is permanent. Thus, it only makes sense to
|
|
call this function for normal queries and on first execution of
|
|
SP/PS
|
|
|
|
@param subq_select_lex select_lex that is part of a subquery
|
|
predicate. This object and the associated
|
|
join is modified.
|
|
*/
|
|
|
|
static
|
|
void remove_redundant_subquery_clauses(st_select_lex *subq_select_lex)
|
|
{
|
|
DBUG_ENTER("remove_redundant_subquery_clauses");
|
|
Item_subselect *subq_predicate= subq_select_lex->master_unit()->item;
|
|
/*
|
|
The removal should happen for IN, ALL, ANY and EXISTS subqueries,
|
|
which means all but single row subqueries. Example single row
|
|
subqueries:
|
|
a) SELECT * FROM t1 WHERE t1.a = (<single row subquery>)
|
|
b) SELECT a, (<single row subquery) FROM t1
|
|
*/
|
|
if (subq_predicate->substype() == Item_subselect::SINGLEROW_SUBS)
|
|
DBUG_VOID_RETURN;
|
|
|
|
/* A subquery that is not single row should be one of IN/ALL/ANY/EXISTS. */
|
|
DBUG_ASSERT (subq_predicate->substype() == Item_subselect::EXISTS_SUBS ||
|
|
subq_predicate->is_in_predicate());
|
|
|
|
if (subq_select_lex->options & SELECT_DISTINCT)
|
|
{
|
|
subq_select_lex->join->select_distinct= false;
|
|
subq_select_lex->options&= ~SELECT_DISTINCT;
|
|
DBUG_PRINT("info", ("DISTINCT removed"));
|
|
}
|
|
|
|
/*
|
|
Remove GROUP BY if there are no aggregate functions and no HAVING
|
|
clause
|
|
*/
|
|
if (subq_select_lex->group_list.elements &&
|
|
!subq_select_lex->with_sum_func && !subq_select_lex->join->having)
|
|
{
|
|
/*
|
|
Temporary workaround for MDEV-28621: Do not remove GROUP BY expression
|
|
if it has any subqueries in it.
|
|
*/
|
|
bool have_subquery= false;
|
|
for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next)
|
|
{
|
|
if ((*ord->item)->with_subquery())
|
|
{
|
|
have_subquery= true;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!have_subquery)
|
|
{
|
|
for (ORDER *ord= subq_select_lex->group_list.first; ord; ord= ord->next)
|
|
{
|
|
/*
|
|
Do not remove the item if it is used in select list and then referred
|
|
from GROUP BY clause by its name or number. Example:
|
|
|
|
select (select ... ) as SUBQ ... group by SUBQ
|
|
|
|
Here SUBQ cannot be removed.
|
|
*/
|
|
if (!ord->in_field_list)
|
|
{
|
|
/*
|
|
Not necessary due to workaround for MDEV-28621:
|
|
(*ord->item)->walk(&Item::eliminate_subselect_processor, FALSE, NULL);
|
|
*/
|
|
/*
|
|
Remove from the JOIN::all_fields list any reference to the elements
|
|
of the eliminated GROUP BY list unless it is 'in_field_list'.
|
|
This is needed in order not to confuse JOIN::make_aggr_tables_info()
|
|
when it constructs different structure for execution phase.
|
|
*/
|
|
List_iterator<Item> li(subq_select_lex->join->all_fields);
|
|
Item *item;
|
|
while ((item= li++))
|
|
{
|
|
if (item == *ord->item)
|
|
li.remove();
|
|
}
|
|
}
|
|
}
|
|
subq_select_lex->join->group_list= NULL;
|
|
subq_select_lex->group_list.empty();
|
|
DBUG_PRINT("info", ("GROUP BY removed"));
|
|
}
|
|
}
|
|
|
|
/*
|
|
TODO: This would prevent processing quries with ORDER BY ... LIMIT
|
|
therefore we disable this optimization for now.
|
|
Remove GROUP BY if there are no aggregate functions and no HAVING
|
|
clause
|
|
if (subq_select_lex->group_list.elements &&
|
|
!subq_select_lex->with_sum_func && !subq_select_lex->join->having)
|
|
{
|
|
subq_select_lex->join->group_list= NULL;
|
|
subq_select_lex->group_list.empty();
|
|
}
|
|
*/
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
Function to setup clauses without sum functions.
|
|
*/
|
|
static inline int
|
|
setup_without_group(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
TABLE_LIST *tables,
|
|
List<TABLE_LIST> &leaves,
|
|
List<Item> &fields,
|
|
List<Item> &all_fields,
|
|
COND **conds,
|
|
ORDER *order,
|
|
ORDER *group,
|
|
List<Window_spec> &win_specs,
|
|
List<Item_window_func> &win_funcs,
|
|
bool *hidden_group_fields)
|
|
{
|
|
int res;
|
|
enum_parsing_place save_place;
|
|
st_select_lex *const select= thd->lex->current_select;
|
|
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
|
|
/*
|
|
Need to stave the value, so we can turn off only any new non_agg_field_used
|
|
additions coming from the WHERE
|
|
*/
|
|
const bool saved_non_agg_field_used= select->non_agg_field_used();
|
|
DBUG_ENTER("setup_without_group");
|
|
|
|
thd->lex->allow_sum_func.clear_bit(select->nest_level);
|
|
res= setup_conds(thd, tables, leaves, conds);
|
|
|
|
/* it's not wrong to have non-aggregated columns in a WHERE */
|
|
select->set_non_agg_field_used(saved_non_agg_field_used);
|
|
|
|
thd->lex->allow_sum_func.set_bit(select->nest_level);
|
|
|
|
save_place= thd->lex->current_select->context_analysis_place;
|
|
thd->lex->current_select->context_analysis_place= IN_ORDER_BY;
|
|
res= res || setup_order(thd, ref_pointer_array, tables, fields, all_fields,
|
|
order);
|
|
thd->lex->allow_sum_func.clear_bit(select->nest_level);
|
|
thd->lex->current_select->context_analysis_place= IN_GROUP_BY;
|
|
res= res || setup_group(thd, ref_pointer_array, tables, fields, all_fields,
|
|
group, hidden_group_fields);
|
|
thd->lex->current_select->context_analysis_place= save_place;
|
|
thd->lex->allow_sum_func.set_bit(select->nest_level);
|
|
res= res || setup_windows(thd, ref_pointer_array, tables, fields, all_fields,
|
|
win_specs, win_funcs);
|
|
thd->lex->allow_sum_func= save_allow_sum_func;
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
/**
  Fill this condition from the session variable
  thd->variables.vers_asof_timestamp.

  The type is taken from the variable, delete_history is reset and the
  start unit becomes VERS_TIMESTAMP. Unless the type is UNSPECIFIED or ALL,
  a datetime literal built from the variable's unix_time/second_part in the
  session time zone becomes the start item. The end point is emptied.

  @return true if the start item could not be allocated, false otherwise
*/
bool vers_select_conds_t::init_from_sysvar(THD *thd)
{
  vers_asof_timestamp_t &sysvar= thd->variables.vers_asof_timestamp;

  type= (vers_system_time_t) sysvar.type;
  delete_history= false;
  start.unit= VERS_TIMESTAMP;

  if (type == SYSTEM_TIME_UNSPECIFIED || type == SYSTEM_TIME_ALL)
    start.item= NULL;                        // no point in time is needed
  else
  {
    DBUG_ASSERT(type == SYSTEM_TIME_AS_OF);
    Datetime as_of(sysvar.unix_time, sysvar.second_part,
                   thd->variables.time_zone);

    start.item= new (thd->mem_root)
      Item_datetime_literal(thd, &as_of, TIME_SECOND_PART_DIGITS);
    if (!start.item)
      return true;
  }

  end.empty();
  return false;
}
|
|
|
|
/**
  Append the textual FOR SYSTEM_TIME clause to str.

  The clause is chosen by orig_type (not by type). Nothing is appended for
  SYSTEM_TIME_UNSPECIFIED and SYSTEM_TIME_HISTORY.
*/
void vers_select_conds_t::print(String *str, enum_query_type query_type) const
{
  switch (orig_type) {
  case SYSTEM_TIME_UNSPECIFIED:
  case SYSTEM_TIME_HISTORY:
    /* nothing to add */
    break;

  case SYSTEM_TIME_ALL:
    str->append(STRING_WITH_LEN(" FOR SYSTEM_TIME ALL"));
    break;

  /* clauses with a single point in time */
  case SYSTEM_TIME_AS_OF:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME AS OF "));
    break;
  case SYSTEM_TIME_BEFORE:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BEFORE "));
    break;

  /* clauses with a range */
  case SYSTEM_TIME_FROM_TO:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME FROM "));
    end.print(str, query_type, STRING_WITH_LEN(" TO "));
    break;
  case SYSTEM_TIME_BETWEEN:
    start.print(str, query_type, STRING_WITH_LEN(" FOR SYSTEM_TIME BETWEEN "));
    end.print(str, query_type, STRING_WITH_LEN(" AND "));
    break;
  }
}
|
|
|
|
static
|
|
Item* period_get_condition(THD *thd, TABLE_LIST *table, SELECT_LEX *select,
|
|
vers_select_conds_t *conds, bool timestamp)
|
|
{
|
|
DBUG_ASSERT(table);
|
|
DBUG_ASSERT(table->table);
|
|
#define newx new (thd->mem_root)
|
|
TABLE_SHARE *share= table->table->s;
|
|
const TABLE_SHARE::period_info_t *period= conds->period;
|
|
|
|
const LEX_CSTRING &fstart= period->start_field(share)->field_name;
|
|
const LEX_CSTRING &fend= period->end_field(share)->field_name;
|
|
|
|
conds->field_start= newx Item_field(thd, &select->context,
|
|
table->db, table->alias,
|
|
thd->strmake_lex_cstring(fstart));
|
|
conds->field_end= newx Item_field(thd, &select->context,
|
|
table->db, table->alias,
|
|
thd->strmake_lex_cstring(fend));
|
|
|
|
Item *cond1= NULL, *cond2= NULL, *cond3= NULL, *curr= NULL;
|
|
if (timestamp)
|
|
{
|
|
MYSQL_TIME max_time;
|
|
switch (conds->type)
|
|
{
|
|
case SYSTEM_TIME_UNSPECIFIED:
|
|
case SYSTEM_TIME_HISTORY:
|
|
{
|
|
thd->variables.time_zone->gmt_sec_to_TIME(&max_time, TIMESTAMP_MAX_VALUE);
|
|
max_time.second_part= TIME_MAX_SECOND_PART;
|
|
Datetime dt(&max_time);
|
|
curr= newx Item_datetime_literal(thd, &dt, TIME_SECOND_PART_DIGITS);
|
|
if (conds->type == SYSTEM_TIME_UNSPECIFIED)
|
|
cond1= newx Item_func_eq(thd, conds->field_end, curr);
|
|
else
|
|
cond1= newx Item_func_lt(thd, conds->field_end, curr);
|
|
break;
|
|
}
|
|
case SYSTEM_TIME_AS_OF:
|
|
cond1= newx Item_func_le(thd, conds->field_start, conds->start.item);
|
|
cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item);
|
|
break;
|
|
case SYSTEM_TIME_FROM_TO:
|
|
cond1= newx Item_func_lt(thd, conds->field_start, conds->end.item);
|
|
cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item);
|
|
cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BETWEEN:
|
|
cond1= newx Item_func_le(thd, conds->field_start, conds->end.item);
|
|
cond2= newx Item_func_gt(thd, conds->field_end, conds->start.item);
|
|
cond3= newx Item_func_le(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BEFORE:
|
|
cond1= newx Item_func_history(thd, conds->field_end);
|
|
cond2= newx Item_func_lt(thd, conds->field_end, conds->start.item);
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
DBUG_ASSERT(table->table->s && table->table->s->db_plugin);
|
|
|
|
Item *trx_id0= conds->start.item;
|
|
Item *trx_id1= conds->end.item;
|
|
if (conds->start.item && conds->start.unit == VERS_TIMESTAMP)
|
|
{
|
|
bool backwards= conds->type != SYSTEM_TIME_AS_OF;
|
|
trx_id0= newx Item_func_trt_id(thd, conds->start.item,
|
|
TR_table::FLD_TRX_ID, backwards);
|
|
}
|
|
if (conds->end.item && conds->end.unit == VERS_TIMESTAMP)
|
|
{
|
|
trx_id1= newx Item_func_trt_id(thd, conds->end.item,
|
|
TR_table::FLD_TRX_ID, false);
|
|
}
|
|
|
|
switch (conds->type)
|
|
{
|
|
case SYSTEM_TIME_UNSPECIFIED:
|
|
case SYSTEM_TIME_HISTORY:
|
|
curr= newx Item_int(thd, ULONGLONG_MAX);
|
|
if (conds->type == SYSTEM_TIME_UNSPECIFIED)
|
|
cond1= newx Item_func_eq(thd, conds->field_end, curr);
|
|
else
|
|
cond1= newx Item_func_lt(thd, conds->field_end, curr);
|
|
break;
|
|
DBUG_ASSERT(!conds->start.item);
|
|
DBUG_ASSERT(!conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_AS_OF:
|
|
cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id0, conds->field_start);
|
|
cond2= newx Item_func_trt_trx_sees(thd, conds->field_end, trx_id0);
|
|
DBUG_ASSERT(!conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_FROM_TO:
|
|
cond1= newx Item_func_trt_trx_sees(thd, trx_id1, conds->field_start);
|
|
cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0);
|
|
cond3= newx Item_func_lt(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BETWEEN:
|
|
cond1= newx Item_func_trt_trx_sees_eq(thd, trx_id1, conds->field_start);
|
|
cond2= newx Item_func_trt_trx_sees_eq(thd, conds->field_end, trx_id0);
|
|
cond3= newx Item_func_le(thd, conds->start.item, conds->end.item);
|
|
break;
|
|
case SYSTEM_TIME_BEFORE:
|
|
cond1= newx Item_func_history(thd, conds->field_end);
|
|
cond2= newx Item_func_trt_trx_sees(thd, trx_id0, conds->field_end);
|
|
break;
|
|
default:
|
|
DBUG_ASSERT(0);
|
|
}
|
|
}
|
|
|
|
if (cond1)
|
|
{
|
|
cond1= and_items(thd, cond2, cond1);
|
|
cond1= and_items(thd, cond3, cond1);
|
|
}
|
|
return cond1;
|
|
}
|
|
|
|
static
|
|
bool skip_setup_conds(THD *thd)
|
|
{
|
|
return (!thd->stmt_arena->is_conventional()
|
|
&& !thd->stmt_arena->is_stmt_prepare_or_first_sp_execute())
|
|
|| thd->lex->is_view_context_analysis();
|
|
}
|
|
|
|
/**
  Add the conditions derived from each table's period_conditions to the
  WHERE clause of this select.

  For every opened table in the list, the period name requested in
  period_conditions must match the period of the table share; otherwise
  ER_PERIOD_NOT_FOUND is raised. Unless skip_setup_conds() says otherwise,
  the per-table conditions are AND-ed together and then AND-ed into
  this->where. The statement arena is activated (if needed) while the
  items are built and restored on every exit path.

  @return 0 on success, -1 if a requested period was not found
*/
int SELECT_LEX::period_setup_conds(THD *thd, TABLE_LIST *tables)
{
  DBUG_ENTER("SELECT_LEX::period_setup_conds");
  const bool build_conds= !skip_setup_conds(thd);

  Query_arena saved_arena;
  Query_arena *stmt_arena_used= thd->activate_stmt_arena_if_needed(&saved_arena);

  DBUG_ASSERT(!tables->next_local && tables->table);

  Item *period_cond= NULL;
  for (TABLE_LIST *tl= tables; tl; tl= tl->next_local)
  {
    if (!tl->table)
      continue;

    vers_select_conds_t &pconds= tl->period_conditions;
    if (!tl->table->s->period.name.streq(pconds.name))
    {
      my_error(ER_PERIOD_NOT_FOUND, MYF(0), pconds.name.str);
      if (stmt_arena_used)
        thd->restore_active_arena(stmt_arena_used, &saved_arena);
      DBUG_RETURN(-1);
    }

    if (!build_conds)
      continue;

    pconds.period= &tl->table->s->period;
    period_cond= and_items(thd, period_cond,
                           period_get_condition(thd, tl, this, &pconds, true));
  }

  if (build_conds)
    where= and_items(thd, where, period_cond);

  if (stmt_arena_used)
    thd->restore_active_arena(stmt_arena_used, &saved_arena);

  DBUG_RETURN(0);
}
|
|
|
|
/**
  System versioning: process the FOR SYSTEM_TIME specification of every
  versioned table in the list and attach the resulting conditions.

  Steps, in order:
   - on the first call (versioned_tables == 0) count the versioned tables
     and reject FOR SYSTEM_TIME on a non-versioned table;
   - return at once if there is no versioned table;
   - look for an enclosing derived table that carries a system time
     specification, to inherit it;
   - per versioned table: inherit the specification from that outer table
     or (for SQLCOM_SELECT only) from the session variable, validate it,
     and, unless skip_setup_conds() forbids it, add the condition built by
     period_get_condition() to the table's ON expression or to the WHERE.

  @return 0 on success, -1 on error
*/
int SELECT_LEX::vers_setup_conds(THD *thd, TABLE_LIST *tables)
{
  DBUG_ENTER("SELECT_LEX::vers_setup_conds");
  const bool build_conds= !skip_setup_conds(thd);

  if (!versioned_tables)
  {
    for (TABLE_LIST *tl= tables; tl; tl= tl->next_local)
    {
      if (tl->table && tl->table->versioned())
      {
        versioned_tables++;
        continue;
      }
      if (tl->vers_conditions.is_set() &&
          (tl->is_non_derived() || !tl->vers_conditions.used))
      {
        my_error(ER_VERS_NOT_VERSIONED, MYF(0), tl->alias.str);
        DBUG_RETURN(-1);
      }
    }
  }

  if (versioned_tables == 0)
    DBUG_RETURN(0);

  /*
    Items are created on the statement arena: for prepared statements they
    must outlive the execution phase across multiple executions.
  */
  Query_arena_stmt on_stmt_arena(thd);

  /* Locate the nearest enclosing derived table with a system_time set. */
  SELECT_LEX *enclosing= outer_select();
  TABLE_LIST *inherit_from= NULL;

  if (enclosing)
  {
    /* The inner SELECT may not be a derived table (then this is NULL). */
    TABLE_LIST *candidate= master_unit()->derived;
    for (; candidate && enclosing && !candidate->vers_conditions.is_set();
         enclosing= enclosing->outer_select())
      candidate= enclosing->master_unit()->derived;

    if (candidate && enclosing)
    {
      DBUG_ASSERT(candidate->vers_conditions.is_set());
      inherit_from= candidate;
    }
  }

  bool is_select= false;
  bool use_sysvar= false;
  switch (thd->lex->sql_command)
  {
  case SQLCOM_SELECT:
    use_sysvar= true;
    /* fall through */
  case SQLCOM_CREATE_TABLE:
  case SQLCOM_INSERT_SELECT:
  case SQLCOM_REPLACE_SELECT:
  case SQLCOM_DELETE_MULTI:
  case SQLCOM_UPDATE_MULTI:
    is_select= true;
    /* fall through */
  default:
    break;
  }

  for (TABLE_LIST *tl= tables; tl; tl= tl->next_local)
  {
    if (!tl->table || tl->is_view() || !tl->table->versioned())
      continue;

    vers_select_conds_t &vconds= tl->vers_conditions;

#ifdef WITH_PARTITION_STORAGE_ENGINE
    /*
      When the history is stored in partitions, the partitions themselves
      are not versioned: an explicit FOR SYSTEM_TIME on them is an error,
      otherwise all rows of the named partitions are selected.
    */
    if (tl->partition_names && tl->table->part_info->vers_info)
    {
      if (vconds.was_set())
      {
        my_error(ER_VERS_QUERY_IN_PARTITION, MYF(0), tl->alias.str);
        DBUG_RETURN(-1);
      }
      else if (!vconds.is_set())
        vconds.set_all();
    }
#endif

    if (inherit_from && !vconds.is_set())
    {
      /* Take over system_time from the nearest outer SELECT_LEX. */
      vconds= inherit_from->vers_conditions;
      inherit_from->vers_conditions.used= true;
    }

    /* Still nothing set: take system_time from the session variable. */
    if (!vconds.is_set() && use_sysvar)
    {
      if (vconds.init_from_sysvar(thd))
        DBUG_RETURN(-1);
    }

    if (vconds.is_set())
    {
      if (vconds.was_set() &&
          tl->lock_type >= TL_FIRST_WRITE &&
          !vconds.delete_history)
      {
        my_error(ER_TABLE_NOT_LOCKED_FOR_WRITE, MYF(0), tl->alias.str);
        DBUG_RETURN(-1);
      }

      /* FOR SYSTEM_TIME ALL needs no filtering condition. */
      if (vconds.type == SYSTEM_TIME_ALL)
        continue;
    }

    bool by_timestamp= tl->table->versioned(VERS_TIMESTAMP);

    if (vconds.is_set() && vconds.type != SYSTEM_TIME_HISTORY)
    {
      thd->where= THD_WHERE::FOR_SYSTEM_TIME;
      /*
        TODO: do resolve fix_length_and_dec(), fix_fields(). This requires
        storing vers_conditions as Item and some magic related to
        vers_system_time_t/VERS_TRX_ID at the stage of fix_fields()
        (a large refactoring).
      */
      if (vconds.check_units(thd))
        DBUG_RETURN(-1);
      if (by_timestamp && (vconds.start.unit == VERS_TRX_ID ||
                           vconds.end.unit == VERS_TRX_ID))
      {
        my_error(ER_VERS_ENGINE_UNSUPPORTED, MYF(0), tl->table_name.str);
        DBUG_RETURN(-1);
      }
    }

    if (!build_conds)
      continue;

    vconds.period = &tl->table->s->vers;
    Item *vers_cond= period_get_condition(thd, tl, this, &vconds,
                                          by_timestamp);
    if (is_select)
      tl->on_expr= and_items(thd, tl->on_expr, vers_cond);
    else
    {
      if (join)
      {
        where= and_items(thd, join->conds, vers_cond);
        join->conds= where;
      }
      else
        where= and_items(thd, where, vers_cond);
      tl->where= and_items(thd, tl->where, vers_cond);
    }

    tl->vers_conditions.set_all();
  } // for (tl= tables; ...)

  DBUG_RETURN(0);
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Check fields, find best join, do the select and output fields.
|
|
mysql_select assumes that all tables are already opened
|
|
*****************************************************************************/
|
|
|
|
/*
|
|
Check if we have a field reference. If yes, we have to use
|
|
mixed_implicit_grouping.
|
|
*/
|
|
|
|
static bool check_list_for_field(List<Item> *items)
|
|
{
|
|
List_iterator_fast <Item> select_it(*items);
|
|
Item *select_el;
|
|
|
|
while ((select_el= select_it++))
|
|
{
|
|
if (select_el->with_field())
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
  Same check for an ORDER chain: true as soon as the item of one element
  reports with_field().
*/
static bool check_list_for_field(ORDER *order)
{
  while (order)
  {
    if ((*order->item)->with_field())
      return true;
    order= order->next;
  }
  return false;
}
|
|
|
|
|
|
/**
|
|
Prepare of whole select (including sub queries in future).
|
|
|
|
@todo
|
|
Add check of calculation of GROUP functions and fields:
|
|
SELECT COUNT(*)+table.col1 from table1;
|
|
|
|
@retval
|
|
-1 on error
|
|
@retval
|
|
0 on success
|
|
*/
|
|
int
|
|
JOIN::prepare(TABLE_LIST *tables_init, COND *conds_init, uint og_num,
|
|
ORDER *order_init, bool skip_order_by,
|
|
ORDER *group_init, Item *having_init,
|
|
ORDER *proc_param_init, SELECT_LEX *select_lex_arg,
|
|
SELECT_LEX_UNIT *unit_arg)
|
|
{
|
|
DBUG_ENTER("JOIN::prepare");
|
|
|
|
// to prevent double initialization on EXPLAIN
|
|
if (optimization_state != JOIN::NOT_OPTIMIZED)
|
|
DBUG_RETURN(0);
|
|
|
|
conds= conds_init;
|
|
order= order_init;
|
|
group_list= group_init;
|
|
having= having_init;
|
|
proc_param= proc_param_init;
|
|
tables_list= tables_init;
|
|
select_lex= select_lex_arg;
|
|
DBUG_PRINT("info", ("select %p (%u) = JOIN %p",
|
|
select_lex, select_lex->select_number, this));
|
|
select_lex->join= this;
|
|
join_list= &select_lex->top_join_list;
|
|
union_part= unit_arg->is_unit_op();
|
|
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_prepare(thd, "join_preparation");
|
|
trace_prepare.add_select_number(select_lex->select_number);
|
|
Json_writer_array trace_steps(thd, "steps");
|
|
|
|
// simple check that we got usable conds
|
|
dbug_print_item(conds);
|
|
|
|
/* Fix items that requires the join structure to exist */
|
|
fix_items_after_optimize(thd, select_lex);
|
|
|
|
/*
|
|
It is hack which force creating EXPLAIN object always on runt-time arena
|
|
(because very top JOIN::prepare executes always with runtime arena, but
|
|
constant subquery like (SELECT 'x') can be called with statement arena
|
|
during prepare phase of top SELECT).
|
|
*/
|
|
if (!(thd->lex->context_analysis_only & CONTEXT_ANALYSIS_ONLY_PREPARE))
|
|
create_explain_query_if_not_exists(thd->lex, thd->mem_root);
|
|
|
|
if (select_lex->handle_derived(thd->lex, DT_PREPARE))
|
|
DBUG_RETURN(-1);
|
|
|
|
thd->lex->current_select->context_analysis_place= NO_MATTER;
|
|
thd->lex->current_select->is_item_list_lookup= 1;
|
|
/*
|
|
If we have already executed SELECT, then it have not sense to prevent
|
|
its table from update (see unique_table())
|
|
Affects only materialized derived tables.
|
|
*/
|
|
/* Check that all tables, fields, conds and order are ok */
|
|
if (!(select_options & OPTION_SETUP_TABLES_DONE) &&
|
|
setup_tables_and_check_access(thd, &select_lex->context, join_list,
|
|
tables_list, select_lex->leaf_tables,
|
|
FALSE, SELECT_ACL, SELECT_ACL, FALSE))
|
|
DBUG_RETURN(-1);
|
|
|
|
/* System Versioning: handle FOR SYSTEM_TIME clause. */
|
|
if (select_lex->vers_setup_conds(thd, tables_list) < 0)
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
mixed_implicit_grouping will be set to TRUE if the SELECT list
|
|
mixes elements with and without grouping, and there is no GROUP BY
|
|
clause.
|
|
Mixing non-aggregated fields with aggregate functions in the
|
|
SELECT list or HAVING is a MySQL extension that is allowed only if
|
|
the ONLY_FULL_GROUP_BY sql mode is not set.
|
|
*/
|
|
mixed_implicit_grouping= false;
|
|
if ((~thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY) &&
|
|
select_lex->with_sum_func && !group_list)
|
|
{
|
|
if (check_list_for_field(&fields_list) ||
|
|
check_list_for_field(order))
|
|
{
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
|
|
mixed_implicit_grouping= true; // mark for future
|
|
|
|
while (TABLE_LIST *tbl= li++)
|
|
{
|
|
/*
|
|
If the query uses implicit grouping where the select list
|
|
contains both aggregate functions and non-aggregate fields,
|
|
any non-aggregated field may produce a NULL value. Set all
|
|
fields of each table as nullable before semantic analysis to
|
|
take into account this change of nullability.
|
|
|
|
Note: this loop doesn't touch tables inside merged
|
|
semi-joins, because subquery-to-semijoin conversion has not
|
|
been done yet. This is intended.
|
|
*/
|
|
if (tbl->table)
|
|
tbl->table->maybe_null= 1;
|
|
}
|
|
}
|
|
}
|
|
table_count= select_lex->leaf_tables.elements;
|
|
|
|
uint real_og_num= og_num;
|
|
if (skip_order_by &&
|
|
select_lex != select_lex->master_unit()->global_parameters())
|
|
real_og_num+= select_lex->order_list.elements;
|
|
|
|
DBUG_ASSERT(select_lex->hidden_bit_fields == 0);
|
|
if (setup_wild(thd, tables_list, fields_list, &all_fields, select_lex, false))
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
If the select_lex is immediately contained within a derived table
|
|
AND this derived table is a CTE
|
|
WITH supplied column names
|
|
AND we have the correct number of elements in both lists
|
|
(mismatches found in mysql_derived_prepare/rename_columns_of_derived_unit)
|
|
THEN NOW is the time to take a copy of these item_names for
|
|
later restoration if required.
|
|
*/
|
|
TABLE_LIST *derived= select_lex->master_unit()->derived;
|
|
|
|
if (derived &&
|
|
derived->with &&
|
|
derived->with->column_list.elements &&
|
|
(derived->with->column_list.elements == select_lex->item_list.elements))
|
|
{
|
|
if (select_lex->save_item_list_names(thd))
|
|
DBUG_RETURN(-1);
|
|
}
|
|
|
|
if (thd->lex->current_select->first_cond_optimization)
|
|
{
|
|
if ( conds && ! thd->lex->current_select->merged_into)
|
|
select_lex->select_n_reserved= conds->exists2in_reserved_items();
|
|
else
|
|
select_lex->select_n_reserved= 0;
|
|
}
|
|
|
|
if (select_lex->setup_ref_array(thd, real_og_num))
|
|
DBUG_RETURN(-1);
|
|
|
|
ref_ptrs= ref_ptr_array_slice(0);
|
|
|
|
enum_parsing_place save_place=
|
|
thd->lex->current_select->context_analysis_place;
|
|
thd->lex->current_select->context_analysis_place= SELECT_LIST;
|
|
|
|
{
|
|
List_iterator_fast<TABLE_LIST> it(select_lex->leaf_tables);
|
|
while (TABLE_LIST *tbl= it++)
|
|
{
|
|
if (tbl->table_function &&
|
|
tbl->table_function->setup(thd, tbl, select_lex_arg))
|
|
DBUG_RETURN(-1);
|
|
}
|
|
}
|
|
|
|
if (setup_fields(thd, ref_ptrs, fields_list, select_lex->item_list_usage,
|
|
&all_fields, &select_lex->pre_fix, 1))
|
|
DBUG_RETURN(-1);
|
|
thd->lex->current_select->context_analysis_place= save_place;
|
|
|
|
if (setup_without_group(thd, ref_ptrs, tables_list,
|
|
select_lex->leaf_tables, fields_list,
|
|
all_fields, &conds, order, group_list,
|
|
select_lex->window_specs,
|
|
select_lex->window_funcs,
|
|
&hidden_group_fields))
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
Permanently remove redundant parts from the query if
|
|
1) This is a subquery
|
|
2) This is the first time this query is optimized (since the
|
|
transformation is permanent
|
|
3) Not normalizing a view. Removal should take place when a
|
|
query involving a view is optimized, not when the view
|
|
is created
|
|
*/
|
|
if (select_lex->master_unit()->item && // 1)
|
|
select_lex->first_cond_optimization && // 2)
|
|
!thd->lex->is_view_context_analysis()) // 3)
|
|
{
|
|
remove_redundant_subquery_clauses(select_lex);
|
|
}
|
|
|
|
/* Resolve the ORDER BY that was skipped, then remove it. */
|
|
if (skip_order_by && select_lex !=
|
|
select_lex->master_unit()->global_parameters())
|
|
{
|
|
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
|
|
thd->lex->allow_sum_func.set_bit(select_lex->nest_level);
|
|
thd->where= THD_WHERE::ORDER_CLAUSE;
|
|
for (ORDER *order= select_lex->order_list.first; order; order= order->next)
|
|
{
|
|
/* Don't add the order items to all fields. Just resolve them to ensure
|
|
the query is valid, we'll drop them immediately after. */
|
|
if (find_order_in_list(thd, ref_ptrs, tables_list, order,
|
|
fields_list, all_fields, false, false, false))
|
|
DBUG_RETURN(-1);
|
|
}
|
|
thd->lex->allow_sum_func= save_allow_sum_func;
|
|
select_lex->order_list.empty();
|
|
}
|
|
|
|
if (having)
|
|
{
|
|
nesting_map save_allow_sum_func= thd->lex->allow_sum_func;
|
|
thd->where= THD_WHERE::HAVING_CLAUSE;
|
|
thd->lex->allow_sum_func.set_bit(select_lex_arg->nest_level);
|
|
select_lex->having_fix_field= 1;
|
|
/*
|
|
Wrap alone field in HAVING clause in case it will be outer field
|
|
of subquery which need persistent pointer on it, but having
|
|
could be changed by optimizer
|
|
*/
|
|
if (having->type() == Item::REF_ITEM &&
|
|
((Item_ref *)having)->ref_type() == Item_ref::REF)
|
|
wrap_ident(thd, &having);
|
|
bool having_fix_rc= having->fix_fields_if_needed_for_bool(thd, &having);
|
|
select_lex->having_fix_field= 0;
|
|
|
|
if (unlikely(having_fix_rc || thd->is_error()))
|
|
DBUG_RETURN(-1); /* purecov: inspected */
|
|
thd->lex->allow_sum_func= save_allow_sum_func;
|
|
|
|
if (having->with_window_func())
|
|
{
|
|
my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
|
|
DBUG_RETURN(-1);
|
|
}
|
|
}
|
|
|
|
/*
|
|
After setting up window functions, we may have discovered additional
|
|
used tables from the PARTITION BY and ORDER BY list. Update all items
|
|
that contain window functions.
|
|
*/
|
|
if (select_lex->have_window_funcs())
|
|
{
|
|
List_iterator_fast<Item> it(select_lex->item_list);
|
|
Item *item;
|
|
while ((item= it++))
|
|
{
|
|
if (item->with_window_func())
|
|
item->update_used_tables();
|
|
}
|
|
}
|
|
|
|
With_clause *with_clause=select_lex->get_with_clause();
|
|
if (with_clause && with_clause->prepare_unreferenced_elements(thd))
|
|
DBUG_RETURN(1);
|
|
|
|
With_element *with_elem= select_lex->get_with_element();
|
|
if (with_elem &&
|
|
select_lex->check_unrestricted_recursive(
|
|
thd->variables.only_standard_compliant_cte))
|
|
DBUG_RETURN(-1);
|
|
if (!(select_lex->changed_elements & TOUCHED_SEL_COND))
|
|
select_lex->check_subqueries_with_recursive_references();
|
|
|
|
int res= check_and_do_in_subquery_rewrites(this);
|
|
|
|
select_lex->fix_prepare_information(thd, &conds, &having);
|
|
|
|
if (res)
|
|
DBUG_RETURN(res);
|
|
|
|
if (order)
|
|
{
|
|
bool requires_sorting= FALSE;
|
|
/*
|
|
WITH TIES forces the results to be sorted, even if it's not sanely
|
|
sortable.
|
|
*/
|
|
if (select_lex->limit_params.with_ties)
|
|
requires_sorting= true;
|
|
|
|
/*
|
|
Go through each ORDER BY item and perform the following:
|
|
1. Detect if none of the items contain meaningful data, which means we
|
|
can drop the sorting altogether.
|
|
2. Split any columns with aggregation functions or window functions into
|
|
their base components and store them as separate fields.
|
|
(see split_sum_func) for more details.
|
|
*/
|
|
for (ORDER *ord= order; ord; ord= ord->next)
|
|
{
|
|
Item *item= *ord->item;
|
|
/*
|
|
Disregard sort order if there's only
|
|
zero length NOT NULL fields (e.g. {VAR}CHAR(0) NOT NULL") or
|
|
zero length NOT NULL string functions there.
|
|
Such tuples don't contain any data to sort.
|
|
*/
|
|
if (!requires_sorting &&
|
|
/* Not a zero length NOT NULL field */
|
|
((item->type() != Item::FIELD_ITEM ||
|
|
((Item_field *) item)->field->maybe_null() ||
|
|
((Item_field *) item)->field->sort_length()) &&
|
|
/* AND not a zero length NOT NULL string function. */
|
|
(item->type() != Item::FUNC_ITEM ||
|
|
item->maybe_null() ||
|
|
item->result_type() != STRING_RESULT ||
|
|
item->max_length)))
|
|
requires_sorting= TRUE;
|
|
|
|
if ((item->with_sum_func() && item->type() != Item::SUM_FUNC_ITEM) ||
|
|
item->with_window_func())
|
|
item->split_sum_func(thd, ref_ptrs, all_fields, SPLIT_SUM_SELECT);
|
|
}
|
|
/* Drop the ORDER BY clause if none of the columns contain any data that
|
|
can produce a meaningful sorted set. */
|
|
if (!requires_sorting)
|
|
order= NULL;
|
|
}
|
|
else
|
|
{
|
|
/* The current select does not have an ORDER BY */
|
|
if (select_lex->limit_params.with_ties)
|
|
{
|
|
my_error(ER_WITH_TIES_NEEDS_ORDER, MYF(0));
|
|
DBUG_RETURN(-1);
|
|
}
|
|
}
|
|
|
|
if (having && (having->with_sum_func() || having->with_rownum_func()))
|
|
having->split_sum_func2(thd, ref_ptrs, all_fields,
|
|
&having, SPLIT_SUM_SKIP_REGISTERED);
|
|
if (select_lex->inner_sum_func_list)
|
|
{
|
|
Item_sum *end=select_lex->inner_sum_func_list;
|
|
Item_sum *item_sum= end;
|
|
do
|
|
{
|
|
item_sum= item_sum->next;
|
|
item_sum->split_sum_func2(thd, ref_ptrs,
|
|
all_fields, item_sum->ref_by, 0);
|
|
} while (item_sum != end);
|
|
}
|
|
|
|
if (select_lex->inner_refs_list.elements &&
|
|
fix_inner_refs(thd, all_fields, select_lex, ref_ptrs))
|
|
DBUG_RETURN(-1);
|
|
|
|
if (group_list)
|
|
{
|
|
/*
|
|
Because HEAP tables can't index BIT fields we need to use an
|
|
additional hidden field for grouping because later it will be
|
|
converted to a LONG field. Original field will remain of the
|
|
BIT type and will be returned to a client.
|
|
*/
|
|
for (ORDER *ord= group_list; ord; ord= ord->next)
|
|
{
|
|
if ((*ord->item)->type() == Item::FIELD_ITEM &&
|
|
(*ord->item)->field_type() == MYSQL_TYPE_BIT)
|
|
{
|
|
Item_field *field= new (thd->mem_root) Item_field(thd, *(Item_field**)ord->item);
|
|
if (!field)
|
|
DBUG_RETURN(-1);
|
|
int el= all_fields.elements;
|
|
ref_ptrs[el]= field;
|
|
all_fields.push_front(field, thd->mem_root);
|
|
ord->item= &ref_ptrs[el];
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Check if there are references to un-aggregated columns when computing
|
|
aggregate functions with implicit grouping (there is no GROUP BY).
|
|
*/
|
|
if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY && !group_list &&
|
|
!(select_lex->master_unit()->item &&
|
|
select_lex->master_unit()->item->is_in_predicate() &&
|
|
select_lex->master_unit()->item->get_IN_subquery()->
|
|
test_set_strategy(SUBS_MAXMIN_INJECTED)) &&
|
|
select_lex->non_agg_field_used() &&
|
|
select_lex->agg_func_used())
|
|
{
|
|
my_message(ER_MIX_OF_GROUP_FUNC_AND_FIELDS,
|
|
ER_THD(thd, ER_MIX_OF_GROUP_FUNC_AND_FIELDS), MYF(0));
|
|
DBUG_RETURN(-1);
|
|
}
|
|
{
|
|
/* Caclulate the number of groups */
|
|
send_group_parts= 0;
|
|
for (ORDER *group_tmp= group_list ; group_tmp ; group_tmp= group_tmp->next)
|
|
send_group_parts++;
|
|
}
|
|
|
|
procedure= setup_procedure(thd, proc_param, result, fields_list, &error);
|
|
if (unlikely(error))
|
|
goto err; /* purecov: inspected */
|
|
if (procedure)
|
|
{
|
|
if (setup_new_fields(thd, fields_list, all_fields,
|
|
procedure->param_fields))
|
|
goto err; /* purecov: inspected */
|
|
if (procedure->group)
|
|
{
|
|
if (!test_if_subpart(procedure->group,group_list))
|
|
{ /* purecov: inspected */
|
|
my_message(ER_DIFF_GROUPS_PROC, ER_THD(thd, ER_DIFF_GROUPS_PROC),
|
|
MYF(0)); /* purecov: inspected */
|
|
goto err; /* purecov: inspected */
|
|
}
|
|
}
|
|
if (order && (procedure->flags & PROC_NO_SORT))
|
|
{ /* purecov: inspected */
|
|
my_message(ER_ORDER_WITH_PROC, ER_THD(thd, ER_ORDER_WITH_PROC),
|
|
MYF(0)); /* purecov: inspected */
|
|
goto err; /* purecov: inspected */
|
|
}
|
|
if (thd->lex->derived_tables)
|
|
{
|
|
/*
|
|
Queries with derived tables and PROCEDURE are not allowed.
|
|
Many of such queries are disallowed grammatically, but there
|
|
are still some complex cases:
|
|
SELECT 1 FROM (SELECT 1) a PROCEDURE ANALYSE()
|
|
*/
|
|
my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE",
|
|
thd->lex->derived_tables & DERIVED_VIEW ?
|
|
"view" : "subquery");
|
|
goto err;
|
|
}
|
|
if (thd->lex->sql_command != SQLCOM_SELECT)
|
|
{
|
|
// EXPLAIN SELECT * FROM t1 PROCEDURE ANALYSE()
|
|
my_error(ER_WRONG_USAGE, MYF(0), "PROCEDURE", "non-SELECT");
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object trace_wrapper(thd);
|
|
opt_trace_print_expanded_query(thd, select_lex, &trace_wrapper);
|
|
}
|
|
|
|
if (!procedure && result && result->prepare(fields_list, unit_arg))
|
|
goto err; /* purecov: inspected */
|
|
|
|
select_lex->where_cond_after_prepare= conds;
|
|
|
|
unit= unit_arg;
|
|
if (prepare_stage2())
|
|
goto err;
|
|
prepared= true;
|
|
|
|
DBUG_RETURN(0); // All OK
|
|
|
|
err:
|
|
delete procedure; /* purecov: inspected */
|
|
procedure= 0;
|
|
DBUG_RETURN(-1); /* purecov: inspected */
|
|
}
|
|
|
|
|
|
/**
|
|
Second phase of prepare where we collect some statistic.
|
|
|
|
@details
|
|
We made this part separate to be able recalculate some statistic after
|
|
transforming subquery on optimization phase.
|
|
*/
|
|
|
|
bool JOIN::prepare_stage2()
|
|
{
|
|
bool res= TRUE;
|
|
DBUG_ENTER("JOIN::prepare_stage2");
|
|
|
|
/* Init join struct */
|
|
count_field_types(select_lex, &tmp_table_param, all_fields, 0);
|
|
this->group= group_list != 0;
|
|
|
|
if (tmp_table_param.sum_func_count && !group_list)
|
|
{
|
|
implicit_grouping= TRUE;
|
|
// Result will contain zero or one row - ordering is meaningless
|
|
order= NULL;
|
|
}
|
|
|
|
#ifdef RESTRICTED_GROUP
|
|
if (implicit_grouping)
|
|
{
|
|
my_message(ER_WRONG_SUM_SELECT,ER_THD(thd, ER_WRONG_SUM_SELECT),MYF(0));
|
|
goto err;
|
|
}
|
|
#endif
|
|
if (select_lex->olap == ROLLUP_TYPE && rollup_init())
|
|
goto err;
|
|
if (alloc_func_list() ||
|
|
make_sum_func_list(all_fields, fields_list, false))
|
|
goto err;
|
|
|
|
res= FALSE;
|
|
err:
|
|
DBUG_RETURN(res); /* purecov: inspected */
|
|
}
|
|
|
|
|
|
bool JOIN::build_explain()
|
|
{
|
|
DBUG_ENTER("JOIN::build_explain");
|
|
have_query_plan= QEP_AVAILABLE;
|
|
|
|
/*
|
|
explain data must be created on the Explain_query::mem_root. Because it's
|
|
just a memroot, not an arena, explain data must not contain any Items
|
|
*/
|
|
MEM_ROOT *old_mem_root= thd->mem_root;
|
|
Item *old_free_list __attribute__((unused))= thd->free_list;
|
|
thd->mem_root= thd->lex->explain->mem_root;
|
|
bool res= save_explain_data(thd->lex->explain, false /* can overwrite */,
|
|
need_tmp,
|
|
!skip_sort_order && !no_order && (order || group_list),
|
|
select_distinct);
|
|
thd->mem_root= old_mem_root;
|
|
DBUG_ASSERT(thd->free_list == old_free_list); // no Items were created
|
|
if (res)
|
|
DBUG_RETURN(1);
|
|
uint select_nr= select_lex->select_number;
|
|
JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
|
|
for (uint i= 0; i < aggr_tables; i++, curr_tab++)
|
|
{
|
|
if (select_nr == FAKE_SELECT_LEX_ID)
|
|
{
|
|
/* this is a fake_select_lex of a union */
|
|
select_nr= select_lex->master_unit()->first_select()->select_number;
|
|
curr_tab->tracker= thd->lex->explain->get_union(select_nr)->
|
|
get_tmptable_read_tracker();
|
|
}
|
|
else if (select_nr < INT_MAX)
|
|
{
|
|
Explain_select *tmp= thd->lex->explain->get_select(select_nr);
|
|
if (tmp)
|
|
curr_tab->tracker= tmp->get_using_temporary_read_tracker();
|
|
}
|
|
}
|
|
if (is_in_subquery())
|
|
{
|
|
Item_in_subselect *subq= unit->item->get_IN_subquery();
|
|
subq->init_subq_materialization_tracker(thd);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
int JOIN::optimize()
|
|
{
|
|
int res= 0;
|
|
join_optimization_state init_state= optimization_state;
|
|
if (select_lex->pushdown_select)
|
|
{
|
|
if (optimization_state == JOIN::OPTIMIZATION_DONE)
|
|
return 0;
|
|
DBUG_ASSERT(optimization_state == JOIN::NOT_OPTIMIZED);
|
|
|
|
// Do same as JOIN::optimize_inner does:
|
|
fields= &select_lex->item_list;
|
|
|
|
if (!(select_options & SELECT_DESCRIBE))
|
|
{
|
|
/* Prepare to execute the query pushed into a foreign engine */
|
|
res= select_lex->pushdown_select->prepare();
|
|
}
|
|
with_two_phase_optimization= false;
|
|
}
|
|
else if (optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE)
|
|
res= optimize_stage2();
|
|
else
|
|
{
|
|
// to prevent double initialization on EXPLAIN
|
|
if (optimization_state != JOIN::NOT_OPTIMIZED)
|
|
return FALSE;
|
|
optimization_state= JOIN::OPTIMIZATION_IN_PROGRESS;
|
|
res= optimize_inner();
|
|
}
|
|
if (!with_two_phase_optimization ||
|
|
init_state == JOIN::OPTIMIZATION_PHASE_1_DONE)
|
|
{
|
|
if (!res && have_query_plan != QEP_DELETED)
|
|
res= build_explain();
|
|
optimization_state= JOIN::OPTIMIZATION_DONE;
|
|
}
|
|
|
|
/*
|
|
Store the cost of this query into a user variable
|
|
TODO: calculate a correct cost for a query with subqueries and UNIONs.
|
|
*/
|
|
if (select_lex->select_number == 1)
|
|
thd->status_var.last_query_cost= best_read;
|
|
return res;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Create range filters objects needed in execution for all join tables
|
|
|
|
@details
|
|
For each join table from the chosen execution plan such that a range filter
|
|
is used when joining this table the function creates a Rowid_filter object
|
|
for this range filter. In order to do this the function first constructs
|
|
a quick select to scan the range for this range filter. Then it creates
|
|
a container for the range filter and finally constructs a Range_rowid_filter
|
|
object a pointer to which is set in the field JOIN_TAB::rowid_filter of
|
|
the joined table.
|
|
|
|
@retval false Ok
|
|
@retval true Error, query should abort
|
|
*/
|
|
|
|
bool JOIN::make_range_rowid_filters()
|
|
{
|
|
DBUG_ENTER("make_range_rowid_filters");
|
|
|
|
/*
|
|
Do not build range filters with detected impossible WHERE.
|
|
Anyway conditions cannot be used anymore to extract ranges for filters.
|
|
*/
|
|
if (const_table_map != found_const_table_map)
|
|
DBUG_RETURN(0);
|
|
|
|
JOIN_TAB *tab;
|
|
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (!tab->range_rowid_filter_info)
|
|
continue;
|
|
|
|
DBUG_ASSERT(!(tab->ref.key >= 0 &&
|
|
tab->ref.key == (int) tab->range_rowid_filter_info->get_key_no()));
|
|
DBUG_ASSERT(!(tab->ref.key == -1 && tab->quick &&
|
|
tab->quick->index == tab->range_rowid_filter_info->get_key_no()));
|
|
|
|
int err;
|
|
SQL_SELECT *sel= NULL;
|
|
Rowid_filter_container *filter_container= NULL;
|
|
Item **sargable_cond= get_sargable_cond(this, tab->table);
|
|
sel= make_select(tab->table, const_table_map, const_table_map,
|
|
*sargable_cond, (SORT_INFO*) 0, 1, &err);
|
|
if (!sel)
|
|
continue;
|
|
|
|
key_map filter_map;
|
|
filter_map.clear_all();
|
|
filter_map.set_bit(tab->range_rowid_filter_info->get_key_no());
|
|
filter_map.merge(tab->table->with_impossible_ranges);
|
|
quick_select_return rc;
|
|
/*
|
|
EQ_FUNC and EQUAL_FUNC already sent unusable key notes (if any)
|
|
during update_ref_and_keys(). Have only other functions raise notes
|
|
from can_optimize_scalar_range().
|
|
*/
|
|
rc= sel->test_quick_select(thd, filter_map, (table_map) 0,
|
|
(ha_rows) HA_POS_ERROR, true, false, true,
|
|
true, Item_func::BITMAP_EXCEPT_ANY_EQUALITY);
|
|
if (rc == SQL_SELECT::ERROR || thd->is_error() || thd->check_killed())
|
|
{
|
|
delete sel;
|
|
DBUG_RETURN(true); /* Fatal error */
|
|
}
|
|
/*
|
|
If SUBS_IN_TO_EXISTS strtrategy is chosen for the subquery then
|
|
additional conditions are injected into WHERE/ON/HAVING and it may
|
|
happen that the call of test_quick_select() discovers impossible range.
|
|
*/
|
|
if (rc == SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
{
|
|
const_table_map|= tab->table->map;
|
|
goto no_filter;
|
|
}
|
|
DBUG_ASSERT(sel->quick);
|
|
filter_container=
|
|
tab->range_rowid_filter_info->create_container();
|
|
if (filter_container)
|
|
{
|
|
tab->rowid_filter=
|
|
new (thd->mem_root) Range_rowid_filter(tab->table,
|
|
tab->range_rowid_filter_info,
|
|
filter_container, sel);
|
|
if (tab->rowid_filter)
|
|
{
|
|
tab->need_to_build_rowid_filter= true;
|
|
continue;
|
|
}
|
|
}
|
|
no_filter:
|
|
delete sel;
|
|
}
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Allocate memory the rowid containers of the used the range filters
|
|
|
|
@details
|
|
For each join table from the chosen execution plan such that a range filter
|
|
is used when joining this table the function allocate memory for the
|
|
rowid container employed by the filter. On success it lets the table engine
|
|
know that what rowid filter will be used when accessing the table rows.
|
|
|
|
@retval
|
|
false OK
|
|
true Error, query should abort
|
|
*/
|
|
|
|
bool
|
|
JOIN::init_range_rowid_filters()
|
|
{
|
|
JOIN_TAB *tab;
|
|
DBUG_ENTER("init_range_rowid_filters");
|
|
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->need_to_build_rowid_filter= false; // Safety
|
|
if (!tab->rowid_filter)
|
|
continue;
|
|
if (tab->rowid_filter->get_container()->alloc())
|
|
{
|
|
tab->clear_range_rowid_filter();
|
|
continue;
|
|
}
|
|
tab->table->file->rowid_filter_push(tab->rowid_filter);
|
|
tab->need_to_build_rowid_filter= true;
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
  Global select optimisation (first optimization phase of one SELECT).

  @details
  In this order the function: merges mergeable derived tables and runs the
  IN/EXISTS/MAX-MIN subquery transformations and the semi-join conversion
  (on the first optimization of the select only), simplifies the joins,
  optimizes the WHERE and HAVING conditions with optimize_cond(), performs
  the condition pushdowns enabled in the optimizer switch, prunes
  partitions, tries to resolve implicitly grouped aggregates with
  opt_sum_query(), removes constants from GROUP BY and finally calls
  make_join_statistics() and fix_all_splittings_in_plan().

  At the end (label setup_subq_exit) the join is either left in state
  OPTIMIZATION_PHASE_1_DONE, when check_two_phase_optimization() asks for
  two-phase optimization, or optimize_stage2() is called right away.
  The early exits for zero-result and table-less selects set subq_exit_fl
  and jump to the same label.

  @note
  On some of the failure paths an error code is also saved in the
  field 'error'.

  @retval
    0   success
  @retval
    1   error
  @retval
    -1  error: setup_ftfuncs() failed
*/
|
|
|
|
int
|
|
JOIN::optimize_inner()
|
|
{
|
|
DBUG_ENTER("JOIN::optimize_inner");
|
|
/* No early exit has been taken yet and no plan cost is known */
subq_exit_fl= false;
|
|
best_read= 0.0;
|
|
|
|
DEBUG_SYNC(thd, "before_join_optimize");
|
|
THD_STAGE_INFO(thd, stage_optimizing);
|
|
#ifndef DBUG_OFF
|
|
dbug_join_tab_array_size= 0;
|
|
#endif
|
|
|
|
/*
  ROWNUM is used somewhere in the query, this is the first condition
  optimization of the select and the unit of the select is a derived table
*/
|
|
if (unlikely(thd->lex->with_rownum &&
|
|
select_lex->first_cond_optimization &&
|
|
select_lex->master_unit()->derived))
|
|
optimize_upper_rownum_func();
|
|
|
|
/* do_send_rows is 0 only when the select limit of the unit is zero */
do_send_rows = (unit->lim.get_select_limit()) ? 1 : 0;
|
|
|
|
set_allowed_join_cache_types();
|
|
need_distinct= TRUE;
|
|
|
|
/* Optimizer trace: a "join_optimization" object with a "steps" array */
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_prepare(thd, "join_optimization");
|
|
trace_prepare.add_select_number(select_lex->select_number);
|
|
Json_writer_array trace_steps(thd, "steps");
|
|
|
|
/*
  'fields' is needed in case the optimizer takes a short-cut;
  it is set properly in make_aggr_tables_info()
*/
|
|
fields= &select_lex->item_list;
|
|
|
|
if (select_lex->first_cond_optimization)
|
|
{
|
|
/* Do it only for the first execution */
|
|
/* Merge all mergeable derived tables/views in this SELECT. */
|
|
if (select_lex->handle_derived(thd->lex, DT_MERGE))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
/* Also done only on the first condition optimization of the select */
if (select_lex->first_cond_optimization &&
|
|
transform_in_predicates_into_in_subq(thd))
|
|
DBUG_RETURN(1);
|
|
|
|
/*
  Update used tables after all procedures handling derived tables.
  After this call, select_lex->select_list_tables contains the table
  bits of all items in the select list (but not bits from the WHERE clause
  or other items).
*/
|
|
select_lex->update_used_tables();
|
|
|
|
/*
  In fact we transform the underlying subqueries after their 'prepare'
  phase and before their 'optimize' phase, from within the 'optimize' of
  the upper query, to allow the semi-join conversion (which is done in the
  same way) to happen.
*/
|
|
if (select_lex->first_cond_optimization &&
|
|
conds && conds->walk(&Item::exists2in_processor, 0, thd))
|
|
DBUG_RETURN(1);
|
|
/*
  TODO
  Make views able to decide whether it is possible to write to WHERE
  directly, or make semi-joins able to process ON conditions where that is
  possible:

  for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
  {
    if (tbl->on_expr &&
        tbl->on_expr->walk(&Item::exists2in_processor, 0, thd))
      DBUG_RETURN(1);
  }
*/
|
|
|
|
if (transform_max_min_subquery())
|
|
DBUG_RETURN(1); /* purecov: inspected */
|
|
|
|
if (select_lex->first_cond_optimization)
|
|
{
|
|
/* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
|
|
if (convert_join_subqueries_to_semijoins(this))
|
|
DBUG_RETURN(1); /* purecov: inspected */
|
|
/* dump_TABLE_LIST_graph(select_lex, select_lex->leaf_tables); */
|
|
select_lex->update_used_tables();
|
|
}
|
|
|
|
eval_select_list_used_tables();
|
|
|
|
if (select_lex->options & OPTION_SCHEMA_TABLE &&
|
|
optimize_schema_tables_memory_usage(select_lex->leaf_tables))
|
|
DBUG_RETURN(1);
|
|
|
|
/* NOTE: the only failure path of this function that returns -1, not 1 */
if (setup_ftfuncs(select_lex)) /* should be after having->fix_fields */
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
  row_limit is unlimited (HA_POS_ERROR) when DISTINCT, ORDER BY or GROUP BY
  is present; otherwise it is the select limit of the unit
*/
row_limit= ((select_distinct || order || group_list) ? HA_POS_ERROR :
|
|
unit->lim.get_select_limit());
|
|
/* select_limit is used to decide if we are likely to scan the whole table */
|
|
select_limit= unit->lim.get_select_limit();
|
|
if (having || (select_options & OPTION_FOUND_ROWS))
|
|
select_limit= HA_POS_ERROR;
|
|
/* The block below is compiled only when HAVE_REF_TO_FIELDS is defined */
#ifdef HAVE_REF_TO_FIELDS // Not done yet
|
|
/* Add HAVING to WHERE if possible */
|
|
if (having && !group_list && !sum_func_count)
|
|
{
|
|
if (!conds)
|
|
{
|
|
conds= having;
|
|
having= 0;
|
|
}
|
|
else if ((conds=new (thd->mem_root) Item_cond_and(conds,having)))
|
|
{
|
|
/*
  Item_cond_and can't be fixed after creation, so we do not check
  conds->fixed()
*/
|
|
conds->fix_fields(thd, &conds);
|
|
conds->change_ref_to_fields(thd, tables_list);
|
|
conds->top_level_item();
|
|
having= 0;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* 'sel' is just another name for select_lex in the rest of the function */
SELECT_LEX *sel= select_lex;
|
|
if (sel->first_cond_optimization)
|
|
{
|
|
/*
  The following code will allocate the new items in a permanent
  MEMROOT for prepared statements and stored procedures.

  But first we need to ensure that thd->lex->explain is allocated
  in the execution arena
*/
|
|
create_explain_query_if_not_exists(thd->lex, thd->mem_root);
|
|
|
|
Query_arena *arena, backup;
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
|
|
|
/* From now on this select counts as having been optimized once */
sel->first_cond_optimization= 0;
|
|
|
|
/* Convert all outer joins to inner joins if possible */
|
|
conds= simplify_joins(this, join_list, conds, TRUE, FALSE);
|
|
|
|
add_table_function_dependencies(join_list, table_map(-1));
|
|
|
|
/* On failure the active arena is restored before returning */
if (thd->is_error() ||
|
|
(!select_lex->leaf_tables_saved && select_lex->save_leaf_tables(thd)))
|
|
{
|
|
if (arena)
|
|
thd->restore_active_arena(arena, &backup);
|
|
DBUG_RETURN(1);
|
|
}
|
|
select_lex->leaf_tables_saved= true;
|
|
build_bitmap_for_nested_joins(join_list, 0);
|
|
|
|
/* Keep a copy of the AND/OR structure of the simplified WHERE */
sel->prep_where= conds ? conds->copy_andor_structure(thd) : 0;
|
|
|
|
sel->where= conds;
|
|
|
|
select_lex->update_used_tables();
|
|
|
|
if (arena)
|
|
thd->restore_active_arena(arena, &backup);
|
|
}
|
|
|
|
if (!allowed_top_level_tables)
|
|
calc_allowed_top_level_tables(select_lex);
|
|
|
|
if (optimize_constant_subqueries())
|
|
DBUG_RETURN(1);
|
|
|
|
/* Reset the cached is_expensive values in conditions with subqueries */
if (conds && conds->with_subquery())
|
|
(void) conds->walk(&Item::cleanup_is_expensive_cache_processor,
|
|
0, (void *) 0);
|
|
if (having && having->with_subquery())
|
|
(void) having->walk(&Item::cleanup_is_expensive_cache_processor,
|
|
0, (void *) 0);
|
|
|
|
/* Conditions collected by the two jtbm semi-join setup calls below */
List<Item> eq_list;
|
|
|
|
if (setup_degenerate_jtbm_semi_joins(this, join_list, eq_list))
|
|
DBUG_RETURN(1);
|
|
|
|
if (eq_list.elements != 0)
|
|
{
|
|
Item *new_cond;
|
|
|
|
/* A single collected condition is used as is, several are ANDed */
if (eq_list.elements == 1)
|
|
new_cond= eq_list.pop();
|
|
else
|
|
new_cond= new (thd->mem_root) Item_cond_and(thd, eq_list);
|
|
|
|
/* Fix the new condition, AND it to WHERE and fix the resulting WHERE */
if (new_cond &&
|
|
((new_cond->fix_fields(thd, &new_cond) ||
|
|
!(conds= and_items(thd, conds, new_cond)) ||
|
|
conds->fix_fields(thd, &conds))))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
/* The list is used again by setup_jtbm_semi_joins() further down */
eq_list.empty();
|
|
|
|
if (select_lex->cond_pushed_into_where)
|
|
{
|
|
conds= and_conds(thd, conds, select_lex->cond_pushed_into_where);
|
|
if (conds && conds->fix_fields(thd, &conds))
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (select_lex->cond_pushed_into_having)
|
|
{
|
|
having= and_conds(thd, having, select_lex->cond_pushed_into_having);
|
|
if (having)
|
|
{
|
|
/* Both flags are raised only for the duration of fix_fields() */
select_lex->having_fix_field= 1;
|
|
select_lex->having_fix_field_for_pushed_cond= 1;
|
|
if (having->fix_fields(thd, &having))
|
|
DBUG_RETURN(1);
|
|
select_lex->having_fix_field= 0;
|
|
select_lex->having_fix_field_for_pushed_cond= 0;
|
|
}
|
|
}
|
|
|
|
bool ignore_on_expr= false;
|
|
/*
  PS/SP note: on_expr of a versioned table can not be reallocated
  (see build_equal_items() below) because it may not be rebuilt
  at the second invocation.
*/
|
|
if (!thd->stmt_arena->is_conventional() &&
|
|
thd->mem_root != thd->stmt_arena->mem_root)
|
|
for (TABLE_LIST *tbl= tables_list; tbl; tbl= tbl->next_local)
|
|
if (tbl->table && tbl->on_expr && tbl->table->versioned())
|
|
{
|
|
ignore_on_expr= true;
|
|
break;
|
|
}
|
|
|
|
transform_in_predicates_into_equalities(thd);
|
|
if (thd->lex->are_date_funcs_used())
|
|
transform_date_conds_into_sargable();
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_SARGABLE_CASEFOLD))
|
|
{
|
|
transform_all_conds_and_on_exprs(
|
|
thd, &Item::varchar_upper_cmp_transformer);
|
|
}
|
|
|
|
/* Optimize WHERE; cond_value and cond_equal are filled in by the call */
conds= optimize_cond(this, conds, join_list, ignore_on_expr,
|
|
&cond_value, &cond_equal, OPT_LINK_EQUAL_FIELDS);
|
|
|
|
if (thd->is_error())
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from optimize_cond"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
/*
  ROWNUM optimization: done once, only for the select that holds the global
  parameters of the unit, with a WHERE and without ORDER BY, GROUP BY or
  DISTINCT
*/
if (select_lex->with_rownum && ! order && ! group_list &&
|
|
!select_distinct && conds && select_lex == unit->global_parameters() &&
|
|
select_lex->first_rownum_optimization)
|
|
{
|
|
optimize_rownum(thd, unit, conds);
|
|
select_lex->first_rownum_optimization= false;
|
|
}
|
|
|
|
/* Optimize HAVING; having_value and having_equal are filled in */
having= optimize_cond(this, having, join_list, TRUE,
|
|
&having_value, &having_equal);
|
|
|
|
if (thd->is_error())
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from optimize_cond"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/* Do not push into WHERE from HAVING if cond_value == Item::COND_FALSE */
|
|
|
|
if (thd->lex->sql_command == SQLCOM_SELECT &&
|
|
optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING) &&
|
|
cond_value != Item::COND_FALSE)
|
|
{
|
|
having=
|
|
select_lex->pushdown_from_having_into_where(thd, having);
|
|
/* Conditions to be added to WHERE are left in attach_to_conds */
if (select_lex->attach_to_conds.elements != 0)
|
|
{
|
|
conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal,
|
|
select_lex->attach_to_conds,
|
|
&cond_value);
|
|
sel->attach_to_conds.empty();
|
|
Json_writer_object wrapper(thd);
|
|
Json_writer_object pushd(thd, "condition_pushdown_from_having");
|
|
pushd.add("conds", conds);
|
|
pushd.add("having", having);
|
|
}
|
|
}
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_SUBQUERY))
|
|
{
|
|
TABLE_LIST *tbl;
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
/* Only leaf tables that have a jtbm_subselect are considered */
while ((tbl= li++))
|
|
if (tbl->jtbm_subselect)
|
|
{
|
|
if (tbl->jtbm_subselect->pushdown_cond_for_in_subquery(thd, conds))
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
|
|
if (setup_jtbm_semi_joins(this, join_list, eq_list))
|
|
DBUG_RETURN(1);
|
|
|
|
if (eq_list.elements != 0)
|
|
{
|
|
conds= and_new_conditions_to_optimized_cond(thd, conds, &cond_equal,
|
|
eq_list, &cond_value);
|
|
|
|
/*
  No condition was returned although cond_value says it is neither
  always true nor always false: this is treated as an error
*/
if (!conds &&
|
|
cond_value != Item::COND_FALSE && cond_value != Item::COND_TRUE)
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_COND_PUSHDOWN_FOR_DERIVED))
|
|
{
|
|
TABLE_LIST *tbl;
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
while ((tbl= li++))
|
|
{
|
|
/* Only materialized derived tables/views are handled in this loop */
|
|
if (tbl->is_materialized_derived())
|
|
{
|
|
JOIN *join= tbl->get_unit()->first_select()->join;
|
|
/*
  Skip a derived table whose first select has already finished phase 1
  of a two-phase optimization
*/
if (join &&
|
|
join->optimization_state == JOIN::OPTIMIZATION_PHASE_1_DONE &&
|
|
join->with_two_phase_optimization)
|
|
continue;
|
|
/*
  Do not push conditions from where into materialized inner tables
  of outer joins: this is not valid.
*/
|
|
if (!tbl->is_inner_table_of_outer_join())
|
|
{
|
|
if (pushdown_cond_for_derived(thd, conds, tbl))
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (mysql_handle_single_derived(thd->lex, tbl, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Run optimize phase for all derived tables/views used in this SELECT. */
|
|
if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
}
|
|
{
|
|
/*
  Publish the results of the condition optimization in the select: the
  values are always saved, the item trees are replaced only for COND_OK
*/
if (select_lex->where)
|
|
{
|
|
select_lex->cond_value= cond_value;
|
|
if (sel->where != conds && cond_value == Item::COND_OK)
|
|
thd->change_item_tree(&sel->where, conds);
|
|
}
|
|
if (select_lex->having)
|
|
{
|
|
select_lex->having_value= having_value;
|
|
if (sel->having != having && having_value == Item::COND_OK)
|
|
thd->change_item_tree(&sel->having, having);
|
|
}
|
|
/*
  Zero-result short-cut: WHERE or HAVING is always false, or the select
  limit is zero and OPTION_FOUND_ROWS is not set
*/
if (cond_value == Item::COND_FALSE || having_value == Item::COND_FALSE ||
|
|
(!unit->lim.get_select_limit() &&
|
|
!(select_options & OPTION_FOUND_ROWS)))
|
|
{ /* Impossible cond */
|
|
if (unit->lim.get_select_limit())
|
|
{
|
|
DBUG_PRINT("info", (having_value == Item::COND_FALSE ?
|
|
"Impossible HAVING" : "Impossible WHERE"));
|
|
zero_result_cause= having_value == Item::COND_FALSE ?
|
|
"Impossible HAVING" : "Impossible WHERE";
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info", ("Zero limit"));
|
|
zero_result_cause= "Zero limit";
|
|
}
|
|
table_count= top_join_tab_count= 0;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
error= 0;
|
|
subq_exit_fl= true;
|
|
goto setup_subq_exit;
|
|
}
|
|
}
|
|
|
|
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
|
{
|
|
TABLE_LIST *tbl;
|
|
List_iterator_fast<TABLE_LIST> li(select_lex->leaf_tables);
|
|
/* Prune partitions of each leaf table using its sargable condition */
while ((tbl= li++))
|
|
{
|
|
Item **prune_cond= get_sargable_cond(this, tbl->table);
|
|
tbl->table->all_partitions_pruned_away=
|
|
prune_partitions(thd, tbl->table, *prune_cond);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/*
  Try to optimize count(*), MY_MIN() and MY_MAX() to const fields if
  there is implicit grouping (aggregate functions but no
  group_list). In this case, the result set shall only contain one
  row.
*/
|
|
if (tables_list && implicit_grouping)
|
|
{
|
|
int res;
|
|
/*
  opt_sum_query() returns HA_ERR_KEY_NOT_FOUND if no rows match
  to the WHERE conditions,
  or 1 if all items were resolved (optimized away),
  or 0, or an error number HA_ERR_...

  If all items were resolved by opt_sum_query, there is no need to
  open any tables.
*/
|
|
|
|
/*
  The following resetting and restoring of sum_funcs is needed to
  go around a bug in spider where it assumes that
  make_sum_func_list() has not been called yet and does logical
  choices based on this if special handling of min/max functions should
  be done. We disable this special handling while we are trying to find
  out if we can replace MIN/MAX values with constants.
*/
|
|
Item_sum **save_func_sums= sum_funcs, *tmp_sum_funcs= 0;
|
|
sum_funcs= &tmp_sum_funcs;
|
|
res= opt_sum_query(thd, select_lex->leaf_tables, all_fields, conds);
|
|
sum_funcs= save_func_sums;
|
|
|
|
if (res)
|
|
{
|
|
DBUG_ASSERT(res >= 0);
|
|
if (res == HA_ERR_KEY_NOT_FOUND)
|
|
{
|
|
DBUG_PRINT("info",("No matching min/max row"));
|
|
zero_result_cause= "No matching min/max row";
|
|
table_count= top_join_tab_count= 0;
|
|
error=0;
|
|
subq_exit_fl= true;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
/* Any other value above 1 is an error number */
if (res > 1)
|
|
{
|
|
error= res;
|
|
DBUG_PRINT("error",("Error from opt_sum_query"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/* res == 1: all items were resolved by opt_sum_query() */
DBUG_PRINT("info",("Select tables optimized away"));
|
|
if (!select_lex->have_window_funcs())
|
|
zero_result_cause= "Select tables optimized away";
|
|
tables_list= 0; // All tables resolved
|
|
select_lex->min_max_opt_list.empty();
|
|
const_tables= top_join_tab_count= table_count;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
/*
  Extract all table-independent conditions and replace the WHERE
  clause with them. All other conditions were computed by opt_sum_query
  and the MIN/MAX/COUNT function(s) have been replaced by constants,
  so there is no need to compute the whole WHERE clause again.
  Notice that make_cond_for_table() will always succeed to remove all
  computed conditions, because opt_sum_query() is applicable only to
  conjunctions.
  Preserve conditions for EXPLAIN.
*/
|
|
if (conds && !(thd->lex->describe & DESCRIBE_EXTENDED))
|
|
{
|
|
COND *table_independent_conds=
|
|
make_cond_for_table(thd, conds, PSEUDO_TABLE_BITS, 0, -1,
|
|
FALSE, FALSE);
|
|
if (!table_independent_conds && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
DBUG_EXECUTE("where",
|
|
print_where(table_independent_conds,
|
|
"where after opt_sum_query()",
|
|
QT_ORDINARY););
|
|
conds= table_independent_conds;
|
|
}
|
|
}
|
|
}
|
|
/*
  No tables from the start, or all of them were resolved just above:
  nothing to join
*/
if (!tables_list)
|
|
{
|
|
DBUG_PRINT("info",("No tables"));
|
|
error= 0;
|
|
subq_exit_fl= true;
|
|
goto setup_subq_exit;
|
|
}
|
|
error= -1; // Error is sent to client
|
|
/* get_sort_by_table() call used to be here: */
|
|
MEM_UNDEFINED(&sort_by_table, sizeof(sort_by_table));
|
|
|
|
/*
  We have to remove constants and duplicates from group_list before
  calling make_join_statistics() as this may call get_best_group_min_max()
  which needs a simplified group_list.
*/
|
|
if (group_list && table_count == 1)
|
|
{
|
|
group_list= remove_const(this, group_list, conds,
|
|
rollup.state == ROLLUP::STATE_NONE,
|
|
&simple_group);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (!group_list)
|
|
{
|
|
/* The output has only one row */
|
|
order=0;
|
|
simple_order=1;
|
|
group_optimized_away= 1;
|
|
select_distinct=0;
|
|
}
|
|
}
|
|
|
|
/* Calculate how to do the join */
|
|
THD_STAGE_INFO(thd, stage_statistics);
|
|
result->prepare_to_read_rows();
|
|
if (unlikely(make_join_statistics(this, select_lex->leaf_tables,
|
|
&keyuse)) ||
|
|
unlikely(thd->is_error()))
|
|
{
|
|
DBUG_PRINT("error",("Error: make_join_statistics() failed"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
  If a splittable materialized derived/view dt_i is embedded
  into another splittable materialized derived/view dt_o then
  splitting plans for dt_i and dt_o are evaluated independently.
  First the optimizer looks for the best splitting plan sp_i for dt_i.
  It happens when non-splitting plans for dt_o are evaluated.
  The cost of sp_i is considered as the cost of materialization of dt_i
  when evaluating any splitting plan for dt_o.
*/
|
|
if (fix_all_splittings_in_plan())
|
|
DBUG_RETURN(1);
|
|
|
|
/*
  Reached on normal completion and from the early exits above, which set
  subq_exit_fl before jumping here
*/
setup_subq_exit:
|
|
with_two_phase_optimization= check_two_phase_optimization(thd);
|
|
if (with_two_phase_optimization)
|
|
optimization_state= JOIN::OPTIMIZATION_PHASE_1_DONE;
|
|
else
|
|
{
|
|
if (optimize_stage2())
|
|
DBUG_RETURN(1);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
int JOIN::optimize_stage2()
|
|
{
|
|
ulonglong select_opts_for_readinfo;
|
|
uint no_jbuf_after;
|
|
JOIN_TAB *tab;
|
|
DBUG_ENTER("JOIN::optimize_stage2");
|
|
|
|
if (subq_exit_fl)
|
|
goto setup_subq_exit;
|
|
|
|
if (unlikely(thd->check_killed()))
|
|
DBUG_RETURN(1);
|
|
|
|
/* Generate an execution plan from the found optimal join order. */
|
|
if (get_best_combination())
|
|
DBUG_RETURN(1);
|
|
|
|
if (make_range_rowid_filters())
|
|
DBUG_RETURN(1);
|
|
|
|
if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
|
|
/*
|
|
We have to call drop_unused_derived_keys() even if we don't have any
|
|
generated keys (enabled with OPTIMIZER_SWITCH_DERIVED_WITH_KEYS)
|
|
as we may still have unique constraints we have to get rid of.
|
|
*/
|
|
drop_unused_derived_keys();
|
|
|
|
if (rollup.state != ROLLUP::STATE_NONE)
|
|
{
|
|
if (rollup_process_const_fields())
|
|
{
|
|
DBUG_PRINT("error", ("Error: rollup_process_fields() failed"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Remove distinct if only const tables */
|
|
select_distinct= select_distinct && (const_tables != table_count);
|
|
}
|
|
|
|
THD_STAGE_INFO(thd, stage_preparing);
|
|
if (result->initialize_tables(this))
|
|
{
|
|
DBUG_PRINT("error",("Error: initialize_tables() failed"));
|
|
DBUG_RETURN(1); // error == -1
|
|
}
|
|
if (const_table_map != found_const_table_map &&
|
|
!(select_options & SELECT_DESCRIBE))
|
|
{
|
|
// There is at least one empty const table
|
|
zero_result_cause= "no matching row in const table";
|
|
DBUG_PRINT("error",("Error: %s", zero_result_cause));
|
|
error= 0;
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
if (!(thd->variables.option_bits & OPTION_BIG_SELECTS) &&
|
|
join_record_count > (double) thd->variables.max_join_size &&
|
|
!(select_options & SELECT_DESCRIBE))
|
|
{ /* purecov: inspected */
|
|
my_message(ER_TOO_BIG_SELECT, ER_THD(thd, ER_TOO_BIG_SELECT), MYF(0));
|
|
error= -1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (const_tables && !thd->locked_tables_mode &&
|
|
!(select_options & SELECT_NO_UNLOCK))
|
|
{
|
|
/*
|
|
Unlock all tables, except sequences, as accessing these may still
|
|
require table updates. It's safe to ignore result code as all
|
|
tables where opened for read only.
|
|
*/
|
|
(void) mysql_unlock_some_tables(thd, table, const_tables,
|
|
GET_LOCK_SKIP_SEQUENCES);
|
|
}
|
|
if (!conds && outer_join)
|
|
{
|
|
/* Handle the case where we have an OUTER JOIN without a WHERE */
|
|
conds= (Item*) Item_true;
|
|
}
|
|
|
|
if (impossible_where)
|
|
{
|
|
zero_result_cause=
|
|
"Impossible WHERE noticed after reading const tables";
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
|
|
select= make_select(*table, const_table_map,
|
|
const_table_map, conds, (SORT_INFO*) 0, 1, &error);
|
|
if (unlikely(error))
|
|
{ /* purecov: inspected */
|
|
error= -1; /* purecov: inspected */
|
|
DBUG_PRINT("error",("Error: make_select() failed"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
reset_nj_counters(this, join_list);
|
|
if (make_outerjoin_info(this))
|
|
{
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
Among the equal fields belonging to the same multiple equality
|
|
choose the one that is to be retrieved first and substitute
|
|
all references to these in where condition for a reference for
|
|
the selected field.
|
|
*/
|
|
if (conds)
|
|
{
|
|
conds= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, conds,
|
|
cond_equal, map2table, true);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from substitute_for_best_equal"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
conds->update_used_tables();
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
trace_condition(thd, "WHERE", "substitute_best_equal", conds);
|
|
|
|
DBUG_EXECUTE("where",
|
|
print_where(conds,
|
|
"after substitute_best_equal",
|
|
QT_ORDINARY););
|
|
}
|
|
if (having)
|
|
{
|
|
having= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB, having,
|
|
having_equal, map2table, false);
|
|
if (thd->is_error())
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from substitute_for_best_equal"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (having)
|
|
{
|
|
having->update_used_tables();
|
|
if (unlikely(thd->trace_started()))
|
|
trace_condition(thd, "HAVING", "substitute_best_equal", having);
|
|
}
|
|
|
|
DBUG_EXECUTE("having",
|
|
print_where(having,
|
|
"after substitute_best_equal",
|
|
QT_ORDINARY););
|
|
}
|
|
|
|
/*
|
|
Perform the optimization on fields evaluation mentioned above
|
|
for all on expressions.
|
|
*/
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (*tab->on_expr_ref)
|
|
{
|
|
*tab->on_expr_ref= substitute_for_best_equal_field(thd, NO_PARTICULAR_TAB,
|
|
*tab->on_expr_ref,
|
|
tab->cond_equal,
|
|
map2table, true);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_PRINT("error",("Error from substitute_for_best_equal"));
|
|
DBUG_RETURN(1);
|
|
}
|
|
(*tab->on_expr_ref)->update_used_tables();
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
trace_condition(thd, "ON expr", "substitute_best_equal",
|
|
(*tab->on_expr_ref), tab->table->alias.c_ptr());
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Perform the optimization on fields evaliation mentioned above
|
|
for all used ref items.
|
|
*/
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
uint key_copy_index=0;
|
|
for (uint i=0; i < tab->ref.key_parts; i++)
|
|
{
|
|
Item **ref_item_ptr= tab->ref.items+i;
|
|
Item *ref_item= *ref_item_ptr;
|
|
if (!ref_item->used_tables() && !(select_options & SELECT_DESCRIBE))
|
|
continue;
|
|
COND_EQUAL *equals= cond_equal;
|
|
JOIN_TAB *first_inner= tab->first_inner;
|
|
while (equals)
|
|
{
|
|
ref_item= substitute_for_best_equal_field(thd, tab, ref_item,
|
|
equals, map2table, true);
|
|
if (unlikely(thd->is_error()))
|
|
DBUG_RETURN(1);
|
|
|
|
if (first_inner)
|
|
{
|
|
equals= first_inner->cond_equal;
|
|
first_inner= first_inner->first_upper;
|
|
}
|
|
else
|
|
equals= 0;
|
|
}
|
|
ref_item->update_used_tables();
|
|
if (*ref_item_ptr != ref_item)
|
|
{
|
|
*ref_item_ptr= ref_item;
|
|
Item *item= ref_item->real_item();
|
|
store_key *key_copy= tab->ref.key_copy[key_copy_index];
|
|
if (key_copy->type() == store_key::FIELD_STORE_KEY)
|
|
{
|
|
if (item->basic_const_item())
|
|
{
|
|
/* It is constant propagated here */
|
|
tab->ref.key_copy[key_copy_index]=
|
|
new store_key_const_item(*tab->ref.key_copy[key_copy_index],
|
|
item);
|
|
}
|
|
else if (item->const_item())
|
|
{
|
|
tab->ref.key_copy[key_copy_index]=
|
|
new store_key_item(*tab->ref.key_copy[key_copy_index],
|
|
item, TRUE);
|
|
}
|
|
else
|
|
{
|
|
store_key_field *field_copy= ((store_key_field *)key_copy);
|
|
DBUG_ASSERT(item->type() == Item::FIELD_ITEM);
|
|
field_copy->change_source_field((Item_field *) item);
|
|
}
|
|
}
|
|
}
|
|
key_copy_index++;
|
|
}
|
|
}
|
|
|
|
if (conds && const_table_map != found_const_table_map &&
|
|
(select_options & SELECT_DESCRIBE))
|
|
conds= (Item*) Item_false;
|
|
|
|
/* Cache constant expressions in WHERE, HAVING, ON clauses. */
|
|
cache_const_exprs();
|
|
|
|
if (setup_semijoin_loosescan(this))
|
|
DBUG_RETURN(1);
|
|
|
|
if (make_join_select(this, select, conds))
|
|
{
|
|
if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
zero_result_cause=
|
|
"Impossible WHERE noticed after reading const tables";
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
handle_implicit_grouping_with_window_funcs();
|
|
goto setup_subq_exit;
|
|
}
|
|
|
|
error= -1; /* if goto err */
|
|
|
|
/* Optimize distinct away if possible */
|
|
{
|
|
ORDER *org_order= order;
|
|
order=remove_const(this, order,conds,1, &simple_order);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
If we are using ORDER BY NULL or ORDER BY const_expression,
|
|
return result in any order (even if we are using a GROUP BY)
|
|
*/
|
|
if (!order && org_order)
|
|
skip_sort_order= 1;
|
|
}
|
|
|
|
/*
|
|
For FETCH ... WITH TIES save how many items order by had, after we've
|
|
removed constant items that have no relevance on the final sorting.
|
|
*/
|
|
if (unit->lim.is_with_ties())
|
|
{
|
|
DBUG_ASSERT(with_ties_order_count == 0);
|
|
for (ORDER *it= order; it; it= it->next)
|
|
with_ties_order_count+= 1;
|
|
}
|
|
|
|
|
|
/*
|
|
Check if we can optimize away GROUP BY/DISTINCT.
|
|
We can do that if there are no aggregate functions, the
|
|
fields in DISTINCT clause (if present) and/or columns in GROUP BY
|
|
(if present) contain direct references to all key parts of
|
|
an unique index (in whatever order) and if the key parts of the
|
|
unique index cannot contain NULLs.
|
|
Note that the unique keys for DISTINCT and GROUP BY should not
|
|
be the same (as long as they are unique).
|
|
|
|
The FROM clause must contain a single non-constant table.
|
|
*/
|
|
if (table_count - const_tables == 1 && (group || select_distinct) &&
|
|
!tmp_table_param.sum_func_count &&
|
|
(!join_tab[const_tables].select ||
|
|
!join_tab[const_tables].select->quick ||
|
|
join_tab[const_tables].select->quick->get_type() !=
|
|
QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX) &&
|
|
!select_lex->have_window_funcs())
|
|
{
|
|
if (group && rollup.state == ROLLUP::STATE_NONE &&
|
|
list_contains_unique_index(join_tab[const_tables].table,
|
|
find_field_in_order_list,
|
|
(void *) group_list))
|
|
{
|
|
/*
|
|
We have found that grouping can be removed since groups correspond to
|
|
only one row anyway, but we still have to guarantee correct result
|
|
order. The line below effectively rewrites the query from GROUP BY
|
|
<fields> to ORDER BY <fields>. There are three exceptions:
|
|
- if skip_sort_order is set (see above), then we can simply skip
|
|
GROUP BY;
|
|
- if we are in a subquery, we don't have to maintain order unless there
|
|
is a limit clause in the subquery.
|
|
- we can only rewrite ORDER BY if the ORDER BY fields are 'compatible'
|
|
with the GROUP BY ones, i.e. either one is a prefix of another.
|
|
We only check if the ORDER BY is a prefix of GROUP BY. In this case
|
|
test_if_subpart() copies the ASC/DESC attributes from the original
|
|
ORDER BY fields.
|
|
If GROUP BY is a prefix of ORDER BY, then it is safe to leave
|
|
'order' as is.
|
|
*/
|
|
if (!order || test_if_subpart(group_list, order))
|
|
{
|
|
if (skip_sort_order ||
|
|
(select_lex->master_unit()->item && select_limit == HA_POS_ERROR)) // This is a subquery
|
|
order= NULL;
|
|
else
|
|
order= group_list;
|
|
}
|
|
/*
|
|
If we have an IGNORE INDEX FOR GROUP BY(fields) clause, this must be
|
|
rewritten to IGNORE INDEX FOR ORDER BY(fields).
|
|
*/
|
|
join_tab->table->keys_in_use_for_order_by=
|
|
join_tab->table->keys_in_use_for_group_by;
|
|
group_list= 0;
|
|
group= 0;
|
|
}
|
|
if (select_distinct &&
|
|
list_contains_unique_index(join_tab[const_tables].table,
|
|
find_field_in_item_list,
|
|
(void *) &fields_list))
|
|
{
|
|
select_distinct= 0;
|
|
}
|
|
}
|
|
if (group || tmp_table_param.sum_func_count)
|
|
{
|
|
if (! hidden_group_fields && rollup.state == ROLLUP::STATE_NONE
|
|
&& !select_lex->have_window_funcs())
|
|
select_distinct=0;
|
|
}
|
|
else if (select_distinct && table_count - const_tables == 1 &&
|
|
rollup.state == ROLLUP::STATE_NONE &&
|
|
!select_lex->have_window_funcs())
|
|
{
|
|
/*
|
|
We are only using one table. In this case we change DISTINCT to a
|
|
GROUP BY query if:
|
|
- The GROUP BY can be done through indexes (no sort) and the ORDER
|
|
BY only uses selected fields.
|
|
(In this case we can later optimize away GROUP BY and ORDER BY)
|
|
- We are scanning the whole table without LIMIT
|
|
This can happen if:
|
|
- We are using CALC_FOUND_ROWS
|
|
- We are using an ORDER BY that can't be optimized away.
|
|
|
|
We don't want to use this optimization when we are using LIMIT
|
|
because in this case we can just create a temporary table that
|
|
holds LIMIT rows and stop when this table is full.
|
|
*/
|
|
bool all_order_fields_used;
|
|
|
|
tab= &join_tab[const_tables];
|
|
if (order)
|
|
{
|
|
bool fatal_err;
|
|
skip_sort_order=
|
|
test_if_skip_sort_order(tab, order, select_limit,
|
|
true, // no_changes
|
|
&tab->table->keys_in_use_for_order_by,
|
|
&fatal_err);
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
}
|
|
if ((group_list=create_distinct_group(thd, select_lex->ref_pointer_array,
|
|
order, fields_list, all_fields,
|
|
&all_order_fields_used)))
|
|
{
|
|
bool fatal_err= 0;
|
|
const bool skip_group=
|
|
skip_sort_order &&
|
|
test_if_skip_sort_order(tab, group_list, select_limit,
|
|
true, // no_changes
|
|
&tab->table->keys_in_use_for_group_by,
|
|
&fatal_err);
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
|
|
count_field_types(select_lex, &tmp_table_param, all_fields, 0);
|
|
if ((skip_group && all_order_fields_used) ||
|
|
select_limit == HA_POS_ERROR ||
|
|
(order && !skip_sort_order))
|
|
{
|
|
/* Change DISTINCT to GROUP BY */
|
|
select_distinct= 0;
|
|
no_order= !order;
|
|
if (all_order_fields_used)
|
|
{
|
|
if (order && skip_sort_order)
|
|
{
|
|
/*
|
|
Force MySQL to read the table in sorted order to get result in
|
|
ORDER BY order.
|
|
*/
|
|
tmp_table_param.quick_group=0;
|
|
}
|
|
order=0;
|
|
}
|
|
group=1; // For end_write_group
|
|
}
|
|
else
|
|
group_list= 0;
|
|
}
|
|
else if (thd->is_error()) // End of memory
|
|
DBUG_RETURN(1);
|
|
}
|
|
simple_group= rollup.state == ROLLUP::STATE_NONE;
|
|
if (group)
|
|
{
|
|
/*
|
|
Update simple_group and group_list as we now have more information, like
|
|
which tables or columns are constant.
|
|
*/
|
|
group_list= remove_const(this, group_list, conds,
|
|
rollup.state == ROLLUP::STATE_NONE,
|
|
&simple_group);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
if (!group_list)
|
|
{
|
|
/* The output has only one row */
|
|
order=0;
|
|
simple_order=1;
|
|
select_distinct= 0;
|
|
group_optimized_away= 1;
|
|
}
|
|
}
|
|
|
|
calc_group_buffer(this, group_list);
|
|
send_group_parts= tmp_table_param.group_parts; /* Save org parts */
|
|
if (procedure && procedure->group)
|
|
{
|
|
group_list= procedure->group= remove_const(this, procedure->group, conds,
|
|
1, &simple_group);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= 1;
|
|
DBUG_RETURN(1);
|
|
}
|
|
calc_group_buffer(this, group_list);
|
|
}
|
|
|
|
/*
|
|
We can ignore ORDER BY if it's a prefix of the GROUP BY list
|
|
(as MariaDB is by default sorting on GROUP BY) or
|
|
if there is no GROUP BY and aggregate functions are used
|
|
(as the result will only contain one row).
|
|
*/
|
|
if (order && (test_if_subpart(group_list, order) ||
|
|
(!group_list && tmp_table_param.sum_func_count)))
|
|
order=0;
|
|
|
|
// Can't use sort on head table if using join buffering
|
|
if (full_join || hash_join)
|
|
{
|
|
TABLE *stable= (sort_by_table == (TABLE *) 1 ?
|
|
join_tab[const_tables].table : sort_by_table);
|
|
/*
|
|
FORCE INDEX FOR ORDER BY can be used to prevent join buffering when
|
|
sorting on the first table.
|
|
*/
|
|
if (!stable || (!stable->force_index_order &&
|
|
!map2table[stable->tablenr]->keep_current_rowid))
|
|
{
|
|
if (group_list)
|
|
simple_group= 0;
|
|
if (order)
|
|
simple_order= 0;
|
|
}
|
|
}
|
|
|
|
need_tmp= test_if_need_tmp_table();
|
|
|
|
/*
|
|
If window functions are present then we can't have simple_order set to
|
|
TRUE as the window function needs a temp table for computation.
|
|
ORDER BY is computed after the window function computation is done, so
|
|
the sort will be done on the temp table.
|
|
*/
|
|
if (select_lex->have_window_funcs())
|
|
simple_order= FALSE;
|
|
|
|
/*
|
|
If the hint FORCE INDEX FOR ORDER BY/GROUP BY is used for the table
|
|
whose columns are required to be returned in a sorted order, then
|
|
the proper value for no_jbuf_after should be yielded by a call to
|
|
the make_join_orderinfo function.
|
|
Yet the current implementation of FORCE INDEX hints does not
|
|
allow us to do it in a clean manner.
|
|
*/
|
|
no_jbuf_after= 1 ? table_count : make_join_orderinfo(this);
|
|
|
|
// Don't use join buffering when we use MATCH
|
|
select_opts_for_readinfo=
|
|
(select_options & (SELECT_DESCRIBE | SELECT_NO_JOIN_CACHE)) |
|
|
(select_lex->ftfunc_list->elements ? SELECT_NO_JOIN_CACHE : 0);
|
|
|
|
if (select_lex->options & OPTION_SCHEMA_TABLE &&
|
|
optimize_schema_tables_reads(this))
|
|
DBUG_RETURN(1);
|
|
|
|
if (make_join_readinfo(this, select_opts_for_readinfo, no_jbuf_after))
|
|
DBUG_RETURN(1);
|
|
|
|
/* Perform FULLTEXT search before all regular searches */
|
|
if (!(select_options & SELECT_DESCRIBE))
|
|
if (init_ftfuncs(thd, select_lex, MY_TEST(order)))
|
|
DBUG_RETURN(1);
|
|
|
|
/*
|
|
It's necessary to check const part of HAVING cond as
|
|
there is a chance that some cond parts may become
|
|
const items after make_join_statistics(for example
|
|
when Item is a reference to cost table field from
|
|
outer join).
|
|
This check is performed only for those conditions
|
|
which do not use aggregate functions. In such case
|
|
temporary table may not be used and const condition
|
|
elements may be lost during further having
|
|
condition transformation.
|
|
*/
|
|
if (having && const_table_map && !having->with_sum_func())
|
|
{
|
|
having->update_used_tables();
|
|
if (having->const_item() && !having->is_expensive())
|
|
{
|
|
if (!having->val_int())
|
|
{
|
|
having= Item_false;
|
|
zero_result_cause= "Impossible HAVING noticed after reading const tables";
|
|
error= 0;
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
goto setup_subq_exit;
|
|
}
|
|
having= Item_true;
|
|
}
|
|
}
|
|
|
|
if (optimize_unflattened_subqueries())
|
|
DBUG_RETURN(1);
|
|
|
|
int res;
|
|
if ((res= rewrite_to_index_subquery_engine(this)) != -1)
|
|
DBUG_RETURN(res);
|
|
if (setup_subquery_caches())
|
|
DBUG_RETURN(-1);
|
|
|
|
/*
|
|
Need to tell handlers that to play it safe, it should fetch all
|
|
columns of the primary key of the tables: this is because MySQL may
|
|
build row pointers for the rows, and for all columns of the primary key
|
|
the read set has not necessarily been set by the server code.
|
|
*/
|
|
if (need_tmp || select_distinct || group_list || order)
|
|
{
|
|
for (uint i= 0; i < table_count; i++)
|
|
{
|
|
if (!(table[i]->map & const_table_map))
|
|
table[i]->prepare_for_position();
|
|
}
|
|
}
|
|
|
|
DBUG_EXECUTE("info",TEST_join(this););
|
|
|
|
if (!only_const_tables())
|
|
{
|
|
JOIN_TAB *tab= &join_tab[const_tables];
|
|
|
|
if (order && !need_tmp)
|
|
{
|
|
/*
|
|
Force using of tmp table if sorting by a SP or UDF function due to
|
|
their expensive and probably non-deterministic nature.
|
|
*/
|
|
for (ORDER *tmp_order= order; tmp_order ; tmp_order=tmp_order->next)
|
|
{
|
|
Item *item= *tmp_order->item;
|
|
if (item->is_expensive())
|
|
{
|
|
/* Force tmp table without sort */
|
|
need_tmp=1; simple_order=simple_group=0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Because filesort always does a full table scan or a quick range scan
|
|
we must add the removed reference to the select for the table.
|
|
We only need to do this when we have a simple_order or simple_group
|
|
as in other cases the join is done before the sort.
|
|
*/
|
|
if ((order || group_list) &&
|
|
tab->type != JT_ALL &&
|
|
tab->type != JT_RANGE &&
|
|
tab->type != JT_NEXT &&
|
|
tab->type != JT_FT &&
|
|
tab->type != JT_REF_OR_NULL &&
|
|
((order && simple_order) || (group_list && simple_group)))
|
|
{
|
|
if (add_ref_to_table_cond(thd,tab)) {
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
/*
|
|
Investigate whether we may use an ordered index as part of either
|
|
DISTINCT, GROUP BY or ORDER BY execution. An ordered index may be
|
|
used for only the first of any of these terms to be executed. This
|
|
is reflected in the order which we check for test_if_skip_sort_order()
|
|
below. However we do not check for DISTINCT here, as it would have
|
|
been transformed to a GROUP BY at this stage if it is a candidate for
|
|
ordered index optimization.
|
|
If a decision was made to use an ordered index, the availability
|
|
of such an access path is stored in 'ordered_index_usage' for later
|
|
use by 'execute' or 'explain'
|
|
*/
|
|
DBUG_ASSERT(ordered_index_usage == ordered_index_void);
|
|
|
|
if (group_list) // GROUP BY honoured first
|
|
// (DISTINCT was rewritten to GROUP BY if skippable)
|
|
{
|
|
/*
|
|
When there is SQL_BIG_RESULT do not sort using index for GROUP BY,
|
|
and thus force sorting on disk unless a group min-max optimization
|
|
is going to be used as it is applied now only for one table queries
|
|
with covering indexes.
|
|
*/
|
|
if (!(select_options & SELECT_BIG_RESULT) ||
|
|
(tab->select &&
|
|
tab->select->quick &&
|
|
tab->select->quick->get_type() ==
|
|
QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX))
|
|
{
|
|
if (simple_group && // GROUP BY is possibly skippable
|
|
!select_distinct) // .. if not preceded by a DISTINCT
|
|
{
|
|
/*
|
|
Calculate a possible 'limit' of table rows for 'GROUP BY':
|
|
A specified 'LIMIT' is relative to the final resultset.
|
|
'need_tmp' implies that there will be more postprocessing
|
|
so the specified 'limit' should not be enforced yet.
|
|
*/
|
|
bool fatal_err;
|
|
const ha_rows limit = need_tmp ? HA_POS_ERROR : select_limit;
|
|
if (test_if_skip_sort_order(tab, group_list, limit, false,
|
|
&tab->table->keys_in_use_for_group_by,
|
|
&fatal_err))
|
|
{
|
|
ordered_index_usage= ordered_index_group_by;
|
|
}
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
If we are going to use semi-join LooseScan, it will depend
|
|
on the selected index scan to be used. If index is not used
|
|
for the GROUP BY, we risk that sorting is put on the LooseScan
|
|
table. In order to avoid this, force use of temporary table.
|
|
TODO: Explain the quick_group part of the test below.
|
|
*/
|
|
if ((ordered_index_usage != ordered_index_group_by) &&
|
|
((tmp_table_param.quick_group && !procedure) ||
|
|
(tab->emb_sj_nest &&
|
|
best_positions[const_tables].sj_strategy == SJ_OPT_LOOSE_SCAN)))
|
|
{
|
|
need_tmp=1;
|
|
simple_order= simple_group= false; // Force tmp table without sort
|
|
}
|
|
}
|
|
}
|
|
else if (order && // ORDER BY wo/ preceding GROUP BY
|
|
(simple_order || skip_sort_order)) // which is possibly skippable
|
|
{
|
|
bool fatal_err;
|
|
if (test_if_skip_sort_order(tab, order, select_limit, false,
|
|
&tab->table->keys_in_use_for_order_by,
|
|
&fatal_err))
|
|
{
|
|
ordered_index_usage= ordered_index_order_by;
|
|
}
|
|
if (fatal_err)
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
|
|
if (having)
|
|
having_is_correlated= MY_TEST(having->used_tables() & OUTER_REF_TABLE_BIT);
|
|
tmp_having= having;
|
|
|
|
if (unlikely(thd->is_error()))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
/*
|
|
The loose index scan access method guarantees that all grouping or
|
|
duplicate row elimination (for distinct) is already performed
|
|
during data retrieval, and that all MIN/MAX functions are already
|
|
computed for each group. Thus all MIN/MAX functions should be
|
|
treated as regular functions, and there is no need to perform
|
|
grouping in the main execution loop.
|
|
Notice that currently loose index scan is applicable only for
|
|
single table queries, thus it is sufficient to test only the first
|
|
join_tab element of the plan for its access method.
|
|
*/
|
|
if (join_tab->is_using_loose_index_scan())
|
|
{
|
|
tmp_table_param.precomputed_group_by= TRUE;
|
|
if (join_tab->is_using_agg_loose_index_scan())
|
|
{
|
|
need_distinct= FALSE;
|
|
tmp_table_param.precomputed_group_by= FALSE;
|
|
}
|
|
}
|
|
|
|
if (make_aggr_tables_info())
|
|
DBUG_RETURN(1);
|
|
|
|
init_join_cache_and_keyread();
|
|
|
|
if (init_range_rowid_filters())
|
|
DBUG_RETURN(1);
|
|
|
|
error= 0;
|
|
|
|
if (select_options & SELECT_DESCRIBE)
|
|
goto derived_exit;
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
setup_subq_exit:
|
|
/* Choose an execution strategy for this JOIN. */
|
|
if (!tables_list || !table_count)
|
|
{
|
|
choose_tableless_subquery_plan();
|
|
|
|
/* The output has atmost one row */
|
|
if (group_list)
|
|
{
|
|
group_list= NULL;
|
|
group_optimized_away= 1;
|
|
rollup.state= ROLLUP::STATE_NONE;
|
|
}
|
|
order= NULL;
|
|
simple_order= TRUE;
|
|
select_distinct= FALSE;
|
|
|
|
if (select_lex->have_window_funcs())
|
|
{
|
|
if (!(join_tab= thd->alloc<JOIN_TAB>(1)))
|
|
DBUG_RETURN(1);
|
|
#ifndef DBUG_OFF
|
|
dbug_join_tab_array_size= 1;
|
|
#endif
|
|
need_tmp= 1;
|
|
}
|
|
if (make_aggr_tables_info())
|
|
DBUG_RETURN(1);
|
|
|
|
/*
|
|
It could be that we've only done optimization stage 1 for
|
|
some of the derived tables, and never did stage 2.
|
|
Do it now, otherwise Explain data structure will not be complete.
|
|
*/
|
|
if (select_lex->handle_derived(thd->lex, DT_OPTIMIZE))
|
|
DBUG_RETURN(1);
|
|
}
|
|
/*
|
|
Even with zero matching rows, subqueries in the HAVING clause may
|
|
need to be evaluated if there are aggregate functions in the query.
|
|
*/
|
|
if (optimize_unflattened_subqueries())
|
|
DBUG_RETURN(1);
|
|
error= 0;
|
|
|
|
derived_exit:
|
|
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
|
|
Add having condition as a where clause condition of the given temp table.
|
|
|
|
@param tab Table to which having condition is added.
|
|
|
|
@returns false if success, true if error.
|
|
*/
|
|
|
|
bool JOIN::add_having_as_table_cond(JOIN_TAB *tab)
|
|
{
|
|
tmp_having->update_used_tables();
|
|
table_map used_tables= tab->table->map | OUTER_REF_TABLE_BIT;
|
|
|
|
/* If tmp table is not used then consider conditions of const table also */
|
|
if (!need_tmp)
|
|
used_tables|= const_table_map;
|
|
|
|
DBUG_ENTER("JOIN::add_having_as_table_cond");
|
|
|
|
Item* sort_table_cond= make_cond_for_table(thd, tmp_having, used_tables,
|
|
(table_map) 0, 0, false, false);
|
|
if (sort_table_cond)
|
|
{
|
|
if (!tab->select)
|
|
{
|
|
if (!(tab->select= new SQL_SELECT))
|
|
DBUG_RETURN(true);
|
|
tab->select->head= tab->table;
|
|
}
|
|
if (!tab->select->cond)
|
|
tab->select->cond= sort_table_cond;
|
|
else
|
|
{
|
|
if (!(tab->select->cond=
|
|
new (thd->mem_root) Item_cond_and(thd,
|
|
tab->select->cond,
|
|
sort_table_cond)))
|
|
DBUG_RETURN(true);
|
|
}
|
|
if (tab->pre_idx_push_select_cond)
|
|
{
|
|
if (sort_table_cond->type() == Item::COND_ITEM)
|
|
sort_table_cond= sort_table_cond->copy_andor_structure(thd);
|
|
if (!(tab->pre_idx_push_select_cond=
|
|
new (thd->mem_root) Item_cond_and(thd,
|
|
tab->pre_idx_push_select_cond,
|
|
sort_table_cond)))
|
|
DBUG_RETURN(true);
|
|
}
|
|
if (tab->select->cond)
|
|
tab->select->cond->fix_fields_if_needed(thd, 0);
|
|
if (tab->pre_idx_push_select_cond)
|
|
tab->pre_idx_push_select_cond->fix_fields_if_needed(thd, 0);
|
|
tab->select->pre_idx_push_select_cond= tab->pre_idx_push_select_cond;
|
|
tab->set_select_cond(tab->select->cond, __LINE__);
|
|
tab->select_cond->top_level_item();
|
|
DBUG_EXECUTE("where",print_where(tab->select->cond,
|
|
"select and having",
|
|
QT_ORDINARY););
|
|
|
|
having= make_cond_for_table(thd, tmp_having, ~ (table_map) 0,
|
|
~used_tables, 0, false, false);
|
|
if (!having && thd->is_error())
|
|
DBUG_RETURN(true);
|
|
DBUG_EXECUTE("where",
|
|
print_where(having, "having after sort", QT_ORDINARY););
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(true);
|
|
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
/**
  Add rowid items to a field list for every preceding join tab that has
  keep_current_rowid set.

  For each such tab located before 'cur' in join_tab an Item_temptable_rowid
  is created, fixed and appended to table_fields, and
  cur->tmp_table_param->func_count is incremented.

  @param cur           Join tab up to which (exclusive) the tabs are examined
  @param table_fields  List that receives the created rowid items

  @returns false if success, true if error (allocation, fix_fields() or
           push_back() failure).
*/
bool JOIN::add_fields_for_current_rowid(JOIN_TAB *cur, List<Item> *table_fields)
{
  /*
    this will not walk into semi-join materialization nests but this is ok
    because we will never need to save current rowids for those.
  */
  for (JOIN_TAB *tab=join_tab; tab < cur; tab++)
  {
    if (!tab->keep_current_rowid)
      continue;
    Item *item= new (thd->mem_root) Item_temptable_rowid(tab->table);
    /* Stop on the first failure instead of dereferencing a NULL item */
    if (unlikely(!item) ||
        item->fix_fields(thd, 0) ||
        table_fields->push_back(item, thd->mem_root))
      return true;
    /* Count the item only once it really is in the list */
    cur->tmp_table_param->func_count++;
  }
  return false;
}
|
|
|
|
|
|
/**
|
|
Set info for aggregation tables
|
|
|
|
@details
|
|
This function finalizes execution plan by taking following actions:
|
|
.) aggregation temporary tables are created, but not instantiated
|
|
(this is done during execution).
|
|
JOIN_TABs for aggregation tables are set appropriately
|
|
(see JOIN::create_postjoin_aggr_table).
|
|
.) prepare fields lists (fields, all_fields, ref_pointer_array slices) for
|
|
each required stage of execution. These fields lists are set for
|
|
working tables' tabs and for the tab of last table in the join.
|
|
.) info for sorting/grouping/dups removal is prepared and saved in
|
|
appropriate tabs. Here is an example:
|
|
|
|
@returns
|
|
false - Ok
|
|
true - Error
|
|
*/
|
|
|
|
bool JOIN::make_aggr_tables_info()
|
|
{
|
|
List<Item> *curr_all_fields= &all_fields;
|
|
List<Item> *curr_fields_list= &fields_list;
|
|
// Avoid UB (applying .. offset to nullptr) when join_tab is nullptr
|
|
JOIN_TAB *curr_tab= join_tab ? join_tab + const_tables : nullptr;
|
|
TABLE *exec_tmp_table= NULL;
|
|
bool distinct= false;
|
|
const bool has_group_by= this->group;
|
|
bool keep_row_order= thd->lex->with_rownum && (group_list || order);
|
|
bool is_having_added_as_table_cond= false;
|
|
DBUG_ENTER("JOIN::make_aggr_tables_info");
|
|
|
|
|
|
sort_and_group_aggr_tab= NULL;
|
|
|
|
if (group_optimized_away)
|
|
implicit_grouping= true;
|
|
|
|
bool implicit_grouping_with_window_funcs= implicit_grouping &&
|
|
select_lex->have_window_funcs();
|
|
bool implicit_grouping_without_tables= implicit_grouping &&
|
|
!tables_list;
|
|
|
|
/*
|
|
Setup last table to provide fields and all_fields lists to the next
|
|
node in the plan.
|
|
*/
|
|
if (join_tab && top_join_tab_count && tables_list)
|
|
{
|
|
join_tab[top_join_tab_count - 1].fields= &fields_list;
|
|
join_tab[top_join_tab_count - 1].all_fields= &all_fields;
|
|
}
|
|
|
|
/*
|
|
All optimization is done. Check if we can use the storage engines
|
|
group by handler to evaluate the group by.
|
|
Some storage engines, like spider can also do joins, group by and
|
|
distinct in the engine, so we do this for all queries, not only
|
|
GROUP BY queries.
|
|
*/
|
|
if (tables_list && top_join_tab_count && !only_const_tables() && !procedure)
|
|
{
|
|
/*
|
|
At the moment we only support push down for queries where
|
|
all tables are in the same storage engine
|
|
*/
|
|
TABLE_LIST *tbl= tables_list;
|
|
handlerton *ht= tbl && tbl->table ? tbl->table->file->partition_ht() : 0;
|
|
for (tbl= tbl->next_local; ht && tbl; tbl= tbl->next_local)
|
|
{
|
|
if (!tbl->table || tbl->table->file->partition_ht() != ht)
|
|
ht= 0;
|
|
}
|
|
|
|
if (ht && ht->create_group_by)
|
|
{
|
|
/*
|
|
Check if the storage engine can intercept the query
|
|
|
|
JOIN::optimize_stage2() might convert DISTINCT into GROUP BY and then
|
|
optimize away GROUP BY (group_list). In such a case, we need to notify
|
|
a storage engine supporting a group by handler of the existence of the
|
|
original DISTINCT. Thus, we set select_distinct || group_optimized_away
|
|
to Query::distinct.
|
|
*/
|
|
Query query= {&all_fields, select_distinct || group_optimized_away,
|
|
tables_list, conds,
|
|
group_list, order ? order : group_list, having,
|
|
&select_lex->master_unit()->lim};
|
|
group_by_handler *gbh= ht->create_group_by(thd, &query);
|
|
|
|
if (gbh)
|
|
{
|
|
if (!(pushdown_query= new (thd->mem_root) Pushdown_query(select_lex,
|
|
gbh)))
|
|
DBUG_RETURN(1);
|
|
/*
|
|
We must store rows in the tmp table if we need to do an ORDER BY
|
|
or DISTINCT and the storage handler can't handle it.
|
|
*/
|
|
need_tmp= query.order_by || query.group_by || query.distinct;
|
|
distinct= query.distinct;
|
|
keep_row_order= query.order_by || query.group_by;
|
|
|
|
order= query.order_by;
|
|
|
|
aggr_tables++;
|
|
curr_tab= join_tab + exec_join_tab_cnt();
|
|
bzero((void*)curr_tab, sizeof(JOIN_TAB));
|
|
curr_tab->ref.key= -1;
|
|
curr_tab->join= this;
|
|
|
|
if (!(curr_tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param)))
|
|
DBUG_RETURN(1);
|
|
curr_tab->tmp_table_param->func_count= all_fields.elements;
|
|
TABLE* table= create_tmp_table(thd, curr_tab->tmp_table_param,
|
|
all_fields,
|
|
NULL, distinct,
|
|
TRUE, select_options, HA_ROWS_MAX,
|
|
&empty_clex_str, !need_tmp,
|
|
keep_row_order);
|
|
if (!table)
|
|
DBUG_RETURN(1);
|
|
|
|
if (!(curr_tab->aggr= new (thd->mem_root) AGGR_OP(curr_tab)))
|
|
DBUG_RETURN(1);
|
|
curr_tab->aggr->set_write_func(::end_send);
|
|
curr_tab->table= table;
|
|
/*
|
|
Setup reference fields, used by summary functions and group by fields,
|
|
to point to the temporary table.
|
|
The actual switching to the temporary tables fields for HAVING
|
|
and ORDER BY is done in do_select() by calling
|
|
set_items_ref_array(items1).
|
|
*/
|
|
init_items_ref_array();
|
|
items1= ref_ptr_array_slice(2);
|
|
//items1= items0 + all_fields.elements;
|
|
if (change_to_use_tmp_fields(thd, items1,
|
|
tmp_fields_list1, tmp_all_fields1,
|
|
fields_list.elements, all_fields))
|
|
DBUG_RETURN(1);
|
|
|
|
/* Give storage engine access to temporary table */
|
|
gbh->table= table;
|
|
pushdown_query->store_data_in_temp_table= need_tmp;
|
|
pushdown_query->having= having;
|
|
|
|
/*
|
|
Group by and having is calculated by the group_by handler.
|
|
Reset the group by and having
|
|
*/
|
|
DBUG_ASSERT(query.group_by == NULL);
|
|
group= 0; group_list= 0;
|
|
having= tmp_having= 0;
|
|
/*
|
|
Select distinct is handled by handler or by creating an unique index
|
|
over all fields in the temporary table
|
|
*/
|
|
select_distinct= 0;
|
|
order= query.order_by;
|
|
tmp_table_param.field_count+= tmp_table_param.sum_func_count;
|
|
tmp_table_param.sum_func_count= 0;
|
|
|
|
fields= curr_fields_list;
|
|
|
|
//todo: new:
|
|
curr_tab->ref_array= &items1;
|
|
curr_tab->all_fields= &tmp_all_fields1;
|
|
curr_tab->fields= &tmp_fields_list1;
|
|
|
|
DBUG_RETURN(thd->is_error());
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
The loose index scan access method guarantees that all grouping or
|
|
duplicate row elimination (for distinct) is already performed
|
|
during data retrieval, and that all MIN/MAX functions are already
|
|
computed for each group. Thus all MIN/MAX functions should be
|
|
treated as regular functions, and there is no need to perform
|
|
grouping in the main execution loop.
|
|
Notice that currently loose index scan is applicable only for
|
|
single table queries, thus it is sufficient to test only the first
|
|
join_tab element of the plan for its access method.
|
|
*/
|
|
if (join_tab && top_join_tab_count && tables_list &&
|
|
join_tab->is_using_loose_index_scan())
|
|
tmp_table_param.precomputed_group_by=
|
|
!join_tab->is_using_agg_loose_index_scan();
|
|
|
|
group_list_for_estimates= group_list;
|
|
/* Create a tmp table if distinct or if the sort is too complicated */
|
|
if (need_tmp)
|
|
{
|
|
aggr_tables++;
|
|
curr_tab= join_tab + exec_join_tab_cnt();
|
|
DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size);
|
|
bzero((void*)curr_tab, sizeof(JOIN_TAB));
|
|
curr_tab->ref.key= -1;
|
|
if (only_const_tables())
|
|
first_select= sub_select_postjoin_aggr;
|
|
|
|
/*
|
|
Create temporary table on first execution of this join.
|
|
(Will be reused if this is a subquery that is executed several times.)
|
|
*/
|
|
init_items_ref_array();
|
|
|
|
ORDER *tmp_group= (ORDER *) 0;
|
|
if (!simple_group && !procedure && !(test_flags & TEST_NO_KEY_GROUP))
|
|
tmp_group= group_list;
|
|
|
|
tmp_table_param.hidden_field_count=
|
|
all_fields.elements - fields_list.elements;
|
|
|
|
distinct= select_distinct && !group_list &&
|
|
!select_lex->have_window_funcs();
|
|
keep_row_order= thd->lex->with_rownum && (group_list || order);
|
|
bool save_sum_fields= (group_list && simple_group) ||
|
|
implicit_grouping_with_window_funcs;
|
|
if (create_postjoin_aggr_table(curr_tab,
|
|
&all_fields, tmp_group,
|
|
save_sum_fields,
|
|
distinct, keep_row_order))
|
|
DBUG_RETURN(true);
|
|
exec_tmp_table= curr_tab->table;
|
|
|
|
if (exec_tmp_table->distinct)
|
|
optimize_distinct();
|
|
|
|
/* Change sum_fields reference to calculated fields in tmp_table */
|
|
items1= ref_ptr_array_slice(2);
|
|
if ((sort_and_group || curr_tab->table->group ||
|
|
tmp_table_param.precomputed_group_by) &&
|
|
!implicit_grouping_without_tables)
|
|
{
|
|
if (change_to_use_tmp_fields(thd, items1,
|
|
tmp_fields_list1, tmp_all_fields1,
|
|
fields_list.elements, all_fields))
|
|
DBUG_RETURN(true);
|
|
}
|
|
else
|
|
{
|
|
if (change_refs_to_tmp_fields(thd, items1,
|
|
tmp_fields_list1, tmp_all_fields1,
|
|
fields_list.elements, all_fields))
|
|
DBUG_RETURN(true);
|
|
}
|
|
curr_all_fields= &tmp_all_fields1;
|
|
curr_fields_list= &tmp_fields_list1;
|
|
// Need to set them now for correct group_fields setup, reset at the end.
|
|
set_items_ref_array(items1);
|
|
curr_tab->ref_array= &items1;
|
|
curr_tab->all_fields= &tmp_all_fields1;
|
|
curr_tab->fields= &tmp_fields_list1;
|
|
set_postjoin_aggr_write_func(curr_tab);
|
|
|
|
/*
|
|
If having is not handled here, it will be checked before the row is sent
|
|
to the client.
|
|
*/
|
|
if (tmp_having &&
|
|
(sort_and_group || (exec_tmp_table->distinct && !group_list) ||
|
|
select_lex->have_window_funcs()))
|
|
{
|
|
/*
|
|
If there is no select distinct and there are no window functions
|
|
then move the having to table conds of tmp table.
|
|
NOTE : We cannot apply having after distinct or window functions
|
|
If columns of having are not part of select distinct,
|
|
then distinct may remove rows which can satisfy having.
|
|
In the case of window functions we *must* make sure to not
|
|
store any rows which don't match HAVING within the temp table,
|
|
as rows will end up being used during their computation.
|
|
*/
|
|
if (!select_distinct && !select_lex->have_window_funcs() &&
|
|
add_having_as_table_cond(curr_tab))
|
|
DBUG_RETURN(true);
|
|
is_having_added_as_table_cond= tmp_having != having;
|
|
|
|
/*
|
|
Having condition which we are not able to add as tmp table conds are
|
|
kept as before. And, this will be applied before storing the rows in
|
|
tmp table.
|
|
*/
|
|
curr_tab->having= having;
|
|
having= NULL; // Already done
|
|
}
|
|
|
|
tmp_table_param.func_count= 0;
|
|
tmp_table_param.field_count+= tmp_table_param.func_count;
|
|
if (sort_and_group || curr_tab->table->group)
|
|
{
|
|
tmp_table_param.field_count+= tmp_table_param.sum_func_count;
|
|
tmp_table_param.sum_func_count= 0;
|
|
}
|
|
|
|
if (exec_tmp_table->group)
|
|
{ // Already grouped
|
|
if (!order && !no_order && !skip_sort_order)
|
|
order= group_list; /* order by group */
|
|
group_list= NULL;
|
|
}
|
|
|
|
/*
|
|
If we have different sort & group then we must sort the data by group
|
|
and copy it to another tmp table.
|
|
|
|
This code is also used if we are using distinct something
|
|
we haven't been able to store in the temporary table yet
|
|
like SEC_TO_TIME(SUM(...)).
|
|
|
|
3. Also, this is used when
|
|
- the query has Window functions,
|
|
- the GROUP BY operation is done with OrderedGroupBy algorithm.
|
|
In this case, the first temptable will contain pre-GROUP-BY data. Force
|
|
the creation of the second temporary table. Post-GROUP-BY dataset will be
|
|
written there, and then Window Function processing code will be able to
|
|
process it.
|
|
*/
|
|
if ((group_list &&
|
|
(!test_if_subpart(group_list, order) || select_distinct)) ||
|
|
(select_distinct && tmp_table_param.using_outer_summary_function) ||
|
|
(group_list && !tmp_table_param.quick_group && // (3)
|
|
select_lex->have_window_funcs())) // (3)
|
|
{ /* Must copy to another table */
|
|
DBUG_PRINT("info",("Creating group table"));
|
|
|
|
calc_group_buffer(this, group_list);
|
|
count_field_types(select_lex, &tmp_table_param, tmp_all_fields1,
|
|
select_distinct && !group_list);
|
|
tmp_table_param.hidden_field_count=
|
|
tmp_all_fields1.elements - tmp_fields_list1.elements;
|
|
|
|
curr_tab++;
|
|
aggr_tables++;
|
|
DBUG_ASSERT(curr_tab - join_tab < dbug_join_tab_array_size);
|
|
bzero((void*)curr_tab, sizeof(JOIN_TAB));
|
|
curr_tab->ref.key= -1;
|
|
|
|
/* group data to new table */
|
|
/*
|
|
If the access method is loose index scan then all MIN/MAX
|
|
functions are precomputed, and should be treated as regular
|
|
functions. See extended comment above.
|
|
*/
|
|
if (join_tab->is_using_loose_index_scan())
|
|
tmp_table_param.precomputed_group_by= TRUE;
|
|
|
|
tmp_table_param.hidden_field_count=
|
|
curr_all_fields->elements - curr_fields_list->elements;
|
|
ORDER *dummy= NULL; //TODO can use table->group here also
|
|
|
|
if (create_postjoin_aggr_table(curr_tab, curr_all_fields, dummy, true,
|
|
distinct, keep_row_order))
|
|
DBUG_RETURN(true);
|
|
|
|
if (group_list)
|
|
{
|
|
if (!only_const_tables()) // No need to sort a single row
|
|
{
|
|
if (add_sorting_to_table(curr_tab - 1, group_list))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
if (make_group_fields(this, this))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
// Setup sum funcs only when necessary, otherwise we might break info
|
|
// for the first table
|
|
if (group_list || tmp_table_param.sum_func_count)
|
|
{
|
|
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
|
DBUG_RETURN(true);
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!join_tab->is_using_agg_loose_index_scan()))
|
|
DBUG_RETURN(true);
|
|
group_list= NULL;
|
|
if (setup_sum_funcs(thd, sum_funcs))
|
|
DBUG_RETURN(true);
|
|
}
|
|
// No sum funcs anymore
|
|
DBUG_ASSERT(items2.is_null());
|
|
|
|
items2= ref_ptr_array_slice(3);
|
|
if (change_to_use_tmp_fields(thd, items2,
|
|
tmp_fields_list2, tmp_all_fields2,
|
|
fields_list.elements, tmp_all_fields1))
|
|
DBUG_RETURN(true);
|
|
|
|
curr_fields_list= &tmp_fields_list2;
|
|
curr_all_fields= &tmp_all_fields2;
|
|
set_items_ref_array(items2);
|
|
curr_tab->ref_array= &items2;
|
|
curr_tab->all_fields= &tmp_all_fields2;
|
|
curr_tab->fields= &tmp_fields_list2;
|
|
set_postjoin_aggr_write_func(curr_tab);
|
|
|
|
tmp_table_param.field_count+= tmp_table_param.sum_func_count;
|
|
tmp_table_param.sum_func_count= 0;
|
|
}
|
|
if (curr_tab->table->distinct)
|
|
select_distinct= false; /* Each row is unique */
|
|
|
|
if (select_distinct && !group_list)
|
|
{
|
|
if (having)
|
|
{
|
|
curr_tab->having= having;
|
|
having->update_used_tables();
|
|
}
|
|
/*
|
|
We only need DISTINCT operation if the join is not degenerate.
|
|
If it is, we must not request DISTINCT processing, because
|
|
remove_duplicates() assumes there is a preceding computation step (and
|
|
in the degenerate join, there's none)
|
|
*/
|
|
if (top_join_tab_count && tables_list)
|
|
curr_tab->distinct= true;
|
|
|
|
having= NULL;
|
|
select_distinct= false;
|
|
}
|
|
/* Clean tmp_table_param for the next tmp table. */
|
|
tmp_table_param.field_count= tmp_table_param.sum_func_count=
|
|
tmp_table_param.func_count= 0;
|
|
|
|
tmp_table_param.copy_field= tmp_table_param.copy_field_end=0;
|
|
first_record= sort_and_group=0;
|
|
|
|
if (!group_optimized_away || implicit_grouping_with_window_funcs)
|
|
{
|
|
group= false;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
If grouping has been optimized away, a temporary table is
|
|
normally not needed unless we're explicitly requested to create
|
|
one (e.g. due to a SQL_BUFFER_RESULT hint or INSERT ... SELECT).
|
|
|
|
In this case (grouping was optimized away), temp_table was
|
|
created without a grouping expression and JOIN::exec() will not
|
|
perform the necessary grouping (by the use of end_send_group()
|
|
or end_write_group()) if JOIN::group is set to false.
|
|
*/
|
|
// the temporary table was explicitly requested
|
|
DBUG_ASSERT(select_options & OPTION_BUFFER_RESULT);
|
|
// the temporary table does not have a grouping expression
|
|
DBUG_ASSERT(!curr_tab->table->group);
|
|
}
|
|
calc_group_buffer(this, group_list);
|
|
count_field_types(select_lex, &tmp_table_param, *curr_all_fields, false);
|
|
}
|
|
|
|
if (group ||
|
|
(implicit_grouping && !implicit_grouping_with_window_funcs) ||
|
|
tmp_table_param.sum_func_count)
|
|
{
|
|
if (make_group_fields(this, this))
|
|
DBUG_RETURN(true);
|
|
|
|
DBUG_ASSERT(items3.is_null());
|
|
|
|
if (items0.is_null())
|
|
init_items_ref_array();
|
|
items3= ref_ptr_array_slice(4);
|
|
setup_copy_fields(thd, &tmp_table_param,
|
|
items3, tmp_fields_list3, tmp_all_fields3,
|
|
curr_fields_list->elements, *curr_all_fields);
|
|
|
|
curr_fields_list= &tmp_fields_list3;
|
|
curr_all_fields= &tmp_all_fields3;
|
|
set_items_ref_array(items3);
|
|
if (join_tab)
|
|
{
|
|
JOIN_TAB *last_tab= join_tab + top_join_tab_count + aggr_tables - 1;
|
|
// Set grouped fields on the last table
|
|
last_tab->ref_array= &items3;
|
|
last_tab->all_fields= &tmp_all_fields3;
|
|
last_tab->fields= &tmp_fields_list3;
|
|
}
|
|
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
|
DBUG_RETURN(true);
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!join_tab ||
|
|
!join_tab-> is_using_agg_loose_index_scan()))
|
|
DBUG_RETURN(true);
|
|
if (unlikely(setup_sum_funcs(thd, sum_funcs) || thd->is_error()))
|
|
DBUG_RETURN(true);
|
|
}
|
|
if (group_list || order)
|
|
{
|
|
DBUG_PRINT("info",("Sorting for send_result_set_metadata"));
|
|
THD_STAGE_INFO(thd, stage_sorting_result);
|
|
/* If we have already done the group, add HAVING to sorted table */
|
|
if (tmp_having && !is_having_added_as_table_cond &&
|
|
!group_list && !sort_and_group)
|
|
{
|
|
if (add_having_as_table_cond(curr_tab))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
if (group)
|
|
select_limit= HA_POS_ERROR;
|
|
else if (!need_tmp)
|
|
{
|
|
/*
|
|
We can abort sorting after thd->select_limit rows if there are no
|
|
filter conditions for any tables after the sorted one.
|
|
Filter conditions come in several forms:
|
|
1. as a condition item attached to the join_tab, or
|
|
2. as a keyuse attached to the join_tab (ref access).
|
|
*/
|
|
for (uint i= const_tables + 1; i < top_join_tab_count; i++)
|
|
{
|
|
JOIN_TAB *const tab= join_tab + i;
|
|
if (tab->select_cond || // 1
|
|
(tab->keyuse && !tab->first_inner)) // 2
|
|
{
|
|
/* We have to sort all rows */
|
|
select_limit= HA_POS_ERROR;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
Here we add sorting stage for ORDER BY/GROUP BY clause, if the
|
|
optimiser chose FILESORT to be faster than INDEX SCAN or there is
|
|
no suitable index present.
|
|
OPTION_FOUND_ROWS supersedes LIMIT and is taken into account.
|
|
*/
|
|
DBUG_PRINT("info",("Sorting for order by/group by"));
|
|
ORDER *order_arg= group_list ? group_list : order;
|
|
if (top_join_tab_count + aggr_tables > const_tables &&
|
|
ordered_index_usage !=
|
|
(group_list ? ordered_index_group_by : ordered_index_order_by) &&
|
|
curr_tab->type != JT_CONST &&
|
|
curr_tab->type != JT_EQ_REF) // Don't sort 1 row
|
|
{
|
|
// Sort either first non-const table or the last tmp table
|
|
JOIN_TAB *sort_tab= curr_tab;
|
|
|
|
if (add_sorting_to_table(sort_tab, order_arg))
|
|
DBUG_RETURN(true);
|
|
/*
|
|
filesort_limit: Return only this many rows from filesort().
|
|
We can use select_limit_cnt only if we have no group_by and 1 table.
|
|
This allows us to use Bounded_queue for queries like:
|
|
"select SQL_CALC_FOUND_ROWS * from t1 order by b desc limit 1;"
|
|
m_select_limit == HA_POS_ERROR (we need a full table scan)
|
|
unit->select_limit_cnt == 1 (we only need one row in the result set)
|
|
*/
|
|
sort_tab->filesort->limit=
|
|
(has_group_by || (join_tab + top_join_tab_count > curr_tab + 1)) ?
|
|
select_limit : unit->lim.get_select_limit();
|
|
|
|
if (unit->lim.is_with_ties())
|
|
sort_tab->filesort->limit= HA_POS_ERROR;
|
|
}
|
|
if (!only_const_tables() &&
|
|
!join_tab[const_tables].filesort &&
|
|
!(select_options & SELECT_DESCRIBE))
|
|
{
|
|
/*
|
|
If no IO cache exists for the first table then we are using an
|
|
INDEX SCAN and no filesort. Thus we should not remove the sorted
|
|
attribute on the INDEX SCAN.
|
|
*/
|
|
skip_sort_order= true;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Window functions computation step should be attached to the last join_tab
|
|
that's doing aggregation.
|
|
The last join_tab reads the data from the temp. table. It also may do
|
|
- sorting
|
|
- duplicate value removal
|
|
Both of these operations are done after window function computation step.
|
|
*/
|
|
if (select_lex->window_funcs.elements)
|
|
{
|
|
curr_tab= join_tab + total_join_tab_cnt();
|
|
if (!(curr_tab->window_funcs_step= new Window_funcs_computation))
|
|
DBUG_RETURN(true);
|
|
if (curr_tab->window_funcs_step->setup(thd, &select_lex->window_funcs,
|
|
curr_tab))
|
|
DBUG_RETURN(true);
|
|
/* Count that we're using window functions. */
|
|
status_var_increment(thd->status_var.feature_window_functions);
|
|
}
|
|
if (select_lex->custom_agg_func_used())
|
|
status_var_increment(thd->status_var.feature_custom_aggregate_functions);
|
|
|
|
/*
|
|
Allocate Cached_items of ORDER BY for FETCH FIRST .. WITH TIES.
|
|
The order list might have been modified prior to this, but we are
|
|
only interested in the initial order by columns, after all const
|
|
elements are removed.
|
|
*/
|
|
if (unit->lim.is_with_ties())
|
|
{
|
|
/*
|
|
When ORDER BY is eliminated, we make use of the GROUP BY list.
|
|
We've already counted how many elements from ORDER BY
|
|
must be evaluated as part of WITH TIES so we use that.
|
|
*/
|
|
ORDER *order_src = order ? order : group_list;
|
|
if (alloc_order_fields(this, order_src,
|
|
with_ties_order_count))
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
fields= curr_fields_list;
|
|
// Reset before execution
|
|
set_items_ref_array(items0);
|
|
if (join_tab)
|
|
join_tab[exec_join_tab_cnt() + aggr_tables - 1].next_select=
|
|
setup_end_select_func(this);
|
|
group= has_group_by;
|
|
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
|
|
ORDER *table_group,
|
|
bool save_sum_fields,
|
|
bool distinct,
|
|
bool keep_row_order)
|
|
{
|
|
DBUG_ENTER("JOIN::create_postjoin_aggr_table");
|
|
THD_STAGE_INFO(thd, stage_creating_tmp_table);
|
|
|
|
/*
|
|
Pushing LIMIT to the post-join temporary table creation is not applicable
|
|
when there is ORDER BY or GROUP BY or there is no GROUP BY, but
|
|
there are aggregate functions, because in all these cases we need
|
|
all result rows.
|
|
|
|
We also can not push limit if the limit is WITH TIES, as we do not know
|
|
how many rows we will actually have. This can happen if ORDER BY was
|
|
a constant and removed (during remove_const), thus we have an "unlimited"
|
|
WITH TIES.
|
|
*/
|
|
ha_rows table_rows_limit= ((order == NULL || skip_sort_order) &&
|
|
!table_group &&
|
|
!select_lex->with_sum_func &&
|
|
!unit->lim.is_with_ties()) ? select_limit
|
|
: HA_POS_ERROR;
|
|
|
|
if (!(tab->tmp_table_param= new TMP_TABLE_PARAM(tmp_table_param)))
|
|
DBUG_RETURN(true);
|
|
if (tmp_table_keep_current_rowid)
|
|
add_fields_for_current_rowid(tab, table_fields);
|
|
tab->tmp_table_param->skip_create_table= true;
|
|
TABLE* table= create_tmp_table(thd, tab->tmp_table_param, *table_fields,
|
|
table_group, distinct,
|
|
save_sum_fields, select_options,
|
|
table_rows_limit,
|
|
&empty_clex_str, true, keep_row_order);
|
|
if (!table)
|
|
DBUG_RETURN(true);
|
|
tmp_table_param.using_outer_summary_function=
|
|
tab->tmp_table_param->using_outer_summary_function;
|
|
tab->join= this;
|
|
DBUG_ASSERT(tab > tab->join->join_tab || !top_join_tab_count ||
|
|
!tables_list);
|
|
tab->table= table;
|
|
if (tab > join_tab)
|
|
(tab - 1)->next_select= sub_select_postjoin_aggr;
|
|
|
|
/* if group or order on first table, sort first */
|
|
if ((group_list && simple_group) ||
|
|
(implicit_grouping && select_lex->have_window_funcs()))
|
|
{
|
|
DBUG_PRINT("info",("Sorting for group"));
|
|
THD_STAGE_INFO(thd, stage_sorting_for_group);
|
|
|
|
if (ordered_index_usage != ordered_index_group_by &&
|
|
!only_const_tables() &&
|
|
(join_tab + const_tables)->type != JT_CONST && // Don't sort 1 row
|
|
!implicit_grouping &&
|
|
add_sorting_to_table(join_tab + const_tables, group_list))
|
|
goto err;
|
|
|
|
if (alloc_group_fields(this, group_list))
|
|
goto err;
|
|
if (make_sum_func_list(all_fields, fields_list, true))
|
|
goto err;
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!(tables_list &&
|
|
join_tab->is_using_agg_loose_index_scan())))
|
|
goto err;
|
|
if (setup_sum_funcs(thd, sum_funcs))
|
|
goto err;
|
|
group_list= NULL;
|
|
}
|
|
else
|
|
{
|
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
|
!join_tab->is_using_agg_loose_index_scan()))
|
|
goto err;
|
|
if (setup_sum_funcs(thd, sum_funcs))
|
|
goto err;
|
|
|
|
if (!group_list && !table->distinct && order && simple_order &&
|
|
tab == join_tab + const_tables)
|
|
{
|
|
DBUG_PRINT("info",("Sorting for order"));
|
|
THD_STAGE_INFO(thd, stage_sorting_for_order);
|
|
|
|
if (ordered_index_usage != ordered_index_order_by &&
|
|
!only_const_tables() &&
|
|
add_sorting_to_table(join_tab + const_tables, order))
|
|
goto err;
|
|
order= NULL;
|
|
}
|
|
}
|
|
if (!(tab->aggr= new (thd->mem_root) AGGR_OP(tab)))
|
|
goto err;
|
|
table->reginfo.join_tab= tab;
|
|
DBUG_RETURN(false);
|
|
|
|
err:
|
|
if (table != NULL)
|
|
free_tmp_table(thd, table);
|
|
tab->table= NULL;
|
|
DBUG_RETURN(true);
|
|
}
|
|
|
|
|
|
void
|
|
JOIN::optimize_distinct()
|
|
{
|
|
for (JOIN_TAB *last_join_tab= join_tab + top_join_tab_count - 1; ;)
|
|
{
|
|
if (select_lex->select_list_tables & last_join_tab->table->map ||
|
|
last_join_tab->use_join_cache)
|
|
break;
|
|
last_join_tab->shortcut_for_distinct= true;
|
|
if (last_join_tab == join_tab)
|
|
break;
|
|
--last_join_tab;
|
|
}
|
|
|
|
/* Optimize "select distinct b from t1 order by key_part_1 limit #" */
|
|
if (order && skip_sort_order && !unit->lim.is_with_ties())
|
|
{
|
|
/* Should already have been optimized away */
|
|
DBUG_ASSERT(ordered_index_usage == ordered_index_order_by);
|
|
if (ordered_index_usage == ordered_index_order_by)
|
|
{
|
|
order= NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Add Filesort object to the given table to sort it with filesort
|
|
|
|
@param tab the JOIN_TAB object to attach created Filesort object to
|
|
@param order List of expressions to sort the table by
|
|
|
|
@note This function moves tab->select, if any, to filesort->select
|
|
|
|
@return false on success, true on OOM
|
|
*/
|
|
|
|
bool
|
|
JOIN::add_sorting_to_table(JOIN_TAB *tab, ORDER *order)
|
|
{
|
|
tab->filesort=
|
|
new (thd->mem_root) Filesort(order, HA_ROWS_MAX, tab->keep_current_rowid,
|
|
tab->select);
|
|
if (!tab->filesort)
|
|
return true;
|
|
|
|
TABLE *table= tab->table;
|
|
if ((tab == join_tab + const_tables) &&
|
|
table->pos_in_table_list->is_sjm_scan_table())
|
|
{
|
|
tab->filesort->set_all_read_bits= TRUE;
|
|
tab->filesort->unpack= unpack_to_base_table_fields;
|
|
}
|
|
|
|
/*
|
|
Select was moved to filesort->select to force join_init_read_record to use
|
|
sorted result instead of reading table through select.
|
|
*/
|
|
if (tab->select)
|
|
{
|
|
tab->select= NULL;
|
|
tab->set_select_cond(NULL, __LINE__);
|
|
}
|
|
tab->read_first_record= join_init_read_record;
|
|
return false;
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
Setup expression caches for subqueries that need them
|
|
|
|
@details
|
|
The function wraps correlated subquery expressions that return one value
|
|
into objects of the class Item_cache_wrapper setting up an expression
|
|
cache for each of them. The result values of the subqueries are to be
|
|
cached together with the corresponding sets of the parameters - outer
|
|
references of the subqueries.
|
|
|
|
@retval FALSE OK
|
|
@retval TRUE Error
|
|
*/
|
|
|
|
bool JOIN::setup_subquery_caches()
|
|
{
|
|
DBUG_ENTER("JOIN::setup_subquery_caches");
|
|
|
|
/*
|
|
We have to check all this condition together because items created in
|
|
one of this clauses can be moved to another one by optimizer
|
|
*/
|
|
if (select_lex->expr_cache_may_be_used[IN_WHERE] ||
|
|
select_lex->expr_cache_may_be_used[IN_HAVING] ||
|
|
select_lex->expr_cache_may_be_used[IN_ON] ||
|
|
select_lex->expr_cache_may_be_used[NO_MATTER])
|
|
{
|
|
JOIN_TAB *tab;
|
|
if (conds &&
|
|
!(conds= conds->transform(thd, &Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->select_cond &&
|
|
!(tab->select_cond=
|
|
tab->select_cond->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
if (tab->cache_select && tab->cache_select->cond)
|
|
if (!(tab->cache_select->cond=
|
|
tab->cache_select->
|
|
cond->transform(thd, &Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
if (having &&
|
|
!(having= having->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (tmp_having)
|
|
{
|
|
DBUG_ASSERT(having == NULL);
|
|
if (!(tmp_having=
|
|
tmp_having->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
}
|
|
if (select_lex->expr_cache_may_be_used[SELECT_LIST] ||
|
|
select_lex->expr_cache_may_be_used[IN_GROUP_BY] ||
|
|
select_lex->expr_cache_may_be_used[NO_MATTER])
|
|
{
|
|
List_iterator<Item> li(all_fields);
|
|
Item *item;
|
|
while ((item= li++))
|
|
{
|
|
Item *new_item;
|
|
if (!(new_item=
|
|
item->transform(thd, &Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
if (new_item != item)
|
|
{
|
|
thd->change_item_tree(li.ref(), new_item);
|
|
}
|
|
}
|
|
for (ORDER *tmp_group= group_list; tmp_group ; tmp_group= tmp_group->next)
|
|
{
|
|
if (!(*tmp_group->item=
|
|
(*tmp_group->item)->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
}
|
|
if (select_lex->expr_cache_may_be_used[NO_MATTER])
|
|
{
|
|
for (ORDER *ord= order; ord; ord= ord->next)
|
|
{
|
|
if (!(*ord->item=
|
|
(*ord->item)->transform(thd,
|
|
&Item::expr_cache_insert_transformer,
|
|
NULL)))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
Shrink join buffers used for preceding tables to reduce the occupied space
|
|
|
|
SYNOPSIS
|
|
shrink_join_buffers()
|
|
jt table up to which the buffers are to be shrunk
|
|
curr_space the size of the space used by the buffers for tables 1..jt
|
|
needed_space the size of the space that has to be used by these buffers
|
|
|
|
DESCRIPTION
|
|
The function makes an attempt to shrink all join buffers used for the
|
|
tables starting from the first up to jt to reduce the total size of the
|
|
space occupied by the buffers used for tables 1,...,jt from curr_space
|
|
to needed_space.
|
|
The function assumes that the buffer for the table jt has not been
|
|
allocated yet.
|
|
|
|
RETURN
|
|
FALSE if all buffers have been successfully shrunk
|
|
TRUE otherwise
|
|
*/
|
|
|
|
bool JOIN::shrink_join_buffers(JOIN_TAB *jt,
                               ulonglong curr_space,
                               ulonglong needed_space)
{
  /* Shrink the buffers of all tables that precede jt */
  for (JOIN_TAB *tab= first_linear_tab(this, WITHOUT_BUSH_ROOTS,
                                       WITHOUT_CONST_TABLES);
       tab != jt;
       tab= next_linear_tab(this, tab, WITHOUT_BUSH_ROOTS))
  {
    JOIN_CACHE *prev_cache= tab->cache;
    if (!prev_cache)
      continue;                                 // No join buffer here

    if (needed_space < prev_cache->get_min_join_buffer_size())
      return TRUE;

    if (prev_cache->shrink_join_buffer_in_ratio(curr_space, needed_space))
    {
      revise_cache_usage(tab);
      return TRUE;
    }

    /* Account for the size this buffer has after shrinking */
    const size_t shrunk_size= prev_cache->get_join_buffer_size();
    curr_space-= shrunk_size;
    if (needed_space < shrunk_size)
    {
      /*
        Safety: fail if we've exhausted available buffer space with
        reduced join buffers.
      */
      DBUG_ASSERT(0);
      return TRUE;
    }
    needed_space-= shrunk_size;
  }

  /* Whatever is left of needed_space becomes the buffer size for jt */
  JOIN_CACHE *jt_cache= jt->cache;
  DBUG_ASSERT(jt_cache);
  if (needed_space < jt_cache->get_min_join_buffer_size())
    return TRUE;
  jt_cache->set_join_buffer_size((size_t)needed_space);

  return FALSE;
}
|
|
|
|
|
|
int
|
|
JOIN::reinit()
|
|
{
|
|
DBUG_ENTER("JOIN::reinit");
|
|
|
|
first_record= false;
|
|
group_sent= false;
|
|
cleaned= false;
|
|
accepted_rows= 0;
|
|
|
|
if (aggr_tables)
|
|
{
|
|
JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
|
|
JOIN_TAB *end_tab= curr_tab + aggr_tables;
|
|
for ( ; curr_tab < end_tab; curr_tab++)
|
|
{
|
|
TABLE *tmp_table= curr_tab->table;
|
|
if (!tmp_table->is_created())
|
|
continue;
|
|
tmp_table->file->extra(HA_EXTRA_RESET_STATE);
|
|
tmp_table->file->ha_delete_all_rows();
|
|
}
|
|
}
|
|
clear_sj_tmp_tables(this);
|
|
if (current_ref_ptrs != items0)
|
|
{
|
|
set_items_ref_array(items0);
|
|
}
|
|
|
|
/* need to reset ref access state (see join_read_key) */
|
|
if (join_tab)
|
|
{
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->ref.key_err= TRUE;
|
|
}
|
|
}
|
|
|
|
clear_sum_funcs();
|
|
|
|
if (no_rows_in_result_called)
|
|
{
|
|
/* Reset effect of possible no_rows_in_result() */
|
|
List_iterator_fast<Item> it(fields_list);
|
|
Item *item;
|
|
no_rows_in_result_called= 0;
|
|
while ((item= it++))
|
|
item->restore_to_before_no_rows_in_result();
|
|
}
|
|
|
|
if (!(select_options & SELECT_DESCRIBE))
|
|
if (init_ftfuncs(thd, select_lex, MY_TEST(order)))
|
|
DBUG_RETURN(1);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
Prepare join result.
|
|
|
|
@details Prepare join result prior to join execution or describing.
|
|
Instantiate derived tables and get schema tables result if necessary.
|
|
|
|
@return
|
|
TRUE An error during derived or schema tables instantiation.
|
|
FALSE Ok
|
|
*/
|
|
|
|
bool JOIN::prepare_result(List<Item> **columns_list)
{
  DBUG_ENTER("JOIN::prepare_result");

  error= 0;

  /*
    The steps run in this order and stop at the first failure:
      1. create result tables for materialized views (skipped when the
         result is already known to be empty),
      2. result->prepare2(),
      3. fill schema tables, if the select uses any.
  */
  const bool failed=
    (!zero_result_cause &&
     select_lex->handle_derived(thd->lex, DT_CREATE)) ||
    result->prepare2(this) ||
    ((select_lex->options & OPTION_SCHEMA_TABLE) &&
     get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC));

  if (failed)
  {
    error= 1;
    DBUG_RETURN(TRUE);
  }

  DBUG_RETURN(FALSE);
}
|
|
|
|
|
|
/**
|
|
@retval
|
|
0 ok
|
|
1 error
|
|
*/
|
|
|
|
|
|
bool JOIN::save_explain_data(Explain_query *output, bool can_overwrite,
                             bool need_tmp_table, bool need_order,
                             bool distinct)
{
  DBUG_ENTER("JOIN::save_explain_data");
  /*
    'output' may be NULL here (the code below explicitly allows it), so the
    trace arguments must not dereference it unconditionally.
  */
  DBUG_PRINT("enter", ("Save explain Select_lex: %u (%p) parent lex: %p stmt_lex: %p present select: %u (%p)",
                       select_lex->select_number, select_lex,
                       select_lex->parent_lex, thd->lex->stmt_lex,
                       (output &&
                        output->get_select(select_lex->select_number) ?
                        select_lex->select_number : 0),
                       (output &&
                        output->get_select(select_lex->select_number) ?
                        output->get_select(select_lex->select_number)
                        ->select_lex : NULL)));
  /*
    If there is SELECT in this statement with the same number it must be the
    same SELECT
  */
  DBUG_ASSERT(select_lex->select_number == FAKE_SELECT_LEX_ID || !output ||
              !output->get_select(select_lex->select_number) ||
              output->get_select(select_lex->select_number)->select_lex ==
                select_lex);

  if (select_lex->select_number != FAKE_SELECT_LEX_ID &&
      have_query_plan != JOIN::QEP_NOT_PRESENT_YET &&
      have_query_plan != JOIN::QEP_DELETED &&  // this happens when there was
                                               // no QEP ever, but then
                                               // cleanup() is called multiple
                                               // times
      output &&                                // for "SET" command in SPs.
      (can_overwrite? true: !output->get_select(select_lex->select_number)))
  {
    const char *message= NULL;
    if (!table_count || !tables_list || zero_result_cause)
    {
      /* It's a degenerate join */
      message= zero_result_cause ? zero_result_cause : "No tables used";
    }
    bool rc= save_explain_data_intern(thd->lex->explain, need_tmp_table,
                                      need_order, distinct, message);
    DBUG_RETURN(rc);
  }

  /*
    Can have join_tab==NULL for degenerate cases
    (e.g. SELECT .. UNION ... SELECT LIMIT 0).
    Without an Explain_query there is nothing to look the union up in, so
    the branch is skipped when 'output' is NULL.
  */
  if (select_lex == select_lex->master_unit()->fake_select_lex && join_tab &&
      output)
  {
    /*
      This is fake_select_lex. It has no query plan, but we need to set up a
      tracker for ANALYZE
    */
    uint nr= select_lex->master_unit()->first_select()->select_number;
    Explain_union *eu= output->get_union(nr);
    explain= &eu->fake_select_lex_explain;
    join_tab[0].tracker= eu->get_fake_select_lex_tracker();
    /* Every tab that sorts with filesort gets its own tracker */
    for (uint i=0 ; i < exec_join_tab_cnt() + aggr_tables; i++)
    {
      if (join_tab[i].filesort)
      {
        if (!(join_tab[i].filesort->tracker=
              new Filesort_tracker(thd->lex->analyze_stmt)))
          DBUG_RETURN(1);
      }
    }
  }
  DBUG_RETURN(0);
}
|
|
|
|
|
|
int JOIN::exec()
|
|
{
|
|
int res;
|
|
DBUG_EXECUTE_IF("show_explain_probe_join_exec_start",
|
|
if (dbug_user_var_equals_int(thd,
|
|
"show_explain_probe_select_id",
|
|
select_lex->select_number))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
ANALYZE_START_TRACKING(thd, &explain->time_tracker);
|
|
res= exec_inner();
|
|
ANALYZE_STOP_TRACKING(thd, &explain->time_tracker);
|
|
|
|
DBUG_EXECUTE_IF("show_explain_probe_join_exec_end",
|
|
if (dbug_user_var_equals_int(thd,
|
|
"show_explain_probe_select_id",
|
|
select_lex->select_number))
|
|
dbug_serve_apcs(thd, 1);
|
|
);
|
|
return res;
|
|
}
|
|
|
|
|
|
int JOIN::exec_inner()
|
|
{
|
|
List<Item> *columns_list= &fields_list;
|
|
DBUG_ENTER("JOIN::exec_inner");
|
|
DBUG_ASSERT(optimization_state == JOIN::OPTIMIZATION_DONE);
|
|
|
|
THD_STAGE_INFO(thd, stage_executing);
|
|
|
|
/*
|
|
Enable LIMIT ROWS EXAMINED during query execution if:
|
|
(1) This JOIN is the outermost query (not a subquery or derived table)
|
|
This ensures that the limit is enabled when actual execution begins,
|
|
and not if a subquery is evaluated during optimization of the outer
|
|
query.
|
|
(2) This JOIN is not the result of a UNION. In this case do not apply the
|
|
limit in order to produce the partial query result stored in the
|
|
UNION temp table.
|
|
*/
|
|
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_exec(thd, "join_execution");
|
|
trace_exec.add_select_number(select_lex->select_number);
|
|
Json_writer_array trace_steps(thd, "steps");
|
|
|
|
if (!select_lex->outer_select() && // (1)
|
|
select_lex != select_lex->master_unit()->fake_select_lex) // (2)
|
|
thd->lex->set_limit_rows_examined();
|
|
|
|
if (procedure)
|
|
{
|
|
procedure_fields_list= fields_list;
|
|
if (procedure->change_columns(thd, procedure_fields_list) ||
|
|
result->prepare(procedure_fields_list, unit))
|
|
{
|
|
thd->limit_found_rows= 0;
|
|
DBUG_RETURN(0);
|
|
}
|
|
columns_list= &procedure_fields_list;
|
|
}
|
|
if (result->prepare2(this))
|
|
DBUG_RETURN(error);
|
|
|
|
if (!tables_list && (table_count || !select_lex->with_sum_func) &&
|
|
!select_lex->have_window_funcs())
|
|
{ // Only test of functions
|
|
if (select_options & SELECT_DESCRIBE)
|
|
select_describe(this, FALSE, FALSE, FALSE,
|
|
(zero_result_cause?zero_result_cause:"No tables used"));
|
|
else
|
|
{
|
|
if (result->send_result_set_metadata(*columns_list,
|
|
Protocol::SEND_NUM_ROWS |
|
|
Protocol::SEND_EOF))
|
|
{
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
/*
|
|
We have to test for 'conds' here as the WHERE may not be constant
|
|
even if we don't have any tables for prepared statements or if
|
|
conds uses something like 'rand()'.
|
|
If the HAVING clause is either impossible or always true, then
|
|
JOIN::having is set to NULL by optimize_cond.
|
|
In this case JOIN::exec must check for JOIN::having_value, in the
|
|
same way it checks for JOIN::cond_value.
|
|
*/
|
|
DBUG_ASSERT(error == 0);
|
|
if (cond_value != Item::COND_FALSE &&
|
|
having_value != Item::COND_FALSE &&
|
|
(!conds || conds->val_int()) &&
|
|
(!having || having->val_int()))
|
|
{
|
|
if (do_send_rows &&
|
|
(procedure ? (procedure->send_row(procedure_fields_list) ||
|
|
procedure->end_of_records()):
|
|
result->send_data_with_check(fields_list, unit, 0)> 0))
|
|
error= 1;
|
|
else
|
|
send_records= ((select_options & OPTION_FOUND_ROWS) ? 1 :
|
|
thd->get_sent_row_count());
|
|
}
|
|
else
|
|
send_records= 0;
|
|
if (likely(!error))
|
|
{
|
|
join_free(); // Unlock all cursors
|
|
error= (int) result->send_eof();
|
|
}
|
|
}
|
|
/* Single select (without union) always returns 0 or 1 row */
|
|
thd->limit_found_rows= send_records;
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
/*
|
|
Evaluate expensive constant conditions that were not evaluated during
|
|
optimization. Do not evaluate them for EXPLAIN statements as these
|
|
condtions may be arbitrarily costly, and because the optimize phase
|
|
might not have produced a complete executable plan for EXPLAINs.
|
|
*/
|
|
if (!zero_result_cause &&
|
|
exec_const_cond && !(select_options & SELECT_DESCRIBE) &&
|
|
!exec_const_cond->val_int())
|
|
zero_result_cause= "Impossible WHERE noticed after reading const tables";
|
|
|
|
/*
|
|
We've called exec_const_cond->val_int(). This may have caused an error.
|
|
*/
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
if (zero_result_cause)
|
|
{
|
|
if (select_lex->have_window_funcs() && send_row_on_empty_set())
|
|
{
|
|
/*
|
|
The query produces just one row but it has window functions.
|
|
|
|
The only way to compute the value of window function(s) is to
|
|
run the entire window function computation step (there is no shortcut).
|
|
*/
|
|
const_tables= table_count;
|
|
first_select= sub_select_postjoin_aggr;
|
|
}
|
|
else
|
|
{
|
|
(void) return_zero_rows(this, result, &select_lex->leaf_tables,
|
|
columns_list,
|
|
send_row_on_empty_set(),
|
|
select_options,
|
|
zero_result_cause,
|
|
having ? having : tmp_having, &all_fields);
|
|
DBUG_RETURN(0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
Evaluate all constant expressions with subqueries in the
|
|
ORDER/GROUP clauses to make sure that all subqueries return a
|
|
single row. The evaluation itself will trigger an error if that is
|
|
not the case.
|
|
*/
|
|
if (exec_const_order_group_cond.elements &&
|
|
!(select_options & SELECT_DESCRIBE) &&
|
|
!select_lex->pushdown_select)
|
|
{
|
|
List_iterator_fast<Item> const_item_it(exec_const_order_group_cond);
|
|
Item *cur_const_item;
|
|
StringBuffer<MAX_FIELD_WIDTH> tmp;
|
|
while ((cur_const_item= const_item_it++))
|
|
{
|
|
tmp.set_buffer_if_not_allocated(&my_charset_bin);
|
|
cur_const_item->val_str(&tmp);
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
}
|
|
}
|
|
|
|
if ((this->select_lex->options & OPTION_SCHEMA_TABLE) &&
|
|
get_schema_tables_result(this, PROCESSED_BY_JOIN_EXEC))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
if (select_options & SELECT_DESCRIBE)
|
|
{
|
|
select_describe(this, need_tmp,
|
|
order != 0 && !skip_sort_order,
|
|
select_distinct,
|
|
!table_count ? "No tables used" : NullS);
|
|
DBUG_RETURN(0);
|
|
}
|
|
else if (select_lex->pushdown_select)
|
|
{
|
|
/* Execute the query pushed into a foreign engine */
|
|
error= select_lex->pushdown_select->execute();
|
|
DBUG_RETURN(error);
|
|
}
|
|
else
|
|
{
|
|
/* it's a const select, materialize it. */
|
|
select_lex->mark_const_derived(zero_result_cause);
|
|
}
|
|
|
|
/* XXX: When can we have here thd->is_error() not zero? */
|
|
if (unlikely(thd->is_error()))
|
|
{
|
|
error= thd->is_error();
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
THD_STAGE_INFO(thd, stage_sending_data);
|
|
DBUG_PRINT("info", ("%s", thd->proc_info));
|
|
result->send_result_set_metadata(
|
|
procedure ? procedure_fields_list : *fields,
|
|
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF);
|
|
|
|
error= result->view_structure_only() ? false : do_select(this, procedure);
|
|
/* Accumulate the counts from all join iterations of all join parts. */
|
|
thd->ps_report_examined_row_count();
|
|
|
|
DBUG_PRINT("counts", ("thd->examined_row_count: %lu",
|
|
(ulong) thd->get_examined_row_count()));
|
|
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
|
|
/**
|
|
Clean up join.
|
|
|
|
@return
|
|
Return error that hold JOIN.
|
|
*/
|
|
|
|
int
|
|
JOIN::destroy()
|
|
{
|
|
DBUG_ENTER("JOIN::destroy");
|
|
|
|
DBUG_PRINT("info", ("select %p (%u) <> JOIN %p",
|
|
select_lex, select_lex->select_number, this));
|
|
select_lex->join= 0;
|
|
|
|
cond_equal= 0;
|
|
having_equal= 0;
|
|
|
|
cleanup(1);
|
|
|
|
if (join_tab)
|
|
{
|
|
for (JOIN_TAB *tab= first_linear_tab(this, WITH_BUSH_ROOTS,
|
|
WITH_CONST_TABLES);
|
|
tab; tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->aggr)
|
|
{
|
|
free_tmp_table(thd, tab->table);
|
|
delete tab->tmp_table_param;
|
|
tab->tmp_table_param= NULL;
|
|
tab->aggr= NULL;
|
|
}
|
|
tab->table= NULL;
|
|
}
|
|
}
|
|
|
|
/* Cleanup items referencing temporary table columns */
|
|
cleanup_item_list(tmp_all_fields1);
|
|
cleanup_item_list(tmp_all_fields3);
|
|
destroy_sj_tmp_tables(this);
|
|
delete_dynamic(&keyuse);
|
|
if (save_qep)
|
|
delete(save_qep);
|
|
if (ext_keyuses_for_splitting)
|
|
delete(ext_keyuses_for_splitting);
|
|
delete procedure;
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
|
|
/**
  Call Item::cleanup() for every item of a list.

  @param items  The list to process; an empty list is accepted and ignored
*/

void JOIN::cleanup_item_list(List<Item> &items) const
{
  DBUG_ENTER("JOIN::cleanup_item_list");

  if (items.is_empty())
  {
    DBUG_VOID_RETURN;
  }

  List_iterator_fast<Item> item_it(items);
  for (Item *cur= item_it++; cur; cur= item_it++)
    cur->cleanup();

  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Look for provision of the select_handler interface by a foreign engine.
|
|
Must not be called directly, use find_single_select_handler() or
|
|
find_partial_select_handler() instead.
|
|
|
|
@param
|
|
thd The thread handler
|
|
select_lex SELECT_LEX object, must be passed in the cases of:
|
|
- single select pushdown
|
|
- partial pushdown (part of a UNION/EXCEPT/INTERSECT)
|
|
Must be NULL in case of entire unit pushdown
|
|
select_lex_unit SELECT_LEX_UNIT object, must be passed in the cases of:
|
|
- entire unit pushdown
|
|
- partial pushdown (part of a UNION/EXCEPT/INTERSECT)
|
|
Must be NULL in case of single select pushdown
|
|
|
|
@details
|
|
The function checks that this is an upper level select and if so looks
|
|
through its tables searching for one whose handlerton owns a
|
|
create_select call-back function. If the call of this function returns
|
|
a select_handler interface object then the server will push the select
|
|
query into this engine.
|
|
This function does not check if the select has tables from
|
|
different engines. Such a check must be done inside each engine's
|
|
create_select function.
|
|
Also the engine's create_select function must perform other checks
|
|
to make sure the engine can execute the query.
|
|
|
|
@retval the found select_handler if the search is successful
|
|
0 otherwise
|
|
*/
|
|
|
|
static
|
|
select_handler *find_select_handler_inner(THD *thd,
|
|
SELECT_LEX *select_lex,
|
|
SELECT_LEX_UNIT *select_lex_unit)
|
|
{
|
|
if (select_lex->master_unit()->outer_select() ||
|
|
(select_lex_unit && select_lex->master_unit()->with_clause))
|
|
{
|
|
/*
|
|
Pushdown is not supported neither for non-top-level SELECTs nor for parts
|
|
of SELECT_LEX_UNITs that have CTEs (SELECT_LEX_UNIT::with_clause)
|
|
*/
|
|
return 0;
|
|
}
|
|
|
|
TABLE_LIST *tbl= nullptr;
|
|
// For SQLCOM_INSERT_SELECT the server takes TABLE_LIST
|
|
// from thd->lex->query_tables and skips its first table
|
|
// b/c it is the target table for the INSERT..SELECT.
|
|
if (thd->lex->sql_command != SQLCOM_INSERT_SELECT)
|
|
{
|
|
tbl= select_lex->join->tables_list;
|
|
}
|
|
else if (thd->lex->query_tables &&
|
|
thd->lex->query_tables->next_global)
|
|
{
|
|
tbl= thd->lex->query_tables->next_global;
|
|
}
|
|
else
|
|
return 0;
|
|
|
|
for (;tbl; tbl= tbl->next_global)
|
|
{
|
|
if (!tbl->table)
|
|
continue;
|
|
handlerton *ht= tbl->table->file->partition_ht();
|
|
if (!ht->create_select)
|
|
continue;
|
|
select_handler *sh= ht->create_select(thd, select_lex, select_lex_unit);
|
|
if (sh)
|
|
return sh;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Wrapper for find_select_handler_inner() for the case of single select
|
|
pushdown. See more comments at the description of
|
|
find_select_handler_inner()
|
|
|
|
*/
|
|
select_handler *find_single_select_handler(THD *thd, SELECT_LEX *select_lex)
|
|
{
|
|
return find_select_handler_inner(thd, select_lex, nullptr);
|
|
}
|
|
|
|
|
|
/**
|
|
Wrapper for find_select_handler_inner() for the case of partial select
|
|
pushdown. Partial pushdown means that a unit (i.e. multiple selects combined
|
|
with UNION/EXCEPT/INTERSECT operators) cannot be pushed down to
|
|
the storage engine as a whole but some particular selects of this unit can.
|
|
For example,
|
|
SELECT a FROM federated.t1 -- can be pushed down to Federated
|
|
UNION
|
|
SELECT b FROM local.t2 -- cannot be pushed down, executed locally
|
|
|
|
See more comments at the description of find_select_handler_inner()
|
|
|
|
*/
|
|
select_handler *
|
|
find_partial_select_handler(THD *thd, SELECT_LEX *select_lex,
|
|
SELECT_LEX_UNIT *select_lex_unit)
|
|
{
|
|
return find_select_handler_inner(thd, select_lex, select_lex_unit);
|
|
}
|
|
|
|
|
|
/**
|
|
An entry point to single-unit select (a select without UNION).
|
|
|
|
@param thd thread handler
|
|
@param rref_pointer_array a reference to ref_pointer_array of
|
|
the top-level select_lex for this query
|
|
@param tables list of all tables used in this query.
|
|
The tables have been pre-opened.
|
|
@param fields list of items in SELECT list of the top-level
|
|
select
|
|
e.g. SELECT a, b, c FROM t1 will have Item_field
|
|
for a, b and c in this list.
|
|
@param conds top level item of an expression representing
|
|
WHERE clause of the top level select
|
|
@param og_num total number of ORDER BY and GROUP BY clauses
|
|
arguments
|
|
@param order linked list of ORDER BY agruments
|
|
@param group linked list of GROUP BY arguments
|
|
@param having top level item of HAVING expression
|
|
@param proc_param list of PROCEDUREs
|
|
@param select_options select options (BIG_RESULT, etc)
|
|
@param result an instance of result set handling class.
|
|
This object is responsible for send result
|
|
set rows to the client or inserting them
|
|
into a table.
|
|
@param select_lex the only SELECT_LEX of this query
|
|
@param unit top-level UNIT of this query
|
|
UNIT is an artificial object created by the
|
|
parser for every SELECT clause.
|
|
e.g.
|
|
SELECT * FROM t1 WHERE a1 IN (SELECT * FROM t2)
|
|
has 2 unions.
|
|
|
|
@retval
|
|
FALSE success
|
|
@retval
|
|
TRUE an error
|
|
*/
|
|
|
|
bool
|
|
mysql_select(THD *thd, TABLE_LIST *tables, List<Item> &fields, COND *conds,
|
|
uint og_num, ORDER *order, ORDER *group, Item *having,
|
|
ORDER *proc_param, ulonglong select_options, select_result *result,
|
|
SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex)
|
|
{
|
|
int err= 0;
|
|
bool free_join= 1, exec_error= 0;
|
|
DBUG_ENTER("mysql_select");
|
|
|
|
if (!fields.is_empty())
|
|
select_lex->context.resolve_in_select_list= true;
|
|
JOIN *join;
|
|
if (select_lex->join != 0)
|
|
{
|
|
join= select_lex->join;
|
|
/*
|
|
is it single SELECT in derived table, called in derived table
|
|
creation
|
|
*/
|
|
if (select_lex->get_linkage() != DERIVED_TABLE_TYPE ||
|
|
(select_options & SELECT_DESCRIBE))
|
|
{
|
|
if (select_lex->get_linkage() != GLOBAL_OPTIONS_TYPE)
|
|
{
|
|
/*
|
|
Original join tabs might be overwritten at first
|
|
subselect execution. So we need to restore them.
|
|
*/
|
|
Item_subselect *subselect= select_lex->master_unit()->item;
|
|
if (subselect && subselect->is_uncacheable() && join->reinit())
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
else
|
|
{
|
|
if (!join->prepared &&
|
|
(err= join->prepare(tables, conds, og_num, order, false, group,
|
|
having, proc_param, select_lex, unit)))
|
|
{
|
|
goto err;
|
|
}
|
|
}
|
|
}
|
|
free_join= 0;
|
|
join->select_options= select_options;
|
|
}
|
|
else
|
|
{
|
|
if (thd->lex->describe)
|
|
select_options|= SELECT_DESCRIBE;
|
|
|
|
/*
|
|
When in EXPLAIN, delay deleting the joins so that they are still
|
|
available when we're producing EXPLAIN EXTENDED warning text.
|
|
*/
|
|
if (select_options & SELECT_DESCRIBE)
|
|
free_join= 0;
|
|
|
|
if (!(join= new (thd->mem_root) JOIN(thd, fields, select_options, result)))
|
|
DBUG_RETURN(TRUE);
|
|
THD_STAGE_INFO(thd, stage_init);
|
|
thd->lex->used_tables=0;
|
|
if (!join->prepared &&
|
|
(err= join->prepare(tables, conds, og_num, order, false, group, having,
|
|
proc_param, select_lex, unit)))
|
|
{
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
thd->get_stmt_da()->reset_current_row_for_warning(1);
|
|
/* Look for a table owned by an engine with the select_handler interface */
|
|
select_lex->pushdown_select= find_single_select_handler(thd, select_lex);
|
|
|
|
if ((err= join->optimize()))
|
|
{
|
|
goto err; // 1
|
|
}
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
join->conds_history= join->conds;
|
|
join->having_history= (join->having?join->having:join->tmp_having);
|
|
}
|
|
|
|
if (unlikely(thd->is_error()))
|
|
goto err;
|
|
|
|
exec_error= join->exec();
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
select_lex->where= join->conds_history;
|
|
select_lex->having= join->having_history;
|
|
}
|
|
|
|
err:
|
|
thd->push_final_warnings();
|
|
if (select_lex->pushdown_select)
|
|
{
|
|
delete select_lex->pushdown_select;
|
|
select_lex->pushdown_select= NULL;
|
|
}
|
|
|
|
if (free_join)
|
|
{
|
|
THD_STAGE_INFO(thd, stage_end);
|
|
err|= (int)(select_lex->cleanup());
|
|
DBUG_RETURN(exec_error || err || thd->is_error());
|
|
}
|
|
DBUG_RETURN(exec_error || err);
|
|
}
|
|
|
|
|
|
/**
|
|
Approximate how many records are going to be returned by this table in this
|
|
select with this key.
|
|
|
|
@param thd Thread handle
|
|
@param select Select to be examined
|
|
@param table The table of interest
|
|
@param keys The keys of interest
|
|
@param limit Maximum number of rows of interest
|
|
@param quick_count Pointer to where we want the estimate written
|
|
|
|
@return Status
|
|
@retval false Success
|
|
@retval true Error
|
|
|
|
*/
|
|
static bool get_quick_record_count(THD *thd, SQL_SELECT *select,
|
|
TABLE *table,
|
|
const key_map *keys,ha_rows limit,
|
|
ha_rows *quick_count)
|
|
{
|
|
quick_select_return error;
|
|
DBUG_ENTER("get_quick_record_count");
|
|
uchar buff[STACK_BUFF_ALLOC];
|
|
if (unlikely(check_stack_overrun(thd, STACK_MIN_SIZE, buff)))
|
|
DBUG_RETURN(false); // Fatal error flag is set
|
|
if (select)
|
|
{
|
|
select->head=table;
|
|
table->reginfo.impossible_range=0;
|
|
/*
|
|
EQ_FUNC and EQUAL_FUNC already sent unusable key notes (if any)
|
|
during update_ref_and_keys(). Have only other functions raise notes
|
|
from can_optimize_scalar_range().
|
|
*/
|
|
error= select->test_quick_select(thd, *(key_map *)keys, (table_map) 0,
|
|
limit, 0, FALSE,
|
|
TRUE, /* remove_where_parts*/
|
|
FALSE,
|
|
Item_func::BITMAP_EXCEPT_ANY_EQUALITY);
|
|
|
|
if (error == SQL_SELECT::OK)
|
|
{
|
|
if (select->quick)
|
|
{
|
|
/*
|
|
opt_range_condition_rows was updated in test_quick_select to be
|
|
the smallest number of rows in any range.
|
|
select->quick->records is the number of rows in range with
|
|
smallest cost.
|
|
*/
|
|
DBUG_ASSERT(select->quick->records >=
|
|
table->opt_range_condition_rows);
|
|
*quick_count= select->quick->records;
|
|
}
|
|
DBUG_RETURN(false);
|
|
}
|
|
if (error == SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
{
|
|
table->reginfo.impossible_range=1;
|
|
*quick_count= 0;
|
|
DBUG_RETURN(false);
|
|
}
|
|
if (unlikely(error == SQL_SELECT::ERROR))
|
|
DBUG_RETURN(true);
|
|
|
|
DBUG_PRINT("warning",("Couldn't use record count on const keypart"));
|
|
}
|
|
*quick_count= HA_POS_ERROR;
|
|
DBUG_RETURN(false); /* This shouldn't happen */
|
|
}
|
|
|
|
/*
|
|
This structure is used to collect info on potentially sargable
|
|
predicates in order to check whether they become sargable after
|
|
reading const tables.
|
|
We form a bitmap of indexes that can be used for sargable predicates.
|
|
Only such indexes are involved in range analysis.
|
|
*/
|
|
struct SARGABLE_PARAM
|
|
{
|
|
Field *field; /* field against which to check sargability */
|
|
Item **arg_value; /* values of potential keys for lookups */
|
|
uint num_values; /* number of values in the above array */
|
|
};
|
|
|
|
|
|
/*
|
|
Mark all tables inside a join nest as constant.
|
|
|
|
@detail This is called when there is a local "Impossible WHERE" inside
|
|
a multi-table LEFT JOIN.
|
|
*/
|
|
|
|
void mark_join_nest_as_const(JOIN *join,
|
|
TABLE_LIST *join_nest,
|
|
table_map *found_const_table_map,
|
|
uint *const_count)
|
|
{
|
|
List_iterator<TABLE_LIST> it(join_nest->nested_join->join_list);
|
|
TABLE_LIST *tbl;
|
|
Json_writer_object emb_obj(join->thd);
|
|
Json_writer_object trace_obj(join->thd, "mark_join_nest_as_const");
|
|
Json_writer_array trace_array(join->thd, "members");
|
|
|
|
while ((tbl= it++))
|
|
{
|
|
if (tbl->nested_join)
|
|
{
|
|
mark_join_nest_as_const(join, tbl, found_const_table_map, const_count);
|
|
continue;
|
|
}
|
|
JOIN_TAB *tab= tbl->table->reginfo.join_tab;
|
|
|
|
if (!(join->const_table_map & tab->table->map))
|
|
{
|
|
tab->type= JT_CONST;
|
|
tab->info= ET_IMPOSSIBLE_ON_CONDITION;
|
|
tab->table->const_table= 1;
|
|
|
|
join->const_table_map|= tab->table->map;
|
|
*found_const_table_map|= tab->table->map;
|
|
set_position(join,(*const_count)++,tab,(KEYUSE*) 0);
|
|
mark_as_null_row(tab->table); // All fields are NULL
|
|
|
|
trace_array.add_table_name(tab->table);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief Get the condition that can be used to do range analysis/partition
|
|
pruning/etc
|
|
|
|
@detail
|
|
Figure out which condition we can use:
|
|
- For INNER JOIN, we use the WHERE,
|
|
- "t1 LEFT JOIN t2 ON ..." uses t2's ON expression
|
|
- "t1 LEFT JOIN (...) ON ..." uses the join nest's ON expression.
|
|
*/
|
|
|
|
static Item **get_sargable_cond(JOIN *join, TABLE *table)
|
|
{
|
|
Item **retval;
|
|
if (table->pos_in_table_list->on_expr)
|
|
{
|
|
/*
|
|
This is an inner table from a single-table LEFT JOIN, "t1 LEFT JOIN
|
|
t2 ON cond". Use the condition cond.
|
|
*/
|
|
retval= &table->pos_in_table_list->on_expr;
|
|
}
|
|
else if (table->pos_in_table_list->embedding &&
|
|
!table->pos_in_table_list->embedding->sj_on_expr)
|
|
{
|
|
/*
|
|
This is the inner side of a multi-table outer join. Use the
|
|
appropriate ON expression.
|
|
*/
|
|
retval= &(table->pos_in_table_list->embedding->on_expr);
|
|
}
|
|
else
|
|
{
|
|
/* The table is not inner wrt some LEFT JOIN. Use the WHERE clause */
|
|
retval= &join->conds;
|
|
}
|
|
return retval;
|
|
}
|
|
|
|
|
|
/**
|
|
Calculate the best possible join and initialize the join structure.
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Fatal error
|
|
*/
|
|
|
|
static bool
|
|
make_join_statistics(JOIN *join, List<TABLE_LIST> &tables_list,
|
|
DYNAMIC_ARRAY *keyuse_array)
|
|
{
|
|
int error= 0;
|
|
uint i,table_count,const_count,key;
|
|
uint sort_space;
|
|
table_map found_const_table_map, all_table_map;
|
|
key_map const_ref, eq_part;
|
|
bool has_expensive_keyparts;
|
|
TABLE **table_vector;
|
|
JOIN_TAB *stat,*stat_end,*s,**stat_ref, **stat_vector;
|
|
KEYUSE *keyuse,*start_keyuse;
|
|
table_map outer_join=0;
|
|
table_map no_rows_const_tables= 0;
|
|
SARGABLE_PARAM *sargables= 0;
|
|
List_iterator<TABLE_LIST> ti(tables_list);
|
|
TABLE_LIST *tables;
|
|
THD *thd= join->thd;
|
|
DBUG_ENTER("make_join_statistics");
|
|
|
|
table_count=join->table_count;
|
|
|
|
/*
|
|
best_extension_by_limited_search needs sort space for 2 POSITION
|
|
objects per remaining table, which gives us
|
|
2*(T + T-1 + T-2 + T-3...1 POSITIONS) = 2*(T+1)/2*T = (T*T+T)
|
|
*/
|
|
join->sort_space= sort_space= (table_count*table_count + table_count);
|
|
|
|
/*
|
|
best_positions is ok to allocate with alloc() as we copy things to it with
|
|
memcpy()
|
|
*/
|
|
|
|
if (!multi_alloc_root(join->thd->mem_root,
|
|
&stat, sizeof(JOIN_TAB)*(table_count),
|
|
&stat_ref, sizeof(JOIN_TAB*)* MAX_TABLES,
|
|
&stat_vector, sizeof(JOIN_TAB*)* (table_count +1),
|
|
&table_vector, sizeof(TABLE*)*(table_count*2),
|
|
&join->positions, sizeof(POSITION)*(table_count + 1),
|
|
&join->sort_positions, sizeof(POSITION)*(sort_space),
|
|
&join->best_positions,
|
|
sizeof(POSITION)*(table_count + 1),
|
|
NullS))
|
|
DBUG_RETURN(1);
|
|
|
|
/* The following should be optimized to only clear critical things */
|
|
bzero((void*)stat, sizeof(JOIN_TAB)* table_count);
|
|
join->top_join_tab_count= table_count;
|
|
|
|
/* Initialize POSITION objects */
|
|
for (i=0 ; i <= table_count ; i++)
|
|
(void) new ((char*) (join->positions + i)) POSITION;
|
|
for (i=0 ; i < sort_space ; i++)
|
|
(void) new ((char*) (join->sort_positions + i)) POSITION;
|
|
|
|
join->best_ref= stat_vector;
|
|
|
|
stat_end=stat+table_count;
|
|
found_const_table_map= all_table_map=0;
|
|
const_count=0;
|
|
|
|
for (s= stat, i= 0; (tables= ti++); s++, i++)
|
|
{
|
|
TABLE_LIST *embedding= tables->embedding;
|
|
TABLE *table= tables->table;
|
|
stat_vector[i]=s;
|
|
table_vector[i]= s->table= table;
|
|
s->tab_list= tables;
|
|
table->pos_in_table_list= tables;
|
|
error= tables->fetch_number_of_rows();
|
|
/* Calculate table->use_stat_records */
|
|
set_statistics_for_table(join->thd, table);
|
|
bitmap_clear_all(&table->cond_set);
|
|
|
|
#ifdef WITH_PARTITION_STORAGE_ENGINE
|
|
const bool all_partitions_pruned_away= table->all_partitions_pruned_away;
|
|
#else
|
|
const bool all_partitions_pruned_away= FALSE;
|
|
#endif
|
|
|
|
DBUG_EXECUTE_IF("bug11747970_raise_error",
|
|
{ join->thd->set_killed(KILL_QUERY_HARD); });
|
|
if (unlikely(error))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
goto error;
|
|
}
|
|
table->opt_range_keys.clear_all();
|
|
table->intersect_keys.clear_all();
|
|
table->reginfo.join_tab=s;
|
|
table->reginfo.not_exists_optimize=0;
|
|
bzero(table->const_key_parts, sizeof(key_part_map)*table->s->total_keys);
|
|
all_table_map|= table->map;
|
|
s->preread_init_done= FALSE;
|
|
s->join=join;
|
|
|
|
s->dependent= tables->dep_tables;
|
|
if (tables->schema_table)
|
|
{
|
|
/*
|
|
Information schema is slow and we don't know how many rows we will
|
|
find. By setting a moderate amount of rows we are more likely
|
|
to have it materialized if needed.
|
|
*/
|
|
table->file->stats.records= table->used_stat_records= 100;
|
|
}
|
|
table->opt_range_condition_rows= table->stat_records();
|
|
|
|
s->on_expr_ref= &tables->on_expr;
|
|
if (*s->on_expr_ref)
|
|
{
|
|
/* s is the only inner table of an outer join */
|
|
if (!table->is_filled_at_execution() &&
|
|
((!table->file->stats.records &&
|
|
(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
|
|
all_partitions_pruned_away) && !embedding)
|
|
{ // Empty table
|
|
s->dependent= 0; // Ignore LEFT JOIN depend.
|
|
no_rows_const_tables |= table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
continue;
|
|
}
|
|
outer_join|= table->map;
|
|
s->embedding_map= 0;
|
|
for (;embedding; embedding= embedding->embedding)
|
|
s->embedding_map|= embedding->nested_join->nj_map;
|
|
continue;
|
|
}
|
|
if (embedding)
|
|
{
|
|
/* s belongs to a nested join, maybe to several embedded joins */
|
|
s->embedding_map= 0;
|
|
bool inside_an_outer_join= FALSE;
|
|
do
|
|
{
|
|
/*
|
|
If this is a semi-join nest, skip it, and proceed upwards. Maybe
|
|
we're in some outer join nest
|
|
*/
|
|
if (embedding->sj_on_expr)
|
|
{
|
|
embedding= embedding->embedding;
|
|
continue;
|
|
}
|
|
inside_an_outer_join= TRUE;
|
|
NESTED_JOIN *nested_join= embedding->nested_join;
|
|
s->embedding_map|=nested_join->nj_map;
|
|
s->dependent|= embedding->dep_tables;
|
|
embedding= embedding->embedding;
|
|
outer_join|= nested_join->used_tables;
|
|
}
|
|
while (embedding);
|
|
if (inside_an_outer_join)
|
|
continue;
|
|
}
|
|
if (!table->is_filled_at_execution() &&
|
|
(table->s->system ||
|
|
(table->file->stats.records <= 1 &&
|
|
(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT)) ||
|
|
all_partitions_pruned_away) &&
|
|
!s->dependent &&
|
|
!table->fulltext_searched && !join->no_const_tables)
|
|
{
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
no_rows_const_tables |= table->map;
|
|
}
|
|
|
|
/* SJ-Materialization handling: */
|
|
if (table->pos_in_table_list->jtbm_subselect &&
|
|
table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
no_rows_const_tables |= table->map;
|
|
table->file->stats.records= 0;
|
|
}
|
|
}
|
|
|
|
stat_vector[i]=0;
|
|
join->outer_join=outer_join;
|
|
|
|
if (join->outer_join)
|
|
{
|
|
/*
|
|
Build transitive closure for relation 'to be dependent on'.
|
|
This will speed up the plan search for many cases with outer joins,
|
|
as well as allow us to catch illegal cross references.
|
|
Warshall's algorithm is used to build the transitive closure.
|
|
As we use bitmaps to represent the relation the complexity
|
|
of the algorithm is O((number of tables)^2).
|
|
|
|
The classic form of the Warshall's algorithm would look like:
|
|
for (i= 0; i < table_count; i++)
|
|
{
|
|
for (j= 0; j < table_count; j++)
|
|
{
|
|
for (k= 0; k < table_count; k++)
|
|
{
|
|
if (bitmap_is_set(stat[j].dependent, i) &&
|
|
bitmap_is_set(stat[i].dependent, k))
|
|
bitmap_set_bit(stat[j].dependent, k);
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
|
|
for (s= stat ; s < stat_end ; s++)
|
|
{
|
|
TABLE *table= s->table;
|
|
for (JOIN_TAB *t= stat ; t < stat_end ; t++)
|
|
{
|
|
if (t->dependent & table->map)
|
|
t->dependent |= table->reginfo.join_tab->dependent;
|
|
}
|
|
if (outer_join & s->table->map)
|
|
s->table->maybe_null= 1;
|
|
}
|
|
/* Catch illegal cross references for outer joins */
|
|
for (i= 0, s= stat ; i < table_count ; i++, s++)
|
|
{
|
|
if (s->dependent & s->table->map)
|
|
{
|
|
join->table_count=0; // Don't use join->table
|
|
my_message(ER_WRONG_OUTER_JOIN,
|
|
ER_THD(join->thd, ER_WRONG_OUTER_JOIN), MYF(0));
|
|
goto error;
|
|
}
|
|
s->key_dependent= s->dependent;
|
|
}
|
|
}
|
|
|
|
{
|
|
for (JOIN_TAB *s= stat ; s < stat_end ; s++)
|
|
{
|
|
TABLE_LIST *tl= s->table->pos_in_table_list;
|
|
if (tl->embedding && tl->embedding->sj_subq_pred)
|
|
{
|
|
s->embedded_dependent= tl->embedding->original_subq_pred_used_tables;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
trace_table_dependencies(thd, stat, join->table_count);
|
|
|
|
if (join->conds || outer_join)
|
|
{
|
|
if (update_ref_and_keys(thd, keyuse_array, stat, join->table_count,
|
|
join->conds, ~outer_join, join->select_lex, &sargables))
|
|
goto error;
|
|
/*
|
|
Keyparts without prefixes may be useful if this JOIN is a subquery, and
|
|
if the subquery may be executed via the IN-EXISTS strategy.
|
|
*/
|
|
bool skip_unprefixed_keyparts=
|
|
!(join->is_in_subquery() &&
|
|
join->unit->item->get_IN_subquery()->test_strategy(SUBS_IN_TO_EXISTS));
|
|
|
|
if (keyuse_array->elements &&
|
|
sort_and_filter_keyuse(join, keyuse_array,
|
|
skip_unprefixed_keyparts))
|
|
goto error;
|
|
DBUG_EXECUTE("opt", print_keyuse_array(keyuse_array););
|
|
if (unlikely(thd->trace_started()))
|
|
print_keyuse_array_for_trace(thd, keyuse_array);
|
|
}
|
|
|
|
join->const_table_map= no_rows_const_tables;
|
|
join->const_tables= const_count;
|
|
eliminate_tables(join);
|
|
join->const_table_map &= ~no_rows_const_tables;
|
|
const_count= join->const_tables;
|
|
found_const_table_map= join->const_table_map;
|
|
|
|
/* Read tables with 0 or 1 rows (system tables) */
|
|
for (POSITION *p_pos=join->positions, *p_end=p_pos+const_count;
|
|
p_pos < p_end ;
|
|
p_pos++)
|
|
{
|
|
s= p_pos->table;
|
|
if (! (s->table->map & join->eliminated_tables))
|
|
{
|
|
int tmp;
|
|
s->type=JT_SYSTEM;
|
|
join->const_table_map|=s->table->map;
|
|
if ((tmp=join_read_const_table(join->thd, s, p_pos)))
|
|
{
|
|
if (tmp > 0)
|
|
goto error; // Fatal error
|
|
}
|
|
else
|
|
{
|
|
found_const_table_map|= s->table->map;
|
|
s->table->pos_in_table_list->optimized_away= TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* loop until no more const tables are found */
|
|
int ref_changed;
|
|
do
|
|
{
|
|
ref_changed = 0;
|
|
more_const_tables_found:
|
|
|
|
/*
|
|
We only have to loop from stat_vector + const_count as
|
|
set_position() will move all const_tables first in stat_vector
|
|
*/
|
|
|
|
for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
|
|
{
|
|
TABLE *table= s->table;
|
|
|
|
if (table->is_filled_at_execution())
|
|
continue;
|
|
|
|
/*
|
|
If equi-join condition by a key is null rejecting and after a
|
|
substitution of a const table the key value happens to be null
|
|
then we can state that there are no matches for this equi-join.
|
|
*/
|
|
if ((keyuse= s->keyuse) && *s->on_expr_ref && !s->embedding_map &&
|
|
!(table->map & join->eliminated_tables))
|
|
{
|
|
/*
|
|
When performing an outer join operation if there are no matching rows
|
|
for the single row of the outer table all the inner tables are to be
|
|
null complemented and thus considered as constant tables.
|
|
Here we apply this consideration to the case of outer join operations
|
|
with a single inner table only because the case with nested tables
|
|
would require a more thorough analysis.
|
|
TODO. Apply single row substitution to null complemented inner tables
|
|
for nested outer join operations.
|
|
*/
|
|
while (keyuse->table == table)
|
|
{
|
|
if (!keyuse->is_for_hash_join() &&
|
|
!(keyuse->val->used_tables() & ~join->const_table_map) &&
|
|
keyuse->val->is_null() && keyuse->null_rejecting)
|
|
{
|
|
s->type= JT_CONST;
|
|
s->table->const_table= 1;
|
|
mark_as_null_row(table);
|
|
found_const_table_map|= table->map;
|
|
join->const_table_map|= table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
goto more_const_tables_found;
|
|
}
|
|
keyuse++;
|
|
}
|
|
}
|
|
|
|
if (s->dependent) // If dependent on some table
|
|
{
|
|
// All dep. must be constants
|
|
if (s->dependent & ~(found_const_table_map))
|
|
continue;
|
|
if (table->file->stats.records <= 1L &&
|
|
(table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
|
|
!table->pos_in_table_list->embedding &&
|
|
!((outer_join & table->map) &&
|
|
(*s->on_expr_ref)->is_expensive()))
|
|
{ // system table
|
|
int tmp= 0;
|
|
s->type= JT_SYSTEM;
|
|
join->const_table_map|=table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
if ((tmp= join_read_const_table(join->thd, s,
|
|
join->positions+const_count-1)))
|
|
{
|
|
if (tmp > 0)
|
|
goto error; // Fatal error
|
|
}
|
|
else
|
|
found_const_table_map|= table->map;
|
|
continue;
|
|
}
|
|
}
|
|
/* check if table can be read by key or table only uses const refs */
|
|
if ((keyuse=s->keyuse))
|
|
{
|
|
s->type= JT_REF;
|
|
while (keyuse->table == table)
|
|
{
|
|
if (keyuse->is_for_hash_join())
|
|
{
|
|
keyuse++;
|
|
continue;
|
|
}
|
|
start_keyuse=keyuse;
|
|
key=keyuse->key;
|
|
s->keys.set_bit(key); // TODO: remove this ?
|
|
|
|
const_ref.clear_all();
|
|
eq_part.clear_all();
|
|
has_expensive_keyparts= false;
|
|
do
|
|
{
|
|
if (keyuse->val->type() != Item::NULL_ITEM &&
|
|
!keyuse->optimize &&
|
|
keyuse->keypart != FT_KEYPART)
|
|
{
|
|
if (!((~found_const_table_map) & keyuse->used_tables))
|
|
{
|
|
const_ref.set_bit(keyuse->keypart);
|
|
if (keyuse->val->is_expensive())
|
|
has_expensive_keyparts= true;
|
|
}
|
|
eq_part.set_bit(keyuse->keypart);
|
|
}
|
|
keyuse++;
|
|
} while (keyuse->table == table && keyuse->key == key);
|
|
|
|
TABLE_LIST *embedding= table->pos_in_table_list->embedding;
|
|
/*
|
|
TODO (low priority): currently we ignore the const tables that
|
|
are within a semi-join nest which is within an outer join nest.
|
|
The effect of this is that we don't do const substitution for
|
|
such tables.
|
|
*/
|
|
KEY *keyinfo= table->key_info + key;
|
|
uint key_parts= table->actual_n_key_parts(keyinfo);
|
|
if (eq_part.is_prefix(key_parts) &&
|
|
!table->fulltext_searched &&
|
|
(!embedding || (embedding->sj_on_expr && !embedding->embedding)))
|
|
{
|
|
key_map base_part, base_const_ref, base_eq_part;
|
|
base_part.set_prefix(keyinfo->user_defined_key_parts);
|
|
base_const_ref= const_ref;
|
|
base_const_ref.intersect(base_part);
|
|
base_eq_part= eq_part;
|
|
base_eq_part.intersect(base_part);
|
|
|
|
/*
|
|
We can read the const record if we are using a full unique key and
|
|
if the table is not an unopened to be materialized table/view.
|
|
*/
|
|
if ((table->actual_key_flags(keyinfo) & HA_NOSAME) &&
|
|
(!s->table->pos_in_table_list->is_materialized_derived() ||
|
|
s->table->pos_in_table_list->fill_me))
|
|
{
|
|
|
|
if (base_const_ref == base_eq_part &&
|
|
!has_expensive_keyparts &&
|
|
!((outer_join & table->map) &&
|
|
(*s->on_expr_ref)->is_expensive()))
|
|
{ // Found everything for ref.
|
|
int tmp;
|
|
ref_changed = 1;
|
|
s->type= JT_CONST;
|
|
join->const_table_map|=table->map;
|
|
set_position(join,const_count++,s,start_keyuse);
|
|
/* create_ref_for_key will set s->table->const_table */
|
|
if (create_ref_for_key(join, s, start_keyuse, FALSE,
|
|
found_const_table_map))
|
|
goto error;
|
|
if ((tmp=join_read_const_table(join->thd, s,
|
|
join->positions+const_count-1)))
|
|
{
|
|
if (tmp > 0)
|
|
goto error; // Fatal error
|
|
}
|
|
else
|
|
found_const_table_map|= table->map;
|
|
break;
|
|
}
|
|
}
|
|
else if (base_const_ref == base_eq_part)
|
|
s->const_keys.set_bit(key);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} while (ref_changed);
|
|
|
|
join->sort_by_table= get_sort_by_table(join->order, join->group_list,
|
|
join->select_lex->leaf_tables,
|
|
join->const_table_map);
|
|
join->limit_shortcut_applicable= join_limit_shortcut_is_applicable(join);
|
|
  /*
    Update info on indexes that can be used for search lookups, as
    reading const tables may have added new sargable predicates.
  */
|
|
if (const_count && sargables)
|
|
{
|
|
for( ; sargables->field ; sargables++)
|
|
{
|
|
Field *field= sargables->field;
|
|
JOIN_TAB *join_tab= field->table->reginfo.join_tab;
|
|
key_map possible_keys= field->key_start;
|
|
possible_keys.intersect(field->table->keys_in_use_for_query);
|
|
bool is_const= 1;
|
|
for (uint j=0; j < sargables->num_values; j++)
|
|
is_const&= sargables->arg_value[j]->const_item();
|
|
if (is_const)
|
|
join_tab[0].const_keys.merge(possible_keys);
|
|
}
|
|
}
|
|
|
|
join->impossible_where= false;
|
|
if (join->conds && const_count)
|
|
{
|
|
Item* &conds= join->conds;
|
|
COND_EQUAL *orig_cond_equal = join->cond_equal;
|
|
|
|
conds->update_used_tables();
|
|
conds= conds->remove_eq_conds(join->thd, &join->cond_value, true);
|
|
if (conds && conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
|
|
join->cond_equal= &((Item_cond_and*) conds)->m_cond_equal;
|
|
join->select_lex->where= conds;
|
|
if (join->cond_value == Item::COND_FALSE)
|
|
{
|
|
join->impossible_where= true;
|
|
conds= (Item*) Item_false;
|
|
}
|
|
|
|
join->cond_equal= NULL;
|
|
if (conds)
|
|
{
|
|
if (conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
|
|
join->cond_equal= (&((Item_cond_and *) conds)->m_cond_equal);
|
|
else if (conds->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) conds)->functype() == Item_func::MULT_EQUAL_FUNC)
|
|
{
|
|
if (!join->cond_equal)
|
|
join->cond_equal= new COND_EQUAL;
|
|
join->cond_equal->current_level.empty();
|
|
join->cond_equal->current_level.push_back((Item_equal*) conds,
|
|
join->thd->mem_root);
|
|
}
|
|
}
|
|
|
|
if (orig_cond_equal != join->cond_equal)
|
|
{
|
|
/*
|
|
If join->cond_equal has changed all references to it from COND_EQUAL
|
|
objects associated with ON expressions must be updated.
|
|
*/
|
|
for (JOIN_TAB **pos=stat_vector+const_count ; (s= *pos) ; pos++)
|
|
{
|
|
if (*s->on_expr_ref && s->cond_equal &&
|
|
s->cond_equal->upper_levels == orig_cond_equal)
|
|
s->cond_equal->upper_levels= join->cond_equal;
|
|
}
|
|
}
|
|
}
|
|
|
|
join->join_tab= stat;
|
|
join->make_notnull_conds_for_range_scans();
|
|
|
|
/* Calc how many (possible) matched records in each table */
|
|
|
|
/*
|
|
Todo: add a function so that we can add these Json_writer_objects
|
|
easily.
|
|
Another way would be to enclose them in a scope {};
|
|
*/
|
|
{
|
|
Json_writer_object rows_estimation_wrapper(thd);
|
|
Json_writer_array rows_estimation(thd, "rows_estimation");
|
|
|
|
for (s=stat ; s < stat_end ; s++)
|
|
{
|
|
s->startup_cost= 0;
|
|
if (s->type == JT_SYSTEM || s->type == JT_CONST)
|
|
{
|
|
Json_writer_object table_records(thd);
|
|
ha_rows records= 1;
|
|
if (s->type == JT_SYSTEM || s->table->file->stats.records == 0)
|
|
records= s->table->file->stats.records;
|
|
/* zero or one matching row */
|
|
s->records= s->found_records= records;
|
|
s->records_init= s->records_out= rows2double(records);
|
|
s->read_time=1.0;
|
|
table_records.add_table_name(s).
|
|
add("rows", s->found_records).
|
|
add("cost", s->read_time).
|
|
add("table_type", s->type == JT_CONST ?
|
|
"const" : "system");
|
|
continue;
|
|
}
|
|
/*
|
|
Approximate found rows and time to read them
|
|
Update found_records, records, read_time and other scan related
|
|
variables
|
|
*/
|
|
s->estimate_scan_time();
|
|
|
|
if (s->table->is_splittable())
|
|
s->add_keyuses_for_splitting();
|
|
|
|
/*
|
|
Add to stat->const_keys those indexes for which all group fields or
|
|
all select distinct fields participate in one index.
|
|
*/
|
|
add_group_and_distinct_keys(join, s);
|
|
|
|
/* This will be updated in calculate_cond_selectivity_for_table() */
|
|
s->table->set_cond_selectivity(1.0);
|
|
DBUG_ASSERT(s->table->used_stat_records == 0 ||
|
|
s->table->cond_selectivity <=
|
|
s->table->opt_range_condition_rows /
|
|
s->table->used_stat_records);
|
|
/*
|
|
Perform range analysis if there are keys it could use (1).
|
|
Don't do range analysis for materialized subqueries (2).
|
|
Don't do range analysis for materialized derived tables/views (3)
|
|
*/
|
|
if ((!s->const_keys.is_clear_all() ||
|
|
!bitmap_is_clear_all(&s->table->cond_set)) && // (1)
|
|
!s->table->is_filled_at_execution() && // (2)
|
|
!(s->table->pos_in_table_list->derived && // (3)
|
|
s->table->pos_in_table_list->is_materialized_derived())) // (3)
|
|
{
|
|
bool impossible_range= FALSE;
|
|
ha_rows records= HA_ROWS_MAX;
|
|
SQL_SELECT *select= 0;
|
|
Item **sargable_cond= NULL;
|
|
if (!s->const_keys.is_clear_all())
|
|
{
|
|
sargable_cond= get_sargable_cond(join, s->table);
|
|
bool is_sargable_cond_of_where= sargable_cond == &join->conds;
|
|
|
|
select= make_select(s->table, found_const_table_map,
|
|
found_const_table_map,
|
|
*sargable_cond,
|
|
(SORT_INFO*) 0, 1, &error);
|
|
if (!select)
|
|
goto error;
|
|
if (get_quick_record_count(join->thd, select, s->table,
|
|
&s->const_keys, join->row_limit, &records))
|
|
{
|
|
/* There was an error in test_quick_select */
|
|
delete select;
|
|
goto error;
|
|
}
|
|
        /*
          Range analyzer might have modified the condition. Put the new
          condition back where we got it from.
        */
|
|
*sargable_cond= select->cond;
|
|
|
|
if (is_sargable_cond_of_where &&
|
|
join->conds && join->conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) (join->conds))->functype() ==
|
|
Item_func::COND_AND_FUNC)
|
|
join->cond_equal= &((Item_cond_and*) (join->conds))->m_cond_equal;
|
|
|
|
s->quick=select->quick;
|
|
select->quick=0;
|
|
s->needed_reg=select->needed_reg;
|
|
impossible_range= records == 0 && s->table->reginfo.impossible_range;
|
|
if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_USE_ROWID_FILTER))
|
|
s->table->init_cost_info_for_usable_range_rowid_filters(join->thd);
|
|
}
|
|
if (!impossible_range)
|
|
{
|
|
if (!sargable_cond)
|
|
sargable_cond= get_sargable_cond(join, s->table);
|
|
if (join->thd->variables.optimizer_use_condition_selectivity > 1)
|
|
calculate_cond_selectivity_for_table(join->thd, s->table,
|
|
sargable_cond);
|
|
if (s->table->reginfo.impossible_range)
|
|
{
|
|
impossible_range= TRUE;
|
|
records= 0;
|
|
}
|
|
}
|
|
if (impossible_range)
|
|
{
|
|
          /*
            Impossible WHERE or ON expression.
            In case of ON, we mark that we match one empty NULL row.
            In case of WHERE, don't set found_const_table_map, to get the
            caller to abort with a zero row result.
          */
|
|
TABLE_LIST *emb= s->table->pos_in_table_list->embedding;
|
|
if (emb && !emb->sj_on_expr && !*s->on_expr_ref)
|
|
{
|
|
/* Mark all tables in a multi-table join nest as const */
|
|
mark_join_nest_as_const(join, emb, &found_const_table_map,
|
|
&const_count);
|
|
}
|
|
else
|
|
{
|
|
double records= 1;
|
|
join->const_table_map|= s->table->map;
|
|
set_position(join,const_count++,s,(KEYUSE*) 0);
|
|
s->type= JT_CONST;
|
|
s->table->const_table= 1;
|
|
if (*s->on_expr_ref)
|
|
{
|
|
/* Generate empty row */
|
|
s->info= ET_IMPOSSIBLE_ON_CONDITION;
|
|
found_const_table_map|= s->table->map;
|
|
mark_as_null_row(s->table); // All fields are NULL
|
|
records= 0;
|
|
}
|
|
s->records_init= s->records_out= records;
|
|
s->found_records= s->records= (ha_rows)records;
|
|
}
|
|
}
|
|
if (records != HA_POS_ERROR)
|
|
{
|
|
s->found_records=records;
|
|
s->read_time= s->quick ? s->quick->read_time : 0.0;
|
|
}
|
|
if (select)
|
|
delete select;
|
|
else
|
|
{
|
|
if (unlikely(thd->trace_started()))
|
|
add_table_scan_values_to_trace(thd, s);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (unlikely(thd->trace_started()))
|
|
add_table_scan_values_to_trace(thd, s);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (pull_out_semijoin_tables(join))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
join->join_tab=stat;
|
|
join->top_join_tab_count= table_count;
|
|
join->map2table=stat_ref;
|
|
join->table= table_vector;
|
|
join->const_tables=const_count;
|
|
join->found_const_table_map=found_const_table_map;
|
|
|
|
if (join->const_tables != join->table_count)
|
|
optimize_keyuse(join, keyuse_array);
|
|
|
|
DBUG_ASSERT(!join->conds || !join->cond_equal ||
|
|
!join->cond_equal->current_level.elements ||
|
|
(join->conds->type() == Item::COND_ITEM &&
|
|
((Item_cond*) (join->conds))->functype() ==
|
|
Item_func::COND_AND_FUNC &&
|
|
join->cond_equal ==
|
|
&((Item_cond_and *) (join->conds))->m_cond_equal) ||
|
|
(join->conds->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) (join->conds))->functype() ==
|
|
Item_func::MULT_EQUAL_FUNC &&
|
|
join->cond_equal->current_level.elements == 1 &&
|
|
join->cond_equal->current_level.head() == join->conds));
|
|
|
|
if (optimize_semijoin_nests(join, all_table_map))
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
|
|
{
|
|
SELECT_LEX_UNIT *unit= join->select_lex->master_unit();
|
|
|
|
/* Find an optimal join order of the non-constant tables. */
|
|
if (join->const_tables != join->table_count)
|
|
{
|
|
if (choose_plan(join, all_table_map & ~join->const_table_map, 0))
|
|
goto error;
|
|
|
|
#ifdef HAVE_valgrind
|
|
// JOIN::positions holds the current query plan. We've already
|
|
// made the plan choice, so we should only use JOIN::best_positions
|
|
for (uint k=join->const_tables; k < join->table_count; k++)
|
|
MEM_UNDEFINED(&join->positions[k], sizeof(join->positions[k]));
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
memcpy((uchar*) join->best_positions,(uchar*) join->positions,
|
|
sizeof(POSITION)*join->const_tables);
|
|
join->join_record_count= 1.0;
|
|
/* Const tables are part of optimizer setup and not counted in cost */
|
|
join->best_read=0.0;
|
|
}
|
|
|
|
if (!(join->select_options & SELECT_DESCRIBE) &&
|
|
unit->derived && unit->derived->is_materialized_derived())
|
|
{
|
|
/*
|
|
Calculate estimated number of rows for materialized derived
|
|
table/view.
|
|
*/
|
|
double records= 1.0;
|
|
ha_rows rows;
|
|
for (i= 0; i < join->table_count ; i++)
|
|
if (double rr= join->best_positions[i].records_read)
|
|
records= COST_MULT(records, rr);
|
|
rows= double_to_rows(records);
|
|
set_if_smaller(rows, unit->lim.get_select_limit());
|
|
join->select_lex->increase_derived_records(rows);
|
|
}
|
|
}
|
|
|
|
if (join->choose_subquery_plan(all_table_map & ~join->const_table_map))
|
|
goto error;
|
|
|
|
DEBUG_SYNC(join->thd, "inside_make_join_statistics");
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
error:
|
|
/*
|
|
Need to clean up join_tab from TABLEs in case of error.
|
|
They won't get cleaned up by JOIN::cleanup() because JOIN::join_tab
|
|
may not be assigned yet by this function (which is building join_tab).
|
|
Dangling TABLE::reginfo.join_tab may cause part_of_refkey to choke.
|
|
*/
|
|
{
|
|
TABLE_LIST *tmp_table;
|
|
List_iterator<TABLE_LIST> ti2(tables_list);
|
|
while ((tmp_table= ti2++))
|
|
tmp_table->table->reginfo.join_tab= NULL;
|
|
}
|
|
DBUG_RETURN (1);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
  Check which keys are used and which tables reference which tables
  Updates in stat:
	  keys	     Bitmap of all used keys
	  const_keys Bitmap of all keys which may be used with quick_select
	  keyuse     Pointer to possible keys
*****************************************************************************/
|
|
|
|
|
|
/**
|
|
Merge new key definitions to old ones, remove those not used in both.
|
|
|
|
This is called for OR between different levels.
|
|
|
|
That is, the function operates on an array of KEY_FIELD elements which has
|
|
two parts:
|
|
|
|
$LEFT_PART $RIGHT_PART
|
|
+-----------------------+-----------------------+
|
|
start new_fields end
|
|
|
|
$LEFT_PART and $RIGHT_PART are arrays that have KEY_FIELD elements for two
|
|
parts of the OR condition. Our task is to produce an array of KEY_FIELD
|
|
elements that would correspond to "$LEFT_PART OR $RIGHT_PART".
|
|
|
|
The rules for combining elements are as follows:
|
|
|
|
(keyfieldA1 AND keyfieldA2 AND ...) OR (keyfieldB1 AND keyfieldB2 AND ...)=
|
|
|
|
= AND_ij (keyfieldA_i OR keyfieldB_j)
|
|
|
|
We discard all (keyfieldA_i OR keyfieldB_j) that refer to different
|
|
fields. For those referring to the same field, the logic is as follows:
|
|
|
|
t.keycol=expr1 OR t.keycol=expr2 -> (since expr1 and expr2 are different
|
|
we can't produce a single equality,
|
|
so produce nothing)
|
|
|
|
t.keycol=expr1 OR t.keycol=expr1 -> t.keycol=expr1
|
|
|
|
t.keycol=expr1 OR t.keycol IS NULL -> t.keycol=expr1, and also set
|
|
KEY_OPTIMIZE_REF_OR_NULL flag
|
|
|
|
  The last one is for ref_or_null access. We have special handling for this
  because it's needed for evaluating IN subqueries that are internally
  transformed into
|
|
|
|
@code
|
|
EXISTS(SELECT * FROM t1 WHERE t1.key=outer_ref_field or t1.key IS NULL)
|
|
@endcode
|
|
|
|
See add_key_fields() for discussion of what is and_level.
|
|
|
|
KEY_FIELD::null_rejecting is processed as follows: @n
|
|
result has null_rejecting=true if it is set for both ORed references.
|
|
for example:
|
|
- (t2.key = t1.field OR t2.key = t1.field) -> null_rejecting=true
|
|
- (t2.key = t1.field OR t2.key <=> t1.field) -> null_rejecting=false
|
|
|
|
@todo
|
|
The result of this is that we're missing some 'ref' accesses.
|
|
OptimizerTeam: Fix this
|
|
*/
|
|
|
|
static KEY_FIELD *
merge_key_fields(KEY_FIELD *start,KEY_FIELD *new_fields,KEY_FIELD *end,
                 uint and_level)
{
  /*
    An empty left part (impossible OR) or an empty right part (no new
    fields) leaves nothing to merge: the result is the empty array.
  */
  if (start == new_fields || new_fields == end)
    return start;

  /* End of the still-alive elements of the left part */
  KEY_FIELD *first_free= new_fields;

  /* Mark all found fields in old array */
  for (KEY_FIELD *cand= new_fields; cand != end; cand++)
  {
    KEY_FIELD *old= start;
    while (old != first_free)
    {
      if (old->field == cand->field)
      {
        /*
          NOTE: below const_item() call really works as "!used_tables()", i.e.
          it can return FALSE where it is feasible to make it return TRUE.

          The cause is as follows: Some of the tables are already known to be
          const tables (the detection code is in make_join_statistics(),
          above the update_ref_and_keys() call), but we didn't propagate
          information about this: TABLE::const_table is not set to TRUE, and
          Item::update_used_tables() hasn't been called for each item.
          The result of this is that we're missing some 'ref' accesses.
          TODO: OptimizerTeam: Fix this
        */
        if (!cand->val->const_item())
        {
          /*
            If the value matches, we can use the key reference.
            If not, we keep it until we have examined all new values
          */
          if (old->val->eq(cand->val, old->field->binary()))
          {
            old->level= and_level;
            old->optimize=
              (old->optimize & cand->optimize & KEY_OPTIMIZE_EXISTS) |
              ((old->optimize | cand->optimize) & KEY_OPTIMIZE_REF_OR_NULL);
            old->null_rejecting= old->null_rejecting && cand->null_rejecting;
          }
        }
        else if (old->eq_func && cand->eq_func &&
                 old->val->eq_by_collation(cand->val,
                                           old->field->binary(),
                                           old->field->charset()))
        {
          /* Same constant on both sides of the OR */
          old->level= and_level;
          old->optimize=
            (old->optimize & cand->optimize & KEY_OPTIMIZE_EXISTS) |
            ((old->optimize | cand->optimize) & KEY_OPTIMIZE_REF_OR_NULL);
          old->null_rejecting= old->null_rejecting && cand->null_rejecting;
        }
        else if (old->eq_func && cand->eq_func &&
                 ((old->val->can_eval_in_optimize() && old->val->is_null()) ||
                  (!cand->val->is_expensive() && cand->val->is_null())))
        {
          /* field = expression OR field IS NULL */
          old->level= and_level;
          if (old->field->maybe_null())
          {
            old->optimize= KEY_OPTIMIZE_REF_OR_NULL;
            /* The referred expression can be NULL: */
            old->null_rejecting= 0;
          }
          /*
            Remember the NOT NULL value unless the value does not depend
            on other tables.
          */
          if (!old->val->used_tables() && !old->val->is_expensive() &&
              old->val->is_null())
            old->val= cand->val;
        }
        else
        {
          /*
            We are comparing two different const. In this case we can't
            use a key-lookup on this so it's better to remove the value
            and let the range optimizer handle it
          */
          if (old == --first_free)              // If last item
            break;
          *old= *first_free;                    // Remove old value
          continue;                             // Retry this slot
        }
      }
      old++;
    }
  }

  /* Remove all not used items */
  KEY_FIELD *old= start;
  while (old != first_free)
  {
    if (old->level == and_level)
    {
      old++;                                    // Used in all levels, keep it
      continue;
    }
    if (old == --first_free)
      break;
    *old= *first_free;                          // Remove old value
  }
  return first_free;
}
|
|
|
|
|
|
/*
|
|
Given a field, return its index in semi-join's select list, or UINT_MAX
|
|
|
|
DESCRIPTION
|
|
Given a field, we find its table; then see if the table is within a
|
|
semi-join nest and if the field was in select list of the subselect.
|
|
If it was, we return field's index in the select list. The value is used
|
|
by LooseScan strategy.
|
|
*/
|
|
|
|
static uint get_semi_join_select_list_index(Field *field)
{
  /* Only fields of tables directly embedded in a semi-join nest qualify */
  TABLE_LIST *emb_sj_nest= field->table->pos_in_table_list->embedding;
  if (!emb_sj_nest || !emb_sj_nest->sj_on_expr)
    return UINT_MAX;

  Item_in_subselect *subq_pred= emb_sj_nest->sj_subq_pred;
  st_select_lex *subq_lex= subq_pred->unit->first_select();
  const uint ncols= subq_pred->left_exp()->cols();

  /* Look for the field among the first ncols select list elements */
  for (uint col= 0; col < ncols; col++)
  {
    Item *sel_item= subq_lex->ref_pointer_array[col];
    if (sel_item->type() == Item::FIELD_ITEM &&
        ((Item_field*) sel_item)->field->eq(field))
      return col;
  }
  return UINT_MAX;
}
|
|
|
|
|
|
/**
|
|
Add a possible key to array of possible keys if it's usable as a key
|
|
|
|
@param key_fields Pointer to add key, if usable
|
|
@param and_level And level, to be stored in KEY_FIELD
|
|
@param cond Condition predicate
|
|
@param field Field used in comparision
|
|
@param eq_func True if we used =, <=> or IS NULL
|
|
@param value Value used for comparison with field
|
|
@param num_values Number of values[] that we are comparing against
|
|
@param usable_tables Tables which can be used for key optimization
|
|
@param sargables IN/OUT Array of found sargable candidates
|
|
@param row_col_no if = n that > 0 then field is compared only
|
|
against the n-th component of row values
|
|
|
|
@note
|
|
If we are doing a NOT NULL comparison on a NOT NULL field in a outer join
|
|
table, we store this to be able to do not exists optimization later.
|
|
|
|
@returns
|
|
*key_fields is incremented if we stored a key in the array
|
|
*/
|
|
|
|
static void
|
|
add_key_field(JOIN *join,
|
|
KEY_FIELD **key_fields,uint and_level, Item_bool_func *cond,
|
|
Field *field, bool eq_func, Item **value, uint num_values,
|
|
table_map usable_tables, SARGABLE_PARAM **sargables,
|
|
uint row_col_no= 0)
|
|
{
|
|
uint optimize= 0;
|
|
if (eq_func &&
|
|
((join->is_allowed_hash_join_access() &&
|
|
field->hash_join_is_possible() &&
|
|
!(field->table->pos_in_table_list->is_materialized_derived() &&
|
|
field->table->is_created())) ||
|
|
(field->table->pos_in_table_list->is_materialized_derived() &&
|
|
!field->table->is_created() && !(field->flags & BLOB_FLAG))))
|
|
{
|
|
optimize= KEY_OPTIMIZE_EQ;
|
|
}
|
|
else if (!(field->flags & PART_KEY_FLAG))
|
|
{
|
|
// Don't remove column IS NULL on a LEFT JOIN table
|
|
if (eq_func && (*value)->type() == Item::NULL_ITEM &&
|
|
field->table->maybe_null && !field->null_ptr)
|
|
{
|
|
optimize= KEY_OPTIMIZE_EXISTS;
|
|
DBUG_ASSERT(num_values == 1);
|
|
}
|
|
}
|
|
if (optimize != KEY_OPTIMIZE_EXISTS)
|
|
{
|
|
table_map used_tables=0;
|
|
bool optimizable=0;
|
|
for (uint i=0; i<num_values; i++)
|
|
{
|
|
Item *curr_val;
|
|
if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM)
|
|
{
|
|
Item_row *value_tuple= (Item_row *) (value[i]->real_item());
|
|
curr_val= value_tuple->element_index(row_col_no - 1);
|
|
}
|
|
else
|
|
curr_val= value[i];
|
|
table_map value_used_tables= curr_val->used_tables();
|
|
used_tables|= value_used_tables;
|
|
if (!(value_used_tables & (field->table->map | RAND_TABLE_BIT)))
|
|
optimizable=1;
|
|
}
|
|
if (!optimizable)
|
|
return;
|
|
if (!(usable_tables & field->table->map))
|
|
{
|
|
if (!eq_func || (*value)->type() != Item::NULL_ITEM ||
|
|
!field->table->maybe_null || field->null_ptr)
|
|
return; // Can't use left join optimize
|
|
optimize= KEY_OPTIMIZE_EXISTS;
|
|
}
|
|
else
|
|
{
|
|
JOIN_TAB *stat=field->table->reginfo.join_tab;
|
|
key_map possible_keys=field->get_possible_keys();
|
|
possible_keys.intersect(field->table->keys_in_use_for_query);
|
|
stat[0].keys.merge(possible_keys); // Add possible keys
|
|
|
|
/*
|
|
Save the following cases:
|
|
Field op constant
|
|
Field LIKE constant where constant doesn't start with a wildcard
|
|
Field = field2 where field2 is in a different table
|
|
Field op formula
|
|
Field IS NULL
|
|
Field IS NOT NULL
|
|
Field BETWEEN ...
|
|
Field IN ...
|
|
*/
|
|
if (field->flags & PART_KEY_FLAG)
|
|
{
|
|
stat[0].key_dependent|= used_tables;
|
|
if (field->key_start.bits_set())
|
|
stat[0].key_start_dependent= 1;
|
|
}
|
|
|
|
bool is_const=1;
|
|
for (uint i=0; i<num_values; i++)
|
|
{
|
|
Item *curr_val;
|
|
if (row_col_no && value[i]->real_item()->type() == Item::ROW_ITEM)
|
|
{
|
|
Item_row *value_tuple= (Item_row *) (value[i]->real_item());
|
|
curr_val= value_tuple->element_index(row_col_no - 1);
|
|
}
|
|
else
|
|
curr_val= value[i];
|
|
if (!(is_const&= curr_val->const_item()))
|
|
break;
|
|
}
|
|
if (is_const)
|
|
{
|
|
stat[0].const_keys.merge(possible_keys);
|
|
bitmap_set_bit(&field->table->cond_set, field->field_index);
|
|
}
|
|
else if (!eq_func)
|
|
{
|
|
/*
|
|
Save info to be able check whether this predicate can be
|
|
considered as sargable for range analysis after reading const tables.
|
|
We do not save info about equalities as update_const_equal_items
|
|
will take care of updating info on keys from sargable equalities.
|
|
*/
|
|
(*sargables)--;
|
|
(*sargables)->field= field;
|
|
(*sargables)->arg_value= value;
|
|
(*sargables)->num_values= num_values;
|
|
}
|
|
if (!eq_func) // eq_func is NEVER true when num_values > 1
|
|
return;
|
|
}
|
|
}
|
|
/*
|
|
For the moment eq_func is always true. This slot is reserved for future
|
|
extensions where we want to remembers other things than just eq comparisons
|
|
*/
|
|
DBUG_ASSERT(eq_func);
|
|
/* Store possible eq field */
|
|
(*key_fields)->field= field;
|
|
(*key_fields)->eq_func= eq_func;
|
|
(*key_fields)->val= *value;
|
|
(*key_fields)->cond= cond;
|
|
(*key_fields)->level= and_level;
|
|
(*key_fields)->optimize= optimize;
|
|
/*
|
|
If the condition we are analyzing is NULL-rejecting and at least
|
|
one side of the equalities is NULLable, mark the KEY_FIELD object as
|
|
null-rejecting. This property is used by:
|
|
- add_not_null_conds() to add "column IS NOT NULL" conditions
|
|
- best_access_path() to produce better estimates for NULL-able unique keys.
|
|
*/
|
|
{
|
|
if ((cond->functype() == Item_func::EQ_FUNC ||
|
|
cond->functype() == Item_func::MULT_EQUAL_FUNC) &&
|
|
((*value)->maybe_null() || field->real_maybe_null()))
|
|
(*key_fields)->null_rejecting= true;
|
|
else
|
|
(*key_fields)->null_rejecting= false;
|
|
}
|
|
(*key_fields)->cond_guard= NULL;
|
|
|
|
(*key_fields)->sj_pred_no= get_semi_join_select_list_index(field);
|
|
(*key_fields)++;
|
|
}
|
|
|
|
/**
|
|
Add possible keys to array of possible keys originated from a simple
|
|
predicate.
|
|
|
|
@param key_fields Pointer to add key, if usable
|
|
@param and_level And level, to be stored in KEY_FIELD
|
|
@param cond Condition predicate
|
|
@param field_item Field item used for comparison
|
|
@param eq_func True if we used =, <=> or IS NULL
|
|
@param value Value used for comparison with field_item
|
|
@param num_values Number of values[] that we are comparing against
|
|
@param usable_tables Tables which can be used for key optimization
|
|
@param sargables IN/OUT Array of found sargable candidates
|
|
@param row_col_no if = n that > 0 then field is compared only
|
|
against the n-th component of row values
|
|
|
|
@note
|
|
If field items f1 and f2 belong to the same multiple equality and
|
|
a key is added for f1, the the same key is added for f2.
|
|
|
|
@returns
|
|
*key_fields is incremented if we stored a key in the array
|
|
*/
|
|
|
|
static void
|
|
add_key_equal_fields(JOIN *join, KEY_FIELD **key_fields, uint and_level,
|
|
Item_bool_func *cond, Item *field_item,
|
|
bool eq_func, Item **val,
|
|
uint num_values, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables, uint row_col_no= 0)
|
|
{
|
|
Field *field= ((Item_field *) (field_item->real_item()))->field;
|
|
add_key_field(join, key_fields, and_level, cond, field,
|
|
eq_func, val, num_values, usable_tables, sargables,
|
|
row_col_no);
|
|
Item_equal *item_equal= field_item->get_item_equal();
|
|
if (item_equal)
|
|
{
|
|
/*
|
|
Add to the set of possible key values every substitution of
|
|
the field for an equal field included into item_equal
|
|
*/
|
|
Item_equal_fields_iterator it(*item_equal);
|
|
while (it++)
|
|
{
|
|
Field *equal_field= it.get_curr_field();
|
|
if (!field->eq(equal_field))
|
|
{
|
|
add_key_field(join, key_fields, and_level, cond, equal_field,
|
|
eq_func, val, num_values, usable_tables,
|
|
sargables, row_col_no);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Check if an expression is a non-outer field.
|
|
|
|
Checks if an expression is a field and belongs to the current select.
|
|
|
|
@param field Item expression to check
|
|
|
|
@return boolean
|
|
@retval TRUE the expression is a local field
|
|
@retval FALSE it's something else
|
|
*/
|
|
|
|
static bool
|
|
is_local_field (Item *field)
|
|
{
|
|
return field->real_item()->type() == Item::FIELD_ITEM
|
|
&& !(field->used_tables() & OUTER_REF_TABLE_BIT)
|
|
&& !((Item_field *)field->real_item())->get_depended_from();
|
|
}
|
|
|
|
|
|
/*
|
|
In this and other functions, and_level is a number that is ever-growing
|
|
and is different for the contents of every AND or OR clause. For example,
|
|
when processing clause
|
|
|
|
(a AND b AND c) OR (x AND y)
|
|
|
|
we'll have
|
|
* KEY_FIELD elements for (a AND b AND c) are assigned and_level=1
|
|
* KEY_FIELD elements for (x AND y) are assigned and_level=2
|
|
* OR operation is performed, and whatever elements are left after it are
|
|
assigned and_level=3.
|
|
|
|
The primary reason for having and_level attribute is the OR operation which
|
|
uses and_level to mark KEY_FIELDs that should get into the result of the OR
|
|
operation
|
|
*/
|
|
|
|
|
|
void
|
|
Item_cond_and::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
List_iterator_fast<Item> li(*argument_list());
|
|
KEY_FIELD *org_key_fields= *key_fields;
|
|
|
|
Item *item;
|
|
while ((item=li++))
|
|
item->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
for (; org_key_fields != *key_fields ; org_key_fields++)
|
|
org_key_fields->level= *and_level;
|
|
}
|
|
|
|
|
|
void
|
|
Item_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
List_iterator_fast<Item> li(*argument_list());
|
|
KEY_FIELD *org_key_fields= *key_fields;
|
|
|
|
(*and_level)++;
|
|
(li++)->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
KEY_FIELD *start_key_fields= *key_fields;
|
|
(*and_level)++;
|
|
item->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
*key_fields= merge_key_fields(org_key_fields,start_key_fields,
|
|
*key_fields, ++(*and_level));
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_trig_cond::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
/*
|
|
Subquery optimization: Conditions that are pushed down into subqueries
|
|
are wrapped into Item_func_trig_cond. We process the wrapped condition
|
|
but need to set cond_guard for KEYUSE elements generated from it.
|
|
*/
|
|
if (!join->group_list && !join->order &&
|
|
join->unit->item &&
|
|
join->unit->item->substype() == Item_subselect::IN_SUBS &&
|
|
!join->unit->is_unit_op())
|
|
{
|
|
KEY_FIELD *save= *key_fields;
|
|
args[0]->add_key_fields(join, key_fields, and_level, usable_tables,
|
|
sargables);
|
|
// Indicate that this ref access candidate is for subquery lookup:
|
|
for (; save != *key_fields; save++)
|
|
save->cond_guard= get_trig_var();
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_between::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
/*
|
|
Build list of possible keys for 'a BETWEEN low AND high'.
|
|
It is handled similar to the equivalent condition
|
|
'a >= low AND a <= high':
|
|
*/
|
|
Item_field *field_item;
|
|
bool equal_func= false;
|
|
uint num_values= 2;
|
|
|
|
bool binary_cmp= (args[0]->real_item()->type() == Item::FIELD_ITEM)
|
|
? ((Item_field*) args[0]->real_item())->field->binary()
|
|
: true;
|
|
/*
|
|
Additional optimization: If 'low = high':
|
|
Handle as if the condition was "t.key = low".
|
|
*/
|
|
if (!negated && args[1]->eq(args[2], binary_cmp))
|
|
{
|
|
equal_func= true;
|
|
num_values= 1;
|
|
}
|
|
|
|
/*
|
|
Append keys for 'field <cmp> value[]' if the
|
|
condition is of the form::
|
|
'<field> BETWEEN value[1] AND value[2]'
|
|
*/
|
|
if (is_local_field(args[0]))
|
|
{
|
|
field_item= (Item_field *) (args[0]->real_item());
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
field_item, equal_func, &args[1],
|
|
num_values, usable_tables, sargables);
|
|
}
|
|
/*
|
|
Append keys for 'value[0] <cmp> field' if the
|
|
condition is of the form:
|
|
'value[0] BETWEEN field1 AND field2'
|
|
*/
|
|
for (uint i= 1; i <= num_values; i++)
|
|
{
|
|
if (is_local_field(args[i]))
|
|
{
|
|
field_item= (Item_field *) (args[i]->real_item());
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
field_item, equal_func, args,
|
|
1, usable_tables, sargables);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_in::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
DBUG_ASSERT(arg_count != 2);
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) (args[0]->real_item()), false,
|
|
args + 1, arg_count - 1, usable_tables, sargables);
|
|
}
|
|
else if (key_item()->type() == Item::ROW_ITEM &&
|
|
!(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
Item_row *key_row= (Item_row *) key_item();
|
|
Item **key_col= key_row->addr(0);
|
|
uint row_cols= key_row->cols();
|
|
for (uint i= 0; i < row_cols; i++, key_col++)
|
|
{
|
|
if (is_local_field(*key_col))
|
|
{
|
|
Item_field *field_item= (Item_field *)((*key_col)->real_item());
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
field_item, false, args + 1, arg_count - 1,
|
|
usable_tables, sargables, i + 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_ne::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
if (!(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
/*
|
|
QQ: perhaps test for !is_local_field(args[1]) is not really needed here.
|
|
Other comparison functions, e.g. Item_func_le, Item_func_gt, etc,
|
|
do not have this test. See Item_bool_func2::add_key_fieldoptimize_op().
|
|
Check with the optimizer team.
|
|
*/
|
|
if (is_local_field(args[0]) && !is_local_field(args[1]))
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) (args[0]->real_item()), false,
|
|
&args[1], 1, usable_tables, sargables);
|
|
/*
|
|
QQ: perhaps test for !is_local_field(args[0]) is not really needed here.
|
|
*/
|
|
if (is_local_field(args[1]) && !is_local_field(args[0]))
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) (args[1]->real_item()), false,
|
|
&args[0], 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_like::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
if (is_local_field(args[0]) && with_sargable_pattern())
|
|
{
|
|
/*
|
|
SELECT * FROM t1 WHERE field LIKE const_pattern
|
|
const_pattern starts with a non-wildcard character
|
|
*/
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[0]->real_item(), false,
|
|
args + 1, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_bool_func2::add_key_fields_optimize_op(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level,
|
|
table_map usable_tables,
|
|
SARGABLE_PARAM **sargables,
|
|
bool equal_func)
|
|
{
|
|
/* If item is of type 'field op field/constant' add it to key_fields */
|
|
if (is_local_field(args[0]))
|
|
{
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[0]->real_item(), equal_func,
|
|
args + 1, 1, usable_tables, sargables);
|
|
}
|
|
if (is_local_field(args[1]))
|
|
{
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[1]->real_item(), equal_func,
|
|
args, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_func_null_predicate::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level,
|
|
table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
/* column_name IS [NOT] NULL */
|
|
if (is_local_field(args[0]) && !(used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
Item *tmp= new (join->thd->mem_root) Item_null(join->thd);
|
|
if (unlikely(!tmp)) // Should never be true
|
|
return;
|
|
add_key_equal_fields(join, key_fields, *and_level, this,
|
|
(Item_field*) args[0]->real_item(),
|
|
functype() == Item_func::ISNULL_FUNC,
|
|
&tmp, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Item_equal::add_key_fields(JOIN *join, KEY_FIELD **key_fields,
|
|
uint *and_level, table_map usable_tables,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
Item *const_item2= get_const();
|
|
Item_equal_fields_iterator it(*this);
|
|
if (const_item2)
|
|
{
|
|
|
|
/*
|
|
For each field field1 from item_equal consider the equality
|
|
field1=const_item as a condition allowing an index access of the table
|
|
with field1 by the keys value of field1.
|
|
*/
|
|
while (it++)
|
|
{
|
|
Field *equal_field= it.get_curr_field();
|
|
add_key_field(join, key_fields, *and_level, this, equal_field,
|
|
TRUE, &const_item2, 1, usable_tables, sargables);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Consider all pairs of different fields included into item_equal.
|
|
For each of them (field1, field1) consider the equality
|
|
field1=field2 as a condition allowing an index access of the table
|
|
with field1 by the keys value of field2.
|
|
*/
|
|
Item_equal_fields_iterator fi(*this);
|
|
while (fi++)
|
|
{
|
|
Field *field= fi.get_curr_field();
|
|
Item *item;
|
|
while ((item= it++))
|
|
{
|
|
Field *equal_field= it.get_curr_field();
|
|
if (!field->eq(equal_field))
|
|
{
|
|
add_key_field(join, key_fields, *and_level, this, field,
|
|
TRUE, &item, 1, usable_tables,
|
|
sargables);
|
|
}
|
|
}
|
|
it.rewind();
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
  Return 0 for an empty map. Otherwise find the first zero bit of 'bits'
  by inverting all bits and asking my_find_first_bit() for the first bit
  of the result.
*/
static inline uint
max_part_bit(key_part_map bits)
{
  return bits ? my_find_first_bit(~(ulonglong) bits) : 0;
}
|
|
|
|
|
|
/**
|
|
Add a new keuse to the specified array of KEYUSE objects
|
|
|
|
@param[in,out] keyuse_array array of keyuses to be extended
|
|
@param[in] key_field info on the key use occurrence
|
|
@param[in] key key number for the keyse to be added
|
|
@param[in] part key part for the keyuse to be added
|
|
|
|
@note
|
|
The function builds a new KEYUSE object for a key use utilizing the info
|
|
on the left and right parts of the given key use extracted from the
|
|
structure key_field, the key number and key part for this key use.
|
|
The built object is added to the dynamic array keyuse_array.
|
|
|
|
@retval 0 the built object is successfully added
|
|
@retval 1 otherwise
|
|
*/
|
|
|
|
static bool
|
|
add_keyuse(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field,
|
|
uint key, uint part)
|
|
{
|
|
KEYUSE keyuse;
|
|
Field *field= key_field->field;
|
|
|
|
keyuse.table= field->table;
|
|
keyuse.val= key_field->val;
|
|
keyuse.key= key;
|
|
if (!is_hash_join_key_no(key))
|
|
{
|
|
keyuse.keypart=part;
|
|
keyuse.keypart_map= (key_part_map) 1 << part;
|
|
}
|
|
else
|
|
{
|
|
keyuse.keypart= field->field_index;
|
|
keyuse.keypart_map= (key_part_map) 0;
|
|
}
|
|
keyuse.used_tables= key_field->val->used_tables();
|
|
keyuse.optimize= key_field->optimize & KEY_OPTIMIZE_REF_OR_NULL;
|
|
keyuse.ref_table_rows= 0;
|
|
keyuse.null_rejecting= key_field->null_rejecting;
|
|
keyuse.cond_guard= key_field->cond_guard;
|
|
keyuse.sj_pred_no= key_field->sj_pred_no;
|
|
keyuse.validity_ref= 0;
|
|
return (insert_dynamic(keyuse_array,(uchar*) &keyuse));
|
|
}
|
|
|
|
|
|
/*
|
|
Add all keys with uses 'field' for some keypart
|
|
If field->and_level != and_level then only mark key_part as const_part
|
|
|
|
RETURN
|
|
0 - OK
|
|
1 - Out of memory.
|
|
*/
|
|
|
|
static LEX_CSTRING equal_str= { STRING_WITH_LEN("=") };
|
|
|
|
static bool add_key_part(DYNAMIC_ARRAY *keyuse_array, KEY_FIELD *key_field)
|
|
{
|
|
Field *field=key_field->field;
|
|
TABLE *form= field->table;
|
|
THD *thd= form->in_use;
|
|
|
|
if (key_field->eq_func && !(key_field->optimize & KEY_OPTIMIZE_EXISTS))
|
|
{
|
|
for (uint key=0 ; key < form->s->keys ; key++)
|
|
{
|
|
if (!(form->keys_in_use_for_query.is_set(key)))
|
|
continue;
|
|
if (form->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT ||
|
|
form->key_info[key].algorithm == HA_KEY_ALG_RTREE ||
|
|
form->key_info[key].algorithm == HA_KEY_ALG_VECTOR)
|
|
continue;
|
|
|
|
KEY *keyinfo= form->key_info+key;
|
|
uint key_parts= form->actual_n_key_parts(keyinfo);
|
|
for (uint part=0 ; part < key_parts ; part++)
|
|
{
|
|
if (field->eq(form->key_info[key].key_part[part].field))
|
|
{
|
|
Data_type_compatibility compat=
|
|
field->can_optimize_keypart_ref(key_field->cond, key_field->val);
|
|
if (compat == Data_type_compatibility::OK)
|
|
{
|
|
if (add_keyuse(keyuse_array, key_field, key, part))
|
|
return TRUE;
|
|
}
|
|
else if (thd->give_notes_for_unusable_keys())
|
|
{
|
|
field->raise_note_cannot_use_key_part(thd, key, part,
|
|
equal_str,
|
|
key_field->cond->compare_collation(),
|
|
key_field->val,
|
|
compat);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (field->hash_join_is_possible() &&
|
|
(key_field->optimize & KEY_OPTIMIZE_EQ) &&
|
|
key_field->val->used_tables())
|
|
{
|
|
if (field->can_optimize_hash_join(key_field->cond, key_field->val) !=
|
|
Data_type_compatibility::OK)
|
|
return false;
|
|
if (form->is_splittable())
|
|
form->add_splitting_info_for_key_field(key_field);
|
|
/*
|
|
If a key use is extracted from an equi-join predicate then it is
|
|
added not only as a key use for every index whose component can
|
|
be evalusted utilizing this key use, but also as a key use for
|
|
hash join. Such key uses are marked with a special key number.
|
|
*/
|
|
if (add_keyuse(keyuse_array, key_field, get_hash_join_key_no(), 0))
|
|
return TRUE;
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
static bool
|
|
add_ft_keys(DYNAMIC_ARRAY *keyuse_array,
|
|
JOIN_TAB *stat,COND *cond,table_map usable_tables)
|
|
{
|
|
Item_func_match *cond_func=NULL;
|
|
|
|
if (!cond)
|
|
return FALSE;
|
|
|
|
if (cond->type() == Item::FUNC_ITEM)
|
|
{
|
|
Item_func *func=(Item_func *)cond;
|
|
Item_func::Functype functype= func->functype();
|
|
if (functype == Item_func::FT_FUNC)
|
|
cond_func=(Item_func_match *)cond;
|
|
else if (func->argument_count() == 2)
|
|
{
|
|
Item *arg0=(Item *)(func->arguments()[0]),
|
|
*arg1=(Item *)(func->arguments()[1]);
|
|
if (arg1->const_item() && arg1->cols() == 1 &&
|
|
arg0->type() == Item::FUNC_ITEM &&
|
|
((Item_func *) arg0)->functype() == Item_func::FT_FUNC &&
|
|
((functype == Item_func::GE_FUNC && arg1->val_real() > 0) ||
|
|
(functype == Item_func::GT_FUNC && arg1->val_real() >=0)))
|
|
cond_func= (Item_func_match *) arg0;
|
|
else if (arg0->const_item() && arg0->cols() == 1 &&
|
|
arg1->type() == Item::FUNC_ITEM &&
|
|
((Item_func *) arg1)->functype() == Item_func::FT_FUNC &&
|
|
((functype == Item_func::LE_FUNC && arg0->val_real() > 0) ||
|
|
(functype == Item_func::LT_FUNC && arg0->val_real() >=0)))
|
|
cond_func= (Item_func_match *) arg1;
|
|
}
|
|
}
|
|
else if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
|
|
|
|
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
{
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
if (add_ft_keys(keyuse_array,stat,item,usable_tables))
|
|
return TRUE;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!cond_func || cond_func->key == NO_SUCH_KEY ||
|
|
!(usable_tables & cond_func->table->map))
|
|
return FALSE;
|
|
|
|
KEYUSE keyuse;
|
|
keyuse.table= cond_func->table;
|
|
keyuse.val = cond_func;
|
|
keyuse.key = cond_func->key;
|
|
keyuse.keypart= FT_KEYPART;
|
|
keyuse.used_tables=cond_func->key_item()->used_tables();
|
|
keyuse.optimize= 0;
|
|
keyuse.ref_table_rows= 0;
|
|
keyuse.keypart_map= 0;
|
|
keyuse.sj_pred_no= UINT_MAX;
|
|
keyuse.validity_ref= 0;
|
|
keyuse.null_rejecting= FALSE;
|
|
return insert_dynamic(keyuse_array,(uchar*) &keyuse);
|
|
}
|
|
|
|
|
|
static int
|
|
sort_keyuse(KEYUSE *a,KEYUSE *b)
|
|
{
|
|
int res;
|
|
if (a->table->tablenr != b->table->tablenr)
|
|
return (int) (a->table->tablenr - b->table->tablenr);
|
|
if (a->key != b->key)
|
|
return (int) (a->key - b->key);
|
|
if (a->key == MAX_KEY && b->key == MAX_KEY &&
|
|
a->used_tables != b->used_tables)
|
|
return (int) ((ulong) a->used_tables - (ulong) b->used_tables);
|
|
if (a->keypart != b->keypart)
|
|
return (int) (a->keypart - b->keypart);
|
|
// Place const values before other ones
|
|
if ((res= MY_TEST((a->used_tables & ~OUTER_REF_TABLE_BIT)) -
|
|
MY_TEST((b->used_tables & ~OUTER_REF_TABLE_BIT))))
|
|
return res;
|
|
/* Place rows that are not 'OPTIMIZE_REF_OR_NULL' first */
|
|
return (int) ((a->optimize & KEY_OPTIMIZE_REF_OR_NULL) -
|
|
(b->optimize & KEY_OPTIMIZE_REF_OR_NULL));
|
|
}
|
|
|
|
|
|
/*
|
|
Add to KEY_FIELD array all 'ref' access candidates within nested join.
|
|
|
|
This function populates KEY_FIELD array with entries generated from the
|
|
ON condition of the given nested join, and does the same for nested joins
|
|
contained within this nested join.
|
|
|
|
@param[in] nested_join_table Nested join pseudo-table to process
|
|
@param[in,out] end End of the key field array
|
|
@param[in,out] and_level And-level
|
|
@param[in,out] sargables Array of found sargable candidates
|
|
|
|
|
|
@note
|
|
We can add accesses to the tables that are direct children of this nested
|
|
join (1), and are not inner tables w.r.t their neighbours (2).
|
|
|
|
Example for #1 (outer brackets pair denotes nested join this function is
|
|
invoked for):
|
|
@code
|
|
... LEFT JOIN (t1 LEFT JOIN (t2 ... ) ) ON cond
|
|
@endcode
|
|
Example for #2:
|
|
@code
|
|
... LEFT JOIN (t1 LEFT JOIN t2 ) ON cond
|
|
@endcode
|
|
In examples 1-2 for condition cond, we can add 'ref' access candidates to
|
|
t1 only.
|
|
Example #3:
|
|
@code
|
|
... LEFT JOIN (t1, t2 LEFT JOIN t3 ON inner_cond) ON cond
|
|
@endcode
|
|
Here we can add 'ref' access candidates for t1 and t2, but not for t3.
|
|
*/
|
|
|
|
static void add_key_fields_for_nj(JOIN *join, TABLE_LIST *nested_join_table,
|
|
KEY_FIELD **end, uint *and_level,
|
|
SARGABLE_PARAM **sargables)
|
|
{
|
|
List_iterator<TABLE_LIST> li(nested_join_table->nested_join->join_list);
|
|
List_iterator<TABLE_LIST> li2(nested_join_table->nested_join->join_list);
|
|
bool have_another = FALSE;
|
|
table_map tables= 0;
|
|
TABLE_LIST *table;
|
|
DBUG_ASSERT(nested_join_table->nested_join);
|
|
|
|
while ((table= li++) || (have_another && (li=li2, have_another=FALSE,
|
|
(table= li++))))
|
|
{
|
|
if (table->nested_join)
|
|
{
|
|
if (!table->on_expr)
|
|
{
|
|
/* It's a semi-join nest. Walk into it as if it wasn't a nest */
|
|
have_another= TRUE;
|
|
li2= li;
|
|
li= List_iterator<TABLE_LIST>(table->nested_join->join_list);
|
|
}
|
|
else
|
|
add_key_fields_for_nj(join, table, end, and_level, sargables);
|
|
}
|
|
else
|
|
if (!table->on_expr)
|
|
tables |= table->table->map;
|
|
}
|
|
if (nested_join_table->on_expr)
|
|
nested_join_table->on_expr->add_key_fields(join, end, and_level, tables,
|
|
sargables);
|
|
}
|
|
|
|
|
|
/*
  Run Item::count_sargable_conds over the ON expression of the given nested
  join and, recursively, over the ON expressions of the nested joins it
  contains, passing 'sel' as the argument of the walk.

  A contained nest without an ON expression is a semi-join nest; its
  members are walked as if they belonged to the enclosing list.
*/
void count_cond_for_nj(SELECT_LEX *sel, TABLE_LIST *nested_join_table)
{
  List_iterator<TABLE_LIST> cur_it(nested_join_table->nested_join->join_list);
  List_iterator<TABLE_LIST> saved_it(nested_join_table->nested_join->join_list);
  bool resume_pending= FALSE;

  for (;;)
  {
    TABLE_LIST *tbl= cur_it++;
    if (!tbl)
    {
      /* Current list is exhausted: go back to the saved position, if any */
      if (!resume_pending)
        break;
      cur_it= saved_it;
      resume_pending= FALSE;
      if (!(tbl= cur_it++))
        break;
    }

    if (!tbl->nested_join)
      continue;

    if (tbl->on_expr)
    {
      count_cond_for_nj(sel, tbl);
      continue;
    }

    /* It's a semi-join nest. Walk into it as if it wasn't a nest */
    resume_pending= TRUE;
    saved_it= cur_it;
    cur_it= List_iterator<TABLE_LIST>(tbl->nested_join->join_list);
  }

  if (nested_join_table->on_expr)
    nested_join_table->on_expr->walk(&Item::count_sargable_conds, 0, sel);
}
|
|
|
|
/**
|
|
Update keyuse array with all possible keys we can use to fetch rows.
|
|
|
|
@param thd
|
|
@param[out] keyuse Put here ordered array of KEYUSE structures
|
|
@param join_tab Array in tablenr_order
|
|
@param tables Number of tables in join
|
|
@param cond WHERE condition (note that the function analyzes
|
|
join_tab[i]->on_expr too)
|
|
@param normal_tables Tables not inner w.r.t some outer join (ones
|
|
for which we can make ref access based the WHERE
|
|
clause)
|
|
@param select_lex current SELECT
|
|
@param[out] sargables Array of found sargable candidates
|
|
|
|
@retval
|
|
0 OK
|
|
@retval
|
|
1 Out of memory.
|
|
*/
|
|
|
|
static bool
|
|
update_ref_and_keys(THD *thd, DYNAMIC_ARRAY *keyuse,JOIN_TAB *join_tab,
|
|
uint tables, COND *cond, table_map normal_tables,
|
|
SELECT_LEX *select_lex, SARGABLE_PARAM **sargables)
|
|
{
|
|
uint and_level,i;
|
|
KEY_FIELD *key_fields, *end, *field;
|
|
uint sz;
|
|
uint m= MY_MAX(select_lex->max_equal_elems,1);
|
|
DBUG_ENTER("update_ref_and_keys");
|
|
DBUG_PRINT("enter", ("normal_tables: %llx", normal_tables));
|
|
|
|
SELECT_LEX *sel=thd->lex->current_select;
|
|
sel->cond_count= 0;
|
|
sel->between_count= 0;
|
|
if (cond)
|
|
cond->walk(&Item::count_sargable_conds, 0, sel);
|
|
for (i=0 ; i < tables ; i++)
|
|
{
|
|
if (*join_tab[i].on_expr_ref)
|
|
(*join_tab[i].on_expr_ref)->walk(&Item::count_sargable_conds, 0, sel);
|
|
}
|
|
{
|
|
List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
|
|
TABLE_LIST *table;
|
|
while ((table= li++))
|
|
{
|
|
if (table->nested_join)
|
|
count_cond_for_nj(sel, table);
|
|
}
|
|
}
|
|
|
|
/*
|
|
We use the same piece of memory to store both KEY_FIELD
|
|
and SARGABLE_PARAM structure.
|
|
KEY_FIELD values are placed at the beginning this memory
|
|
while SARGABLE_PARAM values are put at the end.
|
|
All predicates that are used to fill arrays of KEY_FIELD
|
|
and SARGABLE_PARAM structures have at most 2 arguments
|
|
except BETWEEN predicates that have 3 arguments and
|
|
IN predicates.
|
|
This any predicate if it's not BETWEEN/IN can be used
|
|
directly to fill at most 2 array elements, either of KEY_FIELD
|
|
or SARGABLE_PARAM type. For a BETWEEN predicate 3 elements
|
|
can be filled as this predicate is considered as
|
|
saragable with respect to each of its argument.
|
|
An IN predicate can require at most 1 element as currently
|
|
it is considered as sargable only for its first argument.
|
|
Multiple equality can add elements that are filled after
|
|
substitution of field arguments by equal fields. There
|
|
can be not more than select_lex->max_equal_elems such
|
|
substitutions.
|
|
*/
|
|
sz= MY_MAX(sizeof(KEY_FIELD),sizeof(SARGABLE_PARAM))*
|
|
((sel->cond_count*2 + sel->between_count)*m+1);
|
|
if (!(key_fields=(KEY_FIELD*) thd->alloc(sz)))
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
and_level= 0;
|
|
field= end= key_fields;
|
|
*sargables= (SARGABLE_PARAM *) key_fields +
|
|
(sz - sizeof((*sargables)[0].field))/sizeof(SARGABLE_PARAM);
|
|
/* set a barrier for the array of SARGABLE_PARAM */
|
|
(*sargables)[0].field= 0;
|
|
|
|
if (my_init_dynamic_array2(thd->mem_root->psi_key, keyuse, sizeof(KEYUSE),
|
|
thd->alloc<KEYUSE>(20), 20, 64,
|
|
MYF(MY_THREAD_SPECIFIC)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (cond)
|
|
{
|
|
KEY_FIELD *saved_field= field;
|
|
cond->add_key_fields(join_tab->join, &end, &and_level, normal_tables,
|
|
sargables);
|
|
for (; field != end ; field++)
|
|
{
|
|
|
|
/* Mark that we can optimize LEFT JOIN */
|
|
if (field->val->type() == Item::NULL_ITEM &&
|
|
!field->field->real_maybe_null())
|
|
field->field->table->reginfo.not_exists_optimize=1;
|
|
}
|
|
field= saved_field;
|
|
}
|
|
for (i=0 ; i < tables ; i++)
|
|
{
|
|
/*
|
|
Block the creation of keys for inner tables of outer joins.
|
|
Here only the outer joins that can not be converted to
|
|
inner joins are left and all nests that can be eliminated
|
|
are flattened.
|
|
In the future when we introduce conditional accesses
|
|
for inner tables in outer joins these keys will be taken
|
|
into account as well.
|
|
*/
|
|
if (*join_tab[i].on_expr_ref)
|
|
(*join_tab[i].on_expr_ref)->add_key_fields(join_tab->join, &end,
|
|
&and_level,
|
|
join_tab[i].table->map,
|
|
sargables);
|
|
}
|
|
|
|
/* Process ON conditions for the nested joins */
|
|
{
|
|
List_iterator<TABLE_LIST> li(*join_tab->join->join_list);
|
|
TABLE_LIST *table;
|
|
while ((table= li++))
|
|
{
|
|
if (table->nested_join)
|
|
add_key_fields_for_nj(join_tab->join, table, &end, &and_level,
|
|
sargables);
|
|
}
|
|
}
|
|
|
|
/* fill keyuse with found key parts */
|
|
for ( ; field != end ; field++)
|
|
{
|
|
if (add_key_part(keyuse,field))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
if (select_lex->ftfunc_list->elements)
|
|
{
|
|
if (add_ft_keys(keyuse,join_tab,cond,normal_tables))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
/*
|
|
check if key could be used with eq_ref
|
|
|
|
The assumption is that all previous key parts where used
|
|
*/
|
|
|
|
static void remember_if_eq_ref_key(JOIN *join, KEYUSE *use)
|
|
{
|
|
DBUG_ASSERT(use->keypart != FT_KEYPART && use->key != MAX_KEY);
|
|
TABLE *table= use->table;
|
|
KEY *key= table->key_info+use->key;
|
|
ulong key_flags= table->actual_key_flags(key);
|
|
|
|
/*
|
|
Check if possible eq_ref key
|
|
This may include keys that does not have HA_NULL_PART_KEY
|
|
set, but this is ok as best_access_path will resolve this.
|
|
*/
|
|
if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)))
|
|
{
|
|
uint key_parts= table->actual_n_key_parts(key);
|
|
if (use->keypart+1 == key_parts)
|
|
join->eq_ref_tables|= table->map;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Sort the array of possible keys and remove the following key parts:
|
|
- ref if there is a keypart which is a ref and a const.
|
|
(e.g. if there is a key(a,b) and the clause is a=3 and b=7 and b=t2.d,
|
|
then we skip the key part corresponding to b=t2.d)
|
|
- keyparts without previous keyparts
|
|
(e.g. if there is a key(a,b,c) but only b < 5 (or a=2 and c < 3) is
|
|
used in the query, we drop the partial key parts from consideration).
|
|
Special treatment for ft-keys.
|
|
Update join->eq_ref_tables with a bitmap of all tables that can possible
|
|
have a EQ_REF key.
|
|
|
|
Note that the keys are generated to be used by best_access_path() during
|
|
the optimization stage. Unused keys will later be deleted by
|
|
JOIN::drop_unused_derived_keys().
|
|
*/
|
|
|
|
bool sort_and_filter_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse,
|
|
bool skip_unprefixed_keyparts)
|
|
{
|
|
THD *thd= join->thd;
|
|
KEYUSE key_end, *prev, *save_pos, *use;
|
|
uint found_eq_constant, i;
|
|
bool found_unprefixed_key_part= 0;
|
|
|
|
join->eq_ref_tables= 0;
|
|
DBUG_ASSERT(keyuse->elements);
|
|
|
|
my_qsort(keyuse->buffer, keyuse->elements, sizeof(KEYUSE),
|
|
(qsort_cmp) sort_keyuse);
|
|
|
|
bzero((char*) &key_end, sizeof(key_end)); /* Add for easy testing */
|
|
if (insert_dynamic(keyuse, (uchar*) &key_end))
|
|
return TRUE;
|
|
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_DERIVED_WITH_KEYS))
|
|
generate_derived_keys(keyuse);
|
|
|
|
use= save_pos= dynamic_element(keyuse,0,KEYUSE*);
|
|
prev= &key_end;
|
|
found_eq_constant= 0;
|
|
/* Loop over all elements except the last 'key_end' */
|
|
for (i=0 ; i < keyuse->elements-1 ; i++,use++)
|
|
{
|
|
if (!use->is_for_hash_join())
|
|
{
|
|
if (!(use->used_tables & ~OUTER_REF_TABLE_BIT) &&
|
|
use->optimize != KEY_OPTIMIZE_REF_OR_NULL)
|
|
use->table->const_key_parts[use->key]|= use->keypart_map;
|
|
if (use->keypart != FT_KEYPART)
|
|
{
|
|
if (use->key == prev->key && use->table == prev->table)
|
|
{
|
|
if (prev->keypart == use->keypart && found_eq_constant)
|
|
continue;
|
|
if (prev->keypart+1 < use->keypart)
|
|
{
|
|
found_unprefixed_key_part= 1;
|
|
if (skip_unprefixed_keyparts)
|
|
continue; /* remove */
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Key changed, check if previous key was a primary/unique key lookup
|
|
*/
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
found_unprefixed_key_part= 0;
|
|
if (use->keypart != 0)
|
|
{
|
|
found_unprefixed_key_part= 1;
|
|
if (skip_unprefixed_keyparts)
|
|
continue; /* remove - first found key part must be 0 */
|
|
}
|
|
}
|
|
}
|
|
else /* FT_KEY_PART */
|
|
{
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
found_unprefixed_key_part= 1; // This key cannot be EQ_REF
|
|
}
|
|
prev= use;
|
|
found_eq_constant= !use->used_tables;
|
|
use->table->reginfo.join_tab->checked_keys.set_bit(use->key);
|
|
}
|
|
else
|
|
{
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
prev= &key_end;
|
|
}
|
|
/*
|
|
Old gcc used a memcpy(), which is undefined if save_pos==use:
|
|
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19410
|
|
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=39480
|
|
This also disables a valgrind warning, so better to have the test.
|
|
*/
|
|
if (save_pos != use)
|
|
*save_pos= *use;
|
|
/* Save ptr to first use */
|
|
if (!use->table->reginfo.join_tab->keyuse)
|
|
use->table->reginfo.join_tab->keyuse= save_pos;
|
|
save_pos++;
|
|
}
|
|
if (prev != &key_end && !found_unprefixed_key_part)
|
|
remember_if_eq_ref_key(join, prev);
|
|
i= (uint) (save_pos-(KEYUSE*) keyuse->buffer);
|
|
(void) set_dynamic(keyuse,(uchar*) &key_end,i);
|
|
keyuse->elements= i;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Update some values in keyuse for faster choose_plan() loop.
|
|
*/
|
|
|
|
void optimize_keyuse(JOIN *join, DYNAMIC_ARRAY *keyuse_array)
|
|
{
|
|
KEYUSE *end,*keyuse= dynamic_element(keyuse_array, 0, KEYUSE*);
|
|
|
|
for (end= keyuse+ keyuse_array->elements ; keyuse < end ; keyuse++)
|
|
{
|
|
table_map map;
|
|
/*
|
|
If we find a ref, assume this table matches a proportional
|
|
part of this table.
|
|
For example 100 records matching a table with 5000 records
|
|
gives 5000/100 = 50 records per key
|
|
Constant tables are ignored.
|
|
To avoid bad matches, we don't make ref_table_rows less than 100.
|
|
*/
|
|
keyuse->ref_table_rows= ~(ha_rows) 0; // If no ref
|
|
if (keyuse->used_tables &
|
|
(map= (keyuse->used_tables & ~join->const_table_map &
|
|
~OUTER_REF_TABLE_BIT)))
|
|
{
|
|
uint n_tables= my_count_bits(map);
|
|
if (n_tables == 1) // Only one table
|
|
{
|
|
DBUG_ASSERT(!(map & PSEUDO_TABLE_BITS)); // Must be a real table
|
|
Table_map_iterator it(map);
|
|
int tablenr= it.next_bit();
|
|
DBUG_ASSERT(tablenr != Table_map_iterator::BITMAP_END);
|
|
TABLE *tmp_table=join->table[tablenr];
|
|
if (tmp_table) // already created
|
|
keyuse->ref_table_rows= MY_MAX(tmp_table->file->stats.records, 100);
|
|
}
|
|
}
|
|
/*
|
|
Outer reference (external field) is constant for single executing
|
|
of subquery
|
|
*/
|
|
if (keyuse->used_tables == OUTER_REF_TABLE_BIT)
|
|
keyuse->ref_table_rows= 1;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Check for the presence of AGGFN(DISTINCT a) queries that may be subject
|
|
to loose index scan.
|
|
|
|
Check if the query is a subject to AGGFN(DISTINCT) using loose index scan
|
|
(QUICK_GROUP_MIN_MAX_SELECT).
|
|
Optionally (if out_args is supplied) will push the arguments of
|
|
AGGFN(DISTINCT) to the list
|
|
|
|
Check for every COUNT(DISTINCT), AVG(DISTINCT) or
|
|
SUM(DISTINCT). These can be resolved by Loose Index Scan as long
|
|
as all the aggregate distinct functions refer to the same
|
|
fields. Thus:
|
|
|
|
SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT b, a)... => can use LIS
|
|
SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT a) ... => can use LIS
|
|
SELECT AGGFN(DISTINCT a, b), AGGFN(DISTINCT a) ... => cannot use LIS
|
|
SELECT AGGFN(DISTINCT a), AGGFN(DISTINCT b) ... => cannot use LIS
|
|
etc.
|
|
|
|
@param join the join to check
|
|
@param[out] out_args Collect the arguments of the aggregate functions
|
|
to a list. We don't worry about duplicates as
|
|
these will be sorted out later in
|
|
get_best_group_min_max.
|
|
|
|
@return does the query qualify for indexed AGGFN(DISTINCT)
|
|
@retval true it does
|
|
@retval false AGGFN(DISTINCT) must apply distinct in it.
|
|
*/
|
|
|
|
bool
|
|
is_indexed_agg_distinct(JOIN *join, List<Item_field> *out_args)
|
|
{
|
|
Item_sum **sum_item_ptr;
|
|
bool result= false;
|
|
|
|
if (join->table_count != 1 || /* reference more than 1 table */
|
|
join->select_distinct || /* or a DISTINCT */
|
|
join->select_lex->olap == ROLLUP_TYPE) /* Check (B3) for ROLLUP */
|
|
return false;
|
|
|
|
Bitmap<MAX_FIELDS> first_aggdistinct_fields;
|
|
bool first_aggdistinct_fields_initialized= false;
|
|
for (sum_item_ptr= join->sum_funcs; *sum_item_ptr; sum_item_ptr++)
|
|
{
|
|
Item_sum *sum_item= *sum_item_ptr;
|
|
Item *expr;
|
|
/* aggregate is not AGGFN(DISTINCT) or more than 1 argument to it */
|
|
switch (sum_item->sum_func())
|
|
{
|
|
case Item_sum::MIN_FUNC:
|
|
case Item_sum::MAX_FUNC:
|
|
continue;
|
|
case Item_sum::COUNT_DISTINCT_FUNC:
|
|
break;
|
|
case Item_sum::AVG_DISTINCT_FUNC:
|
|
case Item_sum::SUM_DISTINCT_FUNC:
|
|
if (sum_item->get_arg_count() == 1)
|
|
break;
|
|
/* fall through */
|
|
default: return false;
|
|
}
|
|
/*
|
|
We arrive here for every COUNT(DISTINCT),AVG(DISTINCT) or SUM(DISTINCT).
|
|
Collect the arguments of the aggregate functions to a list.
|
|
We don't worry about duplicates as these will be sorted out later in
|
|
get_best_group_min_max
|
|
*/
|
|
Bitmap<MAX_FIELDS> cur_aggdistinct_fields;
|
|
cur_aggdistinct_fields.clear_all();
|
|
for (uint i= 0; i < sum_item->get_arg_count(); i++)
|
|
{
|
|
expr= sum_item->get_arg(i);
|
|
/* The AGGFN(DISTINCT) arg is not an attribute? */
|
|
if (expr->real_item()->type() != Item::FIELD_ITEM)
|
|
return false;
|
|
|
|
Item_field* item= static_cast<Item_field*>(expr->real_item());
|
|
if (out_args)
|
|
out_args->push_back(item, join->thd->mem_root);
|
|
|
|
cur_aggdistinct_fields.set_bit(item->field->field_index);
|
|
result= true;
|
|
}
|
|
/*
|
|
If there are multiple aggregate functions, make sure that they all
|
|
refer to exactly the same set of columns.
|
|
*/
|
|
if (!first_aggdistinct_fields_initialized)
|
|
{
|
|
first_aggdistinct_fields= cur_aggdistinct_fields;
|
|
first_aggdistinct_fields_initialized=true;
|
|
}
|
|
else if (first_aggdistinct_fields != cur_aggdistinct_fields)
|
|
return false;
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
|
|
/**
|
|
Discover the indexes that can be used for GROUP BY or DISTINCT queries.
|
|
|
|
If the query has a GROUP BY clause, find all indexes that contain all
|
|
GROUP BY fields, and add those indexes to join->const_keys.
|
|
|
|
If the query has a DISTINCT clause, find all indexes that contain all
|
|
SELECT fields, and add those indexes to join->const_keys.
|
|
This allows later on such queries to be processed by a
|
|
QUICK_GROUP_MIN_MAX_SELECT.
|
|
|
|
@param join
|
|
@param join_tab
|
|
|
|
@return
|
|
None
|
|
*/
|
|
|
|
static void
|
|
add_group_and_distinct_keys(JOIN *join, JOIN_TAB *join_tab)
|
|
{
|
|
List<Item_field> indexed_fields;
|
|
List_iterator<Item_field> indexed_fields_it(indexed_fields);
|
|
ORDER *cur_group;
|
|
Item_field *cur_item;
|
|
key_map possible_keys(0);
|
|
|
|
if (join->group_list)
|
|
{ /* Collect all query fields referenced in the GROUP clause. */
|
|
for (cur_group= join->group_list; cur_group; cur_group= cur_group->next)
|
|
(*cur_group->item)->walk(&Item::collect_item_field_processor, 0,
|
|
&indexed_fields);
|
|
}
|
|
else if (join->select_distinct)
|
|
{ /* Collect all query fields referenced in the SELECT clause. */
|
|
List<Item> &select_items= join->fields_list;
|
|
List_iterator<Item> select_items_it(select_items);
|
|
Item *item;
|
|
while ((item= select_items_it++))
|
|
item->walk(&Item::collect_item_field_processor, 0, &indexed_fields);
|
|
}
|
|
else if (!join->tmp_table_param.sum_func_count ||
|
|
!is_indexed_agg_distinct(join, &indexed_fields))
|
|
{
|
|
/*
|
|
There where no GROUP BY fields and also either no aggregate
|
|
functions or not all aggregate functions where used with the
|
|
same DISTINCT (or MIN() / MAX() that works similarly).
|
|
Nothing to do there.
|
|
*/
|
|
return;
|
|
}
|
|
|
|
if (indexed_fields.elements == 0)
|
|
{
|
|
/* There where no index we could use to satisfy the GROUP BY */
|
|
return;
|
|
}
|
|
|
|
/* Intersect the keys of all group fields. */
|
|
cur_item= indexed_fields_it++;
|
|
possible_keys.merge(cur_item->field->part_of_key);
|
|
while ((cur_item= indexed_fields_it++))
|
|
{
|
|
possible_keys.intersect(cur_item->field->part_of_key);
|
|
}
|
|
|
|
if (!possible_keys.is_clear_all())
|
|
join_tab->const_keys.merge(possible_keys);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Go through all combinations of not marked tables and find the one
|
|
which uses least records
|
|
*****************************************************************************/
|
|
|
|
/** Save const tables first as used tables. */
|
|
|
|
void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key)
{
  /*
    Fill join->positions[idx] with the values used for a const table:
    the row estimates and the selectivity are all 1.
  */
  auto &slot= join->positions[idx];

  slot.table= table;
  slot.key= key;
  slot.records_read= 1.0;
  slot.records_out= 1.0;
  slot.records_init= 1.0;
  slot.cond_selectivity= 1.0;
  slot.partial_join_cardinality= 1;
  slot.ref_depend_map= 0;

  /* No semi-join strategy, join buffer, rowid filter or split plan */
  /* (loosescan_key is intentionally left untouched here) */
  slot.sj_strategy= SJ_OPT_NONE;
  slot.use_join_buffer= FALSE;
  slot.range_rowid_filter_info= 0;
  slot.spl_plan= 0;
  slot.spl_pd_boundary= 0;

  /*
    Put 'table' at best_ref[idx]. The entries from best_ref[idx] up to the
    one just before the old location of 'table' are shifted one step
    towards the end, so their relative order is kept.
  */
  JOIN_TAB **ref= join->best_ref;
  JOIN_TAB *displaced= ref[idx];
  for (uint i= idx + 1; displaced != table; i++)
  {
    JOIN_TAB *evicted= ref[i];
    ref[i]= displaced;
    displaced= evicted;
  }
  ref[idx]= table;
}
|
|
|
|
|
|
/*
|
|
Estimate how many records we will get if we read just this table and apply
|
|
a part of WHERE that can be checked using only the current table and
|
|
const tables.
|
|
|
|
@param s Current JOIN_TAB
|
|
@param use_cond_selectivity Value of optimizer_use_condition_selectivity.
|
|
If > 1 then use table->cond_selectivity.
|
|
@return 0.0 No matching rows
|
|
@return >= 1.0 Number of expected matching rows
|
|
|
|
Estimate how many records we will get if we
|
|
- read the given table with its "independent" access method (either quick
|
|
select or full table/index scan),
|
|
- apply the part of WHERE that refers only to this table and const tables.
|
|
- The result cannot be bigger than table records
|
|
|
|
@see also
|
|
table_after_join_selectivity() produces selectivity of condition that is
|
|
checked after joining rows from this table to rows from preceding tables.
|
|
*/
|
|
|
|
static double apply_selectivity_for_table(JOIN_TAB *s,
                                          uint use_cond_selectivity)
{
  TABLE *const tbl= s->table;
  double est_rows;

  if (use_cond_selectivity <= 1)
  {
    /*
      Condition selectivity is not used. Take the smaller of the range
      optimizer's row estimate and s->records.
      The two are normally expected to agree with s->found_records when
      there are no constant key parts; s->records may be smaller when
      'best splitting' is used.
    */
    DBUG_ASSERT(tbl->opt_range_condition_rows <= s->found_records);
    est_rows= rows2double(MY_MIN(tbl->opt_range_condition_rows, s->records));
  }
  else
  {
    const double selectivity= tbl->cond_selectivity;
    const double all_rows= rows2double(s->records);

    DBUG_ASSERT(selectivity >= 0 && selectivity <= 1.0);
    /*
      table->cond_selectivity is expected to already include the range
      optimizer estimate; verify this unless the table has no rows.
    */
    DBUG_ASSERT(all_rows == 0 ||
                selectivity <= tbl->opt_range_condition_rows / all_rows);
    est_rows= all_rows * selectivity;
  }

  DBUG_ASSERT(est_rows <= s->records);

  /*
    0.0 is returned only when the estimate is exactly zero. Any other
    estimate is raised to at least MIN_ROWS_AFTER_FILTERING, so that a
    join with this table is never estimated to produce fewer rows than
    the minimum.
  */
  if (!est_rows)
    return 0.0;
  return MY_MAX(est_rows, MIN_ROWS_AFTER_FILTERING);
}
|
|
|
|
|
|
/*
|
|
Take into account that the table's WHERE clause has conditions on earlier
|
|
tables that can reduce the number of accepted rows.
|
|
|
|
@param records Number of original rows (after selectivity)
|
|
|
|
If there is a filtering condition on the table (i.e. ref analyzer found
|
|
at least one "table.keyXpartY= exprZ", where exprZ refers only to tables
|
|
preceding this table in the join order we're now considering), then
|
|
assume that 25% of the rows will be filtered out by this condition.
|
|
|
|
This heuristic is supposed to force tables used in exprZ to be before
|
|
this table in join order.
|
|
*/
|
|
inline double use_found_constraint(double records)
|
|
{
|
|
records-= records/4;
|
|
return records ? MY_MAX(records, MIN_ROWS_AFTER_FILTERING) : 0.0;
|
|
}
|
|
|
|
|
|
/*
|
|
Calculate the cost of reading a set of rows through an index
|
|
|
|
@param eq_ref True if there is only one matching key (EQ_REF)
|
|
|
|
Logically this is identical to the code in multi_range_read_info_const()
|
|
except that this function also takes into account io_blocks and multiple
|
|
ranges.
|
|
|
|
One main difference between the functions is that
|
|
multi_range_read_info_const() adds a very small cost per range
|
|
MULTI_RANGE_READ_SETUP_COST, to ensure that 'ref' is preferred
|
|
over ranges.
|
|
|
|
Note that this function assumes that index_only_cost is only to be
|
|
used with filtering (as cost.read_cost takes into account both
|
|
clustering and covered keys). index_only_cost does not include
|
|
KEY_COPY_COST as for filtering there is no copying of not accepted
|
|
keys.
|
|
|
|
If eq_ref is not set, it means that we have to do one extra 'read_next'
|
|
on the index to verify that there is not more keys with the same value.
|
|
|
|
WHERE_COST cost is not added to any result.
|
|
*/
|
|
|
|
static ALL_READ_COST cost_for_index_read(const THD *thd, const TABLE *table,
                                         uint key, ha_rows records,
                                         bool eq_ref)
{
  handler *const h= table->file;
  const ha_rows seek_limit= (ha_rows) thd->variables.max_seeks_for_key;
  ALL_READ_COST result;
  DBUG_ENTER("cost_for_index_read");

  /* Always cost at least one row */
  if (records < 1)
    records= 1;

  /* Without eq_ref one more key is read to detect the end of the group */
  const ha_rows keys_to_read= records + (eq_ref ? 0 : 1);

  if (h->is_clustering_key(key))
  {
    /* Clustered key: the index read delivers the row, no row lookup */
    result.index_cost= h->ha_keyread_clustered_time(key, 1, keys_to_read, 0);
    result.row_cost= {0,0};
    result.copy_cost= rows2double(records) * h->ROW_COPY_COST;
    /* Capping of index_blocks will happen in handler::cost() */
    result.max_index_blocks= MY_MIN(h->row_blocks(), seek_limit);
    result.max_row_blocks= 0;
  }
  else if (table->covering_keys.is_set(key) && !table->no_keyread)
  {
    /* Covering key: only key values are read and copied */
    result.index_cost= h->ha_keyread_time(key, 1, keys_to_read, 0);
    result.row_cost= {0,0};
    result.copy_cost= rows2double(records) * h->KEY_COPY_COST;
    result.max_index_blocks= MY_MIN(h->index_blocks(key), seek_limit);
    result.max_row_blocks= 0;
  }
  else
  {
    /*
      Ordinary key: index read followed by a row fetch per record.
      ha_rnd_pos_time() includes the time for copying the row, so no
      separate copy cost is added.
    */
    result.index_cost= h->ha_keyread_time(key, 1, keys_to_read, 0);
    result.row_cost= h->ha_rnd_pos_time(records);
    result.max_index_blocks= MY_MIN(h->index_blocks(key), seek_limit);
    result.max_row_blocks= MY_MIN(h->row_blocks(), seek_limit);
    result.copy_cost= 0;
  }

  DBUG_PRINT("statistics", ("index_cost: %.3f  row_cost: %.3f",
                            h->cost(result.index_cost),
                            h->cost(result.row_cost)));
  DBUG_RETURN(result);
}
|
|
|
|
|
|
/**
|
|
Apply filter if the filter is better than the current cost
|
|
|
|
@param thd Thread handler
|
|
@param table Table
|
|
@param cost            Pointer to the current access cost, which does not
                       include WHERE_COST. Updated to the new cost if the
                       filter is chosen.
@param records_arg     Pointer to number of records for the current key.
                       Updated to the number of records after filtering,
                       if the filter is used.
@param startup_cost    Startup cost. Will be updated if filter is used.
@param ranges          Not referenced in the function body.
@param prev_records    Number of record combinations in previous tables
|
|
|
|
@return 'this' Filter is used (and variables are updated)
|
|
@return 0 Filter is worse than old plan
|
|
*/
|
|
|
|
Range_rowid_filter_cost_info* Range_rowid_filter_cost_info::
apply_filter(THD *thd, TABLE *table, ALL_READ_COST *cost,
             double *records_arg,
             double *startup_cost,
             uint ranges, double prev_records)
{
  handler *const h= table->file;
  const double rows_before= *records_arg;
  const double setup_cost= get_setup_cost();
  const double lookup_total= rows_before * lookup_cost();

  /*
    Rows left after the filter. The selectivity is trusted as is, so the
    result may be below 1.0.
  */
  const double rows_after= rows_before * selectivity;

  /*
    Cost with the filter: row fetch and copy costs are scaled by the
    selectivity, as only accepted row ids are fetched, and the filter
    lookups are added to the index cpu cost.
  */
  ALL_READ_COST with_filter= *cost;
  with_filter.row_cost.io*= selectivity;
  with_filter.row_cost.cpu*= selectivity;
  with_filter.copy_cost*= selectivity;          // Cost of copying row or key
  with_filter.index_cost.cpu+= lookup_total;

  /* WHERE cost per row, multiplied by the previous row combinations */
  const double where_cost= prev_records * WHERE_COST_THD(thd);

  const double org_cost=
    (h->cost_for_reading_multiple_times(prev_records, cost) +
     rows_before * where_cost);

  const double new_cost=
    (h->cost_for_reading_multiple_times(prev_records, &with_filter) +
     rows_after * where_cost + setup_cost);

  DBUG_ASSERT(new_cost >= 0 && rows_after >= 0);
  const bool use_filter= new_cost < org_cost;

  if (unlikely(thd->trace_started()))
  {
    Json_writer_object trace_filter(thd, "filter");
    trace_filter.add("rowid_filter_index",
                     table->key_info[get_key_no()].name);
    trace_filter.add("index_only_cost", h->cost(cost->index_cost));
    trace_filter.add("filter_startup_cost", setup_cost);
    trace_filter.add("find_key_and_filter_lookup_cost", lookup_total);
    trace_filter.add("filter_selectivity", selectivity);
    trace_filter.add("original_rows", rows_before);
    trace_filter.add("new_rows", rows_after);
    trace_filter.add("original_access_cost", h->cost(cost));
    trace_filter.add("with_filter_access_cost", h->cost(&with_filter));
    trace_filter.add("original_found_rows_cost", h->cost(cost->row_cost));
    trace_filter.add("with_filter_found_rows_cost",
                     h->cost(with_filter.row_cost));
    trace_filter.add("org_cost", org_cost);
    trace_filter.add("filter_cost", new_cost);
    trace_filter.add("filter_used", use_filter);
  }

  if (!use_filter)
    return 0;                                   // Filter does not pay off

  /* Filter accepted: hand the adjusted numbers back to the caller */
  cost->row_cost= with_filter.row_cost;
  cost->index_cost= with_filter.index_cost;
  cost->copy_cost= with_filter.copy_cost;
  *records_arg= rows_after;
  (*startup_cost)+= setup_cost;
  return this;
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Compute the fanout of hash join operation using EITS data
|
|
|
|
@param join JOIN structure
|
|
@param tab JOIN_TAB for the current table
|
|
@param remaining_tables Map of tables not yet accessible
|
|
@param rnd_records Number of accepted rows in the table, after taking
|
|
selectivity into account.
|
|
@param hj_start_key Pointer to hash key
|
|
@param stats_found Is set to 1 if we found any usable hash key part
|
|
with statistics from analyze.
|
|
*/
|
|
|
|
double hash_join_fanout(JOIN *join, JOIN_TAB *tab, table_map remaining_tables,
                        double rnd_records, KEYUSE *hj_start_key,
                        bool *stats_found)
{
  THD *thd= join->thd;
  /*
    min_freq starts at the table's row count and is lowered to the
    smallest avg_frequency among the hash key columns that have usable
    EITS statistics. Only the single best column is used; statistics of
    several columns are not combined.
  */
  double min_freq= (double) tab->table->stat_records();
  bool found_not_usable_field= 0;
  bool found_usable_field __attribute__((unused))= 0;
  /* NOTE(review): this tag differs from the function name - confirm */
  DBUG_ENTER("hash_join_cardinality");
  DBUG_ASSERT(rnd_records > 0 && min_freq > 0);

  Json_writer_object trace_obj(thd, "hash_join_cardinality");
  Json_writer_array trace_arr(thd, "hash_join_columns");

  /* Walk the consecutive hash-join KEYUSE entries of this table */
  KEYUSE *ku= hj_start_key;
  while (ku->table == tab->table && is_hash_join_key_no(ku->key))
  {
    /* A KEYUSE without validity_ref, or with it set, is a valid one */
    const bool valid= !ku->validity_ref || *ku->validity_ref;
    bool used_stats= false;

    if (valid &&
        !(remaining_tables & ku->used_tables) &&
        tab->access_from_tables_is_allowed(ku->used_tables,
                                           join->sjm_lookup_tables))
    {
      Field *field= tab->table->field[ku->keypart];
      found_usable_field= 1;
      if (is_eits_usable(field))
      {
        const double freq= field->read_stats->get_avg_frequency();

        Json_writer_object trace_field(thd);
        trace_field.add("field", field->field_name.str);
        trace_field.add("avg_frequency", freq);
        if (freq < min_freq)
          min_freq= freq;
        *stats_found= 1;
        used_stats= true;
      }
    }

    /* A valid key part that did not contribute statistics */
    if (!used_stats && valid)
      found_not_usable_field= 1;
    ku++;
  }

  /* Ensure that some part of hash_key is usable */
  DBUG_ASSERT(found_usable_field);

  trace_arr.end();
  if (!found_not_usable_field)
  {
    /*
      Statistics were available for every valid key part. Scale min_freq
      by the fraction of the table rows that remain (rnd_records out of
      stat_records()), but never below HASH_FANOUT.
    */
    min_freq*= rnd_records / tab->table->stat_records();
    if (min_freq < HASH_FANOUT)
      min_freq= HASH_FANOUT;
  }
  else
  {
    /*
      Statistics are missing for at least one key part. Cap min_freq at
      rnd_records * HASH_FANOUT, the estimate used when no statistics
      exist. The result is allowed to be less than 1.0.
    */
    const double default_estimate= rnd_records * HASH_FANOUT;
    if (min_freq > default_estimate)
      min_freq= default_estimate;
    trace_obj.add("using_default_hash_fanout", HASH_FANOUT);
  }

  trace_obj.add("rows", min_freq);
  DBUG_RETURN(min_freq);
}
|
|
|
|
|
|
#ifndef DBUG_OFF
|
|
|
|
static char dbug_join_prefix_buf[256];
|
|
|
|
const char* dbug_print_join_prefix(const POSITION *join_positions,
|
|
uint idx,
|
|
JOIN_TAB *s)
|
|
{
|
|
char *buf= dbug_join_prefix_buf;
|
|
String str(buf, sizeof(dbug_join_prefix_buf), &my_charset_bin);
|
|
str.length(0);
|
|
for (uint i=0; i!=idx; i++)
|
|
{
|
|
str.append(join_positions[i].table->table->alias);
|
|
str.append(',');
|
|
}
|
|
str.append(s->table->alias);
|
|
if (str.c_ptr_safe() == buf)
|
|
return buf;
|
|
else
|
|
return "Couldn't fit into buffer";
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
Find the best access path for an extension of a partial execution
|
|
plan and add this path to the plan.
|
|
|
|
The function finds the best access path to table 's' from the passed
|
|
partial plan where an access path is the general term for any means to
|
|
access the data in 's'. An access path may use either an index or a scan,
|
|
whichever is cheaper. The input partial plan is passed via the array
|
|
'join->positions' of length 'idx'. The chosen access method for 's' and its
|
|
cost are stored in 'join->positions[idx]'.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param s the table to be joined by the function
|
|
@param thd thread for the connection that submitted the query
|
|
@param remaining_tables set of tables not included into the partial plan yet
|
|
@param idx the length of the partial plan
|
|
@param disable_jbuf TRUE<=> Don't use join buffering
|
|
@param record_count estimate for the number of records returned by the
|
|
partial plan
|
|
@param pos OUT Table access plan
|
|
@param loose_scan_pos OUT Table plan that uses loosescan, or set cost to
|
|
DBL_MAX if not possible.
|
|
@detail
|
|
Use this to print the current join prefix:
|
|
|
|
dbug_print_join_prefix(join_positions, idx, s)
|
|
|
|
Use this as breakpoint condition to stop at join prefix "t1,t2,t3":
|
|
|
|
$_streq(dbug_print_join_prefix(join_positions, idx, s), "t1,t2,t3")
|
|
|
|
@return
|
|
None
|
|
*/
|
|
|
|
struct best_plan
|
|
{
|
|
double cost; // Smallest cost found
|
|
double records; // Old 'Records'
|
|
double records_read; // Records accessed
|
|
double records_after_filter; // Records_read + filter
|
|
double records_out; // Smallest record count seen
|
|
double prev_record_reads; // Save value from prev_record_reads
|
|
double identical_keys; // Save value from prev_record_reads
|
|
Range_rowid_filter_cost_info *filter; // Best filter
|
|
KEYUSE *key; // Best key
|
|
SplM_plan_info *spl_plan;
|
|
table_map ref_depends_map;
|
|
ulonglong refills; // Join cache refills
|
|
enum join_type type;
|
|
uint forced_index;
|
|
uint max_key_part;
|
|
table_map found_ref;
|
|
bool use_join_buffer;
|
|
};
|
|
|
|
|
|
void
|
|
best_access_path(JOIN *join,
|
|
JOIN_TAB *s,
|
|
table_map remaining_tables,
|
|
const POSITION *join_positions,
|
|
uint idx,
|
|
bool disable_jbuf,
|
|
double record_count,
|
|
POSITION *pos,
|
|
POSITION *loose_scan_pos)
|
|
{
|
|
THD *thd= join->thd;
|
|
uint use_cond_selectivity=
|
|
thd->variables.optimizer_use_condition_selectivity;
|
|
TABLE *table= s->table;
|
|
handler *file= table->file;
|
|
my_bool found_constraint= 0;
|
|
/*
|
|
key_dependent is 0 if all key parts could be used or if there was an
|
|
EQ_REF table found (which uses all key parts). In other words, we cannot
|
|
find a better key for the table even if remaining_tables is reduced.
|
|
Otherwise it's a bitmap of tables that could improve key usage.
|
|
*/
|
|
table_map key_dependent= 0;
|
|
ALL_READ_COST tmp;
|
|
ha_rows rec;
|
|
MY_BITMAP *eq_join_set= &s->table->eq_join_set;
|
|
KEYUSE *hj_start_key= 0;
|
|
table_map spl_pd_boundary= 0;
|
|
Loose_scan_opt loose_scan_opt;
|
|
struct best_plan best;
|
|
Json_writer_object trace_wrapper(thd, "best_access_path");
|
|
DBUG_ENTER("best_access_path");
|
|
|
|
/*
|
|
Assume that there is at least one accepted row from previous table
|
|
combinations.
|
|
This fixes a problem when the selectivity for the preceding table
|
|
combinations becomes so high that record_count becomes << 1.0,
|
|
which makes the cost for the current table so low that it does not
|
|
matter when calculating the best plans.
|
|
*/
|
|
set_if_bigger(record_count, 1.0);
|
|
|
|
best.cost= DBL_MAX;
|
|
best.records= DBL_MAX;
|
|
best.records_read= DBL_MAX;
|
|
best.records_after_filter= DBL_MAX;
|
|
best.records_out= MY_MIN(table->stat_records() * table->cond_selectivity,
|
|
table->opt_range_condition_rows);
|
|
best.prev_record_reads= best.identical_keys= 0;
|
|
best.filter= 0;
|
|
best.key= 0;
|
|
best.max_key_part= 0;
|
|
best.type= JT_UNKNOWN;
|
|
best.forced_index= MAX_KEY;
|
|
best.found_ref= 0;
|
|
best.ref_depends_map= 0;
|
|
best.refills= 0;
|
|
best.use_join_buffer= FALSE;
|
|
best.spl_plan= 0;
|
|
|
|
disable_jbuf= disable_jbuf || idx == join->const_tables;
|
|
|
|
trace_wrapper.add_table_name(s);
|
|
|
|
bitmap_clear_all(eq_join_set);
|
|
|
|
loose_scan_opt.init(join, s, remaining_tables);
|
|
|
|
if (table->is_splittable())
|
|
best.spl_plan= s->choose_best_splitting(idx,
|
|
remaining_tables,
|
|
join_positions,
|
|
&spl_pd_boundary);
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object info(thd, "plan_details");
|
|
info.add("record_count", record_count);
|
|
}
|
|
|
|
Json_writer_array trace_paths(thd, "considered_access_paths");
|
|
if (s->keyuse)
|
|
{ /* Use key if possible */
|
|
KEYUSE *keyuse, *start_key= 0;
|
|
const char *cause= NULL;
|
|
uint max_key_part=0;
|
|
enum join_type type= JT_UNKNOWN;
|
|
double cur_cost, copy_cost, cached_prev_record_reads= 0.0;
|
|
table_map cached_prev_ref= ~(table_map) 0;
|
|
|
|
/* Test how we can use keys */
|
|
rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE; // Assumed records/key
|
|
for (keyuse=s->keyuse ; keyuse->table == table ;)
|
|
{
|
|
KEY *keyinfo;
|
|
ulong key_flags;
|
|
uint key_parts;
|
|
key_part_map found_part= 0;
|
|
/* key parts which won't have NULL in lookup tuple */
|
|
key_part_map notnull_part=0;
|
|
table_map found_ref= 0;
|
|
uint key= keyuse->key;
|
|
uint max_const_parts;
|
|
bool ft_key= (keyuse->keypart == FT_KEYPART);
|
|
/* Bitmap of keyparts where the ref access is over 'keypart=const': */
|
|
key_part_map const_part= 0;
|
|
/* The or-null keypart in ref-or-null access: */
|
|
key_part_map ref_or_null_part= 0;
|
|
key_part_map all_parts= 0;
|
|
double startup_cost= s->startup_cost;
|
|
double records_after_filter, records_best_filter, records;
|
|
Range_rowid_filter_cost_info *filter= 0;
|
|
double prev_record_count= record_count;
|
|
double identical_keys= 0;
|
|
|
|
if (is_hash_join_key_no(key))
|
|
{
|
|
/*
|
|
Hash join as any join employing join buffer can be used to join
|
|
only those tables that are joined after the first non const table
|
|
*/
|
|
if (!(remaining_tables & keyuse->used_tables) &&
|
|
idx > join->const_tables)
|
|
{
|
|
if (!hj_start_key)
|
|
hj_start_key= keyuse;
|
|
bitmap_set_bit(eq_join_set, keyuse->keypart);
|
|
}
|
|
keyuse++;
|
|
continue;
|
|
}
|
|
|
|
keyinfo= table->key_info+key;
|
|
key_parts= table->actual_n_key_parts(keyinfo);
|
|
key_flags= table->actual_key_flags(keyinfo);
|
|
|
|
/* Calculate how many key segments of the current key we can use */
|
|
start_key= keyuse;
|
|
|
|
loose_scan_opt.next_ref_key();
|
|
DBUG_PRINT("info", ("Considering ref access on key %s",
|
|
keyuse->table->key_info[keyuse->key].name.str));
|
|
|
|
do /* For each keypart */
|
|
{
|
|
uint keypart= keyuse->keypart;
|
|
table_map best_part_found_ref= 0, key_parts_dependent= 0;
|
|
double best_prev_record_reads= DBL_MAX;
|
|
|
|
do /* For each way to access the keypart */
|
|
{
|
|
/*
|
|
If 1. expression does not refer to forward tables
|
|
2. we won't get two ref-or-null's
|
|
*/
|
|
double ignore;
|
|
all_parts|= keyuse->keypart_map;
|
|
if (!(remaining_tables & keyuse->used_tables) &&
|
|
(!keyuse->validity_ref || *keyuse->validity_ref) &&
|
|
s->access_from_tables_is_allowed(keyuse->used_tables,
|
|
join->sjm_lookup_tables) &&
|
|
!(ref_or_null_part && (keyuse->optimize &
|
|
KEY_OPTIMIZE_REF_OR_NULL)))
|
|
{
|
|
found_part|= keyuse->keypart_map;
|
|
key_parts_dependent= 0;
|
|
if (!(keyuse->used_tables & ~join->const_table_map))
|
|
const_part|= keyuse->keypart_map;
|
|
|
|
if (!keyuse->val->maybe_null() || keyuse->null_rejecting)
|
|
notnull_part|=keyuse->keypart_map;
|
|
|
|
if ((found_ref | keyuse->used_tables) != cached_prev_ref)
|
|
{
|
|
cached_prev_ref= (found_ref | keyuse->used_tables);
|
|
cached_prev_record_reads=
|
|
prev_record_reads(join_positions, idx,
|
|
cached_prev_ref, record_count,
|
|
&ignore);
|
|
}
|
|
if (cached_prev_record_reads < best_prev_record_reads)
|
|
{
|
|
best_prev_record_reads= cached_prev_record_reads;
|
|
best_part_found_ref= (keyuse->used_tables &
|
|
~join->const_table_map);
|
|
}
|
|
if (rec > keyuse->ref_table_rows)
|
|
rec= keyuse->ref_table_rows;
|
|
/*
|
|
If there is one 'key_column IS NULL' expression, we can
|
|
use this ref_or_null optimisation of this field
|
|
*/
|
|
if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
|
|
ref_or_null_part |= keyuse->keypart_map;
|
|
|
|
/*
|
|
Remember if there is a WHERE condition that contains
|
|
'key_part=expression_with_only_accessible_tables'
|
|
We ignore const tables as these are handled by selectivity
|
|
code (const table fields are treated as constants).
|
|
*/
|
|
found_constraint|= (keyuse->used_tables &
|
|
~(remaining_tables |
|
|
join->const_table_map));
|
|
}
|
|
else if (!(found_part & keyuse->keypart_map))
|
|
key_parts_dependent|= keyuse->used_tables;
|
|
|
|
loose_scan_opt.add_keyuse(remaining_tables, keyuse);
|
|
keyuse++;
|
|
} while (keyuse->table == table && keyuse->key == key &&
|
|
keyuse->keypart == keypart);
|
|
/* If we found a usable key, remember the dependent tables */
|
|
if (all_parts & 1)
|
|
key_dependent|= key_parts_dependent;
|
|
found_ref|= best_part_found_ref;
|
|
/* Remember if the key expression used previous non const tables */
|
|
} while (keyuse->table == table && keyuse->key == key);
|
|
|
|
/*
|
|
Assume that that each key matches a proportional part of table.
|
|
*/
|
|
if (!found_part && !ft_key && !loose_scan_opt.have_a_case())
|
|
continue; // Nothing usable found
|
|
|
|
if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
|
|
rec= MATCHING_ROWS_IN_OTHER_TABLE; // Fix for small tables
|
|
|
|
Json_writer_object trace_access_idx(thd);
|
|
max_const_parts= max_part_bit(const_part);
|
|
|
|
/*
|
|
full text keys require special treatment
|
|
*/
|
|
if (ft_key)
|
|
{
|
|
/*
|
|
Fulltext indexes are preformed the following way:
|
|
- In the prepare step it performs the search, collects all positions
|
|
in an array, sorts it.
|
|
- If optimizer decides to use the ft index access method it simply'
|
|
returns positions from the array one by one
|
|
- If optimizer decides to use something else (another index, table
|
|
scan), then it'll use binary search in the array to find the
|
|
position.
|
|
|
|
The following code puts the cost down to very small as the prep
|
|
step will always be done and the cost to fetch the row from memory
|
|
is very small.
|
|
Alternatively we could use the cost of an EQ_REF here.
|
|
*/
|
|
tmp.reset();
|
|
tmp.row_cost.cpu= file->ROW_COPY_COST;
|
|
/*
|
|
We don't know how many records will match. However, we want to have
|
|
the fulltext search done early, so we put the number of records
|
|
to be very low.
|
|
*/
|
|
records= 1.0;
|
|
type= JT_FT;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("full-text index", keyinfo->name);
|
|
}
|
|
else
|
|
{
|
|
loose_scan_opt.check_ref_access_part1(s, key, start_key, found_part);
|
|
|
|
/* Check if we found full key */
|
|
const key_part_map all_key_parts= PREV_BITS(uint, key_parts);
|
|
if (found_part == all_key_parts && !ref_or_null_part)
|
|
{ /* use eq key */
|
|
max_key_part= (uint) ~0;
|
|
/*
|
|
If the index is a unique index (1), and
|
|
- all its columns are not null (2), or
|
|
- equalities we are using reject NULLs (3)
|
|
then the estimate is rows=1.
|
|
*/
|
|
if ((key_flags & (HA_NOSAME | HA_EXT_NOSAME)) && // (1)
|
|
(!(key_flags & HA_NULL_PART_KEY) || // (2)
|
|
all_key_parts == notnull_part)) // (3)
|
|
{
|
|
/* Check that eq_ref_tables are correctly updated */
|
|
DBUG_ASSERT(join->eq_ref_tables & table->map);
|
|
type= JT_EQ_REF;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("index", keyinfo->name);
|
|
if (!found_ref && table->opt_range_keys.is_set(key))
|
|
{
|
|
/* Ensure that the cost is identical to the range cost */
|
|
table->opt_range[key].get_costs(&tmp);
|
|
}
|
|
else
|
|
{
|
|
tmp= cost_for_index_read(thd, table, key, 1, 1);
|
|
}
|
|
/*
|
|
Calculate how many record read calls will be made taking
|
|
into account that we will cache the last read row.
|
|
*/
|
|
prev_record_count= prev_record_reads(join_positions, idx,
|
|
found_ref, record_count,
|
|
&identical_keys);
|
|
records= 1.0;
|
|
}
|
|
else
|
|
{
|
|
type= JT_REF;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("index", keyinfo->name);
|
|
if (!found_ref)
|
|
{ /* We found a const key */
|
|
/*
|
|
ReuseRangeEstimateForRef-1:
|
|
We get here if we've found a ref(const) (c_i are constants):
|
|
"(keypart1=c1) AND ... AND (keypartN=cN)" [ref_const_cond]
|
|
|
|
If range optimizer was able to construct a "range"
|
|
access on this index, then its condition "quick_cond" was
|
|
eqivalent to ref_const_cond (*), and we can re-use E(#rows)
|
|
from the range optimizer.
|
|
|
|
Proof of (*): By properties of range and ref optimizers
|
|
quick_cond will be equal or tighter than ref_const_cond.
|
|
ref_const_cond already covers "smallest" possible interval -
|
|
a singlepoint interval over all keyparts. Therefore,
|
|
quick_cond is equivalent to ref_const_cond (if it was an
|
|
empty interval we wouldn't have got here).
|
|
*/
|
|
if (table->opt_range_keys.is_set(key))
|
|
{
|
|
/* Ensure that the cost is identical to the range cost */
|
|
records= (double) table->opt_range[key].rows;
|
|
trace_access_idx.add("used_range_estimates", true);
|
|
|
|
table->opt_range[key].get_costs(&tmp);
|
|
goto got_cost2;
|
|
}
|
|
/* quick_range couldn't use key! */
|
|
records= (double) s->records/rec;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("used_range_estimates", false).
|
|
add("reason", "not available");
|
|
}
|
|
else
|
|
{
|
|
if (!(records= keyinfo->actual_rec_per_key(key_parts-1)))
|
|
{ /* Prefer longer keys */
|
|
trace_access_idx.add("rec_per_key_stats_missing", true);
|
|
records=
|
|
((double) s->records / (double) rec *
|
|
(1.0 +
|
|
((double) (table->s->max_key_length-keyinfo->key_length) /
|
|
(double) table->s->max_key_length)));
|
|
set_if_smaller(records, (double)s->records);
|
|
if (records < 1.0)
|
|
records= 1.0; /* Can't be as good as a unique */
|
|
}
|
|
|
|
/*
|
|
ReuseRangeEstimateForRef-2: We get here if we could not reuse
|
|
E(#rows) from range optimizer. Make another try:
|
|
|
|
If range optimizer produced E(#rows) for a prefix of the ref
|
|
access we're considering, and that E(#rows) is lower than our
|
|
current estimate, make an adjustment. The criteria of when we
|
|
can make an adjustment is a special case of the criteria used
|
|
in ReuseRangeEstimateForRef-3.
|
|
*/
|
|
if (table->opt_range_keys.is_set(key) &&
|
|
table->opt_range[key].key_parts <= max_const_parts &&
|
|
table->opt_range[key].ranges == 1 &&
|
|
records > (double) table->opt_range[key].rows)
|
|
{
|
|
records= (double) table->opt_range[key].rows;
|
|
trace_access_idx.add("used_range_estimates", "clipped down");
|
|
}
|
|
else if (unlikely(trace_access_idx.trace_started()))
|
|
{
|
|
if (table->opt_range_keys.is_set(key))
|
|
{
|
|
trace_access_idx.
|
|
add("used_range_estimates",false).
|
|
add("reason", "not better than ref estimates");
|
|
}
|
|
else
|
|
{
|
|
trace_access_idx.
|
|
add("used_range_estimates", false).
|
|
add("reason", "not available");
|
|
}
|
|
}
|
|
}
|
|
/* Calculate the cost of the index access */
|
|
tmp= cost_for_index_read(thd, table, key,
|
|
(ha_rows) records, 0);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
type = ref_or_null_part ? JT_REF_OR_NULL : JT_REF;
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
trace_access_idx.
|
|
add("access_type", join_type_str[type]).
|
|
add("index", keyinfo->name);
|
|
/*
|
|
Use as many key parts as possible; a unique key is better
|
|
than a not unique key
|
|
Set tmp to the cost of the accessing the expected number of
|
|
records.
|
|
*/
|
|
if ((found_part & 1) &&
|
|
(!(table->key_info[key].index_flags & HA_ONLY_WHOLE_INDEX) ||
|
|
found_part == PREV_BITS(uint,keyinfo->user_defined_key_parts)))
|
|
{
|
|
double extra_cost= 0;
|
|
|
|
max_key_part= max_part_bit(found_part);
|
|
bool all_used_equalities_are_const= (max_key_part ==
|
|
max_const_parts);
|
|
/*
|
|
ReuseRangeEstimateForRef-3:
|
|
We're now considering a ref[or_null] access via
|
|
(t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR
|
|
(same-as-above but with one cond replaced
|
|
with "t.keypart_i IS NULL")] (**)
|
|
|
|
Try re-using E(#rows) from "range" optimizer:
|
|
We can do so if "range" optimizer used the same intervals as
|
|
in (**). The intervals used by range optimizer may be not
|
|
available at this point (as "range" access might have chosen to
|
|
create quick select over another index), so we can't compare
|
|
them to (**). We'll make indirect judgements instead.
|
|
The sufficient conditions for re-use are:
|
|
(C1) All e_i in (**) are constants (if
|
|
this is not satisfied we have no way to know which ranges
|
|
will be actually scanned by 'ref' until we execute the
|
|
join)
|
|
(C2) max #key parts in 'range' access == K == max_key_part (this
|
|
is apparently a necessary requirement)
|
|
|
|
We also have a property that "range optimizer produces equal or
|
|
tighter set of scan intervals than ref(const) optimizer". Each
|
|
of the intervals in (**) are "tightest possible" intervals when
|
|
one limits itself to using keyparts 1..K (which we do in #2).
|
|
From here it follows that range access used either one, or
|
|
both of the (I1) and (I2) intervals:
|
|
|
|
(t.keypart1=c1 AND ... AND t.keypartK=eK) (I1)
|
|
(same-as-above but with one cond replaced
|
|
with "t.keypart_i IS NULL") (I2)
|
|
|
|
The remaining part is to exclude the situation where range
|
|
optimizer used one interval while we're considering
|
|
ref-or-null and looking for estimate for two intervals. This
|
|
is done by last limitation:
|
|
|
|
(C3) "range optimizer used (have ref_or_null?2:1) intervals"
|
|
*/
|
|
if (table->opt_range_keys.is_set(key) &&
|
|
all_used_equalities_are_const && // (C1)
|
|
table->opt_range[key].key_parts == max_key_part && //(C2)
|
|
(table->opt_range[key].ranges ==
|
|
1 + MY_TEST(ref_or_null_part))) //(C3)
|
|
{
|
|
records= (double) table->opt_range[key].rows;
|
|
table->opt_range[key].get_costs(&tmp);
|
|
/*
|
|
TODO: Disable opt_range testing below for this range as we can
|
|
always use this ref instead.
|
|
*/
|
|
trace_access_idx.add("used_range_estimates", true);
|
|
goto got_cost2;
|
|
}
|
|
else
|
|
{
|
|
/* Check if we have statistic about the distribution */
|
|
if ((records= keyinfo->actual_rec_per_key(max_key_part-1)))
|
|
{
|
|
/*
|
|
Fix for the case where the index statistics is too
|
|
optimistic: If
|
|
(1) We're considering ref(const) and there is quick select
|
|
on the same index,
|
|
(2) and that quick select uses more keyparts (i.e. it will
|
|
scan equal/smaller interval than this ref(const))
|
|
(3) and E(#rows) for quick select is higher than our
|
|
estimate,
|
|
Then
|
|
We'll use E(#rows) from quick select.
|
|
|
|
Q: Why do we choose to use 'ref'? Won't quick select be
|
|
cheaper in some cases ?
|
|
TODO: figure this out and adjust the plan choice if needed.
|
|
*/
|
|
if (table->opt_range_keys.is_set(key))
|
|
{
|
|
double rows;
|
|
if (table->opt_range[key].key_parts >= max_key_part) // (2)
|
|
{
|
|
/*
|
|
Choose range over REF in the case range will always be
|
|
as good or better than REF.
|
|
This is the case when we have only one const range
|
|
and it consist of more parts than what we used for REF.
|
|
*/
|
|
if (all_used_equalities_are_const &&
|
|
table->opt_range[key].key_parts > max_key_part &&
|
|
table->opt_range[key].ranges <=
|
|
(uint) (1 + MY_TEST(ref_or_null_part)))
|
|
{
|
|
trace_access_idx.
|
|
add("chosen", false).
|
|
add("cause", "range is simple and more selective");
|
|
continue; // continue with next key
|
|
}
|
|
}
|
|
rows= (double) table->opt_range[key].rows;
|
|
if (all_used_equalities_are_const && // (1)
|
|
records < rows) // (3)
|
|
{
|
|
trace_access_idx.add("used_range_estimates",
|
|
"clipped up");
|
|
records= rows;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
trace_access_idx.add("rec_per_key_stats_missing", true);
|
|
/*
|
|
Assume that the first key part matches 1% of the file
|
|
and that the whole key matches 10 (duplicates) or 1
|
|
(unique) records.
|
|
Assume also that more key matches proportionally more
|
|
records
|
|
This gives the formula:
|
|
records = (x * (b-a) + a*c-b)/(c-1)
|
|
|
|
b = records matched by whole key
|
|
a = records matched by first key part (1% of all records?)
|
|
c = number of key parts in key
|
|
x = used key parts (1 <= x <= c)
|
|
*/
|
|
double rec_per_key;
|
|
if (!(rec_per_key=(double)
|
|
keyinfo->rec_per_key[keyinfo->user_defined_key_parts-1]))
|
|
rec_per_key=(double) s->records/rec+1;
|
|
|
|
if (!s->records)
|
|
records= 0;
|
|
else if (rec_per_key/(double) s->records >= 0.01)
|
|
records= rec_per_key;
|
|
else
|
|
{
|
|
double a=s->records*0.01;
|
|
if (keyinfo->user_defined_key_parts > 1)
|
|
records= (max_key_part * (rec_per_key - a) +
|
|
a*keyinfo->user_defined_key_parts - rec_per_key)/
|
|
(keyinfo->user_defined_key_parts-1);
|
|
else
|
|
records= rows2double(s->records);
|
|
set_if_bigger(records, MIN_ROWS_AFTER_FILTERING);
|
|
}
|
|
}
|
|
|
|
if (ref_or_null_part)
|
|
{
|
|
/* We need to do two key searches to find row */
|
|
records *= 2.0;
|
|
extra_cost= s->table->file->KEY_LOOKUP_COST;
|
|
}
|
|
|
|
/*
|
|
ReuseRangeEstimateForRef-4: We get here if we could not reuse
|
|
E(#rows) from range optimizer. Make another try:
|
|
|
|
If range optimizer produced E(#rows) for a prefix of the ref
|
|
access we're considering, and that E(#rows) is lower than our
|
|
current estimate, make the adjustment.
|
|
|
|
The decision whether we can re-use the estimate from the range
|
|
optimizer is the same as in ReuseRangeEstimateForRef-3,
|
|
applied to the first table->opt_range[key].key_parts key parts.
|
|
*/
|
|
if (table->opt_range_keys.is_set(key) &&
|
|
table->opt_range[key].key_parts <= max_const_parts &&
|
|
table->opt_range[key].ranges == (1 +
|
|
MY_TEST(ref_or_null_part &
|
|
const_part)) &&
|
|
records > (double) table->opt_range[key].rows)
|
|
{
|
|
// psergey-merge-sept: remove: if (table->opt_range[key].key_parts <= max_const_parts)
|
|
{
|
|
trace_access_idx.add("used_range_estimates", true);
|
|
records= (double) table->opt_range[key].rows;
|
|
}
|
|
}
|
|
}
|
|
|
|
set_if_smaller(records, (double) s->records);
|
|
tmp= cost_for_index_read(thd, table, key, (ha_rows)records, 0);
|
|
tmp.copy_cost+= extra_cost;
|
|
}
|
|
else
|
|
{
|
|
if (!(found_part & 1))
|
|
cause= "no predicate for first keypart";
|
|
else
|
|
cause= "No full key found";
|
|
trace_access_idx.add("chosen", false).add("cause", cause);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
got_cost2:
|
|
loose_scan_opt.check_ref_access_part2(key, start_key, records,
|
|
file->cost(&tmp) + startup_cost,
|
|
found_ref);
|
|
} /* not ft_key */
|
|
|
|
if (records == DBL_MAX) // Key not usable
|
|
continue;
|
|
|
|
records_best_filter= records_after_filter= records;
|
|
|
|
/*
|
|
Check if we can use a filter.
|
|
Records can be 0 in case of empty tables.
|
|
*/
|
|
if ((found_part & 1) && records &&
|
|
table->can_use_rowid_filter(start_key->key))
|
|
{
|
|
/*
|
|
If we use filter F with selectivity s then the cost of fetching data
|
|
by key using this filter will be
|
|
cost_of_fetching_1_row * rows * s +
|
|
cost_of_fetching_1_key_tuple * rows * (1 - s) +
|
|
cost_of_1_lookup_into_filter * rows
|
|
Without using any filter the cost would be just
|
|
cost_of_fetching_1_row * rows
|
|
|
|
So the gain in access cost per row will be
|
|
cost_of_fetching_1_row * (1 - s) -
|
|
cost_of_fetching_1_key_tuple * (1 - s) -
|
|
cost_of_1_lookup_into_filter
|
|
=
|
|
(cost_of_fetching_1_row - cost_of_fetching_1_key_tuple) * (1 - s)
|
|
- cost_of_1_lookup_into_filter
|
|
|
|
Here we have:
|
|
cost_of_fetching_1_row = tmp/rows
|
|
cost_of_fetching_1_key_tuple = keyread_tmp/rows
|
|
Here's a more detailed explanation that uses the formulas behind
|
|
the function call filter->get_adjusted_gain(). The function
|
|
takes as a parameter the number of probes/look-ups into the filter
|
|
that is equal to the number of fetched key entries that is equal to
|
|
the number of row fetches when no filter is used (assuming no
|
|
index condition pushdown is employed for the used key access).
|
|
Let this number be N. Then the total gain from using the filter is
|
|
N*a_adj - b where b is the cost of building the filter and
|
|
a_adj is calculated as follows:
|
|
a - (1-access_cost_factor)*(1-s) =
|
|
(1+1_cond_eval_cost)*(1-s)-1_probe_cost - (1-access_cost_factor)*(1-s)
|
|
= (1-s)*(1_cond_eval_cost+access_cost_factor) - 1_probe_cost.
|
|
Here (1-s)*(1_cond_eval_cost) * N is the gain from checking fewer
|
|
conditions pushed into the table, 1_probe_cost*N is the cost of the
|
|
probes and (1-s) * access_cost_factor * N must be the gain from
|
|
accessing less rows.
|
|
It does not matter how we calculate the cost of N full row fetches
|
|
cost_of_fetching_N_rows or
|
|
how we calculate the cost of fetching N key entries
|
|
cost_of_fetching_N_key_entries
|
|
the gain from less row fetches will be
|
|
(cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) * (1-s)
|
|
and this should be equal to (1-s) * access_cost_factor * N.
|
|
Thus access_cost_factor must be calculated as
|
|
(cost_of_fetching_N_rows - cost_of_fetching_N_key_entries) / N.
|
|
|
|
For safety we clip cost_of_fetching_N_key_entries by the value
|
|
of cost_of_fetching_N_rows though formally it's not necessary.
|
|
|
|
We cannot use filter with JT_EQ_REF as in this case 'tmp' is
|
|
number of rows from prev_record_reads() and keyread_tmp is 0. These
|
|
numbers are not usable with rowid filter code.
|
|
*/
|
|
filter= table->best_range_rowid_filter(start_key->key,
|
|
records,
|
|
file->cost(&tmp),
|
|
file->cost(tmp.index_cost),
|
|
prev_record_count,
|
|
&records_best_filter);
|
|
set_if_smaller(best.records_out, records_best_filter);
|
|
|
|
if (filter)
|
|
filter= filter->apply_filter(thd, table, &tmp,
|
|
&records_after_filter,
|
|
&startup_cost,
|
|
1, prev_record_count);
|
|
}
|
|
|
|
/*
|
|
Take into account WHERE and setup cost.
|
|
We have to check the WHERE for all previous row combinations
|
|
(record_count).
|
|
'prev_record_count' is either 'record_count', or in case of
|
|
EQ_REF the estimated number of index_read() calls to the
|
|
engine when taking the one row read cache into account.
|
|
*/
|
|
copy_cost= (record_count * records_after_filter * WHERE_COST_THD(thd) +
|
|
startup_cost);
|
|
|
|
cur_cost= (file->cost_for_reading_multiple_times(prev_record_count,
|
|
&tmp) +
|
|
copy_cost);
|
|
|
|
if (unlikely(trace_access_idx.trace_started()))
|
|
{
|
|
if (prev_record_count != record_count)
|
|
trace_access_idx.add("prev_record_count", prev_record_count);
|
|
trace_access_idx.
|
|
add("rows", records_after_filter).
|
|
add("cost", cur_cost);
|
|
}
|
|
|
|
|
|
/*
|
|
The COST_EPS is here to ensure we use the first key if there are
|
|
two 'identical keys' that could be used.
|
|
*/
|
|
if (cur_cost + COST_EPS < best.cost)
|
|
{
|
|
trace_access_idx.add("chosen", true);
|
|
best.cost= cur_cost;
|
|
/*
|
|
We use 'records' instead of 'records_after_filter' here as we want
|
|
to have EXPLAIN print the number of rows found by the key access.
|
|
*/
|
|
best.records= records; // Records before filter!
|
|
best.records_read= records;
|
|
/*
|
|
If we are using 'use_cond_selectivity > 1' then
|
|
table_after_join_selectivity() may take into account other
|
|
filters than what is currently used so we have to use
|
|
records_after_filter. If 'use_cond_selectivity <= 1 then we
|
|
can use information from the best filter.
|
|
*/
|
|
best.records_after_filter= ((use_cond_selectivity > 1) ?
|
|
records_after_filter :
|
|
records_best_filter);
|
|
best.prev_record_reads= prev_record_count;
|
|
best.identical_keys= identical_keys;
|
|
best.key= start_key;
|
|
best.found_ref= found_ref;
|
|
best.max_key_part= max_key_part;
|
|
best.ref_depends_map= found_ref;
|
|
best.filter= filter;
|
|
best.type= type;
|
|
}
|
|
else if (unlikely(thd->trace_started()))
|
|
{
|
|
trace_access_idx.
|
|
add("chosen", false).
|
|
add("cause", cause ? cause : "cost");
|
|
}
|
|
set_if_smaller(best.records_out, records);
|
|
} /* for each key */
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
No usable keys found. However, there may still be an option to use
|
|
"Range checked for each record" when all depending tables have
|
|
been read. s->key_dependent tells us which tables these could be and
|
|
s->key_start_dependent tells us if a first key part was used.
|
|
s->key_dependent may include more tables than could be used,
|
|
but this is ok as not having any usable keys is a rare thing and
|
|
the performance penalty for extra table bits is that
|
|
best_extension_by_limited_search() would not be able to prune tables
|
|
earlier.
|
|
Example query:
|
|
SELECT * FROM t1,t2 where t1.key1=t2.key1 OR t2.key2<1
|
|
*/
|
|
if (s->key_start_dependent)
|
|
key_dependent= s->key_dependent;
|
|
|
|
/* Add dependency for sub queries */
|
|
key_dependent|= s->embedded_dependent;
|
|
|
|
} /* if (s->keyuse) */
|
|
|
|
|
|
/* Check that s->key_dependent contains all used_tables found in s->keyuse */
|
|
key_dependent&= ~PSEUDO_TABLE_BITS;
|
|
DBUG_ASSERT((key_dependent & (s->key_dependent | s->embedded_dependent)) ==
|
|
key_dependent);
|
|
|
|
/*
|
|
If there is no key to access the table, but there is an equi-join
|
|
predicate connecting the table with the previous tables then we
|
|
consider the possibility of using hash join.
|
|
We need also to check that:
|
|
(1) s is inner table of semi-join -> join cache is allowed for semijoins
|
|
(2) s is inner table of outer join -> join cache is allowed for outer joins
|
|
*/
|
|
if (idx > join->const_tables && best.key == 0 &&
|
|
(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
|
|
join->max_allowed_join_cache_level > 2 &&
|
|
!bitmap_is_clear_all(eq_join_set) && !disable_jbuf &&
|
|
(!s->emb_sj_nest ||
|
|
join->allowed_semijoin_with_cache) && // (1)
|
|
(!(table->map & join->outer_join) ||
|
|
join->allowed_outer_join_with_cache)) // (2)
|
|
{
|
|
Json_writer_object trace_access_hash(thd);
|
|
double refills, row_copy_cost, copy_cost, cur_cost, where_cost;
|
|
double matching_combinations, fanout= 0.0, join_sel;
|
|
trace_access_hash.add("type", "hash");
|
|
trace_access_hash.add("index", "hj-key");
|
|
/* Estimate the cost of the hash join access to the table */
|
|
double rnd_records;
|
|
bool stats_found= 0;
|
|
|
|
rnd_records= apply_selectivity_for_table(s, use_cond_selectivity);
|
|
DBUG_ASSERT(rnd_records <= rows2double(s->found_records) + 0.5);
|
|
DBUG_ASSERT(hj_start_key);
|
|
|
|
fanout= rnd_records;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY) &&
|
|
rnd_records > 0)
|
|
{
|
|
/*
|
|
Starting from this point, rnd_records should not be used anymore.
|
|
Use "fanout" for an estimate of # matching records.
|
|
*/
|
|
fanout= hash_join_fanout(join, s, remaining_tables, rnd_records,
|
|
hj_start_key, &stats_found);
|
|
set_if_smaller(best.records_out, fanout);
|
|
join_sel= 1.0;
|
|
}
|
|
if (!stats_found)
|
|
{
|
|
/*
|
|
No OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY or no field statistics
|
|
found.
|
|
|
|
Take into account if there is non constant constraints used with
|
|
earlier tables in the where expression.
|
|
If yes, this will set fanout to rnd_records/4.
|
|
We estimate that there will be HASH_FANOUT (10%)
|
|
hash matches / row.
|
|
*/
|
|
fanout= ((found_constraint) ?
|
|
use_found_constraint(rnd_records) :
|
|
rnd_records);
|
|
set_if_smaller(best.records_out, fanout * HASH_FANOUT);
|
|
join_sel= HASH_FANOUT;
|
|
}
|
|
|
|
/*
|
|
The following cost calculation is identical to the cost calculation for
|
|
the join cache later on, except for the HASH_FANOUT
|
|
*/
|
|
if (s->quick)
|
|
{
|
|
/*
|
|
Cost of reading rows through opt_range including comparing the rows
|
|
with the attached WHERE clause.
|
|
*/
|
|
cur_cost= s->quick->read_time;
|
|
}
|
|
else
|
|
cur_cost= s->cached_scan_and_compare_time;
|
|
|
|
/* We read the table as many times as join buffer becomes full. */
|
|
refills= (1.0 + floor((double) cache_record_length(join,idx) *
|
|
record_count /
|
|
(double) thd->variables.join_buff_size));
|
|
cur_cost= COST_MULT(cur_cost, refills);
|
|
|
|
|
|
/*
|
|
Cost of doing the hash lookup and check all matching rows with the
|
|
WHERE clause.
|
|
We assume here that, thanks to the hash, we don't have to compare all
|
|
row combinations, only a fanout or HASH_FANOUT (10%) rows in the cache.
|
|
*/
|
|
row_copy_cost= (ROW_COPY_COST_THD(thd) *
|
|
JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
|
|
matching_combinations= fanout * join_sel * record_count;
|
|
copy_cost= (record_count * row_copy_cost +
|
|
matching_combinations *
|
|
((idx - join->const_tables) * row_copy_cost));
|
|
where_cost= matching_combinations * WHERE_COST_THD(thd);
|
|
cur_cost= COST_ADD(cur_cost, copy_cost + where_cost);
|
|
|
|
best.cost= cur_cost;
|
|
best.records_read= best.records_after_filter= rows2double(s->records);
|
|
best.records= rnd_records; // Records after where (Legacy value)
|
|
best.key= hj_start_key;
|
|
best.ref_depends_map= 0;
|
|
best.use_join_buffer= TRUE;
|
|
best.filter= 0;
|
|
best.type= JT_HASH;
|
|
best.refills= double_to_ulonglong(ceil(refills));
|
|
if (unlikely(trace_access_hash.trace_started()))
|
|
trace_access_hash.
|
|
add("rows", rnd_records).
|
|
add("rows_after_hash", fanout * join_sel).
|
|
add("refills", refills).
|
|
add("jbuf_use_cost", copy_cost).
|
|
add("extra_cond_check_cost", where_cost).
|
|
add("total_cost", best.cost).
|
|
add("chosen", true);
|
|
}
|
|
|
|
/*
|
|
Don't test table scan if it can't be better.
|
|
Prefer key lookup if we would use the same key for scanning.
|
|
|
|
Don't do a table scan on InnoDB tables, if we can read the used
|
|
parts of the row from any of the used index.
|
|
This is because table scans uses index and we would not win
|
|
anything by using a table scan.
|
|
|
|
A word for word translation of the below if-statement in sergefp's
|
|
understanding: we check if we should use table scan if:
|
|
(1) The found 'ref' access produces more records than a table scan
|
|
(or index scan, or quick select), or 'ref' is more expensive than
|
|
any of them.
|
|
(2) This doesn't hold: the best way to perform table scan is to perform
|
|
'range' access using index IDX, and the best way to perform 'ref'
|
|
access is to use the same index IDX, with the same or more key parts.
|
|
(note: it is not clear how this rule is/should be extended to
|
|
index_merge quick selects). Also if we have a hash join we prefer that
|
|
over a table scan. This heuristic doesn't apply if the quick select
|
|
uses the group-by min-max optimization.
|
|
(3) See above note about InnoDB.
|
|
(4) NOT ("FORCE INDEX(...)" is used for table and there is 'ref' access
|
|
path, but there is no quick select)
|
|
If the condition in the above brackets holds, then the only possible
|
|
"table scan" access method is ALL/index (there is no quick select).
|
|
Since we have a 'ref' access path, and FORCE INDEX instructs us to
|
|
choose it over ALL/index, there is no need to consider a full table
|
|
scan.
|
|
(5) Non-flattenable semi-joins: don't consider doing a scan of temporary
|
|
table if we had an option to make lookups into it. In real-world cases,
|
|
lookups are cheaper than full scans, but when the table is small, they
|
|
can be [considered to be] more expensive, which causes lookups not to
|
|
be used for cases with small datasets, which is annoying.
|
|
*/
|
|
Json_writer_object trace_access_scan(thd);
|
|
if ((best.records_read >= s->found_records ||
|
|
best.cost > s->read_time) && // (1)
|
|
!(best.key && best.key->key == MAX_KEY) && // (2)
|
|
!(s->quick &&
|
|
s->quick->get_type() != QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX && // (2)
|
|
best.key && s->quick->index == best.key->key && // (2)
|
|
table->opt_range_keys.is_set(best.key->key) && // (2)
|
|
best.max_key_part >= table->opt_range[best.key->key].key_parts) &&// (2)
|
|
!((file->ha_table_flags() & HA_TABLE_SCAN_ON_INDEX) && // (3)
|
|
!table->covering_keys.is_clear_all() && best.key && !s->quick) &&// (3)
|
|
!(table->force_index_join && best.key && !s->quick) && // (4)
|
|
!(best.key && table->pos_in_table_list->jtbm_subselect)) // (5)
|
|
{ // Check full join
|
|
double records_after_filter, org_records;
|
|
double records_best_filter, cur_cost;
|
|
Range_rowid_filter_cost_info *filter= 0;
|
|
double startup_cost= s->startup_cost;
|
|
const char *scan_type= "";
|
|
enum join_type type;
|
|
uint forced_index= MAX_KEY;
|
|
bool force_plan= 0, use_join_buffer= 0;
|
|
ulonglong refills= 1;
|
|
ALL_READ_COST cost;
|
|
|
|
/*
|
|
Range optimizer never proposes a RANGE if it isn't better
|
|
than FULL: so if RANGE is present, it's always preferred to FULL.
|
|
Here we estimate its cost.
|
|
*/
|
|
|
|
if (s->quick)
|
|
{
|
|
/*
|
|
For each record we:
|
|
- read record range through 'quick'
|
|
- skip rows which does not satisfy WHERE constraints
|
|
*/
|
|
|
|
/*
|
|
Use record count from range optimizer.
|
|
This is done to make records found comparable to what we get with
|
|
'ref' access.
|
|
*/
|
|
org_records= records_after_filter= rows2double(s->found_records);
|
|
records_best_filter= org_records;
|
|
set_if_smaller(best.records_out, records_best_filter);
|
|
|
|
if (s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE)
|
|
{
|
|
uint key_no= s->quick->index;
|
|
TABLE::OPT_RANGE *range= &table->opt_range[key_no];
|
|
|
|
/*
|
|
Ensure that 'range' and 's' are coming from the same source
|
|
The complex 'double' comparison is there because floating point
|
|
registers complications when costs are calculated.
|
|
*/
|
|
DBUG_ASSERT(range->rows >= s->found_records);
|
|
DBUG_ASSERT((range->cost.total_cost() == 0.0 &&
|
|
s->quick->read_time == 0.0) ||
|
|
compare_cost(range->cost.total_cost(),
|
|
s->quick->read_time));
|
|
DBUG_ASSERT(compare_cost(range->cost.comp_cost,
|
|
range->rows * file->WHERE_COST));
|
|
|
|
/* Get range cost. This does not include cost of the WHERE */
|
|
range->get_costs(&cost);
|
|
/* Ensure that cost from opt_range are correct */
|
|
DBUG_ASSERT(compare_cost(file->cost_no_capping(&cost) +
|
|
range->cost.comp_cost +
|
|
range->cost.setup_cost,
|
|
s->quick->read_time));
|
|
|
|
if (table->can_use_rowid_filter(key_no))
|
|
{
|
|
filter= table->best_range_rowid_filter(key_no,
|
|
rows2double(range->rows),
|
|
file->cost(&cost),
|
|
file->cost(cost.index_cost),
|
|
record_count,
|
|
&records_best_filter);
|
|
set_if_smaller(best.records_out, records_best_filter);
|
|
if (filter)
|
|
{
|
|
filter= filter->apply_filter(thd, table, &cost,
|
|
&records_after_filter,
|
|
&startup_cost,
|
|
range->ranges,
|
|
record_count);
|
|
if (filter)
|
|
{
|
|
set_if_smaller(best.records_out, records_after_filter);
|
|
table->opt_range[key_no].selectivity= filter->selectivity;
|
|
}
|
|
}
|
|
}
|
|
if (best.key && key_no == best.key->key &&
|
|
!best.found_ref &&
|
|
best.max_key_part < table->opt_range[best.key->key].key_parts &&
|
|
table->opt_range[best.key->key].ranges == 1)
|
|
{
|
|
/*
|
|
Force to use range as it is using the 'best key' and using more
|
|
key parts (and thus will read less rows)
|
|
*/
|
|
force_plan= 1;
|
|
}
|
|
type= JT_RANGE;
|
|
/*
|
|
We cannot use range->cost.comp_cost here as records_after_filter
|
|
will be different if a filter is used.
|
|
*/
|
|
cost.copy_cost+= (records_after_filter * file->WHERE_COST +
|
|
range->cost.setup_cost);
|
|
}
|
|
else
|
|
{
|
|
type= JT_INDEX_MERGE;
|
|
/*
|
|
We don't know exactly from where the costs comes from.
|
|
Let's store it in copy_cost.
|
|
Note that s->quick->read_time includes the cost of comparing
|
|
the row with the where clause (WHERE_COST)
|
|
*/
|
|
cost.reset();
|
|
cost.copy_cost= s->quick->read_time;
|
|
}
|
|
loose_scan_opt.check_range_access(join, idx, s->quick);
|
|
}
|
|
else
|
|
{
|
|
double records_table_filter;
|
|
|
|
/* We will now calculate cost of scan, with or without join buffer */
|
|
records_best_filter= records_after_filter=
|
|
apply_selectivity_for_table(s, use_cond_selectivity);
|
|
records_table_filter= ((found_constraint) ?
|
|
use_found_constraint(records_after_filter) :
|
|
records_after_filter);
|
|
|
|
DBUG_ASSERT(records_after_filter <= s->records);
|
|
DBUG_ASSERT(records_after_filter <= s->found_records);
|
|
|
|
set_if_smaller(best.records_out, records_table_filter);
|
|
|
|
org_records= rows2double(s->records);
|
|
|
|
/* Estimate cost of reading table. */
|
|
if (s->cached_forced_index_type)
|
|
{
|
|
type= s->cached_forced_index_type;
|
|
cost= s->cached_forced_index_cost;
|
|
forced_index= s->cached_forced_index;
|
|
}
|
|
else
|
|
{
|
|
if (table->force_index_join && !best.key)
|
|
{
|
|
/*
|
|
The query is using 'forced_index' and we did not find a usable key.
|
|
Calculate cost of a table scan with the forced index.
|
|
*/
|
|
type= JT_NEXT;
|
|
if (s->cached_covering_key != MAX_KEY)
|
|
{
|
|
/* Use value from estimate_scan_time */
|
|
forced_index= s->cached_covering_key;
|
|
cost= s->cached_scan_and_compare_cost;
|
|
}
|
|
else
|
|
{
|
|
#ifdef FORCE_INDEX_SHOULD_FORCE_INDEX_SCAN
|
|
/* No cached key, use shortest allowed key */
|
|
key_map keys= *file->keys_to_use_for_scanning();
|
|
keys.intersect(table->keys_in_use_for_query);
|
|
if ((forced_index= find_shortest_key(table, &keys)) < MAX_KEY)
|
|
{
|
|
cost= cost_for_index_read(thd, table,
|
|
forced_index,
|
|
s->records, 0);
|
|
/* Calculate cost of checking the attached WHERE */
|
|
cost.copy_cost+= s->records * file->WHERE_COST;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
/* No usable key, use table scan */
|
|
cost= s->cached_scan_and_compare_cost;
|
|
type= JT_ALL;
|
|
}
|
|
}
|
|
}
|
|
else // table scan
|
|
{
|
|
cost= s->cached_scan_and_compare_cost;
|
|
type= JT_ALL;
|
|
}
|
|
/* Cache result for other calls */
|
|
s->cached_forced_index_type= type;
|
|
s->cached_forced_index_cost= cost;
|
|
s->cached_forced_index= forced_index;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Note: the condition checked here is very out of date and incorrect.
|
|
Below, we use a more accurate check when assigning the value of
|
|
best.use_join_buffer.
|
|
*/
|
|
if ((s->table->map & join->outer_join) || disable_jbuf)
|
|
{
|
|
/*
|
|
Simple scan
|
|
We estimate we have to read org_records rows.
|
|
records_after_filter rows will survive the where check of constants.
|
|
'best.records_out' rows will survive after the check against columns
|
|
from previous tables.
|
|
*/
|
|
scan_type= "scan";
|
|
|
|
/*
|
|
We have to compare each row set against all previous row combinations
|
|
*/
|
|
cur_cost= file->cost_for_reading_multiple_times(record_count,
|
|
&cost);
|
|
}
|
|
else
|
|
{
|
|
/* Scan through join cache */
|
|
double cmp_time, row_copy_cost, tmp_refills;
|
|
|
|
/*
|
|
Note that the cost of checking all rows against the table specific
|
|
WHERE is already included in cur_cost.
|
|
*/
|
|
scan_type= "scan_with_join_cache";
|
|
|
|
/* Calculate cost of refills */
|
|
tmp_refills= (1.0 + floor((double) cache_record_length(join,idx) *
|
|
(record_count /
|
|
(double) thd->variables.join_buff_size)));
|
|
cur_cost= file->cost_for_reading_multiple_times(tmp_refills,
|
|
&cost);
|
|
refills= double_to_ulonglong(ceil(tmp_refills));
|
|
|
|
/* We come here only if there are already rows in the join cache */
|
|
DBUG_ASSERT(idx != join->const_tables);
|
|
/*
|
|
records_after_filter is the number of rows that have survived
|
|
the table specific WHERE check that only involves constants.
|
|
|
|
Calculate cost of:
|
|
- Copying all previous record combinations to the join cache
|
|
- Copying the tables from the join cache to table records
|
|
- Checking the WHERE against the final row combination
|
|
*/
|
|
row_copy_cost= (ROW_COPY_COST_THD(thd) *
|
|
JOIN_CACHE_ROW_COPY_COST_FACTOR(thd));
|
|
cmp_time= (record_count * row_copy_cost +
|
|
records_after_filter * record_count *
|
|
((idx - join->const_tables) * row_copy_cost +
|
|
WHERE_COST_THD(thd)));
|
|
cur_cost= COST_ADD(cur_cost, cmp_time);
|
|
use_join_buffer= 1;
|
|
}
|
|
|
|
/* Splitting technique cannot be used with join cache */
|
|
if (table->is_splittable())
|
|
startup_cost+= table->get_materialization_cost();
|
|
cur_cost+= startup_cost;
|
|
|
|
if (unlikely(trace_access_scan.trace_started()))
|
|
{
|
|
trace_access_scan.
|
|
add("access_type",
|
|
type == JT_ALL ? scan_type : join_type_str[type]);
|
|
if (type == JT_RANGE)
|
|
trace_access_scan.
|
|
add("range_index", table->key_info[s->quick->index].name);
|
|
trace_access_scan.
|
|
add("rows", org_records).
|
|
add("rows_after_filter", records_after_filter).
|
|
add("rows_out", best.records_out).
|
|
add("cost", cur_cost);
|
|
if (use_join_buffer)
|
|
trace_access_scan.
|
|
add("cost_without_join_buffer",
|
|
file->cost_for_reading_multiple_times(record_count, &cost));
|
|
if (type == JT_ALL)
|
|
{
|
|
trace_access_scan.add("index_only",
|
|
(s->cached_covering_key != MAX_KEY));
|
|
}
|
|
}
|
|
|
|
if (cur_cost + COST_EPS < best.cost || force_plan)
|
|
{
|
|
/*
|
|
If the table has a range (s->quick is set) make_join_select()
|
|
will ensure that this will be used
|
|
*/
|
|
best.cost= cur_cost;
|
|
best.records_read= org_records; // Records accessed
|
|
best.records= records_after_filter; // Records to be checked against
|
|
// previous row combinations
|
|
|
|
/*
|
|
If we are using 'use_cond_selectivity > 1' then
|
|
table_after_join_selectivity may take into account other
|
|
filters that what is currently used so we have to use
|
|
records_after_filter. If 'use_cond_selectivity <= 1 then we
|
|
can use information from the best filter.
|
|
*/
|
|
best.records_after_filter= ((use_cond_selectivity > 1) ?
|
|
records_after_filter :
|
|
records_best_filter);
|
|
best.key= 0;
|
|
best.forced_index= forced_index;
|
|
/*
|
|
filter is only set if
|
|
s->quick->get_type() == QUICK_SELECT_I::QS_TYPE_RANGE
|
|
*/
|
|
best.filter= filter;
|
|
/* range/index_merge/ALL/index access method are "independent", so: */
|
|
best.ref_depends_map= 0;
|
|
best.use_join_buffer= use_join_buffer ||
|
|
MY_TEST(!disable_jbuf &&
|
|
(join->allowed_outer_join_with_cache ||
|
|
!(s->table->map & join->outer_join)));
|
|
best.refills= refills;
|
|
best.spl_plan= 0;
|
|
best.type= type;
|
|
trace_access_scan.add("chosen", true);
|
|
}
|
|
else
|
|
trace_access_scan.add("chosen", false);
|
|
}
|
|
else
|
|
{
|
|
if (unlikely(trace_access_scan.trace_started()))
|
|
trace_access_scan.
|
|
add("type", "scan").
|
|
add("chosen", false).
|
|
add("cause", "cost");
|
|
}
|
|
|
|
crash_if_first_double_is_bigger(best.records_out, best.records);
|
|
crash_if_first_double_is_bigger(best.records_out, best.records_read);
|
|
|
|
/* Update the cost information for the current partial plan */
|
|
pos->loops= record_count;
|
|
pos->records_init= best.records_read;
|
|
pos->records_after_filter= best.records_after_filter;
|
|
pos->records_read= best.records;
|
|
pos->records_out= best.records_out;
|
|
pos->prev_record_reads= best.prev_record_reads;
|
|
pos->identical_keys= best.identical_keys;
|
|
pos->read_time= best.cost;
|
|
pos->key= best.key;
|
|
pos->forced_index= best.forced_index;
|
|
pos->type= best.type;
|
|
pos->table= s;
|
|
pos->ref_depend_map= best.ref_depends_map;
|
|
pos->loosescan_picker.loosescan_key= MAX_KEY;
|
|
pos->use_join_buffer= best.use_join_buffer;
|
|
pos->firstmatch_with_join_buf= 0;
|
|
pos->spl_plan= best.spl_plan;
|
|
pos->spl_pd_boundary= best.spl_plan ? spl_pd_boundary: 0;
|
|
pos->range_rowid_filter_info= best.filter;
|
|
pos->key_dependent= (best.type == JT_EQ_REF ? (table_map) 0 :
|
|
key_dependent & remaining_tables);
|
|
pos->refills= best.refills;
|
|
|
|
loose_scan_opt.save_to_position(s, record_count, pos->records_out,
|
|
loose_scan_pos);
|
|
|
|
if (!best.key &&
|
|
idx == join->const_tables && // First table
|
|
table == join->sort_by_table &&
|
|
join->unit->lim.get_select_limit() >= best.records) // QQQ Why?
|
|
{
|
|
trace_access_scan.add("use_tmp_table", true);
|
|
join->sort_by_table= (TABLE*) 1; // Must use temporary table
|
|
}
|
|
trace_access_scan.end();
|
|
trace_paths.end();
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
print_best_access_for_table(thd, pos);
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Find JOIN_TAB's embedding (i.e, parent) subquery.
|
|
- For merged semi-joins, tables inside the semi-join nest have their
|
|
semi-join nest as parent. We intentionally ignore results of table
|
|
pullout action here.
|
|
- For non-merged semi-joins (JTBM tabs), the embedding subquery is the
|
|
JTBM join tab itself.
|
|
*/
|
|
|
|
static TABLE_LIST* get_emb_subq(JOIN_TAB *tab)
{
  TABLE_LIST *own_entry= tab->table->pos_in_table_list;

  /* A non-merged semi-join (JTBM tab) is its own embedding subquery */
  if (own_entry->jtbm_subselect)
    return own_entry;

  /* Otherwise the parent is the embedding nest, if it is a semi-join nest */
  TABLE_LIST *nest= own_entry->embedding;
  return (nest && nest->sj_subq_pred) ? nest : NULL;
}
|
|
|
|
|
|
/*
|
|
Choose initial table order that "helps" semi-join optimizations.
|
|
|
|
The idea is that we should start with the order that is the same as the one
|
|
we would have had if we had semijoin=off:
|
|
- Top-level tables go first
|
|
- subquery tables are grouped together by the subquery they are in,
|
|
- subquery tables are attached where the subquery predicate would have been
|
|
attached if we had semi-join off.
|
|
|
|
This function relies on join_tab_cmp()/join_tab_cmp_straight() to produce
|
|
a certain preliminary ordering, see compare_embedding_subqueries() for its
|
|
description.
|
|
*/
|
|
|
|
static void choose_initial_table_order(JOIN *join)
{
  JOIN_TAB **const first_tab= join->best_ref + join->const_tables;
  JOIN_TAB **const tabs_end= first_tab + join->table_count -
                             join->const_tables;
  DBUG_ENTER("choose_initial_table_order");

  /* Skip the top-level JOIN_TABs; stop at the first subquery JOIN_TAB */
  JOIN_TAB **tab= first_tab;
  while (tab != tabs_end && !get_emb_subq(*tab))
    tab++;

  const uint n_subquery_tabs= (uint)(tabs_end - tab);
  if (n_subquery_tabs == 0)
    DBUG_VOID_RETURN;

  /*
    Keep a private copy of the subquery JOIN_TABs: their slots in
    join->best_ref get overwritten while the groups are re-inserted below.
  */
  JOIN_TAB *subquery_tabs[MAX_TABLES];
  memcpy(subquery_tabs, tab, sizeof(JOIN_TAB*) * n_subquery_tabs);

  JOIN_TAB **last_top_level_tab= tab;
  JOIN_TAB **const subq_tabs_end= subquery_tabs + n_subquery_tabs;
  TABLE_LIST *cur_subq_nest= NULL;
  JOIN_TAB **subq_tab= subquery_tabs;

  while (subq_tab < subq_tabs_end)
  {
    TABLE_LIST *nest= get_emb_subq(*subq_tab);
    if (nest == cur_subq_nest)
    {
      subq_tab++;
      continue;
    }

    /* A new run of tables that all belong to the same subquery starts here */
    cur_subq_nest= nest;

    /* Find the end of the run to learn how many tables the subquery has */
    JOIN_TAB **group_end= subq_tab;
    while (group_end < subq_tabs_end &&
           get_emb_subq(*group_end) == cur_subq_nest)
      group_end++;
    const uint n_subquery_tables= (uint)(group_end - subq_tab);

    /*
      Walk the part of best_ref placed so far and find the first position
      after which all tables needed by the subquery predicate are present.
    */
    table_map need_tables= cur_subq_nest->original_subq_pred_used_tables;
    need_tables&= ~(join->const_table_map | PSEUDO_TABLE_BITS);
    for (JOIN_TAB **top_level_tab= first_tab;
         top_level_tab < last_top_level_tab;
         top_level_tab++)
    {
      need_tables&= ~(*top_level_tab)->table->map;
      if (need_tables)
        continue;                               /* Not the attach point yet */

      /* Shift the tables that follow top_level_tab to open a gap ... */
      size_t top_tail_len= last_top_level_tab - top_level_tab - 1;
      memmove(top_level_tab + 1 + n_subquery_tables, top_level_tab + 1,
              sizeof(JOIN_TAB*) * top_tail_len);
      last_top_level_tab+= n_subquery_tables;
      /* ... and put the subquery's tables into that gap */
      memcpy(top_level_tab + 1, subq_tab,
             sizeof(JOIN_TAB*) * n_subquery_tables);
      break;
    }
    DBUG_ASSERT(!need_tables);

    /* Continue with the first table after this subquery's run */
    subq_tab= group_end;
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
|
|
/**
|
|
Selects and invokes a search strategy for an optimal query plan.
|
|
|
|
The function checks user-configurable parameters that control the search
|
|
strategy for an optimal plan, selects the search method and then invokes
|
|
it. Each specific optimization procedure stores the final optimal plan in
|
|
the array 'join->best_positions', and the cost of the plan in
|
|
'join->best_read'.
|
|
|
|
@param join pointer to the structure providing all context info for
|
|
the query
|
|
@param join_tables set of the tables in the query
|
|
@param emb_sjm_nest List of tables in case of materialized semi-join nest
|
|
|
|
@retval
|
|
FALSE ok
|
|
@retval
|
|
TRUE Fatal error
|
|
*/
|
|
|
|
bool
|
|
choose_plan(JOIN *join, table_map join_tables, TABLE_LIST *emb_sjm_nest)
|
|
{
|
|
uint search_depth= join->thd->variables.optimizer_search_depth;
|
|
uint use_cond_selectivity=
|
|
join->thd->variables.optimizer_use_condition_selectivity;
|
|
bool straight_join= MY_TEST(join->select_options & SELECT_STRAIGHT_JOIN);
|
|
THD *thd= join->thd;
|
|
qsort2_cmp jtab_sort_func;
|
|
DBUG_ENTER("choose_plan");
|
|
|
|
join->limit_optimization_mode= false;
|
|
join->cur_embedding_map= 0;
|
|
join->extra_heuristic_pruning= false;
|
|
join->prune_level= join->thd->variables.optimizer_prune_level;
|
|
|
|
reset_nj_counters(join, join->join_list);
|
|
|
|
if ((join->emb_sjm_nest= emb_sjm_nest))
|
|
{
|
|
/* We're optimizing semi-join materialization nest, so put the
|
|
tables from this semi-join as first
|
|
*/
|
|
jtab_sort_func= join_tab_cmp_embedded_first;
|
|
/*
|
|
If we are searching for the execution plan of a materialized semi-join
|
|
nest then allowed_tables contains bits only for the tables from this
|
|
nest.
|
|
*/
|
|
join->allowed_tables= (emb_sjm_nest->sj_inner_tables &
|
|
~join->const_table_map);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
if (SELECT_STRAIGHT_JOIN option is set)
|
|
reorder tables so dependent tables come after tables they depend
|
|
on, otherwise keep tables in the order they were specified in the query
|
|
else
|
|
Apply heuristic: pre-sort all access plans with respect to the number
|
|
of records accessed.
|
|
*/
|
|
jtab_sort_func= straight_join ? join_tab_cmp_straight : join_tab_cmp;
|
|
join->allowed_tables= ~join->const_table_map;
|
|
}
|
|
|
|
/*
|
|
psergey-todo: if we're not optimizing an SJM nest,
|
|
- sort that outer tables are first, and each sjm nest follows
|
|
- then, put each [sjm_table1, ... sjm_tableN] sub-array right where
|
|
WHERE clause pushdown would have put it.
|
|
*/
|
|
my_qsort2(join->best_ref + join->const_tables,
|
|
join->table_count - join->const_tables, sizeof(JOIN_TAB*),
|
|
jtab_sort_func, (void*) emb_sjm_nest);
|
|
|
|
Json_writer_object wrapper(thd);
|
|
Json_writer_array trace_plan(thd,"considered_execution_plans");
|
|
|
|
if (!emb_sjm_nest)
|
|
choose_initial_table_order(join);
|
|
|
|
/*
|
|
Note: constant tables are already in the join prefix. We don't
|
|
put them into the cur_sj_inner_tables, though.
|
|
*/
|
|
|
|
join->cur_sj_inner_tables= 0;
|
|
|
|
if (straight_join)
|
|
{
|
|
optimize_straight_join(join, join_tables);
|
|
}
|
|
else
|
|
{
|
|
DBUG_ASSERT(search_depth <= MAX_TABLES + 1);
|
|
if (search_depth == 0)
|
|
/* Automatically determine a reasonable value for 'search_depth' */
|
|
search_depth= determine_search_depth(join);
|
|
|
|
if (join->prune_level >= 1 &&
|
|
search_depth >= thd->variables.optimizer_extra_pruning_depth)
|
|
{
|
|
join->extra_heuristic_pruning= true;
|
|
}
|
|
|
|
double limit_cost= DBL_MAX;
|
|
double limit_record_count;
|
|
POSITION *limit_plan= NULL;
|
|
|
|
/* First, build a join plan that can short-cut ORDER BY...LIMIT */
|
|
if (join->limit_shortcut_applicable && !join->emb_sjm_nest)
|
|
{
|
|
bool res;
|
|
Json_writer_object wrapper(join->thd);
|
|
Json_writer_array trace(join->thd, "join_limit_shortcut_plan_search");
|
|
join->limit_optimization_mode= true;
|
|
res= greedy_search(join, join_tables, search_depth,
|
|
use_cond_selectivity);
|
|
join->limit_optimization_mode= false;
|
|
|
|
if (res)
|
|
DBUG_RETURN(TRUE);
|
|
DBUG_ASSERT(join->best_read != DBL_MAX);
|
|
|
|
/*
|
|
We've built a join order. Adjust its cost based on ORDER BY...LIMIT
|
|
short-cutting.
|
|
*/
|
|
limit_plan= join_limit_shortcut_finalize_plan(join, &limit_cost);
|
|
limit_record_count= join->join_record_count;
|
|
}
|
|
|
|
/* The main call to search for the query plan: */
|
|
if (greedy_search(join, join_tables, search_depth, use_cond_selectivity))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
DBUG_ASSERT(join->best_read != DBL_MAX);
|
|
if (limit_plan && limit_cost < join->best_read)
|
|
{
|
|
/* Plan that uses ORDER BY ... LIMIT shortcutting is better. */
|
|
memcpy((uchar*)join->best_positions, (uchar*)limit_plan,
|
|
sizeof(POSITION)*join->table_count);
|
|
join->best_read= limit_cost;
|
|
join->join_record_count= limit_record_count;
|
|
}
|
|
}
|
|
|
|
join->emb_sjm_nest= 0;
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
Compare two join tabs based on the subqueries they are from.
|
|
- top-level join tabs go first
|
|
- then subqueries are ordered by their select_id (we're using this
|
|
criteria because we need a cross-platform, deterministic ordering)
|
|
|
|
@return
|
|
0 - equal
|
|
-1 - jt1 < jt2
|
|
1 - jt1 > jt2
|
|
*/
|
|
|
|
static int compare_embedding_subqueries(JOIN_TAB *jt1, JOIN_TAB *jt2)
|
|
{
|
|
/* Determine if the first table is originally from a subquery */
|
|
TABLE_LIST *tbl1= jt1->table->pos_in_table_list;
|
|
uint tbl1_select_no;
|
|
if (tbl1->jtbm_subselect)
|
|
{
|
|
tbl1_select_no=
|
|
tbl1->jtbm_subselect->unit->first_select()->select_number;
|
|
}
|
|
else if (tbl1->embedding && tbl1->embedding->sj_subq_pred)
|
|
{
|
|
tbl1_select_no=
|
|
tbl1->embedding->sj_subq_pred->unit->first_select()->select_number;
|
|
}
|
|
else
|
|
tbl1_select_no= 1; /* Top-level */
|
|
|
|
/* Same for the second table */
|
|
TABLE_LIST *tbl2= jt2->table->pos_in_table_list;
|
|
uint tbl2_select_no;
|
|
if (tbl2->jtbm_subselect)
|
|
{
|
|
tbl2_select_no=
|
|
tbl2->jtbm_subselect->unit->first_select()->select_number;
|
|
}
|
|
else if (tbl2->embedding && tbl2->embedding->sj_subq_pred)
|
|
{
|
|
tbl2_select_no=
|
|
tbl2->embedding->sj_subq_pred->unit->first_select()->select_number;
|
|
}
|
|
else
|
|
tbl2_select_no= 1; /* Top-level */
|
|
|
|
/*
|
|
Put top-level tables in front. Tables from within subqueries must follow,
|
|
grouped by their owner subquery. We don't care about the order that
|
|
subquery groups are in, because choose_initial_table_order() will re-order
|
|
the groups.
|
|
*/
|
|
if (tbl1_select_no != tbl2_select_no)
|
|
return tbl1_select_no > tbl2_select_no ? 1 : -1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Compare two JOIN_TAB objects based on the number of accessed records.
|
|
|
|
@param ptr1 pointer to first JOIN_TAB object
|
|
@param ptr2 pointer to second JOIN_TAB object
|
|
|
|
NOTES
|
|
The order relation implemented by join_tab_cmp() is not transitive,
|
|
i.e. it is possible to choose such a, b and c that (a < b) && (b < c)
|
|
but (c < a). This implies that result of a sort using the relation
|
|
implemented by join_tab_cmp() depends on the order in which
|
|
elements are compared, i.e. the result is implementation-specific.
|
|
Example:
|
|
a: dependent = 0x0 table->map = 0x1 found_records = 3 ptr = 0x907e6b0
|
|
b: dependent = 0x0 table->map = 0x2 found_records = 3 ptr = 0x907e838
|
|
c: dependent = 0x6 table->map = 0x10 found_records = 2 ptr = 0x907ecd0
|
|
|
|
As for subqueries, this function must produce order that can be fed to
|
|
choose_initial_table_order().
|
|
|
|
@retval
|
|
1 if first is bigger
|
|
@retval
|
|
-1 if second is bigger
|
|
@retval
|
|
0 if equal
|
|
*/
|
|
|
|
static int
|
|
join_tab_cmp(const void *dummy, const void* ptr1, const void* ptr2)
|
|
{
|
|
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
|
|
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
|
|
int cmp;
|
|
|
|
if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
|
|
return cmp;
|
|
/*
|
|
After that do ordering according to numbers of
|
|
records in the table.
|
|
*/
|
|
if (jt1->found_records > jt2->found_records)
|
|
return 1;
|
|
if (jt1->found_records < jt2->found_records)
|
|
return -1;
|
|
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
|
|
}
|
|
|
|
|
|
/**
|
|
Same as join_tab_cmp, but for use with SELECT_STRAIGHT_JOIN.
|
|
*/
|
|
|
|
static int
|
|
join_tab_cmp_straight(const void *dummy, const void* ptr1, const void* ptr2)
|
|
{
|
|
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
|
|
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
|
|
|
|
/*
|
|
We don't do subquery flattening if the parent or child select has
|
|
STRAIGHT_JOIN modifier. It is complicated to implement and the semantics
|
|
is hardly useful.
|
|
*/
|
|
DBUG_ASSERT(!jt1->emb_sj_nest);
|
|
DBUG_ASSERT(!jt2->emb_sj_nest);
|
|
|
|
int cmp;
|
|
if ((cmp= compare_embedding_subqueries(jt1, jt2)) != 0)
|
|
return cmp;
|
|
|
|
/*
|
|
We have to check dependency with straight_join as we don't reorder
|
|
later as we do for other plans in best_extension_by_limited_search().
|
|
*/
|
|
if (jt1->dependent & jt2->table->map)
|
|
return 1;
|
|
if (jt2->dependent & jt1->table->map)
|
|
return -1;
|
|
|
|
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
|
|
}
|
|
|
|
|
|
/*
|
|
Same as join_tab_cmp but tables from within the given semi-join nest go
|
|
first. Used when the optimizing semi-join materialization nests.
|
|
*/
|
|
|
|
static int
|
|
join_tab_cmp_embedded_first(const void *emb, const void* ptr1, const void* ptr2)
|
|
{
|
|
const TABLE_LIST *emb_nest= (TABLE_LIST*) emb;
|
|
JOIN_TAB *jt1= *(JOIN_TAB**) ptr1;
|
|
JOIN_TAB *jt2= *(JOIN_TAB**) ptr2;
|
|
|
|
if (jt1->emb_sj_nest == emb_nest && jt2->emb_sj_nest != emb_nest)
|
|
return -1;
|
|
if (jt1->emb_sj_nest != emb_nest && jt2->emb_sj_nest == emb_nest)
|
|
return 1;
|
|
|
|
if (jt1->found_records > jt2->found_records)
|
|
return 1;
|
|
if (jt1->found_records < jt2->found_records)
|
|
return -1;
|
|
|
|
return jt1 > jt2 ? 1 : (jt1 < jt2 ? -1 : 0);
|
|
}
|
|
|
|
|
|
/**
|
|
Heuristic procedure to automatically guess a reasonable degree of
|
|
exhaustiveness for the greedy search procedure.
|
|
|
|
The procedure estimates the optimization time and selects a search depth
|
|
big enough to result in a near-optimal QEP, that doesn't take too long to
|
|
find. If the number of tables in the query exceeds some constant, then
|
|
search_depth is set to this constant.
|
|
|
|
@param join pointer to the structure providing all context info for
|
|
the query
|
|
|
|
@note
|
|
This is an extremely simplistic implementation that serves as a stub for a
|
|
more advanced analysis of the join. Ideally the search depth should be
|
|
determined by learning from previous query optimizations, because it will
|
|
depend on the CPU power (and other factors).
|
|
|
|
@todo
|
|
this value should be determined dynamically, based on statistics:
|
|
uint max_tables_for_exhaustive_opt= 7;
|
|
|
|
@todo
|
|
this value could be determined by some mapping of the form:
|
|
depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
|
|
|
|
@return
|
|
A positive integer that specifies the search depth (and thus the
|
|
exhaustiveness) of the depth-first search algorithm used by
|
|
'greedy_search'.
|
|
*/
|
|
|
|
static uint
determine_search_depth(JOIN *join)
{
  /* TODO: this value should be determined dynamically, based on statistics: */
  const uint max_tables_for_exhaustive_opt= 7;
  const uint table_count= join->table_count - join->const_tables;

  /* Few enough non-constant tables: use exhaustive search */
  if (table_count <= max_tables_for_exhaustive_opt)
    return table_count + 1;

  /*
    Otherwise use greedy search with a fixed depth.
    TODO: this value could be determined by some mapping of the form:
    depth : table_count -> [max_tables_for_exhaustive_opt..MAX_EXHAUSTIVE]
  */
  return max_tables_for_exhaustive_opt;
}
|
|
|
|
|
|
/**
|
|
Select the best ways to access the tables in a query without reordering them.
|
|
|
|
Find the best access paths for each query table and compute their costs
|
|
according to their order in the array 'join->best_ref' (thus without
|
|
reordering the join tables). The function calls sequentially
|
|
'best_access_path' for each table in the query to select the best table
|
|
access method. The final optimal plan is stored in the array
|
|
'join->best_positions', and the corresponding cost in 'join->best_read'.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param remaining_tables set of the tables in the query
|
|
|
|
@note
|
|
This function can be applied to:
|
|
- queries with STRAIGHT_JOIN
|
|
- internally to compute the cost of an arbitrary QEP
|
|
@par
|
|
Thus 'optimize_straight_join' can be used at any stage of the query
|
|
optimization process to finalize a QEP as it is.
|
|
*/
|
|
|
|
static void
|
|
optimize_straight_join(JOIN *join, table_map remaining_tables)
|
|
{
|
|
JOIN_TAB *s;
|
|
uint idx= join->const_tables;
|
|
bool disable_jbuf= join->thd->variables.join_cache_level == 0;
|
|
double record_count= 1.0;
|
|
double read_time= 0.0;
|
|
uint use_cond_selectivity=
|
|
join->thd->variables.optimizer_use_condition_selectivity;
|
|
POSITION loose_scan_pos;
|
|
THD *thd= join->thd;
|
|
|
|
for (JOIN_TAB **pos= join->best_ref + idx ; (s= *pos) ; pos++)
|
|
{
|
|
POSITION *position= join->positions + idx;
|
|
Json_writer_object trace_one_table(thd);
|
|
double original_record_count, current_record_count;
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
trace_plan_prefix(&trace_one_table, join, idx, remaining_tables);
|
|
/* Find the best access method from 's' to the current partial plan */
|
|
best_access_path(join, s, remaining_tables, join->positions, idx,
|
|
disable_jbuf, record_count,
|
|
position, &loose_scan_pos);
|
|
|
|
/* Compute the cost of the new plan extended with 's' */
|
|
current_record_count= COST_MULT(record_count, position->records_out);
|
|
read_time= COST_ADD(read_time, position->read_time);
|
|
original_record_count= current_record_count;
|
|
optimize_semi_joins(join, remaining_tables, idx, ¤t_record_count,
|
|
&read_time, &loose_scan_pos);
|
|
if (position->sj_strategy != SJ_OPT_NONE && original_record_count)
|
|
{
|
|
/* Adjust records_out to contain the final number of rows */
|
|
double ratio= current_record_count / original_record_count;
|
|
if (ratio < 1)
|
|
{
|
|
position->records_out*= ratio;
|
|
}
|
|
if (unlikely(trace_one_table.trace_started()))
|
|
{
|
|
trace_one_table.
|
|
add("sj_rows_out", position->records_out).
|
|
add("sj_rows_for_plan", current_record_count).
|
|
add("sj_filtered", safe_filtered(position->records_out,
|
|
position->records_init));
|
|
}
|
|
}
|
|
|
|
remaining_tables&= ~(s->table->map);
|
|
if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
|
|
{
|
|
double pushdown_cond_selectivity, records_out;
|
|
pushdown_cond_selectivity= table_after_join_selectivity(join, idx, s,
|
|
remaining_tables,
|
|
&records_out);
|
|
if (unlikely(thd->trace_started()) &&
|
|
pushdown_cond_selectivity != 1.0)
|
|
{
|
|
trace_one_table.
|
|
add("rows_out", records_out).
|
|
add("pushdown_cond_selectivity", pushdown_cond_selectivity).
|
|
add("filtered", safe_filtered(position->records_out,
|
|
position->records_init));
|
|
}
|
|
position->cond_selectivity= pushdown_cond_selectivity;
|
|
position->records_out= records_out;
|
|
current_record_count= COST_MULT(record_count, records_out);
|
|
}
|
|
else
|
|
position->cond_selectivity= 1.0;
|
|
|
|
position->partial_join_cardinality= current_record_count;
|
|
++idx;
|
|
record_count= current_record_count;
|
|
}
|
|
|
|
if (join->sort_by_table &&
|
|
join->sort_by_table != join->positions[join->const_tables].table->table)
|
|
{
|
|
/*
|
|
We may have to make a temp table, note that this is only a
|
|
heuristic since we cannot know for sure at this point if we
|
|
we are going to use addon fields or to have flush sorting to
|
|
disk. We also don't know the temporary table will be in memory
|
|
or disk.
|
|
The following calculation takes a middle ground where assume
|
|
we can sort the keys in memory but have to use a disk based
|
|
temporary table to retrive the rows.
|
|
This cost is probably much bigger than it has to be...
|
|
*/
|
|
double sort_cost;
|
|
sort_cost= (get_qsort_sort_cost((ha_rows)record_count, 0) +
|
|
record_count *
|
|
DISK_TEMPTABLE_LOOKUP_COST(thd));
|
|
{
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object trace_one_table(thd);
|
|
trace_one_table.add("estimated_cost_for_sorting", sort_cost);
|
|
}
|
|
}
|
|
read_time= COST_ADD(read_time, sort_cost);
|
|
}
|
|
memcpy((uchar*) join->best_positions, (uchar*) join->positions,
|
|
sizeof(POSITION)*idx);
|
|
join->join_record_count= record_count;
|
|
join->best_read= read_time;
|
|
}
|
|
|
|
|
|
/**
|
|
Find a good, possibly optimal, query execution plan (QEP) by a greedy search.
|
|
|
|
The search procedure uses a hybrid greedy/exhaustive search with controlled
|
|
exhaustiveness. The search is performed in N = card(remaining_tables)
|
|
steps. Each step evaluates how promising is each of the unoptimized tables,
|
|
selects the most promising table, and extends the current partial QEP with
|
|
that table. Currently the most 'promising' table is the one with least
|
|
expensive extension.
|
|
|
|
There are two extreme cases:
|
|
-# When (card(remaining_tables) < search_depth), the estimate finds the
|
|
best complete continuation of the partial QEP. This continuation can be
|
|
used directly as a result of the search.
|
|
-# When (search_depth == 1) the 'best_extension_by_limited_search'
|
|
considers the extension of the current QEP with each of the remaining
|
|
unoptimized tables.
|
|
|
|
All other cases are in-between these two extremes. Thus the parameter
|
|
'search_depth' controls the exhaustiveness of the search. The higher the
|
|
value, the longer the optimization time and possibly the better the
|
|
resulting plan. The lower the value, the fewer alternative plans are
|
|
estimated, but the more likely to get a bad QEP.
|
|
|
|
All intermediate and final results of the procedure are stored in 'join':
|
|
- join->positions : modified for every partial QEP that is explored
|
|
- join->best_positions: modified for the current best complete QEP
|
|
- join->best_read : modified for the current best complete QEP
|
|
- join->best_ref : might be partially reordered
|
|
|
|
The final optimal plan is stored in 'join->best_positions', and its
|
|
corresponding cost in 'join->best_read'.
|
|
|
|
@note
|
|
The following pseudocode describes the algorithm of 'greedy_search':
|
|
|
|
@code
|
|
procedure greedy_search
|
|
input: remaining_tables
|
|
output: pplan;
|
|
{
|
|
pplan = <>;
|
|
do {
|
|
(t, a) = best_extension(pplan, remaining_tables);
|
|
pplan = concat(pplan, (t, a));
|
|
remaining_tables = remaining_tables - t;
|
|
} while (remaining_tables != {})
|
|
return pplan;
|
|
}
|
|
|
|
@endcode
|
|
where 'best_extension' is a placeholder for a procedure that selects the
|
|
most "promising" of all tables in 'remaining_tables'.
|
|
Currently this estimate is performed by calling
|
|
'best_extension_by_limited_search' to evaluate all extensions of the
|
|
current QEP of size 'search_depth', thus the complexity of 'greedy_search'
|
|
mainly depends on that of 'best_extension_by_limited_search'.
|
|
|
|
@par
|
|
If 'best_extension()' == 'best_extension_by_limited_search()', then the
|
|
worst-case complexity of this algorithm is <=
|
|
O(N*N^search_depth/search_depth). When search_depth >= N, then the
|
|
complexity of greedy_search is O(N!).
|
|
|
|
@par
|
|
In the future, 'greedy_search' might be extended to support other
|
|
implementations of 'best_extension', e.g. some simpler quadratic procedure.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param remaining_tables set of tables not included into the partial plan yet
|
|
@param search_depth controls the exhaustiveness of the search
|
|
@param use_cond_selectivity specifies how the selectivity of the conditions
|
|
pushed to a table should be taken into account
|
|
|
|
@retval
|
|
FALSE ok
|
|
@retval
|
|
TRUE Fatal error
|
|
*/
|
|
|
|
static bool
|
|
greedy_search(JOIN *join,
|
|
table_map remaining_tables,
|
|
uint search_depth,
|
|
uint use_cond_selectivity)
|
|
{
|
|
double record_count= 1.0;
|
|
double read_time= 0.0;
|
|
uint idx= join->const_tables; // index into 'join->best_ref'
|
|
uint best_idx;
|
|
uint size_remain; // cardinality of remaining_tables
|
|
table_map usable_tables, eq_ref_tables;
|
|
POSITION best_pos;
|
|
JOIN_TAB *best_table; // the next plan node to be added to the curr QEP
|
|
// ==join->tables or # tables in the sj-mat nest we're optimizing
|
|
uint n_tables __attribute__((unused));
|
|
DBUG_ENTER("greedy_search");
|
|
DBUG_ASSERT(!(remaining_tables & join->const_table_map));
|
|
|
|
/* number of tables that remain to be optimized */
|
|
usable_tables= (join->emb_sjm_nest ?
|
|
(join->emb_sjm_nest->sj_inner_tables &
|
|
~join->const_table_map & remaining_tables):
|
|
remaining_tables);
|
|
n_tables= size_remain= my_count_bits(usable_tables);
|
|
|
|
join->next_sort_position= join->sort_positions;
|
|
do {
|
|
/*
|
|
Find the extension of the current QEP with the lowest cost
|
|
We are using remaining_table instead of usable tables here as
|
|
in case of an emb_sjm_nest, we want to be able to check if
|
|
an embedded table is depending on an outer table.
|
|
*/
|
|
join->best_read= DBL_MAX;
|
|
if ((int) best_extension_by_limited_search(join, remaining_tables, idx,
|
|
record_count,
|
|
read_time, search_depth,
|
|
use_cond_selectivity,
|
|
&eq_ref_tables) <
|
|
(int) SEARCH_OK)
|
|
DBUG_RETURN(TRUE);
|
|
/*
|
|
'best_read < DBL_MAX' means that optimizer managed to find
|
|
some plan and updated 'best_positions' array accordingly.
|
|
*/
|
|
DBUG_ASSERT(join->best_read < DBL_MAX);
|
|
|
|
if (size_remain <= search_depth)
|
|
{
|
|
/*
|
|
'join->best_positions' contains a complete optimal extension of the
|
|
current partial QEP.
|
|
*/
|
|
DBUG_EXECUTE("opt", print_plan(join, n_tables,
|
|
record_count, read_time, read_time,
|
|
"optimal"););
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
/* select the first table in the optimal extension as most promising */
|
|
best_pos= join->best_positions[idx];
|
|
best_table= best_pos.table;
|
|
/*
|
|
Each subsequent loop of 'best_extension_by_limited_search' uses
|
|
'join->positions' for cost estimates, therefore we have to update its
|
|
value.
|
|
*/
|
|
join->positions[idx]= best_pos;
|
|
|
|
/*
|
|
Update the interleaving state after extending the current partial plan
|
|
with a new table.
|
|
We are doing this here because best_extension_by_limited_search reverts
|
|
the interleaving state to the one of the non-extended partial plan
|
|
on exit.
|
|
*/
|
|
bool is_interleave_error __attribute__((unused))=
|
|
check_interleaving_with_nj (best_table);
|
|
/* This has been already checked by best_extension_by_limited_search */
|
|
DBUG_ASSERT(!is_interleave_error);
|
|
|
|
/*
|
|
Also, update the semi-join optimization state. Information about the
|
|
picked semi-join operation is in best_pos->...picker, but we need to
|
|
update the global state in the JOIN object, too.
|
|
*/
|
|
if (!join->emb_sjm_nest)
|
|
update_sj_state(join, best_table, idx, remaining_tables);
|
|
|
|
/* find the position of 'best_table' in 'join->best_ref' */
|
|
best_idx= idx;
|
|
JOIN_TAB *pos= join->best_ref[best_idx];
|
|
while (pos && best_table != pos)
|
|
pos= join->best_ref[++best_idx];
|
|
DBUG_ASSERT((pos != NULL)); // should always find 'best_table'
|
|
|
|
/*
|
|
Move 'best_table' at the first free position in the array of joins
|
|
We don't need to keep the array sorted as
|
|
best_extension_by_limited_search() will sort them.
|
|
*/
|
|
swap_variables(JOIN_TAB*, join->best_ref[idx], join->best_ref[best_idx]);
|
|
|
|
/* compute the cost of the new plan extended with 'best_table' */
|
|
record_count= COST_MULT(record_count, join->positions[idx].records_read);
|
|
read_time= COST_ADD(read_time, join->positions[idx].read_time);
|
|
|
|
remaining_tables&= ~(best_table->table->map);
|
|
--size_remain;
|
|
++idx;
|
|
|
|
DBUG_EXECUTE("opt", print_plan(join, idx,
|
|
record_count, read_time, read_time,
|
|
"extended"););
|
|
} while (TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
Get cost of execution and fanout produced by selected tables in the join
|
|
prefix (where prefix is defined as prefix in depth-first traversal)
|
|
|
|
@param end_tab_idx The number of last tab to be taken into
|
|
account (in depth-first traversal prefix)
|
|
@param filter_map Bitmap of tables whose cost/fanout are to
|
|
be taken into account.
|
|
@param read_time_arg [out] store read time here
|
|
@param record_count_arg [out] store record count here
|
|
|
|
@note
|
|
|
|
@returns
|
|
read_time_arg and record_count_arg contain the computed cost and fanout
|
|
*/
|
|
|
|
void JOIN::get_partial_cost_and_fanout(int end_tab_idx,
|
|
table_map filter_map,
|
|
double *read_time_arg,
|
|
double *record_count_arg)
|
|
{
|
|
double record_count= 1;
|
|
double read_time= 0.0;
|
|
double sj_inner_fanout= 1.0;
|
|
JOIN_TAB *end_tab= NULL;
|
|
JOIN_TAB *tab;
|
|
int i;
|
|
int last_sj_table= MAX_TABLES;
|
|
|
|
/*
|
|
Handle a special case where the join is degenerate, and produces no
|
|
records
|
|
*/
|
|
if (table_count == const_tables)
|
|
{
|
|
*read_time_arg= 0.0;
|
|
/*
|
|
We return 1, because
|
|
- it is the pessimistic estimate (there might be grouping)
|
|
- it's safer, as we're less likely to hit the edge cases in
|
|
calculations.
|
|
*/
|
|
*record_count_arg=1.0;
|
|
return;
|
|
}
|
|
|
|
for (tab= first_depth_first_tab(this), i= const_tables;
|
|
tab;
|
|
tab= next_depth_first_tab(this, tab), i++)
|
|
{
|
|
end_tab= tab;
|
|
if (i == end_tab_idx)
|
|
break;
|
|
}
|
|
|
|
for (tab= first_depth_first_tab(this), i= const_tables;
|
|
;
|
|
tab= next_depth_first_tab(this, tab), i++)
|
|
{
|
|
if (end_tab->bush_root_tab && end_tab->bush_root_tab == tab)
|
|
{
|
|
/*
|
|
We've entered the SJM nest that contains the end_tab. The caller is
|
|
- interested in fanout inside the nest (because that's how many times
|
|
we'll invoke the attached WHERE conditions)
|
|
- not interested in cost
|
|
*/
|
|
record_count= 1.0;
|
|
read_time= 0.0;
|
|
}
|
|
|
|
/*
|
|
Ignore fanout (but not cost) from sj-inner tables, as long as
|
|
the range that processes them finishes before the end_tab
|
|
*/
|
|
if (tab->sj_strategy != SJ_OPT_NONE)
|
|
{
|
|
sj_inner_fanout= 1.0;
|
|
last_sj_table= i + tab->n_sj_tables;
|
|
}
|
|
|
|
table_map cur_table_map;
|
|
if (tab->table)
|
|
cur_table_map= tab->table->map;
|
|
else
|
|
{
|
|
/* This is a SJ-Materialization nest. Check all of its tables */
|
|
TABLE *first_child= tab->bush_children->start->table;
|
|
TABLE_LIST *sjm_nest= first_child->pos_in_table_list->embedding;
|
|
cur_table_map= sjm_nest->nested_join->used_tables;
|
|
}
|
|
if (tab->records_read && (cur_table_map & filter_map))
|
|
{
|
|
record_count= COST_MULT(record_count, tab->records_read);
|
|
read_time= COST_ADD(read_time, tab->read_time);
|
|
if (tab->emb_sj_nest)
|
|
sj_inner_fanout= COST_MULT(sj_inner_fanout, tab->records_read);
|
|
}
|
|
|
|
if (i == last_sj_table)
|
|
{
|
|
record_count /= sj_inner_fanout;
|
|
sj_inner_fanout= 1.0;
|
|
last_sj_table= MAX_TABLES;
|
|
}
|
|
|
|
if (tab == end_tab)
|
|
break;
|
|
}
|
|
*read_time_arg= read_time;
|
|
*record_count_arg= record_count;
|
|
}
|
|
|
|
|
|
/*
|
|
Get prefix cost and fanout. This function is different from
|
|
get_partial_cost_and_fanout:
|
|
- it operates on a JOIN that hasn't yet finished its optimization phase (in
|
|
particular, fix_semijoin_strategies_for_picked_join_order() and
|
|
get_best_combination() haven't been called)
|
|
- it assumes that the join prefix doesn't have any semi-join plans
|
|
|
|
These assumptions are met by the caller of the function.
|
|
*/
|
|
|
|
void JOIN::get_prefix_cost_and_fanout(uint n_tables,
                                      double *read_time_arg,
                                      double *record_count_arg)
{
  double fanout= 1;
  double cost= 0.0;
  const uint prefix_end= n_tables + const_tables;

  /*
    Walk the first n_tables non-constant entries of best_positions.
    Entries with records_read == 0 contribute neither fanout nor cost.
  */
  for (uint idx= const_tables; idx < prefix_end; idx++)
  {
    if (!best_positions[idx].records_read)
      continue;
    fanout= COST_MULT(fanout, best_positions[idx].records_read);
    cost= COST_ADD(cost, best_positions[idx].read_time);
  }

  *read_time_arg= cost;
  *record_count_arg= fanout;
}
|
|
|
|
|
|
/**
|
|
Estimate the number of rows that query execution will read.
|
|
|
|
@todo This is a very pessimistic upper bound. Use join selectivity
|
|
when available to produce a more realistic number.
|
|
*/
|
|
|
|
double JOIN::get_examined_rows()
|
|
{
|
|
double examined_rows;
|
|
double prev_fanout= 1;
|
|
double records;
|
|
JOIN_TAB *tab= first_breadth_first_tab();
|
|
JOIN_TAB *prev_tab= tab;
|
|
|
|
records= (double)tab->get_examined_rows();
|
|
|
|
while ((tab= next_breadth_first_tab(first_breadth_first_tab(),
|
|
top_join_tab_count, tab)))
|
|
{
|
|
prev_fanout= COST_MULT(prev_fanout, prev_tab->records_read);
|
|
records=
|
|
COST_ADD(records,
|
|
COST_MULT((double) (tab->get_examined_rows()), prev_fanout));
|
|
prev_tab= tab;
|
|
}
|
|
examined_rows= records;
|
|
return examined_rows;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Get the selectivity of equalities between columns when joining a table
|
|
|
|
@param join The optimized join
|
|
@param idx The number of tables in the evaluated partial join
|
|
@param s The table to be joined for evaluation
|
|
@param rem_tables The bitmap of tables to be joined later
|
|
@param keyparts The number of key parts used when joining s
|
|
@param ref_keyuse_steps Array of references to keyuses employed to join s
|
|
*/
|
|
|
|
static
|
|
double table_multi_eq_cond_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|
table_map rem_tables, uint keyparts,
|
|
uint16 *ref_keyuse_steps)
|
|
{
|
|
double sel= 1.0;
|
|
COND_EQUAL *cond_equal= join->cond_equal;
|
|
|
|
if (!cond_equal || !cond_equal->current_level.elements || !s->keyuse)
|
|
return sel;
|
|
|
|
Item_equal *item_equal;
|
|
List_iterator_fast<Item_equal> it(cond_equal->current_level);
|
|
TABLE *table= s->table;
|
|
table_map table_bit= table->map;
|
|
POSITION *pos= &join->positions[idx];
|
|
|
|
while ((item_equal= it++))
|
|
{
|
|
/*
|
|
Check whether we need to take into account the selectivity of
|
|
multiple equality item_equal. If this is the case multiply
|
|
the current value of sel by this selectivity
|
|
*/
|
|
table_map used_tables= item_equal->used_tables();
|
|
if (!(used_tables & table_bit))
|
|
continue;
|
|
if (item_equal->get_const())
|
|
continue;
|
|
|
|
bool adjust_sel= FALSE;
|
|
Item_equal_fields_iterator fi(*item_equal);
|
|
while((fi++) && !adjust_sel)
|
|
{
|
|
Field *fld= fi.get_curr_field();
|
|
if (fld->table->map != table_bit)
|
|
continue;
|
|
if (pos->key == 0)
|
|
adjust_sel= TRUE;
|
|
else
|
|
{
|
|
uint i;
|
|
KEYUSE *keyuse= pos->key;
|
|
uint key= keyuse->key;
|
|
for (i= 0; i < keyparts; i++)
|
|
{
|
|
if (i > 0)
|
|
keyuse+= ref_keyuse_steps[i-1];
|
|
uint fldno;
|
|
if (is_hash_join_key_no(key))
|
|
fldno= keyuse->keypart;
|
|
else
|
|
fldno= table->key_info[key].key_part[i].fieldnr - 1;
|
|
if (fld->field_index == fldno)
|
|
break;
|
|
}
|
|
keyuse= pos->key;
|
|
|
|
if (i == keyparts)
|
|
{
|
|
/*
|
|
Field fld is included in multiple equality item_equal
|
|
and is not a part of the ref key.
|
|
The selectivity of the multiple equality must be taken
|
|
into account unless one of the ref arguments is
|
|
equal to fld.
|
|
*/
|
|
adjust_sel= TRUE;
|
|
for (uint j= 0; j < keyparts && adjust_sel; j++)
|
|
{
|
|
if (j > 0)
|
|
keyuse+= ref_keyuse_steps[j-1];
|
|
Item *ref_item= keyuse->val;
|
|
if (ref_item->real_item()->type() == Item::FIELD_ITEM)
|
|
{
|
|
Item_field *field_item= (Item_field *) (ref_item->real_item());
|
|
if (item_equal->contains(field_item->field))
|
|
adjust_sel= FALSE;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (adjust_sel)
|
|
{
|
|
/*
|
|
If ref == 0 and there are no fields in the multiple equality
|
|
item_equal that belong to the tables joined prior to s
|
|
then the selectivity of multiple equality will be set to 1.0.
|
|
*/
|
|
double eq_fld_sel= 1.0;
|
|
fi.rewind();
|
|
while ((fi++))
|
|
{
|
|
double curr_eq_fld_sel;
|
|
Field *fld= fi.get_curr_field();
|
|
if (!(fld->table->map & ~(table_bit | rem_tables)))
|
|
continue;
|
|
curr_eq_fld_sel= get_column_avg_frequency(fld) /
|
|
fld->table->stat_records();
|
|
if (curr_eq_fld_sel < 1.0)
|
|
set_if_bigger(eq_fld_sel, curr_eq_fld_sel);
|
|
}
|
|
sel*= eq_fld_sel;
|
|
}
|
|
}
|
|
return sel;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Get the selectivity of conditions when joining a table
|
|
|
|
@param join The optimized join
|
|
@param s The table to be joined for evaluation
|
|
@param rem_tables The bitmap of tables to be joined later
|
|
@param new_records_out OUT Set to number of rows accepted
|
|
|
|
@detail
|
|
Get selectivity of conditions that can be applied when joining this table
|
|
with previous tables.
|
|
|
|
For quick selects and full table scans, selectivity of COND(this_table)
|
|
is accounted for in apply_selectivity_for_table(). Here, we only count
|
|
selectivity of COND(this_table, previous_tables).
|
|
|
|
For other access methods, we need to calculate selectivity of the whole
|
|
condition, "COND(this_table) AND COND(this_table, previous_tables)".
|
|
|
|
@retval
|
|
selectivity of the conditions imposed on the rows of s related to
|
|
the rows that we are expected to read (position->records_init).
|
|
*/
|
|
|
|
static
|
|
double table_after_join_selectivity(JOIN *join, uint idx, JOIN_TAB *s,
|
|
table_map rem_tables,
|
|
double *new_records_out)
|
|
{
|
|
uint16 ref_keyuse_steps_buf[MAX_REF_PARTS];
|
|
uint ref_keyuse_size= MAX_REF_PARTS;
|
|
uint16 *ref_keyuse_steps= ref_keyuse_steps_buf;
|
|
Field *field;
|
|
TABLE *table= s->table;
|
|
MY_BITMAP *read_set= table->read_set;
|
|
POSITION *pos= &join->positions[idx];
|
|
double sel, records_out= pos->records_out;
|
|
uint keyparts= 0;
|
|
uint found_part_ref_or_null= 0;
|
|
|
|
if (pos->key != 0)
|
|
{
|
|
sel= table->cond_selectivity;
|
|
/*
|
|
A ref access or hash join is used for this table. ref access is created
|
|
from
|
|
|
|
tbl.keypart1=expr1 AND tbl.keypart2=expr2 AND ...
|
|
|
|
and it will only return rows for which this condition is satisified.
|
|
Suppose, certain expr{i} is a constant. Since ref access only returns
|
|
rows that satisfy
|
|
|
|
tbl.keypart{i}=const (*)
|
|
|
|
then selectivity of this equality should not be counted in return value
|
|
of this function. This function uses the value of
|
|
|
|
table->cond_selectivity=selectivity(COND(tbl)) (**)
|
|
|
|
as a starting point. This value includes selectivity of equality (*). We
|
|
should somehow discount it.
|
|
|
|
Looking at calculate_cond_selectivity_for_table(), one can see that that
|
|
the value is not necessarily a direct multiplicand in
|
|
table->cond_selectivity
|
|
|
|
There are three possible ways to discount
|
|
1. There is a potential range access on t.keypart{i}=const.
|
|
(an important special case: the used ref access has a const prefix for
|
|
which a range estimate is available)
|
|
|
|
2. The field has a histogram. field[x]->cond_selectivity has the data.
|
|
|
|
3. Use index stats on this index:
|
|
rec_per_key[key_part+1]/rec_per_key[key_part]
|
|
|
|
(TODO: more details about the "t.key=othertable.col" case)
|
|
*/
|
|
KEYUSE *keyuse= pos->key;
|
|
KEYUSE *prev_ref_keyuse= keyuse;
|
|
uint key= keyuse->key;
|
|
bool used_range_selectivity= false;
|
|
|
|
/*
|
|
Check if we have a prefix of key=const that matches a quick select.
|
|
*/
|
|
if (!is_hash_join_key_no(key) && table->opt_range_keys.is_set(key))
|
|
{
|
|
key_part_map quick_key_map= (key_part_map(1) <<
|
|
table->opt_range[key].key_parts) - 1;
|
|
if (s->type == JT_RANGE ||
|
|
(table->opt_range[key].rows && (table->const_key_parts[key] & 1)))
|
|
{
|
|
/*
|
|
We are either using a range or we are using a REF which the
|
|
same key as an active range and the first key part is a constant.
|
|
|
|
In both cases we have to discount the selectivity for the range
|
|
as otherwise we are using the selectivity twice.
|
|
*/
|
|
for (; quick_key_map & 1 ; quick_key_map>>= 1)
|
|
{
|
|
while (keyuse->table == table && keyuse->key == key &&
|
|
keyuse->keypart == keyparts)
|
|
{
|
|
keyuse++;
|
|
}
|
|
keyparts++;
|
|
}
|
|
/*
|
|
Here we discount selectivity of the constant range CR. To calculate
|
|
this selectivity we use elements from the quick_rows[] array.
|
|
If we have indexes i1,...,ik with the same prefix compatible
|
|
with CR any of the estimate quick_rows[i1], ... quick_rows[ik] could
|
|
be used for this calculation but here we don't know which one was
|
|
actually used. So sel could be greater than 1 and we have to cap it.
|
|
However if sel becomes greater than 2 then with high probability
|
|
something went wrong.
|
|
*/
|
|
DBUG_ASSERT(sel <= 1.0);
|
|
DBUG_ASSERT(table->opt_range[key].rows <=
|
|
(double) table->stat_records());
|
|
sel /= ((double) table->opt_range[key].rows /
|
|
(double) table->stat_records());
|
|
set_if_smaller(sel, 1.0);
|
|
used_range_selectivity= true;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Go through the "keypart{N}=..." equalities and find those that were
|
|
already taken into account in table->cond_selectivity.
|
|
*/
|
|
keyuse= pos->key;
|
|
keyparts=0;
|
|
while (keyuse->table == table && keyuse->key == key)
|
|
{
|
|
if (!(keyuse->used_tables & (rem_tables | table->map)))
|
|
{
|
|
if (are_tables_local(s, keyuse->val->used_tables()))
|
|
{
|
|
if (is_hash_join_key_no(key))
|
|
{
|
|
if (keyparts == keyuse->keypart)
|
|
keyparts++;
|
|
}
|
|
else
|
|
{
|
|
if (keyparts == keyuse->keypart &&
|
|
!((keyuse->val->used_tables()) & ~pos->ref_depend_map) &&
|
|
!(found_part_ref_or_null & keyuse->optimize))
|
|
{
|
|
/* Found a KEYUSE object that will be used by ref access */
|
|
keyparts++;
|
|
found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
|
|
}
|
|
}
|
|
|
|
if (keyparts > keyuse->keypart)
|
|
{
|
|
/* Ok this is the keyuse that will be used for ref access */
|
|
if (!used_range_selectivity && keyuse->val->const_item())
|
|
{
|
|
uint fldno;
|
|
if (is_hash_join_key_no(key))
|
|
fldno= keyuse->keypart;
|
|
else
|
|
fldno= table->key_info[key].key_part[keyparts-1].fieldnr - 1;
|
|
|
|
if (table->field[fldno]->cond_selectivity > 0)
|
|
{
|
|
sel /= table->field[fldno]->cond_selectivity;
|
|
set_if_smaller(sel, 1.0);
|
|
}
|
|
/*
|
|
TODO: we could do better here:
|
|
1. cond_selectivity might be =1 (the default) because quick
|
|
select on some index prevented us from analyzing
|
|
histogram for this column.
|
|
2. we could get an estimate through this?
|
|
rec_per_key[key_part-1] / rec_per_key[key_part]
|
|
*/
|
|
}
|
|
if (keyparts > 1)
|
|
{
|
|
/*
|
|
Prepare to set ref_keyuse_steps[keyparts-2]: resize the array
|
|
if it is not large enough
|
|
*/
|
|
if (keyparts - 2 >= ref_keyuse_size)
|
|
{
|
|
uint new_size= MY_MAX(ref_keyuse_size*2, keyparts);
|
|
void *new_buf;
|
|
if (!(new_buf= my_malloc(PSI_INSTRUMENT_ME,
|
|
sizeof(*ref_keyuse_steps)*new_size,
|
|
MYF(0))))
|
|
{
|
|
sel= 1.0; // As if no selectivity was computed
|
|
goto exit;
|
|
}
|
|
memcpy(new_buf, ref_keyuse_steps,
|
|
sizeof(*ref_keyuse_steps)*ref_keyuse_size);
|
|
if (ref_keyuse_steps != ref_keyuse_steps_buf)
|
|
my_free(ref_keyuse_steps);
|
|
|
|
ref_keyuse_steps= (uint16*)new_buf;
|
|
ref_keyuse_size= new_size;
|
|
}
|
|
|
|
ref_keyuse_steps[keyparts-2]= (uint16)(keyuse - prev_ref_keyuse);
|
|
prev_ref_keyuse= keyuse;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
keyuse++;
|
|
}
|
|
/*
|
|
If the field f from the table is equal to a field from one the
|
|
earlier joined tables then the selectivity of the range conditions
|
|
over the field f must be discounted.
|
|
|
|
We need to discount selectivity only if we're using ref-based
|
|
access method (and have sel!=1).
|
|
If we use ALL/range/index_merge, then sel==1, and no need to discount.
|
|
*/
|
|
for (Field **f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
|
|
{
|
|
if (!bitmap_is_set(read_set, field->field_index) ||
|
|
!field->next_equal_field)
|
|
continue;
|
|
for (Field *next_field= field->next_equal_field;
|
|
next_field != field;
|
|
next_field= next_field->next_equal_field)
|
|
{
|
|
if (!(next_field->table->map & rem_tables) &&
|
|
next_field->table != table)
|
|
{
|
|
if (field->cond_selectivity > 0)
|
|
{
|
|
sel/= field->cond_selectivity;
|
|
set_if_smaller(sel, 1.0);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
We have now calculated a more exact 'records_out' taking more index
|
|
costs into account.
|
|
pos->records_out previously contained the smallest record count for
|
|
all range or ref access, which should not be smaller than what we
|
|
calculated above.
|
|
*/
|
|
records_out= pos->records_init * sel;
|
|
set_if_smaller(records_out, pos->records_out);
|
|
}
|
|
|
|
sel= table_multi_eq_cond_selectivity(join, idx, s, rem_tables,
|
|
keyparts, ref_keyuse_steps);
|
|
records_out*= sel;
|
|
|
|
/*
|
|
Update sel to be relative pos->records_read as that is what some old
|
|
code expects. Newer code should just use 'position->records_out' instead.
|
|
*/
|
|
if (pos->records_read == 0)
|
|
sel= 1.0;
|
|
else
|
|
{
|
|
sel= records_out / pos->records_read;
|
|
DBUG_ASSERT(sel >= 0.0 && sel <= 1.00001);
|
|
if (sel > 1.0)
|
|
sel= 1.0;
|
|
}
|
|
|
|
exit:
|
|
*new_records_out= records_out;
|
|
if (ref_keyuse_steps != ref_keyuse_steps_buf)
|
|
my_free(ref_keyuse_steps);
|
|
return sel;
|
|
}
|
|
|
|
|
|
/*
|
|
Check if the table is an EQ_REF or similar table and there is no cost
|
|
to gain by moving it to a later stage.
|
|
We call such a table an edge table (or hanging leaf) as it will read at
|
|
most one row and will not add to the number of row combinations in the join.
|
|
*/
|
|
|
|
static inline enum_best_search
check_if_edge_table(POSITION *pos,
                    double pushdown_cond_selectivity)
{
  /* The pushed-down condition must keep (practically) every row */
  if (!(pushdown_cond_selectivity >= 0.999))
    return SEARCH_OK;

  /* An EQ_REF access always qualifies */
  if (pos->type == JT_EQ_REF)
    return SEARCH_FOUND_EDGE;

  /* So does a REF access with records_init == 1 and no rowid filter */
  if (pos->type == JT_REF &&
      pos->records_init == 1 &&
      !pos->range_rowid_filter_info)
    return SEARCH_FOUND_EDGE;

  return SEARCH_OK;
}
|
|
|
|
|
|
/*
  One candidate table for extending the current partial plan, paired with
  the access-path estimate computed for it. Filled by get_costs_for_tables()
  and ordered by sort_positions().
*/
struct SORT_POSITION
|
|
{
|
|
/* Slot in the array of JOIN_TAB pointers that holds the candidate table */
JOIN_TAB **join_tab;
|
|
/* Estimate for that table; sort_positions() compares its records_read */
POSITION *position;
|
|
};
|
|
|
|
|
|
/*
|
|
Sort SORT_POSITIONS according to expected number of rows found
|
|
If number of combinations are the same sort according to join_tab order
|
|
(same table order as used in the original SQL query)
|
|
*/
|
|
|
|
static int
|
|
sort_positions(SORT_POSITION *a, SORT_POSITION *b)
|
|
{
|
|
int cmp;
|
|
if ((cmp= compare_embedding_subqueries(*a->join_tab, *b->join_tab)) != 0)
|
|
return cmp;
|
|
|
|
if (a->position->records_read > b->position->records_read)
|
|
return 1;
|
|
if (a->position->records_read < b->position->records_read)
|
|
return -1;
|
|
return CMP_NUM(*a->join_tab, *b->join_tab);
|
|
}
|
|
|
|
|
|
/*
|
|
Call best_access_path() for a set of tables and collect results
|
|
|
|
@param join JOIN object
|
|
@param trace_one_table Current optimizer_trace
|
|
@param pos Pointer to remaining tables
|
|
@param allowed_tables bitmap of allowed tables. On return set to
|
|
the collected tables.
|
|
@param store_position Points to where to store next found SORT_POSITION.
|
|
Will be updated to next free position.
|
|
@param stop_on_eq_ref Stop searching for more tables if we found an EQ_REF
|
|
table.
|
|
|
|
@return
|
|
0 Normal
|
|
1 Eq_ref table found (only if stop_on_eq_ref is used)
|
|
|
|
join->next_sort_position will be updated to the next free position.
|
|
*/
|
|
|
|
static bool
|
|
get_costs_for_tables(JOIN *join, table_map remaining_tables, uint idx,
|
|
double record_count,
|
|
Json_writer_object *trace_one_table,
|
|
JOIN_TAB **pos, SORT_POSITION **store_position,
|
|
table_map *allowed_tables,
|
|
bool stop_on_eq_ref)
|
|
{
|
|
THD *thd= join->thd;
|
|
POSITION *sort_position= join->next_sort_position;
|
|
SORT_POSITION *sort_end= *store_position;
|
|
JOIN_TAB *s;
|
|
table_map found_tables= 0;
|
|
bool found_eq_ref= 0;
|
|
bool disable_jbuf= join->thd->variables.join_cache_level == 0;
|
|
DBUG_ENTER("get_plans_for_tables");
|
|
|
|
s= *pos;
|
|
do
|
|
{
|
|
table_map real_table_bit= s->table->map;
|
|
if ((*allowed_tables & real_table_bit) &&
|
|
!(remaining_tables & s->dependent))
|
|
{
|
|
#ifdef DBUG_ASSERT_EXISTS
|
|
DBUG_ASSERT(!check_interleaving_with_nj(s));
|
|
restore_prev_nj_state(s); // Revert effect of check_... call
|
|
#endif
|
|
sort_end->join_tab= pos;
|
|
sort_end->position= sort_position;
|
|
|
|
|
|
Json_writer_object wrapper(thd);
|
|
/* Find the best access method from 's' to the current partial plan */
|
|
best_access_path(join, s, remaining_tables, join->positions, idx,
|
|
disable_jbuf, record_count,
|
|
sort_position, sort_position + 1);
|
|
found_tables|= s->table->map;
|
|
sort_end++;
|
|
sort_position+= 2;
|
|
if (unlikely(stop_on_eq_ref) && sort_position[-2].type == JT_EQ_REF)
|
|
{
|
|
/* Found an eq_ref tables. Use this, ignoring the other tables */
|
|
found_eq_ref= 1;
|
|
if (found_tables == s->table->map)
|
|
break; // First table
|
|
|
|
/* Store the found eq_ref table first in store_position */
|
|
sort_position-= 2;
|
|
*allowed_tables= s->table->map;
|
|
(*store_position)->join_tab= pos;
|
|
(*store_position)->position= sort_position;
|
|
(*store_position)++;
|
|
join->next_sort_position[0]= sort_position[0];
|
|
join->next_sort_position[1]= sort_position[1];
|
|
join->next_sort_position+= 2;
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Verify that 'allowed_current_tables' was calculated correctly */
|
|
DBUG_ASSERT((remaining_tables & s->dependent) ||
|
|
!(remaining_tables & real_table_bit) ||
|
|
!(*allowed_tables & real_table_bit) ||
|
|
check_interleaving_with_nj(s));
|
|
}
|
|
} while ((s= *++pos));
|
|
|
|
*allowed_tables= found_tables;
|
|
*store_position= sort_end;
|
|
join->next_sort_position= sort_position;
|
|
DBUG_RETURN(found_eq_ref);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Check if it is potentially possible to short-cut the JOIN execution due to
|
|
ORDER BY ... LIMIT clause
|
|
|
|
@detail
|
|
It is possible when the join has "ORDER BY ... LIMIT n" clause, and the
|
|
sort+limit operation is done right after the join operation (there's no
|
|
grouping or DISTINCT in between).
|
|
Then we can potentially build a join plan that enumerates rows in the
|
|
ORDER BY order and so will be able to terminate as soon as it has produced
|
|
#limit rows.
|
|
|
|
Note that it is not a requirement that sort_by_table has an index that
|
|
matches ORDER BY. If it doesn't have one, the optimizer will pass
|
|
sort_by_table to filesort. Reading from sort_by_table won't use
|
|
short-cutting but the rest of the join will.
|
|
*/
|
|
|
|
static
|
|
bool join_limit_shortcut_is_applicable(const JOIN *join)
|
|
{
|
|
/*
|
|
Any post-join operation like GROUP BY or DISTINCT or window functions
|
|
means we cannot short-cut join execution
|
|
*/
|
|
if (!join->thd->variables.optimizer_join_limit_pref_ratio ||
|
|
!join->order ||
|
|
join->select_limit == HA_POS_ERROR ||
|
|
join->group_list ||
|
|
join->select_distinct ||
|
|
join->select_options & SELECT_BIG_RESULT ||
|
|
join->rollup.state != ROLLUP::STATE_NONE ||
|
|
join->select_lex->have_window_funcs() ||
|
|
join->select_lex->with_sum_func)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
Cannot do short-cutting if
|
|
(1) ORDER BY refers to more than one table or
|
|
(2) the table it refers to cannot be first table in the join order
|
|
*/
|
|
if (!join->sort_by_table || // (1)
|
|
join->sort_by_table->reginfo.join_tab->dependent) // (2)
|
|
return false;
|
|
|
|
Json_writer_object wrapper(join->thd);
|
|
Json_writer_object trace(join->thd, "join_limit_shortcut_is_applicable");
|
|
trace.add("applicable", 1);
|
|
/* It looks like we can short-cut limit due to join */
|
|
return true;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Check if we could use an index-based access method to produce rows
|
|
in the order for ORDER BY ... LIMIT.
|
|
|
|
@detail
|
|
This should do what test_if_skip_sort_order() does. We can't use that
|
|
function directly, because:
|
|
|
|
1. We're at the join optimization stage and have not done query plan
|
|
fix-ups done in get_best_combination() and co.
|
|
|
|
2. The code in test_if_skip_sort_order() does modify query plan structures,
|
|
for example it may change the table's quick select. This is done even if
|
|
it's called with no_changes=true parameter.
|
|
|
|
@param access_method_changed OUT Whether the function changed the access
|
|
method to get rows in desired order.
|
|
@param new_access_cost OUT if access method changed: its cost.
|
|
|
|
@return
|
|
true - Can skip sorting
|
|
false - Cannot skip sorting
|
|
*/
|
|
|
|
bool test_if_skip_sort_order_early(JOIN *join,
                                   bool *access_method_changed,
                                   double *new_access_cost)
{
  const POSITION *first_pos= &join->best_positions[join->const_tables];
  TABLE *table= first_pos->table->table;
  key_map order_keys= table->keys_in_use_for_order_by;

  *access_method_changed= false;

  /* Step #1: Find indexes that produce the required ordering. */
  if (find_indexes_matching_order(join, table, join->order, &order_keys))
    return false;                       // Cannot skip sorting

  /*
    Step #2: Check if the index we're using produces the needed ordering.
    ref_key stays MAX_KEY unless a ref key or a range quick select is used.
  */
  uint ref_key= MAX_KEY;
  if (first_pos->key)
  {
    // Mirror the (wrong) logic in test_if_skip_sort_order:
    if (first_pos->spl_plan || first_pos->type == JT_REF_OR_NULL)
      return false;                     // Use filesort

    ref_key= first_pos->key->key;
  }
  else if (first_pos->table->quick &&
           first_pos->table->quick->get_type() ==
           QUICK_SELECT_I::QS_TYPE_RANGE)
    ref_key= first_pos->table->quick->index;

  if (ref_key != MAX_KEY && order_keys.is_set(ref_key))
    return true;      // we're using an index that produces the required ordering.

  /*
    Step #3: check if we can switch to using an index that would produce the
    ordering.
    (But don't actually switch, this will be done by test_if_skip_sort_order)
  */
  int best_key= -1;
  uint UNINIT_VAR(best_key_parts);
  uint saved_best_key_parts= 0;
  int best_key_direction= 0;
  JOIN_TAB *first_tab= first_pos->table;
  ha_rows new_limit;
  double new_read_time;

  if (!test_if_cheaper_ordering(/*in_join_optimizer */TRUE,
                                first_tab, join->order, table, order_keys,
                                ref_key, join->select_limit,
                                &best_key, &best_key_direction,
                                &new_limit, &new_read_time,
                                &best_key_parts,
                                &saved_best_key_parts))
    return false;

  // Ok found a way to skip sorting
  *access_method_changed= true;
  *new_access_cost= new_read_time;
  return true;
}
|
|
|
|
|
|
/*
|
|
Compute the cost of join assuming we only need fraction of the output.
|
|
*/
|
|
|
|
double recompute_join_cost_with_limit(const JOIN *join, bool skip_sorting,
|
|
double *first_table_cost,
|
|
double fraction)
|
|
{
|
|
POSITION *pos= join->best_positions + join->const_tables;
|
|
/*
|
|
Generally, we assume that producing X% of output takes X% of the cost.
|
|
*/
|
|
double partial_join_cost= join->best_read * fraction;
|
|
|
|
if (skip_sorting)
|
|
{
|
|
/*
|
|
First table produces rows in required order. Two options:
|
|
|
|
A. first_table_cost=NULL means we use whatever access method the join
|
|
optimizer has picked. Its cost was included in join->best_read and
|
|
we've already took a fraction of it.
|
|
|
|
B. first_table_cost!=NULL means we will need to switch to another access
|
|
method, we have the cost to read rows to produce #LIMIT rows in join
|
|
output.
|
|
*/
|
|
if (first_table_cost)
|
|
{
|
|
/*
|
|
Subtract the remainder of the first table's cost we had in
|
|
join->best_read:
|
|
*/
|
|
partial_join_cost -= pos->read_time*fraction;
|
|
partial_join_cost -= pos->records_read*fraction * WHERE_COST_THD(join->thd);
|
|
|
|
/* Add the cost of the new access method we've got: */
|
|
partial_join_cost= COST_ADD(partial_join_cost, *first_table_cost);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
DBUG_ASSERT(!first_table_cost);
|
|
/*
|
|
Cannot skip sorting. We read the first table entirely, then sort it.
|
|
|
|
partial_join_cost includes pos->read_time*fraction. Add to it
|
|
pos->read_time*(1-fraction) so we have the cost to read the entire first
|
|
table. Do the same for costs of checking the WHERE.
|
|
*/
|
|
double extra_first_table_cost= pos->read_time * (1.0 - fraction);
|
|
double extra_first_table_where= pos->records_read * (1.0 - fraction) *
|
|
WHERE_COST_THD(join->thd);
|
|
|
|
partial_join_cost= COST_ADD(partial_join_cost,
|
|
COST_ADD(extra_first_table_cost,
|
|
extra_first_table_where));
|
|
}
|
|
return partial_join_cost;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Finalize building the join order which allows to short-cut the join
|
|
execution.
|
|
|
|
@detail
|
|
This is called after we have produced a join order that allows short-
|
|
cutting.
|
|
Here, we decide if it is cheaper to use this one or the original join
|
|
order.
|
|
*/
|
|
|
|
POSITION *join_limit_shortcut_finalize_plan(JOIN *join, double *cost)
{
  Json_writer_object wrapper(join->thd);
  Json_writer_object trace(join->thd, "join_limit_shortcut_choice");

  /* Check which fraction of join output we need */
  double fraction= join->select_limit / join->join_record_count;
  trace.add("limit_fraction", fraction);

  if (fraction >= 1.0)
  {
    /* The whole join output is needed: nothing to gain */
    trace.add("skip_adjustment", "no short-cutting");
    return NULL;
  }

  /*
    Check if the first table's access method produces the required ordering.
    Possible options:
    1. Yes: we can just take a fraction of the execution cost.
    2A No: change the access method to one that does produce the required
       ordering, update the costs.
    2B No: Need to pass the first table to filesort().
  */
  bool skip_sorting;
  bool access_method_changed;
  double new_access_cost;
  {
    Json_writer_array tmp(join->thd, "test_if_skip_sort_order_early");
    skip_sorting= test_if_skip_sort_order_early(join,
                                                &access_method_changed,
                                                &new_access_cost);
  }
  trace.add("can_skip_filesort", skip_sorting);

  /* Pass the new first-table cost only if the access method was changed */
  double *switched_cost= access_method_changed ? &new_access_cost
                                               : (double*)0;
  double cost_with_shortcut=
    recompute_join_cost_with_limit(join, skip_sorting, switched_cost,
                                   fraction);
  double risk_ratio=
    (double)join->thd->variables.optimizer_join_limit_pref_ratio;

  trace.add("full_join_cost", join->best_read);
  trace.add("risk_ratio", risk_ratio);
  trace.add("shortcut_join_cost", cost_with_shortcut);
  cost_with_shortcut *= risk_ratio;
  trace.add("shortcut_cost_with_risk", cost_with_shortcut);

  /* Keep the original plan unless the short-cut one is strictly cheaper */
  if (!(cost_with_shortcut < join->best_read))
  {
    trace.add("use_shortcut_cost", false);
    return NULL;
  }

  trace.add("use_shortcut_cost", true);
  /* Hand back a copy of best_positions allocated on the THD mem_root */
  POSITION *plan_copy= (POSITION*)memdup_root(join->thd->mem_root,
                                              join->best_positions,
                                              sizeof(POSITION)*
                                              (join->table_count + 1));
  *cost= cost_with_shortcut;
  return plan_copy;
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
If we're in Limit Optimization Mode, allow only join->sort_by_table as
|
|
the first table in the join order
|
|
*/
|
|
|
|
static
|
|
bool join_limit_shortcut_limits_tables(const JOIN *join, uint idx, table_map *map)
|
|
{
|
|
if (join->limit_optimization_mode && idx == join->const_tables)
|
|
{
|
|
*map= join->sort_by_table->map;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
Find a good, possibly optimal, query execution plan (QEP) by a possibly
|
|
exhaustive search.
|
|
|
|
The procedure searches for the optimal ordering of the query tables in set
|
|
'remaining_tables' of size N, and the corresponding optimal access paths to
|
|
each table. The choice of a table order and an access path for each table
|
|
constitutes a query execution plan (QEP) that fully specifies how to
|
|
execute the query.
|
|
|
|
The maximal size of the found plan is controlled by the parameter
|
|
'search_depth'. When search_depth == N, the resulting plan is complete and
|
|
can be used directly as a QEP. If search_depth < N, the found plan consists
|
|
of only some of the query tables. Such "partial" optimal plans are useful
|
|
only as input to query optimization procedures, and cannot be used directly
|
|
to execute a query.
|
|
|
|
The algorithm begins with an empty partial plan stored in 'join->positions'
|
|
and a set of N tables - 'remaining_tables'. Each step of the algorithm
|
|
evaluates the cost of the partial plan extended by all access plans for
|
|
each of the relations in 'remaining_tables', expands the current partial
|
|
plan with the access plan that results in lowest cost of the expanded
|
|
partial plan, and removes the corresponding relation from
|
|
'remaining_tables'. The algorithm continues until it either constructs a
|
|
complete optimal plan, or constructs an optimal partial plan with size =
|
|
search_depth.
|
|
|
|
The final optimal plan is stored in 'join->best_positions'. The
|
|
corresponding cost of the optimal plan is in 'join->best_read'.
|
|
|
|
@note
|
|
The procedure uses a recursive depth-first search where the depth of the
|
|
recursion (and thus the exhaustiveness of the search) is controlled by the
|
|
parameter 'search_depth'.
|
|
|
|
@note
|
|
The pseudocode below describes the algorithm of
|
|
'best_extension_by_limited_search'. The worst-case complexity of this
|
|
algorithm is O(N*N^search_depth/search_depth). When search_depth >= N, then
|
|
the complexity of greedy_search is O(N!).
|
|
|
|
@code
|
|
procedure best_extension_by_limited_search(
|
|
pplan in, // in, partial plan of tables-joined-so-far
|
|
pplan_cost, // in, cost of pplan
|
|
remaining_tables, // in, set of tables not referenced in pplan
|
|
best_plan_so_far, // in/out, best plan found so far
|
|
best_plan_so_far_cost,// in/out, cost of best_plan_so_far
|
|
search_depth) // in, maximum size of the plans being considered
|
|
{
|
|
for each table T from remaining_tables
|
|
{
|
|
// Calculate the cost of using table T as above
|
|
cost = complex-series-of-calculations;
|
|
|
|
// Add the cost to the cost so far.
|
|
pplan_cost+= cost;
|
|
|
|
if (pplan_cost >= best_plan_so_far_cost)
|
|
// pplan_cost already too great, stop search
|
|
continue;
|
|
|
|
pplan= expand plan by best_access_method;
|
|
remaining_tables= remaining_tables - table T;
|
|
if (remaining_tables is not an empty set
|
|
and
|
|
search_depth > 1)
|
|
{
|
|
best_extension_by_limited_search(pplan, pplan_cost,
|
|
remaining_tables,
|
|
best_plan_so_far,
|
|
best_plan_so_far_cost,
|
|
search_depth - 1);
|
|
}
|
|
else
|
|
{
|
|
best_plan_so_far_cost= pplan_cost;
|
|
best_plan_so_far= pplan;
|
|
}
|
|
}
|
|
}
|
|
@endcode
|
|
|
|
@note
|
|
When 'best_extension_by_limited_search' is called for the first time,
|
|
'join->best_read' must be set to the largest possible value (e.g. DBL_MAX).
|
|
The actual implementation provides a way to optionally use pruning
|
|
heuristic to reduce the search space by skipping some partial plans.
|
|
|
|
@note
|
|
The parameter 'search_depth' provides control over the recursion
|
|
depth, and thus the size of the resulting optimal plan.
|
|
|
|
@param join pointer to the structure providing all context info
|
|
for the query
|
|
@param remaining_tables set of tables not included into the partial plan yet
|
|
@param idx length of the partial QEP in 'join->positions';
|
|
since a depth-first search is used, also corresponds
|
|
to the current depth of the search tree;
|
|
also an index in the array 'join->best_ref';
|
|
@param record_count estimate for the number of records returned by the
|
|
best partial plan
|
|
@param read_time the cost of the best partial plan
|
|
@param search_depth maximum depth of the recursion and thus size of the
|
|
found optimal plan
|
|
(0 < search_depth <= join->tables+1).
|
|
(values: 0 = EXHAUSTIVE, 1 = PRUNE_BY_TIME_OR_ROWS)
|
|
@param use_cond_selectivity specifies how the selectivity of the conditions
|
|
pushed to a table should be taken into account
|
|
|
|
@retval
|
|
enum_best_search::SEARCH_OK All fine
|
|
@retval
|
|
enum_best_search::SEARCH_FOUND_EDGE All remaining tables are edge tables
|
|
@retval
|
|
enum_best_search::SEARCH_ABORT Killed by user
|
|
@retval
|
|
enum_best_search::SEARCH_ERROR Fatal error
|
|
*/
|
|
|
|
|
|
/*
  Implementation of the search described in the comment above.

  In addition to the parameters documented there:

  @param processed_eq_ref_tables  [in/out] If this level extended an EQ_REF
         chain (used_eq_ref_table != 0), the chained table and the tables
         reported by the deeper levels are OR-ed into it; otherwise it is
         reset to 0. The caller skips candidate tables whose bits are set
         in the map it passed down.
*/

static enum_best_search
best_extension_by_limited_search(JOIN      *join,
                                 table_map remaining_tables,
                                 uint      idx,
                                 double    record_count,
                                 double    read_time,
                                 uint      search_depth,
                                 uint      use_cond_selectivity,
                                 table_map *processed_eq_ref_tables)
{
  THD *thd= join->thd;
  /*
    'join' is a partial plan with lower cost than the best plan so far,
    so continue expanding it further with the tables in 'remaining_tables'.
  */
  JOIN_TAB *s;
  double best_record_count= DBL_MAX;
  double best_read_time=    DBL_MAX;
  enum_best_search best_res;
  uint tables_left= join->table_count - idx, found_tables;
  uint accepted_tables __attribute__((unused));
  table_map found_eq_ref_tables= 0, used_eq_ref_table= 0;
  table_map allowed_tables, allowed_current_tables;
  /* Scratch array with one slot per table that can still be added */
  SORT_POSITION *sort= (SORT_POSITION*) alloca(sizeof(SORT_POSITION)*tables_left);
  SORT_POSITION *sort_end;
  DBUG_ENTER("best_extension_by_limited_search");

  DBUG_EXECUTE_IF("show_explain_probe_best_ext_lim_search",
                  if (dbug_user_var_equals_int(thd,
                                               "show_explain_probe_select_id",
                                               join->select_lex->select_number))
                    dbug_serve_apcs(thd, 1);
                 );

  if (unlikely(thd->check_killed()))            // Abort
    DBUG_RETURN(SEARCH_ABORT);

  DBUG_EXECUTE("opt", print_plan(join, idx, record_count, read_time, read_time,
                                 "part_plan"););
  status_var_increment(thd->status_var.optimizer_join_prefixes_check_calls);

  if (join->emb_sjm_nest)
  {
    /*
      If we are searching for the execution plan of a materialized semi-join nest
      then allowed_tables contains bits only for the tables from this nest.
    */
    allowed_tables= (join->emb_sjm_nest->sj_inner_tables & remaining_tables);
    allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables;
  }
  else
  {
    /*
      allowed_tables is used to check if there are tables left that can improve
      a key search and to see if there are more tables to add in next iteration.
      allowed_current_tables tells us which tables we can add to the current
      plan at this stage.
    */
    allowed_tables= remaining_tables;
    allowed_current_tables= join->get_allowed_nj_tables(idx) & remaining_tables;
    table_map sort_table;
    if (join_limit_shortcut_limits_tables(join, idx, &sort_table))
      allowed_current_tables= sort_table;
  }
  DBUG_ASSERT(allowed_tables & remaining_tables);

  sort_end= sort;
  {
    Json_writer_object trace_one_table(thd);
    JOIN_TAB **best_ref= join->best_ref + idx;
    if (unlikely(thd->trace_started()))
      trace_plan_prefix(&trace_one_table, join, idx, remaining_tables);

    Json_writer_array arr(thd, "get_costs_for_tables");

    if (idx > join->const_tables && join->prune_level >= 2 &&
        join->positions[idx-1].type == JT_EQ_REF &&
        (join->eq_ref_tables & allowed_current_tables))
    {
      /*
        Previous table was an EQ REF table, only add other possible EQ_REF
        tables to the chain, stop after first one is found.
      */
      table_map table_map= join->eq_ref_tables & allowed_current_tables;
      if (get_costs_for_tables(join, remaining_tables, idx, record_count,
                               &trace_one_table, best_ref, &sort_end,
                               &table_map, 1))
        used_eq_ref_table= (*sort->join_tab)->table->map;
      else
      {
        /* We didn't find another EQ_REF table, add remaining tables */
        if ((table_map= allowed_current_tables & ~table_map))
          get_costs_for_tables(join, remaining_tables, idx, record_count,
                               &trace_one_table, best_ref, &sort_end, &table_map,
                               0);
      }
    }
    else
    {
      table_map table_map= allowed_current_tables;
      get_costs_for_tables(join, remaining_tables, idx, record_count,
                           &trace_one_table, best_ref, &sort_end, &table_map,
                           0);
    }
    found_tables= (uint) (sort_end - sort);
    DBUG_ASSERT(found_tables > 0);

    /*
      Sort tables in ascending order of generated row combinations
    */
    if (found_tables > 1)
      my_qsort(sort, found_tables, sizeof(SORT_POSITION),
               (qsort_cmp) sort_positions);
  }
  DBUG_ASSERT(join->next_sort_position <=
              join->sort_positions + join->sort_space);

  accepted_tables= 0;
  double min_rec_count= DBL_MAX;
  double min_rec_count_read_time= DBL_MAX;

  double min_cost= DBL_MAX;
  double min_cost_record_count= DBL_MAX;

  for (SORT_POSITION *pos= sort ; pos < sort_end ; pos++)
  {
    s= *pos->join_tab;
    /*
      Skip tables that a deeper level already reported through
      found_eq_ref_tables and tables rejected by
      check_interleaving_with_nj().
    */
    if (!(found_eq_ref_tables & s->table->map) &&
        !check_interleaving_with_nj(s))
    {
      table_map real_table_bit= s->table->map;
      double current_record_count, current_read_time, original_record_count;
      double partial_join_cardinality;
      POSITION *position= join->positions + idx, *loose_scan_pos;
      double pushdown_cond_selectivity;
      Json_writer_object trace_one_table(thd);

      if (unlikely(thd->trace_started()))
      {
        trace_plan_prefix(&trace_one_table, join, idx, remaining_tables);
        trace_one_table.add_table_name(s);
      }

      accepted_tables++;
      *position= *pos->position;                // Get stored result
      loose_scan_pos= pos->position+1;

      /* Compute the cost of the new plan extended with 's' */
      current_record_count= COST_MULT(record_count, position->records_out);
      current_read_time=    COST_ADD(read_time, position->read_time);

      if (unlikely(trace_one_table.trace_started()))
      {
        trace_one_table.
          add("rows_for_plan", current_record_count).
          add("cost_for_plan", current_read_time);
      }
      original_record_count= current_record_count;
      /* Both estimates are passed by address so that they can be adjusted */
      optimize_semi_joins(join, remaining_tables, idx, &current_record_count,
                          &current_read_time, loose_scan_pos);
      if (position->sj_strategy != SJ_OPT_NONE)
      {
        /* Adjust records_out and current_record_count after semi join */
        double ratio= current_record_count / original_record_count;
        if (ratio < 1.0)
          position->records_out*= ratio;
        if (unlikely(trace_one_table.trace_started()))
        {
          trace_one_table.
            add("sj_rows_out", position->records_out).
            add("sj_rows_for_plan", current_record_count).
            add("sj_filtered", safe_filtered(position->records_out,
                                             position->records_init));
        }
      }
      /* Expand only partial plans with lower cost than the best QEP so far */
      if (current_read_time + COST_EPS >= join->best_read)
      {
        DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                       current_record_count,
                                       read_time,
                                       current_read_time,
                                       "prune_by_cost"););
        trace_one_table
          .add("pruned_by_cost", true)
          .add("current_cost", current_read_time)
          .add("best_cost", join->best_read);

        restore_prev_nj_state(s);
        restore_prev_sj_state(remaining_tables, s, idx);
        continue;
      }

      /*
        Prune some less promising partial plans. This heuristic may miss
        the optimal QEPs, thus it results in a non-exhaustive search.
      */
      if (join->prune_level >= 1)
      {
        // Collect the members with min_cost and min_read_time.
        bool min_rec_hit= false;
        bool min_cost_hit= false;

        if (join->extra_heuristic_pruning &&
            (!(position->key_dependent & allowed_tables) ||
             position->records_read < 2.0))
        {
          if (current_record_count < min_rec_count)
          {
            min_rec_count= current_record_count;
            min_rec_count_read_time= current_read_time;
            min_rec_hit= true;
          }

          if (current_read_time < min_cost)
          {
            min_cost_record_count= current_record_count;
            min_cost= current_read_time;
            min_cost_hit= true;
          }
        }

        if (best_record_count > current_record_count ||
            best_read_time > current_read_time ||
            (idx == join->const_tables &&  // 's' is the first table in the QEP
             s->table == join->sort_by_table))
        {
          /*
            Store the current record count and cost as the best
            possible cost at this level if the following holds:
            - It's the lowest record number and cost so far
            - There is no remaining table that could improve index usage
              or we found an EQ_REF or REF key with less than 2
              matching records (good enough).

            The "remaining table" test uses the local allowed_tables
            computed at the top of this function, the same set used by
            the extra_heuristic_pruning test above.
          */
          if (best_record_count >= current_record_count &&
              best_read_time >= current_read_time &&
              (!(position->key_dependent & allowed_tables) ||
               position->records_read < 2.0))
          {
            best_record_count= current_record_count;
            best_read_time=    current_read_time;
          }
        }
        else
        {
          /*
            Typically, we get here if:
              best_record_count < current_record_count &&
              best_read_time < current_read_time
            That is, both record_count and read_time are worse than the best_
            ones. This plan doesn't look promising, prune it away.
          */
          DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                         current_record_count,
                                         read_time,
                                         current_read_time,
                                         "pruned_by_heuristic"););
          trace_one_table.add("pruned_by_heuristic", true);
          restore_prev_nj_state(s);
          restore_prev_sj_state(remaining_tables, s, idx);
          continue;
        }

        const char* prune_reason= NULL;
        if (!min_rec_hit &&
            current_record_count >= min_rec_count &&
            current_read_time >= min_rec_count_read_time)
          prune_reason= "min_record_count";

        if (!min_cost_hit &&
            current_record_count >= min_cost_record_count &&
            current_read_time >= min_cost)
          prune_reason= "min_read_time";

        if (prune_reason)
        {
          trace_one_table.add("pruned_by_heuristic", prune_reason);
          restore_prev_nj_state(s);
          restore_prev_sj_state(remaining_tables, s, idx);
          continue;
        }
      }

      pushdown_cond_selectivity= 1.0;
      /*
        TODO: When a semi-join strategy is applied (sj_strategy!=SJ_OPT_NONE),
        we should account for selectivity from table_after_join_selectivity().
        (Condition filtering is performed before the semi-join removes some
        fanout so this might require moving the code around)
      */
      if (use_cond_selectivity > 1 && position->sj_strategy == SJ_OPT_NONE)
      {
        pushdown_cond_selectivity=
          table_after_join_selectivity(join, idx, s,
                                       remaining_tables & ~real_table_bit,
                                       &position->records_out);

        if (unlikely(trace_one_table.trace_started()) &&
            pushdown_cond_selectivity != 1.0)
          trace_one_table.
            add("pushdown_cond_selectivity", pushdown_cond_selectivity).
            add("filtered", safe_filtered(position->records_out,
                                          position->records_init)).
            add("rows_out", position->records_out);
      }
      join->positions[idx].cond_selectivity= pushdown_cond_selectivity;

      partial_join_cardinality= record_count * position->records_out;
      join->positions[idx].partial_join_cardinality= partial_join_cardinality;

      if (unlikely(thd->trace_started()) && pushdown_cond_selectivity < 1.0 &&
          partial_join_cardinality < current_record_count)
        trace_one_table
          .add("selectivity", pushdown_cond_selectivity)
          .add("estimated_join_cardinality", partial_join_cardinality);


      if ((search_depth > 1) && (remaining_tables & ~real_table_bit) &
          allowed_tables)
      {
        /* Recursively expand the current partial plan */
        Json_writer_array trace_rest(thd, "rest_of_plan");

        /*
          Put 's' at position idx of best_ref for the duration of the
          recursive call and undo the swap afterwards.
        */
        swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab);
        best_res=
          best_extension_by_limited_search(join,
                                           remaining_tables &
                                           ~real_table_bit,
                                           idx + 1,
                                           partial_join_cardinality,
                                           current_read_time,
                                           search_depth - 1,
                                           use_cond_selectivity,
                                           &found_eq_ref_tables);
        swap_variables(JOIN_TAB*, join->best_ref[idx], *pos->join_tab);

        if ((int) best_res < (int) SEARCH_OK)
          goto end;                             // Return best_res
        if (best_res == SEARCH_FOUND_EDGE &&
            check_if_edge_table(join->positions+ idx,
                                pushdown_cond_selectivity) !=
            SEARCH_FOUND_EDGE)
          best_res= SEARCH_OK;
      }
      else
      {
        /*
          'join' is either the best partial QEP with 'search_depth' relations,
          or the best complete QEP so far, whichever is smaller.
        */
        if (join->sort_by_table &&
            join->sort_by_table !=
            join->positions[join->const_tables].table->table)
        {
          /*
            We may have to make a temp table, note that this is only a
            heuristic since we cannot know for sure at this point if we
            are going to use addon fields or to have flush sorting to
            disk. We also don't know the temporary table will be in memory
            or disk.
            The following calculation takes a middle ground where assume
            we can sort the keys in memory but have to use a disk based
            temporary table to retrieve the rows.
            This cost is probably much bigger than it has to be...
          */
          double sort_cost;
          sort_cost= (get_qsort_sort_cost((ha_rows)current_record_count,0) +
                      current_record_count *
                      DISK_TEMPTABLE_LOOKUP_COST(thd));
          trace_one_table.add("cost_for_sorting", sort_cost);
          current_read_time= COST_ADD(current_read_time, sort_cost);
        }
        if (current_read_time < join->best_read)
        {
          /* Remember this plan as the best one found so far */
          memcpy((uchar*) join->best_positions, (uchar*) join->positions,
                 sizeof(POSITION) * (idx + 1));
          join->join_record_count= partial_join_cardinality;
          join->best_read= current_read_time;
        }
        DBUG_EXECUTE("opt", print_plan(join, idx+1,
                                       current_record_count,
                                       read_time,
                                       current_read_time,
                                       "full_plan"););
        best_res= check_if_edge_table(join->positions + idx,
                                      pushdown_cond_selectivity);
      }
      restore_prev_nj_state(s);
      restore_prev_sj_state(remaining_tables, s, idx);
      if (best_res == SEARCH_FOUND_EDGE)
      {
        if (pos+1 < sort_end)                   // If not last table
          trace_one_table.add("pruned_by_hanging_leaf", true);
        goto end;
      }
    }
  }
  DBUG_ASSERT(accepted_tables > 0);
  best_res= SEARCH_OK;

end:
  /* Subtract the 2 * found_tables entries accounted for at this level */
  join->next_sort_position-= found_tables*2;
  if (used_eq_ref_table)
    *processed_eq_ref_tables|= used_eq_ref_table | found_eq_ref_tables;
  else
    *processed_eq_ref_tables= 0;
  DBUG_RETURN(best_res);
}
|
|
|
|
|
|
/**
|
|
Find how much space the prevous read not const tables takes in cache.
|
|
*/
|
|
|
|
void JOIN_TAB::calc_used_field_length(bool max_fl)
|
|
{
|
|
uint null_fields,blobs,fields;
|
|
ulong rec_length;
|
|
Field **f_ptr,*field;
|
|
uint uneven_bit_fields;
|
|
MY_BITMAP *read_set= table->read_set;
|
|
|
|
uneven_bit_fields= null_fields= blobs= fields= rec_length=0;
|
|
for (f_ptr=table->field ; (field= *f_ptr) ; f_ptr++)
|
|
{
|
|
if (bitmap_is_set(read_set, field->field_index))
|
|
{
|
|
uint flags=field->flags;
|
|
fields++;
|
|
rec_length+=field->pack_length();
|
|
if (flags & BLOB_FLAG)
|
|
blobs++;
|
|
if (!(flags & NOT_NULL_FLAG))
|
|
null_fields++;
|
|
if (field->type() == MYSQL_TYPE_BIT &&
|
|
((Field_bit*)field)->bit_len)
|
|
uneven_bit_fields++;
|
|
}
|
|
}
|
|
if (null_fields || uneven_bit_fields)
|
|
rec_length+=(table->s->null_fields+7)/8;
|
|
if (table->maybe_null)
|
|
rec_length+=sizeof(my_bool);
|
|
|
|
/* Take into account that DuplicateElimination may need to store rowid */
|
|
uint rowid_add_size= 0;
|
|
if (keep_current_rowid)
|
|
{
|
|
rowid_add_size= table->file->ref_length;
|
|
rec_length += rowid_add_size;
|
|
fields++;
|
|
}
|
|
|
|
if (max_fl)
|
|
{
|
|
// TODO: to improve this estimate for max expected length
|
|
if (blobs)
|
|
{
|
|
ulong blob_length= table->file->stats.mean_rec_length;
|
|
if (ULONG_MAX - rec_length > blob_length)
|
|
rec_length+= blob_length;
|
|
else
|
|
rec_length= ULONG_MAX;
|
|
}
|
|
max_used_fieldlength= rec_length;
|
|
}
|
|
else if (table->file->stats.mean_rec_length)
|
|
set_if_smaller(rec_length, table->file->stats.mean_rec_length + rowid_add_size);
|
|
|
|
used_fields=fields;
|
|
used_fieldlength=rec_length;
|
|
used_blobs=blobs;
|
|
used_null_fields= null_fields;
|
|
used_uneven_bit_fields= uneven_bit_fields;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Extract pushdown conditions for a table scan
|
|
|
|
@details
|
|
This functions extracts pushdown conditions usable when this table is scanned.
|
|
The conditions are extracted either from WHERE or from ON expressions.
|
|
The conditions are attached to the field cache_select of this table.
|
|
|
|
@note
|
|
Currently the extracted conditions are used only by BNL and BNLH join.
|
|
algorithms.
|
|
|
|
@retval 0 on success
|
|
1 otherwise
|
|
*/
|
|
|
|
int JOIN_TAB::make_scan_filter()
|
|
{
|
|
COND *tmp;
|
|
DBUG_ENTER("make_scan_filter");
|
|
|
|
Item *cond= is_inner_table_of_outer_join() ?
|
|
*get_first_inner_table()->on_expr_ref : join->conds;
|
|
|
|
if (cond)
|
|
{
|
|
if ((tmp= make_cond_for_table(join->thd, cond,
|
|
join->const_table_map | table->map,
|
|
table->map, -1, FALSE, TRUE)))
|
|
{
|
|
DBUG_EXECUTE("where",print_where(tmp,"cache", QT_ORDINARY););
|
|
if (!(cache_select=
|
|
(SQL_SELECT*) join->thd->memdup((uchar*) select,
|
|
sizeof(SQL_SELECT))))
|
|
DBUG_RETURN(1);
|
|
cache_select->cond= tmp;
|
|
cache_select->read_tables=join->const_table_map;
|
|
}
|
|
else if (join->thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Check whether hash join algorithm can be used to join this table
|
|
|
|
@details
|
|
This function finds out whether the ref items that have been chosen
|
|
by the planner to access this table can be used for hash join algorithms.
|
|
The answer depends on a certain property of the the fields of the
|
|
joined tables on which the hash join key is built.
|
|
|
|
@note
|
|
At present the function is supposed to be called only after the function
|
|
get_best_combination has been called.
|
|
|
|
@retval TRUE it's possible to use hash join to join this table
|
|
@retval FALSE otherwise
|
|
*/
|
|
|
|
bool JOIN_TAB::hash_join_is_possible()
|
|
{
|
|
if (type != JT_REF && type != JT_EQ_REF)
|
|
return FALSE;
|
|
if (!is_ref_for_hash_join())
|
|
{
|
|
KEY *keyinfo= table->key_info + ref.key;
|
|
return keyinfo->key_part[0].field->hash_join_is_possible();
|
|
}
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Check whether a KEYUSE can be really used for access this join table
|
|
|
|
@param join Join structure with the best join order
|
|
for which the check is performed
|
|
@param keyuse Evaluated KEYUSE structure
|
|
|
|
@details
|
|
This function is supposed to be used after the best execution plan have been
|
|
already chosen and the JOIN_TAB array for the best join order been already set.
|
|
For a given KEYUSE to access this JOIN_TAB in the best execution plan the
|
|
function checks whether it really can be used. The function first performs
|
|
the check with access_from_tables_is_allowed(). If it succeeds it checks
|
|
whether the keyuse->val does not use some fields of a materialized semijoin
|
|
nest that cannot be used to build keys to access outer tables.
|
|
Such KEYUSEs exists for the query like this:
|
|
select * from ot
|
|
where ot.c in (select it1.c from it1, it2 where it1.c=f(it2.c))
|
|
Here we have two KEYUSEs to access table ot: with val=it1.c and val=f(it2.c).
|
|
However if the subquery was materialized the second KEYUSE cannot be employed
|
|
to access ot.
|
|
|
|
@retval true the given keyuse can be used for ref access of this JOIN_TAB
|
|
@retval false otherwise
|
|
*/
|
|
|
|
bool JOIN_TAB::keyuse_is_valid_for_access_in_chosen_plan(JOIN *join,
|
|
KEYUSE *keyuse)
|
|
{
|
|
if (!access_from_tables_is_allowed(keyuse->used_tables,
|
|
join->sjm_lookup_tables))
|
|
return false;
|
|
if (join->sjm_scan_tables & table->map)
|
|
return true;
|
|
table_map keyuse_sjm_scan_tables= keyuse->used_tables &
|
|
join->sjm_scan_tables;
|
|
if (!keyuse_sjm_scan_tables)
|
|
return true;
|
|
uint sjm_tab_nr= 0;
|
|
while (!(keyuse_sjm_scan_tables & table_map(1) << sjm_tab_nr))
|
|
sjm_tab_nr++;
|
|
JOIN_TAB *sjm_tab= join->map2table[sjm_tab_nr];
|
|
TABLE_LIST *emb_sj_nest= sjm_tab->emb_sj_nest;
|
|
if (!(emb_sj_nest->sj_mat_info && emb_sj_nest->sj_mat_info->is_used &&
|
|
emb_sj_nest->sj_mat_info->is_sj_scan))
|
|
return true;
|
|
st_select_lex *sjm_sel= emb_sj_nest->sj_subq_pred->unit->first_select();
|
|
for (uint i= 0; i < sjm_sel->item_list.elements; i++)
|
|
{
|
|
DBUG_ASSERT(sjm_sel->ref_pointer_array[i]->real_item()->type() == Item::FIELD_ITEM);
|
|
if (keyuse->val->real_item()->type() == Item::FIELD_ITEM)
|
|
{
|
|
Field *field = ((Item_field*)sjm_sel->ref_pointer_array[i]->real_item())->field;
|
|
if (field->eq(((Item_field*)keyuse->val->real_item())->field))
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/*
  Sum get_used_fieldlength() over the tables stored in join->best_ref
  from position join->const_tables up to, but not including, position idx.
*/

static uint
cache_record_length(JOIN *join,uint idx)
{
  uint total= 0;
  JOIN_TAB **const last= join->best_ref + idx;

  for (JOIN_TAB **cur= join->best_ref + join->const_tables; cur != last; cur++)
    total+= (*cur)->get_used_fieldlength();

  return total;
}
|
|
|
|
/*
|
|
Estimate the number of engine ha_index_read_calls for EQ_REF tables
|
|
when taking into account the one-row-cache in join_read_always_key()
|
|
|
|
SYNOPSIS
|
|
@param position All previous tables best_access_path() information.
|
|
@param idx Number of (previous) tables in positions.
|
|
@param record_count Number of incoming record combinations
|
|
@param found_ref Bitmap of tables that is used to construct the key
|
|
used with the index read.
|
|
|
|
@return # The number of estimated calls that cannot be cached by the
|
|
the one-row-cache. In other words, number of expected
|
|
calls to engine ha_read_read_map().
|
|
Between 1 and record_count or 0 if record_count == 0
|
|
|
|
DESCRIPTION
|
|
The one-row-cache gives a great benefit when there are multiple consecutive
|
|
calls to ha_index_read() with the same key. In this case we can skip
|
|
calling the engine (and in the future also skip to check the key
|
|
condition), which can notably increase the performance.
|
|
|
|
Assuming most of the rows are cached, there is no notable saving to be
|
|
made trying to calculate the total number of distinct key values that will
|
|
be used. The performance of a ha_index_read_call() is about the same even
|
|
if we repeatedly read the same set of rows.
|
|
|
|
This code works by calculating the number of identical key sequences
|
|
found in the record stream.
|
|
The number of expected distinct calls can then be calculated as
|
|
records_count / sequences.
|
|
|
|
Some things to note:
|
|
- record_count == PRODUCT(records_out) over all tables[0...idx-1]
|
|
- position->prev_record_reads contains the number of identical
|
|
sequences found for previous EQ_REF tables.
|
|
|
|
Assume a join prefix of t1,t2,t3,t4 and t4 is an EQ_REF table.
|
|
We have the following combinations that we have to consider:
|
|
|
|
======
|
|
1) No JOIN_CACHE usage, tables depend only on one previous table
|
|
|
|
Row combinations are generated as:
|
|
- for all rows in t1
|
|
- for all rows in t2
|
|
- for all rows in t3
|
|
or
|
|
t1.1,t2.1,t3.1, t1.1,t2.1,t3.2, t1.1,t2.1,t3.3... # Only t3 row changes
|
|
(until no more rows in t3., ie t3.records_out times)
|
|
t1.1,t2.2,t3.1, t1.1,t2.2,t3.2, t1.1,t2.2,t3.3... # t2.2 read
|
|
(above repeated until no more rows in t2 and t3)
|
|
t1.2,t2.1,t3.1, t1.2,t2.1,t3.2, t1.2,t2.1,t3.3... # t1.2 read
|
|
|
|
If t4 is an EQ_REF table that is depending of one of the
|
|
previous tables, the number of identical keys can be calculated
|
|
as the multiplication of records_out of the tables in between
|
|
the t4 and its first dependency.
|
|
|
|
Let's consider cases where t4 depends on different previous tables:
|
|
WHERE t4.a=t3.a
|
|
no caching as t3 can change for each row
|
|
engine_calls: record_count
|
|
|
|
WHERE t4.a=t2.a
|
|
t4 is not depending on t3. The number of repeated rows are:
|
|
t1.1,t2.1,t3.1 to t1.1,t2.1,t3.last # t3.records_out rows
|
|
t1.1,t2.2,t3.1 to t1.1,t2.2,t3.last # t3.records_out rows
|
|
...
|
|
t1.2,t2.1,t3.1 to t1.2,t2.1,t3.last
|
|
...
|
|
t1.last,t2.last.t3.1 to t1.last,t2.last.1,t3.last
|
|
|
|
For each combination of t1 and t2 there are t3.records_out repeated
|
|
rows with equal key value
|
|
engine_calls: record_count / t3.records_out calls =
|
|
t1.records_out * t2.records_out
|
|
|
|
WHERE t4.a=t1.a
|
|
The repeated sequences:
|
|
t1.1,t2.1,t3.1 to t1.1,t2.last,t3.last
|
|
t1.2,t2.1,t3.1 to t1.2,t2.last,t3.last
|
|
repeated rows: t2.records_out * t3.records_out
|
|
engine_calls: record_count/repeated_rows = t1.records_out
|
|
|
|
If t4 depends on a table that uses EQ_REF access, we can multiply that
|
|
table's repeated_rows with current table's repeated_rows to take that
|
|
into account.
|
|
|
|
=====
|
|
2) Keys depending on multiple tables
|
|
|
|
In this case we have to stop searching after we find the first
|
|
table we depend upon.
|
|
We have to also disregard the number of repeated rows for the
|
|
found table. This can be seen from (assuming tables t1...t6):
|
|
|
|
WHERE t6.a=t4.a and t6.a=t3.a and t4.a= t2.a
|
|
- Here t4 is not depending on t3 (and thus there is a
|
|
t3.records_out identical keys for t4). However t6 key will
|
|
change for each t3 row and t6 cannot thus use
|
|
t3.identical_keys
|
|
|
|
WHERE t4.key_part1=t1.a and t4.key_part2= t3.a
|
|
As t4.key_part2 will change for every row, one-row-cache will not
|
|
be hit
|
|
|
|
WHERE t4.key_part1=t1.a and t4.key_part2= t2.a
|
|
t4.key will change when t1 or t2 changes
|
|
This is the same case as above for WHERE t4.a = t2.a
|
|
engine_calls: record_count / t3.records_out calls
|
|
|
|
=====
|
|
3) JOIN_CACHE is used
|
|
|
|
If any table is using join_cache as this changes the row
|
|
combinations seen by following tables. Using join cache for a
|
|
table T# will have T# rows repeated for the next table as many
|
|
times there are combinations in the cache. Then the cache will be
|
|
re-read and the operations repeats 'refill-1' number of times.
|
|
|
|
Table rows from table just before T# will come in 'random order',
|
|
from the point of the next tables.
|
|
|
|
Assuming t3 is using a cache, t4 will see the rows coming in the
|
|
following order:
|
|
t1.1,t2.1,t3.1, t1.1,t2.2,t3.1, t1.1,t2.3,t3.1...
|
|
(t3.1 repeated 't2.records_out' times)
|
|
t1.2,t2.1,t3.1, t1.2,t2.2,t3.1, t1.2,t2.3,t3.1...
|
|
(Next row in t1 used)
|
|
t1.1,t2.1,t3.2, t1.1,t2.2,t3.2, t1.1,t2.3,t3.2...
|
|
(Restarting all t1 & t2 combinations for t3.2)
|
|
|
|
WHERE t4.a=t3.a
|
|
- There is a repeated sequence of t3.records_out rows for
|
|
each t1,t2 row combination.
|
|
engine_calls= record_count / t3.records_out
|
|
|
|
WHERE t4.a=t2.a
|
|
t2 changes for each row
|
|
engine_calls= record_count
|
|
|
|
WHERE t4.a=t1.a
|
|
repeated rows= t2.records_out
|
|
engine_calls= record_count / t2.records_out
|
|
|
|
A refill of the join cache will restart the row sequences
|
|
(we have 'refill' more sequences), so we will have to do 'refill' times
|
|
more engine read calls.
|
|
|
|
=====
|
|
Expectations of the accuracy of the return value
|
|
|
|
- The value is always between 1 and record_count
|
|
- The returned value should almost always be larger than the true number of
|
|
engine calls.
|
|
|
|
- Assuming that every row has different values for all other columns for
|
|
each unique key value and record_count is accurate:
|
|
- If a table is depending on multiple tables, the return value may be
|
|
notable larger than real value.
|
|
- If there is no join cache the value should be exact.
|
|
- If there is a join cache, but no refills calculated or done then
|
|
the value should be exact.
|
|
- If there was more join_cache refills than was calculated, the value
|
|
may be slightly too low.
|
|
- If the number of refills is equal or less than was calculated the value
|
|
should be larger than the expected engine read calls. The more refills,
|
|
the less exact the number will be.
|
|
*/
|
|
|
|
static double
|
|
prev_record_reads(const POSITION *position, uint idx, table_map found_ref,
|
|
double record_count, double *identical_keys)
|
|
{
|
|
double found= 1.0;
|
|
const POSITION *pos_end= position - 1;
|
|
const POSITION *cur_pos= position + idx;
|
|
|
|
/* Safety against const tables */
|
|
if (unlikely(!found_ref))
|
|
goto end;
|
|
|
|
for (const POSITION *pos= cur_pos-1; pos != pos_end; pos--)
|
|
{
|
|
if (found_ref & pos->table->table->map)
|
|
{
|
|
/* Found a table we depend on */
|
|
found_ref= ~pos->table->table->map;
|
|
if (!found_ref)
|
|
{
|
|
/*
|
|
No more dependencies. We can use the cached values to improve things
|
|
a bit
|
|
*/
|
|
if (pos->type == JT_EQ_REF)
|
|
found= COST_MULT(found, pos->identical_keys);
|
|
else if (pos->use_join_buffer)
|
|
found= COST_MULT(found, pos->loops / pos->refills);
|
|
}
|
|
break;
|
|
}
|
|
if (unlikely(pos->use_join_buffer))
|
|
{
|
|
/* Each refill can change the cached key */
|
|
found/= pos->refills;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We are not depending on the current table.
|
|
There are 'records_out' rows with identical rows
|
|
value for our depending tables.
|
|
*/
|
|
found= COST_MULT(found, pos->records_out);
|
|
}
|
|
}
|
|
|
|
/*
|
|
In most case found should <= record_count.
|
|
|
|
However if there was a reduction of rows (records_out < 1) before
|
|
the referencing table then found could be >= record_count.
|
|
To get resonable numbers, we limit prev_record_read to be between
|
|
1.0 and record_count as we have to always do at least one read
|
|
anyway.
|
|
*/
|
|
|
|
end:
|
|
if (unlikely(found > record_count))
|
|
found= record_count;
|
|
if (unlikely(found <= 1.0))
|
|
found= 1.0;
|
|
*identical_keys= found;
|
|
return record_count / found;
|
|
}
|
|
|
|
|
|
/*
|
|
Enumerate join tabs in breadth-first fashion, including const tables.
|
|
*/
|
|
|
|
static JOIN_TAB *next_breadth_first_tab(JOIN_TAB *first_top_tab,
                                        uint n_top_tabs_count, JOIN_TAB *tab)
{
  /* The top-level range is extended by the join's aggr_tables entries */
  const uint total_top_tabs= n_top_tabs_count + tab->join->aggr_tables;
  JOIN_TAB *const top_end= first_top_tab + total_top_tabs;
  JOIN_TAB *scan;

  if (tab->bush_root_tab)
  {
    /* Inside an SJM nest: step to the next child if there is one */
    if (!tab->last_leaf_in_bush)
      return tab + 1;

    /*
      This nest is exhausted. Leave it and continue the search for the
      next nest right after its root.
    */
    scan= tab->bush_root_tab + 1;
  }
  else
  {
    /* At top level: the next top-level tab, if any, is the answer */
    JOIN_TAB *next_top= tab + 1;
    if (next_top < top_end)
      return next_top;

    /* Top level is done. Start enumerating the SJM nest children */
    scan= first_top_tab;
  }

  /*
    "scan" points to a top-level table. Find the next top-level table that
    has a nest and enter that nest.
  */
  while (scan < top_end)
  {
    if (scan->bush_children)
      return scan->bush_children->start;
    scan++;
  }
  return NULL;
}
|
|
|
|
|
|
/*
|
|
Enumerate JOIN_TABs in "EXPLAIN order". This order
|
|
- const tabs are included
|
|
- we enumerate "optimization tabs".
|
|
-
|
|
*/
|
|
|
|
JOIN_TAB *first_explain_order_tab(JOIN* join)
{
  JOIN_TAB *head= join->join_tab;

  /* No join_tab array: can happen when the tables were optimized away */
  if (head == NULL)
    return NULL;

  /* If the first tab has a nest, the enumeration starts inside that nest */
  if (head->bush_children)
    return head->bush_children->start;
  return head;
}
|
|
|
|
|
|
/*
  Return the tab that follows 'tab' in "EXPLAIN order", or NULL when the
  end of the top-level array has been reached.
*/
JOIN_TAB *next_explain_order_tab(JOIN* join, JOIN_TAB* tab)
{
  /* Last table of an SJM nest: the nest root comes next */
  if (tab->last_leaf_in_bush)
    return tab->bush_root_tab;

  /* Otherwise advance within the array we are traversing */
  JOIN_TAB *following= tab + 1;

  /* Outside SJM nest and reached EOF */
  if (following == join->join_tab + join->top_join_tab_count)
    return NULL;

  /* A tab with a nest is entered through its first child */
  return following->bush_children ? following->bush_children->start
                                  : following;
}
|
|
|
|
|
|
|
|
/*
  Return the first top-level JOIN_TAB of the join.

  With WITHOUT_CONST_TABLES the first join->const_tables entries are
  skipped; NULL is returned if there is no join_tab array or if
  const_tables equals table_count.
*/
JOIN_TAB *first_top_level_tab(JOIN *join, enum enum_with_const_tables const_tbls)
{
  JOIN_TAB *first= join->join_tab;

  if (const_tbls != WITHOUT_CONST_TABLES)
    return first;

  if (join->const_tables == join->table_count || !first)
    return NULL;

  return first + join->const_tables;
}
|
|
|
|
|
|
/*
  Return the top-level tab following 'tab', or NULL when the breadth-first
  enumeration has nothing more to return or moves into an SJM nest.
*/
JOIN_TAB *next_top_level_tab(JOIN *join, JOIN_TAB *tab)
{
  JOIN_TAB *next= next_breadth_first_tab(join->first_breadth_first_tab(),
                                         join->top_join_tab_count, tab);

  /* A tab that has a bush root is a nest child, not a top-level tab */
  return (next && next->bush_root_tab) ? NULL : next;
}
|
|
|
|
|
|
/*
  Return the first JOIN_TAB of a linear enumeration, or NULL if there is
  none.

  const_tbls decides whether the leading const tables are skipped;
  include_bush_roots decides whether an SJM nest root is returned itself
  or replaced by the first table of its nest.
*/
JOIN_TAB *first_linear_tab(JOIN *join,
                           enum enum_with_bush_roots include_bush_roots,
                           enum enum_with_const_tables const_tbls)
{
  JOIN_TAB *start= join->join_tab;
  if (start == NULL)
    return NULL;

  if (const_tbls == WITHOUT_CONST_TABLES)
    start+= join->const_tables;

  JOIN_TAB *const top_end= join->join_tab + join->top_join_tab_count;
  if (start >= top_end)
    return NULL;                                /* All are const tables */

  /* This JOIN_TAB is a SJM nest; Start from first table in nest */
  const bool enter_nest= start->bush_children &&
                         include_bush_roots == WITHOUT_BUSH_ROOTS;
  return enter_nest ? start->bush_children->start : start;
}
|
|
|
|
|
|
/*
|
|
A helper function to loop over all join's join_tab in sequential fashion
|
|
|
|
DESCRIPTION
|
|
Depending on include_bush_roots parameter, JOIN_TABs that represent
|
|
SJM-scan/lookups are either returned or omitted.
|
|
|
|
SJM-Bush children are returned right after (or in place of) their container
|
|
join tab (TODO: does anybody depend on this? A: make_join_readinfo() seems
|
|
to)
|
|
|
|
For example, if we have this structure:
|
|
|
|
ot1--ot2--sjm1----------------ot3-...
|
|
|
|
|
+--it1--it2--it3
|
|
|
|
calls to next_linear_tab( include_bush_roots=TRUE) will return:
|
|
|
|
ot1 ot2 sjm1 it1 it2 it3 ot3 ...
|
|
|
|
while calls to next_linear_tab( include_bush_roots=FALSE) will return:
|
|
|
|
ot1 ot2 it1 it2 it3 ot3 ...
|
|
|
|
(note that sjm1 won't be returned).
|
|
*/
|
|
|
|
JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab,
                          enum enum_with_bush_roots include_bush_roots)
{
  /*
    When nest roots are part of the enumeration and 'tab' is such a root,
    its first child comes right after it.
  */
  if (include_bush_roots == WITH_BUSH_ROOTS && tab->bush_children)
    return tab->bush_children->start;

  DBUG_ASSERT(!tab->last_leaf_in_bush || tab->bush_root_tab);

  JOIN_TAB *cursor= tab;
  if (cursor->bush_root_tab)
  {
    /* Inside an SJM nest: return the next table in the nest, if any */
    if (!cursor->last_leaf_in_bush)
      return cursor + 1;

    /* Nest finished: continue from the SJM tab on the top level */
    cursor= cursor->bush_root_tab;
  }

  /* Step to the next top-level tab; NULL if there are no more */
  cursor++;
  if (cursor >=
      join->join_tab + join->exec_join_tab_cnt() + join->aggr_tables)
    return NULL;

  /* Nest roots are omitted: replace the root by the first table in nest */
  if (include_bush_roots == WITHOUT_BUSH_ROOTS && cursor->bush_children)
    return cursor->bush_children->start;

  return cursor;
}
|
|
|
|
|
|
/*
|
|
Start to iterate over all join tables in bush-children-first order, excluding
|
|
the const tables (see next_depth_first_tab() comment for details)
|
|
*/
|
|
|
|
JOIN_TAB *first_depth_first_tab(JOIN* join)
{
  /* Nothing to enumerate: only const tables, or no join_tab array */
  if (join->const_tables == join->top_join_tab_count || !join->join_tab)
    return NULL;

  /* Start right after the const tables */
  JOIN_TAB *first= join->join_tab + join->const_tables;

  /* Children come before their nest root in this order */
  if (first->bush_children)
    return first->bush_children->start;
  return first;
}
|
|
|
|
|
|
/*
|
|
A helper function to iterate over all join tables in bush-children-first order
|
|
|
|
DESCRIPTION
|
|
|
|
For example, for this join plan
|
|
|
|
ot1--ot2--sjm1------------ot3-...
|
|
|
|
|
|
|
|
it1--it2--it3
|
|
|
|
call to first_depth_first_tab() will return ot1, and subsequent calls to
|
|
next_depth_first_tab() will return:
|
|
|
|
ot2 it1 it2 it3 sjm ot3 ...
|
|
*/
|
|
|
|
JOIN_TAB *next_depth_first_tab(JOIN* join, JOIN_TAB* tab)
{
  /* End of an SJM nest reached: the nest root is returned next */
  if (tab->last_leaf_in_bush)
    return tab->bush_root_tab;

  /* Advance within the array currently being traversed */
  JOIN_TAB *following= tab + 1;

  /* Outside SJM nest and reached EOF */
  if (following == join->join_tab + join->top_join_tab_count)
    return NULL;

  /* Descend into the nest of the next tab before returning the tab itself */
  return following->bush_children ? following->bush_children->start
                                  : following;
}
|
|
|
|
|
|
/*
  Return true if check_for_splittable_materialized() reports true,
  false otherwise. The 'thd' argument is not used here.
*/
bool JOIN::check_two_phase_optimization(THD *thd)
{
  return check_for_splittable_materialized() ? true : false;
}
|
|
|
|
|
|
/*
  AND 'injected_cond' into the WHERE condition of this join.

  If the current condition is an AND and cond_equal is set, the items of
  cond_equal->current_level are disjoined from the AND argument list
  before the new condition is built and pushed back onto the argument
  list of the resulting condition afterwards.

  Returns true if fixing the new condition failed, false otherwise.
*/
bool JOIN::inject_cond_into_where(Item *injected_cond)
{
  List<Item> *and_args= NULL;
  const bool conds_is_and=
    conds && conds->type() == Item::COND_ITEM &&
    ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC;

  if (conds_is_and)
  {
    and_args= ((Item_cond*) conds)->argument_list();
    /* Detach the multiple equalities of the current level */
    if (cond_equal)
      and_args->disjoin((List<Item> *) &cond_equal->current_level);
  }

  /* Build and fix the combined condition, then install it as WHERE */
  Item *new_where= and_items(thd, conds, injected_cond);
  if (new_where->fix_fields_if_needed(thd, 0))
    return true;
  thd->change_item_tree(&select_lex->where, new_where);
  select_lex->where->top_level_item();
  conds= select_lex->where;

  if (!and_args || !cond_equal)
    return false;

  /* Re-attach the multiple equalities to the new condition's arguments */
  List<Item> *new_args= ((Item_cond*) conds)->argument_list();
  List_iterator<Item_equal> li(cond_equal->current_level);
  for (Item_equal *elem= li++; elem; elem= li++)
    new_args->push_back(elem, thd->mem_root);

  return false;
}
|
|
|
|
|
|
/*
  A constant NULL Item pointer. Its address is used as on_expr_ref for the
  JOIN_TABs that get_best_combination() creates for SJ-Materialization
  nests.
*/
static Item * const null_ptr= NULL;
|
|
|
|
|
|
/*
|
|
  Set up join struct according to the picked join order
|
|
|
|
SYNOPSIS
|
|
get_best_combination()
|
|
join The join to process (the picked join order is mainly in
|
|
join->best_positions)
|
|
|
|
DESCRIPTION
|
|
Setup join structures according the picked join order
|
|
- finalize semi-join strategy choices (see
|
|
fix_semijoin_strategies_for_picked_join_order)
|
|
- create join->join_tab array and put there the JOIN_TABs in the join order
|
|
- create data structures describing ref access methods.
|
|
|
|
NOTE
|
|
In this function we switch from pre-join-optimization JOIN_TABs to
|
|
post-join-optimization JOIN_TABs. This is achieved by copying the entire
|
|
JOIN_TAB objects.
|
|
|
|
RETURN
|
|
FALSE OK
|
|
TRUE Out of memory
|
|
*/
|
|
|
|
bool JOIN::get_best_combination()
|
|
{
|
|
uint tablenr;
|
|
table_map used_tables;
|
|
JOIN_TAB *j;
|
|
KEYUSE *keyuse;
|
|
JOIN_TAB *sjm_nest_end= NULL;
|
|
JOIN_TAB *sjm_nest_root= NULL;
|
|
DBUG_ENTER("get_best_combination");
|
|
|
|
/*
|
|
Additional plan nodes for postjoin tmp tables:
|
|
1? + // For GROUP BY
|
|
1? + // For DISTINCT
|
|
1? + // For aggregation functions aggregated in outer query
|
|
// when used with distinct
|
|
1? + // For ORDER BY
|
|
1? // buffer result
|
|
Up to 2 tmp tables are actually used, but it's hard to tell exact number
|
|
at this stage.
|
|
*/
|
|
uint aggr_tables= (group_list ? 1 : 0) +
|
|
(select_distinct ?
|
|
(tmp_table_param.using_outer_summary_function ? 2 : 1) : 0) +
|
|
(order ? 1 : 0) +
|
|
(select_options & (SELECT_BIG_RESULT | OPTION_BUFFER_RESULT) ? 1 : 0) ;
|
|
|
|
if (aggr_tables == 0)
|
|
aggr_tables= 1; /* For group by pushdown */
|
|
|
|
if (select_lex->window_specs.elements)
|
|
aggr_tables++;
|
|
|
|
if (aggr_tables > 2)
|
|
aggr_tables= 2;
|
|
|
|
full_join=0;
|
|
hash_join= FALSE;
|
|
|
|
fix_semijoin_strategies_for_picked_join_order(this);
|
|
top_join_tab_count= get_number_of_tables_at_top_level(this);
|
|
|
|
#ifndef DBUG_OFF
|
|
dbug_join_tab_array_size= top_join_tab_count + aggr_tables;
|
|
#endif
|
|
/*
|
|
NOTE: The above computation of aggr_tables can produce wrong result because some
|
|
of the variables it uses may change their values after we leave this function.
|
|
Known examples:
|
|
- Dangerous: using_outer_summary_function=false at this point. Added
|
|
DBUG_ASSERT below to demonstrate. Can this cause us to allocate less
|
|
space than we would need?
|
|
- Not dangerous: select_distinct can be true here but be assigned false
|
|
afterwards.
|
|
*/
|
|
aggr_tables= 2;
|
|
DBUG_ASSERT(!tmp_table_param.using_outer_summary_function);
|
|
if (!(join_tab= thd->alloc<JOIN_TAB>(top_join_tab_count + aggr_tables)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (inject_splitting_cond_for_all_tables_with_split_opt())
|
|
goto error;
|
|
|
|
JOIN_TAB_RANGE *root_range;
|
|
if (!(root_range= new (thd->mem_root) JOIN_TAB_RANGE))
|
|
goto error;
|
|
root_range->start= join_tab;
|
|
/* root_range->end will be set later */
|
|
join_tab_ranges.empty();
|
|
|
|
if (join_tab_ranges.push_back(root_range, thd->mem_root))
|
|
goto error;
|
|
|
|
for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
|
|
{
|
|
TABLE *form;
|
|
POSITION *cur_pos= &best_positions[tablenr];
|
|
if (cur_pos->sj_strategy == SJ_OPT_MATERIALIZE ||
|
|
cur_pos->sj_strategy == SJ_OPT_MATERIALIZE_SCAN)
|
|
{
|
|
/*
|
|
Ok, we've entered an SJ-Materialization semi-join (note that this can't
|
|
be done recursively, semi-joins are not allowed to be nested).
|
|
1. Put into main join order a JOIN_TAB that represents a lookup or scan
|
|
in the temptable.
|
|
*/
|
|
bzero((void*)j, sizeof(JOIN_TAB));
|
|
j->join= this;
|
|
j->table= NULL; //temporary way to tell SJM tables from others.
|
|
j->ref.key = -1;
|
|
j->on_expr_ref= (Item**) &null_ptr;
|
|
/* The unique index is always in 'possible keys' in EXPLAIN */
|
|
j->keys= key_map(1);
|
|
|
|
/*
|
|
2. Proceed with processing SJM nest's join tabs, putting them into the
|
|
sub-order
|
|
*/
|
|
SJ_MATERIALIZATION_INFO *sjm= cur_pos->table->emb_sj_nest->sj_mat_info;
|
|
j->records_read= (sjm->is_sj_scan? sjm->rows : 1.0);
|
|
j->records_init= j->records_out= j->records_read;
|
|
j->records= (ha_rows) j->records_read;
|
|
j->cond_selectivity= 1.0;
|
|
j->join_read_time= 0.0; /* Not saved currently */
|
|
j->join_loops= 0.0;
|
|
JOIN_TAB *jt;
|
|
JOIN_TAB_RANGE *jt_range;
|
|
if (!(jt= thd->alloc<JOIN_TAB>(sjm->tables)) ||
|
|
!(jt_range= new JOIN_TAB_RANGE))
|
|
goto error;
|
|
jt_range->start= jt;
|
|
jt_range->end= jt + sjm->tables;
|
|
join_tab_ranges.push_back(jt_range, thd->mem_root);
|
|
j->bush_children= jt_range;
|
|
sjm_nest_end= jt + sjm->tables;
|
|
sjm_nest_root= j;
|
|
|
|
j= jt;
|
|
}
|
|
|
|
*j= *cur_pos->table;
|
|
|
|
j->bush_root_tab= sjm_nest_root;
|
|
|
|
form= table[tablenr]= j->table;
|
|
form->reginfo.join_tab=j;
|
|
DBUG_PRINT("info",("type: %d", j->type));
|
|
if (j->type == JT_CONST)
|
|
goto loop_end; // Handled in make_join_stat..
|
|
|
|
j->loosescan_match_tab= NULL; //non-nulls will be set later
|
|
j->inside_loosescan_range= FALSE;
|
|
j->ref.key = -1;
|
|
j->ref.key_parts=0;
|
|
|
|
if (j->type == JT_SYSTEM)
|
|
goto loop_end;
|
|
|
|
if (!(keyuse= cur_pos->key))
|
|
{
|
|
if (cur_pos->type == JT_NEXT) // Forced index
|
|
{
|
|
j->type= JT_NEXT;
|
|
j->index= cur_pos->forced_index;
|
|
}
|
|
else
|
|
j->type= JT_ALL;
|
|
if (cur_pos->use_join_buffer &&
|
|
tablenr != const_tables)
|
|
full_join= 1;
|
|
}
|
|
if ((j->type == JT_REF || j->type == JT_EQ_REF) &&
|
|
is_hash_join_key_no(j->ref.key))
|
|
hash_join= TRUE;
|
|
|
|
j->range_rowid_filter_info=
|
|
cur_pos->range_rowid_filter_info;
|
|
|
|
/*
|
|
Save records_read in JOIN_TAB so that select_describe()/etc don't have
|
|
to access join->best_positions[].
|
|
*/
|
|
j->records_init= cur_pos->records_init;
|
|
j->records_read= cur_pos->records_read;
|
|
j->records_out= cur_pos->records_out;
|
|
j->join_read_time= cur_pos->read_time;
|
|
j->join_loops= cur_pos->loops;
|
|
|
|
loop_end:
|
|
j->cond_selectivity= cur_pos->cond_selectivity;
|
|
DBUG_ASSERT(j->cond_selectivity <= 1.0);
|
|
crash_if_first_double_is_bigger(j->records_out,
|
|
j->records_init *
|
|
(j->range_rowid_filter_info ?
|
|
j->range_rowid_filter_info->selectivity :
|
|
1.0));
|
|
|
|
map2table[j->table->tablenr]= j;
|
|
|
|
/* If we've reached the end of sjm nest, switch back to main sequence */
|
|
if (j + 1 == sjm_nest_end)
|
|
{
|
|
j->last_leaf_in_bush= TRUE;
|
|
j= sjm_nest_root;
|
|
sjm_nest_root= NULL;
|
|
sjm_nest_end= NULL;
|
|
}
|
|
}
|
|
root_range->end= j;
|
|
|
|
used_tables= OUTER_REF_TABLE_BIT; // Outer row is already read
|
|
for (j=join_tab, tablenr=0 ; tablenr < table_count ; tablenr++,j++)
|
|
{
|
|
if (j->bush_children)
|
|
j= j->bush_children->start;
|
|
|
|
used_tables|= j->table->map;
|
|
if (j->type != JT_CONST && j->type != JT_SYSTEM)
|
|
{
|
|
if ((keyuse= best_positions[tablenr].key) &&
|
|
create_ref_for_key(this, j, keyuse, TRUE, used_tables))
|
|
goto error; // Something went wrong
|
|
}
|
|
if (j->last_leaf_in_bush)
|
|
j= j->bush_root_tab;
|
|
}
|
|
|
|
top_join_tab_count= (uint)(join_tab_ranges.head()->end -
|
|
join_tab_ranges.head()->start);
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
print_final_join_order(this);
|
|
|
|
update_depend_map(this);
|
|
DBUG_RETURN(0);
|
|
|
|
error:
|
|
/* join_tab was not correctly setup. Don't use it */
|
|
join_tab= 0;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/**
|
|
Create a descriptor of hash join key to access a given join table
|
|
|
|
@param join join which the join table belongs to
|
|
@param join_tab the join table to access
|
|
@param org_keyuse beginning of the key uses to join this table
|
|
@param used_tables bitmap of the previous tables
|
|
|
|
@details
|
|
This function first finds key uses that can be utilized by the hash join
|
|
algorithm to join join_tab to the previous tables marked in the bitmap
|
|
used_tables. The tested key uses are taken from the array of all key uses
|
|
for 'join' starting from the position org_keyuse. After all interesting key
|
|
uses have been found the function builds a descriptor of the corresponding
|
|
key that is used by the hash join algorithm would it be chosen to join
|
|
the table join_tab.
|
|
|
|
@retval FALSE the descriptor for a hash join key is successfully created
|
|
@retval TRUE otherwise
|
|
*/
|
|
|
|
static bool create_hj_key_for_table(JOIN *join, JOIN_TAB *join_tab,
                                    KEYUSE *org_keyuse, table_map used_tables)
{
  THD *thd= join->thd;
  TABLE *table= join_tab->table;
  KEY *hj_key;
  KEY_PART_INFO *part_cursor;
  KEYUSE *ku;
  uint n_parts= 0;
  bool at_first_keyuse;
  DBUG_ENTER("create_hj_key_for_table");

  /*
    Pass 1: count the usable key uses, counting each keypart only once.
  */
  at_first_keyuse= TRUE;
  ku= org_keyuse;
  do
  {
    if (!(~used_tables & ku->used_tables) &&
        join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, ku) &&
        are_tables_local(join_tab, ku->used_tables))
    {
      bool seen_before= FALSE;
      if (!at_first_keyuse)
      {
        /* Look for an earlier usable key use on the same keypart */
        for (KEYUSE *prev= org_keyuse; prev < ku; prev++)
        {
          if (prev->keypart == ku->keypart &&
              !(~used_tables & prev->used_tables) &&
              join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
                                                                  prev) &&
              are_tables_local(join_tab, prev->used_tables))
          {
            seen_before= TRUE;
            break;
          }
        }
      }
      if (!seen_before)
        n_parts++;
    }
    at_first_keyuse= FALSE;
    ku++;
  } while (ku->table == table && ku->is_for_hash_join());

  if (!n_parts)
    DBUG_RETURN(TRUE);

  /* This memory is allocated only once for the joined table join_tab */
  if (!(hj_key= thd->alloc<KEY>(1)))
    DBUG_RETURN(TRUE);
  if (!(part_cursor= thd->alloc<KEY_PART_INFO>(n_parts)))
    DBUG_RETURN(TRUE);

  /* Fill in the descriptor of the generated "$hj" key */
  hj_key->usable_key_parts= hj_key->user_defined_key_parts= n_parts;
  hj_key->ext_key_parts= hj_key->user_defined_key_parts;
  hj_key->key_part= part_cursor;
  hj_key->key_length= 0;
  hj_key->algorithm= HA_KEY_ALG_UNDEF;
  hj_key->flags= HA_GENERATED_KEY;
  hj_key->is_statistics_from_stat_tables= FALSE;
  hj_key->name.str= "$hj";
  hj_key->name.length= 3;
  hj_key->rec_per_key= thd->calloc<ulong>(n_parts);
  if (!hj_key->rec_per_key)
    DBUG_RETURN(TRUE);
  hj_key->key_part= part_cursor;

  /*
    Pass 2: create a key part for every keypart counted above. A key use
    whose keypart was already covered by an earlier usable key use gets
    its keypart set to NO_KEYPART.
  */
  at_first_keyuse= TRUE;
  ku= org_keyuse;
  do
  {
    if (!(~used_tables & ku->used_tables) &&
        join_tab->keyuse_is_valid_for_access_in_chosen_plan(join, ku) &&
        are_tables_local(join_tab, ku->used_tables))
    {
      bool is_new_part= TRUE;
      if (!at_first_keyuse)
      {
        for (KEYUSE *prev= org_keyuse; prev < ku; prev++)
        {
          if (prev->keypart == ku->keypart &&
              !(~used_tables & prev->used_tables) &&
              join_tab->keyuse_is_valid_for_access_in_chosen_plan(join,
                                                                  prev) &&
              are_tables_local(join_tab, prev->used_tables))
          {
            ku->keypart= NO_KEYPART;
            is_new_part= FALSE;
            break;
          }
        }
      }
      if (is_new_part)
      {
        Field *field= table->field[ku->keypart];
        uint fieldnr= ku->keypart+1;
        table->create_key_part_by_field(part_cursor, field, fieldnr);
        hj_key->key_length+= part_cursor->store_length;
        part_cursor++;
      }
    }
    at_first_keyuse= FALSE;
    ku++;
  } while (ku->table == table && ku->is_for_hash_join());

  hj_key->ext_key_parts= hj_key->user_defined_key_parts;
  hj_key->ext_key_flags= hj_key->flags;
  hj_key->ext_key_part_map= 0;

  join_tab->hj_key= hj_key;

  DBUG_RETURN(FALSE);
}
|
|
|
|
/*
|
|
Check if a set of tables specified by used_tables can be accessed when
|
|
we're doing scan on join_tab jtab.
|
|
*/
|
|
static bool are_tables_local(JOIN_TAB *jtab, table_map used_tables)
{
  /*
    jtab is at top level.
    - all other tables at top level are accessible,
    - tables in join nests are accessible too, because all their columns that
      are needed at top level will be unpacked when scanning the
      materialization table.
  */
  if (!jtab->bush_root_tab)
    return TRUE;

  /*
    jtab is inside execution join nest. We may not refer to outside tables,
    except the const tables.
  */
  const table_map nest_tables= jtab->emb_sj_nest->nested_join->used_tables;
  const table_map local_tables= nest_tables |
                                jtab->join->const_table_map |
                                OUTER_REF_TABLE_BIT;
  return !MY_TEST(used_tables & ~local_tables);
}
|
|
|
|
/*
  Set up j->ref (TABLE_REF) and j->type for accessing table 'j' through
  the key that org_keyuse refers to.

  Returns TRUE on failure (hash join key creation failed, allocation
  failed, an error was raised, or a fulltext key depends on other tables),
  FALSE on success.
*/
static bool create_ref_for_key(JOIN *join, JOIN_TAB *j,
                               KEYUSE *org_keyuse, bool allow_full_scan,
                               table_map used_tables)
{
  uint keyparts= 0, length= 0;
  KEY *keyinfo;
  KEYUSE *keyuse= org_keyuse;
  bool ftkey= (keyuse->keypart == FT_KEYPART);
  THD *thd= join->thd;
  DBUG_ENTER("create_ref_for_key");

  /* Use best key from find_best */
  TABLE *table= j->table;
  const uint key= keyuse->key;
  const bool hash_key= is_hash_join_key_no(key);

  if (hash_key)
  {
    if (create_hj_key_for_table(join, j, org_keyuse, used_tables))
      DBUG_RETURN(TRUE);
    keyinfo= j->hj_key;
  }
  else
    keyinfo= table->key_info + key;

  if (ftkey)
  {
    Item_func_match *ifm= (Item_func_match *) keyuse->val;

    length= 0;
    keyparts= 1;
    ifm->join_key= 1;
  }
  else
  {
    uint found_part_ref_or_null= 0;
    /*
      Calculate length for the used key
      Stop if there is a missing key part or when we find second key_part
      with KEY_OPTIMIZE_REF_OR_NULL
    */
    do
    {
      const bool usable=
        !(~used_tables & keyuse->used_tables) &&
        (!keyuse->validity_ref || *keyuse->validity_ref) &&
        j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) &&
        are_tables_local(j, keyuse->val->used_tables());
      if (usable)
      {
        const bool extends_key=
          hash_key ? (keyuse->keypart != NO_KEYPART)
                   : (keyparts == keyuse->keypart &&
                      !(found_part_ref_or_null & keyuse->optimize));
        if (extends_key)
        {
          length+= keyinfo->key_part[keyparts].store_length;
          keyparts++;
          found_part_ref_or_null|= keyuse->optimize & ~KEY_OPTIMIZE_EQ;
        }
      }
      keyuse++;
    } while (keyuse->table == table && keyuse->key == key);

    if (!keyparts && allow_full_scan)
    {
      /* It's a LooseIndexScan strategy scanning whole index */
      j->type= JT_ALL;              // TODO: Check if this should be JT_NEXT
      j->index= key;
      DBUG_RETURN(FALSE);
    }

    DBUG_ASSERT(length > 0);
    DBUG_ASSERT(keyparts != 0);
  } /* not ftkey */

  /* set up fieldref */
  j->ref.key_parts= keyparts;
  j->ref.key_length= length;
  j->ref.key= (int) key;
  if (!(j->ref.key_buff= thd->calloc<uchar>(ALIGN_SIZE(length)*2)))
    DBUG_RETURN(TRUE);
  if (!(j->ref.key_copy= thd->alloc<store_key*>(keyparts+1)))
    DBUG_RETURN(TRUE);
  if (!(j->ref.items= thd->alloc<Item*>(keyparts)))
    DBUG_RETURN(TRUE);
  if (!(j->ref.cond_guards= thd->alloc<bool*>(keyparts)))
    DBUG_RETURN(TRUE);

  j->ref.key_buff2= j->ref.key_buff + ALIGN_SIZE(length);
  j->ref.key_err= 1;
  j->ref.has_record= FALSE;
  j->ref.null_rejecting= 0;
  j->ref.disable_cache= FALSE;
  j->ref.null_ref_part= NO_REF_PART;
  j->ref.const_ref_part_map= 0;
  j->ref.uses_splitting= FALSE;
  keyuse= org_keyuse;

  store_key **ref_key= j->ref.key_copy;
  uchar *key_buff= j->ref.key_buff;
  uchar *null_ref_key= 0;
  uint null_ref_part= NO_REF_PART;
  bool keyuse_uses_no_tables= TRUE;
  uint not_null_keyparts= 0;

  if (ftkey)
  {
    j->ref.items[0]= ((Item_func*)(keyuse->val))->key_item();
    /* Predicates pushed down into subquery can't be used FT access */
    j->ref.cond_guards[0]= NULL;
    if (keyuse->used_tables)
      DBUG_RETURN(TRUE);                        // not supported yet. SerG

    j->type= JT_FT;
  }
  else
  {
    for (uint i= 0; i < keyparts; keyuse++, i++)
    {
      /* Skip the key uses that do not provide key part number i */
      while (((~used_tables) & keyuse->used_tables) ||
             (keyuse->validity_ref && !(*keyuse->validity_ref)) ||
             !j->keyuse_is_valid_for_access_in_chosen_plan(join, keyuse) ||
             keyuse->keypart == NO_KEYPART ||
             (keyuse->keypart !=
              (hash_key ? keyinfo->key_part[i].field->field_index : i)) ||
             !are_tables_local(j, keyuse->val->used_tables()))
        keyuse++;                               /* Skip other parts */

      KEY_PART_INFO *part= &keyinfo->key_part[i];
      uint maybe_null= MY_TEST(part->null_bit);
      j->ref.items[i]= keyuse->val;             // Save for cond removal
      j->ref.cond_guards[i]= keyuse->cond_guard;

      if (!keyuse->val->maybe_null() || keyuse->null_rejecting)
        not_null_keyparts++;
      /*
        Set ref.null_rejecting to true only if we are going to inject a
        "keyuse->val IS NOT NULL" predicate.
      */
      Item *real= (keyuse->val)->real_item();
      if (keyuse->null_rejecting && (real->type() == Item::FIELD_ITEM) &&
          ((Item_field*)real)->field->maybe_null())
        j->ref.null_rejecting|= (key_part_map)1 << i;

      keyuse_uses_no_tables= keyuse_uses_no_tables && !keyuse->used_tables;
      j->ref.uses_splitting|= (keyuse->validity_ref != NULL);
      /*
        We don't want to compute heavy expressions in EXPLAIN, an example would
        select * from t1 where t1.key=(select thats very heavy);

        (select thats very heavy) => is a constant here
        eg: (select avg(order_cost) from orders) => constant but expensive
      */
      if (!keyuse->val->used_tables() && !thd->lex->describe)
      {                                         // Compare against constant
        store_key_item tmp(thd,
                           part->field,
                           key_buff + maybe_null,
                           maybe_null ? key_buff : 0,
                           part->length,
                           keyuse->val,
                           FALSE);
        if (unlikely(thd->is_error()))
          DBUG_RETURN(TRUE);
        tmp.copy(thd);
        j->ref.const_ref_part_map|= key_part_map(1) << i;
      }
      else
      {
        *ref_key++= get_store_key(thd,
                                  keyuse, join->const_table_map,
                                  part,
                                  key_buff, maybe_null);
        if (!keyuse->val->used_tables())
          j->ref.const_ref_part_map|= key_part_map(1) << i;
      }
      /*
        Remember if we are going to use REF_OR_NULL
        But only if field _really_ can be null i.e. we force JT_REF
        instead of JT_REF_OR_NULL in case if field can't be null
      */
      if ((keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL) && maybe_null)
      {
        null_ref_key= key_buff;
        null_ref_part= i;
      }
      key_buff+= part->store_length;
    }
  } /* not ftkey */
  *ref_key= 0;                                  // end_marker

  if (j->type == JT_FT)
    DBUG_RETURN(0);

  ulong key_flags= j->table->actual_key_flags(keyinfo);

  /* All user-defined parts are used and the key is unique without NULLs */
  const bool unique_no_nulls=
    (key_flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME;
  /* Unique key and all keyparts are NULL rejecting */
  const bool unique_null_rejecting=
    (key_flags & HA_NOSAME) && keyparts == not_null_keyparts;
  const bool full_user_key=
    keyparts == keyinfo->user_defined_key_parts &&
    (unique_no_nulls || unique_null_rejecting);
  /* true only for extended keys */
  const bool full_ext_key=
    MY_TEST(key_flags & HA_EXT_NOSAME) && keyparts == keyinfo->ext_key_parts;

  if (j->type == JT_CONST)
    j->table->const_table= 1;
  else if (!(full_user_key || full_ext_key) || null_ref_key)
  {
    /* Must read with repeat */
    j->type= null_ref_key ? JT_REF_OR_NULL : JT_REF;
    j->ref.null_ref_key= null_ref_key;
    j->ref.null_ref_part= null_ref_part;
  }
  else if (keyuse_uses_no_tables)
  {
    /*
      This happen if we are using a constant expression in the ON part
      of an LEFT JOIN.
      SELECT * FROM a LEFT JOIN b ON b.key=30
      Here we should not mark the table as a 'const' as a field may
      have a 'normal' value or a NULL value.
    */
    j->type= JT_CONST;
  }
  else
    j->type= JT_EQ_REF;

  /* Pick the unlock_row callback that matches the access type */
  switch (j->type) {
  case JT_EQ_REF:
    j->read_record.unlock_row= join_read_key_unlock_row;
    break;
  case JT_CONST:
    j->read_record.unlock_row= join_const_unlock_row;
    break;
  default:
    j->read_record.unlock_row= rr_unlock_row;
    break;
  }
  DBUG_RETURN(0);
}
|
|
|
|
|
|
|
|
/*
  Create the store_key object that copies the value of keyuse->val into
  the key buffer for key part 'key_part'.

  When maybe_null is non-zero, key_buff is passed as the null-byte pointer
  and the value is stored at key_buff + maybe_null; otherwise the
  null-byte pointer is 0.
*/
static store_key *
get_store_key(THD *thd, KEYUSE *keyuse, table_map used_tables,
              KEY_PART_INFO *key_part, uchar *key_buff, uint maybe_null)
{
  Item *val= keyuse->val;
  uchar *value_ptr= key_buff + maybe_null;
  uchar *null_flag_ptr= maybe_null ? key_buff : 0;

  /* Everything the key use depends on is in used_tables: const item */
  if (!((~used_tables) & keyuse->used_tables))
    return new store_key_const_item(thd,
                                    key_part->field,
                                    value_ptr,
                                    null_flag_ptr,
                                    key_part->length,
                                    val);

  /*
    A plain field, or an outer reference wrapping a direct reference / a
    view reference whose real item is a field: copy field to field.
  */
  if (val->type() == Item::FIELD_ITEM ||
      (val->type() == Item::REF_ITEM &&
       ((((Item_ref*)val)->ref_type() == Item_ref::OUTER_REF &&
         (*(Item_ref**)((Item_ref*)val)->ref)->ref_type() ==
         Item_ref::DIRECT_REF) ||
        ((Item_ref*)val)->ref_type() == Item_ref::VIEW_REF) &&
       val->real_item()->type() == Item::FIELD_ITEM))
    return new store_key_field(thd,
                               key_part->field,
                               value_ptr,
                               null_flag_ptr,
                               key_part->length,
                               ((Item_field*) val->real_item())->field,
                               val->real_item()->full_name());

  /* Any other item */
  return new store_key_item(thd,
                            key_part->field,
                            value_ptr,
                            null_flag_ptr,
                            key_part->length,
                            val, FALSE);
}
|
|
|
|
|
|
/*
  AND condition e2 into *e1.

  If *e1 is NULL it simply becomes e2. Otherwise, if e2 is not NULL, *e1 is
  replaced by a fixed Item_cond_and(*e1, e2); *e1 is left unchanged if that
  item could not be created.
*/
inline void add_cond_and_fix(THD *thd, Item **e1, Item *e2)
{
  if (!*e1)
  {
    *e1= e2;
    return;
  }
  if (!e2)
    return;

  Item *conj= new (thd->mem_root) Item_cond_and(thd, *e1, e2);
  if (!conj)
    return;

  conj->fix_fields(thd, 0);
  conj->update_used_tables();
  *e1= conj;
}
|
|
|
|
|
|
/**
|
|
Add to join_tab->select_cond[i] "table.field IS NOT NULL" conditions
|
|
we've inferred from ref/eq_ref access performed.
|
|
|
|
This function is a part of "Early NULL-values filtering for ref access"
|
|
optimization.
|
|
|
|
Example of this optimization:
|
|
For query SELECT * FROM t1,t2 WHERE t2.key=t1.field @n
|
|
and plan " any-access(t1), ref(t2.key=t1.field) " @n
|
|
add "t1.field IS NOT NULL" to t1's table condition. @n
|
|
|
|
Description of the optimization:
|
|
|
|
We look through equalities chosen to perform ref/eq_ref access,
|
|
pick equalities that have form "tbl.part_of_key = othertbl.field"
|
|
(where othertbl is a non-const table and othertbl.field may be NULL)
|
|
  and add them to conditions on corresponding tables (othertbl in this
|
|
example).
|
|
|
|
Exception from that is the case when referred_tab->join != join.
|
|
I.e. don't add NOT NULL constraints from any embedded subquery.
|
|
Consider this query:
|
|
@code
|
|
SELECT A.f2 FROM t1 LEFT JOIN t2 A ON A.f2 = f1
|
|
WHERE A.f3=(SELECT MIN(f3) FROM t2 C WHERE A.f4 = C.f4) OR A.f3 IS NULL;
|
|
  @endcode
|
|
Here condition A.f3 IS NOT NULL is going to be added to the WHERE
|
|
condition of the embedding query.
|
|
Another example:
|
|
SELECT * FROM t10, t11 WHERE (t10.a < 10 OR t10.a IS NULL)
|
|
AND t11.b <=> t10.b AND (t11.a = (SELECT MAX(a) FROM t12
|
|
WHERE t12.b = t10.a ));
|
|
Here condition t10.a IS NOT NULL is going to be added.
|
|
In both cases addition of NOT NULL condition will erroneously reject
|
|
some rows of the result set.
|
|
referred_tab->join != join constraint would disallow such additions.
|
|
|
|
This optimization doesn't affect the choices that ref, range, or join
|
|
optimizer make. This was intentional because this was added after 4.1
|
|
was GA.
|
|
|
|
Implementation overview
|
|
1. update_ref_and_keys() accumulates info about null-rejecting
|
|
  predicates in KEY_FIELD::null_rejecting
|
|
1.1 add_key_part saves these to KEYUSE.
|
|
2. create_ref_for_key copies them to TABLE_REF.
|
|
3. add_not_null_conds adds "x IS NOT NULL" to join_tab->select_cond of
|
|
  appropriate JOIN_TAB members.
|
|
*/
|
|
|
|
static void add_not_null_conds(JOIN *join)
{
  JOIN_TAB *tab;
  DBUG_ENTER("add_not_null_conds");

  for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
       tab;
       tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
  {
    /* Only ref, eq_ref and ref_or_null accesses are examined. */
    if (tab->type == JT_REF || tab->type == JT_EQ_REF ||
        tab->type == JT_REF_OR_NULL)
    {
      for (uint keypart= 0; keypart < tab->ref.key_parts; keypart++)
      {
        /* Process only the key parts flagged as null-rejecting. */
        if (tab->ref.null_rejecting & ((key_part_map)1 << keypart))
        {
          Item *item= tab->ref.items[keypart];
          Item *notnull;
          Item *real= item->real_item();
          if (real->can_eval_in_optimize() && real->type() != Item::FIELD_ITEM)
          {
            /*
              It could be constant instead of field after constant
              propagation.
            */
            continue;
          }
          DBUG_ASSERT(real->type() == Item::FIELD_ITEM);
          Item_field *not_null_item= (Item_field*)real;
          JOIN_TAB *referred_tab= not_null_item->field->table->reginfo.join_tab;
          /*
            For UPDATE queries such as:
            UPDATE t1 SET t1.f2=(SELECT MAX(t2.f4) FROM t2 WHERE t2.f3=t1.f1);
            not_null_item is the t1.f1, but its referred_tab is 0.
          */
          if (!(notnull= new (join->thd->mem_root)
                Item_func_isnotnull(join->thd, item)))
            DBUG_VOID_RETURN;
          /*
            We need to do full fix_fields() call here in order to have correct
            notnull->const_item(). This is needed e.g. by test_quick_select
            when it is called from make_join_select after this function is
            called.
            fix_fields() receives the address of the item pointer
            (&notnull), not the item itself.
          */
          if (notnull->fix_fields(join->thd, &notnull))
            DBUG_VOID_RETURN;

          DBUG_EXECUTE("where",print_where(notnull,
                                            (referred_tab ?
                                             referred_tab->table->alias.c_ptr() :
                                             "outer_ref_cond"),
                                            QT_ORDINARY););
          if (!tab->first_inner)
          {
            /*
              tab has no first_inner: AND the predicate into the referred
              table's select_cond when that table belongs to this join,
              otherwise into join->outer_ref_cond.
            */
            COND *new_cond= (referred_tab && referred_tab->join == join) ?
                              referred_tab->select_cond :
                              join->outer_ref_cond;
            add_cond_and_fix(join->thd, &new_cond, notnull);
            if (referred_tab && referred_tab->join == join)
              referred_tab->set_select_cond(new_cond, __LINE__);
            else
              join->outer_ref_cond= new_cond;
          }
          else
          {
            /*
              tab has a first_inner: AND the predicate into the expression
              that first_inner->on_expr_ref points to.
            */
            add_cond_and_fix(join->thd, tab->first_inner->on_expr_ref, notnull);
          }
        }
      }
    }
  }
  DBUG_VOID_RETURN;
}
|
|
|
|
/**
|
|
Build a predicate guarded by match variables for embedding outer joins.
|
|
The function recursively adds guards for predicate cond
|
|
ascending from tab to the first inner table next embedding
|
|
nested outer join and so on until it reaches root_tab
|
|
(root_tab can be 0).
|
|
|
|
In other words:
|
|
add_found_match_trig_cond(tab->first_inner_tab, y, 0) is the way one should
|
|
wrap parts of WHERE. The idea is that the part of WHERE should be only
|
|
evaluated after we've finished figuring out whether the outer joins have found a match.
|
|
^^^ is the above correct?
|
|
|
|
@param tab the first inner table for most nested outer join
|
|
@param cond the predicate to be guarded (must be set)
|
|
@param root_tab the first inner table to stop
|
|
|
|
@return
|
|
- pointer to the guarded predicate, if success
|
|
- 0, otherwise
|
|
*/
|
|
|
|
static COND*
add_found_match_trig_cond(THD *thd, JOIN_TAB *tab, COND *cond,
                          JOIN_TAB *root_tab)
{
  DBUG_ASSERT(cond != 0);

  /* Reached the stop point: nothing (more) to wrap. */
  if (tab == root_tab)
    return cond;

  /* First guard the predicate for the outer join operations above tab. */
  COND *upper_guarded= add_found_match_trig_cond(thd, tab->first_upper, cond,
                                                 root_tab);
  if (!upper_guarded)
    return 0;

  /* Then wrap the result into a trigger controlled by tab->found. */
  COND *guarded= new (thd->mem_root) Item_func_trig_cond(thd, upper_guarded,
                                                         &tab->found);
  if (!guarded)
    return 0;

  guarded->quick_fix_field();
  guarded->update_used_tables();
  return guarded;
}
|
|
|
|
|
|
bool TABLE_LIST::is_active_sjm()
|
|
{
|
|
return sj_mat_info && sj_mat_info->is_used;
|
|
}
|
|
|
|
|
|
/**
|
|
Fill in outer join related info for the execution plan structure.
|
|
|
|
For each outer join operation left after simplification of the
|
|
original query the function sets up the following pointers in the linear
|
|
structure join->join_tab representing the selected execution plan.
|
|
The first inner table t0 for the operation is set to refer to the last
|
|
inner table tk through the field t0->last_inner.
|
|
Every inner table ti for the operation is set to refer to the first
|
|
inner table ti->first_inner.
|
|
The first inner table t0 for the operation is set to refer to the
|
|
first inner table of the embedding outer join operation, if there is any,
|
|
through the field t0->first_upper.
|
|
The on expression for the outer join operation is attached to the
|
|
corresponding first inner table through the field t0->on_expr_ref.
|
|
Here ti are structures of the JOIN_TAB type.
|
|
|
|
In other words, for each join tab, set
|
|
- first_inner
|
|
- last_inner
|
|
- first_upper
|
|
- on_expr_ref, cond_equal
|
|
|
|
EXAMPLE. For the query:
|
|
@code
|
|
SELECT * FROM t1
|
|
LEFT JOIN
|
|
(t2, t3 LEFT JOIN t4 ON t3.a=t4.a)
|
|
ON (t1.a=t2.a AND t1.b=t3.b)
|
|
WHERE t1.c > 5,
|
|
@endcode
|
|
|
|
given the execution plan with the table order t1,t2,t3,t4
|
|
is selected, the following references will be set;
|
|
t4->last_inner=[t4], t4->first_inner=[t4], t4->first_upper=[t2]
|
|
t2->last_inner=[t4], t2->first_inner=t3->first_inner=[t2],
|
|
on expression (t1.a=t2.a AND t1.b=t3.b) will be attached to
|
|
*t2->on_expr_ref, while t3.a=t4.a will be attached to *t4->on_expr_ref.
|
|
|
|
@param join reference to the info fully describing the query
|
|
|
|
@note
|
|
The function assumes that the simplification procedure has been
|
|
already applied to the join query (see simplify_joins).
|
|
This function can be called only after the execution plan
|
|
has been chosen.
|
|
*/
|
|
|
|
static bool
|
|
make_outerjoin_info(JOIN *join)
|
|
{
|
|
DBUG_ENTER("make_outerjoin_info");
|
|
|
|
/*
|
|
Create temp. tables for merged SJ-Materialization nests. We need to do
|
|
this now, because further code relies on tab->table and
|
|
tab->table->pos_in_table_list being set.
|
|
*/
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->bush_children)
|
|
{
|
|
if (setup_sj_materialization_part1(tab))
|
|
DBUG_RETURN(TRUE);
|
|
tab->table->reginfo.join_tab= tab;
|
|
}
|
|
}
|
|
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
TABLE *table= tab->table;
|
|
TABLE_LIST *tbl= table->pos_in_table_list;
|
|
TABLE_LIST *embedding= tbl->embedding;
|
|
|
|
if (tbl->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT))
|
|
{
|
|
/*
|
|
Table tab is the only one inner table for outer join.
|
|
(Like table t4 for the table reference t3 LEFT JOIN t4 ON t3.a=t4.a
|
|
is in the query above.)
|
|
*/
|
|
tab->last_inner= tab->first_inner= tab;
|
|
tab->on_expr_ref= &tbl->on_expr;
|
|
tab->cond_equal= tbl->cond_equal;
|
|
if (embedding && !embedding->is_active_sjm())
|
|
tab->first_upper= embedding->nested_join->first_nested;
|
|
}
|
|
else if (!embedding)
|
|
tab->table->reginfo.not_exists_optimize= 0;
|
|
|
|
for ( ; embedding ; embedding= embedding->embedding)
|
|
{
|
|
if (embedding->is_active_sjm())
|
|
{
|
|
/*
|
|
We're trying to walk out of an SJ-Materialization nest.
|
|
Don't do this.
|
|
*/
|
|
break;
|
|
}
|
|
/* Ignore sj-nests: */
|
|
if (!(embedding->on_expr && embedding->outer_join))
|
|
{
|
|
tab->table->reginfo.not_exists_optimize= 0;
|
|
continue;
|
|
}
|
|
NESTED_JOIN *nested_join= embedding->nested_join;
|
|
if (!nested_join->counter)
|
|
{
|
|
/*
|
|
Table tab is the first inner table for nested_join.
|
|
Save reference to it in the nested join structure.
|
|
*/
|
|
nested_join->first_nested= tab;
|
|
tab->on_expr_ref= &embedding->on_expr;
|
|
tab->cond_equal= tbl->cond_equal;
|
|
if (embedding->embedding)
|
|
tab->first_upper= embedding->embedding->nested_join->first_nested;
|
|
}
|
|
if (!tab->first_inner)
|
|
tab->first_inner= nested_join->first_nested;
|
|
if (++nested_join->counter < nested_join->n_tables)
|
|
break;
|
|
/* Table tab is the last inner table for nested join. */
|
|
nested_join->first_nested->last_inner= tab;
|
|
}
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Build a temporary join prefix condition for JOIN_TABs up to the last tab
|
|
|
|
@param ret OUT the condition is returned here
|
|
|
|
@return
|
|
false OK
|
|
true Out of memory
|
|
|
|
@detail
|
|
Walk through the join prefix (from the first table to the last_tab) and
|
|
build a condition:
|
|
|
|
join_tab_1_cond AND join_tab_2_cond AND ... AND last_tab_conds
|
|
|
|
The condition is only intended to be used by the range optimizer, so:
|
|
- it is not normalized (can have Item_cond_and inside another
|
|
Item_cond_and)
|
|
- it does not include join->exec_const_cond and other similar conditions.
|
|
*/
|
|
|
|
bool build_tmp_join_prefix_cond(JOIN *join, JOIN_TAB *last_tab, Item **ret)
{
  THD *const thd= join->thd;
  Item_cond_and *all_conds= NULL;  // created lazily, once two parts exist

  Item *res= NULL;

  // Pick the ON-expression. Use the same logic as in get_sargable_cond():
  if (last_tab->on_expr_ref)
    res= *last_tab->on_expr_ref;
  else if (last_tab->table->pos_in_table_list &&
           last_tab->table->pos_in_table_list->embedding &&
           !last_tab->table->pos_in_table_list->embedding->sj_on_expr)
  {
    res= last_tab->table->pos_in_table_list->embedding->on_expr;
  }

  // AND together select_cond of every tab from the first one up to last_tab.
  for (JOIN_TAB *tab= first_depth_first_tab(join);
       tab;
       tab= next_depth_first_tab(join, tab))
  {
    if (tab->select_cond)
    {
      if (!res)
        res= tab->select_cond;
      else if (!all_conds)
      {
        if (!(all_conds= new (thd->mem_root) Item_cond_and(thd, res,
                                                           tab->select_cond)))
          return true;
        res= all_conds;
      }
      else if (all_conds->add(tab->select_cond, thd->mem_root))
      {
        /*
          Propagate a failed add() (out of memory) to the caller instead of
          silently returning an incomplete condition.
        */
        return true;
      }
    }
    if (tab == last_tab)
      break;
  }
  *ret= all_conds? all_conds: res;
  return false;
}
|
|
|
|
|
|
static bool
|
|
make_join_select(JOIN *join,SQL_SELECT *select,COND *cond)
|
|
{
|
|
THD *thd= join->thd;
|
|
DBUG_ENTER("make_join_select");
|
|
if (select)
|
|
{
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_conditions(thd, "attaching_conditions_to_tables");
|
|
Json_writer_array trace_attached_comp(thd,
|
|
"attached_conditions_computation");
|
|
add_not_null_conds(join);
|
|
table_map used_tables;
|
|
/*
|
|
Step #1: Extract constant condition
|
|
- Extract and check the constant part of the WHERE
|
|
- Extract constant parts of ON expressions from outer
|
|
joins and attach them appropriately.
|
|
*/
|
|
if (cond) /* Because of QUICK_GROUP_MIN_MAX_SELECT */
|
|
{ /* there may be a select without a cond. */
|
|
if (join->table_count > 1)
|
|
cond->update_used_tables(); // Tablenr may have changed
|
|
|
|
/*
|
|
Extract expressions that depend on constant tables
|
|
1. Const part of the join's WHERE clause can be checked immediately
|
|
and if it is not satisfied then the join has empty result
|
|
2. Constant parts of outer joins' ON expressions must be attached
|
|
there inside the triggers.
|
|
*/
|
|
{ // Check const tables
|
|
Item* const_cond= NULL;
|
|
const_cond= make_cond_for_table(thd, cond,
|
|
join->const_table_map,
|
|
(table_map) 0, -1, FALSE, FALSE);
|
|
if (!const_cond && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
|
|
/* Add conditions added by add_not_null_conds(). */
|
|
for (uint i= 0 ; i < join->const_tables ; i++)
|
|
add_cond_and_fix(thd, &const_cond,
|
|
join->join_tab[i].select_cond);
|
|
|
|
DBUG_EXECUTE("where",print_where(const_cond,"constants",
|
|
QT_ORDINARY););
|
|
|
|
if (const_cond)
|
|
{
|
|
Json_writer_object trace_const_cond(thd);
|
|
trace_const_cond.add("condition_on_constant_tables", const_cond);
|
|
if (const_cond->is_expensive())
|
|
{
|
|
if (unlikely(trace_const_cond.trace_started()))
|
|
trace_const_cond.
|
|
add("evalualted", "false").
|
|
add("cause", "expensive cond");
|
|
}
|
|
else
|
|
{
|
|
bool const_cond_result;
|
|
{
|
|
Json_writer_array a(thd, "computing_condition");
|
|
const_cond_result= const_cond->val_int() != 0;
|
|
}
|
|
if (!const_cond_result)
|
|
{
|
|
DBUG_PRINT("info",("Found impossible WHERE condition"));
|
|
if (unlikely(trace_const_cond.trace_started()))
|
|
trace_const_cond.
|
|
add("evalualted", "true").
|
|
add("found", "impossible where");
|
|
join->exec_const_cond= NULL;
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
join->exec_const_cond= const_cond;
|
|
}
|
|
|
|
if (join->table_count != join->const_tables)
|
|
{
|
|
COND *outer_ref_cond= make_cond_for_table(thd, cond,
|
|
join->const_table_map |
|
|
OUTER_REF_TABLE_BIT,
|
|
OUTER_REF_TABLE_BIT,
|
|
-1, FALSE, FALSE);
|
|
if (outer_ref_cond)
|
|
{
|
|
add_cond_and_fix(thd, &outer_ref_cond, join->outer_ref_cond);
|
|
join->outer_ref_cond= outer_ref_cond;
|
|
|
|
Json_writer_object trace(thd);
|
|
trace.add("outer_ref_cond", outer_ref_cond);
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
else
|
|
{
|
|
COND *pseudo_bits_cond=
|
|
make_cond_for_table(thd, cond,
|
|
join->const_table_map |
|
|
PSEUDO_TABLE_BITS,
|
|
PSEUDO_TABLE_BITS,
|
|
-1, FALSE, FALSE);
|
|
if (pseudo_bits_cond)
|
|
{
|
|
add_cond_and_fix(thd, &pseudo_bits_cond,
|
|
join->pseudo_bits_cond);
|
|
join->pseudo_bits_cond= pseudo_bits_cond;
|
|
|
|
Json_writer_object trace(thd);
|
|
trace.add("pseudo_bits_cond", pseudo_bits_cond);
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Step #2: Extract WHERE/ON parts
|
|
*/
|
|
|
|
uint i;
|
|
for (i= join->top_join_tab_count - 1; i >= join->const_tables; i--)
|
|
{
|
|
if (!join->join_tab[i].bush_children)
|
|
break;
|
|
}
|
|
uint last_top_base_tab_idx= i;
|
|
|
|
table_map save_used_tables= 0;
|
|
used_tables=((select->const_tables=join->const_table_map) |
|
|
OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
|
|
JOIN_TAB *tab;
|
|
table_map current_map;
|
|
i= join->const_tables;
|
|
for (tab= first_depth_first_tab(join); tab;
|
|
tab= next_depth_first_tab(join, tab))
|
|
{
|
|
bool is_hj;
|
|
|
|
/*
|
|
first_inner is the X in queries like:
|
|
SELECT * FROM t1 LEFT OUTER JOIN (t2 JOIN t3) ON X
|
|
*/
|
|
JOIN_TAB *first_inner_tab= tab->first_inner;
|
|
COND *tmp;
|
|
|
|
if (!tab->bush_children)
|
|
current_map= tab->table->map;
|
|
else
|
|
current_map= tab->bush_children->start->emb_sj_nest->sj_inner_tables;
|
|
|
|
/*
|
|
Tables that are within SJ-Materialization nests cannot have their
|
|
conditions referring to preceding non-const tables.
|
|
- If we're looking at the first SJM table, reset used_tables
|
|
to refer to only allowed tables
|
|
*/
|
|
if (tab->emb_sj_nest && tab->emb_sj_nest->sj_mat_info &&
|
|
tab->emb_sj_nest->sj_mat_info->is_used &&
|
|
!(used_tables & tab->emb_sj_nest->sj_inner_tables))
|
|
{
|
|
save_used_tables= used_tables;
|
|
used_tables= join->const_table_map | OUTER_REF_TABLE_BIT |
|
|
RAND_TABLE_BIT;
|
|
}
|
|
|
|
used_tables|=current_map;
|
|
|
|
if ((tab->type == JT_REF || tab->type == JT_RANGE) && tab->quick &&
|
|
(((uint) tab->ref.key == tab->quick->index &&
|
|
tab->ref.key_length < tab->quick->max_used_key_length) ||
|
|
(!is_hash_join_key_no(tab->ref.key) &&
|
|
tab->table->intersect_keys.is_set(tab->ref.key))))
|
|
{
|
|
/* Range uses longer key; Use this instead of ref on key */
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
Json_writer_object ref_to_range(thd);
|
|
ref_to_range.
|
|
add("ref_to_range", true).
|
|
add("cause", "range uses longer key");
|
|
}
|
|
tab->type= JT_RANGE;
|
|
tab->use_quick=1;
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts=0; // Don't use ref key.
|
|
join->best_positions[i].records_read= rows2double(tab->quick->records);
|
|
|
|
/*
|
|
We will use join cache here : prevent sorting of the first
|
|
table only and sort at the end.
|
|
*/
|
|
if (i != join->const_tables &&
|
|
join->table_count > join->const_tables + 1 &&
|
|
join->best_positions[i].use_join_buffer)
|
|
join->full_join= 1;
|
|
}
|
|
|
|
tmp= NULL;
|
|
|
|
if (cond)
|
|
{
|
|
if (tab->bush_children)
|
|
{
|
|
// Reached the materialization tab
|
|
tmp= make_cond_after_sjm(thd, cond, cond, save_used_tables,
|
|
used_tables, /*inside_or_clause=*/FALSE);
|
|
used_tables= save_used_tables | used_tables;
|
|
save_used_tables= 0;
|
|
}
|
|
else
|
|
{
|
|
tmp= make_cond_for_table(thd, cond, used_tables, current_map, i,
|
|
FALSE, FALSE);
|
|
if (!tmp && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
|
|
if (tab == join->join_tab + last_top_base_tab_idx)
|
|
{
|
|
/*
|
|
This pushes conjunctive conditions of WHERE condition such that:
|
|
- their used_tables() contain RAND_TABLE_BIT
|
|
- the conditions does not refer to any fields
|
|
(such like rand() > 0.5)
|
|
*/
|
|
table_map rand_table_bit= (table_map) RAND_TABLE_BIT;
|
|
COND *rand_cond= make_cond_for_table(thd, cond, used_tables,
|
|
rand_table_bit, -1,
|
|
FALSE, FALSE);
|
|
if (rand_cond)
|
|
add_cond_and_fix(thd, &tmp, rand_cond);
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
/* Add conditions added by add_not_null_conds(). */
|
|
if (tab->select_cond)
|
|
add_cond_and_fix(thd, &tmp, tab->select_cond);
|
|
}
|
|
|
|
is_hj= (tab->type == JT_REF || tab->type == JT_EQ_REF) &&
|
|
(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT) &&
|
|
((join->max_allowed_join_cache_level+1)/2 == 2 ||
|
|
((join->max_allowed_join_cache_level+1)/2 > 2 &&
|
|
is_hash_join_key_no(tab->ref.key))) &&
|
|
(!tab->emb_sj_nest ||
|
|
join->allowed_semijoin_with_cache) &&
|
|
(!(tab->table->map & join->outer_join) ||
|
|
join->allowed_outer_join_with_cache);
|
|
|
|
if (cond && !tmp && tab->quick)
|
|
{ // Outer join
|
|
if ((tab->type != JT_ALL && tab->type != JT_RANGE) && !is_hj)
|
|
{
|
|
/*
|
|
Don't use the quick method
|
|
We come here in the case where we have 'key=constant' and
|
|
the test is removed by make_cond_for_table()
|
|
*/
|
|
delete tab->quick;
|
|
tab->quick= 0;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Hack to handle the case where we only refer to a table
|
|
in the ON part of an OUTER JOIN. In this case we want the code
|
|
below to check if we should use 'quick' instead.
|
|
*/
|
|
DBUG_PRINT("info", ("Item_int"));
|
|
tmp= (Item*) Item_true;
|
|
}
|
|
|
|
}
|
|
if (tmp || !cond || tab->type == JT_REF || tab->type == JT_REF_OR_NULL ||
|
|
tab->type == JT_EQ_REF || first_inner_tab)
|
|
{
|
|
DBUG_EXECUTE("where",print_where(tmp,
|
|
tab->table ?
|
|
tab->table->alias.c_ptr() :"sjm-nest",
|
|
QT_ORDINARY););
|
|
SQL_SELECT *sel= tab->select= ((SQL_SELECT*)
|
|
thd->memdup((uchar*) select,
|
|
sizeof(*select)));
|
|
if (!sel)
|
|
DBUG_RETURN(1); // End of memory
|
|
/*
|
|
If tab is an inner table of an outer join operation,
|
|
add a match guard to the pushed down predicate.
|
|
The guard will turn the predicate on only after
|
|
the first match for outer tables is encountered.
|
|
*/
|
|
if (cond && tmp)
|
|
{
|
|
/*
|
|
Because of QUICK_GROUP_MIN_MAX_SELECT there may be a select without
|
|
a cond, so neutralize the hack above.
|
|
*/
|
|
COND *tmp_cond;
|
|
if (!(tmp_cond= add_found_match_trig_cond(thd, first_inner_tab, tmp,
|
|
0)))
|
|
DBUG_RETURN(1);
|
|
sel->cond= tmp_cond;
|
|
tab->set_select_cond(tmp_cond, __LINE__);
|
|
/* Push condition to storage engine if this is enabled
|
|
and the condition is not guarded */
|
|
if (tab->table)
|
|
{
|
|
tab->table->file->pushed_cond= NULL;
|
|
if ((tab->table->file->ha_table_flags() &
|
|
HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
|
|
!first_inner_tab)
|
|
{
|
|
Json_writer_object wrap(thd);
|
|
Json_writer_object trace_cp(thd, "table_condition_pushdown");
|
|
trace_cp.add_table_name(tab->table);
|
|
|
|
COND *push_cond=
|
|
make_cond_for_table(thd, tmp_cond, current_map, current_map,
|
|
-1, FALSE, FALSE);
|
|
if (push_cond)
|
|
{
|
|
trace_cp.add("push_cond", push_cond);
|
|
/* Push condition to handler */
|
|
if (!tab->table->file->cond_push(push_cond))
|
|
tab->table->file->pushed_cond= push_cond;
|
|
}
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sel->cond= NULL;
|
|
tab->set_select_cond(NULL, __LINE__);
|
|
}
|
|
|
|
sel->head=tab->table;
|
|
DBUG_EXECUTE("where",
|
|
print_where(tmp,
|
|
tab->table ? tab->table->alias.c_ptr() :
|
|
"(sjm-nest)",
|
|
QT_ORDINARY););
|
|
if (tab->quick)
|
|
{
|
|
/* Use quick key read if it's a constant and it's not used
|
|
with key reading */
|
|
if ((tab->needed_reg.is_clear_all() && tab->type != JT_EQ_REF &&
|
|
tab->type != JT_FT &&
|
|
((tab->type != JT_CONST && tab->type != JT_REF) ||
|
|
(uint) tab->ref.key == tab->quick->index)) || is_hj)
|
|
{
|
|
DBUG_ASSERT(tab->quick->is_valid());
|
|
sel->quick=tab->quick; // Use value from get_quick_...
|
|
sel->quick_keys.clear_all();
|
|
sel->needed_reg.clear_all();
|
|
if (is_hj && tab->rowid_filter)
|
|
tab->clear_range_rowid_filter();
|
|
}
|
|
else
|
|
{
|
|
delete tab->quick;
|
|
}
|
|
tab->quick=0;
|
|
}
|
|
uint ref_key= (sel->head ?
|
|
(uint) sel->head->reginfo.join_tab->ref.key+1 :
|
|
0);
|
|
if (i == join->const_tables && ref_key)
|
|
{
|
|
if (!tab->const_keys.is_clear_all() &&
|
|
tab->table->reginfo.impossible_range)
|
|
DBUG_RETURN(1);
|
|
}
|
|
else if ((tab->type == JT_ALL || tab->type == JT_NEXT))
|
|
{
|
|
if (!tab->const_keys.is_clear_all() &&
|
|
tab->table->reginfo.impossible_range)
|
|
DBUG_RETURN(1); // Impossible range
|
|
/*
|
|
We plan to scan all rows either with table or index scan
|
|
Check again if we should use an index.
|
|
|
|
There are two cases:
|
|
1) There could be an index usage the refers to a previous
|
|
table that we didn't consider before, but could be consider
|
|
now as a "last resort". For example
|
|
SELECT * from t1,t2 where t1.a between t2.a and t2.b;
|
|
2) If the current table is the first non const table
|
|
and there is a limit it still possibly beneficial
|
|
to use the index even if the index range is big as
|
|
we can stop when we've found limit rows.
|
|
|
|
(1) - Don't switch the used index if we are using semi-join
|
|
LooseScan on this table. Using different index will not
|
|
produce the desired ordering and de-duplication.
|
|
*/
|
|
|
|
if (!tab->table->is_filled_at_execution() &&
|
|
!tab->loosescan_match_tab && // (1)
|
|
((cond && (!tab->keys.is_subset(tab->const_keys) &&
|
|
i > join->const_tables)) ||
|
|
(!tab->const_keys.is_clear_all() && i == join->const_tables &&
|
|
join->unit->lim.get_select_limit() <
|
|
join->best_positions[i].records_read &&
|
|
!(join->select_options & OPTION_FOUND_ROWS))))
|
|
{
|
|
/* Join with outer join condition */
|
|
COND *orig_cond=sel->cond;
|
|
|
|
if (build_tmp_join_prefix_cond(join, tab, &sel->cond))
|
|
return true;
|
|
|
|
/*
|
|
We can't call sel->cond->fix_fields,
|
|
as it will break tab->on_expr if it's AND condition
|
|
(fix_fields currently removes extra AND/OR levels).
|
|
Yet attributes of the just built condition are not needed.
|
|
Thus we call sel->cond->quick_fix_field for safety.
|
|
*/
|
|
if (sel->cond && !sel->cond->fixed())
|
|
sel->cond->quick_fix_field();
|
|
quick_select_return res;
|
|
|
|
if ((res= sel->test_quick_select(thd, tab->keys,
|
|
((used_tables & ~ current_map) |
|
|
OUTER_REF_TABLE_BIT),
|
|
(join->select_options &
|
|
OPTION_FOUND_ROWS ?
|
|
HA_POS_ERROR :
|
|
join->unit->lim.get_select_limit()),
|
|
0,
|
|
FALSE, FALSE, FALSE,
|
|
Item_func::BITMAP_ALL)) ==
|
|
SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
{
|
|
/*
|
|
Before reporting "Impossible WHERE" for the whole query
|
|
we have to check isn't it only "impossible ON" instead
|
|
*/
|
|
sel->cond=orig_cond;
|
|
if (!*tab->on_expr_ref ||
|
|
(res= sel->test_quick_select(thd, tab->keys,
|
|
used_tables & ~ current_map,
|
|
(join->select_options &
|
|
OPTION_FOUND_ROWS ?
|
|
HA_POS_ERROR :
|
|
join->unit->lim.get_select_limit()),
|
|
0, FALSE, FALSE, FALSE,
|
|
Item_func::BITMAP_NONE)) ==
|
|
SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
DBUG_RETURN(1); // Impossible WHERE
|
|
}
|
|
else
|
|
sel->cond=orig_cond;
|
|
|
|
if (res == SQL_SELECT::ERROR)
|
|
DBUG_RETURN(1); /* Some error in one of test_quick_select calls */
|
|
|
|
/* Fix for EXPLAIN */
|
|
if (sel->quick)
|
|
{
|
|
join->best_positions[i].records_read=
|
|
(double) sel->quick->records;
|
|
set_if_smaller(join->best_positions[i].records_out,
|
|
rows2double(sel->head->opt_range_condition_rows));
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
sel->head->opt_range_condition_rows may have been
|
|
updated to a smaller number than before by a call to
|
|
test_quick_select. This can happen even if the range
|
|
optimizer decided to not use the range (sel->quick was
|
|
not set).
|
|
*/
|
|
set_if_smaller(join->best_positions[i].records_out,
|
|
rows2double(sel->head->opt_range_condition_rows));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sel->needed_reg=tab->needed_reg;
|
|
}
|
|
sel->quick_keys= tab->table->opt_range_keys;
|
|
if (!sel->quick_keys.is_subset(tab->checked_keys) ||
|
|
!sel->needed_reg.is_subset(tab->checked_keys))
|
|
{
|
|
handler *file= tab->table->file;
|
|
/*
|
|
"Range checked for each record" is a "last resort" access method
|
|
that should only be used when the other option is a cross-product
|
|
join.
|
|
|
|
We use the following condition (it's approximate):
|
|
1. There are potential keys for (sel->needed_reg)
|
|
2. There were no possible ways to construct a quick select, or
|
|
the quick select would be more expensive than the full table
|
|
scan.
|
|
*/
|
|
tab->use_quick= (!sel->needed_reg.is_clear_all() &&
|
|
(sel->quick_keys.is_clear_all() ||
|
|
(sel->quick &&
|
|
sel->quick->read_time >
|
|
file->cost(file->ha_scan_and_compare_time(tab->table->file-> stats.records))))) ?
|
|
2 : 1;
|
|
sel->read_tables= used_tables & ~current_map;
|
|
sel->quick_keys.clear_all();
|
|
}
|
|
if (i != join->const_tables && tab->use_quick != 2 &&
|
|
!tab->first_inner)
|
|
{ /* Read with cache */
|
|
/*
|
|
TODO: the execution also gets here when we will not be using
|
|
join buffer. Review these cases and perhaps, remove this call.
|
|
(The final decision whether to use join buffer is made in
|
|
check_join_cache_usage, so we should only call make_scan_filter()
|
|
there, too).
|
|
*/
|
|
if (tab->make_scan_filter())
|
|
DBUG_RETURN(1);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Push down conditions from all ON expressions.
|
|
Each of these conditions are guarded by a variable
|
|
that turns if off just before null complemented row for
|
|
outer joins is formed. Thus, the condition from an
|
|
'on expression' are guaranteed not to be checked for
|
|
the null complemented row.
|
|
*/
|
|
|
|
/*
|
|
First push down constant conditions from ON expressions.
|
|
- Each pushed-down condition is wrapped into trigger which is
|
|
enabled only for non-NULL-complemented record
|
|
- The condition is attached to the first_inner_table.
|
|
|
|
With regards to join nests:
|
|
- if we start at top level, don't walk into nests
|
|
- if we start inside a nest, stay within that nest.
|
|
*/
|
|
JOIN_TAB *start_from= tab->bush_root_tab?
|
|
tab->bush_root_tab->bush_children->start :
|
|
join->join_tab + join->const_tables;
|
|
JOIN_TAB *end_with= tab->bush_root_tab?
|
|
tab->bush_root_tab->bush_children->end :
|
|
join->join_tab + join->top_join_tab_count;
|
|
for (JOIN_TAB *join_tab= start_from;
|
|
join_tab != end_with;
|
|
join_tab++)
|
|
{
|
|
if (*join_tab->on_expr_ref)
|
|
{
|
|
JOIN_TAB *cond_tab= join_tab->first_inner;
|
|
COND *tmp_cond= make_cond_for_table(thd, *join_tab->on_expr_ref,
|
|
join->const_table_map,
|
|
(table_map) 0, -1, FALSE, FALSE);
|
|
if (!tmp_cond)
|
|
{
|
|
if (!thd->is_error())
|
|
continue;
|
|
DBUG_RETURN(1);
|
|
}
|
|
tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond,
|
|
&cond_tab->not_null_compl);
|
|
if (!tmp_cond)
|
|
DBUG_RETURN(1);
|
|
tmp_cond->quick_fix_field();
|
|
cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond :
|
|
new (thd->mem_root) Item_cond_and(thd, cond_tab->select_cond,
|
|
tmp_cond);
|
|
if (!cond_tab->select_cond)
|
|
DBUG_RETURN(1);
|
|
cond_tab->select_cond->quick_fix_field();
|
|
cond_tab->select_cond->update_used_tables();
|
|
if (cond_tab->select)
|
|
cond_tab->select->cond= cond_tab->select_cond;
|
|
}
|
|
}
|
|
|
|
|
|
/* Push down non-constant conditions from ON expressions */
|
|
JOIN_TAB *last_tab= tab;
|
|
|
|
/*
|
|
while we're inside of an outer join and last_tab is
|
|
the last of its tables ...
|
|
*/
|
|
while (first_inner_tab && first_inner_tab->last_inner == last_tab)
|
|
{
|
|
/*
|
|
Table tab is the last inner table of an outer join.
|
|
An on expression is always attached to it.
|
|
*/
|
|
COND *on_expr= *first_inner_tab->on_expr_ref;
|
|
|
|
table_map used_tables2= (join->const_table_map |
|
|
OUTER_REF_TABLE_BIT | RAND_TABLE_BIT);
|
|
|
|
start_from= tab->bush_root_tab?
|
|
tab->bush_root_tab->bush_children->start :
|
|
join->join_tab + join->const_tables;
|
|
for (JOIN_TAB *inner_tab= start_from;
|
|
inner_tab <= last_tab;
|
|
inner_tab++)
|
|
{
|
|
DBUG_ASSERT(inner_tab->table);
|
|
current_map= inner_tab->table->map;
|
|
used_tables2|= current_map;
|
|
/*
|
|
psergey: have put the -1 below. It's bad, will need to fix it.
|
|
*/
|
|
COND *tmp_cond= make_cond_for_table(thd, on_expr, used_tables2,
|
|
current_map,
|
|
/*(inner_tab - first_tab)*/ -1,
|
|
FALSE, FALSE);
|
|
if (!tmp_cond && thd->is_error())
|
|
DBUG_RETURN(1);
|
|
if (tab == last_tab)
|
|
{
|
|
/*
|
|
This pushes conjunctive conditions of ON expression of an outer
|
|
join such that:
|
|
- their used_tables() contain RAND_TABLE_BIT
|
|
- the conditions does not refer to any fields
|
|
(such like rand() > 0.5)
|
|
*/
|
|
table_map rand_table_bit= (table_map) RAND_TABLE_BIT;
|
|
COND *rand_cond= make_cond_for_table(thd, on_expr, used_tables2,
|
|
rand_table_bit, -1,
|
|
FALSE, FALSE);
|
|
if (rand_cond)
|
|
add_cond_and_fix(thd, &tmp_cond, rand_cond);
|
|
else if (thd->is_error())
|
|
DBUG_RETURN(1);
|
|
}
|
|
bool is_sjm_lookup_tab= FALSE;
|
|
if (inner_tab->bush_children)
|
|
{
|
|
/*
|
|
'inner_tab' is an SJ-Materialization tab, i.e. we have a join
|
|
order like this:
|
|
|
|
ot1 sjm_tab LEFT JOIN ot2 ot3
|
|
^ ^
|
|
'tab'-+ +--- left join we're adding triggers for
|
|
|
|
LEFT JOIN's ON expression may not have references to subquery
|
|
columns. The subquery was in the WHERE clause, so IN-equality
|
|
is in the WHERE clause, also.
|
|
However, equality propagation code may have propagated the
|
|
IN-equality into ON expression, and we may get things like
|
|
|
|
subquery_inner_table=const
|
|
|
|
in the ON expression. We must not check such conditions during
|
|
SJM-lookup, because 1) subquery_inner_table has no valid current
|
|
row (materialization temp.table has it instead), and 2) they
|
|
would be true anyway.
|
|
*/
|
|
SJ_MATERIALIZATION_INFO *sjm=
|
|
inner_tab->bush_children->start->emb_sj_nest->sj_mat_info;
|
|
if (sjm->is_used && !sjm->is_sj_scan)
|
|
is_sjm_lookup_tab= TRUE;
|
|
}
|
|
|
|
if (inner_tab == first_inner_tab && inner_tab->on_precond &&
|
|
!is_sjm_lookup_tab)
|
|
add_cond_and_fix(thd, &tmp_cond, inner_tab->on_precond);
|
|
if (tmp_cond && !is_sjm_lookup_tab)
|
|
{
|
|
JOIN_TAB *cond_tab= (inner_tab < first_inner_tab ?
|
|
first_inner_tab : inner_tab);
|
|
Item **sel_cond_ref= (inner_tab < first_inner_tab ?
|
|
&first_inner_tab->on_precond :
|
|
&inner_tab->select_cond);
|
|
/*
|
|
First add the guards for match variables of
|
|
all embedding outer join operations.
|
|
*/
|
|
if (!(tmp_cond= add_found_match_trig_cond(thd,
|
|
cond_tab->first_inner,
|
|
tmp_cond,
|
|
first_inner_tab)))
|
|
DBUG_RETURN(1);
|
|
/*
|
|
Now add the guard turning the predicate off for
|
|
the null complemented row.
|
|
*/
|
|
DBUG_PRINT("info", ("Item_func_trig_cond"));
|
|
tmp_cond= new (thd->mem_root) Item_func_trig_cond(thd, tmp_cond,
|
|
&first_inner_tab->
|
|
not_null_compl);
|
|
DBUG_PRINT("info", ("Item_func_trig_cond %p",
|
|
tmp_cond));
|
|
if (tmp_cond)
|
|
tmp_cond->quick_fix_field();
|
|
/* Add the predicate to other pushed down predicates */
|
|
DBUG_PRINT("info", ("Item_cond_and"));
|
|
*sel_cond_ref= !(*sel_cond_ref) ?
|
|
tmp_cond :
|
|
new (thd->mem_root) Item_cond_and(thd, *sel_cond_ref, tmp_cond);
|
|
DBUG_PRINT("info", ("Item_cond_and %p",
|
|
(*sel_cond_ref)));
|
|
if (!(*sel_cond_ref))
|
|
DBUG_RETURN(1);
|
|
(*sel_cond_ref)->quick_fix_field();
|
|
(*sel_cond_ref)->update_used_tables();
|
|
if (cond_tab->select)
|
|
cond_tab->select->cond= cond_tab->select_cond;
|
|
}
|
|
}
|
|
first_inner_tab= first_inner_tab->first_upper;
|
|
}
|
|
if (!tab->bush_children)
|
|
i++;
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
trace_attached_comp.end();
|
|
Json_writer_array trace_attached_summary(thd,
|
|
"attached_conditions_summary");
|
|
for (tab= first_depth_first_tab(join); tab;
|
|
tab= next_depth_first_tab(join, tab))
|
|
{
|
|
if (!tab->table)
|
|
continue;
|
|
Item *const cond = tab->select_cond;
|
|
Json_writer_object trace_one_table(thd);
|
|
trace_one_table.add_table_name(tab);
|
|
trace_one_table.add("attached_condition", cond);
|
|
}
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
static
|
|
uint get_next_field_for_derived_key(uchar *arg)
|
|
{
|
|
KEYUSE *keyuse= *(KEYUSE **) arg;
|
|
if (!keyuse)
|
|
return (uint) (-1);
|
|
TABLE *table= keyuse->table;
|
|
uint key= keyuse->key;
|
|
uint fldno= keyuse->keypart;
|
|
uint keypart= keyuse->keypart_map == (key_part_map) 1 ?
|
|
0 : (keyuse-1)->keypart+1;
|
|
for ( ;
|
|
keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
|
|
keyuse++)
|
|
keyuse->keypart= keypart;
|
|
if (keyuse->key != key)
|
|
keyuse= 0;
|
|
*((KEYUSE **) arg)= keyuse;
|
|
return fldno;
|
|
}
|
|
|
|
|
|
static
|
|
uint get_next_field_for_derived_key_simple(uchar *arg)
|
|
{
|
|
KEYUSE *keyuse= *(KEYUSE **) arg;
|
|
if (!keyuse)
|
|
return (uint) (-1);
|
|
TABLE *table= keyuse->table;
|
|
uint key= keyuse->key;
|
|
uint fldno= keyuse->keypart;
|
|
for ( ;
|
|
keyuse->table == table && keyuse->key == key && keyuse->keypart == fldno;
|
|
keyuse++)
|
|
;
|
|
if (keyuse->key != key)
|
|
keyuse= 0;
|
|
*((KEYUSE **) arg)= keyuse;
|
|
return fldno;
|
|
}
|
|
|
|
static
|
|
bool generate_derived_keys_for_table(KEYUSE *keyuse, uint count, uint keys)
|
|
{
|
|
TABLE *table= keyuse->table;
|
|
if (table->alloc_keys(keys))
|
|
return TRUE;
|
|
uint key_count= 0;
|
|
KEYUSE *first_keyuse= keyuse;
|
|
uint prev_part= keyuse->keypart;
|
|
uint parts= 0;
|
|
uint i= 0;
|
|
|
|
for ( ; i < count && key_count < keys; )
|
|
{
|
|
do
|
|
{
|
|
keyuse->key= table->s->keys;
|
|
keyuse->keypart_map= (key_part_map) (1 << parts);
|
|
keyuse++;
|
|
i++;
|
|
}
|
|
while (i < count && keyuse->used_tables == first_keyuse->used_tables &&
|
|
keyuse->keypart == prev_part);
|
|
parts++;
|
|
if (i < count && keyuse->used_tables == first_keyuse->used_tables)
|
|
{
|
|
prev_part= keyuse->keypart;
|
|
}
|
|
else
|
|
{
|
|
KEYUSE *save_first_keyuse= first_keyuse;
|
|
if (table->check_tmp_key(table->s->keys, parts,
|
|
get_next_field_for_derived_key_simple,
|
|
(uchar *) &first_keyuse))
|
|
|
|
{
|
|
JOIN_TAB *tab;
|
|
first_keyuse= save_first_keyuse;
|
|
if (table->add_tmp_key(table->s->keys, parts,
|
|
get_next_field_for_derived_key,
|
|
(uchar *) &first_keyuse,
|
|
FALSE))
|
|
return TRUE;
|
|
table->reginfo.join_tab->keys.set_bit(table->s->keys - 1);
|
|
tab= table->reginfo.join_tab;
|
|
for (uint i=0; i < parts; i++)
|
|
tab->key_dependent|= save_first_keyuse[i].used_tables;
|
|
}
|
|
else
|
|
{
|
|
/* Mark keyuses for this key to be excluded */
|
|
for (KEYUSE *curr=save_first_keyuse; curr < keyuse; curr++)
|
|
{
|
|
curr->key= MAX_KEY;
|
|
}
|
|
}
|
|
first_keyuse= keyuse;
|
|
key_count++;
|
|
parts= 0;
|
|
prev_part= keyuse->keypart;
|
|
}
|
|
}
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
static
|
|
bool generate_derived_keys(DYNAMIC_ARRAY *keyuse_array)
|
|
{
|
|
KEYUSE *keyuse, *end_keyuse;
|
|
size_t elements= keyuse_array->elements;
|
|
TABLE *prev_table= 0;
|
|
|
|
DBUG_ASSERT(elements > 0);
|
|
/* The last element is an end marker */
|
|
DBUG_ASSERT(dynamic_element(keyuse_array, elements-1,
|
|
KEYUSE*)[0].table == 0);
|
|
|
|
for (keyuse= dynamic_element(keyuse_array, 0, KEYUSE*),
|
|
end_keyuse= keyuse + elements - 1;
|
|
keyuse < end_keyuse;
|
|
keyuse++)
|
|
{
|
|
DBUG_ASSERT(keyuse->table);
|
|
|
|
KEYUSE *first_table_keyuse= NULL;
|
|
table_map last_used_tables= 0;
|
|
uint count= 0;
|
|
uint keys= 0;
|
|
TABLE_LIST *derived= NULL;
|
|
|
|
if (keyuse->table != prev_table)
|
|
derived= keyuse->table->pos_in_table_list;
|
|
|
|
if (!derived->is_materialized_derived())
|
|
continue;
|
|
|
|
for (;;)
|
|
{
|
|
if (keyuse->table != prev_table)
|
|
{
|
|
prev_table= keyuse->table;
|
|
while (keyuse->table == prev_table && keyuse->key != MAX_KEY)
|
|
keyuse++;
|
|
if (keyuse->table != prev_table)
|
|
{
|
|
keyuse--;
|
|
break;
|
|
}
|
|
first_table_keyuse= keyuse;
|
|
last_used_tables= keyuse->used_tables;
|
|
count= 0;
|
|
keys= 0;
|
|
}
|
|
else if (keyuse->used_tables != last_used_tables)
|
|
{
|
|
keys++;
|
|
last_used_tables= keyuse->used_tables;
|
|
}
|
|
count++;
|
|
keyuse++;
|
|
if (keyuse->table != prev_table)
|
|
{
|
|
if (generate_derived_keys_for_table(first_table_keyuse, count, ++keys))
|
|
return TRUE;
|
|
keyuse--;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Drops unused keys for each materialized derived table/view
|
|
|
|
@details
|
|
For materialized derived tables only ref access can be used, it employs
|
|
only one index, thus we don't need the rest. For each materialized derived
|
|
table/view call TABLE::use_index to save one index chosen by the optimizer
|
|
and free others. No key is chosen then all keys will be dropped.
|
|
*/
|
|
|
|
void JOIN::drop_unused_derived_keys()
|
|
{
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
|
|
TABLE *tmp_tbl= tab->table;
|
|
/*
|
|
Skip placeholders and already created tables (we cannot change keys
|
|
for created tables)
|
|
*/
|
|
if (!tmp_tbl || tmp_tbl->is_created())
|
|
continue;
|
|
if (!tmp_tbl->pos_in_table_list->is_materialized_derived())
|
|
continue;
|
|
|
|
/*
|
|
tmp_tbl->max_keys is the number of keys pre-allocated in
|
|
TABLE::alloc_keys(). Can be 0 if alloc_keys() was not called.
|
|
|
|
tmp_tbl->s->keys is number of keys defined for the table.
|
|
Normally 0 or 1 (= unique key)
|
|
*/
|
|
|
|
if (likely(tmp_tbl->s->keys) && tab->ref.key >= 0 &&
|
|
!tab->is_ref_for_hash_join())
|
|
{
|
|
if (tmp_tbl->s->keys > 1)
|
|
{
|
|
/* remove all keys except the chosen one and unique keys */
|
|
tmp_tbl->use_index(tab->ref.key, &tab->keys);
|
|
}
|
|
/*
|
|
We dropped all keys except the chosen one and unique keys.
|
|
The choosen one is stored as the first key (number 0).
|
|
*/
|
|
tab->ref.key= 0;
|
|
}
|
|
else if (tmp_tbl->s->keys)
|
|
{
|
|
/* The query cannot use keys, remove all non unique keys */
|
|
tmp_tbl->use_index(-1, &tab->keys);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Evaluate the bitmap of used tables for items from the select list
|
|
*/
|
|
|
|
inline void JOIN::eval_select_list_used_tables()
|
|
{
|
|
select_list_used_tables= 0;
|
|
Item *item;
|
|
List_iterator_fast<Item> it(fields_list);
|
|
while ((item= it++))
|
|
{
|
|
select_list_used_tables|= item->used_tables();
|
|
}
|
|
Item_outer_ref *ref;
|
|
List_iterator_fast<Item_outer_ref> ref_it(select_lex->inner_refs_list);
|
|
while ((ref= ref_it++))
|
|
{
|
|
item= ref->outer_ref;
|
|
select_list_used_tables|= item->used_tables();
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Determine {after which table we'll produce ordered set}
|
|
|
|
SYNOPSIS
|
|
make_join_orderinfo()
|
|
join
|
|
|
|
|
|
DESCRIPTION
|
|
Determine if the set is already ordered for ORDER BY, so it can
|
|
disable join cache because it will change the ordering of the results.
|
|
Code handles sort table that is at any location (not only first after
|
|
the const tables) despite the fact that it's currently prohibited.
|
|
We must disable join cache if the first non-const table alone is
|
|
ordered. If there is a temp table the ordering is done as a last
|
|
operation and doesn't prevent join cache usage.
|
|
|
|
RETURN
|
|
Number of table after which the set will be ordered
|
|
join->tables if we don't need an ordered set
|
|
*/
|
|
|
|
static uint make_join_orderinfo(JOIN *join)
|
|
{
|
|
/*
|
|
This function needs to be fixed to take into account that we now have SJM
|
|
nests.
|
|
*/
|
|
DBUG_ASSERT(0);
|
|
|
|
JOIN_TAB *tab;
|
|
if (join->need_tmp)
|
|
return join->table_count;
|
|
tab= join->get_sort_by_join_tab();
|
|
return tab ? (uint)(tab-join->join_tab) : join->table_count;
|
|
}
|
|
|
|
/*
|
|
Deny usage of join buffer for the specified table
|
|
|
|
SYNOPSIS
|
|
set_join_cache_denial()
|
|
tab join table for which join buffer usage is to be denied
|
|
|
|
DESCRIPTION
|
|
The function denies usage of join buffer when joining the table 'tab'.
|
|
The table is marked as not employing any join buffer. If a join cache
|
|
object has been already allocated for the table this object is destroyed.
|
|
|
|
RETURN
|
|
none
|
|
*/
|
|
|
|
static
|
|
void set_join_cache_denial(JOIN_TAB *join_tab)
|
|
{
|
|
if (join_tab->cache)
|
|
{
|
|
/*
|
|
If there is a previous cache linked to this cache through the
|
|
next_cache pointer: remove the link.
|
|
*/
|
|
if (join_tab->cache->prev_cache)
|
|
join_tab->cache->prev_cache->next_cache= 0;
|
|
/*
|
|
Same for the next_cache
|
|
*/
|
|
if (join_tab->cache->next_cache)
|
|
join_tab->cache->next_cache->prev_cache= 0;
|
|
|
|
join_tab->cache->free();
|
|
join_tab->cache= 0;
|
|
}
|
|
if (join_tab->use_join_cache)
|
|
{
|
|
join_tab->use_join_cache= FALSE;
|
|
join_tab->used_join_cache_level= 0;
|
|
/*
|
|
It could be only sub_select(). It could not be sub_seject_sjm because we
|
|
don't do join buffering for the first table in sjm nest.
|
|
*/
|
|
join_tab[-1].next_select= sub_select;
|
|
join_tab[-1].cached_pfs_batch_update= join_tab[-1].pfs_batch_update();
|
|
if (join_tab->type == JT_REF && join_tab->is_ref_for_hash_join())
|
|
{
|
|
join_tab->type= JT_ALL;
|
|
join_tab->ref.key_parts= 0;
|
|
}
|
|
join_tab->join->return_tab= join_tab;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
The default implementation of unlock-row method of READ_RECORD,
|
|
used in all access methods.
|
|
*/
|
|
|
|
void rr_unlock_row(st_join_table *tab)
|
|
{
|
|
READ_RECORD *info= &tab->read_record;
|
|
info->table->file->unlock_row();
|
|
}
|
|
|
|
|
|
/**
|
|
Pick the appropriate access method functions
|
|
|
|
Sets the functions for the selected table access method
|
|
|
|
@param tab Table reference to put access method
|
|
*/
|
|
|
|
static void
|
|
pick_table_access_method(JOIN_TAB *tab)
|
|
{
|
|
switch (tab->type)
|
|
{
|
|
case JT_REF:
|
|
tab->read_first_record= join_read_always_key;
|
|
tab->read_record.read_record_func= join_read_next_same;
|
|
break;
|
|
|
|
case JT_REF_OR_NULL:
|
|
tab->read_first_record= join_read_always_key_or_null;
|
|
tab->read_record.read_record_func= join_read_next_same_or_null;
|
|
break;
|
|
|
|
case JT_CONST:
|
|
tab->read_first_record= join_read_const;
|
|
tab->read_record.read_record_func= join_no_more_records;
|
|
break;
|
|
|
|
case JT_EQ_REF:
|
|
tab->read_first_record= join_read_key;
|
|
tab->read_record.read_record_func= join_no_more_records;
|
|
break;
|
|
|
|
case JT_FT:
|
|
tab->read_first_record= join_ft_read_first;
|
|
tab->read_record.read_record_func= join_ft_read_next;
|
|
break;
|
|
|
|
case JT_SYSTEM:
|
|
tab->read_first_record= join_read_system;
|
|
tab->read_record.read_record_func= join_no_more_records;
|
|
break;
|
|
|
|
/* keep gcc happy */
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Revise usage of join buffer for the specified table and the whole nest
|
|
|
|
SYNOPSIS
|
|
revise_cache_usage()
|
|
tab join table for which join buffer usage is to be revised
|
|
|
|
DESCRIPTION
|
|
The function revise the decision to use a join buffer for the table 'tab'.
|
|
If this table happened to be among the inner tables of a nested outer join/
|
|
semi-join the functions denies usage of join buffers for all of them
|
|
|
|
RETURN
|
|
none
|
|
*/
|
|
|
|
static
|
|
void revise_cache_usage(JOIN_TAB *join_tab)
|
|
{
|
|
JOIN_TAB *tab;
|
|
JOIN_TAB *first_inner;
|
|
|
|
if (join_tab->first_inner)
|
|
{
|
|
JOIN_TAB *end_tab= join_tab;
|
|
for (first_inner= join_tab->first_inner;
|
|
first_inner;
|
|
first_inner= first_inner->first_upper)
|
|
{
|
|
for (tab= end_tab; tab >= first_inner; tab--)
|
|
set_join_cache_denial(tab);
|
|
end_tab= first_inner;
|
|
}
|
|
}
|
|
else if (join_tab->first_sj_inner_tab)
|
|
{
|
|
first_inner= join_tab->first_sj_inner_tab;
|
|
for (tab= join_tab; tab >= first_inner; tab--)
|
|
{
|
|
set_join_cache_denial(tab);
|
|
}
|
|
}
|
|
else set_join_cache_denial(join_tab);
|
|
}
|
|
|
|
|
|
/*
|
|
end_select-compatible function that writes the record into a sjm temptable
|
|
|
|
SYNOPSIS
|
|
end_sj_materialize()
|
|
join The join
|
|
join_tab Points to right after the last join_tab in materialization bush
|
|
end_of_records FALSE <=> This call is made to pass another record
|
|
combination
|
|
TRUE <=> EOF (no action)
|
|
|
|
DESCRIPTION
|
|
This function is used by semi-join materialization to capture suquery's
|
|
resultset and write it into the temptable (that is, materialize it).
|
|
|
|
NOTE
|
|
This function is used only for semi-join materialization. Non-semijoin
|
|
materialization uses different mechanism.
|
|
|
|
RETURN
|
|
NESTED_LOOP_OK
|
|
NESTED_LOOP_ERROR
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
end_sj_materialize(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
int error;
|
|
THD *thd= join->thd;
|
|
SJ_MATERIALIZATION_INFO *sjm= join_tab[-1].emb_sj_nest->sj_mat_info;
|
|
DBUG_ENTER("end_sj_materialize");
|
|
if (!end_of_records)
|
|
{
|
|
TABLE *table= sjm->table;
|
|
|
|
List_iterator<Item> it(sjm->sjm_table_cols);
|
|
Item *item;
|
|
while ((item= it++))
|
|
{
|
|
if (item->is_null())
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
fill_record(thd, table, table->field, sjm->sjm_table_cols, true, false,
|
|
true);
|
|
if (unlikely(thd->is_error()))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
|
|
{
|
|
/* create_myisam_from_heap will generate error if needed */
|
|
if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
|
|
create_internal_tmp_table_from_heap(thd, table,
|
|
sjm->sjm_table_param.start_recinfo,
|
|
&sjm->sjm_table_param.recinfo, error, 1, NULL))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*
|
|
Check whether a join buffer can be used to join the specified table
|
|
|
|
SYNOPSIS
|
|
check_join_cache_usage()
|
|
tab joined table to check join buffer usage for
|
|
options options of the join
|
|
no_jbuf_after don't use join buffering after table with this number
|
|
prev_tab previous join table
|
|
|
|
DESCRIPTION
|
|
The function finds out whether the table 'tab' can be joined using a join
|
|
buffer. This check is performed after the best execution plan for 'join'
|
|
has been chosen. If the function decides that a join buffer can be employed
|
|
then it selects the most appropriate join cache object that contains this
|
|
join buffer.
|
|
The result of the check and the type of the join buffer to be used
|
|
depend on:
|
|
- the access method to access rows of the joined table
|
|
- whether the join table is an inner table of an outer join or semi-join
|
|
- whether the optimizer switches
|
|
outer_join_with_cache, semijoin_with_cache, join_cache_incremental,
|
|
join_cache_hashed, join_cache_bka,
|
|
are set on or off
|
|
- the join cache level set for the query
|
|
- the join 'options'.
|
|
|
|
In any case join buffer is not used if the number of the joined table is
|
|
greater than 'no_jbuf_after'. It's also never used if the value of
|
|
join_cache_level is equal to 0.
|
|
If the optimizer switch outer_join_with_cache is off no join buffer is
|
|
used for outer join operations.
|
|
If the optimizer switch semijoin_with_cache is off no join buffer is used
|
|
for semi-join operations.
|
|
If the optimizer switch join_cache_incremental is off no incremental join
|
|
buffers are used.
|
|
If the optimizer switch join_cache_hashed is off then the optimizer uses
|
|
neither BNLH algorithm, nor BKAH algorithm to perform join operations.
|
|
|
|
If the optimizer switch join_cache_bka is off then the optimizer uses
|
|
neither BKA algorithm, nor BKAH algorithm to perform join operation.
|
|
The valid settings for join_cache_level lay in the interval 0..8.
|
|
If it set to 0 no join buffers are used to perform join operations.
|
|
Currently we differentiate between join caches of 8 levels:
|
|
1 : non-incremental join cache used for BNL join algorithm
|
|
2 : incremental join cache used for BNL join algorithm
|
|
3 : non-incremental join cache used for BNLH join algorithm
|
|
4 : incremental join cache used for BNLH join algorithm
|
|
5 : non-incremental join cache used for BKA join algorithm
|
|
6 : incremental join cache used for BKA join algorithm
|
|
7 : non-incremental join cache used for BKAH join algorithm
|
|
8 : incremental join cache used for BKAH join algorithm
|
|
If the value of join_cache_level is set to n then no join caches of
|
|
levels higher than n can be employed.
|
|
|
|
If the optimizer switches outer_join_with_cache, semijoin_with_cache,
|
|
join_cache_incremental, join_cache_hashed, join_cache_bka are all on
|
|
the following rules are applied.
|
|
If join_cache_level==1|2 then join buffer is used for inner joins, outer
|
|
joins and semi-joins with 'JT_ALL' access method. In this case a
|
|
JOIN_CACHE_BNL object is employed.
|
|
If join_cache_level==3|4 and a join buffer is used for a join operation
|
|
(inner join, outer join, semi-join) with 'JT_REF'/'JT_EQREF' access method
|
|
then a JOIN_CACHE_BNLH object is employed.
|
|
If an index is used to access rows of the joined table and the value of
|
|
join_cache_level==5|6 then a JOIN_CACHE_BKA object is employed.
|
|
If an index is used to access rows of the joined table and the value of
|
|
join_cache_level==7|8 then a JOIN_CACHE_BKAH object is employed.
|
|
If the value of join_cache_level is odd then creation of a non-linked
|
|
join cache is forced.
|
|
|
|
Currently for any join operation a join cache of the level of the
|
|
highest allowed and applicable level is used.
|
|
For example, if join_cache_level is set to 6 and the optimizer switch
|
|
join_cache_bka is off, while the optimizer switch join_cache_hashed is
|
|
on then for any inner join operation with JT_REF/JT_EQREF access method
|
|
to the joined table the BNLH join algorithm will be used, while for
|
|
the table accessed by the JT_ALL methods the BNL algorithm will be used.
|
|
|
|
If the function decides that a join buffer can be used to join the table
|
|
'tab' then it sets the value of tab->use_join_buffer to TRUE and assigns
|
|
the selected join cache object to the field 'cache' of the previous
|
|
join table.
|
|
If the function creates a join cache object it tries to initialize it. The
|
|
failure to do this results in an invocation of the function that destructs
|
|
the created object.
|
|
If the function decides that for some reason no join buffer can be used
|
|
for a table it calls the function revise_cache_usage that checks
|
|
whether join cache should be denied for some previous tables. In this case
|
|
a pointer to the first table for which join cache usage has been denied
|
|
is passed in join->return_tab (see the function set_join_cache_denial).
|
|
|
|
The function changes the value of the fields tab->icp_other_tables_ok and
|
|
tab->idx_cond_fact_out to FALSE if the chosen join cache algorithm
|
|
requires it.
|
|
|
|
NOTES
|
|
An inner table of a nested outer join or a nested semi-join can be currently
|
|
joined only when a linked cache object is employed. In these cases setting
|
|
join_cache_incremental to 'off' results in denial of usage of any join
|
|
buffer when joining the table.
|
|
For a nested outer join/semi-join, currently, we either use join buffers for
|
|
all inner tables or for none of them.
|
|
Some engines (e.g. Falcon) currently allow to use only a join cache
|
|
of the type JOIN_CACHE_BKAH when the joined table is accessed through
|
|
an index. For these engines setting the value of join_cache_level to 5 or 6
|
|
results in that no join buffer is used to join the table.
|
|
|
|
RETURN VALUE
|
|
cache level if cache is used, otherwise returns 0
|
|
|
|
TODO
|
|
Support BKA inside SJ-Materialization nests. When doing this, we'll need
|
|
to only store sj-inner tables in the join buffer.
|
|
#if 0
|
|
JOIN_TAB *first_tab= join->join_tab+join->const_tables;
|
|
uint n_tables= i-join->const_tables;
|
|
/ *
|
|
We normally put all preceding tables into the join buffer, except
|
|
for the constant tables.
|
|
If we're inside a semi-join materialization nest, e.g.
|
|
|
|
outer_tbl1 outer_tbl2 ( inner_tbl1, inner_tbl2 ) ...
|
|
^-- we're here
|
|
|
|
then we need to put into the join buffer only the tables from
|
|
within the nest.
|
|
* /
|
|
if (i >= first_sjm_table && i < last_sjm_table)
|
|
{
|
|
n_tables= i - first_sjm_table; // will be >0 if we got here
|
|
first_tab= join->join_tab + first_sjm_table;
|
|
}
|
|
#endif
|
|
*/
|
|
|
|
static
|
|
uint check_join_cache_usage(JOIN_TAB *tab,
|
|
ulonglong options,
|
|
uint no_jbuf_after,
|
|
uint table_index,
|
|
JOIN_TAB *prev_tab)
|
|
{
|
|
uint flags= 0;
|
|
ha_rows rows= 0;
|
|
uint bufsz= 4096;
|
|
JOIN_CACHE *prev_cache=0;
|
|
JOIN *join= tab->join;
|
|
MEM_ROOT *root= join->thd->mem_root;
|
|
uint cache_level= tab->used_join_cache_level;
|
|
bool force_unlinked_cache=
|
|
!(join->allowed_join_cache_types & JOIN_CACHE_INCREMENTAL_BIT);
|
|
bool no_hashed_cache=
|
|
!(join->allowed_join_cache_types & JOIN_CACHE_HASHED_BIT);
|
|
bool no_bka_cache=
|
|
!(join->allowed_join_cache_types & JOIN_CACHE_BKA_BIT);
|
|
|
|
join->return_tab= 0;
|
|
|
|
if (tab->no_forced_join_cache)
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
Don't use join cache if @@join_cache_level==0 or this table is the first
|
|
one join suborder (either at top level or inside a bush)
|
|
*/
|
|
if (cache_level == 0 || !prev_tab)
|
|
return 0;
|
|
|
|
if (force_unlinked_cache && (cache_level%2 == 0))
|
|
cache_level--;
|
|
|
|
if (options & SELECT_NO_JOIN_CACHE)
|
|
goto no_join_cache;
|
|
|
|
if (tab->use_quick == 2)
|
|
goto no_join_cache;
|
|
|
|
if (tab->table->map & join->complex_firstmatch_tables)
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
Don't use join cache if we're inside a join tab range covered by LooseScan
|
|
strategy (TODO: LooseScan is very similar to FirstMatch so theoretically it
|
|
should be possible to use join buffering in the same way we're using it for
|
|
multi-table firstmatch ranges).
|
|
*/
|
|
if (tab->inside_loosescan_range)
|
|
goto no_join_cache;
|
|
|
|
if (tab->is_inner_table_of_semijoin() &&
|
|
!join->allowed_semijoin_with_cache)
|
|
goto no_join_cache;
|
|
if (tab->is_inner_table_of_outer_join() &&
|
|
!join->allowed_outer_join_with_cache)
|
|
goto no_join_cache;
|
|
|
|
if (tab->table->pos_in_table_list->table_function &&
|
|
!tab->table->pos_in_table_list->table_function->join_cache_allowed())
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
Non-linked join buffers can't guarantee one match
|
|
*/
|
|
if (tab->is_nested_inner())
|
|
{
|
|
if (force_unlinked_cache || cache_level == 1)
|
|
goto no_join_cache;
|
|
if (cache_level & 1)
|
|
cache_level--;
|
|
}
|
|
|
|
/*
|
|
Don't use BKA for materialized tables. We could actually have a
|
|
meaningful use of BKA when linked join buffers are used.
|
|
|
|
The problem is, the temp.table is not filled (actually not even opened
|
|
properly) yet, and this doesn't let us call
|
|
handler->multi_range_read_info(). It is possible to come up with
|
|
estimates, etc. without acessing the table, but it seems not to worth the
|
|
effort now.
|
|
*/
|
|
if (tab->table->pos_in_table_list->is_materialized_derived())
|
|
{
|
|
no_bka_cache= true;
|
|
/*
|
|
Don't use hash join algorithm if the temporary table for the rows
|
|
of the derived table will be created with an equi-join key.
|
|
*/
|
|
if (tab->table->s->keys)
|
|
no_hashed_cache= true;
|
|
}
|
|
|
|
/*
|
|
Don't use join buffering if we're dictated not to by no_jbuf_after
|
|
(This is not meaningfully used currently)
|
|
*/
|
|
if (table_index > no_jbuf_after)
|
|
goto no_join_cache;
|
|
|
|
/*
|
|
TODO: BNL join buffer should be perfectly ok with tab->bush_children.
|
|
*/
|
|
if (tab->loosescan_match_tab || tab->bush_children)
|
|
goto no_join_cache;
|
|
|
|
for (JOIN_TAB *first_inner= tab->first_inner; first_inner;
|
|
first_inner= first_inner->first_upper)
|
|
{
|
|
if (first_inner != tab &&
|
|
(!first_inner->use_join_cache || !(tab-1)->use_join_cache))
|
|
goto no_join_cache;
|
|
}
|
|
if (tab->first_sj_inner_tab && tab->first_sj_inner_tab != tab &&
|
|
(!tab->first_sj_inner_tab->use_join_cache || !(tab-1)->use_join_cache))
|
|
goto no_join_cache;
|
|
if (!prev_tab->use_join_cache)
|
|
{
|
|
/*
|
|
Check whether table tab and the previous one belong to the same nest of
|
|
inner tables and if so do not use join buffer when joining table tab.
|
|
*/
|
|
if (tab->first_inner && tab != tab->first_inner)
|
|
{
|
|
for (JOIN_TAB *first_inner= tab[-1].first_inner;
|
|
first_inner;
|
|
first_inner= first_inner->first_upper)
|
|
{
|
|
if (first_inner == tab->first_inner)
|
|
goto no_join_cache;
|
|
}
|
|
}
|
|
else if (tab->first_sj_inner_tab && tab != tab->first_sj_inner_tab &&
|
|
tab->first_sj_inner_tab == tab[-1].first_sj_inner_tab)
|
|
goto no_join_cache;
|
|
}
|
|
|
|
prev_cache= prev_tab->cache;
|
|
|
|
switch (tab->type) {
|
|
case JT_NEXT:
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
if (cache_level == 1)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BNL(join, tab, prev_cache)))
|
|
{
|
|
tab->icp_other_tables_ok= FALSE;
|
|
/* If make_join_select() hasn't called make_scan_filter(), do it now */
|
|
if (!tab->cache_select && tab->make_scan_filter())
|
|
goto no_join_cache;
|
|
return (2 - MY_TEST(!prev_cache));
|
|
}
|
|
goto no_join_cache;
|
|
case JT_SYSTEM:
|
|
case JT_CONST:
|
|
case JT_REF:
|
|
case JT_EQ_REF:
|
|
if (cache_level <=2 || (no_hashed_cache && no_bka_cache))
|
|
goto no_join_cache;
|
|
if (tab->ref.is_access_triggered())
|
|
goto no_join_cache;
|
|
|
|
if (!tab->is_ref_for_hash_join() && !no_bka_cache)
|
|
{
|
|
Cost_estimate cost;
|
|
cost.reset();
|
|
flags= HA_MRR_NO_NULL_ENDPOINTS | HA_MRR_SINGLE_POINT;
|
|
if (tab->table->covering_keys.is_set(tab->ref.key))
|
|
flags|= HA_MRR_INDEX_ONLY;
|
|
rows= tab->table->file->multi_range_read_info(tab->ref.key, 10, 20,
|
|
tab->ref.key_parts,
|
|
&bufsz, &flags, &cost);
|
|
}
|
|
|
|
if ((cache_level <=4 && !no_hashed_cache) || no_bka_cache ||
|
|
tab->is_ref_for_hash_join() ||
|
|
((flags & HA_MRR_NO_ASSOCIATION) && cache_level <=6))
|
|
{
|
|
if (!tab->hash_join_is_possible() ||
|
|
tab->make_scan_filter())
|
|
goto no_join_cache;
|
|
if (cache_level == 3)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BNLH(join, tab, prev_cache)))
|
|
{
|
|
tab->icp_other_tables_ok= FALSE;
|
|
return (4 - MY_TEST(!prev_cache));
|
|
}
|
|
goto no_join_cache;
|
|
}
|
|
if (cache_level > 4 && no_bka_cache)
|
|
goto no_join_cache;
|
|
|
|
if ((flags & HA_MRR_NO_ASSOCIATION) &&
|
|
(cache_level <= 6 || no_hashed_cache))
|
|
goto no_join_cache;
|
|
|
|
if ((rows != HA_POS_ERROR) && !(flags & HA_MRR_USE_DEFAULT_IMPL))
|
|
{
|
|
if (cache_level <= 6 || no_hashed_cache)
|
|
{
|
|
if (cache_level == 5)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BKA(join, tab, flags, prev_cache)))
|
|
return (6 - MY_TEST(!prev_cache));
|
|
goto no_join_cache;
|
|
}
|
|
else
|
|
{
|
|
if (cache_level == 7)
|
|
prev_cache= 0;
|
|
if ((tab->cache= new (root) JOIN_CACHE_BKAH(join, tab, flags, prev_cache)))
|
|
{
|
|
tab->idx_cond_fact_out= FALSE;
|
|
return (8 - MY_TEST(!prev_cache));
|
|
}
|
|
goto no_join_cache;
|
|
}
|
|
}
|
|
goto no_join_cache;
|
|
default : ;
|
|
}
|
|
|
|
no_join_cache:
|
|
if (tab->type != JT_ALL && tab->type != JT_RANGE && tab->is_ref_for_hash_join())
|
|
{
|
|
tab->type= JT_ALL;
|
|
tab->ref.key_parts= 0;
|
|
}
|
|
revise_cache_usage(tab);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
Check whether join buffers can be used to join tables of a join
|
|
|
|
SYNOPSIS
|
|
check_join_cache_usage()
|
|
join join whose tables are to be checked
|
|
options options of the join
|
|
no_jbuf_after don't use join buffering after table with this number
|
|
(The tables are assumed to be numbered in
|
|
first_linear_tab(join, WITHOUT_CONST_TABLES),
|
|
next_linear_tab(join, WITH_CONST_TABLES) order).
|
|
|
|
DESCRIPTION
|
|
For each table after the first non-constant table the function checks
|
|
whether the table can be joined using a join buffer. If the function decides
|
|
that a join buffer can be employed then it selects the most appropriate join
|
|
cache object that contains this join buffer whose level is not greater
|
|
than join_cache_level set for the join. To make this check the function
|
|
calls the function check_join_cache_usage for every non-constant table.
|
|
|
|
NOTES
|
|
In some situations (e.g. for nested outer joins, for nested semi-joins) only
|
|
incremental buffers can be used. If it turns out that for some inner table
|
|
no join buffer can be used then any inner table of an outer/semi-join nest
|
|
cannot use join buffer. In the case when already chosen buffer must be
|
|
denied for a table the function recalls check_join_cache_usage()
|
|
starting from this table. The pointer to the table from which the check
|
|
has to be restarted is returned in join->return_val (see the description
|
|
of check_join_cache_usage).
|
|
*/
|
|
|
|
void check_join_cache_usage_for_tables(JOIN *join, ulonglong options,
|
|
uint no_jbuf_after)
|
|
{
|
|
JOIN_TAB *tab;
|
|
JOIN_TAB *prev_tab;
|
|
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->used_join_cache_level= join->max_allowed_join_cache_level;
|
|
}
|
|
|
|
uint idx= join->const_tables;
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
restart:
|
|
tab->icp_other_tables_ok= TRUE;
|
|
tab->idx_cond_fact_out= TRUE;
|
|
|
|
/*
|
|
Check if we have a preceding join_tab, as something that will feed us
|
|
records that we could buffer. We don't have it, if
|
|
- this is the first non-const table in the join order,
|
|
- this is the first table inside an SJM nest.
|
|
*/
|
|
prev_tab= tab - 1;
|
|
if (tab == join->join_tab + join->const_tables ||
|
|
(tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab))
|
|
prev_tab= NULL;
|
|
|
|
switch (tab->type) {
|
|
case JT_SYSTEM:
|
|
case JT_CONST:
|
|
case JT_EQ_REF:
|
|
case JT_REF:
|
|
case JT_REF_OR_NULL:
|
|
case JT_NEXT:
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
tab->used_join_cache_level= check_join_cache_usage(tab, options,
|
|
no_jbuf_after,
|
|
idx,
|
|
prev_tab);
|
|
tab->use_join_cache= MY_TEST(tab->used_join_cache_level);
|
|
/*
|
|
psergey-merge: todo: raise the question that this is really stupid that
|
|
we can first allocate a join buffer, then decide not to use it and free
|
|
it.
|
|
*/
|
|
if (join->return_tab)
|
|
{
|
|
tab= join->return_tab;
|
|
goto restart;
|
|
}
|
|
break;
|
|
default:
|
|
tab->used_join_cache_level= 0;
|
|
}
|
|
if (!tab->bush_children)
|
|
idx++;
|
|
}
|
|
}
|
|
|
|
/**
|
|
Remove pushdown conditions that are already checked by the scan phase
|
|
of BNL/BNLH joins.
|
|
|
|
@note
|
|
If the single-table condition for this table will be used by a
|
|
blocked join to pre-filter this table's rows, there is no need
|
|
to re-check the same single-table condition for each joined record.
|
|
|
|
This method removes from JOIN_TAB::select_cond and JOIN_TAB::select::cond
|
|
all top-level conjuncts that also appear in in JOIN_TAB::cache_select::cond.
|
|
*/
|
|
|
|
void JOIN_TAB::remove_redundant_bnl_scan_conds()
|
|
{
|
|
if (!(select_cond && cache_select && cache &&
|
|
(cache->get_join_alg() == JOIN_CACHE::BNL_JOIN_ALG ||
|
|
cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)))
|
|
return;
|
|
|
|
/*
|
|
select->cond is not processed separately. This method assumes it is always
|
|
the same as select_cond.
|
|
*/
|
|
if (select && select->cond != select_cond)
|
|
return;
|
|
|
|
if (is_cond_and(select_cond))
|
|
{
|
|
List_iterator<Item> pushed_cond_li(*((Item_cond*) select_cond)->argument_list());
|
|
Item *pushed_item;
|
|
Item_cond_and *reduced_select_cond= new (join->thd->mem_root)
|
|
Item_cond_and(join->thd);
|
|
|
|
if (is_cond_and(cache_select->cond))
|
|
{
|
|
List_iterator<Item> scan_cond_li(*((Item_cond*) cache_select->cond)->argument_list());
|
|
Item *scan_item;
|
|
while ((pushed_item= pushed_cond_li++))
|
|
{
|
|
bool found_cond= false;
|
|
scan_cond_li.rewind();
|
|
while ((scan_item= scan_cond_li++))
|
|
{
|
|
if (pushed_item->eq(scan_item, 0))
|
|
{
|
|
found_cond= true;
|
|
break;
|
|
}
|
|
}
|
|
if (!found_cond)
|
|
reduced_select_cond->add(pushed_item, join->thd->mem_root);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
while ((pushed_item= pushed_cond_li++))
|
|
{
|
|
if (!pushed_item->eq(cache_select->cond, 0))
|
|
reduced_select_cond->add(pushed_item, join->thd->mem_root);
|
|
}
|
|
}
|
|
|
|
/*
|
|
JOIN_CACHE::check_match uses JOIN_TAB::select->cond instead of
|
|
JOIN_TAB::select_cond. set_cond() sets both pointers.
|
|
*/
|
|
if (reduced_select_cond->argument_list()->is_empty())
|
|
set_cond(NULL);
|
|
else if (reduced_select_cond->argument_list()->elements == 1)
|
|
set_cond(reduced_select_cond->argument_list()->head());
|
|
else
|
|
{
|
|
reduced_select_cond->quick_fix_field();
|
|
set_cond(reduced_select_cond);
|
|
}
|
|
}
|
|
else if (select_cond->eq(cache_select->cond, 0))
|
|
set_cond(NULL);
|
|
}
|
|
|
|
|
|
/*
|
|
Plan refinement stage: do various setup things for the executor
|
|
|
|
SYNOPSIS
|
|
make_join_readinfo()
|
|
join Join being processed
|
|
options Join's options (checking for SELECT_DESCRIBE,
|
|
SELECT_NO_JOIN_CACHE)
|
|
no_jbuf_after Don't use join buffering after table with this number.
|
|
|
|
DESCRIPTION
|
|
Plan refinement stage: do various set ups for the executioner
|
|
- set up use of join buffering
|
|
- push index conditions
|
|
- increment relevant counters
|
|
- etc
|
|
|
|
RETURN
|
|
FALSE - OK
|
|
TRUE - Out of memory
|
|
*/
|
|
|
|
static bool
|
|
make_join_readinfo(JOIN *join, ulonglong options, uint no_jbuf_after)
|
|
{
|
|
JOIN_TAB *tab;
|
|
uint i;
|
|
DBUG_ENTER("make_join_readinfo");
|
|
|
|
Json_writer_object trace_wrapper(join->thd);
|
|
Json_writer_array trace_arr(join->thd, "make_join_readinfo");
|
|
|
|
bool statistics= MY_TEST(!(join->select_options & SELECT_DESCRIBE));
|
|
bool sorted= 1;
|
|
|
|
join->complex_firstmatch_tables= table_map(0);
|
|
|
|
if (!join->select_lex->sj_nests.is_empty() &&
|
|
setup_semijoin_dups_elimination(join, options, no_jbuf_after))
|
|
DBUG_RETURN(TRUE); /* purecov: inspected */
|
|
|
|
/* For const tables, set partial_join_cardinality to 1. */
|
|
for (tab= join->join_tab; tab != join->join_tab + join->const_tables; tab++)
|
|
tab->partial_join_cardinality= 1;
|
|
|
|
JOIN_TAB *prev_tab= NULL;
|
|
i= join->const_tables;
|
|
for (tab= first_linear_tab(join, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
prev_tab=tab, tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
/*
|
|
The approximation below for partial join cardinality is not good because
|
|
- it does not take into account some pushdown predicates
|
|
- it does not differentiate between inner joins, outer joins and
|
|
semi-joins.
|
|
Later it should be improved.
|
|
*/
|
|
|
|
if (tab->bush_root_tab && tab->bush_root_tab->bush_children->start == tab)
|
|
prev_tab= NULL;
|
|
DBUG_ASSERT(tab->bush_children ||
|
|
tab->table == join->best_positions[i].table->table);
|
|
|
|
tab->partial_join_cardinality= join->best_positions[i].records_read *
|
|
(prev_tab ?
|
|
prev_tab->partial_join_cardinality : 1);
|
|
if (!tab->bush_children)
|
|
i++;
|
|
}
|
|
|
|
check_join_cache_usage_for_tables(join, options, no_jbuf_after);
|
|
|
|
JOIN_TAB *first_tab;
|
|
for (tab= first_tab= first_linear_tab(join,
|
|
WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(join, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
if (tab->bush_children)
|
|
{
|
|
if (setup_sj_materialization_part2(tab))
|
|
return TRUE;
|
|
}
|
|
|
|
TABLE *table=tab->table;
|
|
uint jcl= tab->used_join_cache_level;
|
|
tab->read_record.table= table;
|
|
tab->read_record.unlock_row= rr_unlock_row;
|
|
tab->read_record.print_error= true;
|
|
tab->sorted= sorted;
|
|
sorted= 0; // only first must be sorted
|
|
|
|
|
|
/*
|
|
We should not set tab->next_select for the last table in the
|
|
SMJ-nest, as setup_sj_materialization() has already set it to
|
|
end_sj_materialize.
|
|
*/
|
|
if (!(tab->bush_root_tab &&
|
|
tab->bush_root_tab->bush_children->end == tab + 1))
|
|
tab->next_select= sub_select; /* normal select */
|
|
|
|
if (tab->loosescan_match_tab)
|
|
{
|
|
if (!(tab->loosescan_buf= join->thd->alloc<uchar>(tab->loosescan_key_len)))
|
|
return TRUE; /* purecov: inspected */
|
|
tab->sorted= TRUE;
|
|
}
|
|
table->status=STATUS_NO_RECORD;
|
|
pick_table_access_method (tab);
|
|
|
|
if (jcl)
|
|
tab[-1].next_select=sub_select_cache;
|
|
|
|
if (tab->cache && tab->cache->get_join_alg() == JOIN_CACHE::BNLH_JOIN_ALG)
|
|
tab->type= JT_HASH;
|
|
|
|
switch (tab->type) {
|
|
case JT_SYSTEM: // Only happens with left join
|
|
case JT_CONST: // Only happens with left join
|
|
/* Only happens with outer joins */
|
|
tab->read_first_record= tab->type == JT_SYSTEM ? join_read_system
|
|
: join_read_const;
|
|
tab->read_record.unlock_row= join_const_unlock_row;
|
|
if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) &&
|
|
(!jcl || jcl > 4) && !tab->ref.is_access_triggered())
|
|
push_index_cond(tab, tab->ref.key);
|
|
break;
|
|
case JT_EQ_REF:
|
|
tab->read_record.unlock_row= join_read_key_unlock_row;
|
|
/* fall through */
|
|
if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) &&
|
|
(!jcl || jcl > 4) && !tab->ref.is_access_triggered())
|
|
push_index_cond(tab, tab->ref.key);
|
|
break;
|
|
case JT_REF_OR_NULL:
|
|
case JT_REF:
|
|
if (tab->select)
|
|
{
|
|
delete tab->select->quick;
|
|
tab->select->quick=0;
|
|
}
|
|
delete tab->quick;
|
|
tab->quick=0;
|
|
if (!(table->covering_keys.is_set(tab->ref.key) && !table->no_keyread) &&
|
|
(!jcl || jcl > 4) && !tab->ref.is_access_triggered())
|
|
push_index_cond(tab, tab->ref.key);
|
|
break;
|
|
case JT_NEXT: // Index scan
|
|
DBUG_ASSERT(!tab->quick);
|
|
if (tab->select)
|
|
{
|
|
/*
|
|
select->quick may be set if there was a possible range and
|
|
it had a higher cost than a table scan.
|
|
*/
|
|
delete tab->select->quick;
|
|
tab->select->quick=0;
|
|
}
|
|
if (tab->use_quick == 2)
|
|
{
|
|
join->thd->set_status_no_good_index_used();
|
|
tab->read_first_record= join_init_quick_read_record;
|
|
if (statistics)
|
|
join->thd->inc_status_select_range_check();
|
|
}
|
|
else
|
|
{
|
|
tab->read_first_record= join_read_first;
|
|
if (statistics)
|
|
{
|
|
join->thd->inc_status_select_scan();
|
|
join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
|
|
}
|
|
}
|
|
break;
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
case JT_HASH:
|
|
{
|
|
bool have_quick_select= tab->select && tab->select->quick;
|
|
/*
|
|
If previous table use cache
|
|
If the incoming data set is already sorted don't use cache.
|
|
Also don't use cache if this is the first table in semi-join
|
|
materialization nest.
|
|
*/
|
|
/* These init changes read_record */
|
|
if (tab->use_quick == 2)
|
|
{
|
|
join->thd->set_status_no_good_index_used();
|
|
tab->read_first_record= join_init_quick_read_record;
|
|
if (statistics)
|
|
join->thd->inc_status_select_range_check();
|
|
}
|
|
else
|
|
{
|
|
if (!tab->bush_children)
|
|
tab->read_first_record= join_init_read_record;
|
|
if (tab == first_tab)
|
|
{
|
|
if (tab->select && tab->select->quick)
|
|
{
|
|
if (statistics)
|
|
join->thd->inc_status_select_range();
|
|
}
|
|
else
|
|
{
|
|
join->thd->set_status_no_index_used();
|
|
if (statistics)
|
|
{
|
|
join->thd->inc_status_select_scan();
|
|
join->thd->query_plan_flags|= QPLAN_FULL_SCAN;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (have_quick_select)
|
|
{
|
|
if (statistics)
|
|
join->thd->inc_status_select_full_range_join();
|
|
}
|
|
else
|
|
{
|
|
join->thd->set_status_no_index_used();
|
|
if (statistics)
|
|
{
|
|
join->thd->inc_status_select_full_join();
|
|
join->thd->query_plan_flags|= QPLAN_FULL_JOIN;
|
|
}
|
|
}
|
|
}
|
|
if (!table->no_keyread)
|
|
{
|
|
if (!(have_quick_select &&
|
|
tab->select->quick->index != MAX_KEY && //not index_merge
|
|
table->covering_keys.is_set(tab->select->quick->index)) &&
|
|
(!table->covering_keys.is_clear_all() && ! have_quick_select))
|
|
{ // Only read index tree
|
|
if (tab->loosescan_match_tab)
|
|
tab->index= tab->loosescan_key;
|
|
else
|
|
tab->index= tab->cached_covering_key;
|
|
tab->read_first_record= join_read_first;
|
|
/* Read with index_first / index_next */
|
|
tab->type= tab->type == JT_ALL ? JT_NEXT : JT_HASH_NEXT;
|
|
}
|
|
}
|
|
if (have_quick_select &&
|
|
tab->select->quick->index != MAX_KEY &&
|
|
!tab->table->covering_keys.is_set(tab->select->quick->index))
|
|
push_index_cond(tab, tab->select->quick->index);
|
|
}
|
|
break;
|
|
}
|
|
case JT_FT:
|
|
break;
|
|
/* purecov: begin deadcode */
|
|
default:
|
|
DBUG_PRINT("error",("Table type %d found",tab->type));
|
|
break;
|
|
case JT_UNKNOWN:
|
|
case JT_MAYBE_REF:
|
|
abort();
|
|
/* purecov: end */
|
|
}
|
|
tab->cached_pfs_batch_update= tab->pfs_batch_update();
|
|
|
|
DBUG_EXECUTE("where",
|
|
char buff[256];
|
|
String str(buff,sizeof(buff),system_charset_info);
|
|
str.length(0);
|
|
if (tab->table)
|
|
str.append(tab->table->alias);
|
|
else
|
|
str.append(STRING_WITH_LEN("<no_table_name>"));
|
|
str.append(STRING_WITH_LEN(" final_pushdown_cond"));
|
|
print_where(tab->select_cond, str.c_ptr_safe(), QT_ORDINARY););
|
|
}
|
|
uint n_top_tables= (uint)(join->join_tab_ranges.head()->end -
|
|
join->join_tab_ranges.head()->start);
|
|
|
|
join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */
|
|
|
|
/*
|
|
If a join buffer is used to join a table the ordering by an index
|
|
for the first non-constant table cannot be employed anymore.
|
|
*/
|
|
for (tab= join->join_tab + join->const_tables ;
|
|
tab != join->join_tab + n_top_tables ; tab++)
|
|
{
|
|
if (tab->use_join_cache)
|
|
{
|
|
JOIN_TAB *sort_by_tab= join->group && join->simple_group &&
|
|
join->group_list ?
|
|
join->join_tab+join->const_tables :
|
|
join->get_sort_by_join_tab();
|
|
/*
|
|
It could be that sort_by_tab==NULL, and the plan is to use filesort()
|
|
on the first table.
|
|
*/
|
|
if (join->order)
|
|
{
|
|
join->simple_order= 0;
|
|
join->need_tmp= 1;
|
|
}
|
|
|
|
if (join->group && !join->group_optimized_away)
|
|
{
|
|
join->need_tmp= 1;
|
|
join->simple_group= 0;
|
|
}
|
|
|
|
if (sort_by_tab)
|
|
{
|
|
join->need_tmp= 1;
|
|
join->simple_order= join->simple_group= 0;
|
|
if (sort_by_tab->type == JT_NEXT &&
|
|
!sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
|
|
{
|
|
sort_by_tab->type= JT_ALL;
|
|
sort_by_tab->read_first_record= join_init_read_record;
|
|
}
|
|
else if (sort_by_tab->type == JT_HASH_NEXT &&
|
|
!sort_by_tab->table->covering_keys.is_set(sort_by_tab->index))
|
|
{
|
|
sort_by_tab->type= JT_HASH;
|
|
sort_by_tab->read_first_record= join_init_read_record;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
Give error if we some tables are done with a full join.
|
|
|
|
This is used by multi_table_update and multi_table_delete when running
|
|
in safe mode.
|
|
|
|
@param join Join condition
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Error (full join used)
|
|
*/
|
|
|
|
bool error_if_full_join(JOIN *join)
|
|
{
|
|
for (JOIN_TAB *tab=first_top_level_tab(join, WITH_CONST_TABLES); tab;
|
|
tab= next_top_level_tab(join, tab))
|
|
{
|
|
if ((tab->type == JT_ALL || tab->type == JT_NEXT))
|
|
{
|
|
my_message(ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE,
|
|
ER_THD(join->thd,
|
|
ER_UPDATE_WITHOUT_KEY_IN_SAFE_MODE), MYF(0));
|
|
return(1);
|
|
}
|
|
}
|
|
return(0);
|
|
}
|
|
|
|
|
|
/**
|
|
build_range_rowid_filter()
|
|
|
|
Build range rowid filter. This function should only be called if
|
|
need_to_build_rowid_filter is true
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Error, transaction should be rolled back
|
|
*/
|
|
|
|
bool JOIN_TAB::build_range_rowid_filter()
|
|
{
|
|
|
|
DBUG_ASSERT(need_to_build_rowid_filter && rowid_filter);
|
|
|
|
/**
|
|
The same handler object (table->file) is used to build a filter
|
|
and to perfom a primary table access (by the main query).
|
|
|
|
To estimate the time for filter building tracker should be changed
|
|
and after building of the filter has been finished it should be
|
|
switched back to the previos tracker.
|
|
*/
|
|
|
|
Exec_time_tracker *table_tracker= table->file->get_time_tracker();
|
|
Rowid_filter_tracker *rowid_tracker= rowid_filter->get_tracker();
|
|
table->file->set_time_tracker(rowid_tracker->get_time_tracker());
|
|
rowid_tracker->start_tracking(join->thd);
|
|
Rowid_filter::build_return_code build_rc= rowid_filter->build();
|
|
if (build_rc != Rowid_filter::SUCCESS)
|
|
{
|
|
/* Failed building rowid filter */
|
|
clear_range_rowid_filter();
|
|
}
|
|
need_to_build_rowid_filter= false;
|
|
rowid_tracker->stop_tracking(join->thd);
|
|
table->file->set_time_tracker(table_tracker);
|
|
return (build_rc == Rowid_filter::FATAL_ERROR);
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
Clear used rowid filter
|
|
|
|
Note that rowid_filter is allocated on mem_root and not really freed!
|
|
Only the rowid data is freed.
|
|
*/
|
|
|
|
void JOIN_TAB::clear_range_rowid_filter()
|
|
{
|
|
delete rowid_filter;
|
|
rowid_filter= 0;
|
|
need_to_build_rowid_filter= false;
|
|
range_rowid_filter_info= 0;
|
|
}
|
|
|
|
/**
|
|
cleanup JOIN_TAB.
|
|
|
|
DESCRIPTION
|
|
This is invoked when we've finished all join executions.
|
|
*/
|
|
|
|
void JOIN_TAB::cleanup()
|
|
{
|
|
DBUG_ENTER("JOIN_TAB::cleanup");
|
|
|
|
DBUG_PRINT("enter", ("tab: %p table %s.%s",
|
|
this,
|
|
(table ? table->s->db.str : "?"),
|
|
(table ? table->s->table_name.str : "?")));
|
|
delete select;
|
|
select= 0;
|
|
delete quick;
|
|
quick= 0;
|
|
if (rowid_filter)
|
|
clear_range_rowid_filter();
|
|
if (cache)
|
|
{
|
|
cache->free();
|
|
cache= 0;
|
|
}
|
|
limit= 0;
|
|
// Free select that was created for filesort outside of create_sort_index
|
|
if (filesort && filesort->select && !filesort->own_select)
|
|
delete filesort->select;
|
|
delete filesort;
|
|
filesort= NULL;
|
|
if (table)
|
|
{
|
|
table->file->ha_end_keyread();
|
|
if (type == JT_FT)
|
|
table->file->ha_ft_end();
|
|
else
|
|
table->file->ha_index_or_rnd_end();
|
|
preread_init_done= FALSE;
|
|
if (table->pos_in_table_list && table->pos_in_table_list->jtbm_subselect)
|
|
{
|
|
if (table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
/*
|
|
Set this to NULL so that cleanup_empty_jtbm_semi_joins() doesn't
|
|
attempt to make another free_tmp_table call.
|
|
*/
|
|
table->pos_in_table_list->table= NULL;
|
|
free_tmp_table(join->thd, table);
|
|
}
|
|
else
|
|
{
|
|
TABLE_LIST *tmp= table->pos_in_table_list;
|
|
end_read_record(&read_record);
|
|
tmp->jtbm_subselect->cleanup();
|
|
/*
|
|
The above call freed the materialized temptable. Set it to NULL so
|
|
that we don't attempt to touch it if JOIN_TAB::cleanup() is invoked
|
|
multiple times (it may be)
|
|
*/
|
|
tmp->table= NULL;
|
|
}
|
|
table= NULL;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
/*if (table->pos_in_table_list && table->pos_in_table_list->derived)
|
|
{
|
|
delete table->pos_in_table_list->derived->derived->dt_handler;
|
|
}*/
|
|
|
|
/*
|
|
We need to reset this for next select
|
|
(Tested in part_of_refkey)
|
|
*/
|
|
table->reginfo.join_tab= 0;
|
|
}
|
|
end_read_record(&read_record);
|
|
explain_plan= NULL;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
Estimate the time to get rows of the joined table
|
|
|
|
Updates found_records, records, cached_covering_key, read_time and
|
|
cache_scan_and_compare_time
|
|
*/
|
|
|
|
void JOIN_TAB::estimate_scan_time()
|
|
{
|
|
THD *thd= join->thd;
|
|
handler *file= table->file;
|
|
double row_copy_cost, copy_cost;
|
|
ALL_READ_COST * const cost= &cached_scan_and_compare_cost;
|
|
cost->reset();
|
|
|
|
cached_covering_key= MAX_KEY;
|
|
if (table->is_created())
|
|
{
|
|
if (table->is_filled_at_execution())
|
|
{
|
|
get_delayed_table_estimates(table, &records, &read_time,
|
|
&startup_cost);
|
|
table->opt_range_condition_rows= records;
|
|
table->used_stat_records= records;
|
|
cost->row_cost.cpu= read_time;
|
|
row_copy_cost= file->ROW_COPY_COST;
|
|
}
|
|
else
|
|
{
|
|
records= table->stat_records();
|
|
/*
|
|
table->opt_range_condition_rows has already been set to
|
|
table->file->stats.records
|
|
*/
|
|
DBUG_ASSERT(table->opt_range_condition_rows == records);
|
|
|
|
if (!table->covering_keys.is_clear_all() && ! table->no_keyread)
|
|
{
|
|
cached_covering_key= find_shortest_key(table, &table->covering_keys);
|
|
cost->index_cost= file->ha_key_scan_time(cached_covering_key, records);
|
|
read_time= file->cost(cost->index_cost);
|
|
row_copy_cost= 0; // Included in ha_key_scan_time
|
|
}
|
|
else
|
|
{
|
|
cost->row_cost= file->ha_scan_time(records);
|
|
read_time= file->cost(cost->row_cost);
|
|
row_copy_cost= 0; // Included in ha_scan_time
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
The following is same as calling
|
|
TABLE_SHARE::update_optimizer_costs, but without locks
|
|
*/
|
|
if (table->s->db_type() == heap_hton)
|
|
memcpy(&table->s->optimizer_costs, &heap_optimizer_costs,
|
|
sizeof(heap_optimizer_costs));
|
|
else
|
|
memcpy(&table->s->optimizer_costs, &tmp_table_optimizer_costs,
|
|
sizeof(tmp_table_optimizer_costs));
|
|
file->set_optimizer_costs(thd);
|
|
table->s->optimizer_costs_inited=1;
|
|
|
|
records= table->stat_records();
|
|
DBUG_ASSERT(table->opt_range_condition_rows == records);
|
|
cost->row_cost= table->file->ha_scan_time(MY_MAX(records, 1000));
|
|
read_time= file->cost(cost->row_cost);
|
|
row_copy_cost= table->s->optimizer_costs.row_copy_cost;
|
|
}
|
|
|
|
found_records= records;
|
|
copy_cost= (records * (row_copy_cost + WHERE_COST_THD(thd)));
|
|
cached_scan_and_compare_time= read_time + copy_cost;
|
|
cost->copy_cost+= copy_cost;
|
|
|
|
/*
|
|
Assume we only need to do physical IO once even if we scan the file
|
|
multiple times.
|
|
*/
|
|
cost->max_index_blocks= (longlong) ceil(cost->index_cost.io);
|
|
cost->max_row_blocks= (longlong) ceil(cost->row_cost.io);
|
|
DBUG_ASSERT(compare_cost(cached_scan_and_compare_time,
|
|
file->cost(cost)));
|
|
}
|
|
|
|
|
|
/**
|
|
Estimate the number of rows that an access method will read from a table.
|
|
|
|
@todo: why not use JOIN_TAB::found_records or JOIN_TAB::records_read
|
|
*/
|
|
|
|
double JOIN_TAB::get_examined_rows()
|
|
{
|
|
double examined_rows;
|
|
const SQL_SELECT *sel= get_sql_select();
|
|
|
|
if (sel && sel->quick && use_quick != 2)
|
|
{
|
|
examined_rows= (double) sel->quick->records;
|
|
DBUG_ASSERT(examined_rows == sel->quick->records);
|
|
}
|
|
else if (type == JT_NEXT || type == JT_ALL || type == JT_RANGE ||
|
|
type == JT_HASH || type == JT_HASH_NEXT)
|
|
{
|
|
if (limit)
|
|
{
|
|
/*
|
|
@todo This estimate is wrong, a LIMIT query may examine much more rows
|
|
than the LIMIT itself.
|
|
*/
|
|
examined_rows= (double)limit;
|
|
}
|
|
else
|
|
{
|
|
if (table->is_filled_at_execution())
|
|
examined_rows= (double)records;
|
|
else
|
|
{
|
|
/*
|
|
handler->info(HA_STATUS_VARIABLE) has been called in
|
|
make_join_statistics()
|
|
*/
|
|
examined_rows= (double)table->stat_records();
|
|
}
|
|
}
|
|
}
|
|
else
|
|
examined_rows= records_init;
|
|
|
|
if (examined_rows >= (double) HA_ROWS_MAX)
|
|
return (double) HA_ROWS_MAX;
|
|
return examined_rows;
|
|
}
|
|
|
|
|
|
/**
|
|
Initialize the join_tab before reading.
|
|
Currently only derived table/view materialization is done here.
|
|
|
|
TODO: consider moving this together with join_tab_execution_startup
|
|
*/
|
|
|
|
bool JOIN_TAB::preread_init()
|
|
{
|
|
TABLE_LIST *derived= table->pos_in_table_list;
|
|
DBUG_ENTER("JOIN_TAB::preread_init");
|
|
|
|
if (!derived || !derived->is_materialized_derived())
|
|
{
|
|
preread_init_done= TRUE;
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
/* Materialize derived table/view. */
|
|
if ((!derived->get_unit()->executed ||
|
|
derived->is_recursive_with_table() ||
|
|
derived->get_unit()->uncacheable) &&
|
|
mysql_handle_single_derived(join->thd->lex,
|
|
derived, DT_CREATE | DT_FILL))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
if (!(derived->get_unit()->uncacheable & UNCACHEABLE_DEPENDENT) ||
|
|
derived->is_nonrecursive_derived_with_rec_ref() ||
|
|
is_split_derived)
|
|
preread_init_done= TRUE;
|
|
if (select && select->quick)
|
|
select->quick->replace_handler(table->file);
|
|
|
|
DBUG_EXECUTE_IF("show_explain_probe_join_tab_preread",
|
|
if (dbug_user_var_equals_int(join->thd,
|
|
"show_explain_probe_select_id",
|
|
join->select_lex->select_number))
|
|
dbug_serve_apcs(join->thd, 1);
|
|
);
|
|
|
|
/* init ftfuns for just initialized derived table */
|
|
if (table->fulltext_searched)
|
|
if (init_ftfuncs(join->thd, join->select_lex, MY_TEST(join->order)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
pfs_batch_update()
|
|
|
|
Check if the used table will do a lot of read calls in a row without
|
|
any intervening read calls to any other tables.
|
|
|
|
@return 0 No
|
|
@return 1 Yes
|
|
|
|
If yes, then the handler will be informed about this with the
|
|
start_psi_batch_mode() / end_psi_batch_mode() calls
|
|
|
|
This is currently used only to speed up performance schema code for
|
|
multiple reads.
|
|
|
|
In the future we may also inform the engine about this. The engine
|
|
could use this information to cache the used pages, keep blocks
|
|
locked in the page cache and similar things to speed up repeated
|
|
reads.
|
|
|
|
The return value of this function is cached in
|
|
JOIN_TAB::cached_pfs_batch_update
|
|
*/
|
|
|
|
bool JOIN_TAB::pfs_batch_update()
|
|
{
|
|
/*
|
|
Use PFS batch mode if
|
|
1. tab is an inner-most table, or
|
|
2. will read more than one row (not eq_ref or const access type)
|
|
3. no subqueries
|
|
*/
|
|
|
|
return join->join_tab + join->table_count - 1 == this && // 1
|
|
type != JT_EQ_REF && type != JT_CONST && type != JT_SYSTEM && // 2
|
|
(!select_cond || !select_cond->with_subquery()); // 3
|
|
}
|
|
|
|
|
|
/**
|
|
Build a TABLE_REF structure for index lookup in the temporary table
|
|
|
|
@param thd Thread handle
|
|
@param tmp_key The temporary table key
|
|
@param it The iterator of items for lookup in the key
|
|
@param skip Number of fields from the beginning to skip
|
|
|
|
@details
|
|
Build TABLE_REF object for lookup in the key 'tmp_key' using items
|
|
accessible via item iterator 'it'.
|
|
|
|
@retval TRUE Error
|
|
@retval FALSE OK
|
|
*/
|
|
|
|
bool TABLE_REF::tmp_table_index_lookup_init(THD *thd,
|
|
KEY *tmp_key,
|
|
Item_iterator &it,
|
|
bool value,
|
|
uint skip)
|
|
{
|
|
uint tmp_key_parts= tmp_key->user_defined_key_parts;
|
|
uint i;
|
|
DBUG_ENTER("TABLE_REF::tmp_table_index_lookup_init");
|
|
|
|
key= 0; /* The only temp table index. */
|
|
key_length= tmp_key->key_length;
|
|
if (!(key_buff= thd->calloc<uchar>(ALIGN_SIZE(tmp_key->key_length) * 2)) ||
|
|
!(key_copy= thd->alloc<store_key*>(tmp_key_parts + 1)) ||
|
|
!(items= thd->alloc<Item*>(tmp_key_parts)))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
key_buff2= key_buff + ALIGN_SIZE(tmp_key->key_length);
|
|
|
|
KEY_PART_INFO *cur_key_part= tmp_key->key_part;
|
|
store_key **ref_key= key_copy;
|
|
uchar *cur_ref_buff= key_buff;
|
|
|
|
it.open();
|
|
for (i= 0; i < skip; i++) it.next();
|
|
for (i= 0; i < tmp_key_parts; i++, cur_key_part++, ref_key++)
|
|
{
|
|
Item *item= it.next();
|
|
DBUG_ASSERT(item);
|
|
items[i]= item;
|
|
int null_count= MY_TEST(cur_key_part->field->real_maybe_null());
|
|
*ref_key= new store_key_item(thd, cur_key_part->field,
|
|
/* TIMOUR:
|
|
the NULL byte is taken into account in
|
|
cur_key_part->store_length, so instead of
|
|
cur_ref_buff + MY_TEST(maybe_null), we could
|
|
use that information instead.
|
|
*/
|
|
cur_ref_buff + null_count,
|
|
null_count ? cur_ref_buff : 0,
|
|
cur_key_part->length, items[i], value);
|
|
cur_ref_buff+= cur_key_part->store_length;
|
|
}
|
|
*ref_key= NULL; /* End marker. */
|
|
key_err= 1;
|
|
key_parts= tmp_key_parts;
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
Check if ref access uses "Full scan on NULL key" (i.e. it actually alternates
|
|
between ref access and full table scan)
|
|
*/
|
|
|
|
bool TABLE_REF::is_access_triggered()
|
|
{
|
|
for (uint i = 0; i < key_parts; i++)
|
|
{
|
|
if (cond_guards[i])
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Partially cleanup JOIN after it has executed: close index or rnd read
|
|
(table cursors), free quick selects.
|
|
|
|
This function is called in the end of execution of a JOIN, before the used
|
|
tables are unlocked and closed.
|
|
|
|
For a join that is resolved using a temporary table, the first sweep is
|
|
performed against actual tables and an intermediate result is inserted
|
|
into the temprorary table.
|
|
The last sweep is performed against the temporary table. Therefore,
|
|
the base tables and associated buffers used to fill the temporary table
|
|
are no longer needed, and this function is called to free them.
|
|
|
|
For a join that is performed without a temporary table, this function
|
|
is called after all rows are sent, but before EOF packet is sent.
|
|
|
|
For a simple SELECT with no subqueries this function performs a full
|
|
cleanup of the JOIN and calls mysql_unlock_read_tables to free used base
|
|
tables.
|
|
|
|
If a JOIN is executed for a subquery or if it has a subquery, we can't
|
|
do the full cleanup and need to do a partial cleanup only.
|
|
- If a JOIN is not the top level join, we must not unlock the tables
|
|
because the outer select may not have been evaluated yet, and we
|
|
can't unlock only selected tables of a query.
|
|
- Additionally, if this JOIN corresponds to a correlated subquery, we
|
|
should not free quick selects and join buffers because they will be
|
|
needed for the next execution of the correlated subquery.
|
|
- However, if this is a JOIN for a [sub]select, which is not
|
|
a correlated subquery itself, but has subqueries, we can free it
|
|
fully and also free JOINs of all its subqueries. The exception
|
|
is a subquery in SELECT list, e.g: @n
|
|
SELECT a, (select MY_MAX(b) from t1) group by c @n
|
|
This subquery will not be evaluated at first sweep and its value will
|
|
not be inserted into the temporary table. Instead, it's evaluated
|
|
when selecting from the temporary table. Therefore, it can't be freed
|
|
here even though it's not correlated.
|
|
|
|
@todo
|
|
Unlock tables even if the join isn't top level select in the tree
|
|
*/
|
|
|
|
void JOIN::join_free()
|
|
{
|
|
SELECT_LEX_UNIT *tmp_unit;
|
|
SELECT_LEX *sl;
|
|
/*
|
|
Optimization: if not EXPLAIN and we are done with the JOIN,
|
|
free all tables.
|
|
*/
|
|
bool full= !(select_lex->uncacheable) && !(thd->lex->describe);
|
|
bool can_unlock= full;
|
|
DBUG_ENTER("JOIN::join_free");
|
|
|
|
cleanup(full);
|
|
|
|
for (tmp_unit= select_lex->first_inner_unit();
|
|
tmp_unit;
|
|
tmp_unit= tmp_unit->next_unit())
|
|
{
|
|
if (tmp_unit->with_element && tmp_unit->with_element->is_recursive)
|
|
continue;
|
|
for (sl= tmp_unit->first_select(); sl; sl= sl->next_select())
|
|
{
|
|
Item_subselect *subselect= sl->master_unit()->item;
|
|
bool full_local= full && (!subselect || subselect->is_evaluated());
|
|
/*
|
|
If this join is evaluated, we can fully clean it up and clean up all
|
|
its underlying joins even if they are correlated -- they will not be
|
|
used any more anyway.
|
|
If this join is not yet evaluated, we still must clean it up to
|
|
close its table cursors -- it may never get evaluated, as in case of
|
|
... HAVING FALSE OR a IN (SELECT ...))
|
|
but all table cursors must be closed before the unlock.
|
|
*/
|
|
sl->cleanup_all_joins(full_local);
|
|
/* Can't unlock if at least one JOIN is still needed */
|
|
can_unlock= can_unlock && full_local;
|
|
}
|
|
}
|
|
/*
|
|
We are not using tables anymore
|
|
Unlock all tables. We may be in an INSERT .... SELECT statement.
|
|
*/
|
|
if (can_unlock && lock && thd->lock && ! thd->locked_tables_mode &&
|
|
!(select_options & SELECT_NO_UNLOCK) &&
|
|
!select_lex->subquery_in_having &&
|
|
(select_lex == (thd->lex->unit.fake_select_lex ?
|
|
thd->lex->unit.fake_select_lex :
|
|
thd->lex->first_select_lex())))
|
|
{
|
|
/*
|
|
TODO: unlock tables even if the join isn't top level select in the
|
|
tree.
|
|
*/
|
|
mysql_unlock_read_tables(thd, lock); // Don't free join->lock
|
|
lock= 0;
|
|
}
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
Free resources of given join.
|
|
|
|
@param full true if we should free all resources, call with full==1
|
|
should be last, before it this function can be called with
|
|
full==0
|
|
|
|
@note
|
|
With subquery this function definitely will be called several times,
|
|
but even for simple query it can be called several times.
|
|
*/
|
|
|
|
void JOIN::cleanup(bool full)
|
|
{
|
|
DBUG_ENTER("JOIN::cleanup");
|
|
DBUG_PRINT("enter", ("select: %d (%p) join: %p full: %u",
|
|
select_lex->select_number, select_lex, this,
|
|
(uint) full));
|
|
|
|
if (full)
|
|
have_query_plan= QEP_DELETED;
|
|
|
|
if (original_join_tab)
|
|
{
|
|
/* Free the original optimized join created for the group_by_handler */
|
|
join_tab= original_join_tab;
|
|
original_join_tab= 0;
|
|
}
|
|
|
|
if (join_tab)
|
|
{
|
|
JOIN_TAB *tab;
|
|
|
|
if (full)
|
|
{
|
|
/*
|
|
Call cleanup() on join tabs used by the join optimization
|
|
(join->join_tab may now be pointing to result of make_simple_join
|
|
reading from the temporary table)
|
|
|
|
We also need to check table_count to handle various degenerate joins
|
|
w/o tables: they don't have some members initialized and
|
|
WALK_OPTIMIZATION_TABS may not work correctly for them.
|
|
*/
|
|
if (top_join_tab_count && tables_list)
|
|
{
|
|
for (tab= first_breadth_first_tab(); tab;
|
|
tab= next_breadth_first_tab(first_breadth_first_tab(),
|
|
top_join_tab_count, tab))
|
|
{
|
|
tab->cleanup();
|
|
delete tab->filesort_result;
|
|
tab->filesort_result= NULL;
|
|
}
|
|
}
|
|
cleaned= true;
|
|
//psergey2: added (Q: why not in the above loop?)
|
|
{
|
|
JOIN_TAB *curr_tab= join_tab + exec_join_tab_cnt();
|
|
for (uint i= 0; i < aggr_tables; i++, curr_tab++)
|
|
{
|
|
if (curr_tab->aggr)
|
|
{
|
|
free_tmp_table(thd, curr_tab->table);
|
|
curr_tab->table= NULL;
|
|
delete curr_tab->tmp_table_param;
|
|
curr_tab->tmp_table_param= NULL;
|
|
curr_tab->aggr= NULL;
|
|
|
|
delete curr_tab->filesort_result;
|
|
curr_tab->filesort_result= NULL;
|
|
}
|
|
}
|
|
aggr_tables= 0; // psergey3
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITH_CONST_TABLES); tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
tab->partial_cleanup();
|
|
}
|
|
}
|
|
}
|
|
if (full)
|
|
{
|
|
cleanup_empty_jtbm_semi_joins(this, join_list);
|
|
|
|
// Run Cached_item DTORs!
|
|
group_fields.delete_elements();
|
|
order_fields.delete_elements();
|
|
|
|
/*
|
|
We can't call delete_elements() on copy_funcs as this will cause
|
|
problems in free_elements() as some of the elements are then deleted.
|
|
*/
|
|
tmp_table_param.copy_funcs.empty();
|
|
/*
|
|
If we have tmp_join and 'this' JOIN is not tmp_join and
|
|
tmp_table_param.copy_field's of them are equal then we have to remove
|
|
pointer to tmp_table_param.copy_field from tmp_join, because it will
|
|
be removed in tmp_table_param.cleanup().
|
|
*/
|
|
tmp_table_param.cleanup();
|
|
|
|
delete pushdown_query;
|
|
pushdown_query= 0;
|
|
|
|
if (!join_tab)
|
|
{
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
|
TABLE_LIST *table_ref;
|
|
while ((table_ref= li++))
|
|
{
|
|
if (table_ref->table &&
|
|
table_ref->jtbm_subselect &&
|
|
table_ref->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
free_tmp_table(thd, table_ref->table);
|
|
table_ref->table= NULL;
|
|
}
|
|
}
|
|
}
|
|
free_pushdown_handlers(*join_list);
|
|
}
|
|
/* Restore ref array to original state */
|
|
if (current_ref_ptrs != items0)
|
|
{
|
|
set_items_ref_array(items0);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/**
|
|
Clean up all derived pushdown handlers in this join.
|
|
|
|
@detail
|
|
Note that dt_handler is picked at the prepare stage (as opposed
|
|
to optimization stage where one could expect this).
|
|
Because of that, we have to do cleanups in this function that is called
|
|
from JOIN::cleanup() and not in JOIN_TAB::cleanup.
|
|
*/
|
|
void JOIN::free_pushdown_handlers(List<TABLE_LIST>& join_list)
{
  List_iterator<TABLE_LIST> tables_it(join_list);
  for (TABLE_LIST *tl= tables_it++; tl; tl= tables_it++)
  {
    /* Handle the members of a nested join before the nest itself */
    if (tl->nested_join)
      free_pushdown_handlers(tl->nested_join->join_list);

    /* Deleting a NULL pointer is a no-op, so no guards are needed */
    delete tl->pushdown_derived;
    tl->pushdown_derived= NULL;

    delete tl->dt_handler;
    tl->dt_handler= NULL;
  }
}
|
|
|
|
/**
|
|
Remove the following expressions from ORDER BY and GROUP BY:
|
|
Constant expressions @n
|
|
Expression that only uses tables that are of type EQ_REF and the reference
|
|
  is in the ORDER list or if all referred tables are of the above type.
|
|
|
|
In the following, the X field can be removed:
|
|
@code
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t1.a,t2.X
|
|
SELECT * FROM t1,t2,t3 WHERE t1.a=t2.a AND t2.b=t3.b ORDER BY t1.a,t3.X
|
|
@endcode
|
|
|
|
These can't be optimized:
|
|
@code
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.X,t1.a
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a AND t1.b=t2.b ORDER BY t1.a,t2.c
|
|
SELECT * FROM t1,t2 WHERE t1.a=t2.a ORDER BY t2.b,t1.a
|
|
@endcode
|
|
|
|
TODO: this function checks ORDER::used, which can only have a value of 0.
|
|
*/
|
|
|
|
static bool
|
|
eq_ref_table(JOIN *join, ORDER *start_order, JOIN_TAB *tab)
|
|
{
|
|
if (tab->cached_eq_ref_table) // If cached
|
|
return tab->eq_ref_table;
|
|
tab->cached_eq_ref_table=1;
|
|
/* We can skip const tables only if not an outer table */
|
|
if (tab->type == JT_CONST && !tab->first_inner)
|
|
return (tab->eq_ref_table=1); /* purecov: inspected */
|
|
if (tab->type != JT_EQ_REF || tab->table->maybe_null)
|
|
return (tab->eq_ref_table=0); // We must use this
|
|
Item **ref_item=tab->ref.items;
|
|
Item **end=ref_item+tab->ref.key_parts;
|
|
uint found=0;
|
|
table_map map=tab->table->map;
|
|
|
|
for (; ref_item != end ; ref_item++)
|
|
{
|
|
if (! (*ref_item)->const_item())
|
|
{ // Not a const ref
|
|
ORDER *order;
|
|
for (order=start_order ; order ; order=order->next)
|
|
{
|
|
if ((*ref_item)->eq(order->item[0],0))
|
|
break;
|
|
}
|
|
if (order)
|
|
{
|
|
if (!(order->used & map))
|
|
{
|
|
found++;
|
|
order->used|= map;
|
|
}
|
|
continue; // Used in ORDER BY
|
|
}
|
|
if (!only_eq_ref_tables(join,start_order, (*ref_item)->used_tables()))
|
|
return (tab->eq_ref_table=0);
|
|
}
|
|
}
|
|
/* Check that there was no reference to table before sort order */
|
|
for (; found && start_order ; start_order=start_order->next)
|
|
{
|
|
if (start_order->used & map)
|
|
{
|
|
found--;
|
|
continue;
|
|
}
|
|
if (start_order->depend_map & map)
|
|
return (tab->eq_ref_table=0);
|
|
}
|
|
return tab->eq_ref_table=1;
|
|
}
|
|
|
|
|
|
static bool
|
|
only_eq_ref_tables(JOIN *join,ORDER *order,table_map tables)
|
|
{
|
|
tables&= ~PSEUDO_TABLE_BITS;
|
|
for (JOIN_TAB **tab=join->map2table ; tables ; tab++, tables>>=1)
|
|
{
|
|
if (tables & 1 && !eq_ref_table(join, order, *tab))
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
/** Update the dependency map for the tables. */
|
|
|
|
static void update_depend_map(JOIN *join)
{
  for (JOIN_TAB *jt= first_linear_tab(join, WITH_BUSH_ROOTS,
                                      WITH_CONST_TABLES);
       jt;
       jt= next_linear_tab(join, jt, WITH_BUSH_ROOTS))
  {
    TABLE_REF *ref= &jt->ref;

    /* Tables used directly by the key part expressions */
    table_map direct= 0;
    for (uint part= 0; part < ref->key_parts; part++)
      direct|= ref->items[part]->used_tables();
    direct&= ~OUTER_REF_TABLE_BIT;
    ref->depend_map= direct;

    /* Add what each of those tables depends on through its own ref */
    for (uint idx= 0; direct; idx++, direct>>= 1)
    {
      if (direct & 1)
        ref->depend_map|= join->map2table[idx]->ref.depend_map;
    }
  }
}
|
|
|
|
|
|
/** Update the dependency map for the sort order. */
|
|
|
|
static void update_depend_map_for_order(JOIN *join, ORDER *order)
{
  for (ORDER *ord= order; ord; ord= ord->next)
  {
    ord->item[0]->update_used_tables();
    table_map direct= ord->item[0]->used_tables();
    ord->depend_map= direct;
    ord->used= 0;

    // Not item_sum(), RAND() and no reference to table outside of sub select
    if (ord->depend_map & (OUTER_REF_TABLE_BIT | RAND_TABLE_BIT))
      continue;
    if (ord->item[0]->with_sum_func() || !join->join_tab)
      continue;

    /* Add the dependencies of every table the expression uses */
    for (uint idx= 0; direct; idx++, direct>>= 1)
    {
      if (direct & 1)
        ord->depend_map|= join->map2table[idx]->ref.depend_map;
    }
  }
}
|
|
|
|
|
|
/**
|
|
Remove all constants from ORDER and check if ORDER only contains simple
|
|
expressions.
|
|
|
|
We also remove all duplicate expressions, keeping only the first one.
|
|
|
|
simple_order is set to 1 if sort_order only uses fields from head table
|
|
and the head table is not a LEFT JOIN table.
|
|
|
|
@param join Join handler
|
|
@param first_order List of SORT or GROUP order
|
|
@param cond WHERE statement
|
|
@param change_list Set to 1 if we should remove things from list.
|
|
If this is not set, then only simple_order is
|
|
calculated. This is not set when we
|
|
are using ROLLUP
|
|
@param simple_order Set to 1 if we are only using simple
|
|
expressions.
|
|
|
|
@return
|
|
Returns new sort order
|
|
*/
|
|
|
|
static ORDER *
remove_const(JOIN *join,ORDER *first_order, COND *cond,
             bool change_list, bool *simple_order)
{
  /*
    We can't do ORDER BY using filesort if the select list contains a non
    deterministic value like RAND() or ROWNUM().
    For example:
    SELECT a,ROWNUM() FROM t1 ORDER BY a;

    If we would first sort the table 't1', the ROWNUM() column would be
    generated during end_send() and the order would be wrong.

    Previously we had here also a test of ROLLUP:
    'join->rollup.state == ROLLUP::STATE_NONE'

    I deleted this because the ROLLUP was never enforced because of a
    bug where the initial value of simple_order was ignored. Having
    ROLLUP tested now when the code is fixed, causes many test failure
    and some wrong results so better to leave the code as it was
    related to ROLLUP.
  */
  *simple_order= !join->select_lex->rownum_in_field_list;
  if (join->only_const_tables())
    return change_list ? 0 : first_order;       // No need to sort

  /* Link through which the next kept ORDER element gets attached */
  ORDER **keep_link= &first_order;
  table_map UNINIT_VAR(first_table); /* protected by first_is_base_table */
  const table_map not_const_tables= ~join->const_table_map;
  bool first_is_base_table= FALSE;
  DBUG_ENTER("remove_const");

  /*
    Join tab is set after make_join_statistics() has been called.
    In case of one table with GROUP BY this function is called before
    join_tab is set for the GROUP_BY expression
  */
  if (!join->join_tab)
  {
    first_is_base_table= FALSE;
    first_table= 0;                             // Not used, for gcc
  }
  else
  {
    /* First non-const join tab */
    JOIN_TAB *const head= join->join_tab + join->const_tables;
    if (head->table)
    {
      first_table= head->table->map;
      first_is_base_table= TRUE;
    }

    /*
      Cleanup to avoid interference of calls of this function for
      ORDER BY and GROUP BY
    */
    for (JOIN_TAB *jt= head;
         jt < join->join_tab + join->top_join_tab_count;
         jt++)
      jt->cached_eq_ref_table= FALSE;

    /* An ON expression on the head table rules out a simple order */
    if (head->on_expr_ref[0] != NULL)
      *simple_order= 0;

    if (*simple_order &&
        (head->table->file->ha_table_flags() & HA_SLOW_RND_POS))
    {
      uint u1, u2, u3, u4;
      /*
        normally the condition is (see filesort_use_addons())

          length + sortlength <= max_length_for_sort_data

        but for HA_SLOW_RND_POS tables we relax it a bit, as the alternative
        is to use a temporary table, which is rather expensive.

        TODO proper cost estimations
      */
      *simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3, &u4);
    }
  }

  /* NOTE: A variable of not_const_tables ^ first_table; breaks gcc 2.7 */

  update_depend_map_for_order(join, first_order);
  for (ORDER *order= first_order; order; order= order->next)
  {
    const table_map order_tables= order->item[0]->used_tables();
    table_map non_head_tables;

    if (order->item[0]->with_sum_func() ||
        order->item[0]->with_window_func() ||
        /*
          If the outer table of an outer join is const (either by itself or
          after applying WHERE condition), grouping on a field from such a
          table will be optimized away and filesort without temporary table
          will be used unless we prevent that now. Filesort is not fit to
          handle joins and the join condition is not applied. We can't detect
          the case without an expensive test, however, so we force temporary
          table for all queries containing more than one table, ROLLUP, and an
          outer join.
        */
        (join->table_count > 1 &&
         join->rollup.state == ROLLUP::STATE_INITED &&
         join->outer_join))
    {
      *simple_order= 0;                         // Must do a temp table to sort
    }
    else if (!(order_tables & not_const_tables))
    {
      /* The expression uses only const tables: drop it from the list */
      if (order->item[0]->with_subquery())
      {
        /*
          Delay the evaluation of constant ORDER and/or GROUP expressions that
          contain subqueries until the execution phase.
        */
        join->exec_const_order_group_cond.push_back(order->item[0],
                                                    join->thd->mem_root);
      }
      DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
      continue;
    }
    else if (order_tables & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT))
    {
      *simple_order= 0;
    }
    else
    {
      if (cond && const_expression_in_where(cond,order->item[0]))
      {
        DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
        continue;
      }
      if (first_is_base_table &&
          (non_head_tables= order_tables & (not_const_tables ^ first_table)))
      {
        if (!(order_tables & first_table) &&
            only_eq_ref_tables(join,first_order, non_head_tables))
        {
          DBUG_PRINT("info",("removing: %s", order->item[0]->full_name()));
          continue;
        }
        /*
          UseMultipleEqualitiesToRemoveTempTable:
          Can use multiple-equalities here to check that ORDER BY columns
          can be used without tmp. table.
        */
        bool can_subst_to_first_table= false;
        if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) &&
            order->item[0]->real_item()->type() == Item::FIELD_ITEM &&
            join->cond_equal)
        {
          const table_map first_table_bit=
            join->join_tab[join->const_tables].table->map;

          /*
            TODO: equality substitution in the context of ORDER BY is
            sometimes allowed when it is not allowed in the general case.

            We make the below call for its side effect: it will locate the
            multiple equality the item belongs to and set item->item_equal
            accordingly.
          */
          Item *res= order->item[0]->propagate_equal_fields(
                       join->thd, Value_source::Context_identity(),
                       join->cond_equal);
          Item_equal *item_eq= res->get_item_equal();
          if (item_eq)
          {
            Item *first= item_eq->get_first(NO_PARTICULAR_TAB, NULL);
            can_subst_to_first_table=
              first->const_item() || first->used_tables() == first_table_bit;
          }
        }

        if (!can_subst_to_first_table)
          *simple_order= 0;                     // Must do a temp table to sort
      }
    }

    /* Remove ORDER BY entries that we have seen before */
    bool seen_before= false;
    for (ORDER *prior= first_order; prior != order; prior= prior->next)
    {
      if (prior->item[0]->eq(order->item[0],1))
      {
        seen_before= true;
        break;
      }
    }
    if (seen_before)
      continue;                                 // Duplicate order by. Remove

    if (change_list)
      *keep_link= order;                        // use this entry
    keep_link= &order->next;
  }

  if (change_list)
    *keep_link= 0;
  if (keep_link == &first_order)                // Nothing to sort/group
    *simple_order= 1;
#ifndef DBUG_OFF
  if (unlikely(join->thd->is_error()))
    DBUG_PRINT("error",("Error from remove_const"));
#endif
  DBUG_PRINT("exit",("simple_order: %d",(int) *simple_order));
  DBUG_RETURN(first_order);
}
|
|
|
|
|
|
/**
|
|
Filter out ORDER items those are equal to constants in WHERE
|
|
|
|
This function is a limited version of remove_const() for use
|
|
with non-JOIN statements (i.e. single-table UPDATE and DELETE).
|
|
|
|
|
|
@param order Linked list of ORDER BY arguments
|
|
@param cond WHERE expression
|
|
|
|
@return pointer to new filtered ORDER list or NULL if whole list eliminated
|
|
|
|
@note
|
|
This function overwrites input order list.
|
|
*/
|
|
|
|
ORDER *simple_remove_const(ORDER *order, COND *where)
{
  /* Nothing to filter, or nothing to filter against */
  if (!order || !where)
    return order;

  ORDER *kept_head= NULL;
  /* Link through which the next kept element gets attached */
  ORDER **kept_link= &kept_head;

  for (ORDER *cur= order; cur; cur= cur->next)
  {
    DBUG_ASSERT(!cur->item[0]->with_sum_func()); // should never happen
    if (const_expression_in_where(where, cur->item[0]))
      continue;                                 // Constant in WHERE: drop it
    *kept_link= cur;
    kept_link= &cur->next;
  }

  /* Terminate the filtered list (leaves kept_head NULL if all were dropped) */
  *kept_link= NULL;
  return kept_head;
}
|
|
|
|
|
|
/*
|
|
Set all fields in the table to have a null value
|
|
|
|
@param tables Table list
|
|
*/
|
|
|
|
static void make_tables_null_complemented(List<TABLE_LIST> *tables)
{
  List_iterator<TABLE_LIST> tables_it(*tables);
  for (TABLE_LIST *tl= tables_it++; tl; tl= tables_it++)
  {
    /*
      Don't touch semi-join materialization tables, as a join_free()
      call may have freed them (and HAVING clause can't have references to
      them anyway).
    */
    if (tl->is_jtbm())
      continue;
    mark_as_null_row(tl->table);                // Set fields to NULL
  }
}
|
|
|
|
|
|
static int
|
|
return_zero_rows(JOIN *join, select_result *result, List<TABLE_LIST> *tables,
|
|
List<Item> *fields, bool send_row, ulonglong select_options,
|
|
const char *info, Item *having, List<Item> *all_fields)
|
|
{
|
|
DBUG_ENTER("return_zero_rows");
|
|
|
|
if (select_options & SELECT_DESCRIBE)
|
|
{
|
|
select_describe(join, FALSE, FALSE, FALSE, info);
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
if (send_row)
|
|
{
|
|
/*
|
|
Set all tables to have NULL row. This is needed as we will be evaluating
|
|
HAVING condition.
|
|
*/
|
|
make_tables_null_complemented(tables);
|
|
|
|
List_iterator_fast<Item> it(*all_fields);
|
|
Item *item;
|
|
/*
|
|
Inform all items (especially aggregating) to calculate HAVING correctly,
|
|
also we will need it for sending results.
|
|
*/
|
|
join->no_rows_in_result_called= 1;
|
|
while ((item= it++))
|
|
item->no_rows_in_result();
|
|
if (having && having->val_int() == 0)
|
|
send_row=0;
|
|
}
|
|
|
|
/* Update results for FOUND_ROWS */
|
|
if (!join->send_row_on_empty_set())
|
|
{
|
|
join->thd->limit_found_rows= 0;
|
|
}
|
|
|
|
if (!(result->send_result_set_metadata(*fields,
|
|
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF)))
|
|
{
|
|
bool send_error= FALSE;
|
|
if (send_row)
|
|
send_error= result->send_data_with_check(*fields, join->unit, 0) > 0;
|
|
if (likely(!send_error))
|
|
result->send_eof(); // Should be safe
|
|
}
|
|
/*
|
|
JOIN::join_free() must be called after the virtual method
|
|
select::send_result_set_metadata() returned control since
|
|
implementation of this method could use data strutcures
|
|
that are released by the method JOIN::join_free().
|
|
*/
|
|
join->join_free();
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
|
|
Reset table rows to contain a null-complement row (all fields are null)
|
|
|
|
  Used only in JOIN::clear() and in do_select() if there were no matching rows.
|
|
|
|
@param join JOIN
|
|
@param cleared_tables Used to mark all cleared tables in the map. Needed for
|
|
unclear_tables() to know which tables to restore to
|
|
their original state.
|
|
*/
|
|
|
|
static void clear_tables(JOIN *join, table_map *cleared_tables)
{
  DBUG_ASSERT(cleared_tables);
  for (uint idx= 0; idx < join->table_count; idx++)
  {
    TABLE *tbl= join->table[idx];
    if (tbl->null_row)
      continue;                                 // Nothing more to do

    /* Record that table number idx was cleared by this call */
    *cleared_tables|= ((table_map) 1) << idx;

    /*
      Remember null bits for the record so that we can restore the
      original const record in unclear_tables()
    */
    if (tbl->s->null_bytes)
      memcpy(tbl->record[1], tbl->null_flags, tbl->s->null_bytes);

    mark_as_null_row(tbl);                      // All fields are NULL
  }
}
|
|
|
|
|
|
/**
|
|
Reverse null marking for tables and restore null bits.
|
|
  This returns the tables to the state they were in before clear_tables().
|
|
|
|
We have to do this because the tables may be re-used in a sub query
|
|
and the subquery will assume that the const tables contains the original
|
|
data before clear_tables().
|
|
*/
|
|
|
|
static void unclear_tables(JOIN *join, table_map *cleared_tables)
{
  for (uint idx= 0; idx < join->table_count; idx++)
  {
    /* Only touch tables whose bit is set in the cleared map */
    if (!(*cleared_tables & (((table_map) 1) << idx)))
      continue;

    TABLE *tbl= join->table[idx];
    /* Copy the null bits back from record[1] */
    if (tbl->s->null_bytes)
      memcpy(tbl->null_flags, tbl->record[1], tbl->s->null_bytes);
    unmark_as_null_row(tbl);
  }
}
|
|
|
|
|
|
/*****************************************************************************
|
|
  Make some simple condition optimizations:
|
|
If there is a test 'field = const' change all refs to 'field' to 'const'
|
|
Remove all dummy tests 'item = item', 'const op const'.
|
|
Remove all 'item is NULL', when item can never be null!
|
|
item->marker should be 0 for all items on entry
|
|
Return in cond_value FALSE if condition is impossible (1 = 2)
|
|
*****************************************************************************/
|
|
|
|
class COND_CMP :public ilink {
|
|
public:
|
|
static void *operator new(size_t size, MEM_ROOT *mem_root)
|
|
{
|
|
return alloc_root(mem_root, size);
|
|
}
|
|
static void operator delete(void *ptr __attribute__((unused)),
|
|
size_t size __attribute__((unused)))
|
|
{ TRASH_FREE(ptr, size); }
|
|
|
|
static void operator delete(void *, MEM_ROOT*) {}
|
|
|
|
Item *and_level;
|
|
Item_bool_func2 *cmp_func;
|
|
COND_CMP(Item *a,Item_bool_func2 *b) :and_level(a),cmp_func(b) {}
|
|
};
|
|
|
|
/**
|
|
Find the multiple equality predicate containing a field.
|
|
|
|
The function retrieves the multiple equalities accessed through
|
|
  the cond_equal structure from current level and up looking for
|
|
an equality containing field. It stops retrieval as soon as the equality
|
|
is found and set up inherited_fl to TRUE if it's found on upper levels.
|
|
|
|
@param cond_equal multiple equalities to search in
|
|
@param field field to look for
|
|
@param[out] inherited_fl set up to TRUE if multiple equality is found
|
|
on upper levels (not on current level of
|
|
cond_equal)
|
|
|
|
@return
|
|
- Item_equal for the found multiple equality predicate if a success;
|
|
- NULL otherwise.
|
|
*/
|
|
|
|
Item_equal *find_item_equal(COND_EQUAL *cond_equal, Field *field,
                            bool *inherited_fl)
{
  /* Becomes TRUE once the current level has been searched without success */
  bool in_upper_level= FALSE;

  for (COND_EQUAL *level= cond_equal; level; level= level->upper_levels)
  {
    List_iterator_fast<Item_equal> level_it(level->current_level);
    for (Item_equal *candidate= level_it++; candidate; candidate= level_it++)
    {
      if (candidate->contains(field))
      {
        *inherited_fl= in_upper_level;
        return candidate;
      }
    }
    in_upper_level= TRUE;
  }

  /* No multiple equality contains the field */
  *inherited_fl= FALSE;
  return 0;
}
|
|
|
|
|
|
/**
|
|
Check whether an equality can be used to build multiple equalities.
|
|
|
|
This function first checks whether the equality (left_item=right_item)
|
|
is a simple equality i.e. the one that equates a field with another field
|
|
or a constant (field=field_item or field=const_item).
|
|
If this is the case the function looks for a multiple equality
|
|
in the lists referenced directly or indirectly by cond_equal inferring
|
|
the given simple equality. If it doesn't find any, it builds a multiple
|
|
equality that covers the predicate, i.e. the predicate can be inferred
|
|
from this multiple equality.
|
|
The built multiple equality could be obtained in such a way:
|
|
create a binary multiple equality equivalent to the predicate, then
|
|
merge it, if possible, with one of old multiple equalities.
|
|
This guarantees that the set of multiple equalities covering equality
|
|
predicates will be minimal.
|
|
|
|
EXAMPLE:
|
|
For the where condition
|
|
@code
|
|
WHERE a=b AND b=c AND
|
|
(b=2 OR f=e)
|
|
@endcode
|
|
the check_equality will be called for the following equality
|
|
predicates a=b, b=c, b=2 and f=e.
|
|
- For a=b it will be called with *cond_equal=(0,[]) and will transform
|
|
*cond_equal into (0,[Item_equal(a,b)]).
|
|
- For b=c it will be called with *cond_equal=(0,[Item_equal(a,b)])
|
|
and will transform *cond_equal into CE=(0,[Item_equal(a,b,c)]).
|
|
- For b=2 it will be called with *cond_equal=(ptr(CE),[])
|
|
and will transform *cond_equal into (ptr(CE),[Item_equal(2,a,b,c)]).
|
|
- For f=e it will be called with *cond_equal=(ptr(CE), [])
|
|
and will transform *cond_equal into (ptr(CE),[Item_equal(f,e)]).
|
|
|
|
@note
|
|
Now only fields that have the same type definitions (verified by
|
|
the Field::eq_def method) are placed to the same multiple equalities.
|
|
Because of this some equality predicates are not eliminated and
|
|
can be used in the constant propagation procedure.
|
|
  We could weaken the equality test as soon as at least one of the
|
|
equal fields is to be equal to a constant. It would require a
|
|
more complicated implementation: we would have to store, in
|
|
general case, its own constant for each fields from the multiple
|
|
equality. But at the same time it would allow us to get rid
|
|
of constant propagation completely: it would be done by the call
|
|
to cond->build_equal_items().
|
|
|
|
|
|
The implementation does not follow exactly the above rules to
|
|
build a new multiple equality for the equality predicate.
|
|
If it processes the equality of the form field1=field2, it
|
|
  looks for multiple equalities me1 containing field1 and me2 containing
|
|
  field2. If only one of them is found the function expands it with
|
|
the lacking field. If multiple equalities for both fields are
|
|
found they are merged. If both searches fail a new multiple equality
|
|
containing just field1 and field2 is added to the existing
|
|
multiple equalities.
|
|
If the function processes the predicate of the form field1=const,
|
|
it looks for a multiple equality containing field1. If found, the
|
|
function checks the constant of the multiple equality. If the value
|
|
is unknown, it is setup to const. Otherwise the value is compared with
|
|
const and the evaluation of the equality predicate is performed.
|
|
When expanding/merging equality predicates from the upper levels
|
|
the function first copies them for the current level. It looks
|
|
acceptable, as this happens rarely. The implementation without
|
|
copying would be much more complicated.
|
|
|
|
For description of how equality propagation works with SJM nests, grep
|
|
for EqualityPropagationAndSjmNests.
|
|
|
|
  @param left_item   left term of the equality to be checked
|
|
@param right_item right term of the equality to be checked
|
|
@param item equality item if the equality originates from a condition
|
|
predicate, 0 if the equality is the result of row
|
|
elimination
|
|
@param cond_equal multiple equalities that must hold together with the
|
|
equality
|
|
|
|
@retval
|
|
TRUE if the predicate is a simple equality predicate to be used
|
|
for building multiple equalities
|
|
@retval
|
|
FALSE otherwise
|
|
*/
|
|
|
|
bool check_simple_equality(THD *thd, const Item::Context &ctx,
                           Item *left_item, Item *right_item,
                           COND_EQUAL *cond_equal)
{
  /*
    The original (possibly wrapped) items are what gets stored in the
    multiple equalities; the unwrapped items are used for the analysis.
  */
  Item *orig_left_item= left_item;
  Item *orig_right_item= right_item;

  /* Look through a plain or view reference on the left side */
  if (left_item->type() == Item::REF_ITEM)
  {
    Item_ref::Ref_Type left_ref= ((Item_ref*)left_item)->ref_type();

    if (left_ref == Item_ref::VIEW_REF ||
        left_ref == Item_ref::REF)
    {
      if (((Item_ref*)left_item)->get_depended_from())
        return FALSE;
      if (left_ref == Item_ref::VIEW_REF &&
          ((Item_direct_view_ref*)left_item)->get_null_ref_table() !=
          NO_NULL_TABLE &&
          !left_item->real_item()->used_tables())
        return FALSE;
      left_item= left_item->real_item();
    }
  }

  /* Same unwrapping for the right side */
  if (right_item->type() == Item::REF_ITEM)
  {
    Item_ref::Ref_Type right_ref= ((Item_ref*)right_item)->ref_type();
    if (right_ref == Item_ref::VIEW_REF ||
        (right_ref == Item_ref::REF))
    {
      if (((Item_ref*)right_item)->get_depended_from())
        return FALSE;
      if (right_ref == Item_ref::VIEW_REF &&
          ((Item_direct_view_ref*)right_item)->get_null_ref_table() !=
          NO_NULL_TABLE &&
          !right_item->real_item()->used_tables())
        return FALSE;
      right_item= right_item->real_item();
    }
  }

  if (left_item->type() == Item::FIELD_ITEM &&
      right_item->type() == Item::FIELD_ITEM &&
      !((Item_field*)left_item)->get_depended_from() &&
      !((Item_field*)right_item)->get_depended_from())
  {
    /* The predicate the form field1=field2 is processed */

    Field *left_field= ((Item_field*) left_item)->field;
    Field *right_field= ((Item_field*) right_item)->field;

    if (!left_field->eq_def(right_field) &&
        !fields_equal_using_narrowing(thd, left_field, right_field))
      return FALSE;

    /* Search for multiple equalities containing field1 and/or field2 */
    bool left_copyfl, right_copyfl;
    Item_equal *left_item_equal=
               find_item_equal(cond_equal, left_field, &left_copyfl);
    Item_equal *right_item_equal=
               find_item_equal(cond_equal, right_field, &right_copyfl);

    /* As (NULL=NULL) != TRUE we can't just remove the predicate f=f */
    if (left_field->eq(right_field)) /* f = f */
      return (!(left_field->maybe_null() && !left_item_equal));

    if (left_item_equal && left_item_equal == right_item_equal)
    {
      /*
        The equality predicate is inference of one of the existing
        multiple equalities, i.e the condition is already covered
        by upper level equalities
      */
      return TRUE;
    }

    /* Copy the found multiple equalities at the current level if needed */
    if (left_copyfl)
    {
      /* left_item_equal of an upper level contains left_item */
      left_item_equal= new (thd->mem_root) Item_equal(thd, left_item_equal);
      left_item_equal->set_context_field(((Item_field*) left_item));
      cond_equal->current_level.push_back(left_item_equal, thd->mem_root);
    }
    if (right_copyfl)
    {
      /* right_item_equal of an upper level contains right_item */
      right_item_equal= new (thd->mem_root) Item_equal(thd, right_item_equal);
      right_item_equal->set_context_field(((Item_field*) right_item));
      cond_equal->current_level.push_back(right_item_equal, thd->mem_root);
    }

    if (left_item_equal)
    {
      /* left item was found in the current or one of the upper levels */
      if (! right_item_equal)
        left_item_equal->add(orig_right_item, thd->mem_root);
      else
      {
        /* Merge two multiple equalities forming a new one */
        left_item_equal->merge(thd, right_item_equal);
        /* Remove the merged multiple equality from the list */
        List_iterator<Item_equal> li(cond_equal->current_level);
        while ((li++) != right_item_equal) ;
        li.remove();
      }
    }
    else
    {
      /* left item was found neither in the current nor in upper levels */
      if (right_item_equal)
        right_item_equal->add(orig_left_item, thd->mem_root);
      else
      {
        /* None of the fields was found in multiple equalities */
        Type_handler_hybrid_field_type
          tmp(orig_left_item->type_handler_for_comparison());
        if (tmp.aggregate_for_comparison(orig_right_item->
                                         type_handler_for_comparison()))
          return false;
        Item_equal *item_equal=
          new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
                                         orig_left_item, orig_right_item,
                                         false);
        item_equal->set_context_field((Item_field*)left_item);
        cond_equal->current_level.push_back(item_equal, thd->mem_root);
      }
    }
    return TRUE;
  }

  {
    /* The predicate of the form field=const/const=field is processed */
    Item *const_item= 0;
    Item_field *field_item= 0;
    Item *orig_field_item= 0;
    if (left_item->type() == Item::FIELD_ITEM &&
        !((Item_field*)left_item)->get_depended_from() &&
        right_item->can_eval_in_optimize())
    {
      orig_field_item= orig_left_item;
      field_item= (Item_field *) left_item;
      const_item= right_item;
    }
    else if (right_item->type() == Item::FIELD_ITEM &&
             !((Item_field*)right_item)->get_depended_from() &&
             left_item->can_eval_in_optimize())
    {
      orig_field_item= orig_right_item;
      field_item= (Item_field *) right_item;
      const_item= left_item;
    }

    if (const_item &&
        field_item->field->test_if_equality_guarantees_uniqueness(const_item))
    {
      /*
        field_item and const_item are arguments of a scalar or a row
        comparison function:
          WHERE column=constant
          WHERE (column, ...) = (constant, ...)

        The owner comparison function has previously called fix_fields(),
        so field_item and const_item should be directly comparable items,
        field_item->cmp_context and const_item->cmp_context should be set.
        In case of string comparison, charsets and collations of
        field_item and const_item should have already been aggregated
        for comparison, all necessary character set converters installed
        and fixed.

        In case of string comparison, const_item can be either:
        - a weaker constant that does not need to be converted to field_item:
            WHERE latin1_field = 'latin1_const'
            WHERE varbinary_field = 'latin1_const'
            WHERE latin1_bin_field = 'latin1_general_ci_const'
        - a stronger constant that does not need to be converted to field_item:
            WHERE latin1_field = binary 0xDF
            WHERE latin1_field = 'a' COLLATE latin1_bin
        - a result of conversion (e.g. from the session character set)
          to the character set of field_item:
            WHERE latin1_field = 'utf8_string_with_latin1_repertoire'
      */
      bool copyfl;

      /* copyfl is set when the equality was found on an upper level */
      Item_equal *item_equal = find_item_equal(cond_equal,
                                               field_item->field, &copyfl);
      if (copyfl)
      {
        /* Work on a copy at the current level instead of the upper one */
        item_equal= new (thd->mem_root) Item_equal(thd, item_equal);
        cond_equal->current_level.push_back(item_equal, thd->mem_root);
        item_equal->set_context_field(field_item);
      }
      Item *const_item2= field_item->field->get_equal_const_item(thd, ctx,
                                                                 const_item);
      if (!const_item2)
        return false;

      if (item_equal)
      {
        /*
          The flag cond_false will be set to 1 after this, if item_equal
          already contains a constant and its value is not equal to
          the value of const_item.
        */
        item_equal->add_const(thd, const_item2);
      }
      else
      {
        /* No multiple equality contains the field yet: start a new one */
        Type_handler_hybrid_field_type
          tmp(orig_left_item->type_handler_for_comparison());
        if (tmp.aggregate_for_comparison(orig_right_item->
                                         type_handler_for_comparison()))
          return false;
        item_equal= new (thd->mem_root) Item_equal(thd, tmp.type_handler(),
                                                   const_item2,
                                                   orig_field_item, true);
        item_equal->set_context_field(field_item);
        cond_equal->current_level.push_back(item_equal, thd->mem_root);
      }
      return TRUE;
    }
  }
  return FALSE;
}
|
|
|
|
|
|
/**
|
|
Convert row equalities into a conjunction of regular equalities.
|
|
|
|
The function converts a row equality of the form (E1,...,En)=(E'1,...,E'n)
|
|
into a list of equalities E1=E'1,...,En=E'n. For each of these equalities
|
|
Ei=E'i the function checks whether it is a simple equality or a row
|
|
equality. If it is a simple equality it is used to expand multiple
|
|
equalities of cond_equal. If it is a row equality it converted to a
|
|
sequence of equalities between row elements. If Ei=E'i is neither a
|
|
simple equality nor a row equality the item for this predicate is added
|
|
to eq_list.
|
|
|
|
@param thd thread handle
|
|
@param left_row left term of the row equality to be processed
|
|
@param right_row right term of the row equality to be processed
|
|
@param cond_equal multiple equalities that must hold together with the
|
|
predicate
|
|
@param eq_list results of conversions of row equalities that are not
|
|
simple enough to form multiple equalities
|
|
|
|
@retval
|
|
TRUE if conversion has succeeded (no fatal error)
|
|
@retval
|
|
FALSE otherwise
|
|
*/
|
|
|
|
static bool check_row_equality(THD *thd, const Arg_comparator *comparators,
                               Item *left_row, Item_row *right_row,
                               COND_EQUAL *cond_equal, List<Item>* eq_list)
{
  const uint n_cols= left_row->cols();
  for (uint col= 0; col < n_cols; col++)
  {
    Item *lhs= left_row->element_index(col);
    Item *rhs= right_row->element_index(col);
    bool converted;

    const bool nested_rows= lhs->type() == Item::ROW_ITEM &&
                            rhs->type() == Item::ROW_ITEM;
    if (nested_rows)
    {
      /*
        ROW-typed SP variables (Item_splocal) also report Item::ROW_ITEM,
        but they cannot occur here: ROW SP variables with nested ROWs are
        not supported yet. Hence both elements are real Item_row objects
        and the casts below are safe.
      */
      DBUG_ASSERT(!lhs->get_item_splocal());
      DBUG_ASSERT(!rhs->get_item_splocal());
      converted= check_row_equality(thd,
                                    comparators[col].subcomparators(),
                                    (Item_row *) lhs,
                                    (Item_row *) rhs,
                                    cond_equal, eq_list);
    }
    else
    {
      /* Scalar pair: compare in the context recorded by its comparator. */
      const Arg_comparator &elem_cmp= comparators[col];
      converted= check_simple_equality(thd,
                                       Item::Context(Item::ANY_SUBST,
                                                 elem_cmp.compare_type_handler(),
                                                 elem_cmp.compare_collation()),
                                       lhs, rhs,
                                       cond_equal);
    }

    if (converted)
      continue;

    /*
      The pair could not be folded into a multiple equality:
      keep it as an ordinary "lhs = rhs" predicate in eq_list.
    */
    Item_func_eq *eq= new (thd->mem_root) Item_func_eq(thd, lhs, rhs);
    if (!eq || eq->set_cmp_func(thd))
      return FALSE;
    eq->quick_fix_field();
    eq_list->push_back(eq, thd->mem_root);
  }
  return TRUE;
}
|
|
|
|
|
|
/**
|
|
Eliminate row equalities and form multiple equalities predicates.
|
|
|
|
This function checks whether the item is a simple equality
|
|
i.e. the one that equates a field with another field or a constant
|
|
(field=field_item or field=constant_item), or, a row equality.
|
|
For a simple equality the function looks for a multiple equality
|
|
in the lists referenced directly or indirectly by cond_equal inferring
|
|
the given simple equality. If it doesn't find any, it builds/expands
|
|
multiple equality that covers the predicate.
|
|
Row equalities are eliminated by substituting them with conjunctive regular
|
|
equalities which are treated in the same way as original equality
|
|
predicates.
|
|
|
|
@param thd thread handle
|
|
@param item predicate to process
|
|
@param cond_equal multiple equalities that must hold together with the
|
|
predicate
|
|
@param eq_list results of conversions of row equalities that are not
|
|
simple enough to form multiple equalities
|
|
|
|
@retval
|
|
TRUE if re-writing rules have been applied
|
|
@retval
|
|
FALSE otherwise, i.e.
|
|
if the predicate is not an equality,
|
|
or, if the equality is neither a simple one nor a row equality,
|
|
or, if the procedure fails by a fatal error.
|
|
*/
|
|
|
|
bool Item_func_eq::check_equality(THD *thd, COND_EQUAL *cond_equal,
                                  List<Item> *eq_list)
{
  Item *lhs= arguments()[0];
  Item *rhs= arguments()[1];

  const bool both_rows= lhs->type() == Item::ROW_ITEM &&
                        rhs->type() == Item::ROW_ITEM;
  if (!both_rows)
  {
    /* Scalar comparison: try to treat it as a simple equality. */
    return check_simple_equality(thd,
                                 Context(ANY_SUBST,
                                         compare_type_handler(),
                                         compare_collation()),
                                 lhs, rhs, cond_equal);
  }

  /*
    Item_splocal::type() returns Item::ROW_ITEM for ROW variables too,
    so a ROW-typed SP variable must be told apart from a real Item_row.
    Example query:
      SELECT 1 FROM DUAL WHERE row_sp_variable=ROW(100,200);
    Such predicates are left untouched.
  */
  if (lhs->get_item_splocal() || rhs->get_item_splocal())
    return false;

  return check_row_equality(thd,
                            cmp.subcomparators(),
                            (Item_row *) lhs,
                            (Item_row *) rhs,
                            cond_equal, eq_list);
}
|
|
|
|
|
|
/**
|
|
Item_xxx::build_equal_items()
|
|
|
|
Replace all equality predicates in a condition referenced by "this"
|
|
by multiple equality items.
|
|
|
|
At each 'and' level the function detects items for equality predicates
|
|
and replaced them by a set of multiple equality items of class Item_equal,
|
|
taking into account inherited equalities from upper levels.
|
|
If an equality predicate is used not in a conjunction it's just
|
|
replaced by a multiple equality predicate.
|
|
For each 'and' level the function set a pointer to the inherited
|
|
multiple equalities in the cond_equal field of the associated
|
|
object of the type Item_cond_and.
|
|
The function also traverses the cond tree and for each field reference
|
|
sets a pointer to the multiple equality item containing the field, if there
|
|
is any. If this multiple equality equates fields to a constant the
|
|
function replaces the field reference by the constant in the cases
|
|
when the field is not of a string type or when the field reference is
|
|
just an argument of a comparison predicate.
|
|
The function also determines the maximum number of members in
|
|
equality lists of each Item_cond_and object assigning it to
|
|
thd->lex->current_select->max_equal_elems.
|
|
|
|
@note
|
|
Multiple equality predicate =(f1,..fn) is equivalent to the conjunction of
|
|
f1=f2, .., fn-1=fn. It substitutes any inference from these
|
|
equality predicates that is equivalent to the conjunction.
|
|
Thus, =(a1,a2,a3) can substitute for ((a1=a3) AND (a2=a3) AND (a2=a1)) as
|
|
it is equivalent to ((a1=a2) AND (a2=a3)).
|
|
The function always makes a substitution of all equality predicates occurred
|
|
in a conjunction for a minimal set of multiple equality predicates.
|
|
This set can be considered as a canonical representation of the
|
|
sub-conjunction of the equality predicates.
|
|
E.g. (t1.a=t2.b AND t2.b>5 AND t1.a=t3.c) is replaced by
|
|
(=(t1.a,t2.b,t3.c) AND t2.b>5), not by
|
|
(=(t1.a,t2.b) AND =(t1.a,t3.c) AND t2.b>5);
|
|
while (t1.a=t2.b AND t2.b>5 AND t3.c=t4.d) is replaced by
|
|
(=(t1.a,t2.b) AND =(t3.c,t4.d) AND t2.b>5),
|
|
but if additionally =(t4.d,t2.b) is inherited, it
|
|
will be replaced by (=(t1.a,t2.b,t3.c,t4.d) AND t2.b>5)
|
|
|
|
The function performs the substitution in a recursive descent by
|
|
the condition tree, passing to the next AND level a chain of multiple
|
|
equality predicates which have been built at the upper levels.
|
|
The Item_equal items built at the level are attached to other
|
|
non-equality conjuncts as a sublist. The pointer to the inherited
|
|
multiple equalities is saved in the and condition object (Item_cond_and).
|
|
This chain allows us for any field reference occurrence easily to find a
|
|
multiple equality that must be held for this occurrence.
|
|
For each AND level we do the following:
|
|
- scan it for all equality predicate (=) items
|
|
- join them into disjoint Item_equal() groups
|
|
- process the included OR conditions recursively to do the same for
|
|
lower AND levels.
|
|
|
|
We need to do things in this order as lower AND levels need to know about
|
|
all possible Item_equal objects in upper levels.
|
|
|
|
@param thd thread handle
|
|
@param inherited path to all inherited multiple equality items
|
|
|
|
@return
|
|
pointer to the transformed condition,
|
|
whose Used_tables_and_const_cache is up to date,
|
|
so no additional update_used_tables() is needed on the result.
|
|
*/
|
|
|
|
COND *Item_cond_and::build_equal_items(THD *thd,
                                       COND_EQUAL *inherited,
                                       bool link_item_fields,
                                       COND_EQUAL **cond_equal_ref)
{
  COND_EQUAL level_equal;
  level_equal.upper_levels= inherited;

  if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
    return this;                          // Fatal error flag is set!

  List<Item> eq_list;
  List<Item> *conjuncts= argument_list();
  List_iterator<Item> conj_it(*conjuncts);

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);

  /*
    First pass over the conjuncts of this level: every equality that can
    be expressed through a multiple equality is removed from the
    conjunction once the corresponding Item_equal has been found/created.

    PS/SP note: removing a node from the AND-OR structure is safe here
    because the structure is restored before each re-execution of any
    prepared statement/stored procedure.
  */
  for (Item *conj= conj_it++; conj; conj= conj_it++)
  {
    if (conj->check_equality(thd, &level_equal, &eq_list))
      conj_it.remove();
  }

  /*
    All predicates of the level may have been eliminated,
    e.g. for (a=a AND b=b AND a=a).
  */
  const bool nothing_left= !conjuncts->elements &&
                           !level_equal.current_level.elements &&
                           !eq_list.elements;
  if (nothing_left)
    return (Item*) Item_true;

  /* Finalize the multiple equalities built for this level. */
  List_iterator_fast<Item_equal> eq_it(level_equal.current_level);
  for (Item_equal *item_equal= eq_it++; item_equal; item_equal= eq_it++)
  {
    item_equal->set_link_equal_fields(link_item_fields);
    item_equal->fix_fields(thd, NULL);
    item_equal->update_used_tables();
    set_if_bigger(thd->lex->current_select->max_equal_elems,
                  item_equal->n_field_items());
  }

  /*
    Store the equalities in this AND item; lower levels inherit from the
    stored copy rather than from the local object.
  */
  m_cond_equal.copy(level_equal);
  level_equal.current_level= m_cond_equal.current_level;
  inherited= &m_cond_equal;

  /*
    Second pass: replace equality predicates in the lower levels of the
    condition expression.
  */
  conj_it.rewind();
  for (Item *conj= conj_it++; conj; conj= conj_it++)
  {
    Item *rebuilt= conj->build_equal_items(thd, inherited, false, NULL);
    /*
      A different item comes back only for standalone equalities.
      Replacing it is fine with PS/SP: the arguments of an AND/OR item
      are restored for each execution.
    */
    if (rebuilt != conj)
      conj_it.replace(rebuilt);
  }

  conjuncts->append(&eq_list);
  conjuncts->append((List<Item> *)&level_equal.current_level);
  update_used_tables();
  if (cond_equal_ref)
    *cond_equal_ref= &m_cond_equal;
  return this;
}
|
|
|
|
|
|
/**
  Item_cond::build_equal_items()

  Generic AND/OR variant: no multiple equalities are collected at this
  level; each argument is processed recursively with the inherited
  equalities, and the used-tables/const caches are rebuilt on the way.
*/
COND *Item_cond::build_equal_items(THD *thd,
                                   COND_EQUAL *inherited,
                                   bool link_item_fields,
                                   COND_EQUAL **cond_equal_ref)
{
  List_iterator<Item> arg_it(*argument_list());

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);

  /*
    Replace equality predicates in the lower levels of the condition
    expression, accumulating used_tables_cache and const_item_cache
    from the (possibly replaced) arguments.
  */
  used_tables_and_const_cache_init();
  for (Item *arg= arg_it++; arg; arg= arg_it++)
  {
    Item *rebuilt= arg->build_equal_items(thd, inherited, false, NULL);
    /*
      A different item comes back only for standalone equalities.
      Replacing it is fine with PS/SP: the arguments of an AND/OR item
      are restored for each execution.
    */
    if (rebuilt != arg)
      arg_it.replace(rebuilt);
    used_tables_and_const_cache_join(rebuilt);
  }
  return this;
}
|
|
|
|
|
|
/**
  Item_func_eq::build_equal_items()

  Handles an equality predicate that forms a whole AND level on its own
  (a "standalone" equality).

  @return
    - Item_true if the equality turned out to be trivially true,
    - a single Item_equal or a single residual equality,
    - a new Item_cond_and when a row equality expands to several items,
    - the result of Item_func::build_equal_items() if the predicate could
      not be converted at all,
    - NULL on error (failed allocation or fix_length_and_dec()).
*/
COND *Item_func_eq::build_equal_items(THD *thd,
                                      COND_EQUAL *inherited,
                                      bool link_item_fields,
                                      COND_EQUAL **cond_equal_ref)
{
  COND_EQUAL cond_equal;
  cond_equal.upper_levels= inherited;
  List<Item> eq_list;

  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  /*
    If an equality predicate forms the whole and level,
    we call it standalone equality and it's processed here.
    E.g. in the following where condition
    WHERE a=5 AND (b=5 or a=c)
    (b=5) and (a=c) are standalone equalities.
    In general we can't leave alone standalone equalities:
    for WHERE a=b AND c=d AND (b=c OR d=5)
    b=c is replaced by =(a,b,c,d).
  */
  if (Item_func_eq::check_equality(thd, &cond_equal, &eq_list))
  {
    Item_equal *item_equal;
    int n= cond_equal.current_level.elements + eq_list.elements;
    if (n == 0)
      return (Item*) Item_true;
    else if (n == 1)
    {
      /* Exactly one item was produced: either an Item_equal ... */
      if ((item_equal= cond_equal.current_level.pop()))
      {
        item_equal->fix_fields(thd, NULL);
        item_equal->update_used_tables();
        set_if_bigger(thd->lex->current_select->max_equal_elems,
                      item_equal->n_field_items());
        item_equal->upper_levels= inherited;
        if (cond_equal_ref)
          *cond_equal_ref= new (thd->mem_root) COND_EQUAL(item_equal,
                                                          thd->mem_root);
        return item_equal;
      }
      /* ... or a residual equality left over from a row equality. */
      Item *res= eq_list.pop();
      res->update_used_tables();
      DBUG_ASSERT(res->type() == FUNC_ITEM);
      return res;
    }
    else
    {
      /*
        Here a new AND level must be created. It can happen only
        when a row equality is processed as a standalone predicate.
      */
      Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd, eq_list);
      /*
        The mem_root allocation may fail; report it the same way as the
        fix_length_and_dec() failure below instead of dereferencing NULL.
      */
      if (!and_cond)
        return NULL;
      and_cond->quick_fix_field();
      List<Item> *cond_args= and_cond->argument_list();
      List_iterator_fast<Item_equal> it(cond_equal.current_level);
      while ((item_equal= it++))
      {
        if (item_equal->fix_length_and_dec(thd))
          return NULL;
        item_equal->update_used_tables();
        set_if_bigger(thd->lex->current_select->max_equal_elems,
                      item_equal->n_field_items());
      }
      /*
        Store the multiple equalities in the new AND item and append
        them to its argument list.
      */
      and_cond->m_cond_equal.copy(cond_equal);
      cond_equal.current_level= and_cond->m_cond_equal.current_level;
      cond_args->append((List<Item> *)&cond_equal.current_level);
      and_cond->update_used_tables();
      if (cond_equal_ref)
        *cond_equal_ref= &and_cond->m_cond_equal;
      return and_cond;
    }
  }
  /* Not convertible: treat it like any other function item. */
  return Item_func::build_equal_items(thd, inherited, link_item_fields,
                                      cond_equal_ref);
}
|
|
|
|
|
|
/**
  Item_func::build_equal_items()

  Default variant for function items: nothing is converted into a
  multiple equality; only equal-field propagation is applied in place.
*/
COND *Item_func::build_equal_items(THD *thd, COND_EQUAL *inherited,
                                   bool link_item_fields,
                                   COND_EQUAL **cond_equal_ref)
{
  /*
    For every field reference in the condition that does not come from
    an equal item predicate, attach a pointer to the multiple equality
    the field belongs to (if any), provided the field is not of a string
    type or the reference is an argument of a comparison predicate.
  */
  COND *propagated= propagate_equal_fields(thd, Context_boolean(), inherited);
  propagated->update_used_tables();
  DBUG_ASSERT(propagated == this);
  DBUG_ASSERT(!cond_equal_ref || !cond_equal_ref[0]);
  return propagated;
}
|
|
|
|
|
|
/**
  Item_equal::build_equal_items()

  Runs the generic Item_func processing and, when the caller asked for
  it, reports this multiple equality through a new COND_EQUAL.
*/
COND *Item_equal::build_equal_items(THD *thd, COND_EQUAL *inherited,
                                    bool link_item_fields,
                                    COND_EQUAL **cond_equal_ref)
{
  COND *result= Item_func::build_equal_items(thd, inherited, link_item_fields,
                                             cond_equal_ref);
  if (!cond_equal_ref)
    return result;

  *cond_equal_ref= new (thd->mem_root) COND_EQUAL(this, thd->mem_root);
  return result;
}
|
|
|
|
|
|
/**
|
|
Build multiple equalities for a condition and all on expressions that
|
|
inherit these multiple equalities.
|
|
|
|
The function first applies the cond->build_equal_items() method
|
|
to build all multiple equalities for condition cond utilizing equalities
|
|
referred through the parameter inherited. The extended set of
|
|
equalities is returned in the structure referred by the cond_equal_ref
|
|
parameter. After this the function calls itself recursively for
|
|
all on expressions whose direct references can be found in join_list
|
|
and who inherit directly the multiple equalities just having built.
|
|
|
|
@note
|
|
The on expression used in an outer join operation inherits all equalities
|
|
from the on expression of the embedding join, if there is any, or
|
|
otherwise - from the where condition.
|
|
This fact is not obvious, but presumably can be proved.
|
|
Consider the following query:
|
|
@code
|
|
SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t2.a=t4.a
|
|
WHERE t1.a=t2.a;
|
|
@endcode
|
|
If the on expression in the query inherits =(t1.a,t2.a), then we
|
|
can build the multiple equality =(t1.a,t2.a,t3.a,t4.a) that infers
|
|
the equality t3.a=t4.a. Although the on expression
|
|
t1.a=t3.a AND t2.a=t4.a AND t3.a=t4.a is not equivalent to the one
|
|
in the query the latter can be replaced by the former: the new query
|
|
will return the same result set as the original one.
|
|
|
|
Interesting that multiple equality =(t1.a,t2.a,t3.a,t4.a) allows us
|
|
to use t1.a=t3.a AND t3.a=t4.a under the on condition:
|
|
@code
|
|
SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
This query equivalent to:
|
|
@code
|
|
SELECT * FROM (t1 LEFT JOIN (t3,t4) ON t1.a=t3.a AND t3.a=t4.a),t2
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
Similarly the original query can be rewritten to the query:
|
|
@code
|
|
SELECT * FROM (t1,t2) LEFT JOIN (t3,t4) ON t2.a=t4.a AND t3.a=t4.a
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
that is equivalent to:
|
|
@code
|
|
SELECT * FROM (t2 LEFT JOIN (t3,t4)ON t2.a=t4.a AND t3.a=t4.a), t1
|
|
WHERE t1.a=t2.a
|
|
@endcode
|
|
Thus, applying equalities from the where condition we basically
|
|
can get more freedom in performing join operations.
|
|
Although we don't use this property now, it probably makes sense to use
|
|
it in the future.
|
|
@param thd Thread handler
|
|
@param cond condition to build the multiple equalities for
|
|
@param inherited path to all inherited multiple equality items
|
|
@param join_list list of join tables to which the condition
|
|
refers to
|
|
@param ignore_on_conds TRUE <-> do not build multiple equalities
|
|
for on expressions
|
|
@param[out] cond_equal_ref pointer to the structure to place built
|
|
equalities in
|
|
@param link_equal_fields equal fields are to be linked
|
|
|
|
@return
|
|
pointer to the transformed condition containing multiple equalities
|
|
*/
|
|
|
|
static COND *build_equal_items(JOIN *join, COND *cond,
                               COND_EQUAL *inherited,
                               List<TABLE_LIST> *join_list,
                               bool ignore_on_conds,
                               COND_EQUAL **cond_equal_ref,
                               bool link_equal_fields)
{
  THD *thd= join->thd;

  *cond_equal_ref= NULL;

  if (cond)
  {
    cond= cond->build_equal_items(thd, inherited, link_equal_fields,
                                  cond_equal_ref);
    if (COND_EQUAL *built= *cond_equal_ref)
    {
      /* Chain the new equalities below the ones inherited so far. */
      built->upper_levels= inherited;
      inherited= built;
    }
  }

  if (!join_list || ignore_on_conds)
    return cond;

  /* Process the ON expressions referenced directly from join_list. */
  List_iterator<TABLE_LIST> tbl_it(*join_list);
  for (TABLE_LIST *tbl= tbl_it++; tbl; tbl= tbl_it++)
  {
    if (!tbl->on_expr)
      continue;

    List<TABLE_LIST> *nested_join_list= NULL;
    if (tbl->nested_join)
      nested_join_list= &tbl->nested_join->join_list;

    /*
      Overwriting on_expr is fine: its old value will be restored
      before re-execution of PS/SP.
    */
    tbl->on_expr= build_equal_items(join, tbl->on_expr, inherited,
                                    nested_join_list, ignore_on_conds,
                                    &tbl->cond_equal);
    if (unlikely(thd->trace_started()))
    {
      /* For a nest, the trace is labelled with its first table's alias. */
      const char *table_name= tbl->nested_join ?
                              tbl->nested_join->join_list.head()->alias.str :
                              tbl->alias.str;
      trace_condition(join->thd, "ON expr", "build_equal_items",
                      tbl->on_expr, table_name);
    }
  }

  return cond;
}
|
|
|
|
|
|
/**
|
|
Compare field items by table order in the execution plan.
|
|
|
|
If field1 and field2 belong to different tables then
|
|
field1 considered as better than field2 if the table containing
|
|
field1 is accessed earlier than the table containing field2.
|
|
The function finds out what of two fields is better according
|
|
this criteria.
|
|
If field1 and field2 belong to the same table then the result
|
|
of comparison depends on whether the fields are parts of
|
|
the key that are used to access this table.
|
|
|
|
@param field1 first field item to compare
|
|
@param field2 second field item to compare
|
|
@param table_join_idx index to tables determining table order
|
|
|
|
@retval
|
|
-1 if field1 is better than field2
|
|
@retval
|
|
1 if field2 is better than field1
|
|
@retval
|
|
0 otherwise
|
|
*/
|
|
|
|
static int compare_fields_by_table_order(Item *field1,
                                         Item *field2,
                                         void *table_join_idx)
{
  Item *real1= field1->real_item();
  Item *real2= field2->real_item();

  /* A constant item always sorts ahead of the other argument. */
  if (field1->const_item() || real1->const_item())
    return -1;
  if (field2->const_item() || real2->const_item())
    return 1;

  Item_field *f1= (Item_field *) real1;
  Item_field *f2= (Item_field *) real2;

  /*
    Outer references: if only f1 is one the result is -1, if only f2 is
    one the result is 1, and if both are the result is 0.
  */
  const bool f1_outer= (f1->used_tables() & OUTER_REF_TABLE_BIT) != 0;
  const bool f2_outer= (f2->used_tables() & OUTER_REF_TABLE_BIT) != 0;
  if (f1_outer || f2_outer)
    return (f2_outer ? 1 : 0) - (f1_outer ? 1 : 0);

  JOIN_TAB **idx= (JOIN_TAB **) table_join_idx;

  JOIN_TAB *tab1= idx[f1->field->table->tablenr];
  JOIN_TAB *tab2= idx[f2->field->table->tablenr];

  /*
    If one of the tables is inside a merged SJM nest and the other one
    isn't, compare the SJM bush roots of the tables instead.
  */
  if (tab1->bush_root_tab != tab2->bush_root_tab)
  {
    if (tab1->bush_root_tab)
      tab1= tab1->bush_root_tab;

    if (tab2->bush_root_tab)
      tab2= tab2->bush_root_tab;
  }

  int order= (int)(tab1 - tab2);
  if (order)
    return order < 0 ? -1 : 1;

  /* Both positions compare equal: decide by the key used for access. */
  JOIN_TAB *tab= idx[f1->field->table->tablenr];
  uint keyno= MAX_KEY;
  if (tab->ref.key_parts)
    keyno= tab->ref.key;
  else if (tab->select && tab->select->quick)
    keyno = tab->select->quick->index;

  if (keyno != MAX_KEY)
  {
    if (f1->field->part_of_key.is_set(keyno))
      order= -1;
    if (f2->field->part_of_key.is_set(keyno))
      order++;
    /*
      At this point:
        both f1 and f2 are components of the key    => order == 0,
        only f1 is a component of the key           => order == -1 (f1 is better),
        only f2 is a component of the key           => order == 1  (f2 is better),
        neither f1 nor f2 is a component of the key => order == 0.
    */
    if (!order)
    {
      /* Tie: the field appearing first among the key parts wins. */
      KEY *key_info= tab->table->key_info + keyno;
      for (uint part= 0; part < key_info->user_defined_key_parts; part++)
      {
        Field *fld= key_info->key_part[part].field;
        if (fld->eq(f1->field))
        {
          order= -1;                            // f1 is better
          break;
        }
        if (fld->eq(f2->field))
        {
          order= 1;                             // f2 is better
          break;
        }
      }
    }
  }

  /* Last resort: order by the position of the field in its table. */
  if (!order)
    order= f1->field->field_index-f2->field->field_index;

  return order < 0 ? -1 : (order ? 1 : 0);
}
|
|
|
|
|
|
/*
  Return the nest embedding the table of the given field item if that
  nest has sj_mat_info with is_used set, and NULL otherwise.
*/
static TABLE_LIST* embedding_sjm(Item *item)
{
  Item_field *fld= (Item_field *) (item->real_item());
  TABLE_LIST *nest= fld->field->table->pos_in_table_list->embedding;
  const bool in_used_sjm= nest && nest->sj_mat_info &&
                          nest->sj_mat_info->is_used;
  return in_used_sjm ? nest : NULL;
}
|
|
|
|
/**
|
|
Generate minimal set of simple equalities equivalent to a multiple equality.
|
|
|
|
The function retrieves the fields of the multiple equality item
|
|
item_equal and for each field f:
|
|
- if item_equal contains const it generates the equality f=const_item;
|
|
- otherwise, if f is not the first field, generates the equality
|
|
f=item_equal->get_first().
|
|
All generated equality are added to the cond conjunction.
|
|
|
|
@param cond condition to add the generated equality to
|
|
@param upper_levels structure to access multiple equality of upper levels
|
|
@param item_equal multiple equality to generate simple equality from
|
|
|
|
@note
|
|
Before generating an equality function checks that it has not
|
|
been generated for multiple equalities of the upper levels.
|
|
E.g. for the following where condition
|
|
WHERE a=5 AND ((a=b AND b=c) OR c>4)
|
|
the upper level AND condition will contain =(5,a),
|
|
while the lower level AND condition will contain =(5,a,b,c).
|
|
When splitting =(5,a,b,c) into a separate equality predicates
|
|
we should omit 5=a, as we have it already in the upper level.
|
|
The following where condition gives us a more complicated case:
|
|
WHERE t1.a=t2.b AND t3.c=t4.d AND (t2.b=t3.c OR t4.e>5 ...) AND ...
|
|
Given the tables are accessed in the order t1->t2->t3->t4 for
|
|
the selected query execution plan the lower level multiple
|
|
equality =(t1.a,t2.b,t3.c,t4.d) formally should be converted to
|
|
t1.a=t2.b AND t1.a=t3.c AND t1.a=t4.d. But t1.a=t2.b will be
|
|
generated for the upper level. Also t3.c=t4.d will be generated there.
|
|
So only t1.a=t3.c should be left in the lower level.
|
|
If cond is equal to 0, then no more than one equality is generated
|
|
and a pointer to it is returned as the result of the function.
|
|
|
|
Equality substitution and semi-join materialization nests:
|
|
|
|
In case join order looks like this:
|
|
|
|
outer_tbl1 outer_tbl2 SJM (inner_tbl1 inner_tbl2) outer_tbl3
|
|
|
|
We must not construct equalities like
|
|
|
|
outer_tbl1.col = inner_tbl1.col
|
|
|
|
because they would get attached to inner_tbl1 and will get evaluated
|
|
during materialization phase, when we don't have current value of
|
|
outer_tbl1.col.
|
|
|
|
Item_equal::get_first() also takes similar measures for dealing with
|
|
equality substitution in presence of SJM nests.
|
|
|
|
Grep for EqualityPropagationAndSjmNests for a more verbose description.
|
|
|
|
@return
|
|
- The condition with generated simple equalities or
|
|
a pointer to the simple generated equality, if success.
|
|
- 0, otherwise.
|
|
*/
|
|
|
|
Item *eliminate_item_equal(THD *thd, COND *cond, COND_EQUAL *upper_levels,
                           Item_equal *item_equal)
{
  List<Item> eq_list;
  /*
    The most recently generated equality. It is pushed to eq_list lazily,
    right before the next one is created or after the loop, so that the
    single-equality case can return it directly.
  */
  Item_func_eq *eq_item= 0;
  /* A multiple equality known to be false collapses to FALSE. */
  if (((Item *) item_equal)->const_item() && !item_equal->val_int())
    return (Item*) Item_false;
  Item *item_const= item_equal->get_const();
  Item_equal_fields_iterator it(*item_equal);
  Item *head;
  TABLE_LIST *current_sjm= NULL;
  Item *current_sjm_head= NULL;

  DBUG_ASSERT(!cond ||
              cond->is_bool_literal() ||
              (cond->type() == Item::FUNC_ITEM &&
               ((Item_func *) cond)->functype() == Item_func::EQ_FUNC) ||
              (cond->type() == Item::COND_ITEM  &&
               ((Item_func *) cond)->functype() == Item_func::COND_AND_FUNC));

  /*
    Pick the "head" item: the constant one or the first in the join order
    (if the first in the join order happens to be inside an SJM nest, that's
    ok, because this is where the value will be unpacked after
    materialization).
  */
  if (item_const)
    head= item_const;
  else
  {
    TABLE_LIST *emb_nest;
    head= item_equal->get_first(NO_PARTICULAR_TAB, NULL);
    it++;                       /* skip the head: it is not equated to itself */
    if ((emb_nest= embedding_sjm(head)))
    {
      current_sjm= emb_nest;
      current_sjm_head= head;
    }
  }

  Item *field_item;
  /*
    For each other item, generate "item=head" equality (except the tables that
    are within SJ-Materialization nests, for those "head" is defined
    differently)
  */
  while ((field_item= it++))
  {
    Item_equal *upper= field_item->find_item_equal(upper_levels);
    Item *item= field_item;
    TABLE_LIST *field_sjm= embedding_sjm(field_item);
    if (!field_sjm)
    {
      current_sjm= NULL;
      current_sjm_head= NULL;
    }

    /*
      Check if "field_item=head" equality is already guaranteed to be true
      on upper AND-levels.
    */
    if (upper)
    {
      TABLE_LIST *native_sjm= embedding_sjm(item_equal->context_field);
      Item *upper_const= upper->get_const();
      if (item_const && upper_const)
      {
        /*
          Upper item also has "field_item=const".
          Don't produce equality if const is equal to item_const.
        */
        Item_func_eq *func= new (thd->mem_root) Item_func_eq(thd, item_const,
                                                             upper_const);
        /*
          Treat a failed allocation or comparator setup as an error, the
          same way it is done for eq_item below, instead of evaluating a
          NULL or half-initialized item.
        */
        if (!func || func->set_cmp_func(thd))
          return 0;
        func->quick_fix_field();
        if (func->val_int())
          item= 0;
      }
      else
      {
        /*
          Look for an earlier member of item_equal that lies in the same
          SJM nest (or equally outside of any) and belongs to the same
          upper multiple equality. If one is found, the loop stops with
          item != field_item and no equality is produced below.
        */
        Item_equal_fields_iterator li(*item_equal);
        while ((item= li++) != field_item)
        {
          if (embedding_sjm(item) == field_sjm &&
              item->find_item_equal(upper_levels) == upper)
            break;
        }
      }
      if (embedding_sjm(field_item) != native_sjm)
        item= NULL; /* Don't produce equality */
    }

    bool produce_equality= MY_TEST(item == field_item);
    if (!item_const && field_sjm && field_sjm != current_sjm)
    {
      /* Entering an SJM nest */
      current_sjm_head= field_item;
      if (!field_sjm->sj_mat_info->is_sj_scan)
        produce_equality= FALSE;
    }

    if (produce_equality)
    {
      /* Flush the previously generated equality before making a new one. */
      if (eq_item && eq_list.push_back(eq_item, thd->mem_root))
        return 0;

      /*
        If we're inside an SJM-nest (current_sjm!=NULL), and the multi-equality
        doesn't include a constant, we should produce equality with the first
        of the equal items in this SJM (except for the first element inside the
        SJM. For that, we produce the equality with the "head" item).

        In other cases, get the "head" item, which is either first of the
        equals on top level, or the constant.
      */
      Item *head_item= (!item_const && current_sjm &&
                        current_sjm_head != field_item) ? current_sjm_head: head;
      eq_item= new (thd->mem_root) Item_func_eq(thd,
                                                field_item->remove_item_direct_ref(),
                                                head_item->remove_item_direct_ref());

      if (!eq_item || eq_item->set_cmp_func(thd))
        return 0;
      eq_item->eval_not_null_tables(0);
      eq_item->quick_fix_field();
    }
    current_sjm= field_sjm;
  }

  /*
    We have produced zero, one, or more pair-wise equalities eq_i. We want to
    return an expression in form:

      cond AND eq_1 AND eq_2 AND eq_3 AND ...

    'cond' is a parameter for this function, which may be NULL, an Item_bool(1),
    or an Item_func_eq or an Item_cond_and.

    We want to return a well-formed condition: no nested Item_cond_and objects,
    or Item_cond_and with a single child:
    - if 'cond' is an Item_cond_and, we add eq_i as its tail
    - if 'cond' is Item_bool(1), we return eq_i
    - otherwise, we create our own Item_cond_and and put 'cond' at the front of
      it.
    - if we have only one condition to return, we don't create an Item_cond_and
  */

  if (eq_item && eq_list.push_back(eq_item, thd->mem_root))
    return 0;
  COND *res= 0;
  switch (eq_list.elements)
  {
  case 0:
    res= cond ? cond : (Item*) Item_true;
    break;
  case 1:
    if (!cond || cond->is_bool_literal())
      res= eq_item;
    break;
  default:
    break;
  }
  if (!res)
  {
    if (cond)
    {
      if (cond->type() == Item::COND_ITEM)
      {
        res= cond;
        ((Item_cond *) res)->add_at_end(&eq_list);
      }
      else if (eq_list.push_front(cond, thd->mem_root))
        return 0;
    }
  }
  if (!res)
    res= new (thd->mem_root) Item_cond_and(thd, eq_list);
  if (res)
  {
    res->quick_fix_field();
    res->update_used_tables();
    res->eval_not_null_tables(0);
  }

  return res;
}
|
|
|
|
|
|
/**
|
|
Substitute every field reference in a condition by the best equal field
|
|
and eliminate all multiple equality predicates.
|
|
|
|
The function retrieves the cond condition and for each encountered
|
|
multiple equality predicate it sorts the field references in it
|
|
according to the order of tables specified by the table_join_idx
|
|
parameter. Then it eliminates the multiple equality predicate,
|
|
replacing it by the conjunction of simple equality predicates
|
|
equating every field from the multiple equality to the first
|
|
field in it, or to the constant, if there is any.
|
|
After this the function retrieves all other conjuncted
|
|
predicates and substitutes every field reference by the field reference
|
|
to the first equal field or equal constant if there are any.
|
|
|
|
@param context_tab Join tab that 'cond' will be attached to, or
|
|
NO_PARTICULAR_TAB. See notes above.
|
|
@param cond condition to process
|
|
@param cond_equal multiple equalities to take into consideration
|
|
@param table_join_idx index to tables determining field preference
|
|
@param do_substitution if false: do not do any field substitution
|
|
|
|
@note
|
|
At the first glance full sort of fields in multiple equality
|
|
seems to be an overkill. Yet it's not the case due to possible
|
|
new fields in multiple equality item of lower levels. We want
|
|
the order in them to comply with the order of upper levels.
|
|
|
|
context_tab may be used to specify which join tab `cond` will be
|
|
attached to. There are two possible cases:
|
|
|
|
1. context_tab != NO_PARTICULAR_TAB
|
|
We're doing substitution for an Item which will be evaluated in the
|
|
context of a particular item. For example, if the optimizer does a
|
|
ref access on "tbl1.key= expr" then
|
|
= equality substitution will be performed on 'expr'
|
|
= it is known in advance that 'expr' will be evaluated when
|
|
table t1 is accessed.
|
|
Note that in this kind of substitution we never have to replace Item_equal
|
|
objects. For example, for
|
|
|
|
t.key= func(col1=col2 AND col2=const)
|
|
|
|
we will not build Item_equal or do equality substitution (if we decide to,
|
|
this function will need to be fixed to handle it)
|
|
|
|
2. context_tab == NO_PARTICULAR_TAB
|
|
We're doing substitution in WHERE/ON condition, which is not yet
|
|
attached to any particular join_tab. We will use information about the
|
|
chosen join order to make "optimal" substitutions, i.e. those that allow
|
|
to apply filtering as soon as possible. See eliminate_item_equal() and
|
|
Item_equal::get_first() for details.
|
|
|
|
@return
|
|
The transformed condition, or NULL in case of error
|
|
*/
|
|
|
|
static COND* substitute_for_best_equal_field(THD *thd, JOIN_TAB *context_tab,
                                             COND *cond,
                                             COND_EQUAL *cond_equal,
                                             void *table_join_idx,
                                             bool do_substitution)
{
  Item_equal *multi_eq;
  COND *const fallback_cond= cond;      // Handed back if elimination fails

  if (cond->type() != Item::COND_ITEM)
  {
    if (cond->type() == Item::FUNC_ITEM &&
        ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
    {
      /* A stand-alone multiple equality: sort it and expand it in place. */
      multi_eq= (Item_equal *) cond;
      multi_eq->sort(&compare_fields_by_table_order, table_join_idx);
      cond_equal= multi_eq->upper_levels;
      if (cond_equal && cond_equal->current_level.head() == multi_eq)
        cond_equal= cond_equal->upper_levels;
      if (multi_eq->get_extraction_flag() == MARKER_DELETION)
        return 0;
      cond= eliminate_item_equal(thd, 0, cond_equal, multi_eq);
      return cond ? cond : fallback_cond;
    }

    if (!do_substitution)
      return cond;

    /*
      Ordinary predicate: apply the field replacement once for every
      multiple equality of the current level and of all upper levels.
    */
    for (; cond_equal; cond_equal= cond_equal->upper_levels)
    {
      List_iterator_fast<Item_equal> eq_it(cond_equal->current_level);
      while ((multi_eq= eq_it++))
      {
        REPLACE_EQUAL_FIELD_ARG arg= {multi_eq, context_tab};
        cond= cond->transform(thd, &Item::replace_equal_field,
                              (uchar *) &arg);
        if (!cond)
          return 0;
      }
    }
    return cond;
  }

  /* From here on 'cond' is an AND/OR node. */
  Item_cond *cond_node= (Item_cond*) cond;
  List<Item> *cond_list= cond_node->argument_list();
  const bool and_level= cond_node->functype() == Item_func::COND_AND_FUNC;

  if (and_level)
  {
    cond_equal= &((Item_cond_and *) cond)->m_cond_equal;
    /* Detach the Item_equal objects from the argument list of the AND. */
    cond_list->disjoin((List<Item> *) &cond_equal->current_level);

    List_iterator_fast<Item_equal> sort_it(cond_equal->current_level);
    while ((multi_eq= sort_it++))
      multi_eq->sort(&compare_fields_by_table_order, table_join_idx);
  }

  /*
    Recurse into the arguments. Only arguments of AND/OR items get
    replaced, which is what keeps this safe for PS/SP re-execution.
  */
  List_iterator<Item> arg_it(*cond_list);
  Item *arg;
  while ((arg= arg_it++))
  {
    Item *substituted= substitute_for_best_equal_field(thd, context_tab,
                                                       arg, cond_equal,
                                                       table_join_idx,
                                                       do_substitution);
    if (substituted && substituted != arg)
      arg_it.replace(substituted);
  }

  if (!and_level)
    return cond;

  /* Turn the multiple equalities of this AND level into plain equalities. */
  COND *eq_cond= 0;
  bool always_false= false;
  bool all_deleted= true;
  List_iterator_fast<Item_equal> elim_it(cond_equal->current_level);
  while ((multi_eq= elim_it++))
  {
    if (multi_eq->get_extraction_flag() == MARKER_DELETION)
      continue;
    all_deleted= false;
    eq_cond= eliminate_item_equal(thd, eq_cond, cond_equal->upper_levels,
                                  multi_eq);
    if (!eq_cond)
      break;                            // Elimination failed
    if (eq_cond->is_bool_literal() && !eq_cond->val_bool())
    {
      /*
        eliminate_item_equal() came back with a false boolean literal, so
        the whole conjunction is false: that literal becomes the result.
      */
      cond= eq_cond;
      always_false= true;
      break;
    }
  }

  if (eq_cond && !always_false)
  {
    /* The generated equalities go in front of all other conditions. */
    if (eq_cond->type() == Item::COND_ITEM)
      cond_node->add_at_head(((Item_cond *) eq_cond)->argument_list());
    else if (cond_list->is_empty())
      cond= eq_cond;
    else if (!eq_cond->is_bool_literal() &&      // skip an always-true literal
             cond_list->push_front(eq_cond, thd->mem_root))
      eq_cond= 0;
  }

  if (!eq_cond && !all_deleted)
  {
    /*
      Out of memory during the transformation. Re-attach the Item_equal
      objects so that the condition we return is the one we started with.
    */
    cond_list->append((List<Item> *) &cond_equal->current_level);
  }
  return cond;
}
|
|
|
|
|
|
/**
|
|
Check appearance of new constant items in multiple equalities
|
|
of a condition after reading a constant table.
|
|
|
|
The function retrieves the cond condition and for each encountered
|
|
multiple equality checks whether new constants have appeared after
|
|
reading the constant (single row) table tab. If so it adjusts
|
|
the multiple equality appropriately.
|
|
|
|
@param cond condition whose multiple equalities are to be checked
|
|
@param tab constant table that has been read
|
|
@param const_key mark key parts as constant
|
|
*/
|
|
|
|
static void update_const_equal_items(THD *thd, COND *cond, JOIN_TAB *tab,
                                     bool const_key)
{
  /* Nothing to do for conditions that do not refer to the table of 'tab'. */
  if (!(cond->used_tables() & tab->table->map))
    return;

  if (cond->type() == Item::COND_ITEM)
  {
    Item_cond *cond_node= (Item_cond*) cond;
    List<Item> *cond_list= cond_node->argument_list();
    List_iterator_fast<Item> arg_it(*cond_list);
    Item *arg;
    while ((arg= arg_it++))
    {
      /* Key parts are marked only below a top-level AND. */
      update_const_equal_items(thd, arg, tab,
                               cond->is_top_level_item() &&
                               cond_node->functype() ==
                               Item_func::COND_AND_FUNC);
    }
    return;
  }

  if (cond->type() != Item::FUNC_ITEM ||
      ((Item_func*) cond)->functype() != Item_func::MULT_EQUAL_FUNC)
    return;

  Item_equal *multi_eq= (Item_equal *) cond;
  const bool had_const= multi_eq->get_const() != NULL;
  multi_eq->update_const(thd);
  if (had_const || !multi_eq->get_const())
    return;                             // No constant has newly appeared

  /* Update keys for range analysis */
  Item_equal_fields_iterator field_it(*multi_eq);
  while (field_it++)
  {
    Field *field= field_it.get_curr_field();
    JOIN_TAB *field_join_tab= field->table->reginfo.join_tab;
    key_map usable_keys= field->key_start;
    usable_keys.intersect(field->table->keys_in_use_for_query);
    field_join_tab->const_keys.merge(usable_keys);

    if (usable_keys.is_clear_all())
      continue;

    /*
      The field is now known to be constant: locate the key part that
      corresponds to it and record that part in const_key_parts.
    */
    TABLE *field_table= field->table;
    for (KEYUSE *use= field_join_tab->keyuse;
         use && use->table == field_table;
         use++)
    {
      if (!const_key ||
          use->is_for_hash_join() || !usable_keys.is_set(use->key))
        continue;
      if (field_table->key_info[use->key].key_part[use->keypart].field ==
          field)
        field_table->const_key_parts[use->key]|= use->keypart_map;
    }
  }
}
|
|
|
|
|
|
/**
|
|
Check if
|
|
WHERE expr=value AND expr=const
|
|
can be rewritten as:
|
|
WHERE const=value AND expr=const
|
|
|
|
@param target - the target operator whose "expr" argument will be
|
|
replaced to "const".
|
|
@param target_expr - the target's "expr" which will be replaced to "const".
|
|
@param target_value - the target's second argument, it will remain unchanged.
|
|
@param source - the equality expression ("=" or "<=>") that
|
|
can be used to rewrite the "target" part
|
|
(under certain conditions, see the code).
|
|
@param source_expr - the source's "expr". It should be exactly equal to
|
|
the target's "expr" to make condition rewrite possible.
|
|
@param source_const - the source's "const" argument, it will be inserted
|
|
into "target" instead of "expr".
|
|
*/
|
|
static bool
|
|
can_change_cond_ref_to_const(Item_bool_func2 *target,
|
|
Item *target_expr, Item *target_value,
|
|
Item_bool_func2 *source,
|
|
Item *source_expr, Item *source_const)
|
|
{
|
|
return target_expr->eq(source_expr,0) &&
|
|
target_value != source_const &&
|
|
target->compare_type_handler()->
|
|
can_change_cond_ref_to_const(target, target_expr, target_value,
|
|
source, source_expr, source_const);
|
|
}
|
|
|
|
|
|
/*
|
|
change field = field to field = const for each found field = const in the
|
|
and_level
|
|
*/
|
|
|
|
static void
|
|
change_cond_ref_to_const(THD *thd, I_List<COND_CMP> *save_list,
|
|
Item *and_father, Item *cond,
|
|
Item_bool_func2 *field_value_owner,
|
|
Item *field, Item *value)
|
|
{
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
bool and_level= ((Item_cond*) cond)->functype() ==
|
|
Item_func::COND_AND_FUNC;
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
change_cond_ref_to_const(thd, save_list,and_level ? cond : item, item,
|
|
field_value_owner, field, value);
|
|
return;
|
|
}
|
|
if (cond->eq_cmp_result() == Item::COND_OK)
|
|
return; // Not a boolean function
|
|
|
|
Item_bool_func2 *func= (Item_bool_func2*) cond;
|
|
Item **args= func->arguments();
|
|
Item *left_item= args[0];
|
|
Item *right_item= args[1];
|
|
Item_func::Functype functype= func->functype();
|
|
|
|
if (can_change_cond_ref_to_const(func, right_item, left_item,
|
|
field_value_owner, field, value))
|
|
{
|
|
Item *tmp=value->clone_item(thd);
|
|
if (tmp)
|
|
{
|
|
tmp->collation.set(right_item->collation);
|
|
thd->change_item_tree(args + 1, tmp);
|
|
func->update_used_tables();
|
|
if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
|
|
&& and_father != cond && !left_item->const_item())
|
|
{
|
|
cond->marker= MARKER_CHANGE_COND;
|
|
COND_CMP *tmp2;
|
|
/* Will work, even if malloc would fail */
|
|
if ((tmp2= new (thd->mem_root) COND_CMP(and_father, func)))
|
|
save_list->push_back(tmp2);
|
|
}
|
|
/*
|
|
LIKE can be optimized for BINARY/VARBINARY/BLOB columns, e.g.:
|
|
|
|
from: WHERE CONCAT(c1)='const1' AND CONCAT(c1) LIKE 'const2'
|
|
to: WHERE CONCAT(c1)='const1' AND 'const1' LIKE 'const2'
|
|
|
|
So make sure to use set_cmp_func() only for non-LIKE operators.
|
|
*/
|
|
if (functype != Item_func::LIKE_FUNC)
|
|
((Item_bool_rowready_func2*) func)->set_cmp_func(thd);
|
|
}
|
|
}
|
|
else if (can_change_cond_ref_to_const(func, left_item, right_item,
|
|
field_value_owner, field, value))
|
|
{
|
|
Item *tmp= value->clone_item(thd);
|
|
if (tmp)
|
|
{
|
|
tmp->collation.set(left_item->collation);
|
|
thd->change_item_tree(args, tmp);
|
|
value= tmp;
|
|
func->update_used_tables();
|
|
if ((functype == Item_func::EQ_FUNC || functype == Item_func::EQUAL_FUNC)
|
|
&& and_father != cond && !right_item->const_item())
|
|
{
|
|
args[0]= args[1]; // For easy check
|
|
thd->change_item_tree(args + 1, value);
|
|
cond->marker= MARKER_CHANGE_COND;
|
|
COND_CMP *tmp2;
|
|
/* Will work, even if malloc would fail */
|
|
if ((tmp2=new (thd->mem_root) COND_CMP(and_father, func)))
|
|
save_list->push_back(tmp2);
|
|
}
|
|
if (functype != Item_func::LIKE_FUNC)
|
|
((Item_bool_rowready_func2*) func)->set_cmp_func(thd);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static void
|
|
propagate_cond_constants(THD *thd, I_List<COND_CMP> *save_list,
|
|
COND *and_father, COND *cond)
|
|
{
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
bool and_level= ((Item_cond*) cond)->functype() ==
|
|
Item_func::COND_AND_FUNC;
|
|
List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
I_List<COND_CMP> save;
|
|
while ((item=li++))
|
|
{
|
|
propagate_cond_constants(thd, &save,and_level ? cond : item, item);
|
|
}
|
|
if (and_level)
|
|
{ // Handle other found items
|
|
I_List_iterator<COND_CMP> cond_itr(save);
|
|
COND_CMP *cond_cmp;
|
|
while ((cond_cmp=cond_itr++))
|
|
{
|
|
Item **args= cond_cmp->cmp_func->arguments();
|
|
if (!args[0]->const_item())
|
|
change_cond_ref_to_const(thd, &save,cond_cmp->and_level,
|
|
cond_cmp->and_level,
|
|
cond_cmp->cmp_func, args[0], args[1]);
|
|
}
|
|
}
|
|
}
|
|
else if (and_father != cond && cond->marker == MARKER_UNUSED) // In a AND group
|
|
{
|
|
if (cond->type() == Item::FUNC_ITEM &&
|
|
(((Item_func*) cond)->functype() == Item_func::EQ_FUNC ||
|
|
((Item_func*) cond)->functype() == Item_func::EQUAL_FUNC))
|
|
{
|
|
Item_bool_func2 *func= dynamic_cast<Item_bool_func2*>(cond);
|
|
Item **args= func->arguments();
|
|
bool left_const= args[0]->can_eval_in_optimize();
|
|
bool right_const= args[1]->can_eval_in_optimize();
|
|
if (!(left_const && right_const) &&
|
|
args[0]->cmp_type() == args[1]->cmp_type())
|
|
{
|
|
if (right_const)
|
|
{
|
|
resolve_const_item(thd, &args[1], args[0]);
|
|
func->update_used_tables();
|
|
change_cond_ref_to_const(thd, save_list, and_father, and_father,
|
|
func, args[0], args[1]);
|
|
}
|
|
else if (left_const)
|
|
{
|
|
resolve_const_item(thd, &args[0], args[1]);
|
|
func->update_used_tables();
|
|
change_cond_ref_to_const(thd, save_list, and_father, and_father,
|
|
func, args[1], args[0]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
Simplify joins replacing outer joins by inner joins whenever it's
|
|
possible.
|
|
|
|
The function, during a retrieval of join_list, eliminates those
|
|
outer joins that can be converted into inner join, possibly nested.
|
|
It also moves the on expressions for the converted outer joins
|
|
and from inner joins to conds.
|
|
The function also calculates some attributes for nested joins:
|
|
- used_tables
|
|
- not_null_tables
|
|
- dep_tables.
|
|
- on_expr_dep_tables
|
|
The first two attributes are used to test whether an outer join can
|
|
be substituted for an inner join. The third attribute represents the
|
|
relation 'to be dependent on' for tables. If table t2 is dependent
|
|
on table t1, then in any evaluated execution plan table access to
|
|
table t1 must precede access to table t2. This relation is used also
|
|
to check whether the query contains invalid cross-references.
|
|
The fourth attribute is an auxiliary one and is used to calculate
|
|
dep_tables.
|
|
As the attribute dep_tables qualifies possible orders of tables in the
|
|
execution plan, the dependencies required by the straight join
|
|
modifiers are reflected in this attribute as well.
|
|
The function also removes all braces that can be removed from the join
|
|
expression without changing its meaning.
|
|
|
|
@note
|
|
An outer join can be replaced by an inner join if the where condition
|
|
or the on expression for an embedding nested join contains a conjunctive
|
|
predicate rejecting null values for some attribute of the inner tables.
|
|
|
|
E.g. in the query:
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
|
|
@endcode
|
|
the predicate t2.b < 5 rejects nulls.
|
|
The query is converted first to:
|
|
@code
|
|
SELECT * FROM t1 INNER JOIN t2 ON t2.a=t1.a WHERE t2.b < 5
|
|
@endcode
|
|
then to the equivalent form:
|
|
@code
|
|
SELECT * FROM t1, t2 WHERE t2.b < 5 AND t2.a=t1.a
|
|
@endcode
|
|
|
|
|
|
Similarly the following query:
|
|
@code
|
|
SELECT * from t1 LEFT JOIN (t2, t3) ON t2.a=t1.a AND t3.b=t1.b
|
|
WHERE t2.c < 5
|
|
@endcode
|
|
is converted to:
|
|
@code
|
|
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
|
|
|
|
@endcode
|
|
|
|
One conversion might trigger another:
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a
|
|
LEFT JOIN t3 ON t3.b=t2.b
|
|
WHERE t3 IS NOT NULL =>
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t1.a, t3
|
|
WHERE t3 IS NOT NULL AND t3.b=t2.b =>
|
|
SELECT * FROM t1, t2, t3
|
|
WHERE t3 IS NOT NULL AND t3.b=t2.b AND t2.a=t1.a
|
|
@endcode
|
|
|
|
The function removes all unnecessary braces from the expression
|
|
produced by the conversions.
|
|
E.g.
|
|
@code
|
|
SELECT * FROM t1, (t2, t3) WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
|
|
@endcode
|
|
finally is converted to:
|
|
@code
|
|
SELECT * FROM t1, t2, t3 WHERE t2.c < 5 AND t2.a=t1.a AND t3.b=t1.b
|
|
|
|
@endcode
|
|
|
|
|
|
It also will remove braces from the following queries:
|
|
@code
|
|
SELECT * from (t1 LEFT JOIN t2 ON t2.a=t1.a) LEFT JOIN t3 ON t3.b=t2.b
|
|
SELECT * from (t1, (t2,t3)) WHERE t1.a=t2.a AND t2.b=t3.b.
|
|
@endcode
|
|
|
|
The benefit of this simplification procedure is that it might return
|
|
a query for which the optimizer can evaluate execution plan with more
|
|
join orders. With a left join operation the optimizer does not
|
|
consider any plan where one of the inner tables is before some of outer
|
|
tables.
|
|
|
|
IMPLEMENTATION
|
|
The function is implemented by a recursive procedure. On the recursive
|
|
ascent all attributes are calculated, all outer joins that can be
|
|
converted are replaced and then all unnecessary braces are removed.
|
|
As join list contains join tables in the reverse order sequential
|
|
elimination of outer joins does not require extra recursive calls.
|
|
|
|
SEMI-JOIN NOTES
|
|
Remove all semi-joins that are within another semi-join (i.e. have
|
|
an "ancestor" semi-join nest)
|
|
|
|
EXAMPLES
|
|
Here is an example of a join query with invalid cross references:
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN t2 ON t2.a=t3.a LEFT JOIN t3 ON t3.b=t1.b
|
|
@endcode
|
|
|
|
@param join reference to the query info
|
|
@param join_list list representation of the join to be converted
|
|
@param conds conditions to add on expressions for converted joins
|
|
@param top true <=> conds is the where condition
|
|
@param in_sj TRUE <=> processing semi-join nest's children
|
|
@return
|
|
- The new condition, if success
|
|
- 0, otherwise
|
|
*/
|
|
|
|
static COND *
simplify_joins(JOIN *join, List<TABLE_LIST> *join_list, COND *conds, bool top,
               bool in_sj)
{
  TABLE_LIST *cur;
  NESTED_JOIN *nest;
  TABLE_LIST *prev= 0;
  List_iterator<TABLE_LIST> join_it(*join_list);
  const bool all_straight= MY_TEST(join->select_options &
                                   SELECT_STRAIGHT_JOIN);
  DBUG_ENTER("simplify_joins");

  /*
    First pass: try to simplify the join operations of join_list.
    The outermost join operation is checked for conversion first.
  */
  while ((cur= join_it++))
  {
    table_map used_tables;
    table_map not_null_tables= (table_map) 0;

    nest= cur->nested_join;
    if (nest)
    {
      /* A nested join: process its own join list before anything else. */
      if (cur->on_expr)
      {
        /*
          Check the null-rejecting predicates of the attached ON
          expression E. When one is found over an attribute of an inner
          table of an embedded outer join, that outer join becomes an
          inner join and its ON expression is added to E.
        */
        Item *new_on_expr= simplify_joins(join, &nest->join_list,
                                          cur->on_expr, FALSE,
                                          in_sj || cur->sj_on_expr);

        if (!cur->prep_on_expr || new_on_expr != cur->on_expr)
        {
          DBUG_ASSERT(new_on_expr);

          cur->on_expr= new_on_expr;
          cur->prep_on_expr= new_on_expr->copy_andor_structure(join->thd);
        }
      }
      nest->used_tables= (table_map) 0;
      nest->not_null_tables= (table_map) 0;
      conds= simplify_joins(join, &nest->join_list, conds, top,
                            in_sj || cur->sj_on_expr);
      used_tables= nest->used_tables;
      not_null_tables= nest->not_null_tables;
      /* The following two might become unequal after table elimination: */
      nest->n_tables= nest->join_list.elements;
    }
    else
    {
      if (!cur->prep_on_expr)
        cur->prep_on_expr= cur->on_expr;
      used_tables= cur->get_map();
      if (conds)
        not_null_tables= conds->not_null_tables();
    }

    /* Accumulate the attributes in the embedding nested join, if any. */
    if (cur->embedding)
    {
      NESTED_JOIN *outer_nest= cur->embedding->nested_join;
      outer_nest->used_tables|= used_tables;
      outer_nest->not_null_tables|= not_null_tables;
    }

    if (!(cur->outer_join & (JOIN_TYPE_LEFT | JOIN_TYPE_RIGHT)) ||
        (used_tables & not_null_tables))
    {
      /*
        Some conjunctive predicate rejects nulls for one of the inner
        tables => the outer join can be replaced by an inner join.
      */
      if (cur->outer_join && !cur->embedding && cur->table)
        cur->table->maybe_null= FALSE;
      cur->outer_join= 0;
      if (!(all_straight || cur->straight))
      {
        cur->dep_tables= 0;
        for (TABLE_LIST *emb= cur->embedding; emb; emb= emb->embedding)
        {
          if (emb->nested_join->join_list.head()->outer_join)
          {
            if (!emb->sj_subq_pred)
              cur->dep_tables= emb->dep_tables;
            break;
          }
        }
      }
      if (cur->on_expr)
      {
        /* Add ON expression to the WHERE or upper-level ON condition. */
        if (conds)
        {
          conds= and_conds(join->thd, conds, cur->on_expr);
          conds->top_level_item();
          /* conds is always a new item as both cond and on_expr existed */
          DBUG_ASSERT(!conds->fixed());
          conds->fix_fields(join->thd, &conds);
        }
        else
          conds= cur->on_expr;
        cur->prep_on_expr= cur->on_expr= 0;
      }
    }

    /*
      At this point on_expr is left only on inner tables of outer joins
      that could not be converted.
    */
    if (cur->on_expr)
    {
      const table_map on_expr_tables= cur->on_expr->used_tables();
      cur->dep_tables|= on_expr_tables;
      if (cur->embedding)
      {
        cur->dep_tables&= ~cur->embedding->nested_join->used_tables;
        /*
          The embedding table depends on the tables used
          in embedded ON expressions.
        */
        cur->embedding->on_expr_dep_tables|= on_expr_tables;
      }
      else
        cur->dep_tables&= ~cur->get_map();
    }

    if (prev)
    {
      /* The order of tables is reverse: prev follows cur */
      if (prev->straight || all_straight)
        prev->dep_tables|= used_tables;
      if (prev->on_expr)
      {
        prev->dep_tables|= cur->on_expr_dep_tables;
        const table_map prev_used_tables= prev->nested_join ?
                                          prev->nested_join->used_tables :
                                          prev->get_map();
        /*
          If the ON expression refers only to inner tables we still make
          the inner tables dependent on the outer tables. Depending on a
          single outer table would be enough, but this is a rare case.
          Note:
          RAND_TABLE_BIT must not be counted, as it would prevent the
          update of inner table dependencies. This can happen, for
          example, when RAND() is used in a JOIN ON clause.
        */
        if (!((prev->on_expr->used_tables() &
               ~(OUTER_REF_TABLE_BIT | RAND_TABLE_BIT)) &
              ~prev_used_tables))
          prev->dep_tables|= used_tables;
      }
    }
    prev= cur;
  }

  /*
    Second pass: flatten nested joins that can be flattened.
    no ON expression and not a semi-join => can be flattened.
  */
  join_it.rewind();
  while ((cur= join_it++))
  {
    nest= cur->nested_join;
    if (cur->sj_on_expr && !in_sj)
    {
      /*
        A semi-join that is not contained within another semi-join is
        left intact (otherwise it is flattened).

        It must appear in join->select_lex->sj_nests only once, so look
        it up before adding it.
      */
      bool already_listed= false;
      List_iterator_fast<TABLE_LIST> sj_it(join->select_lex->sj_nests);
      TABLE_LIST *known_nest;
      while ((known_nest= sj_it++))
      {
        if (known_nest == cur)
        {
          already_listed= true;
          break;
        }
      }
      if (already_listed)
        continue;
      join->select_lex->sj_nests.push_back(cur, join->thd->mem_root);

      /*
        Also, walk through semi-join children and mark those that are now
        top-level
      */
      List_iterator<TABLE_LIST> child_it(nest->join_list);
      TABLE_LIST *child;
      while ((child= child_it++))
      {
        if (!child->on_expr && child->table)
          child->table->maybe_null= FALSE;
      }
    }
    else if (nest && !cur->on_expr)
    {
      /* Replace the nest by its children, lifted one level up. */
      List_iterator<TABLE_LIST> child_it(nest->join_list);
      List<TABLE_LIST> lifted;
      TABLE_LIST *child;
      while ((child= child_it++))
      {
        child->embedding= cur->embedding;
        if (!child->embedding && !child->on_expr && child->table)
          child->table->maybe_null= FALSE;
        child->join_list= cur->join_list;
        lifted.push_back(child, join->thd->mem_root);
        child->dep_tables|= cur->dep_tables;
      }
      join_it.replace(lifted);
    }
  }
  DBUG_RETURN(conds);
}
|
|
|
|
|
|
/**
|
|
Assign each nested join structure a bit in nested_join_map.
|
|
|
|
Assign each nested join structure (except ones that embed only one element
|
|
and so are redundant) a bit in nested_join_map.
|
|
|
|
@param join Join being processed
|
|
@param join_list List of tables
|
|
@param first_unused Number of first unused bit in nested_join_map before the
|
|
call
|
|
|
|
@note
|
|
This function is called after simplify_joins(), when there are no
|
|
redundant nested joins, #non_redundant_nested_joins <= #tables_in_join so
|
|
we will not run out of bits in nested_join_map.
|
|
|
|
@return
|
|
First unused bit in nested_join_map after the call.
|
|
*/
|
|
|
|
static uint build_bitmap_for_nested_joins(List<TABLE_LIST> *join_list,
                                          uint first_unused)
{
  List_iterator<TABLE_LIST> join_it(*join_list);
  TABLE_LIST *cur;
  DBUG_ENTER("build_bitmap_for_nested_joins");
  while ((cur= join_it++))
  {
    NESTED_JOIN *nest= cur->nested_join;
    /*
      simplify_joins() guarantees that a nested join with a single child
      represents a single table VIEW (the child being the underlying
      table). Such nests get no bit because
      1. it would be redundant (a "sequence" of one table cannot be
         interleaved with anything)
      2. we could otherwise run out of bits in nested_join_map.
    */
    if (!nest || nest->n_tables == 1)
      continue;

    /* Don't assign bits to sj-nests */
    if (cur->on_expr)
    {
      nest->nj_map= (nested_join_map) 1 << first_unused;
      first_unused++;
    }
    first_unused= build_bitmap_for_nested_joins(&nest->join_list,
                                                first_unused);
  }
  DBUG_RETURN(first_unused);
}
|
|
|
|
|
|
/**
|
|
Set NESTED_JOIN::counter and n_tables in all nested joins in passed list.
|
|
|
|
For all nested joins contained in the passed join_list (including its
|
|
children), set:
|
|
- nested_join->counter=0
|
|
- nested_join->n_tables= {number of non-degenerate direct children}.
|
|
|
|
Non-degenerate means non-const base table or a join nest that has a
|
|
non-degenerate child.
|
|
|
|
@param join_list List of nested joins to process. It may also contain base
|
|
tables which will be ignored.
|
|
*/
|
|
|
|
static uint reset_nj_counters(JOIN *join, List<TABLE_LIST> *join_list)
{
  List_iterator<TABLE_LIST> join_it(*join_list);
  TABLE_LIST *cur;
  DBUG_ENTER("reset_nj_counters");
  uint live_children= 0;
  while ((cur= join_it++))
  {
    NESTED_JOIN *nest= cur->nested_join;
    bool eliminated_nest= false;
    if (nest)
    {
      /* Reset the nest and recount its own non-degenerate children. */
      nest->counter= 0;
      nest->n_tables= reset_nj_counters(join, &nest->join_list);
      eliminated_nest= !nest->n_tables;
    }
    const table_map removed_tables= join->eliminated_tables |
                                    join->const_table_map;

    /*
      A nest counts unless it has no child left; a base table counts
      unless it was eliminated or is a constant table.
    */
    const bool counts= nest ? !eliminated_nest
                            : (cur->table->map & ~removed_tables) != 0;
    if (counts)
      live_children++;
  }
  DBUG_RETURN(live_children);
}
|
|
|
|
|
|
/**
|
|
Check interleaving with inner tables of an outer join for
|
|
extension table.
|
|
|
|
Check if table next_tab can be added to current partial join order, and
|
|
if yes, record that it has been added.
|
|
|
|
The function assumes that both current partial join order and its
|
|
extension with next_tab are valid wrt table dependencies.
|
|
|
|
@verbatim
|
|
IMPLEMENTATION
|
|
LIMITATIONS ON JOIN ORDER
|
|
The nested [outer] joins executioner algorithm imposes these
|
|
limitations on join order:
|
|
1. "Outer tables first" - any "outer" table must be before any
|
|
corresponding "inner" table.
|
|
2. "No interleaving" - tables inside a nested join must form a
|
|
continuous sequence in join order (i.e. the sequence must not be
|
|
interrupted by tables that are outside of this nested join).
|
|
|
|
#1 is checked elsewhere, this function checks #2 provided that #1 has
|
|
been already checked.
|
|
|
|
WHY NEED NON-INTERLEAVING
|
|
Consider an example:
|
|
|
|
select * from t0 join t1 left join (t2 join t3) on cond1
|
|
|
|
The join order "t1 t2 t0 t3" is invalid:
|
|
|
|
table t0 is outside of the nested join, so WHERE condition
|
|
for t0 is attached directly to t0 (without triggers, and it
|
|
may be used to access t0). Applying WHERE(t0) to (t2,t0,t3)
|
|
record is invalid as we may miss combinations of (t1, t2, t3)
|
|
that satisfy condition cond1, and produce a null-complemented
|
|
(t1, t2.NULLs, t3.NULLs) row, which should not have been
|
|
produced.
|
|
|
|
If table t0 is not between t2 and t3, the problem doesn't exist:
|
|
If t0 is located after (t2,t3), WHERE(t0) is applied after nested
|
|
join processing has finished.
|
|
If t0 is located before (t2,t3), predicates like WHERE_cond(t0, t2)
|
|
are wrapped into condition triggers, which takes care of correct
|
|
nested join processing.
|
|
|
|
HOW IT IS IMPLEMENTED
|
|
The limitations on join order can be rephrased as follows: for valid
|
|
join order one must be able to:
|
|
1. write down the used tables in the join order on one line.
|
|
2. for each nested join, put one '(' and one ')' on the said line
|
|
3. write "LEFT JOIN" and "ON (...)" where appropriate
|
|
4. get a query equivalent to the query we're trying to execute.
|
|
|
|
Calls to check_interleaving_with_nj() are equivalent to writing the
|
|
above described line from left to right.
|
|
|
|
A single check_interleaving_with_nj(A,B) call is equivalent
|
|
to writing table B and appropriate brackets on condition that
|
|
table A and appropriate brackets is the last what was
|
|
written. Graphically the transition is as follows:
|
|
|
|
+---- current position
|
|
|
|
|
... last_tab ))) | ( next_tab ) )..) | ...
|
|
X Y Z |
|
|
+- need to move to this
|
|
position.
|
|
|
|
Notes about the position:
|
|
The caller guarantees that there is no more than one X-bracket by
|
|
checking "!(remaining_tables & s->dependent)" before calling this
|
|
function. X-bracket may have a pair in Y-bracket.
|
|
|
|
When "writing" we store/update this auxiliary info about the current
|
|
position:
|
|
1. join->cur_embedding_map - bitmap of pairs of brackets (aka nested
|
|
joins) we've opened but didn't close.
|
|
2. {each NESTED_JOIN structure not simplified away}->counter - number
|
|
of this nested join's children that have already been added to
|
|
the partial join order.
|
|
@endverbatim
|
|
|
|
@param next_tab Table we're going to extend the current partial join with
|
|
|
|
@retval
|
|
FALSE Join order extended, nested joins info about current join
|
|
order (see NOTE section) updated.
|
|
@retval
|
|
TRUE Requested join order extension not allowed.
|
|
*/
|
|
|
|
static bool check_interleaving_with_nj(JOIN_TAB *next_tab)
{
  JOIN *const join= next_tab->join;

  /*
    Some nest that is currently open does not contain next_tab, i.e. the
    table lies outside the "pair of brackets" we are in: refuse it.
  */
  if ((join->cur_embedding_map & ~next_tab->embedding_map) != 0)
    return TRUE;

  /*
    Walk outwards through the nests that enclose next_tab and account for
    the new table in each of them (brackets X, Y, Z in the picture in the
    function comment). Semi-join nests carry no counters and are skipped.
  */
  TABLE_LIST *emb= next_tab->table->pos_in_table_list->embedding;
  while (emb && emb != join->emb_sjm_nest)
  {
    if (!emb->sj_on_expr)
    {
      NESTED_JOIN *const nest= emb->nested_join;

      /*
        First table of this nest: the nest has just been entered (X
        bracket), so mark it as open. Keep going, the same nest may also
        be completed right away (X paired with Y).
      */
      if (++nest->counter == 1)
        join->cur_embedding_map|= nest->nj_map;

      DBUG_ASSERT(nest->n_tables >= nest->counter);

      /* Nest still has tables missing: outer nests are not affected */
      if (nest->n_tables != nest->counter)
        break;

      /*
        All tables of the nest are now in the prefix (Y or Z bracket):
        close it and continue with the enclosing nest.
      */
      join->cur_embedding_map&= ~nest->nj_map;
    }
    emb= emb->embedding;
  }
  return FALSE;
}
|
|
|
|
|
|
/**
|
|
Nested joins perspective: Remove the last table from the join order.
|
|
|
|
The algorithm is the reciprocal of check_interleaving_with_nj(), hence
|
|
parent join nest nodes are updated only when the last table in its child
|
|
node is removed. The ASCII graphic below will clarify.
|
|
|
|
%A table nesting such as <tt> t1 x [ ( t2 x t3 ) x ( t4 x t5 ) ] </tt>is
|
|
represented by the below join nest tree.
|
|
|
|
@verbatim
|
|
NJ1
|
|
_/ / \
|
|
_/ / NJ2
|
|
_/ / / \
|
|
/ / / \
|
|
t1 x [ (t2 x t3) x (t4 x t5) ]
|
|
@endverbatim
|
|
|
|
At the point in time when check_interleaving_with_nj() adds the table t5 to
|
|
the query execution plan, QEP, it also directs the node named NJ2 to mark
|
|
the table as covered. NJ2 does so by incrementing its @c counter
|
|
member. Since all of NJ2's tables are now covered by the QEP, the algorithm
|
|
proceeds up the tree to NJ1, incrementing its counter as well. All join
|
|
nests are now completely covered by the QEP.
|
|
|
|
restore_prev_nj_state() does the above in reverse. As seen above, the node
|
|
NJ1 contains the nodes t2, t3, and NJ2. Its counter being equal to 3 means
|
|
that the plan covers t2, t3, and NJ2, @e and that the sub-plan (t4 x t5)
|
|
completely covers NJ2. The removal of t5 from the partial plan will first
|
|
decrement NJ2's counter to 1. It will then detect that NJ2 went from being
|
|
completely to partially covered, and hence the algorithm must continue
|
|
upwards to NJ1 and decrement its counter to 2. %A subsequent removal of t4
|
|
will however not influence NJ1 since it did not un-cover the last table in
|
|
NJ2.
|
|
|
|
SYNOPSIS
|
|
restore_prev_nj_state()
|
|
last join table to remove, it is assumed to be the last in current
|
|
partial join order.
|
|
|
|
DESCRIPTION
|
|
|
|
Remove the last table from the partial join order and update the nested
|
|
joins counters and join->cur_embedding_map. It is ok to call this
|
|
function for the first table in join order (for which
|
|
check_interleaving_with_nj has not been called)
|
|
|
|
@param last join table to remove, it is assumed to be the last in current
|
|
partial join order.
|
|
*/
|
|
|
|
static void restore_prev_nj_state(JOIN_TAB *last)
{
  JOIN *const join= last->join;
  TABLE_LIST *emb= last->table->pos_in_table_list->embedding;

  /* Undo the bookkeeping nest by nest, innermost first */
  while (emb && emb != join->emb_sjm_nest)
  {
    /* Semi-join nests carry no counters */
    if (!emb->sj_on_expr)
    {
      NESTED_JOIN *const nest= emb->nested_join;
      DBUG_ASSERT(nest->counter > 0);

      /* Must be sampled before the counter is decremented */
      const bool nest_was_complete= nest->is_fully_covered();

      /*
        After the removal the nest is either empty (no longer open) or
        partially covered (open).
      */
      nest->counter--;
      if (nest->counter == 0)
        join->cur_embedding_map&= ~nest->nj_map;
      else
        join->cur_embedding_map|= nest->nj_map;

      /*
        The enclosing nest counted this one only if it had been complete;
        otherwise there is nothing to undo further up.
      */
      if (!nest_was_complete)
        break;
    }
    emb= emb->embedding;
  }
}
|
|
|
|
|
|
/*
|
|
Compute allowed_top_level_tables - a bitmap of tables one can put into the
|
|
join order if the last table in the join prefix is not inside any outer
|
|
join nest.
|
|
|
|
NESTED_JOIN::direct_children_map - a bitmap of tables ... if the last
|
|
table in the join prefix is inside the join nest.
|
|
|
|
Note: it looks like a sensible way to do this is a top-down descent on
|
|
JOIN::join_list, but apparently that list is missing I_S tables.
|
|
e.g. for SHOW TABLES WHERE col IN (SELECT ...) it will just have a
|
|
semi-join nest.
|
|
*/
|
|
|
|
void JOIN::calc_allowed_top_level_tables(SELECT_LEX *lex)
|
|
{
|
|
TABLE_LIST *tl;
|
|
List_iterator<TABLE_LIST> ti(lex->leaf_tables);
|
|
DBUG_ENTER("JOIN::calc_allowed_top_level_tables");
|
|
DBUG_ASSERT(allowed_top_level_tables == 0); // Should only be called once
|
|
|
|
while ((tl= ti++))
|
|
{
|
|
table_map map;
|
|
TABLE_LIST *embedding= tl->embedding;
|
|
|
|
if (tl->table)
|
|
map= tl->table->map;
|
|
else
|
|
{
|
|
DBUG_ASSERT(tl->jtbm_subselect);
|
|
map= table_map(1) << tl->jtbm_table_no;
|
|
}
|
|
|
|
if (!(embedding= tl->embedding))
|
|
{
|
|
allowed_top_level_tables |= map;
|
|
continue;
|
|
}
|
|
|
|
// Walk out of any semi-join nests
|
|
while (embedding && !embedding->on_expr)
|
|
{
|
|
// semi-join nest or an INSERT-INTO view...
|
|
embedding->nested_join->direct_children_map |= map;
|
|
embedding= embedding->embedding;
|
|
}
|
|
|
|
// Ok we are in the parent nested outer join nest.
|
|
if (!embedding)
|
|
{
|
|
allowed_top_level_tables |= map;
|
|
continue;
|
|
}
|
|
embedding->nested_join->direct_children_map |= map;
|
|
|
|
// Walk to grand-parent join nest.
|
|
embedding= embedding->embedding;
|
|
|
|
// Walk out of any semi-join nests
|
|
while (embedding && !embedding->on_expr)
|
|
{
|
|
DBUG_ASSERT(embedding->sj_on_expr);
|
|
embedding->nested_join->direct_children_map |= map;
|
|
embedding= embedding->embedding;
|
|
}
|
|
|
|
if (embedding)
|
|
{
|
|
DBUG_ASSERT(embedding->on_expr); // Impossible, see above
|
|
embedding->nested_join->direct_children_map |= map;
|
|
}
|
|
else
|
|
allowed_top_level_tables |= map;
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
|
|
Get the tables that one is allowed to have as the next table in the
|
|
current plan
|
|
*/
|
|
|
|
table_map JOIN::get_allowed_nj_tables(uint idx)
|
|
{
|
|
TABLE_LIST *last_emb;
|
|
if (idx > const_tables &&
|
|
(last_emb= positions[idx-1].table->table->pos_in_table_list->embedding))
|
|
{
|
|
for (;last_emb && last_emb != emb_sjm_nest;
|
|
last_emb= last_emb->embedding)
|
|
{
|
|
if (!last_emb->sj_on_expr)
|
|
{
|
|
NESTED_JOIN *nest= last_emb->nested_join;
|
|
if (!nest->is_fully_covered())
|
|
{
|
|
// Return tables that are direct members of this join nest
|
|
return nest->direct_children_map;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
// Return bitmap of tables not in any join nest
|
|
if (emb_sjm_nest)
|
|
return emb_sjm_nest->nested_join->direct_children_map;
|
|
return allowed_top_level_tables;
|
|
}
|
|
|
|
|
|
/*
|
|
Change access methods not to use join buffering and adjust costs accordingly
|
|
|
|
SYNOPSIS
|
|
optimize_wo_join_buffering()
|
|
join
|
|
first_tab The first tab to do re-optimization for
|
|
last_tab The last tab to do re-optimization for
|
|
last_remaining_tables Bitmap of tables that are not in the
|
|
[0...last_tab] join prefix
|
|
first_alt TRUE <=> Use the LooseScan plan for the first_tab
|
|
no_jbuf_before Don't allow to use join buffering before this
|
|
table
|
|
outer_rec_count OUT New output record count
|
|
reopt_cost OUT New join prefix cost
|
|
|
|
DESCRIPTION
|
|
Given a join prefix [0; ... first_tab], change the access to the tables
|
|
in the [first_tab; last_tab] not to use join buffering. This is needed
|
|
because some semi-join strategies cannot be used together with the join
|
|
buffering.
|
|
In general case the best table order in [first_tab; last_tab] range with
|
|
join buffering is different from the best order without join buffering but
|
|
we don't try finding a better join order. (TODO ask Igor why we
|
|
chose not to do this in the end. that's actually the difference from the
|
|
forking approach)
|
|
*/
|
|
|
|
void optimize_wo_join_buffering(JOIN *join, uint first_tab, uint last_tab,
                                table_map last_remaining_tables,
                                bool first_alt, uint no_jbuf_before,
                                double *outer_rec_count, double *reopt_cost)
{
  THD *thd= join->thd;
  /* NOTE(review): presumably silences the optimizer trace for this scope */
  Json_writer_temp_disable trace_wo_join_buffering(thd);

  /* Start from the cost and fanout of the prefix preceding first_tab */
  double cost= 0.0;
  double rec_count= 1;
  if (first_tab > join->const_tables)
  {
    cost= join->positions[first_tab - 1].prefix_cost;
    rec_count= join->positions[first_tab - 1].prefix_record_count;
  }
  *outer_rec_count= rec_count;

  /* Tables of the re-optimized range count as "remaining" again */
  table_map tables_left= last_remaining_tables;
  for (uint i= first_tab; i <= last_tab; i++)
    tables_left|= join->positions[i].table->table->map;

  /*
    best_access_path() optimization depends on the value of
    join->cur_sj_inner_tables. The goal here is a re-optimization with
    join buffering disabled and no other changes, so cur_sj_inner_tables
    needs to have the value it had during the original invocations of
    best_access_path().

    This function is called to re-optimize a semi-join join order range,
    which allows to conclude that the "original" value was 0.
  */
  const table_map saved_sj_inner_tables= join->cur_sj_inner_tables;
  join->cur_sj_inner_tables= 0;

  double inner_fanout= 1.0;

  for (uint i= first_tab; i <= last_tab; i++)
  {
    JOIN_TAB *tab= join->positions[i].table;
    POSITION pos, loose_scan_pos;
    const bool take_loose_scan= (i == first_tab && first_alt);

    if (take_loose_scan || join->positions[i].use_join_buffer)
    {
      /* Find the best access method that would not use join buffering */
      best_access_path(join, tab, tables_left,
                       join->positions, i,
                       TRUE, rec_count,
                       &pos, &loose_scan_pos);
      if (take_loose_scan)
        pos= loose_scan_pos;
    }
    else
    {
      /* The chosen access method did not use join buffering: keep it */
      pos= join->positions[i];
    }

    tables_left&= ~tab->table->map;
    cost= COST_ADD(cost, pos.read_time);

    double records_out= pos.records_out;
    /*
      The (i != last_tab) is here to mimic what
      best_extension_by_limited_search() does: do not call
      table_after_join_selectivity() for the join_tab where the semi-join
      strategy is applied
    */
    if (i != last_tab &&
        thd->variables.optimizer_use_condition_selectivity > 1)
    {
      table_map real_table_bit= tab->table->map;
      /* The call may also adjust records_out through the last argument */
      const double pushdown_cond_selectivity=
        table_after_join_selectivity(join, i, tab,
                                     tables_left & ~real_table_bit,
                                     &records_out);
      join->positions[i].partial_join_cardinality=
        rec_count * pushdown_cond_selectivity;
    }
    else
    {
      join->positions[i].partial_join_cardinality=
        COST_MULT(rec_count, records_out);
    }

    rec_count= COST_MULT(rec_count, records_out);
    *outer_rec_count= COST_MULT(*outer_rec_count, records_out);

    /* Accumulate the fanout of the tables inside semi-join nests */
    if (tab->emb_sj_nest)
      inner_fanout= COST_MULT(inner_fanout, records_out);
  }

  /* Discount the fanout produced by the subquery */
  if (inner_fanout > 1.0)
    *outer_rec_count/= inner_fanout;

  join->cur_sj_inner_tables= saved_sj_inner_tables;

  *reopt_cost= cost;

  /*
    The tables inside the subquery produce smaller fanout than outer tables.
    This can happen in edge cases.
  */
  if (rec_count < *outer_rec_count)
    *outer_rec_count= rec_count;
}
|
|
|
|
|
|
/*
  Simplify a WHERE/HAVING condition: build multiple equalities, propagate
  constants and remove trivial (always true/false) parts.

  The value of the simplified condition is returned in *cond_value and the
  top-level multiple equalities in *cond_equal. Each step is reported to
  the optimizer trace when tracing is on.
*/
static COND *
optimize_cond(JOIN *join, COND *conds,
              List<TABLE_LIST> *join_list, bool ignore_on_conds,
              Item::cond_result *cond_value, COND_EQUAL **cond_equal,
              int flags)
{
  THD *thd= join->thd;
  DBUG_ENTER("optimize_cond");

  if (conds)
  {
    /*
      Build all multiple equality predicates and eliminate equality
      predicates that can be inferred from these multiple equalities.
      For each reference of a field included into a multiple equality
      that occurs in a function set a pointer to the multiple equality
      predicate. Substitute a constant instead of this field if the
      multiple equality contains a constant.
    */
    Json_writer_object trace_wrapper(thd);
    Json_writer_object trace_cond(thd, "condition_processing");

    if (unlikely(trace_cond.trace_started()))
    {
      trace_cond.
        add("condition", join->conds == conds ? "WHERE" : "HAVING").
        add("original_condition", conds);
    }

    Json_writer_array trace_steps(thd, "steps");

    /* Step 1: equality propagation */
    DBUG_EXECUTE("where", print_where(conds, "original", QT_ORDINARY););
    conds= build_equal_items(join, conds, NULL, join_list,
                             ignore_on_conds, cond_equal,
                             MY_TEST(flags & OPT_LINK_EQUAL_FIELDS));
    DBUG_EXECUTE("where",print_where(conds,"after equal_items", QT_ORDINARY););
    if (unlikely(thd->trace_started()))
    {
      Json_writer_object step(thd);
      step.
        add("transformation", "equality_propagation").
        add("resulting_condition", conds);
    }

    /*
      Step 2: constant propagation, i.e. change field = field to
      field = const for each found field = const
    */
    propagate_cond_constants(thd, (I_List<COND_CMP> *) NULL, conds, conds);
    DBUG_EXECUTE("where",print_where(conds,"after const change", QT_ORDINARY););
    if (unlikely(thd->trace_started()))
    {
      Json_writer_object step(thd);
      step.
        add("transformation", "constant_propagation").
        add("resulting_condition", conds);
    }

    /*
      Step 3: trivial condition removal.
        Remove all instances of item == item
        Remove all and-levels where CONST item != CONST item
    */
    conds= conds->remove_eq_conds(thd, cond_value, true);
    if (conds && conds->type() == Item::COND_ITEM &&
        ((Item_cond*) conds)->functype() == Item_func::COND_AND_FUNC)
    {
      /* The result is an AND: expose its multiple equalities */
      *cond_equal= &((Item_cond_and*) conds)->m_cond_equal;
    }
    if (unlikely(thd->trace_started()))
    {
      Json_writer_object step(thd);
      step.
        add("transformation", "trivial_condition_removal").
        add("resulting_condition", conds);
    }
    DBUG_EXECUTE("info",print_where(conds,"after remove", QT_ORDINARY););
  }
  else
  {
    /* No condition at all: always true; ON expressions may still apply */
    *cond_value= Item::COND_TRUE;
    if (!ignore_on_conds)
      build_equal_items(join, NULL, NULL, join_list, ignore_on_conds,
                        cond_equal);
  }
  DBUG_RETURN(conds);
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Propagate multiple equalities to the sub-expressions of a condition
|
|
|
|
@param thd thread handle
|
|
@param cond the condition where equalities are to be propagated
|
|
@param *new_equalities the multiple equalities to be propagated
|
|
@param inherited path to all inherited multiple equality items
|
|
@param[out] is_simplifiable_cond 'cond' may be simplified after the
|
|
propagation of the equalities
|
|
|
|
@details
|
|
The function recursively traverses the tree of the condition 'cond' and
|
|
for each its AND sub-level of any depth the function merges the multiple
|
|
equalities from the list 'new_equalities' into the multiple equalities
|
|
attached to the AND item created for this sub-level.
|
|
The function also [re]sets references to the equalities formed by the
|
|
merges of multiple equalities in all field items occurred in 'cond'
|
|
that are encountered in the equalities.
|
|
If the result of any merge of multiple equalities is an impossible
|
|
condition the function returns TRUE in the parameter is_simplifiable_cond.
|
|
*/
|
|
|
|
void propagate_new_equalities(THD *thd, Item *cond,
                              List<Item_equal> *new_equalities,
                              COND_EQUAL *inherited,
                              bool *is_simplifiable_cond)
{
  if (cond->type() == Item::COND_ITEM)
  {
    Item_cond *const cond_item= (Item_cond *) cond;
    const bool is_and= cond_item->functype() == Item_func::COND_AND_FUNC;

    if (is_and)
    {
      COND_EQUAL *const own_equal= &((Item_cond_and *) cond)->m_cond_equal;
      List<Item_equal> *const own_eqs= &own_equal->current_level;

      own_equal->upper_levels= inherited;

      /*
        Merge the new equalities into the ones of this AND level, unless
        the level has none or the list being propagated is its own list.
      */
      if (!own_eqs->is_empty() && own_eqs != new_equalities)
      {
        Item_equal *eq;

        List_iterator<Item_equal> incoming(*new_equalities);
        while ((eq= incoming++))
          eq->merge_into_list(thd, own_eqs, true, true);

        /* A merged equality that is constant FALSE makes the AND FALSE */
        List_iterator<Item_equal> merged(*own_eqs);
        while ((eq= merged++))
        {
          if (eq->const_item() && !eq->val_int())
          {
            *is_simplifiable_cond= true;
            return;
          }
        }
      }
    }

    /*
      Recurse into the arguments. A nested AND/OR below an AND level
      inherits that level's equalities, anything else keeps 'inherited'.
    */
    List_iterator<Item> args_it(*cond_item->argument_list());
    Item *arg;
    while ((arg= args_it++))
    {
      COND_EQUAL *arg_inherited= inherited;
      if (is_and && arg->type() == Item::COND_ITEM)
        arg_inherited= &((Item_cond_and *) cond)->m_cond_equal;
      propagate_new_equalities(thd, arg, new_equalities, arg_inherited,
                               is_simplifiable_cond);
    }
  }
  else if (cond->type() == Item::FUNC_ITEM &&
           ((Item_func*) cond)->functype() == Item_func::MULT_EQUAL_FUNC)
  {
    /* A multiple equality: merge each new equality into it */
    Item_equal *const target= (Item_equal *) cond;
    target->upper_levels= inherited;

    List_iterator<Item_equal> incoming(*new_equalities);
    Item_equal *eq;
    while ((eq= incoming++))
      target->merge_with_check(thd, eq, true);

    if (target->const_item() && !target->val_int())
      *is_simplifiable_cond= true;
  }
  else
  {
    /* Any other predicate: re-link its fields to the equalities */
    cond= cond->propagate_equal_fields(thd,
                                       Item::Context_boolean(), inherited);
    cond->update_used_tables();
  }
}
|
|
|
|
/*
|
|
Check if cond_is_datetime_is_null() is true for the condition cond, or
|
|
for any of its AND/OR-children
|
|
*/
|
|
bool cond_has_datetime_is_null(Item *cond)
|
|
{
|
|
if (cond_is_datetime_is_null(cond))
|
|
return true;
|
|
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
List<Item> *cond_arg_list= ((Item_cond*) cond)->argument_list();
|
|
List_iterator<Item> li(*cond_arg_list);
|
|
Item *item;
|
|
while ((item= li++))
|
|
{
|
|
if (cond_has_datetime_is_null(item))
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
Check if passed condtition has for of
|
|
|
|
not_null_date_col IS NULL
|
|
|
|
where not_null_date_col has a datte or datetime type
|
|
*/
|
|
|
|
bool cond_is_datetime_is_null(Item *cond)
|
|
{
|
|
if (cond->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) cond)->functype() == Item_func::ISNULL_FUNC)
|
|
{
|
|
return ((Item_func_isnull*) cond)->arg_is_datetime_notnull_field();
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Evaluate all constant boolean sub-expressions in a condition
|
|
|
|
@param thd thread handle
|
|
@param cond condition in which to evaluate constant sub-expressions
|
|
@param[out] cond_value : the returned value of the condition
|
|
(TRUE/FALSE/UNKNOWN:
|
|
Item::COND_TRUE/Item::COND_FALSE/Item::COND_OK)
|
|
@return
|
|
the item that is the result of the substitution of all inexpensive constant
|
|
boolean sub-expressions into cond, or,
|
|
NULL if the condition is constant and is evaluated to FALSE.
|
|
|
|
@details
|
|
This function looks for all inexpensive constant boolean sub-expressions in
|
|
the given condition 'cond' and substitutes them for their values.
|
|
For example, the condition 2 > (5 + 1) or a < (10 / 2)
|
|
will be transformed to the condition a < (10 / 2).
|
|
Note that a constant sub-expression is evaluated only if it is constant and
|
|
inexpensive. A sub-expression with an uncorrelated subquery may be evaluated
|
|
only if the subquery is considered as inexpensive.
|
|
The function does not evaluate a constant sub-expression if it is not on one
|
|
of AND/OR levels of the condition 'cond'. For example, the subquery in the
|
|
condition a > (select max(b) from t1 where b > 5) will never be evaluated
|
|
by this function.
|
|
If a constant boolean sub-expression is evaluated to TRUE then:
|
|
- when the sub-expression is a conjunct of an AND formula it is simply
|
|
removed from this formula
|
|
- when the sub-expression is a disjunct of an OR formula the whole OR
|
|
formula is converted to TRUE
|
|
If a constant boolean sub-expression is evaluated to FALSE then:
|
|
- when the sub-expression is a disjunct of an OR formula it is simply
|
|
removed from this formula
|
|
- when the sub-expression is a conjunct of an AND formula the whole AND
|
|
formula is converted to FALSE
|
|
When a disjunct/conjunct is removed from an OR/AND formula it might happen
|
|
that there is only one conjunct/disjunct remaining. In this case this
|
|
remaining disjunct/conjunct must be merged into underlying AND/OR formula,
|
|
because AND/OR levels must alternate in the same way as they alternate
|
|
after fix_fields() is called for the original condition.
|
|
The specifics of merging a formula f into an AND formula A appears
|
|
when A contains multiple equalities and f contains multiple equalities.
|
|
In this case the multiple equalities from f and A have to be merged.
|
|
After this the resulting multiple equalities have to be propagated into
|
|
all the AND/OR levels of the formula A (see propagate_new_equalities()).
|
|
The propagation of multiple equalities might result in forming multiple
|
|
equalities that are always FALSE. This, in its turn, might trigger further
|
|
simplification of the condition.
|
|
|
|
@note
|
|
EXAMPLE 1:
|
|
SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5 OR 1 != 1);
|
|
First 1 != 1 will be removed from the second conjunct:
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5 AND a = 5);
|
|
Then (b = 5 AND a = 5) will be merged into the top level condition:
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 1) AND (b = 5) AND (a = 5);
|
|
Then (b = 5), (a = 5) will be propagated into the disjuncts of
|
|
(b = 1 OR a = 1):
|
|
=> SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
|
|
(a = 1) AND (b = 5) AND (a = 5)) AND
|
|
(b = 5) AND (a = 5)
|
|
=> SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
|
|
(FALSE AND (b = 5))) AND
|
|
(b = 5) AND (a = 5)
|
|
After this an additional call of remove_eq_conds() converts it
|
|
to FALSE
|
|
|
|
EXAMPLE 2:
|
|
SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5 OR 1 != 1);
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5 AND a = 5);
|
|
=> SELECT * FROM t1 WHERE (b = 1 OR a = 5) AND (b = 5) AND (a = 5);
|
|
=> SELECT * FROM t1 WHERE ((b = 1) AND (b = 5) AND (a = 5) OR
|
|
(a = 5) AND (b = 5) AND (a = 5)) AND
|
|
(b = 5) AND (a = 5)
|
|
=> SELECT * FROM t1 WHERE ((FALSE AND (a = 5)) OR
|
|
((b = 5) AND (a = 5))) AND
|
|
(b = 5) AND (a = 5)
|
|
After this an additional call of remove_eq_conds() converts it to
|
|
=> SELECT * FROM t1 WHERE (b = 5) AND (a = 5)
|
|
*/
|
|
|
|
|
|
COND *
Item_cond::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
                           bool top_level_arg)
{
  const bool is_and= functype() == Item_func::COND_AND_FUNC;
  List<Item> *args_list= argument_list();

  if (check_stack_overrun(thd, STACK_MIN_SIZE, NULL))
  {
    *cond_value= Item::COND_FALSE;
    return (COND*) NULL;                        // Fatal error flag is set!
  }

  if (is_and)
  {
    /*
      Remove multiple equalities that became always true (e.g. after
      constant row substitution).
      The loop below would remove them anyway, but the list
      m_cond_equal.current_level has to be adjusted as well, so both are
      done in one pass: detach the equalities from the argument list,
      filter them, and attach what is left again.
    */
    List<Item_equal> *own_eqs=
      &((Item_cond_and *) this)->m_cond_equal.current_level;
    args_list->disjoin((List<Item> *) own_eqs);
    List_iterator<Item_equal> eq_it(*own_eqs);
    for (Item_equal *eq= eq_it++; eq; eq= eq_it++)
    {
      if (eq->const_item() && eq->val_int())
        eq_it.remove();
    }
    args_list->append((List<Item> *) own_eqs);
  }

  /* Multiple equalities cut out of simplified arguments, merged later */
  List<Item_equal> pending_eqs;
  List_iterator<Item> li(*args_list);
  Item::cond_result child_value;
  bool must_update_used_tables= false;
  Item *item;

  /*
    If the argument list became empty then it consisted only
    of always true multiple equalities.
  */
  *cond_value= args_list->elements ? Item::COND_UNDEF : Item::COND_TRUE;

  while ((item= li++))
  {
    Item *simplified= item->remove_eq_conds(thd, &child_value, false);
    if (!simplified)
    {
      /* This can happen only when item is converted to TRUE or FALSE */
      li.remove();
    }
    else if (simplified != item)
    {
      /*
        This can happen when:
        - item was an OR formula converted to one disjunct
        - item was an AND formula converted to one conjunct
        In these cases the disjunct/conjunct must be merged into the
        argument list of this condition.
      */
      const bool simplified_is_cond= simplified->type() == Item::COND_ITEM;

      if (simplified_is_cond && item->type() == Item::COND_ITEM)
      {
        DBUG_ASSERT(functype() == ((Item_cond *) simplified)->functype());
        List<Item> *sub_args= ((Item_cond *) simplified)->argument_list();
        if (is_and)
        {
          /*
            The simplified item is an AND formula: its multiple equalities
            must be merged into the multiple equalities of this level.
          */
          List<Item_equal> *sub_eqs=
            &((Item_cond_and *) simplified)->m_cond_equal.current_level;
          if (!sub_eqs->is_empty())
          {
            /*
              Cut the multiple equalities off the argument list of the
              simplified item and collect them in pending_eqs; they are
              all merged into this level's equalities after the loop.
            */
            sub_args->disjoin((List<Item> *) sub_eqs);
            pending_eqs.append(sub_eqs);
          }
        }
        if (sub_args->is_empty())
          li.remove();
        else
        {
          const uint n_spliced= sub_args->elements;
          li.replace(*sub_args);
          /* Make iterator li ignore new items */
          for (uint skip= n_spliced - 1; skip != 0; skip--)
            li++;
          must_update_used_tables= true;
        }
      }
      else if (is_and &&
               simplified->type() == Item::FUNC_ITEM &&
               ((Item_func*) simplified)->functype() ==
                Item_func::MULT_EQUAL_FUNC)
      {
        /* A lone multiple equality: take it out and merge it later */
        li.remove();
        pending_eqs.push_back((Item_equal *) simplified, thd->mem_root);
      }
      else
      {
        if (simplified_is_cond &&
            ((Item_cond*) simplified)->functype() == functype())
        {
          /* Same AND/OR kind as this level: splice its arguments in */
          List<Item> *sub_args= ((Item_cond *) simplified)->argument_list();
          const uint n_spliced= sub_args->elements;
          li.replace(*sub_args);
          /* Make iterator li ignore new items */
          for (uint skip= n_spliced - 1; skip != 0; skip--)
            li++;
        }
        else
          li.replace(simplified);
        must_update_used_tables= true;
      }
    }

    /* Fold the value of this argument into the value of the formula */
    if (*cond_value == Item::COND_UNDEF)
      *cond_value= child_value;

    if (child_value == Item::COND_OK)             // Not TRUE or FALSE
    {
      if (is_and || *cond_value == Item::COND_FALSE)
        *cond_value= child_value;
    }
    else if ((child_value == Item::COND_FALSE && is_and) ||
             (child_value == Item::COND_TRUE && !is_and))
    {
      /* FALSE conjunct => always false; TRUE disjunct => always true */
      *cond_value= child_value;
      return (COND*) NULL;
    }
    /* Remaining values, including COND_UNDEF, change nothing here */
  }

  COND *cond= this;
  if (!pending_eqs.is_empty())
  {
    DBUG_ASSERT(is_and);
    /*
      Merge multiple equalities that were cut from the results of
      simplification of OR formulas converted into AND formulas.
      These multiple equalities are to be merged into the
      multiple equalities of this AND level.
    */
    COND_EQUAL *own_equal= &((Item_cond_and *) this)->m_cond_equal;
    List<Item_equal> *own_eqs= &own_equal->current_level;
    args_list->disjoin((List<Item> *) own_eqs);

    List_iterator_fast<Item_equal> pending_it(pending_eqs);
    Item_equal *pending;
    while ((pending= pending_it++))
    {
      pending->upper_levels= own_equal->upper_levels;
      pending->merge_into_list(thd, own_eqs, false, false);

      /* Any merged equality that is constant FALSE makes the AND FALSE */
      List_iterator_fast<Item_equal> merged_it(*own_eqs);
      Item_equal *merged;
      while ((merged= merged_it++))
      {
        if (merged->const_item() && !merged->val_int())
        {
          *cond_value= Item::COND_FALSE;
          return (COND*) NULL;
        }
      }
    }
    args_list->append((List<Item> *) own_eqs);

    /*
      Propagate the newly formed multiple equalities to
      all the AND/OR levels of cond
    */
    bool is_simplifiable_cond= false;
    propagate_new_equalities(thd, this, own_eqs,
                             own_equal->upper_levels,
                             &is_simplifiable_cond);
    /*
      If the above propagation of multiple equalities brings us
      to multiple equalities that are always FALSE then try to
      simplify the condition with remove_eq_conds() again.
    */
    if (is_simplifiable_cond)
    {
      cond= cond->remove_eq_conds(thd, cond_value, false);
      if (!cond)
        return cond;
    }
    must_update_used_tables= true;
  }

  if (must_update_used_tables)
    cond->update_used_tables();

  List<Item> *remaining= ((Item_cond*) cond)->argument_list();
  if (!remaining->elements || *cond_value != Item::COND_OK)
    return (COND*) NULL;

  if (remaining->elements == 1)
  {                                             // Remove list
    Item *only_arg= remaining->head();
    remaining->empty();
    return only_arg;
  }

  *cond_value= Item::COND_OK;
  return cond;
}
|
|
|
|
|
|
/*
  Default simplification of a predicate: if it can be evaluated during
  optimization, replace it by its value (returned in *cond_value) and
  return NULL; otherwise keep the item and report COND_OK.
*/
COND *
Item::remove_eq_conds(THD *thd, Item::cond_result *cond_value, bool top_level_arg)
{
  if (!can_eval_in_optimize())
  {
    *cond_value= Item::COND_OK;
    return this;                                // Point at next and level
  }

  *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
  return (COND*) NULL;
}
|
|
|
|
|
|
/*
  Simplification of a two-argument comparison: besides evaluating a
  predicate that can be evaluated during optimization, detect a comparison
  of an item with itself, whose result is given by eq_cmp_result().
*/
COND *
Item_bool_func2::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
                                 bool top_level_arg)
{
  if (can_eval_in_optimize())
  {
    *cond_value= eval_const_cond() ? Item::COND_TRUE : Item::COND_FALSE;
    return (COND*) NULL;
  }

  /* Result this comparison would have if both arguments were identical */
  const Item::cond_result same_args_value= eq_cmp_result();
  *cond_value= same_args_value;

  /*
    The arguments are identical. An always-true result may be used only
    if the argument cannot be NULL or the operator is EQUAL_FUNC.
  */
  if (same_args_value != Item::COND_OK &&
      args[0]->eq(args[1], true) &&
      (same_args_value == Item::COND_FALSE ||
       !args[0]->maybe_null() ||
       functype() == Item_func::EQUAL_FUNC))
    return (COND*) NULL;                        // Compare of identical items

  *cond_value= Item::COND_OK;
  return this;                                  // Point at next and level
}
|
|
|
|
|
|
/**
|
|
Remove const and eq items. Return new item, or NULL if no condition
|
|
cond_value is set accordingly:
|
|
COND_OK query is possible (field = constant)
|
|
COND_TRUE always true ( 1 = 1 )
|
|
COND_FALSE always false ( 1 = 2 )
|
|
|
|
SYNOPSIS
|
|
remove_eq_conds()
|
|
thd THD environment
|
|
cond the condition to handle
|
|
cond_value the resulting value of the condition
|
|
|
|
NOTES
|
|
calls the inner_remove_eq_conds to check all the tree recursively
|
|
|
|
RETURN
|
|
*COND with the simplified condition
|
|
*/
|
|
|
|
COND *
|
|
Item_func_isnull::remove_eq_conds(THD *thd, Item::cond_result *cond_value,
|
|
bool top_level_arg)
|
|
{
|
|
Item *real_item= args[0]->real_item();
|
|
if (real_item->type() == Item::FIELD_ITEM)
|
|
{
|
|
Field *field= ((Item_field*) real_item)->field;
|
|
|
|
if ((field->flags & NOT_NULL_FLAG) &&
|
|
field->type_handler()->cond_notnull_field_isnull_to_field_eq_zero())
|
|
{
|
|
/* fix to replace 'NULL' dates with '0' (shreeve@uci.edu) */
|
|
/*
|
|
See BUG#12594011
|
|
Documentation says that
|
|
SELECT datetime_notnull d FROM t1 WHERE d IS NULL
|
|
shall return rows where d=='0000-00-00'
|
|
|
|
Thus, for DATE and DATETIME columns defined as NOT NULL,
|
|
"date_notnull IS NULL" has to be modified to
|
|
"date_notnull IS NULL OR date_notnull == 0" (if outer join)
|
|
"date_notnull == 0" (otherwise)
|
|
|
|
*/
|
|
|
|
Item *item0= (Item*) Item_false;
|
|
Item *eq_cond= new(thd->mem_root) Item_func_eq(thd, args[0], item0);
|
|
if (!eq_cond)
|
|
return this;
|
|
|
|
COND *cond= this;
|
|
if (field->table->pos_in_table_list->is_inner_table_of_outer_join())
|
|
{
|
|
// outer join: transform "col IS NULL" to "col IS NULL or col=0"
|
|
Item *or_cond= new(thd->mem_root) Item_cond_or(thd, eq_cond, this);
|
|
if (!or_cond)
|
|
return this;
|
|
cond= or_cond;
|
|
}
|
|
else
|
|
{
|
|
// not outer join: transform "col IS NULL" to "col=0"
|
|
cond= eq_cond;
|
|
}
|
|
|
|
cond->fix_fields(thd, &cond);
|
|
/*
|
|
Note: although args[0] is a field, cond can still be a constant
|
|
(in case field is a part of a dependent subquery).
|
|
|
|
Note: we call cond->Item::remove_eq_conds() non-virtually (statically)
|
|
for performance purpose.
|
|
A non-qualified call, i.e. just cond->remove_eq_conds(),
|
|
would call Item_bool_func2::remove_eq_conds() instead, which would
|
|
try to do some extra job to detect if args[0] and args[1] are
|
|
equivalent items. We know they are not (we have field=0 here).
|
|
*/
|
|
return cond->Item::remove_eq_conds(thd, cond_value, false);
|
|
}
|
|
|
|
/*
|
|
Handles this special case for some ODBC applications:
|
|
The are requesting the row that was just updated with a auto_increment
|
|
value with this construct:
|
|
|
|
SELECT * from table_name where auto_increment_column IS NULL
|
|
This will be changed to:
|
|
SELECT * from table_name where auto_increment_column = LAST_INSERT_ID
|
|
|
|
Note, this substitution is done if the NULL test is the only condition!
|
|
If the NULL test is a part of a more complex condition, it is not
|
|
substituted and is treated normally:
|
|
WHERE auto_increment IS NULL AND something_else
|
|
*/
|
|
|
|
if (top_level_arg) // "auto_increment_column IS NULL" is the only condition
|
|
{
|
|
if (field->flags & AUTO_INCREMENT_FLAG && !field->table->maybe_null &&
|
|
(thd->variables.option_bits & OPTION_AUTO_IS_NULL) &&
|
|
(thd->first_successful_insert_id_in_prev_stmt > 0 &&
|
|
thd->substitute_null_with_insert_id))
|
|
{
|
|
query_cache_abort(thd, &thd->query_cache_tls);
|
|
COND *new_cond, *cond= this;
|
|
/* If this fails, we will catch it later before executing query */
|
|
if ((new_cond= new (thd->mem_root) Item_func_eq(thd, args[0],
|
|
new (thd->mem_root) Item_int(thd, "last_insert_id()",
|
|
thd->read_first_successful_insert_id_in_prev_stmt(),
|
|
MY_INT64_NUM_DECIMAL_DIGITS))))
|
|
{
|
|
cond= new_cond;
|
|
/*
|
|
Item_func_eq can't be fixed after creation so we do not check
|
|
cond->fixed(), also it do not need tables so we use 0 as second
|
|
argument.
|
|
*/
|
|
cond->fix_fields(thd, &cond);
|
|
}
|
|
/*
|
|
IS NULL should be mapped to LAST_INSERT_ID only for first row, so
|
|
clear for next row
|
|
*/
|
|
thd->substitute_null_with_insert_id= FALSE;
|
|
|
|
*cond_value= Item::COND_OK;
|
|
return cond;
|
|
}
|
|
}
|
|
}
|
|
return Item::remove_eq_conds(thd, cond_value, top_level_arg);
|
|
}
|
|
|
|
|
|
/**
|
|
Check if equality can be used in removing components of GROUP BY/DISTINCT
|
|
|
|
@param l the left comparison argument (a field if any)
|
|
@param r the right comparison argument (a const of any)
|
|
|
|
@details
|
|
Checks if an equality predicate can be used to take away
|
|
DISTINCT/GROUP BY because it is known to be true for exactly one
|
|
distinct value (e.g. <expr> == <const>).
|
|
Arguments must be compared in the native type of the left argument
|
|
and (for strings) in the native collation of the left argument.
|
|
Otherwise, for example,
|
|
<string_field> = <int_const> may match more than 1 distinct value or
|
|
the <string_field>.
|
|
|
|
@note We don't need to aggregate l and r collations here, because r -
|
|
the constant item - has already been converted to a proper collation
|
|
for comparison. We only need to compare this collation with field's collation.
|
|
|
|
@retval true can be used
|
|
@retval false cannot be used
|
|
*/
|
|
|
|
/*
|
|
psergey-todo: this returns false for int_column='1234' (here '1234' is a
|
|
constant. Need to discuss this with Bar).
|
|
|
|
See also Field::test_if_equality_guaranees_uniqueness(const Item *item);
|
|
*/
|
|
static bool
|
|
test_if_equality_guarantees_uniqueness(Item *l, Item *r)
|
|
{
|
|
return (r->const_item() || !(r->used_tables() & ~OUTER_REF_TABLE_BIT)) &&
|
|
item_cmp_type(l, r) == l->cmp_type() &&
|
|
(l->cmp_type() != STRING_RESULT ||
|
|
l->collation.collation == r->collation.collation);
|
|
}
|
|
|
|
|
|
/*
|
|
Return TRUE if i1 and i2 (if any) are equal items,
|
|
or if i1 is a wrapper item around the f2 field.
|
|
*/
|
|
|
|
static bool equal(Item *i1, Item *i2, Field *f2)
|
|
{
|
|
DBUG_ASSERT((i2 == NULL) ^ (f2 == NULL));
|
|
|
|
if (i2 != NULL)
|
|
return i1->eq(i2, 1);
|
|
else if (i1->type() == Item::FIELD_ITEM)
|
|
return f2->eq(((Item_field *) i1)->field);
|
|
else
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Test if a field or an item is equal to a constant value in WHERE
|
|
|
|
@param cond WHERE clause expression
|
|
@param comp_item Item to find in WHERE expression
|
|
(if comp_field != NULL)
|
|
@param comp_field Field to find in WHERE expression
|
|
(if comp_item != NULL)
|
|
@param[out] const_item intermediate arg, set to Item pointer to NULL
|
|
|
|
@return TRUE if the field is a constant value in WHERE
|
|
|
|
@note
|
|
comp_item and comp_field parameters are mutually exclusive.
|
|
*/
|
|
bool
|
|
const_expression_in_where(COND *cond, Item *comp_item, Field *comp_field,
|
|
Item **const_item)
|
|
{
|
|
DBUG_ASSERT((comp_item == NULL) ^ (comp_field == NULL));
|
|
|
|
Item *intermediate= NULL;
|
|
if (const_item == NULL)
|
|
const_item= &intermediate;
|
|
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
bool and_level= (((Item_cond*) cond)->functype()
|
|
== Item_func::COND_AND_FUNC);
|
|
List_iterator_fast<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
bool res=const_expression_in_where(item, comp_item, comp_field,
|
|
const_item);
|
|
if (res) // Is a const value
|
|
{
|
|
if (and_level)
|
|
return 1;
|
|
}
|
|
else if (!and_level)
|
|
return 0;
|
|
}
|
|
return and_level ? 0 : 1;
|
|
}
|
|
else if (cond->eq_cmp_result() != Item::COND_OK)
|
|
{ // boolean compare function
|
|
Item_func* func= (Item_func*) cond;
|
|
if (func->functype() != Item_func::EQUAL_FUNC &&
|
|
func->functype() != Item_func::EQ_FUNC)
|
|
return 0;
|
|
Item *left_item= ((Item_func*) cond)->arguments()[0];
|
|
Item *right_item= ((Item_func*) cond)->arguments()[1];
|
|
if (equal(left_item, comp_item, comp_field))
|
|
{
|
|
if (test_if_equality_guarantees_uniqueness (left_item, right_item))
|
|
{
|
|
if (*const_item)
|
|
return right_item->eq(*const_item, 1);
|
|
*const_item=right_item;
|
|
return 1;
|
|
}
|
|
}
|
|
else if (equal(right_item, comp_item, comp_field))
|
|
{
|
|
if (test_if_equality_guarantees_uniqueness (right_item, left_item))
|
|
{
|
|
if (*const_item)
|
|
return left_item->eq(*const_item, 1);
|
|
*const_item=left_item;
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/****************************************************************************
|
|
Create internal temporary table
|
|
****************************************************************************/
|
|
|
|
/*
  Create an integer temporary-table field for this item.

  The signed "long" handler is used unless max_char_length() exceeds
  convert_int_length, in which case the "longlong" handler is used.
  For unsigned items the unsigned counterpart of the handler is taken.
*/
Field *Item::create_tmp_field_int(MEM_ROOT *root, TABLE *table,
                                  uint convert_int_length)
{
  const Type_handler *handler;
  if (max_char_length() > convert_int_length)
    handler= &type_handler_slonglong;
  else
    handler= &type_handler_slong;

  if (unsigned_flag)
    handler= handler->type_handler_unsigned();

  Record_addr addr(maybe_null());
  return handler->make_and_init_table_field(root, &name, addr, *this, table);
}
|
|
|
|
/*
  Create a temporary-table field from this item's field type and, when
  is_explicit_null is set, flag the new field as created from a NULL item.

  src is not used by this method.
  Returns the new field, or NULL if the field could not be created.
*/
Field *Item::tmp_table_field_from_field_type_maybe_null(MEM_ROOT *root,
                                                TABLE *table,
                                                Tmp_field_src *src,
                                                const Tmp_field_param *param,
                                                bool is_explicit_null)
{
  /*
    item->type() == CONST_ITEM excluded due to making fields for counter
    With help of Item_uint
  */
  DBUG_ASSERT(!param->make_copy_field() || type() == CONST_ITEM);
  DBUG_ASSERT(!is_result_field());

  Field *new_field= tmp_table_field_from_field_type(root, table);
  if (new_field && is_explicit_null)
    new_field->is_created_from_null_item= true;
  return new_field;
}
|
|
|
|
|
|
/*
  Create a temporary-table field able to hold the value of this
  aggregate function.

  REAL results get a Field_double; INT, TIME, DECIMAL and STRING results
  get a field derived from the item's field type. The 'group' argument
  is not used here.

  Returns the initialized field, or NULL if it could not be created.
*/
Field *Item_sum::create_tmp_field(MEM_ROOT *root, bool group, TABLE *table)
{
  Field *created= NULL;

  switch (cmp_type()) {
  case ROW_RESULT:
    // This case should never be chosen
    DBUG_ASSERT(0);
    return NULL;
  case REAL_RESULT:
    created= new (root)
      Field_double(max_char_length(), maybe_null(), &name, decimals, TRUE);
    break;
  case INT_RESULT:
  case TIME_RESULT:
  case DECIMAL_RESULT:
  case STRING_RESULT:
    created= tmp_table_field_from_field_type(root, table);
    break;
  }

  if (!created)
    return NULL;
  created->init(table);
  return created;
}
|
|
|
|
|
|
/**
|
|
Create a temporary field for Item_field (or its descendant),
|
|
either direct or referenced by an Item_ref.
|
|
|
|
param->modify_item is set when we create a field for an internal temporary
|
|
table. In this case we have to ensure the new field name is identical to
|
|
the original field name as the field will info will be sent to the client.
|
|
In other cases, the field name is set from orig_item or name if org_item is
|
|
not set.
|
|
*/
|
|
|
|
Field *
|
|
Item_field::create_tmp_field_from_item_field(MEM_ROOT *root, TABLE *new_table,
|
|
Item_ref *orig_item,
|
|
const Tmp_field_param *param)
|
|
{
|
|
DBUG_ASSERT(!is_result_field());
|
|
Field *result;
|
|
const Lex_ident_column *new_name= (orig_item ? &orig_item->name :
|
|
!param->modify_item() ? &name :
|
|
&field->field_name);
|
|
|
|
/*
|
|
If item have to be able to store NULLs but underlaid field can't do it,
|
|
create_tmp_field_from_field() can't be used for tmp field creation.
|
|
*/
|
|
if (((maybe_null() && in_rollup()) ||
|
|
(new_table->in_use->create_tmp_table_for_derived && /* for mat. view/dt */
|
|
orig_item && orig_item->maybe_null())) &&
|
|
!field->maybe_null())
|
|
{
|
|
/*
|
|
The item the ref points to may have maybe_null flag set while
|
|
the ref doesn't have it. This may happen for outer fields
|
|
when the outer query decided at some point after name resolution phase
|
|
that this field might be null. Take this into account here.
|
|
*/
|
|
Record_addr rec(orig_item ? orig_item->maybe_null() : maybe_null());
|
|
const Type_handler *handler= type_handler()->
|
|
type_handler_for_tmp_table(this);
|
|
result= handler->make_and_init_table_field(root, new_name,
|
|
rec, *this, new_table);
|
|
}
|
|
else if (param->table_cant_handle_bit_fields() &&
|
|
field->type() == MYSQL_TYPE_BIT)
|
|
{
|
|
const Type_handler *handler=
|
|
Type_handler::type_handler_long_or_longlong(max_char_length(), true);
|
|
result= handler->make_and_init_table_field(root, new_name,
|
|
Record_addr(maybe_null()),
|
|
*this, new_table);
|
|
}
|
|
else
|
|
{
|
|
bool tmp_maybe_null= param->modify_item() ? maybe_null() :
|
|
field->maybe_null();
|
|
result= field->create_tmp_field(root, new_table, tmp_maybe_null);
|
|
if (result && ! param->modify_item())
|
|
result->field_name= *new_name;
|
|
}
|
|
if (result && param->modify_item())
|
|
result_field= result;
|
|
return result;
|
|
}
|
|
|
|
|
|
/*
  Create a temporary-table field for a plain Item_field.

  Records the underlying field in src. It is also recorded as the default
  field when it may have a default value and eq_def() reports that its
  definition equals the new field's.

  Returns the new field, or NULL if it could not be created.
*/
Field *Item_field::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                       Tmp_field_src *src,
                                       const Tmp_field_param *param)
{
  DBUG_ASSERT(!is_result_field());

  src->set_field(field);
  Field *created= create_tmp_field_from_item_field(root, table, NULL, param);
  if (!created)
    return NULL;

  const bool may_have_default= !(field->flags & NO_DEFAULT_VALUE_FLAG);
  if (may_have_default && field->eq_def(created))
    src->set_default_field(field);
  return created;
}
|
|
|
|
|
|
/*
  Create a temporary-table field for a DEFAULT(col) item.

  Returns the new field, or NULL if it could not be created.
*/
Field *Item_default_value::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                               Tmp_field_src *src,
                                               const Tmp_field_param *param)
{
  const bool needs_copy_func= field->default_value ||
                              (field->flags & BLOB_FLAG);
  if (!needs_copy_func)
  {
    /*
      Same code as in Item_field::create_tmp_field_ex, except no default
      field handling
    */
    src->set_field(field);
    return create_tmp_field_from_item_field(root, table, nullptr, param);
  }

  /*
    We have to use a copy function when using a blob with default value
    as we have to calculate the default value before we can use it.
  */
  get_tmp_field_src(src, param);
  Field *created= tmp_table_field_from_field_type(root, table);
  if (created && param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/*
  Create a temporary-table field for a reference item.

  When the reference resolves to a field item, the field is created from
  that Item_field (with modify_item switched off for the inner call) and
  this reference's own result_field is updated if the caller asked for
  modify_item. Any other referenced item is handled by
  Item_result_field::create_tmp_field_ex().
*/
Field *Item_ref::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                     Tmp_field_src *src,
                                     const Tmp_field_param *param)
{
  Item *referenced= real_item();
  DBUG_ASSERT(is_result_field());

  if (referenced->type() != Item::FIELD_ITEM)
    return Item_result_field::create_tmp_field_ex(root, table, src, param);

  Item_field *field_item= (Item_field*) referenced;
  Tmp_field_param inner_param(*param);
  inner_param.set_modify_item(false);
  src->set_field(field_item->field);

  Field *created= field_item->create_tmp_field_from_item_field(root, table,
                                                               this,
                                                               &inner_param);
  if (created && param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/*
  Tell src where the value of the new temporary field comes from:
  from the existing result_field when an exact field copy is requested,
  otherwise from this item itself.
*/
void Item_result_field::get_tmp_field_src(Tmp_field_src *src,
                                          const Tmp_field_param *param)
{
  if (!param->make_copy_field())
  {
    src->set_item_result_field(this); // Save for copy_funcs
    return;
  }

  DBUG_ASSERT(result_field);
  src->set_field(result_field);
}
|
|
|
|
|
|
/*
  Create a temporary-table field for this item using the given type
  handler, and remember it in result_field if param->modify_item() is set.

  Returns the new field, or NULL if it could not be created.
*/
Field *
Item_result_field::create_tmp_field_ex_from_handler(
                                         MEM_ROOT *root,
                                         TABLE *table,
                                         Tmp_field_src *src,
                                         const Tmp_field_param *param,
                                         const Type_handler *h)
{
  /*
    Possible Item types:
    - Item_cache_wrapper  (only for CREATE..SELECT ?)
    - Item_func
    - Item_subselect
  */
  DBUG_ASSERT(fixed());
  DBUG_ASSERT(is_result_field());
  DBUG_ASSERT(type() != NULL_ITEM);

  get_tmp_field_src(src, param);

  Field *created= h->make_and_init_table_field(root, &name,
                                               Record_addr(maybe_null()),
                                               *this, table);
  if (created && param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/*
  Create a temporary-table field for a stored function call, modelled on
  sp_result_field and named after this item.

  Returns the new field, or NULL if it could not be created.
*/
Field *Item_func_sp::create_tmp_field_ex(MEM_ROOT *root, TABLE *table,
                                         Tmp_field_src *src,
                                         const Tmp_field_param *param)
{
  get_tmp_field_src(src, param);

  Field *created= sp_result_field->create_tmp_field(root, table);
  if (!created)
    return NULL;

  created->field_name= name;
  if (param->modify_item())
    result_field= created;
  return created;
}
|
|
|
|
|
|
/*
  Attach a JSON_VALID(field) check constraint to a temporary-table field.

  The expression items are created while table->expr_arena is the active
  arena; the arena is initialized first if the table has none yet.

  Returns true on failure: the arena could not be initialized, or
  field->check_constraint is still NULL afterwards.
*/
static bool make_json_valid_expr(TABLE *table, Field *field)
{
  THD *thd= table->in_use;

  if (!table->expr_arena && table->init_expr_arena(thd->mem_root))
    return 1;

  Query_arena saved_arena;
  thd->set_n_backup_active_arena(table->expr_arena, &saved_arena);

  Item *field_ref= new (thd->mem_root) Item_field(thd, field);
  if (field_ref)
  {
    Item *json_check= new (thd->mem_root) Item_func_json_valid(thd, field_ref);
    if (json_check)
      field->check_constraint= add_virtual_expression(thd, json_check);
  }

  thd->restore_active_arena(table->expr_arena, &saved_arena);
  return field->check_constraint == NULL;
}
|
|
|
|
|
|
/**
|
|
Create field for temporary table.
|
|
|
|
@param table Temporary table
|
|
@param item Item to create a field for
|
|
@param type Type of item (normally item->type)
|
|
@param copy_func If set and item is a function, store copy of item
|
|
in this array
|
|
@param from_field if field will be created using other field as example,
|
|
pointer example field will be written here
|
|
@param default_field If field has a default value field, store it here
|
|
@param group 1 if we are going to do a relative group by on result
|
|
@param modify_item 1 if item->result_field should point to new item.
|
|
This is relevent for how fill_record() is going to
|
|
work:
|
|
If modify_item is 1 then fill_record() will update
|
|
the record in the original table.
|
|
If modify_item is 0 then fill_record() will update
|
|
the temporary table
|
|
@param table_cant_handle_bit_fields
|
|
Set to 1 if the temporary table cannot handle bit
|
|
fields. Only set for heap tables when the bit field
|
|
is part of an index.
|
|
@param make_copy_field
|
|
Set when using with rollup when we want to have
|
|
an exact copy of the field.
|
|
@retval
|
|
0 on error
|
|
@retval
|
|
new_created field
|
|
Create a temporary field for Item_field (or its descendant),
|
|
either direct or referenced by an Item_ref.
|
|
*/
|
|
Field *create_tmp_field(TABLE *table, Item *item,
|
|
Item ***copy_func, Field **from_field,
|
|
Field **default_field,
|
|
bool group, bool modify_item,
|
|
bool table_cant_handle_bit_fields,
|
|
bool make_copy_field)
|
|
{
|
|
Tmp_field_src src;
|
|
Tmp_field_param prm(group, modify_item, table_cant_handle_bit_fields,
|
|
make_copy_field);
|
|
Field *result= item->create_tmp_field_ex(table->in_use->mem_root,
|
|
table, &src, &prm);
|
|
if (is_json_type(item) && make_json_valid_expr(table, result))
|
|
result= NULL;
|
|
|
|
*from_field= src.field();
|
|
*default_field= src.default_field();
|
|
if (src.item_result_field())
|
|
*((*copy_func)++)= src.item_result_field();
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
Set up column usage bitmaps for a temporary table
|
|
|
|
IMPLEMENTATION
|
|
For temporary tables, we need one bitmap with all columns set and
|
|
a tmp_set bitmap to be used by things like filesort.
|
|
*/
|
|
|
|
void
|
|
setup_tmp_table_column_bitmaps(TABLE *table, uchar *bitmaps, uint field_count)
|
|
{
|
|
uint bitmap_size= bitmap_buffer_size(field_count);
|
|
|
|
DBUG_ASSERT(table->s->virtual_fields == 0);
|
|
|
|
my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->tmp_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->eq_join_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->cond_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
bitmaps+= bitmap_size;
|
|
my_bitmap_init(&table->has_value_set,
|
|
(my_bitmap_map*) bitmaps, field_count);
|
|
/* write_set and all_set are copies of read_set */
|
|
table->def_write_set= table->def_read_set;
|
|
table->s->all_set= table->def_read_set;
|
|
bitmap_set_all(&table->s->all_set);
|
|
table->default_column_bitmaps();
|
|
}
|
|
|
|
|
|
/*
  Remember the creation options and reset all per-counter statistics.
  Counting starts with the 'other' counter.
*/
Create_tmp_table::Create_tmp_table(ORDER *group, bool distinct,
                                   bool save_sum_fields,
                                   ulonglong select_options,
                                   ha_rows rows_limit)
 :m_alloced_field_count(0),
  m_using_unique_constraint(false),
  m_temp_pool_slot(MY_BIT_NONE),
  m_group(group),
  m_distinct(distinct),
  m_save_sum_fields(save_sum_fields),
  m_with_cycle(false),
  m_select_options(select_options),
  m_rows_limit(rows_limit),
  m_group_null_items(0),
  current_counter(other)
{
  /*
    'distinct' is also the name of a constructor parameter, so the
    class-scope counter names are qualified explicitly.
  */
  m_field_count[Create_tmp_table::other]=
    m_field_count[Create_tmp_table::distinct]= 0;
  m_null_count[Create_tmp_table::other]=
    m_null_count[Create_tmp_table::distinct]= 0;
  m_blobs_count[Create_tmp_table::other]=
    m_blobs_count[Create_tmp_table::distinct]= 0;
  m_uneven_bit[Create_tmp_table::other]=
    m_uneven_bit[Create_tmp_table::distinct]= 0;
}
|
|
|
|
|
|
/*
  Register a newly created field as column number 'fieldnr' of the
  temporary table and update the record length and the NULL/BLOB
  statistics of the current counter.

  With force_not_null_cols the field is made NOT NULL first.
*/
void Create_tmp_table::add_field(TABLE *table, Field *field, uint fieldnr,
                                 bool force_not_null_cols)
{
  DBUG_ASSERT(!field->field_name.str ||
              strlen(field->field_name.str) == field->field_name.length);

  if (force_not_null_cols)
  {
    /* Forced NOT NULL: such a field never counts as nullable */
    field->null_ptr= NULL;
    field->flags|= NOT_NULL_FLAG;
  }
  else if (!(field->flags & NOT_NULL_FLAG))
    m_null_count[current_counter]++;

  table->field[fieldnr]= field;
  field->field_index= fieldnr;
  table->s->reclength+= field->pack_length();

  // Assign it here, before update_data_type_statistics() changes m_blob_count
  if (field->flags & BLOB_FLAG)
  {
    table->s->blob_field[m_blob_count]= fieldnr;
    m_blobs_count[current_counter]++;
  }

  field->update_data_type_statistics(this);
}
|
|
|
|
|
|
/**
|
|
Create a temp table according to a field list.
|
|
|
|
Given field pointers are changed to point at tmp_table for
|
|
send_result_set_metadata. The table object is self contained: it's
|
|
allocated in its own memory root, as well as Field objects
|
|
created for table columns.
|
|
This function will replace Item_sum items in 'fields' list with
|
|
corresponding Item_field items, pointing at the fields in the
|
|
temporary table, unless this was prohibited by TRUE
|
|
value of argument save_sum_fields. The Item_field objects
|
|
are created in THD memory root.
|
|
|
|
@param thd thread handle
|
|
@param param a description used as input to create the table
|
|
@param fields list of items that will be used to define
|
|
column types of the table (also see NOTES)
|
|
@param group Create an unique key over all group by fields.
|
|
This is used to retrive the row during
|
|
end_write_group() and update them.
|
|
@param distinct should table rows be distinct
|
|
@param save_sum_fields see NOTES
|
|
@param select_options Optiions for how the select is run.
|
|
See sql_priv.h for a list of options.
|
|
@param rows_limit Maximum number of rows to insert into the
|
|
temporary table
|
|
@param table_alias possible name of the temporary table that can
|
|
be used for name resolving; can be "".
|
|
@param do_not_open only create the TABLE object, do not
|
|
open the table in the engine
|
|
@param keep_row_order rows need to be read in the order they were
|
|
inserted, the engine should preserve this order
|
|
*/
|
|
|
|
TABLE *Create_tmp_table::start(THD *thd,
|
|
TMP_TABLE_PARAM *param,
|
|
const LEX_CSTRING *table_alias)
|
|
{
|
|
MEM_ROOT *mem_root_save, own_root;
|
|
TABLE *table;
|
|
TABLE_SHARE *share;
|
|
uint copy_func_count= param->func_count;
|
|
char *tmpname,path[FN_REFLEN];
|
|
Field **reg_field;
|
|
uint *blob_field;
|
|
key_part_map *const_key_parts;
|
|
/* Treat sum functions as normal ones when loose index scan is used. */
|
|
m_save_sum_fields|= param->precomputed_group_by;
|
|
DBUG_ENTER("Create_tmp_table::start");
|
|
DBUG_PRINT("enter",
|
|
("table_alias: '%s' distinct: %d save_sum_fields: %d "
|
|
"rows_limit: %lu group: %d", table_alias->str,
|
|
(int) m_distinct, (int) m_save_sum_fields,
|
|
(ulong) m_rows_limit, MY_TEST(m_group)));
|
|
|
|
if (use_temp_pool && !(test_flags & TEST_KEEP_TMP_TABLES))
|
|
m_temp_pool_slot = temp_pool_set_next();
|
|
|
|
if (m_temp_pool_slot != MY_BIT_NONE) // we got a slot
|
|
sprintf(path, "%s-%s-%lx-%i", tmp_file_prefix, param->tmp_name,
|
|
current_pid, m_temp_pool_slot);
|
|
else
|
|
{
|
|
/* if we run out of slots or we are not using tempool */
|
|
sprintf(path, "%s-%s-%lx-%llx-%x", tmp_file_prefix, param->tmp_name,
|
|
current_pid, thd->thread_id, thd->tmp_table++);
|
|
}
|
|
|
|
/*
|
|
No need to change table name to lower case as we are only creating
|
|
MyISAM, Aria or HEAP tables here
|
|
*/
|
|
fn_format(path, path, mysql_tmpdir, "", MY_REPLACE_EXT|MY_UNPACK_FILENAME);
|
|
|
|
if (m_group)
|
|
{
|
|
ORDER **prev= &m_group;
|
|
if (!param->quick_group)
|
|
m_group= 0; // Can't use group key
|
|
else for (ORDER *tmp= m_group ; tmp ; tmp= tmp->next)
|
|
{
|
|
/* Exclude found constant from the list */
|
|
if ((*tmp->item)->const_item())
|
|
{
|
|
*prev= tmp->next;
|
|
param->group_parts--;
|
|
continue;
|
|
}
|
|
else
|
|
prev= &(tmp->next);
|
|
/*
|
|
marker == 4 means two things:
|
|
- store NULLs in the key, and
|
|
- convert BIT fields to 64-bit long, needed because MEMORY tables
|
|
can't index BIT fields.
|
|
*/
|
|
(*tmp->item)->marker= MARKER_NULL_KEY; // Store null in key
|
|
if ((*tmp->item)->too_big_for_varchar())
|
|
m_using_unique_constraint= true;
|
|
}
|
|
if (param->group_length >= MAX_BLOB_WIDTH)
|
|
m_using_unique_constraint= true;
|
|
if (m_group)
|
|
m_distinct= 0; // Can't use distinct
|
|
}
|
|
|
|
m_alloced_field_count= param->field_count+param->func_count+param->sum_func_count;
|
|
DBUG_ASSERT(m_alloced_field_count);
|
|
const uint field_count= m_alloced_field_count;
|
|
|
|
/*
|
|
When loose index scan is employed as access method, it already
|
|
computes all groups and the result of all aggregate functions. We
|
|
make space for the items of the aggregate function in the list of
|
|
functions TMP_TABLE_PARAM::items_to_copy, so that the values of
|
|
these items are stored in the temporary table.
|
|
*/
|
|
if (param->precomputed_group_by)
|
|
copy_func_count+= param->sum_func_count;
|
|
param->copy_func_count= copy_func_count;
|
|
|
|
init_sql_alloc(key_memory_TABLE, &own_root, TABLE_ALLOC_BLOCK_SIZE, 0,
|
|
MYF(MY_THREAD_SPECIFIC));
|
|
|
|
if (!multi_alloc_root(&own_root,
|
|
&table, sizeof(*table),
|
|
&share, sizeof(*share),
|
|
®_field, sizeof(Field*) * (field_count+1),
|
|
&m_default_field, sizeof(Field*) * (field_count),
|
|
&blob_field, sizeof(uint)*(field_count+1),
|
|
&m_from_field, sizeof(Field*)*field_count,
|
|
¶m->items_to_copy,
|
|
sizeof(param->items_to_copy[0])*(copy_func_count+1),
|
|
¶m->keyinfo, sizeof(*param->keyinfo),
|
|
&m_key_part_info,
|
|
sizeof(*m_key_part_info)*(param->group_parts+1),
|
|
¶m->start_recinfo,
|
|
sizeof(*param->start_recinfo)*(field_count*2+4),
|
|
¶m->rec_per_key, sizeof(ulong)*param->group_parts,
|
|
&tmpname, (uint) strlen(path)+1,
|
|
&m_group_buff, (m_group && ! m_using_unique_constraint ?
|
|
param->group_length : 0),
|
|
&m_bitmaps, bitmap_buffer_size(field_count)*6,
|
|
&const_key_parts, sizeof(*const_key_parts),
|
|
NullS))
|
|
{
|
|
DBUG_RETURN(NULL); /* purecov: inspected */
|
|
}
|
|
/* Copy_field belongs to TMP_TABLE_PARAM, allocate it in THD mem_root */
|
|
if (!(param->copy_field= new (thd->mem_root) Copy_field[field_count]))
|
|
{
|
|
free_root(&own_root, MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NULL); /* purecov: inspected */
|
|
}
|
|
strmov(tmpname, path);
|
|
/* make table according to fields */
|
|
|
|
bzero((char*) table,sizeof(*table));
|
|
bzero((char*) reg_field, sizeof(Field*) * (field_count+1));
|
|
bzero((char*) m_default_field, sizeof(Field*) * (field_count));
|
|
bzero((char*) m_from_field, sizeof(Field*) * field_count);
|
|
/* const_key_parts is used in sort_and_filter_keyuse */
|
|
bzero((char*) const_key_parts, sizeof(*const_key_parts));
|
|
|
|
table->mem_root= own_root;
|
|
mem_root_save= thd->mem_root;
|
|
thd->mem_root= &table->mem_root;
|
|
|
|
table->field=reg_field;
|
|
table->const_key_parts= const_key_parts;
|
|
table->alias.set(table_alias->str, table_alias->length, table_alias_charset);
|
|
|
|
table->reginfo.lock_type=TL_WRITE; /* Will be updated */
|
|
table->map=1;
|
|
table->temp_pool_slot= m_temp_pool_slot;
|
|
table->copy_blobs= 1;
|
|
table->in_use= thd;
|
|
table->no_rows_with_nulls= param->force_not_null_cols;
|
|
table->expr_arena= thd;
|
|
|
|
table->s= share;
|
|
init_tmp_table_share(thd, share, "", 0, "(temporary)", tmpname, true);
|
|
share->blob_field= blob_field;
|
|
share->table_charset= param->table_charset;
|
|
share->primary_key= MAX_KEY; // Indicate no primary key
|
|
if (param->schema_table)
|
|
share->db= Lex_ident_db(INFORMATION_SCHEMA_NAME);
|
|
|
|
param->using_outer_summary_function= 0;
|
|
thd->mem_root= mem_root_save;
|
|
DBUG_RETURN(table);
|
|
}
|
|
|
|
|
|
/*
  Create the columns of the temporary table from the item list.

  For every item of 'fields' that has to be stored, a Field is created
  and registered with add_field(). For an aggregate function that cannot
  be calculated yet (no group key and sum fields are not saved), columns
  are created for its non-constant arguments instead, and the arguments
  are replaced with Item_field objects pointing at these columns.

  While the columns are created, thd->mem_root points at the memory root
  of the table; it is restored before returning.

  Returns false on success, true on error.
*/
bool Create_tmp_table::add_fields(THD *thd,
                                  TABLE *table,
                                  TMP_TABLE_PARAM *param,
                                  List<Item> &fields)
{
  DBUG_ENTER("Create_tmp_table::add_fields");
  DBUG_ASSERT(table);
  DBUG_ASSERT(table->field);
  DBUG_ASSERT(table->s->blob_field);
  DBUG_ASSERT(table->s->reclength == 0);
  DBUG_ASSERT(table->s->fields == 0);
  DBUG_ASSERT(table->s->blob_fields == 0);

  const bool not_all_columns= !(m_select_options & TMP_TABLE_ALL_COLUMNS);
  bool distinct_record_structure= m_distinct;
  uint fieldnr= 0;
  TABLE_SHARE *share= table->s;
  Item **copy_func= param->items_to_copy;

  MEM_ROOT *mem_root_save= thd->mem_root;
  thd->mem_root= &table->mem_root;

  List_iterator_fast<Item> li(fields);
  Item *item;
  Field **tmp_from_field= m_from_field;
  while (!m_with_cycle && (item= li++))
    if (item->is_in_with_cycle())
    {
      m_with_cycle= true;
      /*
        Following distinct_record_structure is (m_distinct || m_with_cycle)

        Note: distinct_record_structure can be true even if m_distinct is
        false, for example for incr_table in recursive CTE
        (see select_union_recursive::create_result_table)
      */
      distinct_record_structure= true;
    }
  li.rewind();
  while ((item=li++))
  {
    uint uneven_delta;
    current_counter= (((param->hidden_field_count < (fieldnr + 1)) &&
                       distinct_record_structure &&
                       (!m_with_cycle || item->is_in_with_cycle())) ?
                      distinct :
                      other);
    Item::Type type= item->type();
    if (type == Item::COPY_STR_ITEM)
    {
      item= ((Item_copy *)item)->get_item();
      type= item->type();
    }
    if (not_all_columns)
    {
      if (item->with_sum_func() && type != Item::SUM_FUNC_ITEM)
      {
        if (item->used_tables() & OUTER_REF_TABLE_BIT)
          item->update_used_tables();
        if ((item->real_type() == Item::SUBSELECT_ITEM) ||
            (item->used_tables() & ~OUTER_REF_TABLE_BIT))
        {
          /*
            Mark that we have ignored an item that refers to a summary
            function. We need to know this if someone is going to use
            DISTINCT on the result.
          */
          param->using_outer_summary_function=1;
          continue;
        }
      }
      if (item->const_item() &&
          param->hidden_field_count < (fieldnr + 1))
        continue;                               // We don't have to store this
    }
    if (type == Item::SUM_FUNC_ITEM && !m_group && !m_save_sum_fields)
    {                                           /* Can't calc group yet */
      Item_sum *sum_item= (Item_sum *) item;
      sum_item->result_field=0;
      for (uint i= 0 ; i < sum_item->get_arg_count() ; i++)
      {
        Item *arg= sum_item->get_arg(i);
        if (!arg->const_item())
        {
          Item *tmp_item;
          Field *new_field=
            create_tmp_field(table, arg, &copy_func,
                             tmp_from_field, &m_default_field[fieldnr],
                             m_group != 0, not_all_columns,
                             distinct_record_structure , false);
          if (!new_field)
            goto err;                           // Should be OOM
          tmp_from_field++;

          /* The replacement Item_field is allocated on the saved mem_root */
          thd->mem_root= mem_root_save;
          if (!(tmp_item= new (thd->mem_root)
                Item_field(thd, new_field)))
            goto err;
          ((Item_field*) tmp_item)->set_refers_to_temp_table();
          arg= sum_item->set_arg(i, thd, tmp_item);
          thd->mem_root= &table->mem_root;

          uneven_delta= m_uneven_bit_length;
          add_field(table, new_field, fieldnr++, param->force_not_null_cols);
          m_field_count[current_counter]++;
          m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta);

          if (!(new_field->flags & NOT_NULL_FLAG))
          {
            /*
              new_field->maybe_null() is still false, it will be
              changed below. But we have to setup Item_field correctly
            */
            arg->set_maybe_null();
          }
          if (current_counter == distinct)
            new_field->flags|= FIELD_PART_OF_TMP_UNIQUE;
        }
      }
    }
    else
    {
      /*
        The modify_item argument of create_tmp_field() is a bit tricky:

        We need to set it to 0 in union, to get fill_record() to modify the
        temporary table.
        We need to set it to 1 on multi-table-update and in select to
        write rows to the temporary table.
        We here distinguish between UNION and multi-table-updates by the fact
        that in the latter case group is set to the row pointer.

        The test for item->marker == MARKER_NULL_KEY is to ensure we
        don't create a group-by key over a bit field as heap tables
        can't handle that.
      */
      DBUG_ASSERT(!param->schema_table);
      Field *new_field=
        create_tmp_field(table, item, &copy_func,
                         tmp_from_field, &m_default_field[fieldnr],
                         m_group != 0,
                         !param->force_copy_fields &&
                         (not_all_columns || m_group !=0),
                         /*
                           If item->marker == MARKER_NULL_KEY then we
                           force create_tmp_field to create a 64-bit
                           longs for BIT fields because HEAP tables
                           can't index BIT fields directly. We do the
                           same for distinct, as we want the distinct
                           index to be usable in this case too.
                         */
                         item->marker == MARKER_NULL_KEY ||
                         param->bit_fields_as_long,
                         param->force_copy_fields);
      if (unlikely(!new_field))
      {
        if (unlikely(thd->is_fatal_error))
          goto err;                             // Got OOM
        continue;                               // Some kind of const item
      }
      if (type == Item::SUM_FUNC_ITEM)
      {
        Item_sum *agg_item= (Item_sum *) item;
        /*
          Update the result field only if it has never been set, or if the
          created temporary table is not to be used for subquery
          materialization.

          The reason is that for subqueries that require
          materialization as part of their plan, we create the
          'external' temporary table needed for IN execution, after
          the 'internal' temporary table needed for grouping.  Since
          both the external and the internal temporary tables are
          created for the same list of SELECT fields of the subquery,
          setting 'result_field' for each invocation of
          create_tmp_table overrides the previous value of
          'result_field'.

          The condition below prevents the creation of the external
          temp table to override the 'result_field' that was set for
          the internal temp table.
        */
        if (!agg_item->result_field || !param->materialized_subquery)
          agg_item->result_field= new_field;
      }
      tmp_from_field++;

      uneven_delta= m_uneven_bit_length;
      add_field(table, new_field, fieldnr++, param->force_not_null_cols);
      m_field_count[current_counter]++;
      m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta);

      if (item->marker == MARKER_NULL_KEY && item->maybe_null())
      {
        m_group_null_items++;
        new_field->flags|= GROUP_FLAG;
      }
      if (current_counter == distinct)
        new_field->flags|= FIELD_PART_OF_TMP_UNIQUE;
    }
  }

  DBUG_ASSERT(fieldnr == m_field_count[other] + m_field_count[distinct]);
  DBUG_ASSERT(m_blob_count == m_blobs_count[other] + m_blobs_count[distinct]);
  share->fields= fieldnr;
  share->blob_fields= m_blob_count;
  table->field[fieldnr]= 0;                     // End marker
  share->blob_field[m_blob_count]= 0;           // End marker
  copy_func[0]= 0;                              // End marker
  param->func_count= (uint) (copy_func - param->items_to_copy);
  DBUG_ASSERT(param->func_count <= param->copy_func_count);

  share->column_bitmap_size= bitmap_buffer_size(share->fields);

  thd->mem_root= mem_root_save;
  DBUG_RETURN(false);

err:
  thd->mem_root= mem_root_save;
  DBUG_RETURN(true);
}
|
|
|
|
|
|
/**
  Pick the storage engine for the temporary table and create its handler.

  The on-disk temporary engine (TMP_ENGINE_HTON) is used when the table has
  blob fields, when a unique constraint is already required, when big_tables
  is set and SELECT_SMALL_RESULT is not, when TMP_TABLE_FORCE_MYISAM is set,
  or when tmp_memory_table_size is 0. Otherwise the HEAP engine is used.

  For the on-disk engine with GROUP BY, m_using_unique_constraint is turned
  on if the group key has more parts or is longer than the handler supports.

  @param thd    Current thread; its session variables are consulted
  @param table  Table being built; table->s->db_plugin and table->file are set
  @param param  Temporary table parameters (group_parts, group_length)

  @retval false  A handler was created
  @retval true   No handler could be created
*/
bool Create_tmp_table::choose_engine(THD *thd, TABLE *table,
                                     TMP_TABLE_PARAM *param)
{
  TABLE_SHARE *share= table->s;
  DBUG_ENTER("Create_tmp_table::choose_engine");
  /*
    If result table is small; use a heap, otherwise TMP_TABLE_HTON (Aria)
    In the future we should try making storage engine selection more dynamic
  */

  if (share->blob_fields || m_using_unique_constraint ||
      (thd->variables.big_tables &&
       !(m_select_options & SELECT_SMALL_RESULT)) ||
      (m_select_options & TMP_TABLE_FORCE_MYISAM) ||
      thd->variables.tmp_memory_table_size == 0)
  {
    share->db_plugin= ha_lock_engine(0, TMP_ENGINE_HTON);
    table->file= get_new_handler(share, &table->mem_root,
                                 share->db_type());
    /*
      The handler may not have been created (the function reports that case
      through its return value), so it must not be dereferenced blindly.
    */
    if (table->file && m_group &&
        (param->group_parts > table->file->max_key_parts() ||
         param->group_length > table->file->max_key_length()))
      m_using_unique_constraint= true;
  }
  else
  {
    share->db_plugin= ha_lock_engine(0, heap_hton);
    table->file= get_new_handler(share, &table->mem_root,
                                 share->db_type());
  }
  DBUG_RETURN(!table->file);
}
|
|
|
|
|
|
/**
  Finish building a temporary table after its fields have been added.

  In order, this function:
   - chooses the storage engine and attaches the handler to the table,
   - computes the NULL-bit area sizes for the "other" and "distinct" field
     groups and the total record length,
   - allocates record[0], record[1] and share->default_values,
   - positions every field inside record[0], fills in the engine column
     descriptions (recinfo), copies inherited default values and sets up
     the Copy_field entries,
   - computes share->max_rows from the session size limits and m_rows_limit,
   - describes the GROUP BY key or the DISTINCT key in param->keyinfo,
   - unless do_not_open is set, instantiates the table in the engine.

  While the function runs, thd->mem_root points to table->mem_root; it is
  restored on both the success and the error path.

  @param thd             Current thread
  @param table           Table being built (must not be NULL)
  @param param           Temporary table parameters; keyinfo, recinfo,
                         copy_field_end, group_buff and end_write_records
                         are updated
  @param do_not_open     If true, the table is not instantiated here
  @param keep_row_order  Stored into table->keep_row_order

  @retval false  Success
  @retval true   Error
*/
bool Create_tmp_table::finalize(THD *thd,
                                TABLE *table,
                                TMP_TABLE_PARAM *param,
                                bool do_not_open, bool keep_row_order)
{
  DBUG_ENTER("Create_tmp_table::finalize");
  DBUG_ASSERT(table);

  uint null_pack_length[2];
  uint null_pack_base[2];
  uint null_counter[2]= {0, 0};
  uint whole_null_pack_length;
  bool use_packed_rows= false;
  bool save_abort_on_warning;
  uchar *pos;
  uchar *null_flags;
  KEY *keyinfo= param->keyinfo;
  TMP_ENGINE_COLUMNDEF *recinfo;
  TABLE_SHARE *share= table->s;
  Copy_field *copy= param->copy_field;
  MEM_ROOT *mem_root_save= thd->mem_root;
  thd->mem_root= &table->mem_root;

  DBUG_ASSERT(m_alloced_field_count >= share->fields);
  DBUG_ASSERT(m_alloced_field_count >= share->blob_fields);

  if (choose_engine(thd, table, param))
    goto err;

  if (table->file->set_ha_share_ref(&share->ha_share))
  {
    delete table->file;
    table->file= 0;
    goto err;
  }
  table->file->set_table(table);

  if (!m_using_unique_constraint)
    share->reclength+= m_group_null_items; // null flag is stored separately

  if (share->blob_fields == 0)
  {
    /* We need to ensure that first byte is not 0 for the delete link */
    if (m_field_count[other])
      m_null_count[other]++;
    else
      m_null_count[distinct]++;
  }

  /*
    The NULL bits of the "other" fields come first, followed by the NULL
    bits of the "distinct" fields.
  */
  null_pack_length[other]= (m_null_count[other] + 7 +
                            m_uneven_bit[other]) / 8;
  null_pack_base[other]= 0;
  null_pack_length[distinct]= (m_null_count[distinct] + 7 +
                               m_uneven_bit[distinct]) / 8;
  null_pack_base[distinct]= null_pack_length[other];
  whole_null_pack_length= null_pack_length[other] +
                          null_pack_length[distinct];
  share->reclength+= whole_null_pack_length;
  if (!share->reclength)
    share->reclength= 1;                        // Dummy select
  share->stored_rec_length= share->reclength;
  /* Use packed rows if there are blobs or a lot of space to gain */
  if (share->blob_fields ||
      (string_total_length() >= STRING_TOTAL_LENGTH_TO_PACK_ROWS &&
       (share->reclength / string_total_length() <= RATIO_TO_PACK_ROWS ||
        string_total_length() / string_count() >= AVG_STRING_LENGTH_TO_PACK_ROWS)))
    use_packed_rows= 1;

  {
    /* One allocation holds record[0], record[1] and the default values */
    uint alloc_length= ALIGN_SIZE(share->reclength + MI_UNIQUE_HASH_LENGTH+1);
    share->rec_buff_length= alloc_length;
    if (!(table->record[0]= (uchar*)
                            alloc_root(&table->mem_root, alloc_length*3)))
      goto err;
    table->record[1]= table->record[0]+alloc_length;
    share->default_values= table->record[1]+alloc_length;
  }

  setup_tmp_table_column_bitmaps(table, m_bitmaps, table->s->fields);

  recinfo=param->start_recinfo;
  null_flags=(uchar*) table->record[0];
  pos=table->record[0]+ whole_null_pack_length;
  if (whole_null_pack_length)
  {
    /* The first engine column describes the NULL-bit area */
    bzero((uchar*) recinfo,sizeof(*recinfo));
    recinfo->type=FIELD_NORMAL;
    recinfo->length= whole_null_pack_length;
    recinfo++;
    bfill(null_flags, whole_null_pack_length, 255); // Set null fields

    table->null_flags= (uchar*) table->record[0];
    share->null_fields= m_null_count[other] + m_null_count[distinct];
    share->null_bytes= share->null_bytes_for_compare= whole_null_pack_length;
  }

  if (share->blob_fields == 0)
  {
    /* Skip the bit that was reserved above for the delete link */
    null_counter[(m_field_count[other] ? other : distinct)]++;
  }

  /* Protect against warnings in field_conv() in the next loop */
  save_abort_on_warning= thd->abort_on_warning;
  thd->abort_on_warning= 0;

  for (uint i= 0; i < share->fields; i++, recinfo++)
  {
    Field *field= table->field[i];
    uint length;
    bzero((uchar*) recinfo,sizeof(*recinfo));

    current_counter= ((field->flags & FIELD_PART_OF_TMP_UNIQUE) ?
                      distinct :
                      other);

    if (!(field->flags & NOT_NULL_FLAG))
    {
      recinfo->null_bit= (uint8)1 << (null_counter[current_counter] & 7);
      recinfo->null_pos= (null_pack_base[current_counter] +
                          null_counter[current_counter]/8);
      field->move_field(pos, null_flags + recinfo->null_pos, recinfo->null_bit);
      null_counter[current_counter]++;
    }
    else
      field->move_field(pos,(uchar*) 0,0);
    if (field->type() == MYSQL_TYPE_BIT)
    {
      /* We have to reserve place for extra bits among null bits */
      ((Field_bit*) field)->set_bit_ptr(null_flags +
                                        null_pack_base[current_counter] +
                                        null_counter[current_counter]/8,
                                        null_counter[current_counter] & 7);
      null_counter[current_counter]+= (field->field_length & 7);
    }
    field->reset();

    /*
      Test if there is a default field value. The test for ->ptr is to skip
      'offset' fields generated by initialize_tables
    */
    if (m_default_field[i] && m_default_field[i]->ptr)
    {
      /*
        default_field[i] is set only in the cases when 'field' can
        inherit the default value that is defined for the field referred
        by the Item_field object from which 'field' has been created.
      */
      Field *orig_field= m_default_field[i];
      /* Get the value from default_values */
      if (orig_field->is_null_in_record(orig_field->table->s->default_values))
        field->set_null();
      else
      {
        /*
          Copy default value. We have to use field_conv() for copy, instead of
          memcpy(), because bit_fields may be stored differently.
          But otherwise we copy as is, in particular, ignore NO_ZERO_DATE, etc
        */
        Use_relaxed_field_copy urfc(thd);
        my_ptrdiff_t ptr_diff= (orig_field->table->s->default_values -
                                orig_field->table->record[0]);
        field->set_notnull();
        /* Temporarily point orig_field at its table's default record */
        orig_field->move_field_offset(ptr_diff);
        field_conv(field, orig_field);
        orig_field->move_field_offset(-ptr_diff);
      }
    }

    if (m_from_field[i])
    {                                           /* Not a table Item */
      copy->set(field, m_from_field[i], m_save_sum_fields);
      copy++;
    }
    length=field->pack_length_in_rec();
    pos+= length;

    /* Make entry for create table */
    recinfo->length=length;
    recinfo->type= field->tmp_engine_column_type(use_packed_rows);

    // fix table name in field entry
    field->set_table_name(&table->alias);
  }
  /* Handle group_null_items */
  bzero(pos, table->s->reclength - (pos - table->record[0]));
  MEM_CHECK_DEFINED(table->record[0], table->s->reclength);

  thd->abort_on_warning= save_abort_on_warning;
  param->copy_field_end= copy;
  param->recinfo= recinfo;                      // Pointer to after last field
  store_record(table,s->default_values);        // Make empty default record

  if (thd->variables.tmp_memory_table_size == ~ (ulonglong) 0) // No limit
    share->max_rows= ~(ha_rows) 0;
  else
    share->max_rows= (ha_rows) (((share->db_type() == heap_hton) ?
                                 MY_MIN(thd->variables.tmp_memory_table_size,
                                        thd->variables.max_heap_table_size) :
                                 thd->variables.tmp_disk_table_size) /
                                share->reclength);
  set_if_bigger(share->max_rows,1);             // For dummy start options
  /*
    Push the LIMIT clause to the temporary table creation, so that we
    materialize only up to 'rows_limit' records instead of all result records.
  */
  set_if_smaller(share->max_rows, m_rows_limit);
  param->end_write_records= m_rows_limit;

  if (m_group)
  {
    DBUG_PRINT("info",("Creating group key in temporary table"));
    table->group= m_group;                      /* Table is grouped by key */
    param->group_buff= m_group_buff;
    share->total_keys= share->keys= 1;
    table->key_info= table->s->key_info= keyinfo;
    table->keys_in_use_for_query.set_bit(0);
    share->keys_in_use.set_bit(0);
    keyinfo->key_part= m_key_part_info;
    keyinfo->flags=HA_NOSAME | HA_BINARY_PACK_KEY | HA_PACK_KEY;
    if (m_using_unique_constraint)
      keyinfo->flags|= HA_UNIQUE_HASH;
    keyinfo->ext_key_flags= keyinfo->flags;
    keyinfo->usable_key_parts=keyinfo->user_defined_key_parts=
      param->group_parts;
    keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
    share->ext_key_parts= share->key_parts= keyinfo->ext_key_parts;
    keyinfo->key_length=0;
    keyinfo->rec_per_key= param->rec_per_key;
    keyinfo->read_stats= NULL;
    keyinfo->collected_stats= NULL;
    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
    keyinfo->is_statistics_from_stat_tables= FALSE;
    keyinfo->name= group_key;
    keyinfo->comment.str= 0;
    ORDER *cur_group= m_group;
    for (; cur_group ; cur_group= cur_group->next, m_key_part_info++)
    {
      Field *field=(*cur_group->item)->get_tmp_table_field();
      DBUG_ASSERT(field->table == table);
      bool maybe_null=(*cur_group->item)->maybe_null();
      m_key_part_info->null_bit=0;
      m_key_part_info->field= field;
      m_key_part_info->fieldnr= field->field_index + 1;
      if (cur_group == m_group)
        field->key_start.set_bit(0);
      m_key_part_info->offset= field->offset(table->record[0]);
      m_key_part_info->length= (uint16) field->key_length();
      m_key_part_info->type= (uint8) field->key_type();
      m_key_part_info->key_type =
        ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
        0 : FIELDFLAG_BINARY;
      m_key_part_info->key_part_flag= 0;
      if (!m_using_unique_constraint)
      {
        cur_group->buff=(char*) m_group_buff;

        if (maybe_null && !field->null_bit)
        {
          /*
            This can only happen in the unusual case where an outer join
            table was found to be not-nullable by the optimizer and
            the item can't really be null.
            We solve this by marking the item as !maybe_null to ensure
            that the key,field and item definition match.
          */
          maybe_null= 0;
          (*cur_group->item)->base_flags&= ~item_base_t::MAYBE_NULL;
        }

        if (!(cur_group->field= field->new_key_field(thd->mem_root,table,
                                                     m_group_buff +
                                                     MY_TEST(maybe_null),
                                                     m_key_part_info->length,
                                                     field->null_ptr,
                                                     field->null_bit)))
          goto err; /* purecov: inspected */

        if (maybe_null)
        {
          /*
            To be able to group on NULL, we reserved place in group_buff
            for the NULL flag just before the column. (see above).
            The field data is after this flag.
            The NULL flag is updated in 'end_update()' and 'end_write()'
          */
          keyinfo->flags|= HA_NULL_ARE_EQUAL;   // def. that NULL == NULL
          m_key_part_info->null_bit=field->null_bit;
          m_key_part_info->null_offset= (uint) (field->null_ptr -
                                                (uchar*) table->record[0]);
          cur_group->buff++;                    // Pointer to field data
          m_group_buff++;                       // Skip null flag
        }
        m_group_buff+= cur_group->field->pack_length();
      }
      keyinfo->key_length+= m_key_part_info->length;
    }
    /*
      Ensure we didn't overrun the group buffer. The < is only true when
      some maybe_null fields were changed to be not null fields.
    */
    DBUG_ASSERT(m_using_unique_constraint ||
                m_group_buff <= param->group_buff + param->group_length);
  }

  if (m_distinct && (share->fields != param->hidden_field_count ||
                     m_with_cycle))
  {
    uint i;
    Field **reg_field;
    /*
      Create an unique key or an unique constraint over all columns
      that should be in the result.  In the temporary table, there are
      'param->hidden_field_count' extra columns, whose null bits are stored
      in the first 'hidden_null_pack_length' bytes of the row.
    */
    DBUG_PRINT("info",("hidden_field_count: %d", param->hidden_field_count));

    keyinfo->flags= 0;
    if (m_blobs_count[distinct])
    {
      /*
        Special mode for index creation in MyISAM used to support unique
        indexes on blobs with arbitrary length. Such indexes cannot be
        used for lookups.
      */
      keyinfo->flags|= HA_UNIQUE_HASH;
    }
    keyinfo->user_defined_key_parts= m_field_count[distinct] +
      ((keyinfo->flags & HA_UNIQUE_HASH) ?
       MY_TEST(null_pack_length[distinct]) : 0);
    keyinfo->ext_key_parts= keyinfo->user_defined_key_parts;
    keyinfo->usable_key_parts= keyinfo->user_defined_key_parts;
    table->distinct= 1;
    share->total_keys= share->keys= 1;
    share->ext_key_parts= share->key_parts= keyinfo->ext_key_parts;
    if (!(m_key_part_info= (KEY_PART_INFO*)
          alloc_root(&table->mem_root,
                     keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO))))
      goto err;
    bzero((void*) m_key_part_info, keyinfo->user_defined_key_parts * sizeof(KEY_PART_INFO));
    table->keys_in_use_for_query.set_bit(0);
    share->keys_in_use.set_bit(0);
    table->key_info= table->s->key_info= keyinfo;
    keyinfo->key_part= m_key_part_info;
    keyinfo->flags|= (HA_NOSAME | HA_NULL_ARE_EQUAL | HA_BINARY_PACK_KEY |
                      HA_PACK_KEY);
    keyinfo->ext_key_flags= keyinfo->flags;
    keyinfo->key_length= 0;  // Will compute the sum of the parts below.
    keyinfo->name= distinct_key;
    keyinfo->algorithm= HA_KEY_ALG_UNDEF;
    keyinfo->is_statistics_from_stat_tables= FALSE;
    keyinfo->read_stats= NULL;
    keyinfo->collected_stats= NULL;

    /*
      Needed by non-merged semi-joins: SJ-Materialized table must have a valid
      rec_per_key array, because it participates in join optimization. Since
      the table has no data, the only statistics we can provide is "unknown",
      i.e. zero values.

      (For table record count, we calculate and set JOIN_TAB::found_records,
       see get_delayed_table_estimates()).
    */
    size_t rpk_size= keyinfo->user_defined_key_parts * sizeof(keyinfo->rec_per_key[0]);
    if (!(keyinfo->rec_per_key= (ulong*) alloc_root(&table->mem_root,
                                                    rpk_size)))
      goto err;
    bzero(keyinfo->rec_per_key, rpk_size);

    /*
      Create an extra field to hold NULL bits so that unique indexes on
      blobs can distinguish NULL from 0. This extra field is not needed
      when we do not use UNIQUE indexes for blobs.
    */
    if (null_pack_length[distinct] && (keyinfo->flags & HA_UNIQUE_HASH))
    {
      m_key_part_info->null_bit=0;
      m_key_part_info->offset= null_pack_base[distinct];
      m_key_part_info->length= null_pack_length[distinct];
      m_key_part_info->field= new Field_string(table->record[0],
                                               (uint32) m_key_part_info->length,
                                               (uchar*) 0,
                                               (uint) 0,
                                               Field::NONE,
                                               &null_clex_str, &my_charset_bin);
      if (!m_key_part_info->field)
        goto err;
      m_key_part_info->field->init(table);
      m_key_part_info->key_type=FIELDFLAG_BINARY;
      m_key_part_info->type=    HA_KEYTYPE_BINARY;
      m_key_part_info->fieldnr= m_key_part_info->field->field_index + 1;
      m_key_part_info++;
    }
    /* Create a distinct key over the columns we are going to return */
    for (i= param->hidden_field_count, reg_field= table->field + i ;
         i < share->fields;
         i++, reg_field++)
    {
      if (!((*reg_field)->flags & FIELD_PART_OF_TMP_UNIQUE))
        continue;
      m_key_part_info->field= *reg_field;
      (*reg_field)->flags |= PART_KEY_FLAG;
      if (m_key_part_info == keyinfo->key_part)
        (*reg_field)->key_start.set_bit(0);
      m_key_part_info->null_bit= (*reg_field)->null_bit;
      m_key_part_info->null_offset= (uint) ((*reg_field)->null_ptr -
                                            (uchar*) table->record[0]);

      m_key_part_info->offset= (*reg_field)->offset(table->record[0]);
      m_key_part_info->length= (uint16) (*reg_field)->pack_length();
      m_key_part_info->fieldnr= (*reg_field)->field_index + 1;
      /* TODO:
        The below method of computing the key format length of the
        key part is a copy/paste from opt_range.cc, and table.cc.
        This should be factored out, e.g. as a method of Field.
        In addition it is not clear if any of the Field::*_length
        methods is supposed to compute the same length. If so, it
        might be reused.
      */
      m_key_part_info->store_length= m_key_part_info->length;

      if ((*reg_field)->real_maybe_null())
      {
        m_key_part_info->store_length+= HA_KEY_NULL_LENGTH;
        m_key_part_info->key_part_flag |= HA_NULL_PART;
      }
      m_key_part_info->key_part_flag|= (*reg_field)->key_part_flag();
      m_key_part_info->store_length+= (*reg_field)->key_part_length_bytes();
      keyinfo->key_length+= m_key_part_info->store_length;

      m_key_part_info->type=     (uint8) (*reg_field)->key_type();
      m_key_part_info->key_type =
        ((ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_TEXT ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT1 ||
         (ha_base_keytype) m_key_part_info->type == HA_KEYTYPE_VARTEXT2) ?
        0 : FIELDFLAG_BINARY;

      m_key_part_info++;
    }
  }
  if (share->keys)
    keyinfo->index_flags= table->file->index_flags(0, 0, 1);

  if (unlikely(thd->is_fatal_error))            // If end of memory
    goto err; /* purecov: inspected */
  share->db_record_offset= 1;
  table->used_for_duplicate_elimination= (param->sum_func_count == 0 &&
                                          (table->group || table->distinct));
  table->keep_row_order= keep_row_order;

  if (!do_not_open)
  {
    if (instantiate_tmp_table(table, param->keyinfo, param->start_recinfo,
                              &param->recinfo, m_select_options))
      goto err;
  }

  /* record[0] and share->default_values should now have been set up */
  MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
  MEM_CHECK_DEFINED(share->default_values, table->s->reclength);

  empty_record(table);
  table->status= STATUS_NO_RECORD;
  thd->mem_root= mem_root_save;

  DBUG_RETURN(false);

err:
  thd->mem_root= mem_root_save;
  DBUG_RETURN(true); /* purecov: inspected */
}
|
|
|
|
|
|
/**
  Add the columns of an INFORMATION_SCHEMA table to an empty temporary table.

  One Field is created per entry of schema_table.fields_info, up to its end
  marker. Every field gets NO_DEFAULT_VALUE_FLAG and is registered through
  add_field(). thd->mem_root points to table->mem_root while the function
  runs and is restored before returning.

  @param thd           Current thread
  @param table         Table being built; must not contain any field yet
  @param param         Temporary table parameters; func_count is reset to 0
  @param schema_table  Schema table whose column definitions are used

  @retval false  Success
  @retval true   A field could not be created
*/
bool Create_tmp_table::add_schema_fields(THD *thd, TABLE *table,
                                         TMP_TABLE_PARAM *param,
                                         const ST_SCHEMA_TABLE &schema_table)
{
  DBUG_ENTER("Create_tmp_table::add_schema_fields");
  DBUG_ASSERT(table);
  DBUG_ASSERT(table->field);
  DBUG_ASSERT(table->s->blob_field);
  DBUG_ASSERT(table->s->reclength == 0);
  DBUG_ASSERT(table->s->fields == 0);
  DBUG_ASSERT(table->s->blob_fields == 0);

  TABLE_SHARE *tmp_share= table->s;
  MEM_ROOT *saved_root= thd->mem_root;
  uint column_no= 0;

  thd->mem_root= &table->mem_root;

  for (ST_FIELD_INFO *column= schema_table.fields_info;
       !column->end_marker();
       column++, column_no++)
  {
    Record_addr rec_addr(column->nullable());
    const Type_handler *handler= column->type_handler();
    Field *new_field= handler->make_schema_field(&table->mem_root, table,
                                                 rec_addr, *column);
    if (new_field == NULL)
    {
      thd->mem_root= saved_root;
      DBUG_RETURN(true);                        // EOM
    }
    new_field->init(table);
    new_field->flags|= NO_DEFAULT_VALUE_FLAG;
    add_field(table, new_field, column_no, param->force_not_null_cols);
  }

  /* Publish the counts and terminate both arrays */
  tmp_share->fields= column_no;
  tmp_share->blob_fields= m_blob_count;
  table->field[column_no]= 0;                   // End marker
  tmp_share->blob_field[m_blob_count]= 0;       // End marker
  param->func_count= 0;
  tmp_share->column_bitmap_size= bitmap_buffer_size(tmp_share->fields);

  thd->mem_root= saved_root;
  DBUG_RETURN(false);
}
|
|
|
|
|
|
/**
  Undo a partially completed temporary table creation.

  Frees the table if one was created and clears the temp pool bit if a
  slot is recorded in m_temp_pool_slot.

  @param thd    Current thread
  @param table  Table to free, or NULL if none was created
*/
void Create_tmp_table::cleanup_on_failure(THD *thd, TABLE *table)
{
  if (table != NULL)
  {
    free_tmp_table(thd, table);
  }

  if (m_temp_pool_slot == MY_BIT_NONE)
    return;                                     // No pool slot recorded
  temp_pool_clear_bit(m_temp_pool_slot);
}
|
|
|
|
|
|
/**
  Create a temporary table for the given list of items.

  Runs the three Create_tmp_table stages in order: start(), add_fields()
  and finalize(). If any stage fails, the work done so far is cleaned up.

  @return The new table, or NULL on failure
*/
TABLE *create_tmp_table(THD *thd, TMP_TABLE_PARAM *param, List<Item> &fields,
                        ORDER *group, bool distinct, bool save_sum_fields,
                        ulonglong select_options, ha_rows rows_limit,
                        const LEX_CSTRING *table_alias, bool do_not_open,
                        bool keep_row_order)
{
  Create_tmp_table maker(group, distinct, save_sum_fields, select_options,
                         rows_limit);
  TABLE *table= maker.start(thd, param, table_alias);

  /* Later stages run only if every earlier stage succeeded */
  if (table &&
      !maker.add_fields(thd, table, param, fields) &&
      !maker.finalize(thd, table, param, do_not_open, keep_row_order))
    return table;

  maker.cleanup_on_failure(thd, table);
  return NULL;
}
|
|
|
|
|
|
/**
  Create a temporary table with the columns of an INFORMATION_SCHEMA table.

  The table is built without GROUP BY, without DISTINCT and with
  HA_ROWS_MAX as the row limit. If any stage fails, the work done so far
  is cleaned up.

  @return The new table, or NULL on failure
*/
TABLE *create_tmp_table_for_schema(THD *thd, TMP_TABLE_PARAM *param,
                                   const ST_SCHEMA_TABLE &schema_table,
                                   longlong select_options,
                                   const LEX_CSTRING &table_alias,
                                   bool do_not_open, bool keep_row_order)
{
  Create_tmp_table maker((ORDER *) NULL, false, false,
                         select_options, HA_ROWS_MAX);
  TABLE *table= maker.start(thd, param, &table_alias);

  /* Later stages run only if every earlier stage succeeded */
  if (table &&
      !maker.add_schema_fields(thd, table, param, schema_table) &&
      !maker.finalize(thd, table, param, do_not_open, keep_row_order))
    return table;

  maker.cleanup_on_failure(thd, table);
  return NULL;
}
|
|
|
|
|
|
/****************************************************************************/
|
|
|
|
/**
  Allocate storage for a Virtual_tmp_table on the memory root of a thread.

  @param size  Number of bytes to allocate
  @param thd   Thread whose mem_root provides the memory

  @return Pointer to the allocated memory, as returned by alloc_root()
*/
void *Virtual_tmp_table::operator new(size_t size, THD *thd) throw()
{
  Virtual_tmp_table *storage=
    (Virtual_tmp_table *) alloc_root(thd->mem_root, size);
  return storage;
}
|
|
|
|
|
|
/**
  Allocate the share, the field array, the blob index array and the column
  bitmaps for a table of up to field_count fields.

  All parts come from a single multi_alloc_root() call on the memory root
  of in_use. The field and blob arrays have one extra element each.

  @param field_count  Number of fields the table is going to hold

  @retval false  Success
  @retval true   Allocation failed
*/
bool Virtual_tmp_table::init(uint field_count)
{
  DBUG_ENTER("Virtual_tmp_table::init");
  uchar *bitmap_storage;
  uint *blob_index_storage;

  if (!multi_alloc_root(in_use->mem_root,
                        &s, sizeof(*s),
                        &field, (field_count + 1) * sizeof(Field*),
                        &blob_index_storage, (field_count + 1) * sizeof(uint),
                        &bitmap_storage, bitmap_buffer_size(field_count) * 6,
                        NullS))
  {
    DBUG_RETURN(true);
  }

  s->reset();
  s->blob_field= blob_index_storage;
  setup_tmp_table_column_bitmaps(this, bitmap_storage, field_count);
  m_alloced_field_count= field_count;
  DBUG_RETURN(false);
}
|
|
|
|
|
|
/**
  Create a Field for every column definition in the list and add it to
  the table.

  @param field_list  Column definitions

  @retval false  All fields were created and added
  @retval true   A field could not be created
*/
bool Virtual_tmp_table::add(List<Spvar_definition> &field_list)
{
  DBUG_ENTER("Virtual_tmp_table::add");
  List_iterator_fast<Spvar_definition> def_it(field_list);

  for (Spvar_definition *def= def_it++; def; def= def_it++)
  {
    Record_addr rec_addr(f_maybe_null(def->pack_flag));
    Field *new_field= def->make_field(s, in_use->mem_root, &rec_addr,
                                      &def->field_name);
    if (!new_field)
      DBUG_RETURN(true);
    add(new_field);
  }
  DBUG_RETURN(false);
}
|
|
|
|
|
|
void Virtual_tmp_table::setup_field_pointers()
|
|
{
|
|
uchar *null_pos= record[0];
|
|
uchar *field_pos= null_pos + s->null_bytes;
|
|
uint null_bit= 1;
|
|
|
|
for (Field **cur_ptr= field; *cur_ptr; ++cur_ptr)
|
|
{
|
|
Field *cur_field= *cur_ptr;
|
|
if ((cur_field->flags & NOT_NULL_FLAG))
|
|
cur_field->move_field(field_pos);
|
|
else
|
|
{
|
|
cur_field->move_field(field_pos, (uchar*) null_pos, null_bit);
|
|
null_bit<<= 1;
|
|
if (null_bit == (uint)1 << 8)
|
|
{
|
|
++null_pos;
|
|
null_bit= 1;
|
|
}
|
|
}
|
|
if (cur_field->key_type() == HA_KEYTYPE_BIT)
|
|
{
|
|
/* This is a Field_bit since key_type is HA_KEYTYPE_BIT */
|
|
DBUG_ASSERT(cur_field->type() == MYSQL_TYPE_BIT);
|
|
static_cast<Field_bit*>(cur_field)->set_bit_ptr(null_pos, null_bit);
|
|
null_bit+= cur_field->field_length & 7;
|
|
if (null_bit > 7)
|
|
{
|
|
null_pos++;
|
|
null_bit-= 8;
|
|
}
|
|
}
|
|
cur_field->reset();
|
|
field_pos+= cur_field->pack_length();
|
|
}
|
|
}
|
|
|
|
|
|
bool Virtual_tmp_table::open()
|
|
{
|
|
// Make sure that we added all the fields we planned to:
|
|
DBUG_ASSERT(s->fields == m_alloced_field_count);
|
|
field[s->fields]= NULL; // mark the end of the list
|
|
s->blob_field[s->blob_fields]= 0; // mark the end of the list
|
|
|
|
uint null_pack_length= (s->null_fields + 7) / 8; // NULL-bit array length
|
|
s->reclength+= null_pack_length;
|
|
s->rec_buff_length= ALIGN_SIZE(s->reclength + 1);
|
|
if (!(record[0]= in_use->alloc<uchar>(s->rec_buff_length)))
|
|
return true;
|
|
if (null_pack_length)
|
|
{
|
|
null_flags= (uchar*) record[0];
|
|
s->null_bytes= s->null_bytes_for_compare= null_pack_length;
|
|
}
|
|
setup_field_pointers();
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
  Find a field by name.

  @param[out] idx   Set to the position of the first matching field
  @param      name  Field name to look for

  @retval false  Found; *idx has been set
  @retval true   Not found; *idx is left untouched
*/
bool Virtual_tmp_table::sp_find_field_by_name(uint *idx,
                                              const LEX_CSTRING &name) const
{
  uint pos= 0;
  while (Field *candidate= field[pos])
  {
    // Use the same comparison style with sp_context::find_variable()
    if (candidate->field_name.streq(name))
    {
      *idx= pos;
      return false;
    }
    pos++;
  }
  return true;
}
|
|
|
|
|
|
bool
|
|
Virtual_tmp_table::sp_find_field_by_name_or_error(uint *idx,
|
|
const LEX_CSTRING &var_name,
|
|
const LEX_CSTRING &field_name)
|
|
const
|
|
{
|
|
if (sp_find_field_by_name(idx, field_name))
|
|
{
|
|
my_error(ER_ROW_VARIABLE_DOES_NOT_HAVE_FIELD, MYF(0),
|
|
var_name.str, field_name.str);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
  Store a list of items into the fields of the table, position by position.

  @param thd    Current thread
  @param items  Values to store; must have as many elements as the table
                has fields

  @retval false  All values were stored
  @retval true   Storing a value failed; remaining items are not processed
*/
bool Virtual_tmp_table::sp_set_all_fields_from_item_list(THD *thd,
                                                         List<Item> &items)
{
  DBUG_ASSERT(s->fields == items.elements);
  List_iterator<Item> item_it(items);
  Field **target= field;
  Item *source;

  while ((source= item_it++))
  {
    if ((*target)->sp_prepare_and_store_item(thd, &source))
      return true;
    target++;
  }
  return false;
}
|
|
|
|
|
|
/**
  Store the columns of a row-valued item into the fields of the table,
  position by position.

  @param thd    Current thread
  @param value  Fixed item with as many columns as the table has fields

  @retval false  All columns were stored
  @retval true   Storing a column failed; remaining columns are not processed
*/
bool Virtual_tmp_table::sp_set_all_fields_from_item(THD *thd, Item *value)
{
  DBUG_ASSERT(value->fixed());
  DBUG_ASSERT(value->cols() == s->fields);

  uint col= 0;
  while (col < value->cols())
  {
    if (field[col]->sp_prepare_and_store_item(thd, value->addr(col)))
      return true;
    col++;
  }
  return false;
}
|
|
|
|
/**
  Open an internal temporary table through its handler.

  On success the table is switched to HA_EXTRA_QUICK mode, the optimizer
  costs are set, and, if the table was not yet marked as created, it is
  marked so and the created-tmp-tables status counter is incremented.

  @param table  Table to open

  @retval 0  Success
  @retval 1  ha_open() failed; the error has been reported through the
             handler and table->db_stat is 0
*/
bool open_tmp_table(TABLE *table)
{
  int open_error= table->file->ha_open(table, table->s->path.str, O_RDWR,
                                       HA_OPEN_TMP_TABLE |
                                       HA_OPEN_INTERNAL_TABLE |
                                       HA_OPEN_SIZE_TRACKING);
  if (unlikely(open_error))
  {
    table->file->print_error(open_error, MYF(0)); /* purecov: inspected */
    table->db_stat= 0;
    return 1;
  }

  table->db_stat= HA_OPEN_KEYFILE;
  (void) table->file->extra(HA_EXTRA_QUICK);    /* Faster */
  table->file->set_optimizer_costs(table->in_use);

  if (table->is_created())
    return 0;

  table->set_created();
  table->in_use->inc_status_created_tmp_tables();
  return 0;
}
|
|
|
|
|
|
#ifdef USE_ARIA_FOR_TMP_TABLES
|
|
/*
|
|
Create internal (MyISAM or Maria) temporary table
|
|
|
|
SYNOPSIS
|
|
create_internal_tmp_table()
|
|
table Table object that descrimes the table to be created
|
|
keyinfo Description of the index (there is always one index)
|
|
start_recinfo engine's column descriptions
|
|
recinfo INOUT End of engine's column descriptions
|
|
options Option bits
|
|
|
|
DESCRIPTION
|
|
Create an internal emporary table according to passed description. The is
|
|
assumed to have one unique index or constraint.
|
|
|
|
The passed array or TMP_ENGINE_COLUMNDEF structures must have this form:
|
|
|
|
1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
|
|
when there are many nullable columns)
|
|
2. Table columns
|
|
3. One free TMP_ENGINE_COLUMNDEF element (*recinfo points here)
|
|
|
|
This function may use the free element to create hash column for unique
|
|
constraint.
|
|
|
|
RETURN
|
|
FALSE - OK
|
|
TRUE - Error. my_error() have been called
|
|
*/
|
|
|
|
|
|
bool create_internal_tmp_table(TABLE *table, KEY *org_keyinfo,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
ulonglong options)
|
|
{
|
|
int error;
|
|
MARIA_KEYDEF *keydefs= 0, *keydef;
|
|
MARIA_UNIQUEDEF uniquedef;
|
|
TABLE_SHARE *share= table->s;
|
|
MARIA_CREATE_INFO create_info;
|
|
bool use_unique= false;
|
|
DBUG_ENTER("create_internal_tmp_table");
|
|
|
|
if (share->keys)
|
|
{ // Get keys for ni_create
|
|
HA_KEYSEG *seg;
|
|
DBUG_ASSERT(share->key_parts);
|
|
|
|
if (!(multi_alloc_root(&table->mem_root,
|
|
&seg, sizeof(*seg) * share->key_parts,
|
|
&keydefs, sizeof(*keydefs) * share->keys,
|
|
NullS)))
|
|
goto err;
|
|
keydef= keydefs;
|
|
|
|
bzero(seg, sizeof(*seg) * share->key_parts);
|
|
|
|
/* Note that share->keys may change in the loop ! */
|
|
for (KEY *keyinfo= org_keyinfo, *end_keyinfo= keyinfo + share->keys;
|
|
keyinfo < end_keyinfo ;
|
|
keyinfo++)
|
|
{
|
|
/*
|
|
Note that a similar check is performed during
|
|
subquery_types_allow_materialization. See MDEV-7122 for more details as
|
|
to why. Whenever this changes, it must be updated there as well, for
|
|
all tmp_table engines.
|
|
*/
|
|
if (keyinfo->key_length > table->file->max_key_length() ||
|
|
keyinfo->user_defined_key_parts > table->file->max_key_parts() ||
|
|
(keyinfo->flags & HA_UNIQUE_HASH))
|
|
{
|
|
if (!(keyinfo->flags & (HA_NOSAME | HA_UNIQUE_HASH)))
|
|
{
|
|
my_error(ER_INTERNAL_ERROR, MYF(0),
|
|
"Using too big key for internal temp tables");
|
|
DBUG_RETURN(1);
|
|
}
|
|
/* Can't create a key; Make a unique constraint instead of a key */
|
|
share->total_keys= --share->keys;
|
|
share->key_parts-= keyinfo->user_defined_key_parts;
|
|
share->ext_key_parts-= keyinfo->ext_key_parts;
|
|
use_unique= true;
|
|
bzero((char*) &uniquedef,sizeof(uniquedef));
|
|
uniquedef.keysegs= keyinfo->user_defined_key_parts;
|
|
uniquedef.seg=seg;
|
|
uniquedef.null_are_equal=1;
|
|
keyinfo->flags|= HA_UNIQUE_HASH;
|
|
keyinfo->algorithm= HA_KEY_ALG_UNIQUE_HASH;
|
|
|
|
/* Create extra column for hash value */
|
|
bzero((uchar*) *recinfo,sizeof(**recinfo));
|
|
(*recinfo)->type= FIELD_CHECK;
|
|
(*recinfo)->length= MARIA_UNIQUE_HASH_LENGTH;
|
|
(*recinfo)++;
|
|
|
|
/* Avoid warnings from valgrind */
|
|
bzero(table->record[0]+ share->reclength, MARIA_UNIQUE_HASH_LENGTH);
|
|
bzero(share->default_values+ share->reclength,
|
|
MARIA_UNIQUE_HASH_LENGTH);
|
|
share->reclength+= MARIA_UNIQUE_HASH_LENGTH;
|
|
}
|
|
else
|
|
{
|
|
/* Create a key */
|
|
bzero((char*) keydef,sizeof(*keydef));
|
|
/*
|
|
We are using a GROUP BY on something that contains NULL
|
|
In this case we have to tell Aria that two NULL should
|
|
on INSERT be regarded at the same value.
|
|
*/
|
|
keydef->flag= (keyinfo->flags & HA_NOSAME) | HA_NULL_ARE_EQUAL;
|
|
keydef->keysegs= keyinfo->user_defined_key_parts;
|
|
keydef->seg= seg;
|
|
keydef++;
|
|
}
|
|
for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++)
|
|
{
|
|
Field *field=keyinfo->key_part[i].field;
|
|
seg->flag= 0;
|
|
seg->language= field->charset()->number;
|
|
seg->length= keyinfo->key_part[i].length;
|
|
seg->start= keyinfo->key_part[i].offset;
|
|
if (field->flags & BLOB_FLAG)
|
|
{
|
|
seg->type=
|
|
((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
|
|
HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
|
|
seg->bit_start= (uint8)(field->pack_length() -
|
|
portable_sizeof_char_ptr);
|
|
seg->flag= HA_BLOB_PART;
|
|
seg->length=0; // Whole blob in unique constraint
|
|
}
|
|
else
|
|
{
|
|
seg->type= keyinfo->key_part[i].type;
|
|
/* Tell handler if it can do suffic space compression */
|
|
if (field->real_type() == MYSQL_TYPE_STRING &&
|
|
keyinfo->key_part[i].length > 32)
|
|
seg->flag|= HA_SPACE_PACK;
|
|
}
|
|
if (!(field->flags & NOT_NULL_FLAG))
|
|
{
|
|
seg->null_bit= field->null_bit;
|
|
seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
|
|
}
|
|
}
|
|
keyinfo->index_flags= table->file->index_flags(0, 0, 1);
|
|
}
|
|
}
|
|
bzero((char*) &create_info,sizeof(create_info));
|
|
create_info.data_file_length= table->in_use->variables.tmp_disk_table_size;
|
|
|
|
/*
|
|
The logic for choosing the record format:
|
|
The STATIC_RECORD format is the fastest one, because it's so simple,
|
|
so we use this by default for short rows.
|
|
BLOCK_RECORD caches both row and data, so this is generally faster than
|
|
DYNAMIC_RECORD. The one exception is when we write to tmp table and
|
|
want to use keys for duplicate elimination as with BLOCK RECORD
|
|
we first write the row, then check for key conflicts and then we have to
|
|
delete the row. The cases when this can happen is when there is
|
|
a group by and no sum functions or if distinct is used.
|
|
*/
|
|
{
|
|
enum data_file_type file_type= table->no_rows ? NO_RECORD :
|
|
(share->reclength < 64 && !share->blob_fields ? STATIC_RECORD :
|
|
table->used_for_duplicate_elimination ? DYNAMIC_RECORD : BLOCK_RECORD);
|
|
uint create_flags= HA_CREATE_TMP_TABLE | HA_CREATE_INTERNAL_TABLE |
|
|
(table->keep_row_order ? HA_PRESERVE_INSERT_ORDER : 0);
|
|
|
|
if (file_type != NO_RECORD && encrypt_tmp_disk_tables)
|
|
{
|
|
/* encryption is only supported for BLOCK_RECORD */
|
|
file_type= BLOCK_RECORD;
|
|
if (table->used_for_duplicate_elimination)
|
|
{
|
|
/*
|
|
sql-layer expect the last column to be stored/restored also
|
|
when it's null.
|
|
|
|
This is probably a bug (that sql-layer doesn't annotate
|
|
the column as not-null) but both heap, aria-static, aria-dynamic and
|
|
myisam has this property. aria-block_record does not since it
|
|
does not store null-columns at all.
|
|
Emulate behaviour by making column not-nullable when creating the
|
|
table.
|
|
*/
|
|
uint cols= (uint)(*recinfo-start_recinfo);
|
|
start_recinfo[cols-1].null_bit= 0;
|
|
}
|
|
}
|
|
|
|
if (unlikely((error= maria_create(share->path.str, file_type, share->keys,
|
|
keydefs, (uint) (*recinfo-start_recinfo),
|
|
start_recinfo, use_unique, &uniquedef,
|
|
&create_info, create_flags))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
table->db_stat=0;
|
|
goto err;
|
|
}
|
|
}
|
|
|
|
table->in_use->inc_status_created_tmp_disk_tables();
|
|
table->in_use->inc_status_created_tmp_tables();
|
|
share->db_record_offset= 1;
|
|
table->set_created();
|
|
DBUG_RETURN(0);
|
|
err:
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
#else
|
|
|
|
/*
|
|
Create internal (MyISAM or Maria) temporary table
|
|
|
|
SYNOPSIS
|
|
create_internal_tmp_table()
|
|
table Table object that descrimes the table to be created
|
|
keyinfo Description of the index (there is always one index)
|
|
start_recinfo engine's column descriptions
|
|
recinfo INOUT End of engine's column descriptions
|
|
options Option bits
|
|
|
|
DESCRIPTION
|
|
Create an internal emporary table according to passed description. The is
|
|
assumed to have one unique index or constraint.
|
|
|
|
The passed array or TMP_ENGINE_COLUMNDEF structures must have this form:
|
|
|
|
1. 1-byte column (afaiu for 'deleted' flag) (note maybe not 1-byte
|
|
when there are many nullable columns)
|
|
2. Table columns
|
|
3. One free TMP_ENGINE_COLUMNDEF element (*recinfo points here)
|
|
|
|
This function may use the free element to create hash column for unique
|
|
constraint.
|
|
|
|
RETURN
|
|
FALSE - OK
|
|
TRUE - Error ; my_error() has been called.
|
|
*/
|
|
|
|
/* Create internal MyISAM temporary table */
|
|
|
|
bool create_internal_tmp_table(TABLE *table, KEY *org_keyinfo,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
ulonglong options)
|
|
{
|
|
int error;
|
|
MI_KEYDEF keydef;
|
|
MI_UNIQUEDEF uniquedef;
|
|
TABLE_SHARE *share= table->s;
|
|
DBUG_ENTER("create_internal_tmp_table");
|
|
|
|
if (share->keys)
|
|
{ // Get keys for ni_create
|
|
bool using_unique_constraint=0;
|
|
HA_KEYSEG *seg= (HA_KEYSEG*) alloc_root(&table->mem_root,
|
|
sizeof(*seg) *
|
|
share->user_defined_key_parts);
|
|
if (!seg)
|
|
goto err;
|
|
|
|
bzero(seg, sizeof(*seg) * share->user_defined_key_parts);
|
|
/*
|
|
Note that a similar check is performed during
|
|
subquery_types_allow_materialization. See MDEV-7122 for more details as
|
|
to why. Whenever this changes, it must be updated there as well, for
|
|
all tmp_table engines.
|
|
*/
|
|
if (keyinfo->key_length > table->file->max_key_length() ||
|
|
keyinfo->user_defined_key_parts > table->file->max_key_parts() ||
|
|
share->uniques)
|
|
{
|
|
/* Can't create a key; Make a unique constraint instead of a key */
|
|
share->keys= 0;
|
|
share->key_parts= share->ext_key_parts= 0;
|
|
share->uniques= 1;
|
|
using_unique_constraint=1;
|
|
bzero((char*) &uniquedef,sizeof(uniquedef));
|
|
uniquedef.keysegs=keyinfo->user_defined_key_parts;
|
|
uniquedef.seg=seg;
|
|
uniquedef.null_are_equal=1;
|
|
|
|
/* Create extra column for hash value */
|
|
bzero((uchar*) *recinfo,sizeof(**recinfo));
|
|
(*recinfo)->type= FIELD_CHECK;
|
|
(*recinfo)->length=MI_UNIQUE_HASH_LENGTH;
|
|
(*recinfo)++;
|
|
/* Avoid warnings from valgrind */
|
|
bzero(table->record[0]+ share->reclength, MI_UNIQUE_HASH_LENGTH);
|
|
bzero(share->default_values+ share->reclength, MI_UNIQUE_HASH_LENGTH);
|
|
share->reclength+= MI_UNIQUE_HASH_LENGTH;
|
|
}
|
|
else
|
|
{
|
|
/* Create an unique key */
|
|
bzero((char*) &keydef,sizeof(keydef));
|
|
keydef.flag= ((keyinfo->flags & HA_NOSAME) | HA_BINARY_PACK_KEY |
|
|
HA_PACK_KEY);
|
|
keydef.keysegs= keyinfo->user_defined_key_parts;
|
|
keydef.seg= seg;
|
|
}
|
|
for (uint i=0; i < keyinfo->user_defined_key_parts ; i++,seg++)
|
|
{
|
|
Field *field=keyinfo->key_part[i].field;
|
|
seg->flag= 0;
|
|
seg->language= field->charset()->number;
|
|
seg->length= keyinfo->key_part[i].length;
|
|
seg->start= keyinfo->key_part[i].offset;
|
|
if (field->flags & BLOB_FLAG)
|
|
{
|
|
seg->type=
|
|
((keyinfo->key_part[i].key_type & FIELDFLAG_BINARY) ?
|
|
HA_KEYTYPE_VARBINARY2 : HA_KEYTYPE_VARTEXT2);
|
|
seg->bit_start= (uint8)(field->pack_length() - portable_sizeof_char_ptr);
|
|
seg->flag= HA_BLOB_PART;
|
|
seg->length=0; // Whole blob in unique constraint
|
|
}
|
|
else
|
|
{
|
|
seg->type= keyinfo->key_part[i].type;
|
|
/* Tell handler if it can do suffic space compression */
|
|
if (field->real_type() == MYSQL_TYPE_STRING &&
|
|
keyinfo->key_part[i].length > 4)
|
|
seg->flag|= HA_SPACE_PACK;
|
|
}
|
|
if (!(field->flags & NOT_NULL_FLAG))
|
|
{
|
|
seg->null_bit= field->null_bit;
|
|
seg->null_pos= (uint) (field->null_ptr - (uchar*) table->record[0]);
|
|
/*
|
|
We are using a GROUP BY on something that contains NULL
|
|
In this case we have to tell MyISAM that two NULL should
|
|
on INSERT be regarded at the same value
|
|
*/
|
|
if (!using_unique_constraint)
|
|
keydef.flag|= HA_NULL_ARE_EQUAL;
|
|
}
|
|
}
|
|
if (share->keys)
|
|
keyinfo->index_flags= table->file->index_flags(0, 0, 1);
|
|
}
|
|
MI_CREATE_INFO create_info;
|
|
bzero((char*) &create_info,sizeof(create_info));
|
|
create_info.data_file_length= table->in_use->variables.tmp_disk_table_size;
|
|
|
|
if (unlikely((error= mi_create(share->path.str, share->keys, &keydef,
|
|
(uint) (*recinfo-start_recinfo),
|
|
start_recinfo,
|
|
share->uniques, &uniquedef,
|
|
&create_info,
|
|
HA_CREATE_TMP_TABLE |
|
|
HA_CREATE_INTERNAL_TABLE |
|
|
((share->db_create_options &
|
|
HA_OPTION_PACK_RECORD) ?
|
|
HA_PACK_RECORD : 0)
|
|
))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
table->db_stat=0;
|
|
goto err;
|
|
}
|
|
table->in_use->inc_status_created_tmp_disk_tables();
|
|
table->in_use->inc_status_created_tmp_tables();
|
|
share->db_record_offset= 1;
|
|
table->set_created();
|
|
DBUG_RETURN(0);
|
|
err:
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
#endif /* USE_ARIA_FOR_TMP_TABLES */
|
|
|
|
|
|
/*
|
|
If a HEAP table gets full, create a internal table in MyISAM or Maria
|
|
and copy all rows to this
|
|
|
|
In case of error, my_error() or handler::print_error() will be called.
|
|
Note that in case of error, table->file->ha_rnd_end() may have been called!
|
|
*/
|
|
|
|
|
|
bool
|
|
create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
int error,
|
|
bool ignore_last_dupp_key_error,
|
|
bool *is_duplicate)
|
|
{
|
|
TABLE new_table;
|
|
TABLE_SHARE share;
|
|
const char *save_proc_info;
|
|
int write_err= 0;
|
|
String tmp_alias;
|
|
DBUG_ENTER("create_internal_tmp_table_from_heap");
|
|
if (is_duplicate)
|
|
*is_duplicate= FALSE;
|
|
|
|
if (table->s->db_type() != heap_hton || error != HA_ERR_RECORD_FILE_FULL)
|
|
{
|
|
/*
|
|
We don't want this error to be converted to a warning, e.g. in case of
|
|
INSERT IGNORE ... SELECT.
|
|
*/
|
|
table->file->print_error(error, MYF(ME_FATAL));
|
|
DBUG_RETURN(1);
|
|
}
|
|
new_table= *table;
|
|
share= *table->s;
|
|
new_table.s= &share;
|
|
new_table.s->db_plugin= ha_lock_engine(thd, TMP_ENGINE_HTON);
|
|
if (unlikely(!(new_table.file= get_new_handler(&share, &new_table.mem_root,
|
|
TMP_ENGINE_HTON))))
|
|
DBUG_RETURN(1); // End of memory
|
|
|
|
if (unlikely(new_table.file->set_ha_share_ref(&share.ha_share)))
|
|
{
|
|
delete new_table.file;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
save_proc_info=thd->proc_info;
|
|
THD_STAGE_INFO(thd, stage_converting_heap_to_myisam);
|
|
|
|
new_table.no_rows= table->no_rows;
|
|
if (create_internal_tmp_table(&new_table, table->key_info, start_recinfo,
|
|
recinfo,
|
|
thd->lex->first_select_lex()->options |
|
|
thd->variables.option_bits))
|
|
goto err2;
|
|
if (open_tmp_table(&new_table))
|
|
{
|
|
TMP_ENGINE_HTON->drop_table(TMP_ENGINE_HTON, new_table.s->path.str);
|
|
goto err2;
|
|
}
|
|
if (table->file->indexes_are_disabled())
|
|
new_table.file->ha_disable_indexes(key_map(0), false);
|
|
table->file->ha_index_or_rnd_end();
|
|
if (table->file->ha_rnd_init_with_error(1))
|
|
DBUG_RETURN(1);
|
|
if (new_table.no_rows)
|
|
new_table.file->extra(HA_EXTRA_NO_ROWS);
|
|
else
|
|
{
|
|
/* update table->file->stats.records */
|
|
table->file->info(HA_STATUS_VARIABLE);
|
|
new_table.file->ha_start_bulk_insert(table->file->stats.records);
|
|
}
|
|
|
|
/*
|
|
copy all old rows from heap table to MyISAM table
|
|
This is the only code that uses record[1] to read/write but this
|
|
is safe as this is a temporary MyISAM table without timestamp/autoincrement
|
|
or partitioning.
|
|
*/
|
|
while (!table->file->ha_rnd_next(new_table.record[1]))
|
|
{
|
|
write_err= new_table.file->ha_write_tmp_row(new_table.record[1]);
|
|
DBUG_EXECUTE_IF("raise_error", write_err= HA_ERR_FOUND_DUPP_KEY ;);
|
|
if (write_err)
|
|
goto err;
|
|
if (unlikely(thd->check_killed()))
|
|
goto err_killed;
|
|
}
|
|
if (!new_table.no_rows && (write_err= new_table.file->ha_end_bulk_insert()))
|
|
goto err;
|
|
/* copy row that filled HEAP table */
|
|
if (unlikely((write_err=new_table.file->ha_write_tmp_row(table->record[0]))))
|
|
{
|
|
if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
|
|
!ignore_last_dupp_key_error)
|
|
goto err;
|
|
if (is_duplicate)
|
|
*is_duplicate= TRUE;
|
|
}
|
|
else
|
|
{
|
|
if (is_duplicate)
|
|
*is_duplicate= FALSE;
|
|
}
|
|
|
|
/* remove heap table and change to use myisam table */
|
|
(void) table->file->ha_rnd_end();
|
|
(void) table->file->ha_close(); // This deletes the table !
|
|
delete table->file;
|
|
table->file=0;
|
|
plugin_unlock(0, table->s->db_plugin);
|
|
share.db_plugin= my_plugin_lock(0, share.db_plugin);
|
|
new_table.s= table->s; // Keep old share
|
|
|
|
/*
|
|
The following work with alias has to be done as new_table.alias() may have
|
|
been reallocated and we want to keep the original one.
|
|
*/
|
|
tmp_alias.move(table->alias);
|
|
*table= new_table;
|
|
table->alias.move(tmp_alias);
|
|
new_table.alias.free();
|
|
/* Get the new share */
|
|
*table->s= share;
|
|
|
|
table->file->change_table_ptr(table, table->s);
|
|
table->use_all_columns();
|
|
if (save_proc_info)
|
|
thd_proc_info(thd, (!strcmp(save_proc_info,"Copying to tmp table") ?
|
|
"Copying to tmp table on disk" : save_proc_info));
|
|
DBUG_RETURN(0);
|
|
|
|
err:
|
|
DBUG_PRINT("error",("Got error: %d",write_err));
|
|
table->file->print_error(write_err, MYF(0));
|
|
err_killed:
|
|
(void) table->file->ha_rnd_end();
|
|
(void) new_table.file->drop_table(new_table.s->path.str);
|
|
err2:
|
|
delete new_table.file;
|
|
thd_proc_info(thd, save_proc_info);
|
|
table->mem_root= new_table.mem_root;
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
void
|
|
free_tmp_table(THD *thd, TABLE *entry)
|
|
{
|
|
MEM_ROOT own_root= entry->mem_root;
|
|
const char *save_proc_info;
|
|
DBUG_ENTER("free_tmp_table");
|
|
DBUG_PRINT("enter",("table: %s alias: %s",entry->s->table_name.str,
|
|
entry->alias.c_ptr()));
|
|
|
|
save_proc_info=thd->proc_info;
|
|
THD_STAGE_INFO(thd, stage_removing_tmp_table);
|
|
|
|
if (entry->file && entry->is_created())
|
|
{
|
|
if (entry->db_stat)
|
|
{
|
|
/* The table was properly opened in open_tmp_table() */
|
|
entry->file->ha_index_or_rnd_end();
|
|
entry->file->info(HA_STATUS_VARIABLE);
|
|
thd->tmp_tables_size+= (entry->file->stats.data_file_length +
|
|
entry->file->stats.index_file_length);
|
|
}
|
|
/*
|
|
This is an internal temporary table, we should not call ha_drop_table()
|
|
as it will mark the transaction read/write
|
|
*/
|
|
DBUG_ASSERT(entry->s->tmp_table == SYSTEM_TMP_TABLE ||
|
|
entry->s->tmp_table == INTERNAL_TMP_TABLE);
|
|
entry->file->drop_table(entry->s->path.str);
|
|
delete entry->file;
|
|
entry->file= NULL;
|
|
entry->reset_created();
|
|
}
|
|
|
|
/* free blobs */
|
|
for (Field **ptr=entry->field ; *ptr ; ptr++)
|
|
(*ptr)->free();
|
|
|
|
if (entry->temp_pool_slot != MY_BIT_NONE)
|
|
temp_pool_clear_bit(entry->temp_pool_slot);
|
|
|
|
plugin_unlock(0, entry->s->db_plugin);
|
|
entry->alias.free();
|
|
|
|
if (entry->pos_in_table_list && entry->pos_in_table_list->table)
|
|
{
|
|
DBUG_ASSERT(entry->pos_in_table_list->table == entry);
|
|
entry->pos_in_table_list->table= NULL;
|
|
}
|
|
|
|
free_root(&own_root, MYF(0)); /* the table is allocated in its own root */
|
|
thd_proc_info(thd, save_proc_info);
|
|
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Set write_func of AGGR_OP object
|
|
|
|
@param join_tab JOIN_TAB of the corresponding tmp table
|
|
|
|
@details
|
|
Function sets up write_func according to how AGGR_OP object that
|
|
is attached to the given join_tab will be used in the query.
|
|
*/
|
|
|
|
void set_postjoin_aggr_write_func(JOIN_TAB *tab)
|
|
{
|
|
JOIN *join= tab->join;
|
|
TABLE *table= tab->table;
|
|
AGGR_OP *aggr= tab->aggr;
|
|
TMP_TABLE_PARAM *tmp_tbl= tab->tmp_table_param;
|
|
|
|
DBUG_ASSERT(table && aggr);
|
|
|
|
if (table->group && tmp_tbl->sum_func_count &&
|
|
!tmp_tbl->precomputed_group_by)
|
|
{
|
|
/*
|
|
Note for MyISAM tmp tables: if uniques is true keys won't be
|
|
created.
|
|
*/
|
|
if (table->s->keys && !table->s->have_unique_constraint())
|
|
{
|
|
DBUG_PRINT("info",("Using end_update"));
|
|
aggr->set_write_func(end_update);
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info",("Using end_unique_update"));
|
|
aggr->set_write_func(end_unique_update);
|
|
}
|
|
}
|
|
else if (join->sort_and_group && !tmp_tbl->precomputed_group_by &&
|
|
!join->sort_and_group_aggr_tab && join->tables_list &&
|
|
join->top_join_tab_count)
|
|
{
|
|
DBUG_PRINT("info",("Using end_write_group"));
|
|
aggr->set_write_func(end_write_group);
|
|
join->sort_and_group_aggr_tab= tab;
|
|
}
|
|
else
|
|
{
|
|
DBUG_PRINT("info",("Using end_write"));
|
|
aggr->set_write_func(end_write);
|
|
if (tmp_tbl->precomputed_group_by)
|
|
{
|
|
/*
|
|
A preceding call to create_tmp_table in the case when loose
|
|
index scan is used guarantees that
|
|
TMP_TABLE_PARAM::items_to_copy has enough space for the group
|
|
by functions. It is OK here to use memcpy since we copy
|
|
Item_sum pointers into an array of Item pointers.
|
|
*/
|
|
memcpy(tmp_tbl->items_to_copy + tmp_tbl->func_count,
|
|
join->sum_funcs,
|
|
sizeof(Item*)*tmp_tbl->sum_func_count);
|
|
tmp_tbl->items_to_copy[tmp_tbl->func_count+tmp_tbl->sum_func_count]= 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
@details
|
|
Rows produced by a join sweep may end up in a temporary table or be sent
|
|
to a client. Set the function of the nested loop join algorithm which
|
|
handles final fully constructed and matched records.
|
|
|
|
@param join join to setup the function for.
|
|
|
|
@return
|
|
end_select function to use. This function can't fail.
|
|
*/
|
|
|
|
Next_select_func setup_end_select_func(JOIN *join)
|
|
{
|
|
TMP_TABLE_PARAM *tmp_tbl= &join->tmp_table_param;
|
|
|
|
/*
|
|
Choose method for presenting result to user. Use end_send_group
|
|
if the query requires grouping (has a GROUP BY clause and/or one or
|
|
more aggregate functions). Use end_send if the query should not
|
|
be grouped.
|
|
*/
|
|
if (join->sort_and_group && !tmp_tbl->precomputed_group_by)
|
|
{
|
|
DBUG_PRINT("info",("Using end_send_group"));
|
|
return end_send_group;
|
|
}
|
|
DBUG_PRINT("info",("Using end_send"));
|
|
return end_send;
|
|
}
|
|
|
|
|
|
/**
|
|
Make a join of all tables and write it on socket or to table.
|
|
|
|
@retval
|
|
0 if ok
|
|
@retval
|
|
1 if error is sent
|
|
@retval
|
|
-1 if error should be sent
|
|
*/
|
|
|
|
static int
|
|
do_select(JOIN *join, Procedure *procedure)
|
|
{
|
|
int rc= 0;
|
|
enum_nested_loop_state error= NESTED_LOOP_OK;
|
|
uint top_level_tables= join->exec_join_tab_cnt();
|
|
DBUG_ENTER("do_select");
|
|
|
|
if (join->pushdown_query)
|
|
{
|
|
/* Select fields are in the temporary table */
|
|
join->fields= &join->tmp_fields_list1;
|
|
/* Setup HAVING to work with fields in temporary table */
|
|
join->set_items_ref_array(join->items1);
|
|
/* The storage engine will take care of the group by query result */
|
|
int res= join->pushdown_query->execute(join);
|
|
|
|
if (res)
|
|
DBUG_RETURN(res);
|
|
|
|
if (join->pushdown_query->store_data_in_temp_table)
|
|
{
|
|
JOIN_TAB *last_tab= join->join_tab + top_level_tables;
|
|
last_tab->next_select= end_send;
|
|
last_tab->cached_pfs_batch_update= last_tab->pfs_batch_update();
|
|
|
|
enum_nested_loop_state state= last_tab->aggr->end_send();
|
|
if (state >= NESTED_LOOP_OK)
|
|
state= sub_select(join, last_tab, true);
|
|
|
|
if (state < NESTED_LOOP_OK)
|
|
res= 1;
|
|
|
|
if (join->result->send_eof())
|
|
res= 1;
|
|
}
|
|
DBUG_RETURN(res);
|
|
}
|
|
|
|
join->procedure= procedure;
|
|
join->duplicate_rows= join->send_records=0;
|
|
|
|
if (join->only_const_tables() && !join->need_tmp)
|
|
{
|
|
Next_select_func end_select= setup_end_select_func(join);
|
|
|
|
/*
|
|
HAVING will be checked after processing aggregate functions,
|
|
But WHERE should checked here (we alredy have read tables).
|
|
Notice that make_join_select() splits all conditions in this case
|
|
into two groups exec_const_cond and outer_ref_cond.
|
|
If join->table_count == join->const_tables then it is
|
|
sufficient to check only the condition pseudo_bits_cond.
|
|
*/
|
|
DBUG_ASSERT(join->outer_ref_cond == NULL);
|
|
if (!join->pseudo_bits_cond || join->pseudo_bits_cond->val_int())
|
|
{
|
|
// HAVING will be checked by end_select
|
|
error= (*end_select)(join, 0, 0);
|
|
if (error >= NESTED_LOOP_OK)
|
|
error= (*end_select)(join, 0, 1);
|
|
|
|
/*
|
|
If we don't go through evaluate_join_record(), do the counting
|
|
here. join->send_records is increased on success in end_send(),
|
|
so we don't touch it here.
|
|
*/
|
|
join->thd->inc_examined_row_count_fast();
|
|
}
|
|
else if (join->send_row_on_empty_set())
|
|
{
|
|
table_map cleared_tables= (table_map) 0;
|
|
if (end_select == end_send_group)
|
|
{
|
|
/*
|
|
Was a grouping query but we did not find any rows. In this case
|
|
we clear all tables to get null in any referenced fields,
|
|
like in case of:
|
|
SELECT MAX(a) AS f1, a AS f2 FROM t1 WHERE VALUE(a) IS NOT NULL
|
|
*/
|
|
clear_tables(join, &cleared_tables);
|
|
}
|
|
if (!join->having || join->having->val_int())
|
|
{
|
|
List<Item> *columns_list= (procedure ? &join->procedure_fields_list :
|
|
join->fields);
|
|
rc= join->result->send_data_with_check(*columns_list,
|
|
join->unit, 0) > 0;
|
|
}
|
|
/*
|
|
We have to remove the null markings from the tables as this table
|
|
may be part of a sub query that is re-evaluated
|
|
*/
|
|
if (cleared_tables)
|
|
unclear_tables(join, &cleared_tables);
|
|
}
|
|
/*
|
|
An error can happen when evaluating the conds
|
|
(the join condition and piece of where clause
|
|
relevant to this join table).
|
|
*/
|
|
if (unlikely(join->thd->is_error()))
|
|
error= NESTED_LOOP_ERROR;
|
|
}
|
|
else
|
|
{
|
|
DBUG_EXECUTE_IF("show_explain_probe_do_select",
|
|
if (dbug_user_var_equals_int(join->thd,
|
|
"show_explain_probe_select_id",
|
|
join->select_lex->select_number))
|
|
dbug_serve_apcs(join->thd, 1);
|
|
);
|
|
|
|
/*
|
|
We have to update the cached_pfs_batch_update as
|
|
join_tab->select_cond may have changed.
|
|
|
|
This can happen in case of group by where some sub queries are not
|
|
needed anymore. This is checked by main.ps
|
|
*/
|
|
if (top_level_tables)
|
|
join->join_tab[top_level_tables-1].cached_pfs_batch_update=
|
|
join->join_tab[top_level_tables-1].pfs_batch_update();
|
|
|
|
JOIN_TAB *join_tab= join->join_tab +
|
|
(join->tables_list ? join->const_tables : 0);
|
|
if (join->outer_ref_cond && !join->outer_ref_cond->val_int())
|
|
error= NESTED_LOOP_NO_MORE_ROWS;
|
|
else
|
|
error= join->first_select(join,join_tab,0);
|
|
if (error >= NESTED_LOOP_OK && likely(join->thd->killed != ABORT_QUERY))
|
|
error= join->first_select(join,join_tab,1);
|
|
}
|
|
|
|
join->thd->limit_found_rows= join->send_records - join->duplicate_rows;
|
|
|
|
if (error == NESTED_LOOP_NO_MORE_ROWS ||
|
|
unlikely(join->thd->killed == ABORT_QUERY))
|
|
error= NESTED_LOOP_OK;
|
|
|
|
/*
|
|
For "order by with limit", we cannot rely on send_records, but need
|
|
to use the rowcount read originally into the join_tab applying the
|
|
filesort. There cannot be any post-filtering conditions, nor any
|
|
following join_tabs in this case, so this rowcount properly represents
|
|
the correct number of qualifying rows.
|
|
*/
|
|
if (join->order)
|
|
{
|
|
// Save # of found records prior to cleanup
|
|
JOIN_TAB *sort_tab;
|
|
JOIN_TAB *join_tab= join->join_tab;
|
|
uint const_tables= join->const_tables;
|
|
|
|
// Take record count from first non constant table or from last tmp table
|
|
if (join->aggr_tables > 0)
|
|
sort_tab= join_tab + join->top_join_tab_count + join->aggr_tables - 1;
|
|
else
|
|
{
|
|
DBUG_ASSERT(!join->only_const_tables());
|
|
sort_tab= join_tab + const_tables;
|
|
}
|
|
if (sort_tab->filesort &&
|
|
join->select_options & OPTION_FOUND_ROWS &&
|
|
sort_tab->filesort->sortorder &&
|
|
sort_tab->filesort->limit != HA_POS_ERROR)
|
|
{
|
|
join->thd->limit_found_rows= sort_tab->records;
|
|
}
|
|
}
|
|
|
|
{
|
|
/*
|
|
The following will unlock all cursors if the command wasn't an
|
|
update command
|
|
*/
|
|
join->join_free(); // Unlock all cursors
|
|
}
|
|
if (error == NESTED_LOOP_OK)
|
|
{
|
|
/*
|
|
Sic: this branch works even if rc != 0, e.g. when
|
|
send_data above returns an error.
|
|
*/
|
|
if (unlikely(join->result->send_eof()))
|
|
rc= 1; // Don't send error
|
|
DBUG_PRINT("info",("%ld records output", (long) join->send_records));
|
|
}
|
|
else
|
|
rc= -1;
|
|
#ifndef DBUG_OFF
|
|
if (rc)
|
|
{
|
|
DBUG_PRINT("error",("Error: do_select() failed"));
|
|
}
|
|
#endif
|
|
rc= join->thd->is_error() ? -1 : rc;
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Instantiates temporary table
|
|
|
|
@param table Table object that describes the table to be
|
|
instantiated
|
|
@param keyinfo Description of the index (there is always one index)
|
|
@param start_recinfo Column descriptions
|
|
@param recinfo INOUT End of column descriptions
|
|
@param options Option bits
|
|
|
|
@details
|
|
Creates tmp table and opens it.
|
|
|
|
@return
|
|
FALSE - OK
|
|
TRUE - Error
|
|
*/
|
|
|
|
bool instantiate_tmp_table(TABLE *table, KEY *keyinfo,
|
|
TMP_ENGINE_COLUMNDEF *start_recinfo,
|
|
TMP_ENGINE_COLUMNDEF **recinfo,
|
|
ulonglong options)
|
|
{
|
|
DBUG_ASSERT(table->s->keys == 0 || table->key_info == keyinfo);
|
|
DBUG_ASSERT(table->s->keys <= 1);
|
|
if (table->s->db_type() == TMP_ENGINE_HTON)
|
|
{
|
|
/*
|
|
If it is not heap (in-memory) table then convert index to unique
|
|
constrain.
|
|
*/
|
|
MEM_CHECK_DEFINED(table->record[0], table->s->reclength);
|
|
if (create_internal_tmp_table(table, keyinfo, start_recinfo, recinfo,
|
|
options))
|
|
return TRUE;
|
|
// Make empty record so random data is not written to disk
|
|
empty_record(table);
|
|
table->status= STATUS_NO_RECORD;
|
|
}
|
|
if (open_tmp_table(table))
|
|
return TRUE;
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Accumulate rows of the result of an aggregation operation in a tmp table
|
|
|
|
@param join pointer to the structure providing all context info for the query
|
|
@param join_tab the JOIN_TAB object to which the operation is attached
|
|
@param end_records TRUE <=> all records were accumulated, send them further
|
|
|
|
@details
|
|
This function accumulates records of the aggreagation operation for
|
|
the node join_tab from the execution plan in a tmp table. To add a new
|
|
record the function calls join_tab->aggr->put_records.
|
|
When there is no more records to save, in this
|
|
case the end_of_records argument == true, function tells the operation to
|
|
send records further by calling aggr->send_records().
|
|
When all records are sent this function passes 'end_of_records' signal
|
|
further by calling sub_select() with end_of_records argument set to
|
|
true. After that aggr->end_send() is called to tell the operation that
|
|
it could end internal buffer scan.
|
|
|
|
@note
|
|
This function is not expected to be called when dynamic range scan is
|
|
used to scan join_tab because range scans aren't used for tmp tables.
|
|
|
|
@return
|
|
return one of enum_nested_loop_state.
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
sub_select_postjoin_aggr(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
enum_nested_loop_state rc;
|
|
AGGR_OP *aggr= join_tab->aggr;
|
|
|
|
/* This function cannot be called if join_tab has no associated aggregation */
|
|
DBUG_ASSERT(aggr != NULL);
|
|
|
|
DBUG_ENTER("sub_select_aggr_tab");
|
|
|
|
if (join->thd->killed)
|
|
{
|
|
/* The user has aborted the execution of the query */
|
|
join->thd->send_kill_message();
|
|
DBUG_RETURN(NESTED_LOOP_KILLED);
|
|
}
|
|
|
|
if (end_of_records)
|
|
{
|
|
rc= aggr->end_send();
|
|
if (rc >= NESTED_LOOP_OK)
|
|
rc= sub_select(join, join_tab, end_of_records);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
rc= aggr->put_record();
|
|
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
|
|
/*
|
|
Fill the join buffer with partial records, retrieve all full matches for
|
|
them
|
|
|
|
SYNOPSIS
|
|
sub_select_cache()
|
|
join pointer to the structure providing all context info for the
|
|
query
|
|
join_tab the first next table of the execution plan to be retrieved
|
|
end_records true when we need to perform final steps of the retrieval
|
|
|
|
DESCRIPTION
|
|
For a given table Ti= join_tab from the sequence of tables of the chosen
|
|
execution plan T1,...,Ti,...,Tn the function just put the partial record
|
|
t1,...,t[i-1] into the join buffer associated with table Ti unless this
|
|
is the last record added into the buffer. In this case, the function
|
|
additionally finds all matching full records for all partial
|
|
records accumulated in the buffer, after which it cleans the buffer up.
|
|
If a partial join record t1,...,ti is extended utilizing a dynamic
|
|
range scan then it is not put into the join buffer. Rather all matching
|
|
records are found for it at once by the function sub_select.
|
|
|
|
NOTES
|
|
The function implements the algorithmic schema for both Blocked Nested
|
|
Loop Join and Batched Key Access Join. The difference can be seen only at
|
|
the level of of the implementation of the put_record and join_records
|
|
virtual methods for the cache object associated with the join_tab.
|
|
The put_record method accumulates records in the cache, while the
|
|
join_records method builds all matching join records and send them into
|
|
the output stream.
|
|
|
|
RETURN
|
|
return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
sub_select_cache(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
enum_nested_loop_state rc;
|
|
JOIN_CACHE *cache= join_tab->cache;
|
|
int err;
|
|
DBUG_ENTER("sub_select_cache");
|
|
|
|
/*
|
|
This function cannot be called if join_tab has no associated join
|
|
buffer
|
|
*/
|
|
DBUG_ASSERT(cache != NULL);
|
|
|
|
join_tab->cache->reset_join(join);
|
|
|
|
if (end_of_records)
|
|
{
|
|
rc= cache->join_records(FALSE);
|
|
if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
|
|
rc == NESTED_LOOP_QUERY_LIMIT)
|
|
rc= sub_select(join, join_tab, end_of_records);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
/* The user has aborted the execution of the query */
|
|
DBUG_RETURN(NESTED_LOOP_KILLED);
|
|
}
|
|
join_tab->jbuf_loops_tracker->on_scan_init();
|
|
|
|
if (!(err= test_if_use_dynamic_range_scan(join_tab)))
|
|
{
|
|
if (!cache->put_record())
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
/*
|
|
We has decided that after the record we've just put into the buffer
|
|
won't add any more records. Now try to find all the matching
|
|
extensions for all records in the buffer.
|
|
*/
|
|
rc= cache->join_records(FALSE);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
if (err < 0)
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
|
|
/*
|
|
TODO: Check whether we really need the call below and we can't do
|
|
without it. If it's not the case remove it.
|
|
*/
|
|
rc= cache->join_records(TRUE);
|
|
if (rc == NESTED_LOOP_OK || rc == NESTED_LOOP_NO_MORE_ROWS ||
|
|
rc == NESTED_LOOP_QUERY_LIMIT)
|
|
rc= sub_select(join, join_tab, end_of_records);
|
|
DBUG_RETURN(rc);
|
|
}
|
|
|
|
/**
|
|
Retrieve records that start with a given beginning from the result of a join.
|
|
|
|
For a given partial join record consisting of records from the tables
|
|
preceding the table join_tab in the execution plan, the function
|
|
retrieves all matching full records from the result set and
|
|
send them to the result set stream.
|
|
|
|
@note
|
|
The function effectively implements the final (n-k) nested loops
|
|
of nested loops join algorithm, where k is the ordinal number of
|
|
the join_tab table and n is the total number of tables in the join query.
|
|
It performs nested loops joins with all conjunctive predicates from
|
|
the where condition pushed as low to the tables as possible.
|
|
E.g. for the query
|
|
@code
|
|
SELECT * FROM t1,t2,t3
|
|
WHERE t1.a=t2.a AND t2.b=t3.b AND t1.a BETWEEN 5 AND 9
|
|
@endcode
|
|
the predicate (t1.a BETWEEN 5 AND 9) will be pushed to table t1,
|
|
given the selected plan prescribes to nest retrievals of the
|
|
joined tables in the following order: t1,t2,t3.
|
|
A pushed down predicate is attached to the table which it is pushed to,
|
|
at the field join_tab->select_cond.
|
|
When executing a nested loop of level k the function runs through
|
|
the rows of 'join_tab' and for each row checks the pushed condition
|
|
attached to the table.
|
|
If it is false the function moves to the next row of the
|
|
table. If the condition is true the function recursively executes (n-k-1)
|
|
remaining embedded nested loops.
|
|
The situation becomes more complicated if outer joins are involved in
|
|
the execution plan. In this case the pushed down predicates can be
|
|
checked only at certain conditions.
|
|
Suppose for the query
|
|
@code
|
|
SELECT * FROM t1 LEFT JOIN (t2,t3) ON t3.a=t1.a
|
|
WHERE t1>2 AND (t2.b>5 OR t2.b IS NULL)
|
|
@endcode
|
|
the optimizer has chosen a plan with the table order t1,t2,t3.
|
|
The predicate P1=t1>2 will be pushed down to the table t1, while the
|
|
predicate P2=(t2.b>5 OR t2.b IS NULL) will be attached to the table
|
|
t2. But the second predicate can not be unconditionally tested right
|
|
after a row from t2 has been read. This can be done only after the
|
|
first row with t3.a=t1.a has been encountered.
|
|
Thus, the second predicate P2 is supplied with a guard value that is
|
|
stored in the field 'found' of the first inner table for the outer join
|
|
(table t2). When the first row with t3.a=t1.a for the current row
|
|
of table t1 appears, the value becomes true. From now on the predicate
|
|
is evaluated immediately after the row of table t2 has been read.
|
|
When the first row with t3.a=t1.a has been encountered all
|
|
conditions attached to the inner tables t2,t3 must be evaluated.
|
|
Only when all of them are true the row is sent to the output stream.
|
|
If not, the function returns to the lowest nest level that has a false
|
|
attached condition.
|
|
The predicates from on expressions are also pushed down. If in the
|
|
above example the on expression were (t3.a=t1.a AND t2.a=t1.a),
|
|
then t1.a=t2.a would be pushed down to table t2, and without any
|
|
guard.
|
|
If after the run through all rows of table t2, the first inner table
|
|
for the outer join operation, it turns out that no matches are
|
|
found for the current row of t1, then current row from table t1
|
|
is complemented by nulls for t2 and t3. Then the pushed down predicates
|
|
are checked for the composed row almost in the same way as it had
|
|
been done for the first row with a match. The only difference is
|
|
the predicates from on expressions are not checked.
|
|
|
|
@par
|
|
@b IMPLEMENTATION
|
|
@par
|
|
The function forms output rows for a current partial join of k
|
|
tables recursively.
|
|
For each partial join record ending with a certain row from
|
|
join_tab it calls sub_select that builds all possible matching
|
|
tails from the result set.
|
|
To be able to check predicates conditionally items of the class
|
|
Item_func_trig_cond are employed.
|
|
An object of this class is constructed from an item of class COND
|
|
and a pointer to a guarding boolean variable.
|
|
When the value of the guard variable is true the value of the object
|
|
is the same as the value of the predicate, otherwise it just returns
|
|
true.
|
|
To carry out a return to a nested loop level of join table t the pointer
|
|
to t is remembered in the field 'return_tab' of the join structure.
|
|
Consider the following query:
|
|
@code
|
|
SELECT * FROM t1,
|
|
LEFT JOIN
|
|
(t2, t3 LEFT JOIN (t4,t5) ON t5.a=t3.a)
|
|
ON t4.a=t2.a
|
|
WHERE (t2.b=5 OR t2.b IS NULL) AND (t4.b=2 OR t4.b IS NULL)
|
|
@endcode
|
|
Suppose the chosen execution plan dictates the order t1,t2,t3,t4,t5
|
|
and suppose for a given joined rows from tables t1,t2,t3 there are
|
|
no rows in the result set yet.
|
|
When first row from t5 that satisfies the on condition
|
|
t5.a=t3.a is found, the pushed down predicate t4.b=2 OR t4.b IS NULL
|
|
becomes 'activated', as well the predicate t4.a=t2.a. But
|
|
the predicate (t2.b=5 OR t2.b IS NULL) can not be checked until
|
|
t4.a=t2.a becomes true.
|
|
In order not to re-evaluate the predicates that were already evaluated
|
|
as attached pushed down predicates, a pointer to the first
|
|
most inner unmatched table is maintained in join_tab->first_unmatched.
|
|
Thus, when the first row from t5 with t5.a=t3.a is found
|
|
this pointer for t5 is changed from t4 to t2.
|
|
|
|
@par
|
|
@b STRUCTURE @b NOTES
|
|
@par
|
|
join_tab->first_unmatched points always backwards to the first inner
|
|
table of the embedding nested join, if any.
|
|
|
|
@param join pointer to the structure providing all context info for
|
|
the query
|
|
@param join_tab the first next table of the execution plan to be retrieved
|
|
@param end_of_records true when we need to perform final steps of retrieval
|
|
|
|
@return
|
|
return one of enum_nested_loop_state, except NESTED_LOOP_NO_MORE_ROWS.
|
|
*/
|
|
|
|
enum_nested_loop_state
sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records)
{
  int read_error;
  enum_nested_loop_state rc;
  DBUG_ENTER("sub_select");

  /*
    On a regular (non-final) call, clear preread_init_done for every table
    whose bit is set in join_tab->split_derived_to_update.
  */
  if (!end_of_records && join_tab->split_derived_to_update)
  {
    uint tab_no= 0;
    for (table_map pending= join_tab->split_derived_to_update;
         pending;
         pending>>= 1, tab_no++)
    {
      if (pending & 1)
        join->map2table[tab_no]->preread_init_done= false;
    }
  }

  /*
    Undo a possible earlier mark_as_null_row(): reset null_row for this
    table, or for the whole range [join_tab, last_inner] when last_inner
    is set.
  */
  {
    JOIN_TAB *range_end= join_tab->last_inner ? join_tab->last_inner
                                              : join_tab;
    for (JOIN_TAB *cur= join_tab; cur <= range_end; cur++)
      cur->table->null_row= 0;
  }

  if (end_of_records)
  {
    /*
      Just forward the end-of-records signal to the next level. The result
      is stored in a local first so that the call is made before
      DBUG_RETURN() runs.
    */
    enum_nested_loop_state forwarded=
      (*join_tab->next_select)(join, join_tab + 1, end_of_records);
    DBUG_RETURN(forwarded);
  }

  join_tab->tracker->r_scans++;
  rc= NESTED_LOOP_OK;

  /* Empty every weedout table that has to be flushed at this point */
  SJ_TMP_TABLE *weedout_tbl= join_tab->flush_weedout_table;
  while (weedout_tbl)
  {
    weedout_tbl->sj_weedout_delete_rows();
    weedout_tbl= weedout_tbl->next_flush_table;
  }

  if (!join_tab->preread_init_done && join_tab->preread_init())
    DBUG_RETURN(NESTED_LOOP_ERROR);

  if (unlikely(join_tab->rowid_filter))
  {
    if (unlikely(join_tab->need_to_build_rowid_filter) &&
        join_tab->build_range_rowid_filter())
      DBUG_RETURN(NESTED_LOOP_ERROR);
    /*
      rowid_filter is tested again because building the filter may have
      cleared it in case of errors.
    */
    if (join_tab->rowid_filter && join_tab->rowid_filter->is_empty())
      rc= NESTED_LOOP_NO_MORE_ROWS;
  }

  join->return_tab= join_tab;

  if (join_tab->last_inner)
  {
    /*
      join_tab is the first inner table of an outer join operation:
      reset its guard variables and make the last inner table of the
      group point back at it.
    */
    join_tab->found= 0;
    join_tab->not_null_compl= 1;
    join_tab->last_inner->first_unmatched= join_tab;

    if (join_tab->on_precond && !join_tab->on_precond->val_int())
      rc= NESTED_LOOP_NO_MORE_ROWS;
  }
  join->thd->get_stmt_da()->reset_current_row_for_warning(1);

  if (rc != NESTED_LOOP_NO_MORE_ROWS &&
      (rc= join_tab_execution_startup(join_tab)) < 0)
    DBUG_RETURN(rc);

  if (join_tab->loosescan_match_tab)
    join_tab->loosescan_match_tab->found_match= FALSE;

  DBUG_ASSERT(join_tab->cached_pfs_batch_update == join_tab->pfs_batch_update());
  if (join_tab->cached_pfs_batch_update)
    join_tab->table->file->start_psi_batch_mode();

  /* Read and process the first row, unless we already know there is none */
  if (rc != NESTED_LOOP_NO_MORE_ROWS)
  {
    read_error= (*join_tab->read_first_record)(join_tab);
    if (join_tab->keep_current_rowid && !read_error)
      join_tab->table->file->position(join_tab->table->record[0]);
    rc= evaluate_join_record(join, join_tab, read_error);
  }

  bool skipping_same_key= FALSE;
  READ_RECORD *reader= &join_tab->read_record;

  /* Process the remaining rows of this table */
  while (rc == NESTED_LOOP_OK && join->return_tab >= join_tab)
  {
    if (join_tab->loosescan_match_tab &&
        join_tab->loosescan_match_tab->found_match)
    {
      /* Remember the key of the row that produced a match */
      KEY *scan_key= join_tab->table->key_info + join_tab->loosescan_key;
      key_copy(join_tab->loosescan_buf, join_tab->table->record[0], scan_key,
               join_tab->loosescan_key_len);
      skipping_same_key= TRUE;
    }

    read_error= reader->read_record();

    if (skipping_same_key && likely(!read_error))
    {
      /*
        LooseScan: rows carrying the key value we already had a match for
        are skipped.
      */
      if (!key_cmp(join_tab->table->key_info[join_tab->loosescan_key].key_part,
                   join_tab->loosescan_buf, join_tab->loosescan_key_len))
        continue;

      join_tab->loosescan_match_tab->found_match= FALSE;
      skipping_same_key= FALSE;
    }

    if (join_tab->keep_current_rowid && likely(!read_error))
      join_tab->table->file->position(join_tab->table->record[0]);

    rc= evaluate_join_record(join, join_tab, read_error);
  }

  if (rc == NESTED_LOOP_NO_MORE_ROWS)
  {
    /*
      Normal end of scan. If this is the first inner table of an outer join
      and no match was found, process the NULL-complemented row; its
      "no more rows" result is also mapped to OK.
    */
    rc= NESTED_LOOP_OK;
    if (join_tab->last_inner && !join_tab->found)
    {
      rc= evaluate_null_complemented_join_record(join, join_tab);
      if (rc == NESTED_LOOP_NO_MORE_ROWS)
        rc= NESTED_LOOP_OK;
    }
  }

  if (join_tab->cached_pfs_batch_update)
    join_tab->table->file->end_psi_batch_mode();

  DBUG_RETURN(rc);
}
|
|
|
|
/**
|
|
@brief Process one row of the nested loop join.
|
|
|
|
This function will evaluate parts of WHERE/ON clauses that are
|
|
applicable to the partial row on hand and in case of success
|
|
submit this row to the next level of the nested loop.
|
|
|
|
@param join - The join object
|
|
@param join_tab - The most inner join_tab being processed
|
|
@param error > 0: Error, terminate processing
|
|
= 0: (Partial) row is available
|
|
< 0: No more rows available at this level
|
|
@return Nested loop state (Ok, No_more_rows, Error, Killed)
|
|
*/
|
|
|
|
static enum_nested_loop_state
|
|
evaluate_join_record(JOIN *join, JOIN_TAB *join_tab,
|
|
int error)
|
|
{
|
|
bool shortcut_for_distinct= join_tab->shortcut_for_distinct;
|
|
ha_rows found_records=join->found_records;
|
|
COND *select_cond= join_tab->select_cond;
|
|
bool select_cond_result= TRUE;
|
|
DBUG_ENTER("evaluate_join_record");
|
|
DBUG_PRINT("enter",
|
|
("evaluate_join_record join: %p join_tab: %p "
|
|
"cond: %p abort: %d alias %s",
|
|
join, join_tab, select_cond, error,
|
|
join_tab->table->alias.ptr()));
|
|
|
|
if (error > 0 || unlikely(join->thd->is_error())) // Fatal error
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (error < 0)
|
|
DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
|
|
if (unlikely(join->thd->check_killed())) // Aborted by user
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
|
|
join_tab->tracker->r_rows++;
|
|
|
|
if (select_cond)
|
|
{
|
|
select_cond_result= MY_TEST(select_cond->val_int());
|
|
|
|
/* check for errors evaluating the condition */
|
|
if (unlikely(join->thd->is_error()))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
|
|
if (select_cond_result)
|
|
{
|
|
/*
|
|
There is no select condition or the attached pushed down
|
|
condition is true => a match is found.
|
|
*/
|
|
join_tab->tracker->r_rows_after_where++;
|
|
|
|
bool found= 1;
|
|
while (join_tab->first_unmatched && found)
|
|
{
|
|
/*
|
|
The while condition is always false if join_tab is not
|
|
the last inner join table of an outer join operation.
|
|
*/
|
|
JOIN_TAB *first_unmatched= join_tab->first_unmatched;
|
|
/*
|
|
Mark that a match for current outer table is found.
|
|
This activates push down conditional predicates attached
|
|
to the all inner tables of the outer join.
|
|
*/
|
|
first_unmatched->found= 1;
|
|
for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++)
|
|
{
|
|
/*
|
|
Check whether 'not exists' optimization can be used here.
|
|
If tab->table->reginfo.not_exists_optimize is set to true
|
|
then WHERE contains a conjunctive predicate IS NULL over
|
|
a non-nullable field of tab. When activated this predicate
|
|
will filter out all records with matches for the left part
|
|
of the outer join whose inner tables start from the
|
|
first_unmatched table and include table tab. To safely use
|
|
'not exists' optimization we have to check that the
|
|
IS NULL predicate is really activated, i.e. all guards
|
|
that wrap it are in the 'open' state.
|
|
*/
|
|
bool not_exists_opt_is_applicable=
|
|
tab->table->reginfo.not_exists_optimize;
|
|
for (JOIN_TAB *first_upper= first_unmatched->first_upper;
|
|
not_exists_opt_is_applicable && first_upper;
|
|
first_upper= first_upper->first_upper)
|
|
{
|
|
if (!first_upper->found)
|
|
not_exists_opt_is_applicable= false;
|
|
}
|
|
/* Check all predicates that has just been activated. */
|
|
/*
|
|
Actually all predicates non-guarded by first_unmatched->found
|
|
will be re-evaluated again. It could be fixed, but, probably,
|
|
it's not worth doing now.
|
|
*/
|
|
if (tab->select_cond)
|
|
{
|
|
const longlong res= tab->select_cond->val_int();
|
|
if (join->thd->is_error())
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
|
|
if (!res)
|
|
{
|
|
/* The condition attached to table tab is false */
|
|
if (tab == join_tab)
|
|
{
|
|
found= 0;
|
|
if (not_exists_opt_is_applicable)
|
|
DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Set a return point if rejected predicate is attached
|
|
not to the last table of the current nest level.
|
|
*/
|
|
join->return_tab= tab;
|
|
if (not_exists_opt_is_applicable)
|
|
DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
|
|
else
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
Check whether join_tab is not the last inner table
|
|
for another embedding outer join.
|
|
*/
|
|
if ((first_unmatched= first_unmatched->first_upper) &&
|
|
first_unmatched->last_inner != join_tab)
|
|
first_unmatched= 0;
|
|
join_tab->first_unmatched= first_unmatched;
|
|
}
|
|
|
|
JOIN_TAB *return_tab= join->return_tab;
|
|
join_tab->found_match= TRUE;
|
|
|
|
if (join_tab->check_weed_out_table && found)
|
|
{
|
|
int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
|
|
DBUG_PRINT("info", ("weedout_check: %d", res));
|
|
if (res == -1)
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
else if (res == 1)
|
|
found= FALSE;
|
|
}
|
|
else if (join_tab->do_firstmatch)
|
|
{
|
|
/*
|
|
We should return to the join_tab->do_firstmatch after we have
|
|
enumerated all the suffixes for current prefix row combination
|
|
*/
|
|
return_tab= join_tab->do_firstmatch;
|
|
}
|
|
|
|
/*
|
|
It was not just a return to lower loop level when one
|
|
of the newly activated predicates is evaluated as false
|
|
(See above join->return_tab= tab).
|
|
*/
|
|
join->thd->inc_examined_row_count_fast();
|
|
DBUG_PRINT("counts", ("examined_rows: %llu found: %d",
|
|
(ulonglong) join->thd->m_examined_row_count, (int) found));
|
|
|
|
if (found)
|
|
{
|
|
enum enum_nested_loop_state rc;
|
|
/* A match from join_tab is found for the current partial join. */
|
|
rc= (*join_tab->next_select)(join, join_tab+1, 0);
|
|
join->thd->get_stmt_da()->inc_current_row_for_warning();
|
|
if (rc != NESTED_LOOP_OK && rc != NESTED_LOOP_NO_MORE_ROWS)
|
|
DBUG_RETURN(rc);
|
|
if (return_tab < join->return_tab)
|
|
join->return_tab= return_tab;
|
|
|
|
/* check for errors evaluating the condition */
|
|
if (unlikely(join->thd->is_error()))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
|
|
if (join->return_tab < join_tab)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
/*
|
|
Test if this was a SELECT DISTINCT query on a table that
|
|
was not in the field list; In this case we can abort if
|
|
we found a row, as no new rows can be added to the result.
|
|
*/
|
|
if (shortcut_for_distinct && found_records != join->found_records)
|
|
DBUG_RETURN(NESTED_LOOP_NO_MORE_ROWS);
|
|
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
The condition pushed down to the table join_tab rejects all rows
|
|
with the beginning coinciding with the current partial join.
|
|
*/
|
|
join->thd->inc_examined_row_count_fast();
|
|
}
|
|
|
|
join->thd->get_stmt_da()->inc_current_row_for_warning();
|
|
join_tab->read_record.unlock_row(join_tab);
|
|
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
/**
|
|
|
|
@details
|
|
Construct a NULL complimented partial join record and feed it to the next
|
|
level of the nested loop. This function is used in case we have
|
|
an OUTER join and no matching record was found.
|
|
*/
|
|
|
|
static enum_nested_loop_state
|
|
evaluate_null_complemented_join_record(JOIN *join, JOIN_TAB *join_tab)
|
|
{
|
|
/*
|
|
The table join_tab is the first inner table of a outer join operation
|
|
and no matches has been found for the current outer row.
|
|
*/
|
|
JOIN_TAB *last_inner_tab= join_tab->last_inner;
|
|
/* Cache variables for faster loop */
|
|
COND *select_cond;
|
|
for ( ; join_tab <= last_inner_tab ; join_tab++)
|
|
{
|
|
/* Change the the values of guard predicate variables. */
|
|
join_tab->found= 1;
|
|
join_tab->not_null_compl= 0;
|
|
/* The outer row is complemented by nulls for each inner tables */
|
|
restore_record(join_tab->table,s->default_values); // Make empty record
|
|
mark_as_null_row(join_tab->table); // For group by without error
|
|
select_cond= join_tab->select_cond;
|
|
/* Check all attached conditions for inner table rows. */
|
|
if (select_cond && !select_cond->val_int())
|
|
return NESTED_LOOP_OK;
|
|
}
|
|
join_tab--;
|
|
/*
|
|
The row complemented by nulls might be the first row
|
|
of embedding outer joins.
|
|
If so, perform the same actions as in the code
|
|
for the first regular outer join row above.
|
|
*/
|
|
for ( ; ; )
|
|
{
|
|
JOIN_TAB *first_unmatched= join_tab->first_unmatched;
|
|
if ((first_unmatched= first_unmatched->first_upper) &&
|
|
first_unmatched->last_inner != join_tab)
|
|
first_unmatched= 0;
|
|
join_tab->first_unmatched= first_unmatched;
|
|
if (!first_unmatched)
|
|
break;
|
|
first_unmatched->found= 1;
|
|
for (JOIN_TAB *tab= first_unmatched; tab <= join_tab; tab++)
|
|
{
|
|
if (tab->select_cond && !tab->select_cond->val_int())
|
|
{
|
|
join->return_tab= tab;
|
|
return NESTED_LOOP_OK;
|
|
}
|
|
}
|
|
}
|
|
/*
|
|
The row complemented by nulls satisfies all conditions
|
|
attached to inner tables.
|
|
*/
|
|
if (join_tab->check_weed_out_table)
|
|
{
|
|
int res= join_tab->check_weed_out_table->sj_weedout_check_row(join->thd);
|
|
if (res == -1)
|
|
return NESTED_LOOP_ERROR;
|
|
else if (res == 1)
|
|
return NESTED_LOOP_OK;
|
|
}
|
|
else if (join_tab->do_firstmatch)
|
|
{
|
|
/*
|
|
We should return to the join_tab->do_firstmatch after we have
|
|
enumerated all the suffixes for current prefix row combination
|
|
*/
|
|
if (join_tab->do_firstmatch < join->return_tab)
|
|
join->return_tab= join_tab->do_firstmatch;
|
|
}
|
|
|
|
/*
|
|
Send the row complemented by nulls to be joined with the
|
|
remaining tables.
|
|
*/
|
|
return (*join_tab->next_select)(join, join_tab+1, 0);
|
|
}
|
|
|
|
/*****************************************************************************
|
|
The different ways to read a record
|
|
Returns -1 if row was not found, 0 if row was found and 1 on errors
|
|
*****************************************************************************/
|
|
|
|
/** Help function when we get some an error from the table handler. */
|
|
|
|
int report_error(TABLE *table, int error)
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE || error == HA_ERR_KEY_NOT_FOUND)
|
|
{
|
|
table->status= STATUS_GARBAGE;
|
|
return -1; // key not found; ok
|
|
}
|
|
/*
|
|
Locking reads can legally return also these errors, do not
|
|
print them to the .err log
|
|
*/
|
|
if (error != HA_ERR_LOCK_DEADLOCK && error != HA_ERR_LOCK_WAIT_TIMEOUT
|
|
&& error != HA_ERR_TABLE_DEF_CHANGED && !table->in_use->killed)
|
|
sql_print_error("Got error %d when reading table '%s'",
|
|
error, table->s->path.str);
|
|
table->file->print_error(error,MYF(0));
|
|
return 1;
|
|
}
|
|
|
|
|
|
/*
  Do an exact-match index read using the key stored in tab->ref.

  Returns 0 when a row was read, otherwise the result of report_error().
*/
int safe_index_read(JOIN_TAB *tab)
{
  TABLE *table= tab->table;
  const int error=
    table->file->ha_index_read_map(table->record[0],
                                   tab->ref.key_buff,
                                   make_prev_keypart_map(tab->ref.key_parts),
                                   HA_READ_KEY_EXACT);
  return unlikely(error) ? report_error(table, error) : 0;
}
|
|
|
|
|
|
/**
|
|
Reads content of constant table
|
|
|
|
@param tab table
|
|
@param pos position of table in query plan
|
|
|
|
@retval 0 ok, one row was found or one NULL-complemented row was created
|
|
@retval -1 ok, no row was found and no NULL-complemented row was created
|
|
@retval 1 error
|
|
*/
|
|
|
|
static int
|
|
join_read_const_table(THD *thd, JOIN_TAB *tab, POSITION *pos)
|
|
{
|
|
int error;
|
|
TABLE_LIST *tbl;
|
|
DBUG_ENTER("join_read_const_table");
|
|
TABLE *table=tab->table;
|
|
table->const_table=1;
|
|
table->null_row=0;
|
|
table->status=STATUS_NO_RECORD;
|
|
|
|
if (tab->table->pos_in_table_list->is_materialized_derived() &&
|
|
!tab->table->pos_in_table_list->fill_me)
|
|
{
|
|
DBUG_ASSERT(0);
|
|
//TODO: don't get here at all
|
|
/*
|
|
Skip materialized derived tables/views as they temporary table is not
|
|
opened yet.
|
|
*/
|
|
DBUG_RETURN(0);
|
|
}
|
|
else if (tab->table->pos_in_table_list->jtbm_subselect &&
|
|
tab->table->pos_in_table_list->jtbm_subselect->is_jtbm_const_tab)
|
|
{
|
|
/* Row will not be found */
|
|
int res;
|
|
if (tab->table->pos_in_table_list->jtbm_subselect->jtbm_const_row_found)
|
|
res= 0;
|
|
else
|
|
res= -1;
|
|
DBUG_RETURN(res);
|
|
}
|
|
else if (tab->type == JT_SYSTEM)
|
|
{
|
|
if (unlikely((error=join_read_system(tab))))
|
|
{ // Info for DESCRIBE
|
|
tab->info= ET_CONST_ROW_NOT_FOUND;
|
|
/* Mark for EXPLAIN that the row was not found */
|
|
pos->records_read= pos->records_out= 0.0;
|
|
pos->ref_depend_map= 0;
|
|
if (!table->pos_in_table_list->outer_join || error > 0)
|
|
DBUG_RETURN(error);
|
|
}
|
|
/*
|
|
The optimizer trust the engine that when stats.records is 0, there
|
|
was no found rows
|
|
*/
|
|
DBUG_ASSERT(table->file->stats.records > 0 || error);
|
|
}
|
|
else
|
|
{
|
|
error=join_read_const(tab);
|
|
if (unlikely(error))
|
|
{
|
|
tab->info= ET_UNIQUE_ROW_NOT_FOUND;
|
|
/* Mark for EXPLAIN that the row was not found */
|
|
pos->records_read= pos->records_out= 0.0;
|
|
pos->ref_depend_map= 0;
|
|
if (!table->pos_in_table_list->outer_join || error > 0)
|
|
DBUG_RETURN(error);
|
|
}
|
|
}
|
|
/*
|
|
Evaluate an on-expression only if it is not considered expensive.
|
|
This mainly prevents executing subqueries in optimization phase.
|
|
This is necessary since proper setup for such execution has not been
|
|
done at this stage.
|
|
*/
|
|
if (*tab->on_expr_ref && !table->null_row &&
|
|
!(*tab->on_expr_ref)->is_expensive())
|
|
{
|
|
#if !defined(DBUG_OFF) && defined(NOT_USING_ITEM_EQUAL)
|
|
/*
|
|
This test could be very useful to find bugs in the optimizer
|
|
where we would call this function with an expression that can't be
|
|
evaluated yet. We can't have this enabled by default as long as
|
|
have items like Item_equal, that doesn't report they are const but
|
|
they can still be called even if they contain not const items.
|
|
*/
|
|
(*tab->on_expr_ref)->update_used_tables();
|
|
DBUG_ASSERT((*tab->on_expr_ref)->const_item());
|
|
#endif
|
|
if ((table->null_row= MY_TEST((*tab->on_expr_ref)->val_int() == 0)))
|
|
mark_as_null_row(table);
|
|
}
|
|
if (!table->null_row && ! tab->join->mixed_implicit_grouping)
|
|
table->maybe_null= 0;
|
|
|
|
{
|
|
JOIN *join= tab->join;
|
|
List_iterator<TABLE_LIST> ti(join->select_lex->leaf_tables);
|
|
/* Check appearance of new constant items in Item_equal objects */
|
|
if (join->conds)
|
|
update_const_equal_items(thd, join->conds, tab, TRUE);
|
|
while ((tbl= ti++))
|
|
{
|
|
TABLE_LIST *embedded;
|
|
TABLE_LIST *embedding= tbl;
|
|
do
|
|
{
|
|
embedded= embedding;
|
|
if (embedded->on_expr)
|
|
update_const_equal_items(thd, embedded->on_expr, tab, TRUE);
|
|
embedding= embedded->embedding;
|
|
}
|
|
while (embedding &&
|
|
embedding->nested_join->join_list.head() == embedded);
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/**
|
|
Read a constant table when there is at most one matching row, using a table
|
|
scan.
|
|
|
|
@param tab Table to read
|
|
|
|
@retval 0 Row was found
|
|
@retval -1 Row was not found
|
|
@retval 1 Got an error (other than row not found) during read
|
|
*/
|
|
static int
|
|
join_read_system(JOIN_TAB *tab)
|
|
{
|
|
TABLE *table= tab->table;
|
|
int error;
|
|
if (table->status & STATUS_GARBAGE) // If first read
|
|
{
|
|
if (unlikely((error=
|
|
table->file->ha_read_first_row(table->record[0],
|
|
table->s->primary_key))))
|
|
{
|
|
if (error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
table->const_table= 1;
|
|
mark_as_null_row(tab->table);
|
|
empty_record(table); // Make empty record
|
|
return -1;
|
|
}
|
|
store_record(table,record[1]);
|
|
}
|
|
else if (!table->status) // Only happens with left join
|
|
restore_record(table,record[1]); // restore old record
|
|
table->null_row=0;
|
|
return table->status ? -1 : 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Read a table when there is at most one matching row.
|
|
|
|
@param tab Table to read
|
|
|
|
@retval 0 Row was found
|
|
@retval -1 Row was not found
|
|
@retval 1 Got an error (other than row not found) during read
|
|
*/
|
|
|
|
static int
|
|
join_read_const(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
if (table->status & STATUS_GARBAGE) // If first read
|
|
{
|
|
table->status= 0;
|
|
if (cp_buffer_from_ref(tab->join->thd, table, &tab->ref))
|
|
error=HA_ERR_KEY_NOT_FOUND;
|
|
else
|
|
{
|
|
handler *file= table->file;
|
|
if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread &&
|
|
(int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY)
|
|
{
|
|
file->ha_start_keyread(tab->ref.key);
|
|
/* This is probably needed for analyze table */
|
|
tab->index= tab->ref.key;
|
|
}
|
|
error= file->
|
|
ha_index_read_idx_map(table->record[0],tab->ref.key,
|
|
(uchar*) tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts),
|
|
HA_READ_KEY_EXACT);
|
|
file->ha_end_keyread();
|
|
}
|
|
if (unlikely(error))
|
|
{
|
|
table->status= STATUS_NOT_FOUND;
|
|
mark_as_null_row(tab->table);
|
|
empty_record(table);
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
return -1;
|
|
}
|
|
store_record(table,record[1]);
|
|
}
|
|
else if (!(table->status & ~STATUS_NULL_ROW)) // Only happens with left join
|
|
{
|
|
table->status=0;
|
|
restore_record(table,record[1]); // restore old record
|
|
}
|
|
table->null_row=0;
|
|
return table->status ? -1 : 0;
|
|
}
|
|
|
|
/*
|
|
eq_ref access method implementation: "read_first" function
|
|
|
|
SYNOPSIS
|
|
join_read_key()
|
|
tab JOIN_TAB of the accessed table
|
|
|
|
DESCRIPTION
|
|
This is "read_fist" function for the eq_ref access method. The difference
|
|
from ref access function is that is that it has a one-element lookup
|
|
cache (see cmp_buffer_with_ref)
|
|
|
|
RETURN
|
|
0 - Ok
|
|
-1 - Row not found
|
|
1 - Error
|
|
*/
|
|
|
|
|
|
static int
|
|
join_read_key(JOIN_TAB *tab)
|
|
{
|
|
return join_read_key2(tab->join->thd, tab, tab->table, &tab->ref);
|
|
}
|
|
|
|
|
|
/*
|
|
eq_ref access handler but generalized a bit to support TABLE and TABLE_REF
|
|
not from the join_tab. See join_read_key for detailed synopsis.
|
|
*/
|
|
int join_read_key2(THD *thd, JOIN_TAB *tab, TABLE *table, TABLE_REF *table_ref)
|
|
{
|
|
int error;
|
|
if (!table->file->inited)
|
|
{
|
|
error= table->file->ha_index_init(table_ref->key, tab ? tab->sorted : TRUE);
|
|
if (unlikely(error))
|
|
{
|
|
(void) report_error(table, error);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
The following is needed when one makes ref (or eq_ref) access from row
|
|
comparisons: one must call row->bring_value() to get the new values.
|
|
*/
|
|
if (tab && tab->bush_children)
|
|
{
|
|
TABLE_LIST *emb_sj_nest= tab->bush_children->start->emb_sj_nest;
|
|
emb_sj_nest->sj_subq_pred->left_exp()->bring_value();
|
|
}
|
|
|
|
/* TODO: Why don't we do "Late NULLs Filtering" here? */
|
|
|
|
if (cmp_buffer_with_ref(thd, table, table_ref) ||
|
|
(table->status & (STATUS_GARBAGE | STATUS_NO_PARENT | STATUS_NULL_ROW)))
|
|
{
|
|
if (table_ref->key_err)
|
|
{
|
|
table->status=STATUS_NOT_FOUND;
|
|
return -1;
|
|
}
|
|
/*
|
|
Moving away from the current record. Unlock the row
|
|
in the handler if it did not match the partial WHERE.
|
|
*/
|
|
if (tab && tab->ref.has_record && tab->ref.use_count == 0)
|
|
{
|
|
tab->read_record.table->file->unlock_row();
|
|
table_ref->has_record= FALSE;
|
|
}
|
|
error=table->file->ha_index_read_map(table->record[0],
|
|
table_ref->key_buff,
|
|
make_prev_keypart_map(table_ref->key_parts),
|
|
HA_READ_KEY_EXACT);
|
|
if (unlikely(error) &&
|
|
error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
|
|
if (likely(!error))
|
|
{
|
|
table_ref->has_record= TRUE;
|
|
table_ref->use_count= 1;
|
|
}
|
|
}
|
|
else if (table->status == 0)
|
|
{
|
|
DBUG_ASSERT(table_ref->has_record);
|
|
table_ref->use_count++;
|
|
}
|
|
table->null_row=0;
|
|
return table->status ? -1 : 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Since join_read_key may buffer a record, do not unlock
|
|
it if it was not used in this invocation of join_read_key().
|
|
Only count locks, thus remembering if the record was left unused,
|
|
and unlock already when pruning the current value of
|
|
TABLE_REF buffer.
|
|
@sa join_read_key()
|
|
*/
|
|
|
|
static void
|
|
join_read_key_unlock_row(st_join_table *tab)
|
|
{
|
|
DBUG_ASSERT(tab->ref.use_count);
|
|
if (tab->ref.use_count)
|
|
tab->ref.use_count--;
|
|
}
|
|
|
|
/**
|
|
Rows from const tables are read once but potentially used
|
|
multiple times during execution of a query.
|
|
Ensure such rows are never unlocked during query execution.
|
|
*/
|
|
|
|
void
|
|
join_const_unlock_row(JOIN_TAB *tab)
|
|
{
|
|
DBUG_ASSERT(tab->type == JT_CONST);
|
|
}
|
|
|
|
|
|
/*
|
|
ref access method implementation: "read_first" function
|
|
|
|
SYNOPSIS
|
|
join_read_always_key()
|
|
tab JOIN_TAB of the accessed table
|
|
|
|
DESCRIPTION
|
|
This is "read_fist" function for the "ref" access method.
|
|
|
|
The functon must leave the index initialized when it returns.
|
|
ref_or_null access implementation depends on that.
|
|
|
|
RETURN
|
|
0 - Ok
|
|
-1 - Row not found
|
|
1 - Error
|
|
*/
|
|
|
|
static int
|
|
join_read_always_key(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
|
|
/* Initialize the index first */
|
|
if (!table->file->inited)
|
|
{
|
|
if (unlikely((error= table->file->ha_index_init(tab->ref.key,
|
|
tab->sorted))))
|
|
{
|
|
(void) report_error(table, error);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref)))
|
|
return -1;
|
|
if (unlikely((error=
|
|
table->file->prepare_index_key_scan_map(tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts)))))
|
|
{
|
|
report_error(table,error);
|
|
return -1;
|
|
}
|
|
if ((error= table->file->ha_index_read_map(table->record[0],
|
|
tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts),
|
|
HA_READ_KEY_EXACT)))
|
|
{
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
return -1; /* purecov: inspected */
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
This function is used when optimizing away ORDER BY in
|
|
SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC.
|
|
*/
|
|
|
|
static int
|
|
join_read_last_key(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
|
|
if (!table->file->inited &&
|
|
unlikely((error= table->file->ha_index_init(tab->ref.key, tab->sorted))))
|
|
{
|
|
(void) report_error(table, error);
|
|
return 1;
|
|
}
|
|
|
|
if (unlikely(cp_buffer_from_ref(tab->join->thd, table, &tab->ref)))
|
|
return -1;
|
|
if (unlikely((error=
|
|
table->file->prepare_index_key_scan_map(tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts)))) )
|
|
{
|
|
report_error(table,error);
|
|
return -1;
|
|
}
|
|
if (unlikely((error=
|
|
table->file->ha_index_read_map(table->record[0],
|
|
tab->ref.key_buff,
|
|
make_prev_keypart_map(tab->ref.key_parts),
|
|
HA_READ_PREFIX_LAST))))
|
|
{
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
return -1; /* purecov: inspected */
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/* ARGSUSED */
|
|
static int
|
|
join_no_more_records(READ_RECORD *info __attribute__((unused)))
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_next_same(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
TABLE *table= info->table;
|
|
JOIN_TAB *tab=table->reginfo.join_tab;
|
|
|
|
if (unlikely((error= table->file->ha_index_next_same(table->record[0],
|
|
tab->ref.key_buff,
|
|
tab->ref.key_length))))
|
|
{
|
|
if (error != HA_ERR_END_OF_FILE)
|
|
return report_error(table, error);
|
|
table->status= STATUS_GARBAGE;
|
|
return -1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_prev_same(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
TABLE *table= info->table;
|
|
JOIN_TAB *tab=table->reginfo.join_tab;
|
|
|
|
if (unlikely((error= table->file->ha_index_prev(table->record[0]))))
|
|
return report_error(table, error);
|
|
if (key_cmp_if_same(table, tab->ref.key_buff, tab->ref.key,
|
|
tab->ref.key_length))
|
|
{
|
|
table->status=STATUS_NOT_FOUND;
|
|
error= -1;
|
|
}
|
|
return error;
|
|
}
|
|
|
|
|
|
static int
|
|
join_init_quick_read_record(JOIN_TAB *tab)
|
|
{
|
|
quick_select_return res= test_if_quick_select(tab);
|
|
|
|
if (res == SQL_SELECT::ERROR)
|
|
return 1; /* Fatal error */
|
|
|
|
if (res == SQL_SELECT::IMPOSSIBLE_RANGE)
|
|
return -1; /* No possible records */
|
|
|
|
/*
|
|
Proceed to read rows. If we've created a quick select, use it, otherwise
|
|
do a full scan.
|
|
*/
|
|
return join_init_read_record(tab);
|
|
}
|
|
|
|
|
|
/*
  Start a sequential (rnd) scan on the table of tab->read_record and
  read its first row.

  @retval 1  ha_rnd_init_with_error() failed
  @return    otherwise the result of tab->read_record.read_record()
*/
int read_first_record_seq(JOIN_TAB *tab)
{
  READ_RECORD *rr= &tab->read_record;

  return unlikely(rr->table->file->ha_rnd_init_with_error(1))
         ? 1
         : rr->read_record();
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Create a new (dynamic) quick select.
|
|
*/
|
|
|
|
static quick_select_return
|
|
test_if_quick_select(JOIN_TAB *tab)
|
|
{
|
|
DBUG_EXECUTE_IF("show_explain_probe_test_if_quick_select",
|
|
if (dbug_user_var_equals_int(tab->join->thd,
|
|
"show_explain_probe_select_id",
|
|
tab->join->select_lex->select_number))
|
|
dbug_serve_apcs(tab->join->thd, 1);
|
|
);
|
|
|
|
|
|
delete tab->select->quick;
|
|
tab->select->quick=0;
|
|
|
|
if (tab->table->file->inited != handler::NONE)
|
|
tab->table->file->ha_index_or_rnd_end();
|
|
|
|
quick_select_return res;
|
|
res= tab->select->test_quick_select(tab->join->thd, tab->keys,
|
|
(table_map) 0, HA_POS_ERROR, 0,
|
|
FALSE, /*remove where parts*/FALSE,
|
|
FALSE,
|
|
/* no unusable key notes */
|
|
Item_func::BITMAP_NONE);
|
|
if (tab->explain_plan && tab->explain_plan->range_checked_fer)
|
|
tab->explain_plan->range_checked_fer->collect_data(tab->select->quick);
|
|
|
|
return res;
|
|
}
|
|
|
|
|
|
/*
|
|
@return
|
|
1 - Yes, use dynamically built range
|
|
0 - No, don't use dynamic range (but there's no error)
|
|
-1 - Fatal error
|
|
*/
|
|
|
|
static
|
|
int test_if_use_dynamic_range_scan(JOIN_TAB *join_tab)
|
|
{
|
|
if (unlikely(join_tab->use_quick == 2))
|
|
{
|
|
quick_select_return res= test_if_quick_select(join_tab);
|
|
if (res == SQL_SELECT::ERROR)
|
|
return -1;
|
|
else
|
|
{
|
|
/* Both OK and IMPOSSIBLE_RANGE go here */
|
|
return join_tab->select->quick ? 1 : 0;
|
|
}
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
  Prepare tab->read_record for reading and fetch the first row.

  In order: removes duplicates if requested, builds the rowid filter if
  needed, sorts the table if a filesort is attached, runs preread_init(),
  resets the quick select, initializes the READ_RECORD, and finally calls
  read_record() once.

  @retval 1  one of the preparation steps failed
  @return    otherwise the result of tab->read_record.read_record()
*/
int join_init_read_record(JOIN_TAB *tab)
{
  JOIN *join= tab->join;
  READ_RECORD *rr= &tab->read_record;
  bool unpack_after_read= FALSE;

  /*
    Note: the query plan tree for the below operations is constructed in
    save_agg_explain_data.
  */
  if (tab->distinct && tab->remove_duplicates())
    return 1;

  if (join->top_join_tab_count != join->const_tables)
  {
    TABLE_LIST *tbl= tab->table->pos_in_table_list;
    unpack_after_read= tbl && tbl->is_sjm_scan_table();
  }

  if (tab->need_to_build_rowid_filter && tab->build_range_rowid_filter())
    return 1;                                   /* Fatal error */

  if (tab->filesort && tab->sort_table())
    return 1;

  DBUG_EXECUTE_IF("kill_join_init_read_record",
                  tab->join->thd->set_killed(KILL_QUERY););

  if (!tab->preread_init_done && tab->preread_init())
    return 1;

  if (tab->select && tab->select->quick && tab->select->quick->reset())
  {
    /* Report the failure so the error status reaches the client. */
    const int err= tab->join->thd->killed ? HA_ERR_QUERY_INTERRUPTED
                                          : HA_ERR_OUT_OF_MEM;
    report_error(tab->table, err);
    return 1;
  }
  /* make sure we won't get ER_QUERY_INTERRUPTED from any code below */
  DBUG_EXECUTE_IF("kill_join_init_read_record",
                  tab->join->thd->reset_killed(););

  /*
    init_read_record() resets every member of tab->read_record, so
    remember the members that must survive the call.
  */
  Copy_field *const saved_copy= rr->copy_field;
  Copy_field *const saved_copy_end= rr->copy_field_end;

  /*
    JT_NEXT means an index scan over 'tab->index'.  If a filesort is
    attached the table has already been sorted above, and the rows must
    be retrieved from the tmp file or through rnd_pos().  If a quick
    select exists we are in "Range checked for each record", and the
    regular init_read_record() is the one to handle it.
  */
  const bool has_quick= tab->select && tab->select->quick;

  if (tab->type == JT_NEXT && !tab->filesort && !has_quick)
  {
    /* Used with covered_index scan or force index */
    if (init_read_record_idx(rr, tab->join->thd, tab->table,
                             1, tab->index, 0))
      return 1;
  }
  else if (init_read_record(rr, tab->join->thd, tab->table,
                            tab->select, tab->filesort_result, 1, 1, FALSE))
    return 1;

  rr->copy_field= saved_copy;
  rr->copy_field_end= saved_copy_end;

  if (unpack_after_read)
  {
    /* Route reads through the wrapper; keep the real reader next to it. */
    rr->read_record_func_and_unpack_calls= rr->read_record_func;
    rr->read_record_func= read_record_func_for_rr_and_unpack;
  }

  return rr->read_record();
}
|
|
|
|
|
|
/*
|
|
Helper function for sorting table with filesort.
|
|
*/
|
|
|
|
bool
|
|
JOIN_TAB::sort_table()
|
|
{
|
|
int rc;
|
|
DBUG_PRINT("info",("Sorting for index"));
|
|
THD_STAGE_INFO(join->thd, stage_creating_sort_index);
|
|
DBUG_ASSERT(join->ordered_index_usage != (filesort->order == join->order ?
|
|
JOIN::ordered_index_order_by :
|
|
JOIN::ordered_index_group_by));
|
|
rc= create_sort_index(join->thd, join, this, NULL);
|
|
/* Disactivate rowid filter if it was used when creating sort index */
|
|
if (rowid_filter)
|
|
table->file->rowid_filter_is_active= false;
|
|
return (rc != 0);
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_first(JOIN_TAB *tab)
|
|
{
|
|
int error= 0;
|
|
TABLE *table=tab->table;
|
|
DBUG_ENTER("join_read_first");
|
|
|
|
DBUG_ASSERT(table->no_keyread ||
|
|
!table->covering_keys.is_set(tab->index) ||
|
|
table->file->keyread == tab->index);
|
|
tab->table->status=0;
|
|
tab->read_record.table=table;
|
|
if (tab->index >= table->s->keys)
|
|
{
|
|
DBUG_ASSERT(tab->index < table->s->total_keys);
|
|
DBUG_ASSERT(tab->index == table->s->keys);
|
|
DBUG_ASSERT(tab->sorted);
|
|
DBUG_ASSERT(tab->join->order);
|
|
DBUG_ASSERT(tab->join->order->next == NULL);
|
|
DBUG_ASSERT(tab->join->select_limit < HA_POS_ERROR);
|
|
tab->read_record.read_record_func= join_hlindex_read_next;
|
|
error= tab->table->hlindex_read_first(tab->index, *tab->join->order->item,
|
|
tab->join->select_limit);
|
|
}
|
|
else
|
|
{
|
|
tab->read_record.read_record_func= join_read_next;
|
|
if (!table->file->inited)
|
|
error= table->file->ha_index_init(tab->index, tab->sorted);
|
|
if (!error)
|
|
error= table->file->prepare_index_scan();
|
|
if (!error)
|
|
error= tab->table->file->ha_index_first(tab->table->record[0]);
|
|
}
|
|
if (error)
|
|
{
|
|
if (error != HA_ERR_KEY_NOT_FOUND && error != HA_ERR_END_OF_FILE)
|
|
report_error(table, error);
|
|
DBUG_RETURN(-1);
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_next(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= info->table->file->ha_index_next(info->record()))))
|
|
return report_error(info->table, error);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
  Read the next row through TABLE::hlindex_read_next().

  @return 0 on success, otherwise the value of report_error()
*/
static int join_hlindex_read_next(READ_RECORD *info)
{
  TABLE *table= info->table;
  const int rc= table->hlindex_read_next();

  if (!rc)
    return 0;
  return report_error(table, rc);
}
|
|
|
|
|
|
static int
|
|
join_read_last(JOIN_TAB *tab)
|
|
{
|
|
TABLE *table=tab->table;
|
|
int error= 0;
|
|
DBUG_ENTER("join_read_last");
|
|
|
|
DBUG_ASSERT(table->no_keyread ||
|
|
!table->covering_keys.is_set(tab->index) ||
|
|
table->file->keyread == tab->index);
|
|
tab->table->status=0;
|
|
tab->read_record.read_record_func= join_read_prev;
|
|
tab->read_record.table=table;
|
|
if (!table->file->inited)
|
|
error= table->file->ha_index_init(tab->index, 1);
|
|
if (likely(!error))
|
|
error= table->file->prepare_index_scan();
|
|
if (unlikely(error) ||
|
|
unlikely(error= tab->table->file->ha_index_last(tab->table->record[0])))
|
|
DBUG_RETURN(report_error(table, error));
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
static int
|
|
join_read_prev(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= info->table->file->ha_index_prev(info->record()))))
|
|
return report_error(info->table, error);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int
|
|
join_ft_read_first(JOIN_TAB *tab)
|
|
{
|
|
int error;
|
|
TABLE *table= tab->table;
|
|
|
|
if (!table->file->inited &&
|
|
(error= table->file->ha_index_init(tab->ref.key, 1)))
|
|
{
|
|
(void) report_error(table, error);
|
|
return 1;
|
|
}
|
|
|
|
table->file->ft_init();
|
|
|
|
if (unlikely((error= table->file->ha_ft_read(table->record[0]))))
|
|
return report_error(table, error);
|
|
return 0;
|
|
}
|
|
|
|
static int
|
|
join_ft_read_next(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= info->table->file->ha_ft_read(info->record()))))
|
|
return report_error(info->table, error);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Reading of key with key reference and one part that may be NULL.
|
|
*/
|
|
|
|
int
|
|
join_read_always_key_or_null(JOIN_TAB *tab)
|
|
{
|
|
int res;
|
|
|
|
/* First read according to key which is NOT NULL */
|
|
*tab->ref.null_ref_key= 0; // Clear null byte
|
|
if ((res= join_read_always_key(tab)) >= 0)
|
|
return res;
|
|
|
|
/* Then read key with null value */
|
|
*tab->ref.null_ref_key= 1; // Set null byte
|
|
return safe_index_read(tab);
|
|
}
|
|
|
|
|
|
int
|
|
join_read_next_same_or_null(READ_RECORD *info)
|
|
{
|
|
int error;
|
|
if (unlikely((error= join_read_next_same(info)) >= 0))
|
|
return error;
|
|
JOIN_TAB *tab= info->table->reginfo.join_tab;
|
|
|
|
/* Test if we have already done a read after null key */
|
|
if (*tab->ref.null_ref_key)
|
|
return -1; // All keys read
|
|
*tab->ref.null_ref_key= 1; // Set null byte
|
|
return safe_index_read(tab); // then read null keys
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
DESCRIPTION
|
|
Functions that end one nested loop iteration. Different functions
|
|
are used to support GROUP BY clause and to redirect records
|
|
to a table (e.g. in case of SELECT into a temporary table) or to the
|
|
network client.
|
|
|
|
RETURN VALUES
|
|
NESTED_LOOP_OK - the record has been successfully handled
|
|
NESTED_LOOP_ERROR - a fatal error (like table corruption)
|
|
was detected
|
|
NESTED_LOOP_KILLED - thread shutdown was requested while processing
|
|
the record
|
|
NESTED_LOOP_QUERY_LIMIT - the record has been successfully handled;
|
|
additionally, the nested loop produced the
|
|
number of rows specified in the LIMIT clause
|
|
for the query
|
|
NESTED_LOOP_CURSOR_LIMIT - the record has been successfully handled;
|
|
additionally, there is a cursor and the nested
|
|
loop algorithm produced the number of rows
|
|
that is specified for current cursor fetch
|
|
operation.
|
|
All return values except NESTED_LOOP_OK abort the nested loop.
|
|
*****************************************************************************/
|
|
|
|
/* ARGSUSED */
|
|
static enum_nested_loop_state
|
|
end_send(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
DBUG_ENTER("end_send");
|
|
/*
|
|
When all tables are const this function is called with jointab == NULL.
|
|
This function shouldn't be called for the first join_tab as it needs
|
|
to get fields from previous tab.
|
|
*/
|
|
DBUG_ASSERT(join_tab == NULL || join_tab != join->join_tab);
|
|
//TODO pass fields via argument
|
|
List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;
|
|
|
|
if (end_of_records)
|
|
{
|
|
if (join->procedure && join->procedure->end_of_records())
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
if (join->table_count &&
|
|
join->join_tab->is_using_loose_index_scan())
|
|
{
|
|
/* Copy non-aggregated fields when loose index scan is used. */
|
|
copy_fields(&join->tmp_table_param);
|
|
}
|
|
if (join->having && join->having->val_int() == 0)
|
|
DBUG_RETURN(NESTED_LOOP_OK); // Didn't match having
|
|
if (join->procedure)
|
|
{
|
|
if (join->procedure->send_row(join->procedure_fields_list))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
if (join->send_records >= join->unit->lim.get_select_limit() &&
|
|
join->unit->lim.is_with_ties())
|
|
{
|
|
/*
|
|
Stop sending rows if the order fields corresponding to WITH TIES
|
|
have changed.
|
|
*/
|
|
int idx= test_if_item_cache_changed(join->order_fields);
|
|
if (idx >= 0)
|
|
join->do_send_rows= false;
|
|
}
|
|
|
|
if (join->do_send_rows)
|
|
{
|
|
int error;
|
|
/* result < 0 if row was not accepted and should not be counted */
|
|
if (unlikely((error= join->result->send_data_with_check(*fields,
|
|
join->unit,
|
|
join->send_records))))
|
|
{
|
|
if (error > 0)
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
// error < 0 => duplicate row
|
|
join->duplicate_rows++;
|
|
}
|
|
}
|
|
|
|
join->send_records++;
|
|
join->accepted_rows++;
|
|
if (join->send_records >= join->unit->lim.get_select_limit())
|
|
{
|
|
if (!join->do_send_rows)
|
|
{
|
|
/*
|
|
If we have used Priority Queue for optimizing order by with limit,
|
|
then stop here, there are no more records to consume.
|
|
When this optimization is used, end_send is called on the next
|
|
join_tab.
|
|
*/
|
|
if (join->order &&
|
|
join->select_options & OPTION_FOUND_ROWS &&
|
|
join_tab > join->join_tab &&
|
|
(join_tab - 1)->filesort && (join_tab - 1)->filesort->using_pq)
|
|
{
|
|
DBUG_PRINT("info", ("filesort NESTED_LOOP_QUERY_LIMIT"));
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
/* For WITH TIES we keep sending rows until a group has changed. */
|
|
if (join->unit->lim.is_with_ties())
|
|
{
|
|
/* Prepare the order_fields comparison for with ties. */
|
|
if (join->send_records == join->unit->lim.get_select_limit())
|
|
(void) test_if_group_changed(join->order_fields);
|
|
/* One more loop, to check if the next row matches with_ties or not. */
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
if (join->select_options & OPTION_FOUND_ROWS)
|
|
{
|
|
JOIN_TAB *jt=join->join_tab;
|
|
if ((join->table_count == 1) && !join->sort_and_group
|
|
&& !join->send_group_parts && !join->having && !jt->select_cond &&
|
|
!(jt->select && jt->select->quick) &&
|
|
(jt->table->file->ha_table_flags() & HA_STATS_RECORDS_IS_EXACT) &&
|
|
(jt->ref.key < 0))
|
|
{
|
|
/* Join over all rows in table; Return number of found rows */
|
|
TABLE *table=jt->table;
|
|
|
|
if (jt->filesort_result) // If filesort was used
|
|
{
|
|
join->send_records= jt->filesort_result->found_rows;
|
|
}
|
|
else
|
|
{
|
|
table->file->info(HA_STATUS_VARIABLE);
|
|
join->send_records= table->file->stats.records;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
join->do_send_rows= 0;
|
|
if (join->unit->fake_select_lex)
|
|
join->unit->fake_select_lex->limit_params.select_limit= 0;
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely
|
|
}
|
|
else if (join->send_records >= join->fetch_limit)
|
|
{
|
|
/*
|
|
There is a server side cursor and all rows for
|
|
this fetch request are sent.
|
|
*/
|
|
DBUG_RETURN(NESTED_LOOP_CURSOR_LIMIT);
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform OrderedGroupBy operation and write the output into join->result.
|
|
|
|
@detail
|
|
The input stream is ordered by the GROUP BY expression, so groups come
|
|
one after another. We only need to accumulate the aggregate value, when
|
|
a GROUP BY group ends, check the HAVING and send the group.
|
|
|
|
Note that the output comes in the GROUP BY order, which is required by
|
|
the MySQL's GROUP BY semantics. No further sorting is needed.
|
|
|
|
@seealso end_write_group() also implements SortAndGroup
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
end_send_group(JOIN *join, JOIN_TAB *join_tab, bool end_of_records)
|
|
{
|
|
int idx= -1;
|
|
enum_nested_loop_state ok_code= NESTED_LOOP_OK;
|
|
/*
|
|
join_tab can be 0 in the case all tables are const tables and we did not
|
|
need a temporary table to store the result.
|
|
In this case we use the original given fields, which is stored in
|
|
join->fields.
|
|
*/
|
|
List<Item> *fields= join_tab ? (join_tab-1)->fields : join->fields;
|
|
DBUG_ENTER("end_send_group");
|
|
|
|
if (!join->items3.is_null() && join->current_ref_ptrs != join->items3)
|
|
join->set_items_ref_array(join->items3);
|
|
|
|
if (!join->first_record || end_of_records ||
|
|
(idx=test_if_group_changed(join->group_fields)) >= 0)
|
|
{
|
|
|
|
if (!join->group_sent &&
|
|
(join->first_record ||
|
|
(end_of_records && !join->group && !join->group_optimized_away)))
|
|
{
|
|
table_map cleared_tables= (table_map) 0;
|
|
if (join->procedure)
|
|
join->procedure->end_group();
|
|
/* Test if there was a group change. */
|
|
if (idx < (int) join->send_group_parts)
|
|
{
|
|
int error=0;
|
|
if (join->procedure)
|
|
{
|
|
if (join->having && join->having->val_int() == 0)
|
|
error= -1; // Didn't satisfy having
|
|
else
|
|
{
|
|
if (join->do_send_rows)
|
|
error=join->procedure->send_row(*fields) ? 1 : 0;
|
|
join->send_records++;
|
|
}
|
|
if (end_of_records && join->procedure->end_of_records())
|
|
error= 1; // Fatal error
|
|
}
|
|
else
|
|
{
|
|
/* Reset all sum functions on group change. */
|
|
if (!join->first_record)
|
|
{
|
|
/* No matching rows for group function */
|
|
|
|
List_iterator_fast<Item> it(*fields);
|
|
Item *item;
|
|
join->no_rows_in_result_called= 1;
|
|
|
|
join->clear(&cleared_tables);
|
|
while ((item= it++))
|
|
item->no_rows_in_result();
|
|
}
|
|
if (join->having && join->having->val_int() == 0)
|
|
error= -1; // Didn't satisfy having
|
|
else
|
|
{
|
|
if (join->do_send_rows)
|
|
{
|
|
error= join->result->send_data_with_check(*fields,
|
|
join->unit,
|
|
join->send_records);
|
|
if (unlikely(error < 0))
|
|
{
|
|
/* Duplicate row, don't count */
|
|
join->duplicate_rows++;
|
|
error= 0;
|
|
}
|
|
}
|
|
join->send_records++;
|
|
join->group_sent= true;
|
|
}
|
|
if (unlikely(join->rollup.state != ROLLUP::STATE_NONE && error <= 0))
|
|
{
|
|
if (join->rollup_send_data((uint) (idx+1)))
|
|
error= 1;
|
|
}
|
|
if (join->no_rows_in_result_called)
|
|
{
|
|
/* Restore null tables to original state */
|
|
join->no_rows_in_result_called= 0;
|
|
if (cleared_tables)
|
|
unclear_tables(join, &cleared_tables);
|
|
}
|
|
}
|
|
if (unlikely(error > 0))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
if (join->send_records >= join->unit->lim.get_select_limit() &&
|
|
join->do_send_rows)
|
|
{
|
|
/* WITH TIES can be computed during end_send_group if
|
|
the order by is a subset of group by and we had an index
|
|
available to compute group by order directly. */
|
|
if (!join->unit->lim.is_with_ties() ||
|
|
idx < (int)join->with_ties_order_count)
|
|
{
|
|
if (!(join->select_options & OPTION_FOUND_ROWS))
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT); // Abort nicely
|
|
join->do_send_rows= 0;
|
|
join->unit->lim.set_unlimited();
|
|
}
|
|
}
|
|
else if (join->send_records >= join->fetch_limit)
|
|
{
|
|
/*
|
|
There is a server side cursor and all rows
|
|
for this fetch request are sent.
|
|
|
|
Preventing code duplication. When finished with the group reset
|
|
the group functions and copy_fields. We fall through. bug #11904
|
|
*/
|
|
ok_code= NESTED_LOOP_CURSOR_LIMIT;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
join->first_record=1;
|
|
(void) test_if_group_changed(join->group_fields);
|
|
}
|
|
if (idx < (int) join->send_group_parts)
|
|
{
|
|
/*
|
|
This branch is executed also for cursors which have finished their
|
|
fetch limit - the reason for ok_code.
|
|
*/
|
|
copy_fields(&join->tmp_table_param);
|
|
if (init_sum_functions(join->sum_funcs, join->sum_funcs_end[idx+1]))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (join->procedure)
|
|
join->procedure->add();
|
|
join->group_sent= false;
|
|
join->accepted_rows++;
|
|
DBUG_RETURN(ok_code);
|
|
}
|
|
}
|
|
if (update_sum_func(join->sum_funcs))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
join->accepted_rows++;
|
|
if (join->procedure)
|
|
join->procedure->add();
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/* ARGSUSED */
|
|
static enum_nested_loop_state
|
|
end_write(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *const table= join_tab->table;
|
|
DBUG_ENTER("end_write");
|
|
|
|
if (!end_of_records)
|
|
{
|
|
copy_fields(join_tab->tmp_table_param);
|
|
if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
|
|
if (likely(!join_tab->having || join_tab->having->val_int()))
|
|
{
|
|
int error;
|
|
join->found_records++;
|
|
join->accepted_rows++;
|
|
if ((error= table->file->ha_write_tmp_row(table->record[0])))
|
|
{
|
|
if (likely(!table->file->is_fatal_error(error, HA_CHECK_DUP)))
|
|
goto end; // Ignore duplicate keys
|
|
bool is_duplicate;
|
|
if (create_internal_tmp_table_from_heap(join->thd, table,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
error, 1, &is_duplicate))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
|
|
if (is_duplicate)
|
|
goto end;
|
|
}
|
|
if (++join_tab->send_records >=
|
|
join_tab->tmp_table_param->end_write_records &&
|
|
join->do_send_rows)
|
|
{
|
|
if (!(join->select_options & OPTION_FOUND_ROWS))
|
|
DBUG_RETURN(NESTED_LOOP_QUERY_LIMIT);
|
|
join->do_send_rows=0;
|
|
join->unit->lim.set_unlimited();
|
|
}
|
|
}
|
|
}
|
|
end:
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform GROUP BY operation over rows coming in arbitrary order: use
|
|
TemporaryTableWithPartialSums algorithm.
|
|
|
|
@detail
|
|
The TemporaryTableWithPartialSums algorithm is:
|
|
|
|
CREATE TEMPORARY TABLE tmp (
|
|
group_by_columns PRIMARY KEY,
|
|
partial_sum
|
|
);
|
|
|
|
for each row R in join output {
|
|
INSERT INTO tmp (R.group_by_columns, R.sum_value)
|
|
ON DUPLICATE KEY UPDATE partial_sum=partial_sum + R.sum_value;
|
|
}
|
|
|
|
@detail
|
|
Also applies HAVING, etc.
|
|
|
|
@seealso end_unique_update()
|
|
*/
|
|
|
|
static enum_nested_loop_state
|
|
end_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *const table= join_tab->table;
|
|
ORDER *group;
|
|
int error;
|
|
DBUG_ENTER("end_update");
|
|
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
|
|
join->found_records++;
|
|
copy_fields(join_tab->tmp_table_param); // Groups are copied twice.
|
|
/* Make a key of group index */
|
|
for (group=table->group ; group ; group=group->next)
|
|
{
|
|
Item *item= *group->item;
|
|
if (group->fast_field_copier_setup != group->field)
|
|
{
|
|
DBUG_PRINT("info", ("new setup %p -> %p",
|
|
group->fast_field_copier_setup,
|
|
group->field));
|
|
group->fast_field_copier_setup= group->field;
|
|
group->fast_field_copier_func=
|
|
item->setup_fast_field_copier(group->field);
|
|
}
|
|
item->save_org_in_field(group->field, group->fast_field_copier_func);
|
|
/* Store in the used key if the field was 0 */
|
|
if (item->maybe_null())
|
|
group->buff[-1]= (char) group->field->is_null();
|
|
}
|
|
if (!table->file->ha_index_read_map(table->record[1],
|
|
join_tab->tmp_table_param->group_buff,
|
|
HA_WHOLE_KEY,
|
|
HA_READ_KEY_EXACT))
|
|
{ /* Update old record */
|
|
restore_record(table,record[1]);
|
|
update_tmptable_sum_func(join->sum_funcs,table);
|
|
if (unlikely((error= table->file->ha_update_tmp_row(table->record[1],
|
|
table->record[0]))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
goto end;
|
|
}
|
|
|
|
init_tmptable_sum_functions(join->sum_funcs);
|
|
if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy,
|
|
join->thd)))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
if (unlikely((error= table->file->ha_write_tmp_row(table->record[0]))))
|
|
{
|
|
if (create_internal_tmp_table_from_heap(join->thd, table,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
error, 0, NULL))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
|
|
/* Change method to update rows */
|
|
if (unlikely((error= table->file->ha_index_init(0, 0))))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
|
|
join_tab->aggr->set_write_func(end_unique_update);
|
|
}
|
|
join_tab->send_records++;
|
|
end:
|
|
join->accepted_rows++; // For rownum()
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/**
|
|
Like end_update, but this is done with unique constraints instead of keys.
|
|
*/
|
|
|
|
static enum_nested_loop_state
|
|
end_unique_update(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *table= join_tab->table;
|
|
int error;
|
|
DBUG_ENTER("end_unique_update");
|
|
|
|
if (end_of_records)
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
|
|
init_tmptable_sum_functions(join->sum_funcs);
|
|
copy_fields(join_tab->tmp_table_param); // Groups are copied twice.
|
|
if (copy_funcs(join_tab->tmp_table_param->items_to_copy, join->thd))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
|
|
join->accepted_rows++;
|
|
if (likely(!(error= table->file->ha_write_tmp_row(table->record[0]))))
|
|
join_tab->send_records++; // New group
|
|
else
|
|
{
|
|
if (unlikely((int) table->file->get_dup_key(error) < 0))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
/* Prepare table for random positioning */
|
|
bool rnd_inited= (table->file->inited == handler::RND);
|
|
if (!rnd_inited &&
|
|
((error= table->file->ha_index_end()) ||
|
|
(error= table->file->ha_rnd_init(0))))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
if (unlikely(table->file->ha_rnd_pos(table->record[1],table->file->dup_ref)))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
restore_record(table,record[1]);
|
|
update_tmptable_sum_func(join->sum_funcs,table);
|
|
if (unlikely((error= table->file->ha_update_tmp_row(table->record[1],
|
|
table->record[0]))))
|
|
{
|
|
table->file->print_error(error,MYF(0)); /* purecov: inspected */
|
|
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
|
|
}
|
|
if (!rnd_inited &&
|
|
((error= table->file->ha_rnd_end()) ||
|
|
(error= table->file->ha_index_init(0, 0))))
|
|
{
|
|
table->file->print_error(error, MYF(0));
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
}
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
join->accepted_rows++; // For rownum()
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform OrderedGroupBy operation and write the output into the temporary
|
|
table (join_tab->table).
|
|
|
|
@detail
|
|
The input stream is ordered by the GROUP BY expression, so groups come
|
|
one after another. We only need to accumulate the aggregate value, when
|
|
a GROUP BY group ends, check the HAVING and write the group.
|
|
|
|
@seealso end_send_group() also implements OrderedGroupBy
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
end_write_group(JOIN *join, JOIN_TAB *join_tab __attribute__((unused)),
|
|
bool end_of_records)
|
|
{
|
|
TABLE *table= join_tab->table;
|
|
int idx= -1;
|
|
DBUG_ENTER("end_write_group");
|
|
|
|
join->accepted_rows++;
|
|
if (!join->first_record || end_of_records ||
|
|
(idx=test_if_group_changed(join->group_fields)) >= 0)
|
|
{
|
|
if (join->first_record || (end_of_records && !join->group))
|
|
{
|
|
table_map cleared_tables= (table_map) 0;
|
|
if (join->procedure)
|
|
join->procedure->end_group();
|
|
int send_group_parts= join->send_group_parts;
|
|
if (idx < send_group_parts)
|
|
{
|
|
if (!join->first_record)
|
|
{
|
|
/* No matching rows for group function */
|
|
join->clear(&cleared_tables);
|
|
}
|
|
copy_sum_funcs(join->sum_funcs,
|
|
join->sum_funcs_end[send_group_parts]);
|
|
if (!join_tab->having || join_tab->having->val_int())
|
|
{
|
|
int error= table->file->ha_write_tmp_row(table->record[0]);
|
|
if (unlikely(error) &&
|
|
create_internal_tmp_table_from_heap(join->thd, table,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
error, 0, NULL))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
if (unlikely(join->rollup.state != ROLLUP::STATE_NONE))
|
|
{
|
|
if (unlikely(join->rollup_write_data((uint) (idx+1),
|
|
join_tab->tmp_table_param,
|
|
table)))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
}
|
|
}
|
|
if (cleared_tables)
|
|
unclear_tables(join, &cleared_tables);
|
|
if (end_of_records)
|
|
goto end;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (end_of_records)
|
|
goto end;
|
|
join->first_record=1;
|
|
(void) test_if_group_changed(join->group_fields);
|
|
}
|
|
if (idx < (int) join->send_group_parts)
|
|
{
|
|
copy_fields(join_tab->tmp_table_param);
|
|
if (unlikely(copy_funcs(join_tab->tmp_table_param->items_to_copy,
|
|
join->thd)))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (unlikely(init_sum_functions(join->sum_funcs,
|
|
join->sum_funcs_end[idx+1])))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (unlikely(join->procedure))
|
|
join->procedure->add();
|
|
goto end;
|
|
}
|
|
}
|
|
if (unlikely(update_sum_func(join->sum_funcs)))
|
|
DBUG_RETURN(NESTED_LOOP_ERROR);
|
|
if (unlikely(join->procedure))
|
|
join->procedure->add();
|
|
end:
|
|
if (unlikely(join->thd->check_killed()))
|
|
{
|
|
DBUG_RETURN(NESTED_LOOP_KILLED); /* purecov: inspected */
|
|
}
|
|
DBUG_RETURN(NESTED_LOOP_OK);
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Remove calculation with tables that aren't yet read. Remove also tests
|
|
against fields that are read through key where the table is not an
|
|
outer join table.
|
|
We can't remove tests that are made against columns which are stored
|
|
in sorted order.
|
|
*****************************************************************************/
|
|
|
|
/**
|
|
Check if "left_item=right_item" equality is guaranteed to be true by use of
|
|
[eq]ref access on left_item->field->table.
|
|
|
|
SYNOPSIS
|
|
test_if_ref()
|
|
root_cond
|
|
left_item
|
|
right_item
|
|
|
|
DESCRIPTION
|
|
Check if the given "left_item = right_item" equality is guaranteed to be
|
|
true by use of [eq_]ref access method.
|
|
|
|
We need root_cond as we can't remove ON expressions even if employed ref
|
|
access guarantees that they are true. This is because TODO
|
|
|
|
RETURN
|
|
TRUE if right_item is used removable reference key on left_item
|
|
FALSE Otherwise
|
|
|
|
*/
|
|
|
|
bool test_if_ref(Item *root_cond, Item_field *left_item,Item *right_item)
|
|
{
|
|
Field *field=left_item->field;
|
|
JOIN_TAB *join_tab= field->table->reginfo.join_tab;
|
|
// No need to change const test
|
|
if (!field->table->const_table && join_tab &&
|
|
!join_tab->is_ref_for_hash_join() &&
|
|
(!join_tab->first_inner ||
|
|
*join_tab->first_inner->on_expr_ref == root_cond))
|
|
{
|
|
/*
|
|
If ref access uses "Full scan on NULL key" (i.e. it actually alternates
|
|
between ref access and full table scan), then no equality can be
|
|
guaranteed to be true.
|
|
*/
|
|
if (join_tab->ref.is_access_triggered())
|
|
return FALSE;
|
|
|
|
Item *ref_item=part_of_refkey(field->table,field);
|
|
if (ref_item && (ref_item->eq(right_item,1) ||
|
|
ref_item->real_item()->eq(right_item,1)))
|
|
{
|
|
right_item= right_item->real_item();
|
|
if (right_item->type() == Item::FIELD_ITEM)
|
|
return (field->eq_def(((Item_field *) right_item)->field));
|
|
/* remove equalities injected by IN->EXISTS transformation */
|
|
else if (right_item->type() == Item::CACHE_ITEM)
|
|
return ((Item_cache *)right_item)->eq_def (field);
|
|
if (right_item->const_item() && !(right_item->is_null()))
|
|
{
|
|
/*
|
|
We can remove binary fields and numerical fields except float,
|
|
as float comparison isn't 100 % safe
|
|
We have to keep normal strings to be able to check for end spaces
|
|
*/
|
|
if (field->binary() &&
|
|
field->real_type() != MYSQL_TYPE_STRING &&
|
|
field->real_type() != MYSQL_TYPE_VARCHAR &&
|
|
(field->type() != MYSQL_TYPE_FLOAT || field->decimals() == 0))
|
|
{
|
|
return !right_item->save_in_field_no_warnings(field, 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return 0; // keep test
|
|
}
|
|
|
|
|
|
/**
|
|
Extract a condition that can be checked after reading given table
|
|
@fn make_cond_for_table()
|
|
|
|
@param cond Condition to analyze
|
|
@param tables Tables for which "current field values" are available
|
|
Tables for which "current field values" are available (this
|
|
includes used_table)
|
|
(may also include PSEUDO_TABLE_BITS, and may be zero)
|
|
@param used_table Table that we're extracting the condition for
|
|
@param join_tab_idx_arg
|
|
The index of the JOIN_TAB this Item is being extracted
|
|
for. MAX_TABLES if there is no corresponding JOIN_TAB.
|
|
@param exclude_expensive_cond
|
|
Do not push expensive conditions
|
|
@param retain_ref_cond
|
|
Retain ref conditions
|
|
|
|
@retval <>NULL Generated condition
|
|
@retval =NULL Already checked, OR error
|
|
|
|
@details
|
|
Extract the condition that can be checked after reading the table
|
|
specified in 'used_table', given that current-field values for tables
|
|
specified in 'tables' bitmap are available.
|
|
If 'used_table' is 0
|
|
- extract conditions for all tables in 'tables'.
|
|
- extract conditions are unrelated to any tables
|
|
in the same query block/level(i.e. conditions
|
|
which have used_tables == 0).
|
|
|
|
The function assumes that
|
|
- Constant parts of the condition has already been checked.
|
|
- Condition that could be checked for tables in 'tables' has already
|
|
been checked.
|
|
|
|
The function takes into account that some parts of the condition are
|
|
guaranteed to be true by employed 'ref' access methods (the code that
|
|
does this is located at the end, search down for "EQ_FUNC").
|
|
|
|
@note
|
|
Make sure to keep the implementations of make_cond_for_table() and
|
|
make_cond_after_sjm() synchronized.
|
|
make_cond_for_info_schema() uses similar algorithm as well.
|
|
*/
|
|
|
|
static Item *
|
|
make_cond_for_table(THD *thd, Item *cond, table_map tables,
|
|
table_map used_table,
|
|
int join_tab_idx_arg,
|
|
bool exclude_expensive_cond __attribute__((unused)),
|
|
bool retain_ref_cond)
|
|
{
|
|
return make_cond_for_table_from_pred(thd, cond, cond, tables, used_table,
|
|
join_tab_idx_arg,
|
|
exclude_expensive_cond,
|
|
retain_ref_cond, true);
|
|
}
|
|
|
|
|
|
/*
  Recursive worker of make_cond_for_table().

  Takes the parameters of make_cond_for_table() plus:
    root_cond          The condition the descent started from; passed
                       unchanged to test_if_ref().
    is_top_and_level   true for the initial call, false for every recursive
                       call made for a member of an AND/OR list.

  Returns the extracted condition, or NULL when there is nothing to check
  here (already checked, cannot be checked yet, always true) or on error.
*/
static Item *
make_cond_for_table_from_pred(THD *thd, Item *root_cond, Item *cond,
                              table_map tables, table_map used_table,
                              int join_tab_idx_arg,
                              bool exclude_expensive_cond __attribute__
                              ((unused)),
                              bool retain_ref_cond,
                              bool is_top_and_level)
{
  const table_map rand_bit= (table_map) RAND_TABLE_BIT;
  /*
    Set when this call collects the top level formulas that depend on
    RAND_TABLE_BIT only. Any top level formula that also refers to something
    else than RAND_TABLE_BIT (outer references aside) has already been
    pushed by the regular rules and is skipped in this mode.
  */
  const bool rand_only_pass= is_top_and_level && used_table == rand_bit;

  if (used_table && !(cond->used_tables() & used_table))
    return NULL;                                // Already checked

  if (cond->type() == Item::COND_ITEM)
  {
    if (((Item_cond*) cond)->functype() != Item_func::COND_AND_FUNC)
    {
      /* OR list: every member must yield a condition, else nothing does */
      if (rand_only_pass &&
          (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_bit)
        return NULL;                            // Has been already pushed

      Item_cond_or *or_cond= new (thd->mem_root) Item_cond_or(thd);
      if (!or_cond)
        return NULL;                            // OOM /* purecov: inspected */

      List_iterator<Item> or_it(*((Item_cond*) cond)->argument_list());
      Item *member;
      while ((member= or_it++))
      {
        Item *part= make_cond_for_table_from_pred(thd, root_cond, member,
                                                  tables, 0L,
                                                  join_tab_idx_arg,
                                                  exclude_expensive_cond,
                                                  retain_ref_cond, false);
        if (!part)
          return NULL;                          // Always true or error
        or_cond->argument_list()->push_back(part, thd->mem_root);
      }
      /*
        Call fix_fields to propagate all properties of the children to
        the new parent Item. This should not be expensive because all
        children of the Item_cond_or should be fixed by now.
      */
      if (or_cond->fix_fields(thd, 0))
        return NULL;
      or_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
      or_cond->top_level_item();
      return or_cond;
    }

    /* AND list: create a new top level AND item from the usable members */
    Item_cond_and *and_cond= new (thd->mem_root) Item_cond_and(thd);
    if (!and_cond)
      return NULL;                              // OOM /* purecov: inspected */

    List_iterator<Item> and_it(*((Item_cond*) cond)->argument_list());
    Item *member;
    while ((member= and_it++))
    {
      /*
        Special handling of top level conjuncts with RAND_TABLE_BIT:
        if such a conjunct contains a reference to a field that is not
        an outer field then it is pushed to the corresponding table by
        the same rule as all other conjuncts. Otherwise, if the conjunct
        is used in WHERE it is pushed to the last joined table; if it is
        used in the ON condition of an outer join it is pushed into the
        last inner table of the outer join. Such conjuncts are pushed in
        a call of make_cond_for_table_from_pred() whose 'used_table'
        parameter equals RAND_TABLE_BIT (the comparison made here).
      */
      if (rand_only_pass &&
          (member->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_bit)
        continue;                               // Has been already pushed

      Item *part= make_cond_for_table_from_pred(thd, root_cond, member,
                                                tables, used_table,
                                                join_tab_idx_arg,
                                                exclude_expensive_cond,
                                                retain_ref_cond, false);
      if (part)
        and_cond->argument_list()->push_back(part, thd->mem_root);
      else if (thd->is_error())
        return NULL;
    }

    const uint n_parts= and_cond->argument_list()->elements;
    if (n_parts == 0)
      return NULL;                              // Always true
    if (n_parts == 1)
      return and_cond->argument_list()->head();

    /*
      Call fix_fields to propagate all properties of the children to
      the new parent Item. This should not be expensive because all
      children of the Item_cond_and should be fixed by now.
    */
    if (and_cond->fix_fields(thd, 0))
      return NULL;
    and_cond->used_tables_cache=
      ((Item_cond_and*) cond)->used_tables_cache & tables;
    return and_cond;
  }

  if (cond->basic_const_item())
    return cond;

  if (rand_only_pass &&
      (cond->used_tables() & ~OUTER_REF_TABLE_BIT) != rand_bit)
    return NULL;                                // Has been already pushed

  /*
    Because the following test takes a while and it can be done
    table_count times, we mark each item that we have examined with the
    result of the test
  */
  if ((cond->marker == MARKER_CHECK_ON_READ && !retain_ref_cond) ||
      (cond->used_tables() & ~tables))
    return NULL;                                // Can't check this yet

  if (cond->marker == MARKER_PROCESSED ||
      cond->eq_cmp_result() == Item::COND_OK)
  {
    cond->set_join_tab_idx((uint8) join_tab_idx_arg);
    return cond;                                // Not boolean op
  }

  if (cond->type() == Item::FUNC_ITEM &&
      ((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
  {
    Item *lhs= ((Item_func*) cond)->arguments()[0]->real_item();
    Item *rhs= ((Item_func*) cond)->arguments()[1]->real_item();

    /* Drop "field = expr" if the ref access on field's table guarantees it */
    if (!retain_ref_cond &&
        ((lhs->type() == Item::FIELD_ITEM &&
          test_if_ref(root_cond, (Item_field*) lhs, rhs)) ||
         (rhs->type() == Item::FIELD_ITEM &&
          test_if_ref(root_cond, (Item_field*) rhs, lhs))))
    {
      cond->marker= MARKER_CHECK_ON_READ;       // Checked when read
      return NULL;
    }
  }

  cond->marker= MARKER_PROCESSED;
  cond->set_join_tab_idx((uint8) join_tab_idx_arg);
  return cond;
}
|
|
|
|
|
|
/*
|
|
The difference of this from make_cond_for_table() is that we're in the
|
|
following state:
|
|
1. conditions referring to 'tables' have been checked
|
|
2. conditions referring to sjm_tables have been checked, too
|
|
3. We need condition that couldn't be checked in #1 or #2 but
|
|
can be checked when we get both (tables | sjm_tables).
|
|
|
|
*/
|
|
static COND *
|
|
make_cond_after_sjm(THD *thd, Item *root_cond, Item *cond, table_map tables,
|
|
table_map sjm_tables, bool inside_or_clause)
|
|
{
|
|
/*
|
|
We assume that conditions that refer to only join prefix tables or
|
|
sjm_tables have already been checked.
|
|
*/
|
|
if (!inside_or_clause)
|
|
{
|
|
table_map cond_used_tables= cond->used_tables();
|
|
if((!(cond_used_tables & ~tables) ||
|
|
!(cond_used_tables & ~sjm_tables)))
|
|
return (COND*) 0; // Already checked
|
|
}
|
|
|
|
/* AND/OR recursive descent */
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
{
|
|
/* Create new top level AND item */
|
|
Item_cond_and *new_cond= new (thd->mem_root) Item_cond_and(thd);
|
|
if (!new_cond)
|
|
return (COND*) 0; // OOM /* purecov: inspected */
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
Item *fix=make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables,
|
|
inside_or_clause);
|
|
if (fix)
|
|
new_cond->argument_list()->push_back(fix, thd->mem_root);
|
|
}
|
|
switch (new_cond->argument_list()->elements) {
|
|
case 0:
|
|
return (COND*) 0; // Always true
|
|
case 1:
|
|
return new_cond->argument_list()->head();
|
|
default:
|
|
/*
|
|
Item_cond_and do not need fix_fields for execution, its parameters
|
|
are fixed or do not need fix_fields, too
|
|
*/
|
|
new_cond->quick_fix_field();
|
|
new_cond->used_tables_cache=
|
|
((Item_cond_and*) cond)->used_tables_cache &
|
|
tables;
|
|
return new_cond;
|
|
}
|
|
}
|
|
else
|
|
{ // Or list
|
|
Item_cond_or *new_cond= new (thd->mem_root) Item_cond_or(thd);
|
|
if (!new_cond)
|
|
return (COND*) 0; // OOM /* purecov: inspected */
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item=li++))
|
|
{
|
|
Item *fix= make_cond_after_sjm(thd, root_cond, item, tables, sjm_tables,
|
|
/*inside_or_clause= */TRUE);
|
|
if (!fix)
|
|
return (COND*) 0; // Always true
|
|
new_cond->argument_list()->push_back(fix, thd->mem_root);
|
|
}
|
|
/*
|
|
Item_cond_or do not need fix_fields for execution, its parameters
|
|
are fixed or do not need fix_fields, too
|
|
*/
|
|
new_cond->quick_fix_field();
|
|
new_cond->used_tables_cache= ((Item_cond_or*) cond)->used_tables_cache;
|
|
new_cond->top_level_item();
|
|
return new_cond;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Because the following test takes a while and it can be done
|
|
table_count times, we mark each item that we have examined with the result
|
|
of the test
|
|
*/
|
|
|
|
if (cond->marker == MARKER_CHECK_ON_READ ||
|
|
(cond->used_tables() & ~(tables | sjm_tables)))
|
|
return (COND*) 0; // Can't check this yet
|
|
if (cond->marker == MARKER_PROCESSED || cond->eq_cmp_result() == Item::COND_OK)
|
|
return cond; // Not boolean op
|
|
|
|
/*
|
|
Remove equalities that are guaranteed to be true by use of 'ref' access
|
|
method
|
|
*/
|
|
if (((Item_func*) cond)->functype() == Item_func::EQ_FUNC)
|
|
{
|
|
Item *left_item= ((Item_func*) cond)->arguments()[0]->real_item();
|
|
Item *right_item= ((Item_func*) cond)->arguments()[1]->real_item();
|
|
if (left_item->type() == Item::FIELD_ITEM &&
|
|
test_if_ref(root_cond, (Item_field*) left_item,right_item))
|
|
{
|
|
cond->marker= MARKER_CHECK_ON_READ;
|
|
return (COND*) 0;
|
|
}
|
|
if (right_item->type() == Item::FIELD_ITEM &&
|
|
test_if_ref(root_cond, (Item_field*) right_item,left_item))
|
|
{
|
|
cond->marker= MARKER_CHECK_ON_READ;
|
|
return (COND*) 0;
|
|
}
|
|
}
|
|
cond->marker= MARKER_PROCESSED;
|
|
return cond;
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
|
|
Check if
|
|
- @table uses "ref"-like access
|
|
- it is based on "@field=certain_item" equality
|
|
- the equality will be true for any record returned by the access method
|
|
and return the certain_item if yes.
|
|
|
|
@detail
|
|
|
|
Equality won't necessarily hold if:
|
|
- the used index covers only part of the @field.
|
|
Suppose, we have a CHAR(5) field and INDEX(field(3)). if you make a lookup
|
|
for 'abc', you will get both record with 'abc' and with 'abcde'.
|
|
- The type of access is actually ref_or_null, and so @field can be either
|
|
a value or NULL.
|
|
|
|
@return
|
|
Item that the field will be equal to
|
|
NULL if no such item
|
|
*/
|
|
|
|
static Item *
|
|
part_of_refkey(TABLE *table,Field *field)
|
|
{
|
|
JOIN_TAB *join_tab= table->reginfo.join_tab;
|
|
if (!join_tab)
|
|
return (Item*) 0; // field from outer non-select (UPDATE,...)
|
|
|
|
uint ref_parts= join_tab->ref.key_parts;
|
|
if (ref_parts) /* if it's ref/eq_ref/ref_or_null */
|
|
{
|
|
uint key= join_tab->ref.key;
|
|
KEY *key_info= join_tab->get_keyinfo_by_key_no(key);
|
|
KEY_PART_INFO *key_part= key_info->key_part;
|
|
|
|
for (uint part=0 ; part < ref_parts ; part++,key_part++)
|
|
{
|
|
if (field->eq(key_part->field))
|
|
{
|
|
/*
|
|
Found the field in the key. Check that
|
|
1. ref_or_null doesn't alternate this component between a value and
|
|
a NULL
|
|
2. index fully covers the key
|
|
*/
|
|
if (part != join_tab->ref.null_ref_part && // (1)
|
|
!(key_part->key_part_flag & HA_PART_KEY_SEG)) // (2)
|
|
{
|
|
return join_tab->ref.items[part];
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
return (Item*) 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Test if one can use the key to resolve ORDER BY.
|
|
|
|
@param join if not NULL, can use the join's top-level
|
|
multiple-equalities.
|
|
@param order Sort order
|
|
@param table Table to sort
|
|
@param idx Index to check
|
|
@param used_key_parts [out] NULL by default, otherwise return value for
|
|
used key parts.
|
|
|
|
@note
|
|
used_key_parts is set to correct key parts used if return value != 0
|
|
(On other cases, used_key_part may be changed)
|
|
Note that the value may actually be greater than the number of index
|
|
key parts. This can happen for storage engines that have the primary
|
|
key parts as a suffix for every secondary key.
|
|
|
|
@retval
|
|
1 key is ok.
|
|
@retval
|
|
0 Key can't be used
|
|
@retval
|
|
-1 Reverse key can be used
|
|
*/
|
|
|
|
static int test_if_order_by_key(JOIN *join, ORDER *order, TABLE *table,
|
|
uint idx, uint *used_key_parts)
|
|
{
|
|
KEY_PART_INFO *key_part,*key_part_end;
|
|
key_part=table->key_info[idx].key_part;
|
|
key_part_end=key_part + table->key_info[idx].ext_key_parts;
|
|
key_part_map const_key_parts=table->const_key_parts[idx];
|
|
uint user_defined_kp= table->key_info[idx].user_defined_key_parts;
|
|
int reverse=0;
|
|
uint key_parts;
|
|
bool have_pk_suffix= false;
|
|
uint pk= table->s->primary_key;
|
|
ORDER::enum_order keypart_order;
|
|
DBUG_ENTER("test_if_order_by_key");
|
|
|
|
if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_IN_READ_INDEX) &&
|
|
idx < table->s->keys &&
|
|
table->key_info[idx].ext_key_part_map && pk != MAX_KEY && pk != idx)
|
|
{
|
|
have_pk_suffix= true;
|
|
}
|
|
|
|
if ((*order->item)->real_item()->type() != Item::FIELD_ITEM)
|
|
{
|
|
if (order->next)
|
|
DBUG_RETURN(0);
|
|
|
|
DBUG_RETURN((*order->item)->part_of_sortkey().is_set(idx));
|
|
}
|
|
|
|
for (; order ; order=order->next, const_key_parts>>=1)
|
|
{
|
|
Item_field *item_field= ((Item_field*) (*order->item)->real_item());
|
|
int flag;
|
|
|
|
/*
|
|
Skip key parts that are constants in the WHERE clause.
|
|
These are already skipped in the ORDER BY by const_expression_in_where()
|
|
for top level queries.
|
|
*/
|
|
for (; const_key_parts & 1 ; const_key_parts>>= 1)
|
|
{
|
|
if (item_field->contains(key_part->field))
|
|
{
|
|
/* Subquery with ORDER BY, continue with next field */
|
|
goto next_order_field;
|
|
}
|
|
key_part++;
|
|
}
|
|
|
|
/*
|
|
This check was in this function historically (although I think it's
|
|
better to check it outside of this function):
|
|
|
|
"Test if the primary key parts were all const (i.e. there's one row).
|
|
The sorting doesn't matter"
|
|
|
|
So, we're checking that
|
|
(1) this is an extended key
|
|
(2) we've reached its end
|
|
*/
|
|
key_parts= (uint)(key_part - table->key_info[idx].key_part);
|
|
if (have_pk_suffix &&
|
|
reverse == 0 && // all were =const so far
|
|
key_parts == table->key_info[idx].ext_key_parts &&
|
|
table->const_key_parts[pk] == PREV_BITS(uint,
|
|
table->key_info[pk].
|
|
user_defined_key_parts))
|
|
{
|
|
key_parts= 0;
|
|
reverse= 1; // Key is ok to use
|
|
goto ok;
|
|
}
|
|
|
|
if (key_part == key_part_end ||
|
|
!key_part->field->part_of_sortkey.is_set(idx))
|
|
{
|
|
/*
|
|
There are some items left in ORDER BY that we don't have in the key
|
|
*/
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
if (!item_field->contains(key_part->field))
|
|
DBUG_RETURN(0);
|
|
|
|
keypart_order= ((key_part->key_part_flag & HA_REVERSE_SORT) ?
|
|
ORDER::ORDER_DESC : ORDER::ORDER_ASC);
|
|
/* set flag to 1 if we can use read-next on key, else to -1 */
|
|
flag= (order->direction == keypart_order) ? 1 : -1;
|
|
if (reverse && flag != reverse)
|
|
DBUG_RETURN(0);
|
|
reverse=flag; // Remember if reverse
|
|
|
|
next_order_field:
|
|
if (key_part < key_part_end)
|
|
key_part++;
|
|
}
|
|
|
|
key_parts= (uint) (key_part - table->key_info[idx].key_part);
|
|
|
|
if (reverse == -1 &&
|
|
!(table->file->index_flags(idx, user_defined_kp-1, 1) & HA_READ_PREV))
|
|
reverse= 0; // Index can't be used
|
|
|
|
if (have_pk_suffix && reverse == -1)
|
|
{
|
|
uint pk_parts= table->key_info[pk].user_defined_key_parts;
|
|
if (!(table->file->index_flags(pk, pk_parts-1, 1) & HA_READ_PREV))
|
|
reverse= 0; // Index can't be used
|
|
}
|
|
|
|
ok:
|
|
*used_key_parts= key_parts;
|
|
DBUG_RETURN(reverse);
|
|
}
|
|
|
|
|
|
/**
|
|
Find shortest key suitable for full table scan.
|
|
|
|
@param table Table to scan
|
|
@param usable_keys Allowed keys
|
|
|
|
@return
|
|
MAX_KEY no suitable key found
|
|
key index otherwise
|
|
|
|
@notes
|
|
We should not use keyread_time() as in the case of disk_read_cost= 0
|
|
all keys would be regarded equal.
|
|
*/
|
|
|
|
uint find_shortest_key(TABLE *table, const key_map *usable_keys)
|
|
{
|
|
size_t min_length= INT_MAX32;
|
|
uint best= MAX_KEY;
|
|
uint possible_keys= usable_keys->bits_set();
|
|
|
|
if (possible_keys)
|
|
{
|
|
if (possible_keys == 1)
|
|
return usable_keys->find_first_bit();
|
|
|
|
for (uint nr=0; nr < table->s->keys ; nr++)
|
|
{
|
|
if (usable_keys->is_set(nr))
|
|
{
|
|
size_t length= table->key_storage_length(nr);
|
|
if (length < min_length)
|
|
{
|
|
min_length= length;
|
|
best= nr;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return best;
|
|
}
|
|
|
|
|
|
/**
|
|
Test if a second key is the subkey of the first one.
|
|
|
|
@param key_part First key parts
|
|
@param ref_key_part Second key parts
|
|
@param ref_key_part_end Last+1 part of the second key
|
|
|
|
@note
|
|
Second key MUST be shorter than the first one.
|
|
|
|
@retval
|
|
1 is a subkey
|
|
@retval
|
|
0 no sub key
|
|
*/
|
|
|
|
inline bool
|
|
is_subkey(KEY_PART_INFO *key_part, KEY_PART_INFO *ref_key_part,
|
|
KEY_PART_INFO *ref_key_part_end)
|
|
{
|
|
for (; ref_key_part < ref_key_part_end; key_part++, ref_key_part++)
|
|
if (!key_part->field->eq(ref_key_part->field))
|
|
return 0;
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
Test if we can use one of the 'usable_keys' instead of 'ref' key
|
|
for sorting.
|
|
|
|
@param ref Number of key, used for WHERE clause
|
|
@param usable_keys Keys for testing
|
|
|
|
@return
|
|
- MAX_KEY If we can't use other key
|
|
- the number of found key Otherwise
|
|
*/
|
|
|
|
static uint
|
|
test_if_subkey(ORDER *order, TABLE *table, uint ref, uint ref_key_parts,
|
|
const key_map *usable_keys)
|
|
{
|
|
uint nr;
|
|
uint min_length= (uint) ~0;
|
|
uint best= MAX_KEY;
|
|
KEY_PART_INFO *ref_key_part= table->key_info[ref].key_part;
|
|
KEY_PART_INFO *ref_key_part_end= ref_key_part + ref_key_parts;
|
|
|
|
/*
|
|
Find the shortest key that
|
|
- produces the required ordering
|
|
- has key #ref (up to ref_key_parts) as its subkey.
|
|
*/
|
|
for (nr= 0 ; nr < table->s->keys ; nr++)
|
|
{
|
|
uint not_used;
|
|
if (usable_keys->is_set(nr) &&
|
|
table->key_info[nr].key_length < min_length &&
|
|
table->key_info[nr].user_defined_key_parts >= ref_key_parts &&
|
|
is_subkey(table->key_info[nr].key_part, ref_key_part,
|
|
ref_key_part_end) &&
|
|
test_if_order_by_key(NULL, order, table, nr, ¬_used))
|
|
{
|
|
min_length= table->key_info[nr].key_length;
|
|
best= nr;
|
|
}
|
|
}
|
|
return best;
|
|
}
|
|
|
|
|
|
/**
|
|
Check if GROUP BY/DISTINCT can be optimized away because the set is
|
|
already known to be distinct.
|
|
|
|
Used in removing the GROUP BY/DISTINCT of the following types of
|
|
statements:
|
|
@code
|
|
SELECT [DISTINCT] <unique_key_cols>... FROM <single_table_ref>
|
|
[GROUP BY <unique_key_cols>,...]
|
|
@endcode
|
|
|
|
If (a,b,c is distinct)
|
|
then <any combination of a,b,c>,{whatever} is also distinct
|
|
|
|
This function checks if all the key parts of any of the unique keys
|
|
of the table are referenced by a list : either the select list
|
|
through find_field_in_item_list or GROUP BY list through
|
|
find_field_in_order_list.
|
|
If the above holds and the key parts cannot contain NULLs then we
|
|
can safely remove the GROUP BY/DISTINCT,
|
|
as no result set can be more distinct than an unique key.
|
|
|
|
@param table The table to operate on.
|
|
@param find_func function to iterate over the list and search
|
|
for a field
|
|
|
|
@retval
|
|
1 found
|
|
@retval
|
|
0 not found.
|
|
*/
|
|
|
|
static bool
|
|
list_contains_unique_index(TABLE *table,
|
|
bool (*find_func) (Field *, void *), void *data)
|
|
{
|
|
for (uint keynr= 0; keynr < table->s->keys; keynr++)
|
|
{
|
|
if (keynr == table->s->primary_key ||
|
|
(table->key_info[keynr].flags & HA_NOSAME))
|
|
{
|
|
KEY *keyinfo= table->key_info + keynr;
|
|
KEY_PART_INFO *key_part, *key_part_end;
|
|
|
|
for (key_part=keyinfo->key_part,
|
|
key_part_end=key_part+ keyinfo->user_defined_key_parts;
|
|
key_part < key_part_end;
|
|
key_part++)
|
|
{
|
|
if (key_part->field->maybe_null() ||
|
|
!find_func(key_part->field, data))
|
|
break;
|
|
}
|
|
if (key_part == key_part_end)
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Helper function for list_contains_unique_index.
|
|
Find a field reference in a list of ORDER structures.
|
|
Finds a direct reference of the Field in the list.
|
|
|
|
@param field The field to search for.
|
|
@param data ORDER *.The list to search in
|
|
|
|
@retval
|
|
1 found
|
|
@retval
|
|
0 not found.
|
|
*/
|
|
|
|
static bool
|
|
find_field_in_order_list (Field *field, void *data)
|
|
{
|
|
ORDER *group= (ORDER *) data;
|
|
bool part_found= 0;
|
|
for (ORDER *tmp_group= group; tmp_group; tmp_group=tmp_group->next)
|
|
{
|
|
Item *item= (*tmp_group->item)->real_item();
|
|
if (item->type() == Item::FIELD_ITEM &&
|
|
((Item_field*) item)->field->eq(field))
|
|
{
|
|
part_found= 1;
|
|
break;
|
|
}
|
|
}
|
|
return part_found;
|
|
}
|
|
|
|
|
|
/**
|
|
Helper function for list_contains_unique_index.
|
|
Find a field reference in a dynamic list of Items.
|
|
Finds a direct reference of the Field in the list.
|
|
|
|
@param[in] field The field to search for.
|
|
@param[in] data List<Item> *.The list to search in
|
|
|
|
@retval
|
|
1 found
|
|
@retval
|
|
0 not found.
|
|
*/
|
|
|
|
static bool
|
|
find_field_in_item_list (Field *field, void *data)
|
|
{
|
|
List<Item> *fields= (List<Item> *) data;
|
|
bool part_found= 0;
|
|
List_iterator<Item> li(*fields);
|
|
Item *item;
|
|
|
|
while ((item= li++))
|
|
{
|
|
if (item->real_item()->type() == Item::FIELD_ITEM &&
|
|
((Item_field*) (item->real_item()))->field->eq(field))
|
|
{
|
|
part_found= 1;
|
|
break;
|
|
}
|
|
}
|
|
return part_found;
|
|
}
|
|
|
|
|
|
/*
|
|
Fill *col_keys with a union of Field::part_of_sortkey of all fields
|
|
that belong to 'table' and are equal to 'item_field'.
|
|
*/
|
|
|
|
static
|
|
void compute_part_of_sort_key_for_equals(JOIN *join, TABLE *table,
|
|
key_map *col_keys, Item *item)
|
|
{
|
|
if (item->type() != Item::FIELD_ITEM ||
|
|
!optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP))
|
|
return;
|
|
|
|
Item_field *item_field= (Item_field*)item;
|
|
Item_equal *item_eq= NULL;
|
|
|
|
if (item_field->item_equal)
|
|
{
|
|
/*
|
|
The item_field is from ORDER structure, but it already has an item_equal
|
|
pointer set (UseMultipleEqualitiesToRemoveTempTable code have set it)
|
|
*/
|
|
item_eq= item_field->item_equal;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
Walk through join's muliple equalities and find the one that contains
|
|
item_field.
|
|
*/
|
|
if (!join->cond_equal)
|
|
return;
|
|
table_map needed_tbl_map= item_field->used_tables() | table->map;
|
|
List_iterator<Item_equal> li(join->cond_equal->current_level);
|
|
Item_equal *cur_item_eq;
|
|
while ((cur_item_eq= li++))
|
|
{
|
|
if ((cur_item_eq->used_tables() & needed_tbl_map) &&
|
|
cur_item_eq->contains(item_field->field))
|
|
{
|
|
item_eq= cur_item_eq;
|
|
item_field->item_equal= item_eq; // Save the pointer to our Item_equal.
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (item_eq)
|
|
{
|
|
Item_equal_fields_iterator it(*item_eq);
|
|
Item *item;
|
|
/* Loop through other members that belong to table table */
|
|
while ((item= it++))
|
|
{
|
|
if (item->type() == Item::FIELD_ITEM &&
|
|
((Item_field*)item)->field->table == table)
|
|
{
|
|
col_keys->merge(((Item_field*)item)->field->part_of_sortkey);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
This is called when switching table access to produce records
|
|
in reverse order.
|
|
|
|
@detail
|
|
- Disable "Range checked for each record" (Is this strictly necessary
|
|
here?)
|
|
- Disable Index Condition Pushdown and Rowid Filtering.
|
|
|
|
IndexConditionPushdownAndReverseScans, RowidFilteringAndReverseScans:
|
|
Suppose we're computing
|
|
|
|
select * from t1
|
|
where
|
|
key1 between 10 and 20 and extra_condition
|
|
order by key1 desc
|
|
|
|
here the range access uses a reverse-ordered scan on (1 <= key1 <= 10) and
|
|
extra_condition is checked by either ICP or Rowid Filtering.
|
|
|
|
Also suppose that extra_condition happens to be false for rows of t1 that
|
|
do not satisfy the "10 <= key1 <= 20" condition.
|
|
|
|
For forward ordered range scan, the SQL layer will make these calls:
|
|
|
|
h->read_range_first(RANGE(10 <= key1 <= 20));
|
|
while (h->read_range_next()) { ... }
|
|
|
|
The storage engine sees the end endpoint of "key1<=20" and can stop scanning
|
|
as soon as it encounters a row with key1>20.
|
|
|
|
For backward-ordered range scan, the SQL layer will make these calls:
|
|
|
|
h->index_read_map(key1=20, HA_READ_PREFIX_LAST_OR_PREV);
|
|
while (h->index_prev()) {
|
|
if (cmp_key(h->record, "key1=10" )<0)
|
|
break; // end of range
|
|
...
|
|
}
|
|
|
|
Note that the check whether we've walked beyond the key=10 endpoint is
|
|
made at the SQL layer. The storage engine has no information about the left
|
|
endpoint of the interval we're scanning. If all rows before that endpoint
|
|
do not satisfy ICP condition or do not pass the Rowid Filter, the storage
|
|
engine will enumerate the records until the table start.
|
|
|
|
In MySQL, the API is extended with set_end_range() call so that the storage
|
|
engine "knows" when to stop scanning.
|
|
*/
|
|
|
|
static void prepare_for_reverse_ordered_access(JOIN_TAB *tab)
|
|
{
|
|
/* Cancel "Range checked for each record" */
|
|
if (tab->use_quick == 2)
|
|
{
|
|
tab->use_quick= 1;
|
|
tab->read_first_record= join_init_read_record;
|
|
}
|
|
/*
|
|
Cancel Pushed Index Condition, as it doesn't work for reverse scans.
|
|
*/
|
|
if (tab->select && tab->select->pre_idx_push_select_cond)
|
|
{
|
|
tab->set_cond(tab->select->pre_idx_push_select_cond);
|
|
tab->table->file->cancel_pushed_idx_cond();
|
|
}
|
|
/*
|
|
The same with Rowid Filter: it doesn't work with reverse scans so cancel
|
|
it, too.
|
|
*/
|
|
{
|
|
/*
|
|
Rowid Filter is initialized at a later stage. It is not pushed to
|
|
the storage engine yet:
|
|
*/
|
|
DBUG_ASSERT(!tab->table->file->pushed_rowid_filter);
|
|
tab->range_rowid_filter_info= NULL;
|
|
delete tab->rowid_filter;
|
|
tab->rowid_filter= NULL;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Given a table and order, find indexes that produce rows in the order
|
|
|
|
@param usable_keys IN Bitmap of keys we can use
|
|
OUT Bitmap of indexes that produce rows in order.
|
|
|
|
@return
|
|
false Some indexes were found
|
|
true No indexes found
|
|
*/
|
|
|
|
static
|
|
bool find_indexes_matching_order(JOIN *join, TABLE *table, ORDER *order,
|
|
key_map *usable_keys)
|
|
{
|
|
/* Find indexes that cover all ORDER/GROUP BY fields */
|
|
for (ORDER *tmp_order=order; tmp_order ; tmp_order=tmp_order->next)
|
|
{
|
|
key_map col_keys= (*tmp_order->item)->part_of_sortkey();
|
|
|
|
/*
|
|
Take multiple-equalities into account. Suppose we have
|
|
ORDER BY col1, col10
|
|
and there are
|
|
multiple-equal(col1, col2, col3),
|
|
multiple-equal(col10, col11).
|
|
|
|
Then,
|
|
- when item=col1, we find the set of indexes that cover one of {col1,
|
|
col2, col3}
|
|
- when item=col10, we find the set of indexes that cover one of {col10,
|
|
col11}
|
|
|
|
And we compute an intersection of these sets to find set of indexes that
|
|
cover all ORDER BY components.
|
|
*/
|
|
compute_part_of_sort_key_for_equals(join, table, &col_keys,
|
|
(*tmp_order->item)->real_item());
|
|
usable_keys->intersect(col_keys);
|
|
if (usable_keys->is_clear_all())
|
|
return true; // No usable keys
|
|
}
|
|
return false;
|
|
|
|
}
|
|
|
|
/**
|
|
Test if we can skip the ORDER BY by using an index.
|
|
|
|
If we can use an index, the JOIN_TAB / tab->select struct
|
|
is changed to use the index.
|
|
|
|
The index must cover all fields in <order>, or it will not be considered.
|
|
|
|
@param no_changes No changes will be made to the query plan.
|
|
@param fatal_error OUT A fatal error occurred
|
|
|
|
@todo
|
|
- sergeyp: Results of all index merge selects actually are ordered
|
|
by clustered PK values.
|
|
|
|
@retval
|
|
0 We have to use filesort to do the sorting
|
|
@retval
|
|
1 We can use an index.
|
|
*/
|
|
|
|
static bool
|
|
test_if_skip_sort_order(JOIN_TAB *tab,ORDER *order,ha_rows select_limit,
|
|
bool no_changes, const key_map *map, bool *fatal_error)
|
|
{
|
|
int ref_key;
|
|
uint UNINIT_VAR(ref_key_parts);
|
|
int order_direction= 0;
|
|
uint used_key_parts= 0;
|
|
TABLE *table=tab->table;
|
|
SQL_SELECT *select=tab->select;
|
|
key_map usable_keys;
|
|
QUICK_SELECT_I *save_quick= select ? select->quick : 0;
|
|
Item *orig_cond= 0;
|
|
bool orig_cond_saved= false;
|
|
int best_key= -1;
|
|
bool changed_key= false;
|
|
THD *thd= tab->join->thd;
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_array trace_arr(thd, "test_if_skip_sort_order");
|
|
DBUG_ENTER("test_if_skip_sort_order");
|
|
|
|
*fatal_error= false;
|
|
/* Check that we are always called with first non-const table */
|
|
DBUG_ASSERT(tab == tab->join->join_tab + tab->join->const_tables);
|
|
|
|
/* Sorting a single row can always be skipped */
|
|
if (tab->type == JT_EQ_REF ||
|
|
tab->type == JT_CONST ||
|
|
tab->type == JT_SYSTEM)
|
|
{
|
|
Json_writer_object trace_skip(thd);
|
|
trace_skip.add("skipped", "single row access method");
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
/*
|
|
Keys disabled by ALTER TABLE ... DISABLE KEYS should have already
|
|
been taken into account.
|
|
*/
|
|
usable_keys= *map;
|
|
|
|
// Step #1: Find indexes that produce the required ordering.
|
|
if (find_indexes_matching_order(tab->join, table, order, &usable_keys))
|
|
DBUG_RETURN(false); // Cannot skip sorting
|
|
|
|
/*
|
|
Step #2: Analyze the current access method. Note the used index as ref_key
|
|
and #used keyparts in ref_key_parts.
|
|
*/
|
|
ref_key= -1;
|
|
/* Test if constant range in WHERE */
|
|
if (tab->ref.key >= 0 && tab->ref.key_parts)
|
|
{
|
|
ref_key= tab->ref.key;
|
|
ref_key_parts= tab->ref.key_parts;
|
|
/*
|
|
todo: why does JT_REF_OR_NULL mean filesort? We could find another index
|
|
that satisfies the ordering. I would just set ref_key=MAX_KEY here...
|
|
*/
|
|
if (tab->type == JT_REF_OR_NULL || tab->type == JT_FT ||
|
|
tab->ref.uses_splitting)
|
|
goto use_filesort;
|
|
}
|
|
else if (select && select->quick) // Range found by opt_range
|
|
{
|
|
int quick_type= select->quick->get_type();
|
|
/*
|
|
assume results are not ordered when index merge is used
|
|
TODO: sergeyp: Results of all index merge selects actually are ordered
|
|
by clustered PK values.
|
|
*/
|
|
|
|
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT)
|
|
{
|
|
/*
|
|
we set ref_key=MAX_KEY instead of -1, because test_if_cheaper_ordering()
|
|
assumes that "ref_key==-1" means doing full index scan.
|
|
(This is not very straightforward and we got into this situation for
|
|
historical reasons. Should be fixed at some point).
|
|
*/
|
|
ref_key= MAX_KEY;
|
|
}
|
|
else
|
|
{
|
|
ref_key= select->quick->index;
|
|
ref_key_parts= select->quick->used_key_parts;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Step #3: Check if index ref_key that we're using produces the required
|
|
ordering or if there is another index new_ref_key such that
|
|
- ref_key is a prefix of new_ref_key (so, access method can be reused)
|
|
- new_ref_key produces the required ordering
|
|
*/
|
|
if (ref_key >= 0 && ref_key != MAX_KEY)
|
|
{
|
|
/* Current access method uses index ref_key with ref_key_parts parts */
|
|
if (!usable_keys.is_set(ref_key))
|
|
{
|
|
/* However, ref_key doesn't match the needed ordering */
|
|
uint new_ref_key;
|
|
|
|
/*
|
|
If using index only read, only consider other possible index only
|
|
keys
|
|
*/
|
|
if (table->covering_keys.is_set(ref_key))
|
|
usable_keys.intersect(table->covering_keys);
|
|
if (tab->pre_idx_push_select_cond)
|
|
{
|
|
orig_cond= tab->set_cond(tab->pre_idx_push_select_cond);
|
|
orig_cond_saved= true;
|
|
}
|
|
|
|
if ((new_ref_key= test_if_subkey(order, table, ref_key, ref_key_parts,
|
|
&usable_keys)) < MAX_KEY)
|
|
{
|
|
/*
|
|
Index new_ref_key
|
|
- produces the required ordering,
|
|
- also has the same columns as ref_key for #ref_key_parts (this
|
|
means we will read the same number of rows as with ref_key).
|
|
*/
|
|
|
|
/*
|
|
If new_ref_key allows to construct a quick select which uses more key
|
|
parts than ref(new_ref_key) would, do that.
|
|
|
|
Otherwise, construct a ref access (todo: it's not clear what is the
|
|
win in using ref access when we could use quick select also?)
|
|
*/
|
|
if ((table->opt_range_keys.is_set(new_ref_key) &&
|
|
table->opt_range[new_ref_key].key_parts > ref_key_parts) ||
|
|
!(tab->ref.key >= 0))
|
|
{
|
|
/*
|
|
The range optimizer constructed QUICK_RANGE for ref_key, and
|
|
we want to use instead new_ref_key as the index. We can't
|
|
just change the index of the quick select, because this may
|
|
result in an inconsistent QUICK_SELECT object. Below we
|
|
create a new QUICK_SELECT from scratch so that all its
|
|
parameters are set correctly by the range optimizer.
|
|
*/
|
|
key_map new_ref_key_map;
|
|
COND *save_cond;
|
|
quick_select_return res;
|
|
new_ref_key_map.clear_all(); // Force the creation of quick select
|
|
new_ref_key_map.set_bit(new_ref_key); // only for new_ref_key.
|
|
|
|
/* Reset quick; This will be restored in 'use_filesort' if needed */
|
|
select->quick= 0;
|
|
save_cond= select->cond;
|
|
if (select->pre_idx_push_select_cond)
|
|
select->cond= select->pre_idx_push_select_cond;
|
|
res= select->test_quick_select(tab->join->thd, new_ref_key_map, 0,
|
|
(tab->join->select_options &
|
|
OPTION_FOUND_ROWS) ?
|
|
HA_POS_ERROR :
|
|
tab->join->unit->
|
|
lim.get_select_limit(),
|
|
TRUE, TRUE, FALSE, FALSE,
|
|
Item_func::BITMAP_ALL);
|
|
// if we cannot use quick select
|
|
if (res != SQL_SELECT::OK || !tab->select->quick)
|
|
{
|
|
if (res == SQL_SELECT::ERROR)
|
|
*fatal_error= true;
|
|
select->cond= save_cond;
|
|
goto use_filesort;
|
|
}
|
|
tab->type= JT_RANGE;
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts= 0;
|
|
tab->use_quick= 1;
|
|
best_key= new_ref_key;
|
|
/*
|
|
We don't restore select->cond as we want to use the
|
|
original condition as index condition pushdown is not
|
|
active for the new index.
|
|
todo: why not perform index condition pushdown for the new index?
|
|
*/
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We'll use ref access method on key new_ref_key. In general case
|
|
the index search tuple for new_ref_key will be different (e.g.
|
|
when one index is defined as (part1, part2, ...) and another as
|
|
(part1, part2(N), ...) and the WHERE clause contains
|
|
"part1 = const1 AND part2=const2".
|
|
So we build tab->ref from scratch here.
|
|
*/
|
|
KEYUSE *keyuse= tab->keyuse;
|
|
while (keyuse->key != new_ref_key && keyuse->table == tab->table)
|
|
keyuse++;
|
|
if (create_ref_for_key(tab->join, tab, keyuse, FALSE,
|
|
(tab->join->const_table_map |
|
|
OUTER_REF_TABLE_BIT)))
|
|
goto use_filesort;
|
|
|
|
pick_table_access_method(tab);
|
|
}
|
|
|
|
ref_key= new_ref_key;
|
|
changed_key= true;
|
|
}
|
|
}
|
|
/* Check if we get the rows in requested sorted order by using the key */
|
|
if (usable_keys.is_set(ref_key) &&
|
|
(order_direction= test_if_order_by_key(tab->join, order,table,ref_key,
|
|
&used_key_parts)))
|
|
goto check_reverse_order;
|
|
}
|
|
|
|
/*
|
|
Step #4: Go through all indexes that produce required ordering (in
|
|
usable_keys) and check if any of them is cheaper than ref_key
|
|
*/
|
|
{
|
|
uint UNINIT_VAR(best_key_parts);
|
|
uint saved_best_key_parts= 0;
|
|
int best_key_direction= 0;
|
|
JOIN *join= tab->join;
|
|
ha_rows table_records= table->stat_records();
|
|
double new_read_time_dummy;
|
|
|
|
test_if_cheaper_ordering(FALSE, tab, order, table, usable_keys,
|
|
ref_key, select_limit,
|
|
&best_key, &best_key_direction,
|
|
&select_limit, &new_read_time_dummy,
|
|
&best_key_parts,
|
|
&saved_best_key_parts);
|
|
|
|
/*
|
|
filesort() and join cache are usually faster than reading in
|
|
index order and not using join cache, except in case that chosen
|
|
index is clustered key.
|
|
*/
|
|
if (best_key < 0 ||
|
|
((select_limit >= table_records) &&
|
|
((tab->type == JT_ALL || tab->type == JT_RANGE) &&
|
|
tab->join->table_count > tab->join->const_tables + 1) &&
|
|
!table->is_clustering_key(best_key)))
|
|
goto use_filesort;
|
|
|
|
if (select && table->opt_range_keys.is_set(best_key) && best_key != ref_key)
|
|
{
|
|
key_map tmp_map;
|
|
tmp_map.clear_all(); // Force the creation of quick select
|
|
tmp_map.set_bit(best_key); // only best_key.
|
|
select->quick= 0;
|
|
|
|
bool cond_saved= false;
|
|
Item *saved_cond;
|
|
|
|
/*
|
|
Index Condition Pushdown may have removed parts of the condition for
|
|
this table. Temporarily put them back because we want the whole
|
|
condition for the range analysis.
|
|
*/
|
|
if (select->pre_idx_push_select_cond)
|
|
{
|
|
saved_cond= select->cond;
|
|
select->cond= select->pre_idx_push_select_cond;
|
|
cond_saved= true;
|
|
}
|
|
|
|
quick_select_return res;
|
|
res = select->test_quick_select(join->thd, tmp_map, 0,
|
|
join->select_options & OPTION_FOUND_ROWS ?
|
|
HA_POS_ERROR :
|
|
join->unit->lim.get_select_limit(),
|
|
TRUE, FALSE, FALSE, FALSE,
|
|
Item_func::BITMAP_ALL);
|
|
if (res == SQL_SELECT::ERROR)
|
|
{
|
|
*fatal_error= true;
|
|
goto use_filesort;
|
|
}
|
|
|
|
if (cond_saved)
|
|
select->cond= saved_cond;
|
|
}
|
|
order_direction= best_key_direction;
|
|
/*
|
|
saved_best_key_parts is actual number of used keyparts found by
|
|
the test_if_order_by_key function. It could differ from
|
|
keyinfo->user_defined_key_parts, thus we have to restore it in
|
|
case of desc order as it affects QUICK_SELECT_DESC behaviour.
|
|
*/
|
|
used_key_parts= (order_direction == -1) ?
|
|
saved_best_key_parts : best_key_parts;
|
|
changed_key= true;
|
|
}
|
|
|
|
check_reverse_order:
|
|
DBUG_ASSERT(order_direction != 0);
|
|
|
|
if (order_direction == -1) // If ORDER BY ... DESC
|
|
{
|
|
int quick_type;
|
|
if (select && select->quick)
|
|
{
|
|
/*
|
|
Don't reverse the sort order, if it's already done.
|
|
(In some cases test_if_order_by_key() can be called multiple times
|
|
*/
|
|
if (select->quick->reverse_sorted())
|
|
goto skipped_filesort;
|
|
|
|
quick_type= select->quick->get_type();
|
|
if (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
|
|
quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
|
|
{
|
|
tab->limit= 0;
|
|
goto use_filesort; // Use filesort
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
Update query plan with access pattern for doing ordered access
|
|
according to what we have decided above.
|
|
*/
|
|
if (!no_changes) // We are allowed to update QEP
|
|
{
|
|
if (best_key >= 0)
|
|
{
|
|
bool quick_created=
|
|
(select && select->quick && select->quick!=save_quick);
|
|
|
|
if (!quick_created)
|
|
{
|
|
if (select) // Throw any existing quick select
|
|
select->quick= 0; // Cleanup either reset to save_quick,
|
|
// or 'delete save_quick'
|
|
tab->index= best_key;
|
|
tab->read_first_record= (order_direction > 0 ?
|
|
join_read_first:
|
|
join_read_last);
|
|
tab->type=JT_NEXT; // Read with index_first(), index_next()
|
|
|
|
/*
|
|
Currently usage of rowid filters is not supported in InnoDB
|
|
if the table is accessed by the primary key
|
|
*/
|
|
if (tab->rowid_filter &&
|
|
(table->file->is_clustering_key(tab->index) ||
|
|
table->covering_keys.is_set(best_key)))
|
|
tab->clear_range_rowid_filter();
|
|
|
|
if (tab->pre_idx_push_select_cond)
|
|
{
|
|
tab->set_cond(tab->pre_idx_push_select_cond);
|
|
/*
|
|
orig_cond is a part of pre_idx_push_cond,
|
|
no need to restore it.
|
|
*/
|
|
orig_cond= 0;
|
|
orig_cond_saved= false;
|
|
}
|
|
|
|
table->file->ha_index_or_rnd_end();
|
|
if (tab->join->select_options & SELECT_DESCRIBE)
|
|
{
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts= 0;
|
|
if (select_limit < table->stat_records())
|
|
tab->limit= select_limit;
|
|
}
|
|
}
|
|
else if (tab->type != JT_ALL || tab->select->quick)
|
|
{
|
|
/*
|
|
We're about to use a quick access to the table.
|
|
We need to change the access method so as the quick access
|
|
method is actually used.
|
|
*/
|
|
DBUG_ASSERT(tab->select->quick);
|
|
tab->type= JT_RANGE;
|
|
tab->use_quick=1;
|
|
tab->ref.key= -1;
|
|
tab->ref.key_parts=0; // Don't use ref key.
|
|
if (tab->rowid_filter)
|
|
tab->clear_range_rowid_filter();
|
|
tab->read_first_record= join_init_read_record;
|
|
if (tab->is_using_loose_index_scan())
|
|
tab->join->tmp_table_param.precomputed_group_by= TRUE;
|
|
|
|
/*
|
|
Restore the original condition as changes done by pushdown
|
|
condition are not relevant anymore
|
|
*/
|
|
if (tab->select && tab->select->pre_idx_push_select_cond)
|
|
{
|
|
tab->set_cond(tab->select->pre_idx_push_select_cond);
|
|
tab->table->file->cancel_pushed_idx_cond();
|
|
}
|
|
/*
|
|
TODO: update the number of records in join->best_positions[tablenr]
|
|
*/
|
|
}
|
|
} // best_key >= 0
|
|
|
|
if (order_direction == -1) // If ORDER BY ... DESC
|
|
{
|
|
if (select && select->quick)
|
|
{
|
|
/* ORDER BY range_key DESC */
|
|
QUICK_SELECT_I *tmp= select->quick->make_reverse(used_key_parts);
|
|
if (!tmp)
|
|
{
|
|
tab->limit= 0;
|
|
goto use_filesort; // Reverse sort failed -> filesort
|
|
}
|
|
prepare_for_reverse_ordered_access(tab);
|
|
|
|
if (select->quick == save_quick)
|
|
save_quick= 0; // make_reverse() consumed it
|
|
select->set_quick(tmp);
|
|
}
|
|
else if (tab->type != JT_NEXT && tab->type != JT_REF_OR_NULL &&
|
|
tab->ref.key >= 0 && tab->ref.key_parts <= used_key_parts)
|
|
{
|
|
/*
|
|
SELECT * FROM t1 WHERE a=1 ORDER BY a DESC,b DESC
|
|
|
|
Use a traversal function that starts by reading the last row
|
|
with key part (A) and then traverse the index backwards.
|
|
*/
|
|
tab->read_first_record= join_read_last_key;
|
|
tab->read_record.read_record_func= join_read_prev_same;
|
|
prepare_for_reverse_ordered_access(tab);
|
|
}
|
|
}
|
|
else if (select && select->quick)
|
|
{
|
|
/* Cancel "Range checked for each record" */
|
|
if (tab->use_quick == 2)
|
|
{
|
|
tab->use_quick= 1;
|
|
tab->read_first_record= join_init_read_record;
|
|
}
|
|
select->quick->need_sorted_output();
|
|
}
|
|
|
|
if (tab->type == JT_EQ_REF)
|
|
tab->read_record.unlock_row= join_read_key_unlock_row;
|
|
else if (tab->type == JT_CONST)
|
|
tab->read_record.unlock_row= join_const_unlock_row;
|
|
else
|
|
tab->read_record.unlock_row= rr_unlock_row;
|
|
|
|
} // QEP has been modified
|
|
|
|
/*
|
|
Cleanup:
|
|
We may have both a 'select->quick' and 'save_quick' (original)
|
|
at this point. Delete the one that we wan't use.
|
|
*/
|
|
|
|
skipped_filesort:
|
|
// Keep current (ordered) select->quick
|
|
if (select && save_quick != select->quick)
|
|
{
|
|
delete save_quick;
|
|
save_quick= NULL;
|
|
}
|
|
if (orig_cond_saved && !changed_key)
|
|
tab->set_cond(orig_cond);
|
|
if (!no_changes && changed_key && table->file->pushed_idx_cond)
|
|
table->file->cancel_pushed_idx_cond();
|
|
|
|
DBUG_RETURN(1);
|
|
|
|
use_filesort:
|
|
// Restore original save_quick
|
|
if (select && select->quick != save_quick)
|
|
{
|
|
delete select->quick;
|
|
select->quick= save_quick;
|
|
}
|
|
if (orig_cond_saved)
|
|
tab->set_cond(orig_cond);
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
If not selecting by given key, create an index how records should be read
|
|
|
|
SYNOPSIS
|
|
create_sort_index()
|
|
thd Thread handler
|
|
join Join with table to sort
|
|
join_tab What table to sort
|
|
fsort Filesort object. NULL means "use tab->filesort".
|
|
|
|
IMPLEMENTATION
|
|
- If there is an index that can be used, the first non-const join_tab in
|
|
'join' is modified to use this index.
|
|
- If no index, create with filesort() an index file that can be used to
|
|
retrieve rows in order (should be done with 'read_record').
|
|
The sorted data is stored in tab->filesort
|
|
|
|
RETURN VALUES
|
|
0 ok
|
|
-1 Some fatal error
|
|
1 No records
|
|
*/
|
|
|
|
int
|
|
create_sort_index(THD *thd, JOIN *join, JOIN_TAB *tab, Filesort *fsort)
|
|
{
|
|
TABLE *table;
|
|
SQL_SELECT *select;
|
|
bool quick_created= FALSE;
|
|
SORT_INFO *file_sort= 0;
|
|
DBUG_ENTER("create_sort_index");
|
|
|
|
if (fsort == NULL)
|
|
fsort= tab->filesort;
|
|
|
|
table= tab->table;
|
|
select= fsort->select;
|
|
|
|
table->status=0; // May be wrong if quick_select
|
|
|
|
if (!tab->preread_init_done && tab->preread_init())
|
|
goto err;
|
|
|
|
// If table has a range, move it to select
|
|
if (select && tab->ref.key >= 0)
|
|
{
|
|
if (!select->quick)
|
|
{
|
|
if (tab->quick)
|
|
{
|
|
select->quick= tab->quick;
|
|
tab->quick= NULL;
|
|
/*
|
|
We can only use 'Only index' if quick key is same as ref_key
|
|
and in index_merge 'Only index' cannot be used
|
|
*/
|
|
if (((uint) tab->ref.key != select->quick->index))
|
|
table->file->ha_end_keyread();
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We have a ref on a const; Change this to a range that filesort
|
|
can use.
|
|
For impossible ranges (like when doing a lookup on NULL on a NOT NULL
|
|
field, quick will contain an empty record set.
|
|
*/
|
|
if (!(select->quick= (tab->type == JT_FT ?
|
|
get_ft_select(thd, table, tab->ref.key) :
|
|
get_quick_select_for_ref(thd, table, &tab->ref,
|
|
tab->found_records))))
|
|
goto err;
|
|
quick_created= TRUE;
|
|
}
|
|
fsort->own_select= true;
|
|
}
|
|
else
|
|
{
|
|
fsort->own_select= false;
|
|
DBUG_ASSERT(tab->type == JT_REF || tab->type == JT_EQ_REF);
|
|
// Update ref value
|
|
if (unlikely(cp_buffer_from_ref(thd, table, &tab->ref) &&
|
|
thd->is_error()))
|
|
goto err; // out of memory
|
|
}
|
|
}
|
|
|
|
|
|
/* Fill schema tables with data before filesort if it's necessary */
|
|
if ((join->select_lex->options & OPTION_SCHEMA_TABLE) &&
|
|
unlikely(get_schema_tables_result(join, PROCESSED_BY_CREATE_SORT_INDEX)))
|
|
goto err;
|
|
|
|
if (table->s->tmp_table)
|
|
table->file->info(HA_STATUS_VARIABLE); // Get record count
|
|
fsort->accepted_rows= &join->accepted_rows; // For ROWNUM
|
|
file_sort= filesort(thd, table, fsort, fsort->tracker, join, tab->table->map);
|
|
DBUG_ASSERT(tab->filesort_result == 0);
|
|
tab->filesort_result= file_sort;
|
|
tab->records= 0;
|
|
if (file_sort)
|
|
{
|
|
tab->records= join->select_options & OPTION_FOUND_ROWS ?
|
|
file_sort->found_rows : file_sort->return_rows;
|
|
}
|
|
|
|
if (quick_created)
|
|
{
|
|
/* This will delete the quick select. */
|
|
select->cleanup();
|
|
}
|
|
|
|
table->file->ha_end_keyread();
|
|
if (tab->type == JT_FT)
|
|
table->file->ha_ft_end();
|
|
else
|
|
table->file->ha_index_or_rnd_end();
|
|
|
|
DBUG_RETURN(file_sort == 0);
|
|
err:
|
|
DBUG_RETURN(-1);
|
|
}
|
|
|
|
|
|
/**
|
|
Compare fields from table->record[0] and table->record[1],
|
|
possibly skipping few first fields.
|
|
|
|
@param table
|
|
@param ptr field to start the comparison from,
|
|
somewhere in the table->field[] array
|
|
|
|
@retval 1 different
|
|
@retval 0 identical
|
|
*/
|
|
static bool compare_record(TABLE *table, Field **ptr)
|
|
{
|
|
for (; *ptr ; ptr++)
|
|
{
|
|
Field *f= *ptr;
|
|
if (f->is_null() != f->is_null(table->s->rec_buff_length) ||
|
|
(!f->is_null() && f->cmp_offset(table->s->rec_buff_length)))
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/*
  Call Field_blob::copy() on every blob field of a NULL-terminated
  field list.

  Returns 1 as soon as one copy() reports failure, 0 otherwise.
*/
static bool copy_blobs(Field **ptr)
{
  while (*ptr)
  {
    Field *fld= *ptr++;
    if (!(fld->flags & BLOB_FLAG))
      continue;                                 // Not a blob
    if (((Field_blob *) fld)->copy())
      return 1;                                 // Error
  }
  return 0;
}
|
|
|
|
/*
  Call Field_blob::free() on every blob field of a NULL-terminated
  field list.
*/
static void free_blobs(Field **ptr)
{
  while (*ptr)
  {
    Field *fld= *ptr++;
    if (fld->flags & BLOB_FLAG)
      ((Field_blob *) fld)->free();
  }
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Remove duplicates from a temporary table.
|
|
|
|
@detail
|
|
Remove duplicate rows from a temporary table. This is used for e.g. queries
|
|
like
|
|
|
|
select distinct count(*) as CNT from tbl group by col
|
|
|
|
Here, we get a group table with count(*) values. It is not possible to
|
|
prevent duplicates from appearing in the table (as we don't know the values
|
|
before we've done the grouping). Because of that, we have this function to
|
|
scan the temptable (maybe, multiple times) and remove the duplicate rows
|
|
|
|
Rows that do not satisfy 'having' condition are also removed.
|
|
*/
|
|
|
|
bool
|
|
JOIN_TAB::remove_duplicates()
|
|
|
|
{
|
|
bool error;
|
|
ulong keylength= 0, sort_field_keylength= 0;
|
|
uint field_count, item_count;
|
|
List<Item> *fields= (this-1)->fields;
|
|
Item *item;
|
|
THD *thd= join->thd;
|
|
SORT_FIELD *sortorder, *sorder;
|
|
DBUG_ENTER("remove_duplicates");
|
|
|
|
DBUG_ASSERT(join->aggr_tables > 0 && table->s->tmp_table != NO_TMP_TABLE);
|
|
THD_STAGE_INFO(join->thd, stage_removing_duplicates);
|
|
|
|
if (!(sortorder= (SORT_FIELD*) my_malloc(PSI_INSTRUMENT_ME,
|
|
(fields->elements+1) *
|
|
sizeof(SORT_FIELD),
|
|
MYF(MY_WME | MY_ZEROFILL))))
|
|
DBUG_RETURN(TRUE);
|
|
|
|
/* Calculate how many saved fields there is in list */
|
|
field_count= item_count= 0;
|
|
|
|
List_iterator<Item> it(*fields);
|
|
for (sorder= sortorder ; (item=it++) ;)
|
|
{
|
|
if (!item->const_item())
|
|
{
|
|
if (item->get_tmp_table_field())
|
|
{
|
|
/* Field is stored in temporary table, skipp */
|
|
field_count++;
|
|
}
|
|
else
|
|
{
|
|
/* Item is not stored in temporary table, remember it */
|
|
sorder->item= item;
|
|
sorder->type= sorder->item->type_handler()->is_packable() ?
|
|
SORT_FIELD_ATTR::VARIABLE_SIZE :
|
|
SORT_FIELD_ATTR::FIXED_SIZE;
|
|
/* Calculate sorder->length */
|
|
item->type_handler()->sort_length(thd, item, sorder);
|
|
sorder++;
|
|
item_count++;
|
|
}
|
|
}
|
|
}
|
|
sorder->item= 0; // End marker
|
|
|
|
if ((field_count + item_count == 0) && ! having &&
|
|
!(join->select_options & OPTION_FOUND_ROWS))
|
|
{
|
|
// only const items with no OPTION_FOUND_ROWS
|
|
join->unit->lim.send_first_row(); // Only send first row
|
|
my_free(sortorder);
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
/*
|
|
The table contains first fields that will be in the output, then
|
|
temporary results pointed to by the fields list.
|
|
Example: SELECT DISTINCT sum(a), sum(d) > 2 FROM ...
|
|
In this case the temporary table contains sum(a), sum(d).
|
|
*/
|
|
|
|
Field **first_field=table->field+table->s->fields - field_count;
|
|
for (Field **ptr=first_field; *ptr; ptr++)
|
|
keylength+= (*ptr)->sort_length() + (*ptr)->maybe_null();
|
|
for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++)
|
|
sort_field_keylength+= ptr->length + (ptr->item->maybe_null() ? 1 : 0);
|
|
|
|
/*
|
|
Disable LIMIT ROWS EXAMINED in order to avoid interrupting prematurely
|
|
duplicate removal, and produce a possibly incomplete query result.
|
|
*/
|
|
thd->lex->limit_rows_examined_cnt= ULONGLONG_MAX;
|
|
if (thd->killed == ABORT_QUERY)
|
|
thd->reset_killed();
|
|
|
|
table->file->info(HA_STATUS_VARIABLE);
|
|
table->reginfo.lock_type=TL_WRITE;
|
|
|
|
if (table->s->db_type() == heap_hton ||
|
|
(!table->s->blob_fields &&
|
|
((ALIGN_SIZE(keylength) + HASH_OVERHEAD) * table->file->stats.records <
|
|
thd->variables.sortbuff_size)))
|
|
error= remove_dup_with_hash_index(join->thd, table, field_count,
|
|
first_field, sortorder,
|
|
keylength + sort_field_keylength, having);
|
|
else
|
|
error=remove_dup_with_compare(join->thd, table, first_field, sortorder,
|
|
sort_field_keylength, having);
|
|
|
|
if (join->select_lex != join->select_lex->master_unit()->fake_select_lex)
|
|
thd->lex->set_limit_rows_examined();
|
|
free_blobs(first_field);
|
|
my_free(sortorder);
|
|
DBUG_RETURN(error);
|
|
}
|
|
|
|
|
|
/*
|
|
Create a sort/compare key from items
|
|
|
|
Key is of fixed length and binary comparable
|
|
*/
|
|
|
|
static uchar *make_sort_key(SORT_FIELD *sortorder, uchar *key_buffer,
|
|
String *tmp_value)
|
|
{
|
|
for (SORT_FIELD *ptr= sortorder ; ptr->item ; ptr++)
|
|
{
|
|
ptr->item->type_handler()->make_sort_key_part(key_buffer,
|
|
ptr->item,
|
|
ptr, tmp_value);
|
|
key_buffer+= (ptr->item->maybe_null() ? 1 : 0) + ptr->length;
|
|
}
|
|
return key_buffer;
|
|
}
|
|
|
|
|
|
/*
|
|
Remove duplicates by comparing all rows with all other rows
|
|
|
|
@param thd THD
|
|
@param table Temporary table
|
|
@param first_field Pointer to fields in temporary table that are part of
|
|
distinct, ends with null pointer
|
|
@param sortorder An array of Items part of distsinct. Terminated with an
|
|
element N with sortorder[N]->item=NULL.
|
|
@param keylength Length of key produced by sortorder
|
|
@param having Having expression (NULL if no having)
|
|
*/
|
|
|
|
static int remove_dup_with_compare(THD *thd, TABLE *table, Field **first_field,
|
|
SORT_FIELD *sortorder, ulong keylength,
|
|
Item *having)
|
|
{
|
|
handler *file=table->file;
|
|
uchar *record=table->record[0], *key_buffer, *key_buffer2;
|
|
char *tmp_buffer;
|
|
int error;
|
|
String tmp_value;
|
|
DBUG_ENTER("remove_dup_with_compare");
|
|
|
|
if (unlikely(!my_multi_malloc(PSI_INSTRUMENT_ME,
|
|
MYF(MY_WME),
|
|
&key_buffer, keylength,
|
|
&key_buffer2, keylength,
|
|
&tmp_buffer, keylength+1,
|
|
NullS)))
|
|
DBUG_RETURN(1);
|
|
tmp_value.set(tmp_buffer, keylength, &my_charset_bin);
|
|
|
|
if (unlikely(file->ha_rnd_init_with_error(1)))
|
|
DBUG_RETURN(1);
|
|
|
|
error= file->ha_rnd_next(record);
|
|
for (;;)
|
|
{
|
|
if (unlikely(thd->check_killed()))
|
|
{
|
|
error= 1;
|
|
goto end;
|
|
}
|
|
if (unlikely(error))
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE)
|
|
break;
|
|
goto err;
|
|
}
|
|
if (having && !having->val_int())
|
|
{
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
error= file->ha_rnd_next(record);
|
|
continue;
|
|
}
|
|
if (unlikely(copy_blobs(first_field)))
|
|
{
|
|
my_message(ER_OUTOFMEMORY, ER_THD(thd,ER_OUTOFMEMORY),
|
|
MYF(ME_FATAL));
|
|
error= 1;
|
|
goto end;
|
|
}
|
|
make_sort_key(sortorder, key_buffer, &tmp_value);
|
|
store_record(table,record[1]);
|
|
|
|
/* Read through rest of file and mark duplicated rows deleted */
|
|
bool found=0;
|
|
for (;;)
|
|
{
|
|
if (unlikely((error= file->ha_rnd_next(record))))
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE)
|
|
break;
|
|
goto err;
|
|
}
|
|
make_sort_key(sortorder, key_buffer2, &tmp_value);
|
|
if (compare_record(table, first_field) == 0 &&
|
|
(!keylength ||
|
|
memcmp(key_buffer, key_buffer2, keylength) == 0))
|
|
{
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
}
|
|
else if (!found)
|
|
{
|
|
found=1;
|
|
if (unlikely((error= file->remember_rnd_pos())))
|
|
goto err;
|
|
}
|
|
}
|
|
if (!found)
|
|
break; // End of file
|
|
/* Restart search on saved row */
|
|
if (unlikely((error= file->restart_rnd_next(record))))
|
|
goto err;
|
|
}
|
|
|
|
error= 0;
|
|
end:
|
|
my_free(key_buffer);
|
|
file->extra(HA_EXTRA_NO_CACHE);
|
|
(void) file->ha_rnd_end();
|
|
DBUG_RETURN(error);
|
|
|
|
err:
|
|
DBUG_ASSERT(error);
|
|
file->print_error(error,MYF(0));
|
|
goto end;
|
|
}
|
|
|
|
|
|
/**
|
|
Generate a hash index for each row to quickly find duplicate rows.
|
|
|
|
@param thd THD
|
|
@param table Temporary table
|
|
@param field_count Number of fields part of distinct
|
|
@param first_field Pointer to fields in temporary table that are part of
|
|
distinct, ends with null pointer
|
|
@param sortorder An array of Items part of distsinct. Terminated with an
|
|
element N with sortorder[N]->item=NULL.
|
|
@param keylength Length of hash key
|
|
@param having Having expression (NULL if no having)
|
|
|
|
@note
|
|
Note that this will not work on tables with blobs!
|
|
*/
|
|
|
|
static int remove_dup_with_hash_index(THD *thd, TABLE *table,
|
|
uint field_count,
|
|
Field **first_field,
|
|
SORT_FIELD *sortorder,
|
|
ulong key_length,
|
|
Item *having)
|
|
{
|
|
uchar *key_buffer, *key_pos, *record=table->record[0];
|
|
char *tmp_buffer;
|
|
int error;
|
|
handler *file= table->file;
|
|
ulong extra_length= ALIGN_SIZE(key_length)-key_length;
|
|
uint *field_lengths, *field_length;
|
|
HASH hash;
|
|
String tmp_value;
|
|
DBUG_ENTER("remove_dup_with_hash_index");
|
|
|
|
if (!my_multi_malloc(key_memory_hash_index_key_buffer, MYF(MY_WME),
|
|
&key_buffer,
|
|
(uint) ((key_length + extra_length) *
|
|
(long) file->stats.records),
|
|
&field_lengths,
|
|
(uint) (field_count*sizeof(*field_lengths)),
|
|
&tmp_buffer, key_length+1,
|
|
NullS))
|
|
DBUG_RETURN(1);
|
|
|
|
tmp_value.set(tmp_buffer, key_length, &my_charset_bin);
|
|
field_length= field_lengths;
|
|
for (Field **ptr= first_field ; *ptr ; ptr++)
|
|
(*field_length++)= (*ptr)->sort_length();
|
|
|
|
if (my_hash_init(key_memory_hash_index_key_buffer, &hash, &my_charset_bin,
|
|
(uint) file->stats.records, 0, key_length,
|
|
(my_hash_get_key) 0, 0, 0))
|
|
{
|
|
my_free(key_buffer);
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
if (unlikely((error= file->ha_rnd_init(1))))
|
|
goto err;
|
|
|
|
key_pos= key_buffer;
|
|
for (;;)
|
|
{
|
|
uchar *org_key_pos;
|
|
if (unlikely(thd->check_killed()))
|
|
{
|
|
error=0;
|
|
goto err;
|
|
}
|
|
if (unlikely((error= file->ha_rnd_next(record))))
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE)
|
|
break;
|
|
goto err;
|
|
}
|
|
if (having && !having->val_int())
|
|
{
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
continue;
|
|
}
|
|
|
|
/* copy fields to key buffer */
|
|
org_key_pos= key_pos;
|
|
field_length=field_lengths;
|
|
for (Field **ptr= first_field ; *ptr ; ptr++)
|
|
{
|
|
(*ptr)->make_sort_key_part(key_pos, *field_length);
|
|
key_pos+= (*ptr)->maybe_null() + *field_length++;
|
|
}
|
|
/* Copy result fields not stored in table to key buffer */
|
|
key_pos= make_sort_key(sortorder, key_pos, &tmp_value);
|
|
|
|
/* Check if it exists before */
|
|
if (my_hash_search(&hash, org_key_pos, key_length))
|
|
{
|
|
/* Duplicated found ; Remove the row */
|
|
if (unlikely((error= file->ha_delete_row(record))))
|
|
goto err;
|
|
}
|
|
else
|
|
{
|
|
if (my_hash_insert(&hash, org_key_pos))
|
|
goto err;
|
|
}
|
|
key_pos+=extra_length;
|
|
}
|
|
my_free(key_buffer);
|
|
my_hash_free(&hash);
|
|
file->extra(HA_EXTRA_NO_CACHE);
|
|
(void) file->ha_rnd_end();
|
|
DBUG_RETURN(0);
|
|
|
|
err:
|
|
my_free(key_buffer);
|
|
my_hash_free(&hash);
|
|
file->extra(HA_EXTRA_NO_CACHE);
|
|
(void) file->ha_rnd_end();
|
|
if (unlikely(error))
|
|
file->print_error(error,MYF(0));
|
|
DBUG_RETURN(1);
|
|
}
|
|
|
|
|
|
/*
|
|
eq_ref: Create the lookup key and check if it is the same as saved key
|
|
|
|
SYNOPSIS
|
|
cmp_buffer_with_ref()
|
|
tab Join tab of the accessed table
|
|
table The table to read. This is usually tab->table, except for
|
|
semi-join when we might need to make a lookup in a temptable
|
|
instead.
|
|
tab_ref The structure with methods to collect index lookup tuple.
|
|
This is usually table->ref, except for the case of when we're
|
|
doing lookup into semi-join materialization table.
|
|
|
|
DESCRIPTION
|
|
Used by eq_ref access method: create the index lookup key and check if
|
|
we've used this key at previous lookup (If yes, we don't need to repeat
|
|
the lookup - the record has been already fetched)
|
|
|
|
RETURN
|
|
TRUE No cached record for the key, or failed to create the key (due to
|
|
out-of-domain error)
|
|
FALSE The created key is the same as the previous one (and the record
|
|
is already in table->record)
|
|
*/
|
|
|
|
static bool
|
|
cmp_buffer_with_ref(THD *thd, TABLE *table, TABLE_REF *tab_ref)
|
|
{
|
|
bool no_prev_key;
|
|
if (!tab_ref->disable_cache)
|
|
{
|
|
if (!(no_prev_key= tab_ref->key_err))
|
|
{
|
|
/* Previous access found a row. Copy its key */
|
|
memcpy(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length);
|
|
}
|
|
}
|
|
else
|
|
no_prev_key= TRUE;
|
|
if ((tab_ref->key_err= cp_buffer_from_ref(thd, table, tab_ref)) ||
|
|
no_prev_key)
|
|
return 1;
|
|
return memcmp(tab_ref->key_buff2, tab_ref->key_buff, tab_ref->key_length)
|
|
!= 0;
|
|
}
|
|
|
|
|
|
bool
|
|
cp_buffer_from_ref(THD *thd, TABLE *table, TABLE_REF *ref)
|
|
{
|
|
enum_check_fields org_count_cuted_fields= thd->count_cuted_fields;
|
|
MY_BITMAP *old_map= dbug_tmp_use_all_columns(table, &table->write_set);
|
|
bool result= 0;
|
|
key_part_map map= 1;
|
|
|
|
thd->count_cuted_fields= CHECK_FIELD_IGNORE;
|
|
for (store_key **copy=ref->key_copy ; *copy ; copy++, map <<= 1)
|
|
{
|
|
while (map & ref->const_ref_part_map) // skip const ref parts
|
|
map <<= 1; // no store_key objects for them
|
|
if ((*copy)->copy(thd) & 1 ||
|
|
((ref->null_rejecting & map) && (*copy)->null_key))
|
|
{
|
|
result= 1;
|
|
break;
|
|
}
|
|
}
|
|
thd->count_cuted_fields= org_count_cuted_fields;
|
|
dbug_tmp_restore_column_map(&table->write_set, old_map);
|
|
return result;
|
|
}
|
|
|
|
|
|
/*****************************************************************************
|
|
Group and order functions
|
|
*****************************************************************************/
|
|
|
|
/**
|
|
Resolve an ORDER BY or GROUP BY column reference.
|
|
|
|
Given a column reference (represented by 'order') from a GROUP BY or ORDER
|
|
BY clause, find the actual column it represents. If the column being
|
|
resolved is from the GROUP BY clause, the procedure searches the SELECT
|
|
list 'fields' and the columns in the FROM list 'tables'. If 'order' is from
|
|
the ORDER BY clause, only the SELECT list is being searched.
|
|
|
|
If 'order' is resolved to an Item, then order->item is set to the found
|
|
Item. If there is no item for the found column (that is, it was resolved
|
|
into a table field), order->item is 'fixed' and is added to all_fields and
|
|
ref_pointer_array.
|
|
|
|
ref_pointer_array and all_fields are updated.
|
|
|
|
@param[in] thd Pointer to current thread structure
|
|
@param[in,out] ref_pointer_array All select, group and order by fields
|
|
@param[in] tables List of tables to search in (usually
|
|
FROM clause)
|
|
@param[in] order Column reference to be resolved
|
|
@param[in] fields List of fields to search in (usually
|
|
SELECT list)
|
|
@param[in,out] all_fields All select, group and order by fields
|
|
@param[in] is_group_field True if order is a GROUP field, false if
|
|
ORDER by field
|
|
@param[in] add_to_all_fields If the item is to be added to all_fields and
|
|
ref_pointer_array, this flag can be set to
|
|
false to stop the automatic insertion.
|
|
@param[in] from_window_spec If true then order is from a window spec
|
|
|
|
@retval
|
|
FALSE if OK
|
|
@retval
|
|
TRUE if error occurred
|
|
*/
|
|
|
|
static bool
|
|
find_order_in_list(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
TABLE_LIST *tables,
|
|
ORDER *order, List<Item> &fields, List<Item> &all_fields,
|
|
bool is_group_field, bool add_to_all_fields,
|
|
bool from_window_spec)
|
|
{
|
|
Item *order_item= *order->item; /* The item from the GROUP/ORDER caluse. */
|
|
Item::Type order_item_type;
|
|
Item **select_item; /* The corresponding item from the SELECT clause. */
|
|
Field *from_field; /* The corresponding field from the FROM clause. */
|
|
uint counter;
|
|
enum_resolution_type resolution;
|
|
|
|
if (order_item->is_order_clause_position() && !from_window_spec)
|
|
{ /* Order by position */
|
|
uint count;
|
|
if (order->counter_used)
|
|
count= order->counter; // counter was once resolved
|
|
else
|
|
count= (uint) order_item->val_int();
|
|
if (!count || count > fields.elements)
|
|
{
|
|
my_error(ER_BAD_FIELD_ERROR, MYF(0),
|
|
order_item->full_name(), thd_where(thd));
|
|
return TRUE;
|
|
}
|
|
thd->change_item_tree((Item **)&order->item, (Item *)&ref_pointer_array[count - 1]);
|
|
order->in_field_list= 1;
|
|
order->counter= count;
|
|
order->counter_used= 1;
|
|
return FALSE;
|
|
}
|
|
/* Lookup the current GROUP/ORDER field in the SELECT clause. */
|
|
select_item= find_item_in_list(order_item, fields, &counter,
|
|
REPORT_EXCEPT_NOT_FOUND, &resolution);
|
|
if (!select_item)
|
|
return TRUE; /* The item is not unique, or some other error occurred. */
|
|
|
|
|
|
/* Check whether the resolved field is not ambiguos. */
|
|
if (select_item != not_found_item)
|
|
{
|
|
Item *view_ref= NULL;
|
|
/*
|
|
If we have found field not by its alias in select list but by its
|
|
original field name, we should additionally check if we have conflict
|
|
for this name (in case if we would perform lookup in all tables).
|
|
*/
|
|
if (resolution == RESOLVED_BEHIND_ALIAS &&
|
|
order_item->fix_fields_if_needed_for_order_by(thd, order->item))
|
|
return TRUE;
|
|
|
|
/* Lookup the current GROUP field in the FROM clause. */
|
|
order_item_type= order_item->type();
|
|
from_field= (Field*) not_found_field;
|
|
if ((is_group_field && order_item_type == Item::FIELD_ITEM) ||
|
|
order_item_type == Item::REF_ITEM)
|
|
{
|
|
from_field= find_field_in_tables(thd, (Item_ident*) order_item, tables,
|
|
NULL, ignored_tables_list_t(NULL),
|
|
&view_ref, IGNORE_ERRORS, FALSE, FALSE);
|
|
if (!from_field)
|
|
from_field= (Field*) not_found_field;
|
|
}
|
|
|
|
if (from_field == not_found_field ||
|
|
(from_field != view_ref_found ?
|
|
/* it is field of base table => check that fields are same */
|
|
((*select_item)->type() == Item::FIELD_ITEM &&
|
|
((Item_field*) (*select_item))->field->eq(from_field)) :
|
|
/*
|
|
in is field of view table => check that references on translation
|
|
table are same
|
|
*/
|
|
((*select_item)->type() == Item::REF_ITEM &&
|
|
view_ref->type() == Item::REF_ITEM &&
|
|
((Item_ref *) (*select_item))->ref ==
|
|
((Item_ref *) view_ref)->ref)))
|
|
{
|
|
/*
|
|
If there is no such field in the FROM clause, or it is the same field
|
|
as the one found in the SELECT clause, then use the Item created for
|
|
the SELECT field. As a result if there was a derived field that
|
|
'shadowed' a table field with the same name, the table field will be
|
|
chosen over the derived field.
|
|
*/
|
|
order->item= &ref_pointer_array[counter];
|
|
order->in_field_list=1;
|
|
return FALSE;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
There is a field with the same name in the FROM clause. This
|
|
is the field that will be chosen. In this case we issue a
|
|
warning so the user knows that the field from the FROM clause
|
|
overshadows the column reference from the SELECT list.
|
|
*/
|
|
push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
|
|
ER_NON_UNIQ_ERROR,
|
|
ER_THD(thd, ER_NON_UNIQ_ERROR),
|
|
((Item_ident*) order_item)->field_name.str,
|
|
thd_where(thd));
|
|
}
|
|
}
|
|
else if (from_window_spec)
|
|
{
|
|
Item **found_item= find_item_in_list(order_item, all_fields, &counter,
|
|
REPORT_EXCEPT_NOT_FOUND, &resolution,
|
|
all_fields.elements - fields.elements);
|
|
if (found_item != not_found_item)
|
|
{
|
|
order->item= &ref_pointer_array[all_fields.elements-1-counter];
|
|
order->in_field_list= 0;
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
order->in_field_list=0;
|
|
/*
|
|
The call to order_item->fix_fields() means that here we resolve
|
|
'order_item' to a column from a table in the list 'tables', or to
|
|
a column in some outer query. Exactly because of the second case
|
|
we come to this point even if (select_item == not_found_item),
|
|
inspite of that fix_fields() calls find_item_in_list() one more
|
|
time.
|
|
|
|
We check order_item->fixed() because Item_func_group_concat can put
|
|
arguments for which fix_fields already was called.
|
|
*/
|
|
if (order_item->fix_fields_if_needed_for_order_by(thd, order->item) ||
|
|
thd->is_error())
|
|
return TRUE; /* Wrong field. */
|
|
order_item= *order->item; // Item can change during fix_fields()
|
|
|
|
if (!add_to_all_fields)
|
|
return FALSE;
|
|
|
|
uint el= all_fields.elements;
|
|
/* Add new field to field list. */
|
|
all_fields.push_front(order_item, thd->mem_root);
|
|
ref_pointer_array[el]= order_item;
|
|
/*
|
|
If the order_item is a SUM_FUNC_ITEM, when fix_fields is called
|
|
ref_by is set to order->item which is the address of order_item.
|
|
But this needs to be address of order_item in the all_fields list.
|
|
As a result, when it gets replaced with Item_aggregate_ref
|
|
object in Item::split_sum_func2, we will be able to retrieve the
|
|
newly created object.
|
|
*/
|
|
if (order_item->type() == Item::SUM_FUNC_ITEM)
|
|
((Item_sum *)order_item)->ref_by= all_fields.head_ref();
|
|
|
|
order->item= &ref_pointer_array[el];
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Change order to point at item in select list.
|
|
|
|
If item isn't a number and doesn't exits in the select list, add it the
|
|
the field list.
|
|
*/
|
|
|
|
int setup_order(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
|
|
List<Item> &fields, List<Item> &all_fields, ORDER *order,
|
|
bool from_window_spec)
|
|
{
|
|
SELECT_LEX *select = thd->lex->current_select;
|
|
enum_parsing_place context_analysis_place=
|
|
thd->lex->current_select->context_analysis_place;
|
|
thd->where= THD_WHERE::ORDER_CLAUSE;
|
|
const bool for_union= select->master_unit()->is_unit_op() &&
|
|
select == select->master_unit()->fake_select_lex;
|
|
for (uint number = 1; order; order=order->next, number++)
|
|
{
|
|
if (find_order_in_list(thd, ref_pointer_array, tables, order, fields,
|
|
all_fields, false, true, from_window_spec))
|
|
return 1;
|
|
Item * const item= *order->item;
|
|
if (item->with_window_func() && context_analysis_place != IN_ORDER_BY)
|
|
{
|
|
my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
UNION queries cannot be used with an aggregate function in
|
|
an ORDER BY clause
|
|
*/
|
|
|
|
if (for_union && (item->with_sum_func() || item->with_window_func()))
|
|
{
|
|
my_error(ER_AGGREGATE_ORDER_FOR_UNION, MYF(0), number);
|
|
return 1;
|
|
}
|
|
|
|
if ((from_window_spec && item->with_sum_func() &&
|
|
item->type() != Item::SUM_FUNC_ITEM) || item->with_window_func())
|
|
{
|
|
item->split_sum_func(thd, ref_pointer_array,
|
|
all_fields, SPLIT_SUM_SELECT);
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Intitialize the GROUP BY list.
|
|
|
|
@param thd Thread handler
|
|
@param ref_pointer_array We store references to all fields that was
|
|
not in 'fields' here.
|
|
@param fields All fields in the select part. Any item in
|
|
'order' that is part of these list is replaced
|
|
by a pointer to this fields.
|
|
@param all_fields Total list of all unique fields used by the
|
|
select. All items in 'order' that was not part
|
|
of fields will be added first to this list.
|
|
@param order The fields we should do GROUP/PARTITION BY on
|
|
@param hidden_group_fields Pointer to flag that is set to 1 if we added
|
|
any fields to all_fields.
|
|
@param from_window_spec If true then list is from a window spec
|
|
|
|
@todo
|
|
change ER_WRONG_FIELD_WITH_GROUP to more detailed
|
|
ER_NON_GROUPING_FIELD_USED
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 error (probably out of memory)
|
|
*/
|
|
|
|
int
|
|
setup_group(THD *thd, Ref_ptr_array ref_pointer_array, TABLE_LIST *tables,
|
|
List<Item> &fields, List<Item> &all_fields, ORDER *order,
|
|
bool *hidden_group_fields, bool from_window_spec)
|
|
{
|
|
enum_parsing_place context_analysis_place=
|
|
thd->lex->current_select->context_analysis_place;
|
|
*hidden_group_fields=0;
|
|
ORDER *ord;
|
|
|
|
if (!order)
|
|
return 0; /* Everything is ok */
|
|
|
|
uint org_fields=all_fields.elements;
|
|
|
|
thd->where= THD_WHERE::GROUP_STATEMENT;
|
|
for (ord= order; ord; ord= ord->next)
|
|
{
|
|
if (find_order_in_list(thd, ref_pointer_array, tables, ord, fields,
|
|
all_fields, true, true, from_window_spec))
|
|
return 1;
|
|
(*ord->item)->marker= MARKER_UNDEF_POS; /* Mark found */
|
|
if ((*ord->item)->with_sum_func() && context_analysis_place == IN_GROUP_BY)
|
|
{
|
|
my_error(ER_WRONG_GROUP_FIELD, MYF(0), (*ord->item)->full_name());
|
|
return 1;
|
|
}
|
|
if ((*ord->item)->with_window_func())
|
|
{
|
|
if (context_analysis_place == IN_GROUP_BY)
|
|
my_error(ER_WRONG_PLACEMENT_OF_WINDOW_FUNCTION, MYF(0));
|
|
else
|
|
my_error(ER_WINDOW_FUNCTION_IN_WINDOW_SPEC, MYF(0));
|
|
return 1;
|
|
}
|
|
if (from_window_spec && (*ord->item)->with_sum_func() &&
|
|
(*ord->item)->type() != Item::SUM_FUNC_ITEM)
|
|
(*ord->item)->split_sum_func(thd, ref_pointer_array,
|
|
all_fields, SPLIT_SUM_SELECT);
|
|
}
|
|
if (thd->variables.sql_mode & MODE_ONLY_FULL_GROUP_BY &&
|
|
context_analysis_place == IN_GROUP_BY)
|
|
{
|
|
/*
|
|
Don't allow one to use fields that is not used in GROUP BY
|
|
For each select a list of field references that aren't under an
|
|
aggregate function is created. Each field in this list keeps the
|
|
position of the select list expression which it belongs to.
|
|
|
|
First we check an expression from the select list against the GROUP BY
|
|
list. If it's found there then it's ok. It's also ok if this expression
|
|
is a constant or an aggregate function. Otherwise we scan the list
|
|
of non-aggregated fields and if we'll find at least one field reference
|
|
that belongs to this expression and doesn't occur in the GROUP BY list
|
|
we throw an error. If there are no fields in the created list for a
|
|
select list expression this means that all fields in it are used under
|
|
aggregate functions.
|
|
|
|
Note that for items in the select list (fields), Item_field->markers
|
|
contains the position of the field in the select list.
|
|
*/
|
|
Item *item;
|
|
Item_field *field;
|
|
int cur_pos_in_select_list= 0;
|
|
List_iterator<Item> li(fields);
|
|
List_iterator<Item_field> naf_it(thd->lex->current_select->join->non_agg_fields);
|
|
|
|
field= naf_it++;
|
|
while (field && (item=li++))
|
|
{
|
|
if (item->type() != Item::SUM_FUNC_ITEM &&
|
|
item->marker != MARKER_UNDEF_POS &&
|
|
!item->const_item() &&
|
|
!(item->real_item()->type() == Item::FIELD_ITEM &&
|
|
item->used_tables() & OUTER_REF_TABLE_BIT))
|
|
{
|
|
while (field)
|
|
{
|
|
/* Skip fields from previous expressions. */
|
|
if (field->marker < cur_pos_in_select_list)
|
|
goto next_field;
|
|
/* Found a field from the next expression. */
|
|
if (field->marker > cur_pos_in_select_list)
|
|
break;
|
|
/*
|
|
Check whether the field occur in the GROUP BY list.
|
|
Throw the error later if the field isn't found.
|
|
*/
|
|
for (ord= order; ord; ord= ord->next)
|
|
if ((*ord->item)->eq((Item*)field, 0))
|
|
goto next_field;
|
|
/*
|
|
TODO: change ER_WRONG_FIELD_WITH_GROUP to more detailed
|
|
ER_NON_GROUPING_FIELD_USED
|
|
*/
|
|
my_error(ER_WRONG_FIELD_WITH_GROUP, MYF(0), field->full_name());
|
|
return 1;
|
|
next_field:
|
|
field= naf_it++;
|
|
}
|
|
}
|
|
cur_pos_in_select_list++;
|
|
}
|
|
}
|
|
if (org_fields != all_fields.elements)
|
|
*hidden_group_fields=1; // group fields is not used
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Add fields with aren't used at start of field list.
|
|
|
|
@return
|
|
FALSE if ok
|
|
*/
|
|
|
|
static bool
|
|
setup_new_fields(THD *thd, List<Item> &fields,
|
|
List<Item> &all_fields, ORDER *new_field)
|
|
{
|
|
Item **item;
|
|
uint counter;
|
|
enum_resolution_type not_used;
|
|
DBUG_ENTER("setup_new_fields");
|
|
|
|
thd->column_usage= MARK_COLUMNS_READ; // Not really needed, but...
|
|
for (; new_field ; new_field= new_field->next)
|
|
{
|
|
if ((item= find_item_in_list(*new_field->item, fields, &counter,
|
|
IGNORE_ERRORS, ¬_used)))
|
|
new_field->item=item; /* Change to shared Item */
|
|
else
|
|
{
|
|
thd->where= THD_WHERE::PROCEDURE_LIST;
|
|
if ((*new_field->item)->fix_fields(thd, new_field->item))
|
|
DBUG_RETURN(1); /* purecov: inspected */
|
|
all_fields.push_front(*new_field->item, thd->mem_root);
|
|
new_field->item=all_fields.head_ref();
|
|
}
|
|
}
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/**
|
|
Create a group by that consist of all non const fields.
|
|
|
|
Try to use the fields in the order given by 'order' to allow one to
|
|
optimize away 'order by'.
|
|
|
|
@retval
|
|
0 OOM error if thd->is_fatal_error is set. Otherwise group was eliminated
|
|
# Pointer to new group
|
|
*/
|
|
|
|
ORDER *
|
|
create_distinct_group(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
ORDER *order_list, List<Item> &fields,
|
|
List<Item> &all_fields,
|
|
bool *all_order_by_fields_used)
|
|
{
|
|
List_iterator<Item> li(fields);
|
|
Item *item;
|
|
Ref_ptr_array orig_ref_pointer_array= ref_pointer_array;
|
|
ORDER *order,*group,**prev;
|
|
uint idx= 0;
|
|
|
|
*all_order_by_fields_used= 1;
|
|
while ((item=li++))
|
|
item->marker= MARKER_UNUSED; /* Marker that field is not used */
|
|
|
|
prev= &group; group=0;
|
|
for (order=order_list ; order; order=order->next)
|
|
{
|
|
if (order->in_field_list)
|
|
{
|
|
ORDER *ord=(ORDER*) thd->memdup((char*) order,sizeof(ORDER));
|
|
if (!ord)
|
|
return 0;
|
|
*prev=ord;
|
|
prev= &ord->next;
|
|
(*ord->item)->marker= MARKER_FOUND_IN_ORDER;
|
|
}
|
|
else
|
|
*all_order_by_fields_used= 0;
|
|
}
|
|
|
|
li.rewind();
|
|
while ((item=li++))
|
|
{
|
|
if (!item->const_item() && !item->with_sum_func() &&
|
|
item->marker == MARKER_UNUSED)
|
|
{
|
|
/*
|
|
Don't put duplicate columns from the SELECT list into the
|
|
GROUP BY list.
|
|
*/
|
|
ORDER *ord_iter;
|
|
for (ord_iter= group; ord_iter; ord_iter= ord_iter->next)
|
|
if ((*ord_iter->item)->eq(item, 1))
|
|
goto next_item;
|
|
|
|
ORDER *ord= thd->calloc<ORDER>(1);
|
|
if (!ord)
|
|
return 0;
|
|
|
|
if (item->type() == Item::FIELD_ITEM &&
|
|
item->field_type() == MYSQL_TYPE_BIT)
|
|
{
|
|
/*
|
|
Because HEAP tables can't index BIT fields we need to use an
|
|
additional hidden field for grouping because later it will be
|
|
converted to a LONG field. Original field will remain of the
|
|
BIT type and will be returned [el]client.
|
|
*/
|
|
Item_field *new_item= new (thd->mem_root) Item_field(thd, (Item_field*)item);
|
|
if (!new_item)
|
|
return 0;
|
|
int el= all_fields.elements;
|
|
orig_ref_pointer_array[el]= new_item;
|
|
all_fields.push_front(new_item, thd->mem_root);
|
|
ord->item=&orig_ref_pointer_array[el];
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We have here only field_list (not all_field_list), so we can use
|
|
simple indexing of ref_pointer_array (order in the array and in the
|
|
list are same)
|
|
*/
|
|
ord->item= &ref_pointer_array[idx];
|
|
}
|
|
ord->direction= ORDER::ORDER_ASC;
|
|
*prev=ord;
|
|
prev= &ord->next;
|
|
}
|
|
next_item:
|
|
idx++;
|
|
}
|
|
*prev=0;
|
|
return group;
|
|
}
|
|
|
|
|
|
/**
|
|
Update join with count of the different type of fields.
|
|
*/
|
|
|
|
void
|
|
count_field_types(SELECT_LEX *select_lex, TMP_TABLE_PARAM *param,
|
|
List<Item> &fields, bool reset_with_sum_func)
|
|
{
|
|
List_iterator<Item> li(fields);
|
|
Item *field;
|
|
|
|
param->field_count=param->sum_func_count=param->func_count=
|
|
param->hidden_field_count=0;
|
|
param->quick_group=1;
|
|
while ((field=li++))
|
|
{
|
|
Item::Type real_type= field->real_item()->type();
|
|
if (real_type == Item::FIELD_ITEM)
|
|
param->field_count++;
|
|
else if (real_type == Item::SUM_FUNC_ITEM)
|
|
{
|
|
if (! field->const_item())
|
|
{
|
|
Item_sum *sum_item=(Item_sum*) field->real_item();
|
|
if (!sum_item->depended_from() ||
|
|
sum_item->depended_from() == select_lex)
|
|
{
|
|
if (!sum_item->quick_group)
|
|
param->quick_group=0; // UDF SUM function
|
|
param->sum_func_count++;
|
|
|
|
for (uint i=0 ; i < sum_item->get_arg_count() ; i++)
|
|
{
|
|
if (sum_item->get_arg(i)->real_item()->type() == Item::FIELD_ITEM)
|
|
param->field_count++;
|
|
else
|
|
param->func_count++;
|
|
}
|
|
}
|
|
param->func_count++;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
param->func_count++;
|
|
if (reset_with_sum_func)
|
|
field->with_flags&= ~item_with_t::SUM_FUNC;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Return 1 if second is a subpart of first argument.
|
|
|
|
SIDE EFFECT:
|
|
For all the first items in the group by list that match, the sort
|
|
direction of the GROUP BY items are set to the same as those given by the
|
|
ORDER BY.
|
|
The direction of the group does not matter if the ORDER BY clause overrides
|
|
it anyway.
|
|
*/
|
|
|
|
static bool
|
|
test_if_subpart(ORDER *group_by, ORDER *order_by)
|
|
{
|
|
while (group_by && order_by)
|
|
{
|
|
if ((*group_by->item)->eq(*order_by->item, 1))
|
|
group_by->direction= order_by->direction;
|
|
else
|
|
return 0;
|
|
group_by= group_by->next;
|
|
order_by= order_by->next;
|
|
}
|
|
return MY_TEST(!order_by);
|
|
}
|
|
|
|
/**
|
|
Return table number if there is only one table in sort order
|
|
and group and order is compatible, else return 0.
|
|
*/
|
|
|
|
static TABLE *
|
|
get_sort_by_table(ORDER *a,ORDER *b, List<TABLE_LIST> &tables,
|
|
table_map const_tables)
|
|
{
|
|
TABLE_LIST *table;
|
|
List_iterator<TABLE_LIST> ti(tables);
|
|
table_map map= (table_map) 0;
|
|
DBUG_ENTER("get_sort_by_table");
|
|
|
|
if (!a)
|
|
a=b; // Only one need to be given
|
|
else if (!b)
|
|
b=a;
|
|
|
|
for (; a && b; a=a->next,b=b->next)
|
|
{
|
|
/* Skip elements of a that are constant */
|
|
while (!((*a->item)->used_tables() & ~const_tables))
|
|
{
|
|
if (!(a= a->next))
|
|
break;
|
|
}
|
|
|
|
/* Skip elements of b that are constant */
|
|
while (!((*b->item)->used_tables() & ~const_tables))
|
|
{
|
|
if (!(b= b->next))
|
|
break;
|
|
}
|
|
|
|
if (!a || !b)
|
|
break;
|
|
|
|
if (!(*a->item)->eq(*b->item,1))
|
|
DBUG_RETURN(0);
|
|
map|=a->item[0]->used_tables();
|
|
}
|
|
if (!map || (map & (RAND_TABLE_BIT | OUTER_REF_TABLE_BIT)))
|
|
DBUG_RETURN(0);
|
|
|
|
map&= ~const_tables;
|
|
while ((table= ti++) && !(map & table->table->map)) ;
|
|
if (map != table->table->map)
|
|
DBUG_RETURN(0); // More than one table
|
|
DBUG_PRINT("exit",("sort by table: %d",table->table->tablenr));
|
|
DBUG_RETURN(table->table);
|
|
}
|
|
|
|
|
|
/**
|
|
calc how big buffer we need for comparing group entries.
|
|
*/
|
|
|
|
void calc_group_buffer(TMP_TABLE_PARAM *param, ORDER *group)
|
|
{
|
|
uint key_length=0, parts=0, null_parts=0;
|
|
|
|
for (; group ; group=group->next)
|
|
{
|
|
Item *group_item= *group->item;
|
|
Field *field= group_item->get_tmp_table_field();
|
|
if (field)
|
|
{
|
|
enum_field_types type;
|
|
if ((type= field->type()) == MYSQL_TYPE_BLOB)
|
|
key_length+=MAX_BLOB_WIDTH; // Can't be used as a key
|
|
else if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_VAR_STRING)
|
|
key_length+= field->field_length + HA_KEY_BLOB_LENGTH;
|
|
else if (type == MYSQL_TYPE_BIT)
|
|
{
|
|
/* Bit is usually stored as a longlong key for group fields */
|
|
key_length+= 8; // Big enough
|
|
}
|
|
else
|
|
key_length+= field->pack_length();
|
|
}
|
|
else
|
|
{
|
|
switch (group_item->cmp_type()) {
|
|
case REAL_RESULT:
|
|
key_length+= sizeof(double);
|
|
break;
|
|
case INT_RESULT:
|
|
key_length+= sizeof(longlong);
|
|
break;
|
|
case DECIMAL_RESULT:
|
|
key_length+= my_decimal_get_binary_size(group_item->max_length -
|
|
(group_item->decimals ? 1 : 0),
|
|
group_item->decimals);
|
|
break;
|
|
case TIME_RESULT:
|
|
{
|
|
/*
|
|
As items represented as DATE/TIME fields in the group buffer
|
|
have STRING_RESULT result type, we increase the length
|
|
by 8 as maximum pack length of such fields.
|
|
*/
|
|
key_length+= 8;
|
|
break;
|
|
}
|
|
case STRING_RESULT:
|
|
{
|
|
enum enum_field_types type= group_item->field_type();
|
|
if (type == MYSQL_TYPE_BLOB)
|
|
key_length+= MAX_BLOB_WIDTH; // Can't be used as a key
|
|
else
|
|
{
|
|
/*
|
|
Group strings are taken as varstrings and require an length field.
|
|
A field is not yet created by create_tmp_field_ex()
|
|
and the sizes should match up.
|
|
*/
|
|
key_length+= group_item->max_length + HA_KEY_BLOB_LENGTH;
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
/* This case should never be chosen */
|
|
DBUG_ASSERT(0);
|
|
my_error(ER_OUT_OF_RESOURCES, MYF(ME_FATAL));
|
|
}
|
|
}
|
|
parts++;
|
|
if (group_item->maybe_null())
|
|
null_parts++;
|
|
}
|
|
param->group_length= key_length + null_parts;
|
|
param->group_parts= parts;
|
|
param->group_null_parts= null_parts;
|
|
}
|
|
|
|
/*
  JOIN flavour of calc_group_buffer(): flags the join as grouped when a
  group list is given, then sizes the group buffer in
  join->tmp_table_param.
*/
static void calc_group_buffer(JOIN *join, ORDER *group)
{
  if (group != NULL)
  {
    join->group= 1;
  }
  calc_group_buffer(&join->tmp_table_param, group);
}
|
|
|
|
|
|
/**
|
|
allocate group fields or take prepared (cached).
|
|
|
|
@param main_join join of current select
|
|
@param curr_join current join (join of current select or temporary copy
|
|
of it)
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 failed
|
|
*/
|
|
|
|
static bool
|
|
make_group_fields(JOIN *main_join, JOIN *curr_join)
|
|
{
|
|
if (main_join->group_fields_cache.elements)
|
|
{
|
|
curr_join->group_fields= main_join->group_fields_cache;
|
|
curr_join->sort_and_group= 1;
|
|
}
|
|
else
|
|
{
|
|
if (alloc_group_fields(curr_join, curr_join->group_list))
|
|
return (1);
|
|
main_join->group_fields_cache= curr_join->group_fields;
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
static bool
|
|
fill_cached_item_list(THD *thd, List<Cached_item> *list, ORDER *order,
|
|
uint max_number_of_elements = UINT_MAX)
|
|
{
|
|
for (; order && max_number_of_elements ;
|
|
order= order->next, max_number_of_elements--)
|
|
{
|
|
Cached_item *tmp= new_Cached_item(thd, *order->item, true);
|
|
if (!tmp || list->push_front(tmp))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/**
|
|
Get a list of buffers for saving last group.
|
|
|
|
Groups are saved in reverse order for easier check loop.
|
|
*/
|
|
|
|
static bool
|
|
alloc_group_fields(JOIN *join, ORDER *group)
|
|
{
|
|
if (fill_cached_item_list(join->thd, &join->group_fields, group))
|
|
return true;
|
|
join->sort_and_group=1; /* Mark for do_select */
|
|
return false;
|
|
}
|
|
|
|
static bool
|
|
alloc_order_fields(JOIN *join, ORDER *order, uint max_number_of_elements)
|
|
{
|
|
return fill_cached_item_list(join->thd, &join->order_fields, order,
|
|
max_number_of_elements);
|
|
}
|
|
|
|
|
|
/*
|
|
Test if a single-row cache of items changed, and update the cache.
|
|
|
|
@details Test if a list of items that typically represents a result
|
|
row has changed. If the value of some item changed, update the cached
|
|
value for this item.
|
|
|
|
@param list list of <item, cached_value> pairs stored as Cached_item.
|
|
|
|
@return -1 if no item changed
|
|
@return index of the first item that changed
|
|
*/
|
|
|
|
int test_if_item_cache_changed(List<Cached_item> &list)
|
|
{
|
|
DBUG_ENTER("test_if_item_cache_changed");
|
|
List_iterator<Cached_item> li(list);
|
|
int idx= -1,i;
|
|
Cached_item *buff;
|
|
|
|
for (i=(int) list.elements-1 ; (buff=li++) ; i--)
|
|
{
|
|
if (buff->cmp())
|
|
idx=i;
|
|
}
|
|
DBUG_PRINT("info", ("idx: %d", idx));
|
|
DBUG_RETURN(idx);
|
|
}
|
|
|
|
|
|
/*
|
|
@return
|
|
-1 - Group not changed
|
|
value>=0 - Number of the component where the group changed
|
|
*/
|
|
|
|
int
|
|
test_if_group_changed(List<Cached_item> &list)
|
|
{
|
|
DBUG_ENTER("test_if_group_changed");
|
|
List_iterator<Cached_item> li(list);
|
|
int idx= -1,i;
|
|
Cached_item *buff;
|
|
|
|
for (i=(int) list.elements-1 ; (buff=li++) ; i--)
|
|
{
|
|
if (buff->cmp())
|
|
idx=i;
|
|
}
|
|
DBUG_PRINT("info", ("idx: %d", idx));
|
|
DBUG_RETURN(idx);
|
|
}
|
|
|
|
|
|
/**
|
|
Setup copy_fields to save fields at start of new group.
|
|
|
|
Setup copy_fields to save fields at start of new group
|
|
|
|
Only FIELD_ITEM:s and FUNC_ITEM:s needs to be saved between groups.
|
|
Change old item_field to use a new field with points at saved fieldvalue
|
|
This function is only called before use of send_result_set_metadata.
|
|
|
|
@param thd THD pointer
|
|
@param param temporary table parameters
|
|
@param ref_pointer_array array of pointers to top elements of filed list
|
|
@param res_selected_fields new list of items of select item list
|
|
@param res_all_fields new list of all items
|
|
@param elements number of elements in select item list
|
|
@param all_fields all fields list
|
|
|
|
@todo
|
|
In most cases this result will be sent to the user.
|
|
This should be changed to use copy_int or copy_real depending
|
|
on how the value is to be used: In some cases this may be an
|
|
argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
!=0 error
|
|
*/
|
|
|
|
bool
|
|
setup_copy_fields(THD *thd, TMP_TABLE_PARAM *param,
|
|
Ref_ptr_array ref_pointer_array,
|
|
List<Item> &res_selected_fields, List<Item> &res_all_fields,
|
|
uint elements, List<Item> &all_fields)
|
|
{
|
|
Item *pos;
|
|
List_iterator_fast<Item> li(all_fields);
|
|
Copy_field *copy= NULL;
|
|
Copy_field *copy_start __attribute__((unused));
|
|
res_selected_fields.empty();
|
|
res_all_fields.empty();
|
|
List_iterator_fast<Item> itr(res_all_fields);
|
|
List<Item> extra_funcs;
|
|
uint i, border= all_fields.elements - elements;
|
|
DBUG_ENTER("setup_copy_fields");
|
|
|
|
if (param->field_count &&
|
|
!(copy=param->copy_field= new (thd->mem_root) Copy_field[param->field_count]))
|
|
goto err2;
|
|
|
|
param->copy_funcs.empty();
|
|
copy_start= copy;
|
|
for (i= 0; (pos= li++); i++)
|
|
{
|
|
Field *field;
|
|
uchar *tmp;
|
|
Item *real_pos= pos->real_item();
|
|
/*
|
|
Aggregate functions can be substituted for fields (by e.g. temp tables).
|
|
We need to filter those substituted fields out.
|
|
*/
|
|
if (real_pos->type() == Item::FIELD_ITEM &&
|
|
!(real_pos != pos &&
|
|
((Item_ref *)pos)->ref_type() == Item_ref::AGGREGATE_REF))
|
|
{
|
|
Item_field *item;
|
|
if (!(item= new (thd->mem_root) Item_field(thd, ((Item_field*) real_pos))))
|
|
goto err;
|
|
if (pos->type() == Item::REF_ITEM)
|
|
{
|
|
/* preserve the names of the ref when dereferncing */
|
|
Item_ref *ref= (Item_ref *) pos;
|
|
item->db_name= ref->db_name;
|
|
item->table_name= ref->table_name;
|
|
item->name= ref->name;
|
|
}
|
|
pos= item;
|
|
if (item->field->flags & BLOB_FLAG)
|
|
{
|
|
if (!(pos= new (thd->mem_root) Item_copy_string(thd, pos)))
|
|
goto err;
|
|
/*
|
|
Item_copy_string::copy for function can call
|
|
Item_copy_string::val_int for blob via Item_ref.
|
|
But if Item_copy_string::copy for blob isn't called before,
|
|
it's value will be wrong
|
|
so let's insert Item_copy_string for blobs in the beginning of
|
|
copy_funcs
|
|
(to see full test case look at having.test, BUG #4358)
|
|
*/
|
|
if (param->copy_funcs.push_front(pos, thd->mem_root))
|
|
goto err;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
set up save buffer and change result_field to point at
|
|
saved value
|
|
*/
|
|
field= item->field;
|
|
item->result_field=field->make_new_field(thd->mem_root,
|
|
field->table, 1);
|
|
/*
|
|
We need to allocate one extra byte for null handling and
|
|
another extra byte to not get warnings from purify in
|
|
Field_string::val_int
|
|
*/
|
|
if (!(tmp= thd->alloc<uchar>(field->pack_length()+2)))
|
|
goto err;
|
|
if (copy)
|
|
{
|
|
DBUG_ASSERT (param->field_count > (uint) (copy - copy_start));
|
|
copy->set(tmp, item->result_field);
|
|
item->result_field->move_field(copy->to_ptr,copy->to_null_ptr,1);
|
|
#ifdef HAVE_valgrind
|
|
copy->to_ptr[copy->from_length]= 0;
|
|
#endif
|
|
copy++;
|
|
}
|
|
}
|
|
}
|
|
else if ((real_pos->type() == Item::FUNC_ITEM ||
|
|
real_pos->real_type() == Item::SUBSELECT_ITEM ||
|
|
real_pos->type() == Item::CACHE_ITEM ||
|
|
real_pos->type() == Item::COND_ITEM) &&
|
|
!real_pos->with_sum_func())
|
|
{ // Save for send fields
|
|
const Lex_ident_column real_name= pos->name;
|
|
pos= real_pos;
|
|
pos->name= real_name;
|
|
/* TODO:
|
|
In most cases this result will be sent to the user.
|
|
This should be changed to use copy_int or copy_real depending
|
|
on how the value is to be used: In some cases this may be an
|
|
argument in a group function, like: IF(ISNULL(col),0,COUNT(*))
|
|
*/
|
|
if (!(pos= pos->type_handler()->create_item_copy(thd, pos)))
|
|
goto err;
|
|
if (i < border) // HAVING, ORDER and GROUP BY
|
|
{
|
|
if (extra_funcs.push_back(pos, thd->mem_root))
|
|
goto err;
|
|
}
|
|
else if (param->copy_funcs.push_back(pos, thd->mem_root))
|
|
goto err;
|
|
}
|
|
res_all_fields.push_back(pos, thd->mem_root);
|
|
ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
|
|
pos;
|
|
}
|
|
param->copy_field_end= copy;
|
|
|
|
for (i= 0; i < border; i++)
|
|
itr++;
|
|
itr.sublist(res_selected_fields, elements);
|
|
/*
|
|
Put elements from HAVING, ORDER BY and GROUP BY last to ensure that any
|
|
reference used in these will resolve to a item that is already calculated
|
|
*/
|
|
param->copy_funcs.append(&extra_funcs);
|
|
|
|
DBUG_RETURN(0);
|
|
|
|
err:
|
|
if (copy)
|
|
delete [] param->copy_field; // This is never 0
|
|
param->copy_field= 0;
|
|
err2:
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
Make a copy of all simple SELECT'ed items.
|
|
|
|
This is done at the start of a new group so that we can retrieve
|
|
these later when the group changes.
|
|
*/
|
|
|
|
void
|
|
copy_fields(TMP_TABLE_PARAM *param)
|
|
{
|
|
Copy_field *ptr=param->copy_field;
|
|
Copy_field *end=param->copy_field_end;
|
|
|
|
DBUG_ASSERT((ptr != NULL && end >= ptr) || (ptr == NULL && end == NULL));
|
|
|
|
for (; ptr != end; ptr++)
|
|
(*ptr->do_copy)(ptr);
|
|
|
|
List_iterator_fast<Item> it(param->copy_funcs);
|
|
Item_copy *item;
|
|
while ((item= (Item_copy*) it++))
|
|
item->copy();
|
|
}
|
|
|
|
|
|
/**
|
|
Make an array of pointers to sum_functions to speed up
|
|
sum_func calculation.
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 Error
|
|
*/
|
|
|
|
bool JOIN::alloc_func_list()
|
|
{
|
|
uint func_count, group_parts;
|
|
DBUG_ENTER("alloc_func_list");
|
|
|
|
func_count= tmp_table_param.sum_func_count;
|
|
/*
|
|
If we are using rollup, we need a copy of the summary functions for
|
|
each level
|
|
*/
|
|
if (rollup.state != ROLLUP::STATE_NONE)
|
|
func_count*= (send_group_parts+1);
|
|
|
|
group_parts= send_group_parts;
|
|
/*
|
|
If distinct, reserve memory for possible
|
|
disctinct->group_by optimization
|
|
*/
|
|
if (select_distinct)
|
|
{
|
|
group_parts+= fields_list.elements;
|
|
/*
|
|
If the ORDER clause is specified then it's possible that
|
|
it also will be optimized, so reserve space for it too
|
|
*/
|
|
if (order)
|
|
{
|
|
ORDER *ord;
|
|
for (ord= order; ord; ord= ord->next)
|
|
group_parts++;
|
|
}
|
|
}
|
|
|
|
/* This must use calloc() as rollup_make_fields depends on this */
|
|
sum_funcs= (Item_sum**) thd->calloc(sizeof(Item_sum**) * (func_count+1) +
|
|
sizeof(Item_sum***) * (group_parts+1));
|
|
sum_funcs_end= (Item_sum***) (sum_funcs+func_count+1);
|
|
DBUG_RETURN(sum_funcs == 0);
|
|
}
|
|
|
|
|
|
/**
|
|
Initialize 'sum_funcs' array with all Item_sum objects.
|
|
|
|
@param field_list All items
|
|
@param send_result_set_metadata Items in select list
|
|
@param before_group_by Set to 1 if this is called before GROUP BY handling
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 error
|
|
*/
|
|
|
|
bool JOIN::make_sum_func_list(List<Item> &field_list,
|
|
List<Item> &send_result_set_metadata,
|
|
bool before_group_by)
|
|
{
|
|
List_iterator_fast<Item> it(field_list);
|
|
Item_sum **func;
|
|
Item *item;
|
|
DBUG_ENTER("make_sum_func_list");
|
|
|
|
func= sum_funcs;
|
|
while ((item=it++))
|
|
{
|
|
if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
|
|
(!((Item_sum*) item)->depended_from() ||
|
|
((Item_sum *)item)->depended_from() == select_lex))
|
|
*func++= (Item_sum*) item;
|
|
}
|
|
if (before_group_by && rollup.state == ROLLUP::STATE_INITED)
|
|
{
|
|
rollup.state= ROLLUP::STATE_READY;
|
|
if (rollup_make_fields(field_list, send_result_set_metadata, &func))
|
|
DBUG_RETURN(TRUE); // Should never happen
|
|
}
|
|
else if (rollup.state == ROLLUP::STATE_NONE)
|
|
{
|
|
for (uint i=0 ; i <= send_group_parts ;i++)
|
|
sum_funcs_end[i]= func;
|
|
}
|
|
else if (rollup.state == ROLLUP::STATE_READY)
|
|
DBUG_RETURN(FALSE); // Don't put end marker
|
|
*func=0; // End marker
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
Change all funcs and sum_funcs to fields in tmp table, and create
|
|
new list of all items.
|
|
|
|
@param thd THD pointer
|
|
@param ref_pointer_array array of pointers to top elements of filed list
|
|
@param res_selected_fields new list of items of select item list
|
|
@param res_all_fields new list of all items
|
|
@param elements number of elements in select item list
|
|
@param all_fields all fields list
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
!=0 error
|
|
*/
|
|
|
|
static bool
|
|
change_to_use_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &res_selected_fields,
|
|
List<Item> &res_all_fields,
|
|
uint elements, List<Item> &all_fields)
|
|
{
|
|
List_iterator_fast<Item> it(all_fields);
|
|
Item *item_field,*item;
|
|
DBUG_ENTER("change_to_use_tmp_fields");
|
|
|
|
res_selected_fields.empty();
|
|
res_all_fields.empty();
|
|
|
|
uint border= all_fields.elements - elements;
|
|
for (uint i= 0; (item= it++); i++)
|
|
{
|
|
Field *field;
|
|
/*
|
|
SUM_FUNC_ITEM will be replaced by the calculated value which is
|
|
stored in the temporary table.
|
|
The first part of the following test is for items that are expressions
|
|
with SUM_FUNC_ITEMS, like 'sum(a)+1'. In this case we keep the original
|
|
item, which contain an Item_ref that points to the SUM_FUNC_ITEM that
|
|
will be replaced with a pointer to the calculated value.
|
|
The second test is for window functions. Window functions contains
|
|
only pointers to Item_refs, which will be adjusted to point to the
|
|
temporary table.
|
|
*/
|
|
enum Item::Type item_type= item->type();
|
|
if ((item->with_sum_func() && item_type != Item::SUM_FUNC_ITEM) ||
|
|
item->with_window_func())
|
|
item_field= item;
|
|
else if (item_type == Item::FIELD_ITEM ||
|
|
item_type == Item::DEFAULT_VALUE_ITEM)
|
|
{
|
|
if (!(item_field= item->get_tmp_table_item(thd)))
|
|
DBUG_RETURN(true);
|
|
}
|
|
else if (item_type == Item::FUNC_ITEM &&
|
|
((Item_func*)item)->functype() == Item_func::SUSERVAR_FUNC)
|
|
{
|
|
field= item->get_tmp_table_field();
|
|
if (field != NULL)
|
|
{
|
|
/*
|
|
Replace "@:=<expression>" with "@:=<tmp table
|
|
column>". Otherwise, we would re-evaluate <expression>, and
|
|
if expression were a subquery, this would access
|
|
already-unlocked tables.
|
|
*/
|
|
Item_func_set_user_var* suv=
|
|
new (thd->mem_root) Item_func_set_user_var(thd, (Item_func_set_user_var*) item);
|
|
Item_field *new_field= new (thd->mem_root) Item_field(thd, field);
|
|
if (!suv || !new_field)
|
|
DBUG_RETURN(true); // Fatal error
|
|
new_field->set_refers_to_temp_table();
|
|
List<Item> list;
|
|
list.push_back(new_field, thd->mem_root);
|
|
suv->set_arguments(thd, list);
|
|
item_field= suv;
|
|
}
|
|
else
|
|
item_field= item;
|
|
}
|
|
else if ((field= item->get_tmp_table_field()))
|
|
{
|
|
if (item->type() == Item::SUM_FUNC_ITEM && field->table->group)
|
|
{
|
|
item_field= ((Item_sum*) item)->result_item(thd, field);
|
|
}
|
|
else
|
|
{
|
|
item_field= (Item*) new (thd->mem_root) Item_field(thd, field);
|
|
if (item_field)
|
|
((Item_field*) item_field)->set_refers_to_temp_table();
|
|
}
|
|
if (!item_field)
|
|
DBUG_RETURN(true); // Fatal error
|
|
|
|
if (item->real_item()->type() != Item::FIELD_ITEM)
|
|
field->orig_table= 0;
|
|
item_field->name= item->name;
|
|
if (item->type() == Item::REF_ITEM)
|
|
{
|
|
Item_field *ifield= (Item_field *) item_field;
|
|
Item_ref *iref= (Item_ref *) item;
|
|
ifield->table_name= iref->table_name;
|
|
ifield->db_name= iref->db_name;
|
|
}
|
|
#ifndef DBUG_OFF
|
|
if (!item_field->name.str)
|
|
{
|
|
char buff[256];
|
|
String str(buff,sizeof(buff),&my_charset_bin);
|
|
str.length(0);
|
|
str.extra_allocation(1024);
|
|
item->print(&str, QT_ORDINARY);
|
|
item_field->name.str= thd->strmake(str.ptr(), str.length());
|
|
item_field->name.length= str.length();
|
|
}
|
|
#endif
|
|
}
|
|
else
|
|
item_field= item;
|
|
|
|
res_all_fields.push_back(item_field, thd->mem_root);
|
|
ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
|
|
item_field;
|
|
}
|
|
|
|
List_iterator_fast<Item> itr(res_all_fields);
|
|
for (uint i= 0; i < border; i++)
|
|
itr++;
|
|
itr.sublist(res_selected_fields, elements);
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
/**
|
|
Change all sum_func refs to fields to point at fields in tmp table.
|
|
Change all funcs to be fields in tmp table.
|
|
|
|
@param thd THD pointer
|
|
@param ref_pointer_array array of pointers to top elements of field list
|
|
@param res_selected_fields new list of items of select item list
|
|
@param res_all_fields new list of all items
|
|
@param elements number of elements in select item list
|
|
@param all_fields all fields list
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 error
|
|
*/
|
|
|
|
static bool
|
|
change_refs_to_tmp_fields(THD *thd, Ref_ptr_array ref_pointer_array,
|
|
List<Item> &res_selected_fields,
|
|
List<Item> &res_all_fields, uint elements,
|
|
List<Item> &all_fields)
|
|
{
|
|
List_iterator_fast<Item> it(all_fields);
|
|
Item *item, *new_item;
|
|
res_selected_fields.empty();
|
|
res_all_fields.empty();
|
|
|
|
uint i, border= all_fields.elements - elements;
|
|
for (i= 0; (item= it++); i++)
|
|
{
|
|
if (item->type() == Item::SUM_FUNC_ITEM && item->const_item())
|
|
new_item= item;
|
|
else
|
|
{
|
|
if (!(new_item= item->get_tmp_table_item(thd)))
|
|
return 1;
|
|
}
|
|
|
|
if (res_all_fields.push_back(new_item, thd->mem_root))
|
|
return 1;
|
|
ref_pointer_array[((i < border)? all_fields.elements-i-1 : i-border)]=
|
|
new_item;
|
|
}
|
|
|
|
List_iterator_fast<Item> itr(res_all_fields);
|
|
for (i= 0; i < border; i++)
|
|
itr++;
|
|
itr.sublist(res_selected_fields, elements);
|
|
|
|
return thd->is_error();
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************************
|
|
Code for calculating functions
|
|
******************************************************************************/
|
|
|
|
|
|
/**
|
|
Call ::setup for all sum functions.
|
|
|
|
@param thd thread handler
|
|
@param func_ptr sum function list
|
|
|
|
@retval
|
|
FALSE ok
|
|
@retval
|
|
TRUE error
|
|
*/
|
|
|
|
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
|
|
{
|
|
Item_sum *func;
|
|
DBUG_ENTER("setup_sum_funcs");
|
|
while ((func= *(func_ptr++)))
|
|
{
|
|
if (func->aggregator_setup(thd))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Setup aggregate functions.
|
|
|
|
@param thd Thread descriptor
|
|
@param func_ptr Array of pointers to aggregate functions
|
|
@param need_distinct FALSE means that the table access method already
|
|
guarantees that arguments of all aggregate functions
|
|
will be unique. (This is the case for Loose Scan)
|
|
TRUE - Otherwise.
|
|
@return
|
|
false Ok
|
|
true Error
|
|
*/
|
|
|
|
bool JOIN::prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
|
|
bool need_distinct)
|
|
{
|
|
Item_sum *func;
|
|
DBUG_ENTER("prepare_sum_aggregators");
|
|
while ((func= *(func_ptr++)))
|
|
{
|
|
bool need_distinct_aggregator= need_distinct && func->has_with_distinct();
|
|
if (need_distinct_aggregator && table_count - const_tables == 1)
|
|
{
|
|
/*
|
|
We are doing setup for an aggregate with DISTINCT, like
|
|
|
|
SELECT agg_func(DISTINCT col1, col2 ...) FROM ...
|
|
|
|
In general case, agg_func will need to use Aggregator_distinct to
|
|
remove duplicates from its arguments.
|
|
We won't have to remove duplicates if we know the arguments are already
|
|
unique. This is true when
|
|
1. the join operation has only one non-const table (checked above)
|
|
2. the argument list covers a PRIMARY or a UNIQUE index.
|
|
|
|
Example: here the values of t1.pk are unique:
|
|
|
|
SELECT agg_func(DISTINCT t1.pk, ...) FROM t1
|
|
|
|
and so the whole argument of agg_func is unique.
|
|
*/
|
|
List<Item> arg_fields;
|
|
for (uint i= 0; i < func->argument_count(); i++)
|
|
{
|
|
if (func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
|
|
arg_fields.push_back(func->arguments()[i]);
|
|
}
|
|
|
|
/*
|
|
If the query has a GROUP BY, then it's sufficient that a unique
|
|
key is covered by a concatenation of {argument_list, group_by_list}.
|
|
|
|
Example: Suppose t1 has PRIMARY KEY(pk1, pk2). Then:
|
|
|
|
SELECT agg_func(DISTINCT t1.pk1, ...) FROM t1 GROUP BY t1.pk2
|
|
|
|
Each GROUP BY group will have t1.pk2 fixed. Then, the values of t1.pk1
|
|
will be unique, and no de-duplication will be needed.
|
|
*/
|
|
for (ORDER *group= group_list; group ; group= group->next)
|
|
{
|
|
if ((*group->item)->real_item()->type() == Item::FIELD_ITEM)
|
|
arg_fields.push_back(*group->item);
|
|
}
|
|
|
|
if (list_contains_unique_index(join_tab[const_tables].table,
|
|
find_field_in_item_list,
|
|
(void *) &arg_fields))
|
|
need_distinct_aggregator= false;
|
|
}
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_aggr(thd, "prepare_sum_aggregators");
|
|
trace_aggr.add("function", func);
|
|
trace_aggr.add("aggregator_type",
|
|
(need_distinct_aggregator ||
|
|
func->uses_non_standard_aggregator_for_distinct()) ?
|
|
"distinct" : "simple");
|
|
if (func->set_aggregator(thd, need_distinct_aggregator ?
|
|
Aggregator::DISTINCT_AGGREGATOR :
|
|
Aggregator::SIMPLE_AGGREGATOR))
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
DBUG_RETURN(FALSE);
|
|
}
|
|
|
|
|
|
static void
|
|
init_tmptable_sum_functions(Item_sum **func_ptr)
|
|
{
|
|
Item_sum *func;
|
|
while ((func= *(func_ptr++)))
|
|
func->reset_field();
|
|
}
|
|
|
|
|
|
/** Update record 0 in tmp_table from record 1. */
|
|
|
|
static void
|
|
update_tmptable_sum_func(Item_sum **func_ptr,
|
|
TABLE *tmp_table __attribute__((unused)))
|
|
{
|
|
Item_sum *func;
|
|
while ((func= *(func_ptr++)))
|
|
func->update_field();
|
|
}
|
|
|
|
|
|
/** Copy result of sum functions to record in tmp_table. */
|
|
|
|
static void
|
|
copy_sum_funcs(Item_sum **func_ptr, Item_sum **end_ptr)
|
|
{
|
|
for (; func_ptr != end_ptr ; func_ptr++)
|
|
(void) (*func_ptr)->save_in_result_field(1);
|
|
return;
|
|
}
|
|
|
|
|
|
static bool
|
|
init_sum_functions(Item_sum **func_ptr, Item_sum **end_ptr)
|
|
{
|
|
for (; func_ptr != end_ptr ;func_ptr++)
|
|
{
|
|
if ((*func_ptr)->reset_and_add())
|
|
return 1;
|
|
}
|
|
/* If rollup, calculate the upper sum levels */
|
|
for ( ; *func_ptr ; func_ptr++)
|
|
{
|
|
if ((*func_ptr)->aggregator_add())
|
|
return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
static bool
|
|
update_sum_func(Item_sum **func_ptr)
|
|
{
|
|
Item_sum *func;
|
|
for (; (func= (Item_sum*) *func_ptr) ; func_ptr++)
|
|
if (func->aggregator_add())
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Copy result of functions to record in tmp_table.
|
|
|
|
Uses the thread pointer to check for errors in
|
|
some of the val_xxx() methods called by the
|
|
save_in_result_field() function.
|
|
TODO: make the Item::val_xxx() return error code
|
|
|
|
@param func_ptr array of the function Items to copy to the tmp table
|
|
@param thd pointer to the current thread for error checking
|
|
@retval
|
|
FALSE if OK
|
|
@retval
|
|
TRUE on error
|
|
*/
|
|
|
|
bool
|
|
copy_funcs(Item **func_ptr, const THD *thd)
|
|
{
|
|
Item *func;
|
|
for (; (func = *func_ptr) ; func_ptr++)
|
|
{
|
|
if (func->type() == Item::FUNC_ITEM &&
|
|
((Item_func *) func)->with_window_func())
|
|
continue;
|
|
func->save_in_result_field(1);
|
|
/*
|
|
Need to check the THD error state because Item::val_xxx() don't
|
|
return error code, but can generate errors
|
|
TODO: change it for a real status check when Item::val_xxx()
|
|
are extended to return status code.
|
|
*/
|
|
if (unlikely(thd->is_error()))
|
|
return TRUE;
|
|
}
|
|
return FALSE;
|
|
}
|
|
|
|
|
|
/**
|
|
Create a condition for a const reference and add this to the
|
|
currenct select for the table.
|
|
*/
|
|
|
|
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab)
|
|
{
|
|
DBUG_ENTER("add_ref_to_table_cond");
|
|
if (!join_tab->ref.key_parts)
|
|
DBUG_RETURN(FALSE);
|
|
|
|
Item_cond_and *cond= new (thd->mem_root) Item_cond_and(thd);
|
|
TABLE *table=join_tab->table;
|
|
int error= 0;
|
|
if (!cond)
|
|
DBUG_RETURN(TRUE);
|
|
|
|
for (uint i=0 ; i < join_tab->ref.key_parts ; i++)
|
|
{
|
|
Field *field=table->field[table->key_info[join_tab->ref.key].key_part[i].
|
|
fieldnr-1];
|
|
Item *value=join_tab->ref.items[i];
|
|
cond->add(new (thd->mem_root)
|
|
Item_func_equal(thd, new (thd->mem_root) Item_field(thd, field),
|
|
value),
|
|
thd->mem_root);
|
|
}
|
|
if (unlikely(thd->is_error()))
|
|
DBUG_RETURN(TRUE);
|
|
if (!cond->fixed())
|
|
{
|
|
Item *tmp_item= (Item*) cond;
|
|
cond->fix_fields(thd, &tmp_item);
|
|
DBUG_ASSERT(cond == tmp_item);
|
|
}
|
|
if (join_tab->select)
|
|
{
|
|
Item *UNINIT_VAR(cond_copy);
|
|
if (join_tab->select->pre_idx_push_select_cond)
|
|
cond_copy= cond->copy_andor_structure(thd);
|
|
if (join_tab->select->cond)
|
|
error=(int) cond->add(join_tab->select->cond, thd->mem_root);
|
|
join_tab->select->cond= cond;
|
|
if (join_tab->select->pre_idx_push_select_cond)
|
|
{
|
|
Item *new_cond= and_conds(thd, cond_copy,
|
|
join_tab->select->pre_idx_push_select_cond);
|
|
if (new_cond->fix_fields_if_needed(thd, &new_cond))
|
|
error= 1;
|
|
join_tab->pre_idx_push_select_cond=
|
|
join_tab->select->pre_idx_push_select_cond= new_cond;
|
|
}
|
|
join_tab->set_select_cond(cond, __LINE__);
|
|
}
|
|
else if ((join_tab->select= make_select(join_tab->table, 0, 0, cond,
|
|
(SORT_INFO*) 0, 0, &error)))
|
|
join_tab->set_select_cond(cond, __LINE__);
|
|
|
|
DBUG_RETURN(error ? TRUE : FALSE);
|
|
}
|
|
|
|
|
|
/**
|
|
Free joins of subselect of this select.
|
|
|
|
@param thd THD pointer
|
|
@param select pointer to st_select_lex which subselects joins we will free
|
|
*/
|
|
|
|
void free_underlaid_joins(THD *thd, SELECT_LEX *select)
|
|
{
|
|
for (SELECT_LEX_UNIT *unit= select->first_inner_unit();
|
|
unit;
|
|
unit= unit->next_unit())
|
|
unit->cleanup();
|
|
}
|
|
|
|
/****************************************************************************
|
|
ROLLUP handling
|
|
****************************************************************************/
|
|
|
|
/**
|
|
Replace occurrences of group by fields in an expression by ref items.
|
|
|
|
The function replaces occurrences of group by fields in expr
|
|
by ref objects for these fields unless they are under aggregate
|
|
functions.
|
|
The function also corrects value of the the maybe_null attribute
|
|
for the items of all subexpressions containing group by fields.
|
|
|
|
@b EXAMPLES
|
|
@code
|
|
SELECT a+1 FROM t1 GROUP BY a WITH ROLLUP
|
|
SELECT SUM(a)+a FROM t1 GROUP BY a WITH ROLLUP
|
|
@endcode
|
|
|
|
@b IMPLEMENTATION
|
|
|
|
The function recursively traverses the tree of the expr expression,
|
|
looks for occurrences of the group by fields that are not under
|
|
aggregate functions and replaces them for the corresponding ref items.
|
|
|
|
@note
|
|
This substitution is needed GROUP BY queries with ROLLUP if
|
|
SELECT list contains expressions over group by attributes.
|
|
|
|
@param thd reference to the context
|
|
@param expr expression to make replacement
|
|
@param group_list list of references to group by items
|
|
@param changed out: returns 1 if item contains a replaced field item
|
|
|
|
@todo
|
|
- TODO: Some functions are not null-preserving. For those functions
|
|
updating of the maybe_null attribute is an overkill.
|
|
|
|
@retval
|
|
0 if ok
|
|
@retval
|
|
1 on error
|
|
*/
|
|
|
|
static bool change_group_ref(THD *thd, Item_func *expr, ORDER *group_list,
|
|
bool *changed)
|
|
{
|
|
if (expr->argument_count())
|
|
{
|
|
Name_resolution_context *context= &thd->lex->current_select->context;
|
|
Item **arg,**arg_end;
|
|
bool arg_changed= FALSE;
|
|
for (arg= expr->arguments(),
|
|
arg_end= expr->arguments() + expr->argument_count();
|
|
arg != arg_end; arg++)
|
|
{
|
|
Item *item= *arg;
|
|
if (item->type() == Item::FIELD_ITEM || item->type() == Item::REF_ITEM)
|
|
{
|
|
ORDER *group_tmp;
|
|
for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
|
|
{
|
|
if (item->eq(*group_tmp->item,0))
|
|
{
|
|
Item *new_item;
|
|
if (!(new_item= new (thd->mem_root) Item_ref(thd, context,
|
|
group_tmp->item,
|
|
null_clex_str,
|
|
item->name)))
|
|
return 1; // fatal_error is set
|
|
thd->change_item_tree(arg, new_item);
|
|
arg_changed= TRUE;
|
|
}
|
|
}
|
|
}
|
|
else if (item->type() == Item::FUNC_ITEM)
|
|
{
|
|
if (change_group_ref(thd, (Item_func *) item, group_list, &arg_changed))
|
|
return 1;
|
|
}
|
|
}
|
|
if (arg_changed)
|
|
{
|
|
expr->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP;
|
|
*changed= TRUE;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/** Allocate memory needed for other rollup functions. */
|
|
|
|
bool JOIN::rollup_init()
|
|
{
|
|
uint i,j;
|
|
Item **ref_array;
|
|
|
|
tmp_table_param.quick_group= 0; // Can't create groups in tmp table
|
|
/*
|
|
Each group can potentially be replaced with Item_func_rollup_const() which
|
|
needs a copy_func placeholder.
|
|
*/
|
|
tmp_table_param.func_count+= send_group_parts;
|
|
rollup.state= ROLLUP::STATE_INITED;
|
|
|
|
/*
|
|
Create pointers to the different sum function groups
|
|
These are updated by rollup_make_fields()
|
|
*/
|
|
tmp_table_param.group_parts= send_group_parts;
|
|
|
|
Item_null_result **null_items= thd->alloc<Item_null_result*>(send_group_parts);
|
|
|
|
rollup.null_items= Item_null_array(null_items, send_group_parts);
|
|
rollup.ref_pointer_arrays=
|
|
reinterpret_cast<Ref_ptr_array*>
|
|
(thd->alloc((sizeof(Ref_ptr_array) +
|
|
all_fields.elements * sizeof(Item*)) * send_group_parts));
|
|
rollup.fields= thd->alloc<List<Item> >(send_group_parts);
|
|
|
|
if (!null_items || !rollup.ref_pointer_arrays || !rollup.fields)
|
|
return true;
|
|
|
|
ref_array= (Item**) (rollup.ref_pointer_arrays+send_group_parts);
|
|
|
|
/*
|
|
Prepare space for field list for the different levels
|
|
These will be filled up in rollup_make_fields()
|
|
*/
|
|
for (i= 0 ; i < send_group_parts ; i++)
|
|
{
|
|
if (!(rollup.null_items[i]= new (thd->mem_root) Item_null_result(thd)))
|
|
return true;
|
|
|
|
List<Item> *rollup_fields= &rollup.fields[i];
|
|
rollup_fields->empty();
|
|
rollup.ref_pointer_arrays[i]= Ref_ptr_array(ref_array, all_fields.elements);
|
|
ref_array+= all_fields.elements;
|
|
}
|
|
for (i= 0 ; i < send_group_parts; i++)
|
|
{
|
|
for (j=0 ; j < fields_list.elements ; j++)
|
|
rollup.fields[i].push_back(rollup.null_items[i], thd->mem_root);
|
|
}
|
|
List_iterator<Item> it(all_fields);
|
|
Item *item;
|
|
while ((item= it++))
|
|
{
|
|
ORDER *group_tmp;
|
|
bool found_in_group= 0;
|
|
|
|
for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
|
|
{
|
|
if (*group_tmp->item == item)
|
|
{
|
|
item->base_flags|= item_base_t::MAYBE_NULL | item_base_t::IN_ROLLUP;
|
|
found_in_group= 1;
|
|
break;
|
|
}
|
|
}
|
|
if (item->type() == Item::FUNC_ITEM && !found_in_group)
|
|
{
|
|
bool changed= FALSE;
|
|
if (change_group_ref(thd, (Item_func *) item, group_list, &changed))
|
|
return 1;
|
|
/*
|
|
We have to prevent creation of a field in a temporary table for
|
|
an expression that contains GROUP BY attributes.
|
|
Marking the expression item as 'with_sum_func' will ensure this.
|
|
*/
|
|
if (changed)
|
|
item->with_flags|= item_with_t::SUM_FUNC;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Wrap all constant Items in GROUP BY list.
|
|
|
|
For ROLLUP queries each constant item referenced in GROUP BY list
|
|
is wrapped up into an Item_func object yielding the same value
|
|
as the constant item. The objects of the wrapper class are never
|
|
considered as constant items and besides they inherit all
|
|
properties of the Item_result_field class.
|
|
This wrapping allows us to ensure writing constant items
|
|
into temporary tables whenever the result of the ROLLUP
|
|
operation has to be written into a temporary table, e.g. when
|
|
ROLLUP is used together with DISTINCT in the SELECT list.
|
|
Usually when creating temporary tables for a intermidiate
|
|
result we do not include fields for constant expressions.
|
|
|
|
@retval
|
|
0 if ok
|
|
@retval
|
|
1 on error
|
|
*/
|
|
|
|
bool JOIN::rollup_process_const_fields()
|
|
{
|
|
ORDER *group_tmp;
|
|
Item *item;
|
|
List_iterator<Item> it(all_fields);
|
|
|
|
for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next)
|
|
{
|
|
if (!(*group_tmp->item)->const_item())
|
|
continue;
|
|
while ((item= it++))
|
|
{
|
|
if (*group_tmp->item == item)
|
|
{
|
|
Item* new_item= new (thd->mem_root) Item_func_rollup_const(thd, item);
|
|
if (!new_item)
|
|
return 1;
|
|
new_item->fix_fields(thd, (Item **) 0);
|
|
thd->change_item_tree(it.ref(), new_item);
|
|
for (ORDER *tmp= group_tmp; tmp; tmp= tmp->next)
|
|
{
|
|
if (*tmp->item == item)
|
|
thd->change_item_tree(tmp->item, new_item);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
it.rewind();
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Fill up rollup structures with pointers to fields to use.
|
|
|
|
Creates copies of item_sum items for each sum level.
|
|
|
|
@param fields_arg List of all fields (hidden and real ones)
|
|
@param sel_fields Pointer to selected fields
|
|
@param func Store here a pointer to all fields
|
|
|
|
@retval
|
|
0 if ok;
|
|
In this case func is pointing to next not used element.
|
|
@retval
|
|
1 on error
|
|
*/
|
|
|
|
bool JOIN::rollup_make_fields(List<Item> &fields_arg, List<Item> &sel_fields,
|
|
Item_sum ***func)
|
|
{
|
|
List_iterator_fast<Item> it(fields_arg);
|
|
Item *first_field= sel_fields.head();
|
|
uint level;
|
|
|
|
/*
|
|
Create field lists for the different levels
|
|
|
|
The idea here is to have a separate field list for each rollup level to
|
|
avoid all runtime checks of which columns should be NULL.
|
|
|
|
The list is stored in reverse order to get sum function in such an order
|
|
in func that it makes it easy to reset them with init_sum_functions()
|
|
|
|
Assuming: SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
|
|
|
|
rollup.fields[0] will contain list where a,b,c is NULL
|
|
rollup.fields[1] will contain list where b,c is NULL
|
|
...
|
|
rollup.ref_pointer_array[#] points to fields for rollup.fields[#]
|
|
...
|
|
sum_funcs_end[0] points to all sum functions
|
|
sum_funcs_end[1] points to all sum functions, except grand totals
|
|
...
|
|
*/
|
|
|
|
for (level=0 ; level < send_group_parts ; level++)
|
|
{
|
|
uint i;
|
|
uint pos= send_group_parts - level -1;
|
|
bool real_fields= 0;
|
|
Item *item;
|
|
List_iterator<Item> new_it(rollup.fields[pos]);
|
|
Ref_ptr_array ref_array_start= rollup.ref_pointer_arrays[pos];
|
|
ORDER *start_group;
|
|
|
|
/* Point to first hidden field */
|
|
uint ref_array_ix= fields_arg.elements-1;
|
|
|
|
/* Remember where the sum functions ends for the previous level */
|
|
sum_funcs_end[pos+1]= *func;
|
|
|
|
/* Find the start of the group for this level */
|
|
for (i= 0, start_group= group_list ;
|
|
i++ < pos ;
|
|
start_group= start_group->next)
|
|
;
|
|
|
|
it.rewind();
|
|
while ((item= it++))
|
|
{
|
|
if (item == first_field)
|
|
{
|
|
real_fields= 1; // End of hidden fields
|
|
ref_array_ix= 0;
|
|
}
|
|
|
|
if (item->type() == Item::SUM_FUNC_ITEM && !item->const_item() &&
|
|
(!((Item_sum*) item)->depended_from() ||
|
|
((Item_sum *)item)->depended_from() == select_lex))
|
|
|
|
{
|
|
/*
|
|
This is a top level summary function that must be replaced with
|
|
a sum function that is reset for this level.
|
|
|
|
NOTE: This code creates an object which is not that nice in a
|
|
sub select. Fortunately it's not common to have rollup in
|
|
sub selects.
|
|
*/
|
|
item= item->copy_or_same(thd);
|
|
((Item_sum*) item)->make_unique();
|
|
*(*func)= (Item_sum*) item;
|
|
(*func)++;
|
|
}
|
|
else
|
|
{
|
|
/* Check if this is something that is part of this group by */
|
|
ORDER *group_tmp;
|
|
for (group_tmp= start_group, i= pos ;
|
|
group_tmp ; group_tmp= group_tmp->next, i++)
|
|
{
|
|
if (*group_tmp->item == item)
|
|
{
|
|
/*
|
|
This is an element that is used by the GROUP BY and should be
|
|
set to NULL in this level
|
|
*/
|
|
Item_null_result *null_item= new (thd->mem_root) Item_null_result(thd);
|
|
if (!null_item)
|
|
return 1;
|
|
// Value will be null sometimes
|
|
item->set_maybe_null();
|
|
null_item->result_field= item->get_tmp_table_field();
|
|
item= null_item;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
ref_array_start[ref_array_ix]= item;
|
|
if (real_fields)
|
|
{
|
|
(void) new_it++; // Point to next item
|
|
new_it.replace(item); // Replace previous
|
|
ref_array_ix++;
|
|
}
|
|
else
|
|
ref_array_ix--;
|
|
}
|
|
}
|
|
sum_funcs_end[0]= *func; // Point to last function
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Send all rollup levels higher than the current one to the client.
|
|
|
|
@b SAMPLE
|
|
@code
|
|
SELECT a, b, c SUM(b) FROM t1 GROUP BY a,b WITH ROLLUP
|
|
@endcode
|
|
|
|
@param idx Level we are on:
|
|
- 0 = Total sum level
|
|
- 1 = First group changed (a)
|
|
- 2 = Second group changed (a,b)
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 If send_data_failed()
|
|
*/
|
|
|
|
int JOIN::rollup_send_data(uint idx)
|
|
{
|
|
uint i;
|
|
for (i= send_group_parts ; i-- > idx ; )
|
|
{
|
|
int res= 0;
|
|
/* Get reference pointers to sum functions in place */
|
|
copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
|
|
if ((!having || having->val_int()))
|
|
{
|
|
if (send_records < unit->lim.get_select_limit() && do_send_rows &&
|
|
(res= result->send_data_with_check(rollup.fields[i],
|
|
unit, send_records)) > 0)
|
|
return 1;
|
|
if (!res)
|
|
send_records++;
|
|
}
|
|
}
|
|
/* Restore ref_pointer_array */
|
|
set_items_ref_array(current_ref_ptrs);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
Write all rollup levels higher than the current one to a temp table.
|
|
|
|
@b SAMPLE
|
|
@code
|
|
SELECT a, b, SUM(c) FROM t1 GROUP BY a,b WITH ROLLUP
|
|
@endcode
|
|
|
|
@param idx Level we are on:
|
|
- 0 = Total sum level
|
|
- 1 = First group changed (a)
|
|
- 2 = Second group changed (a,b)
|
|
@param table reference to temp table
|
|
|
|
@retval
|
|
0 ok
|
|
@retval
|
|
1 if write_data_failed()
|
|
*/
|
|
|
|
int JOIN::rollup_write_data(uint idx, TMP_TABLE_PARAM *tmp_table_param_arg,
|
|
TABLE *table_arg)
|
|
{
|
|
uint i;
|
|
for (i= send_group_parts ; i-- > idx ; )
|
|
{
|
|
/* Get reference pointers to sum functions in place */
|
|
copy_ref_ptr_array(ref_ptrs, rollup.ref_pointer_arrays[i]);
|
|
if ((!having || having->val_int()))
|
|
{
|
|
int write_error;
|
|
Item *item;
|
|
List_iterator_fast<Item> it(rollup.fields[i]);
|
|
while ((item= it++))
|
|
{
|
|
if (item->type() == Item::NULL_ITEM && item->is_result_field())
|
|
item->save_in_result_field(1);
|
|
}
|
|
copy_sum_funcs(sum_funcs_end[i+1], sum_funcs_end[i]);
|
|
if (unlikely((write_error=
|
|
table_arg->file->ha_write_tmp_row(table_arg->record[0]))))
|
|
{
|
|
if (create_internal_tmp_table_from_heap(thd, table_arg,
|
|
tmp_table_param_arg->start_recinfo,
|
|
&tmp_table_param_arg->recinfo,
|
|
write_error, 0, NULL))
|
|
return 1;
|
|
}
|
|
}
|
|
}
|
|
/* Restore ref_pointer_array */
|
|
set_items_ref_array(current_ref_ptrs);
|
|
return 0;
|
|
}
|
|
|
|
/**
|
|
clear results if there are not rows found for group
|
|
(end_send_group/end_write_group)
|
|
*/
|
|
|
|
void inline JOIN::clear_sum_funcs()
|
|
{
|
|
if (sum_funcs)
|
|
{
|
|
Item_sum *func, **func_ptr= sum_funcs;
|
|
while ((func= *(func_ptr++)))
|
|
func->clear();
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Prepare for returning 'empty row' when there is no matching row.
|
|
|
|
- Mark all tables with mark_as_null_row()
|
|
- Make a copy of of all simple SELECT items
|
|
- Reset all sum functions to NULL or 0.
|
|
*/
|
|
|
|
void JOIN::clear(table_map *cleared_tables)
|
|
{
|
|
clear_tables(this, cleared_tables);
|
|
copy_fields(&tmp_table_param);
|
|
clear_sum_funcs();
|
|
}
|
|
|
|
|
|
/**
|
|
Print an EXPLAIN line with all NULLs and given message in the 'Extra' column
|
|
|
|
@retval
|
|
0 ok
|
|
1 OOM error or error from send_data()
|
|
*/
|
|
|
|
int print_explain_message_line(select_result_sink *result,
|
|
uint8 options, bool is_analyze,
|
|
uint select_number,
|
|
const char *select_type,
|
|
ha_rows *rows,
|
|
const char *message)
|
|
{
|
|
/* Note: for SHOW EXPLAIN, this is caller thread's THD */
|
|
THD *thd= result->thd;
|
|
MEM_ROOT *mem_root= thd->mem_root;
|
|
Item *item_null= new (mem_root) Item_null(thd);
|
|
List<Item> item_list;
|
|
|
|
item_list.push_back(new (mem_root) Item_int(thd, (int32) select_number),
|
|
mem_root);
|
|
item_list.push_back(new (mem_root) Item_string_sys(thd, select_type),
|
|
mem_root);
|
|
/* `table` */
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `partitions` */
|
|
if (options & DESCRIBE_PARTITIONS)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* type, possible_keys, key, key_len, ref */
|
|
for (uint i=0 ; i < 5; i++)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `rows` */
|
|
StringBuffer<64> rows_str;
|
|
if (rows)
|
|
{
|
|
rows_str.append_ulonglong((ulonglong)(*rows));
|
|
item_list.push_back(new (mem_root)
|
|
Item_string_sys(thd, rows_str.ptr(),
|
|
rows_str.length()), mem_root);
|
|
}
|
|
else
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `r_rows` */
|
|
if (is_analyze)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `filtered` */
|
|
if (is_analyze || options & DESCRIBE_EXTENDED)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `r_filtered` */
|
|
if (is_analyze)
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
/* `Extra` */
|
|
if (message)
|
|
item_list.push_back(new (mem_root) Item_string_sys(thd, message),
|
|
mem_root);
|
|
else
|
|
item_list.push_back(item_null, mem_root);
|
|
|
|
if (unlikely(thd->is_error()) || unlikely(result->send_data(item_list)))
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
Append MRR information from quick select to the given string
|
|
*/
|
|
|
|
void explain_append_mrr_info(QUICK_RANGE_SELECT *quick, String *res)
|
|
{
|
|
char mrr_str_buf[128];
|
|
mrr_str_buf[0]=0;
|
|
int len;
|
|
handler *h= quick->head->file;
|
|
len= h->multi_range_read_explain_info(quick->mrr_flags, mrr_str_buf,
|
|
sizeof(mrr_str_buf));
|
|
if (len > 0)
|
|
{
|
|
//res->append(STRING_WITH_LEN("; "));
|
|
res->append(mrr_str_buf, len);
|
|
}
|
|
}
|
|
|
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
/*
  Append the names of all keys of 'table' that are set in 'possible_keys'
  to 'list', allocating on 'alloc'.

  @retval 0  ok
  @retval 1  append_str() failed
*/

int append_possible_keys(MEM_ROOT *alloc, String_list &list, TABLE *table,
                         key_map possible_keys)
{
  for (uint keyno= 0; keyno < table->s->keys; keyno++)
  {
    if (!possible_keys.is_set(keyno))
      continue;
    if (!list.append_str(alloc, table->key_info[keyno].name.str))
      return 1;
  }
  return 0;
}
|
|
|
|
|
|
/**
  Save the query plan of this join tab into an Explain_table_access object.

  Fills the EXPLAIN columns (table, partitions, type, possible_keys, key,
  key_len, ref, rows, filtered, Extra) and attaches the execution trackers
  of 'eta' to this join tab.

  @param eta            Explain node to fill
  @param prefix_tables  not used in this function
  @param distinct_arg   not used in this function
  @param first_top_tab  join tab that do_firstmatch is compared against
                        (as first_top_tab - 1) when printing FirstMatch

  @retval 0  ok
  @retval 1  error (an allocation or a nested save_explain_data() failed)
*/

bool JOIN_TAB::save_explain_data(Explain_table_access *eta,
                                 table_map prefix_tables,
                                 bool distinct_arg, JOIN_TAB *first_top_tab)
{
  int quick_type= -1;
  CHARSET_INFO *cs= system_charset_info;
  THD *thd= join->thd;
  TABLE_LIST *table_list= table->pos_in_table_list;
  QUICK_SELECT_I *cur_quick= NULL;
  my_bool key_read;
  char table_name_buffer[SAFE_NAME_LEN];
  KEY *key_info= 0;
  uint key_len= 0, used_index= MAX_KEY;

#ifdef NOT_YET
  /*
    Would be good to keep this condition up to date.
    Another alternative is to remove JOIN_TAB::cond_selectivity and use
    TABLE::cond_selectivity everywhere
  */
  DBUG_ASSERT(cond_selectivity == table->cond_selectivity);
#endif

  explain_plan= eta;
  eta->key.clear();
  eta->quick_info= NULL;
  eta->cost= join_read_time;
  eta->loops= join_loops;

  /*
    We assume that if this table does pre-sorting, then it doesn't do filtering
    with SQL_SELECT.
  */
  DBUG_ASSERT(!(select && filesort));
  const SQL_SELECT *tab_select= get_sql_select();

  if (filesort)
  {
    if (!(eta->pre_join_sort=
          new (thd->mem_root) Explain_aggr_filesort(thd->mem_root,
                                                    thd->lex->analyze_stmt,
                                                    filesort)))
      return 1;
  }
  // psergey-todo: data for filtering!
  tracker= &eta->tracker;
  jbuf_tracker= &eta->jbuf_tracker;
  jbuf_loops_tracker= &eta->jbuf_loops_tracker;
  jbuf_unpack_tracker= &eta->jbuf_unpack_tracker;

  /*
    Enable the table access time tracker for "ANALYZE stmt" and when
    log_slow_verbosity includes LOG_SLOW_VERBOSITY_ENGINE
  */
  if (unlikely(thd->lex->analyze_stmt ||
               thd->variables.log_slow_verbosity &
               LOG_SLOW_VERBOSITY_ENGINE))
  {
    table->file->set_time_tracker(&eta->op_tracker);

    /*
      Set handler_for_stats even if we are not running an ANALYZE command.
      There's no harm, and in case somebody runs a SHOW ANALYZE command we'll
      be able to print the engine statistics.
    */
    if (table->file->handler_stats &&
        table->s->tmp_table != INTERNAL_TMP_TABLE)
      eta->handler_for_stats= table->file;

    if (likely(thd->lex->analyze_stmt))
    {
      eta->op_tracker.set_gap_tracker(&eta->extra_time_tracker);
      eta->jbuf_unpack_tracker.set_gap_tracker(&eta->jbuf_extra_time_tracker);
    }
  }
  /* No need to save id and select_type here, they are kept in Explain_select */

  /* table */
  if (table->derived_select_number)
  {
    /* Derived table name generation */
    size_t len= my_snprintf(table_name_buffer, sizeof(table_name_buffer)-1,
                            "<derived%u>",
                            table->derived_select_number);
    eta->table_name.copy(table_name_buffer, len, cs);
  }
  else if (bush_children)
  {
    JOIN_TAB *ctab= bush_children->start;
    /* table */
    size_t len= my_snprintf(table_name_buffer,
                            sizeof(table_name_buffer)-1,
                            "<subquery%d>",
                            ctab->emb_sj_nest->sj_subq_pred->get_identifier());
    eta->table_name.copy(table_name_buffer, len, cs);
  }
  else
  {
    TABLE_LIST *real_table= table->pos_in_table_list;
    /*
      When multi-table UPDATE/DELETE does updates/deletes to a VIEW, the view
      is merged in a certain particular way (grep for DT_MERGE_FOR_INSERT).

      As a result, view's underlying tables have $tbl->pos_in_table_list={view}.
      We don't want to print view name in EXPLAIN, we want underlying table's
      alias (like specified in the view definition).
    */
    if (real_table->merged_for_insert)
    {
      TABLE_LIST *view_child=
        real_table->view->first_select_lex()->table_list.first;
      for (;view_child; view_child= view_child->next_local)
      {
        if (view_child->table == table)
        {
          real_table= view_child;
          break;
        }
      }
    }
    eta->table_name.copy(real_table->alias.str, real_table->alias.length, cs);
  }

  /* "partitions" column */
  {
#ifdef WITH_PARTITION_STORAGE_ENGINE
    partition_info *part_info;
    if (!table->derived_select_number &&
        (part_info= table->part_info))
    { //TODO: all thd->mem_root here should be fixed
      make_used_partitions_str(thd->mem_root, part_info, &eta->used_partitions,
                               eta->used_partitions_list);
      eta->used_partitions_set= true;
    }
    else
      eta->used_partitions_set= false;
#else
    /* just produce empty column if partitioning is not compiled in */
    eta->used_partitions_set= false;
#endif
  }

  /* "type" column */
  enum join_type tab_type= type;
  if ((type == JT_ALL || type == JT_RANGE || type == JT_HASH) &&
      tab_select && tab_select->quick && use_quick != 2)
  {
    cur_quick= tab_select->quick;
    quick_type= cur_quick->get_type();
    if ((quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE) ||
        (quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT) ||
        (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT) ||
        (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION))
      tab_type= type == JT_HASH ? JT_HASH_INDEX_MERGE : JT_INDEX_MERGE;
    else
      tab_type= type == JT_HASH ? JT_HASH_RANGE : JT_RANGE;
  }
  eta->type= tab_type;

  /* Build "possible_keys" value */
  // psergey-todo: why does this use thd MEM_ROOT??? Doesn't this
  // break ANALYZE ? thd->mem_root will be freed, and after that we will
  // attempt to print the query plan?
  if (append_possible_keys(thd->mem_root, eta->possible_keys, table, keys))
    return 1;

  /* Build "key", "key_len", and "ref" */

  if (rowid_filter)
  {
    Range_rowid_filter *range_filter= (Range_rowid_filter *) rowid_filter;
    QUICK_SELECT_I *quick= range_filter->get_select()->quick;

    Explain_rowid_filter *erf= new (thd->mem_root) Explain_rowid_filter;
    /* Allocation may fail: do not dereference a NULL node */
    if (!erf)
      return 1;
    /*
      A NULL result from get_explain() is treated as an error, the same way
      as for tab_select->quick below
    */
    if (!(erf->quick= quick->get_explain(thd->mem_root)))
      return 1;
    erf->selectivity= range_rowid_filter_info->selectivity;
    erf->rows= quick->records;
    if (!(erf->tracker= new Rowid_filter_tracker(thd->lex->analyze_stmt)))
      return 1;
    rowid_filter->set_tracker(erf->tracker);
    eta->rowid_filter= erf;
  }

  if (tab_type == JT_NEXT)
  {
    used_index= index;
    key_info= table->key_info+index;
    key_len= key_info->key_length;
  }
  else if (ref.key_parts)
  {
    used_index= ref.key;
    key_info= get_keyinfo_by_key_no(ref.key);
    key_len= ref.key_length;
  }

  /*
    In STRAIGHT_JOIN queries, there can be join tabs with JT_CONST type
    that still have quick selects.
  */
  if (tab_select && tab_select->quick && tab_type != JT_CONST)
  {
    if (!(eta->quick_info= tab_select->quick->get_explain(thd->mem_root)))
      return 1;
  }

  if (key_info) /* 'index' or 'ref' access */
  {
    eta->key.set(thd->mem_root, key_info, key_len);

    if (ref.key_parts && tab_type != JT_FT)
    {
      store_key **key_ref= ref.key_copy;
      for (uint kp= 0; kp < ref.key_parts; kp++)
      {
        if ((key_part_map(1) << kp) & ref.const_ref_part_map)
        {
          if (!(eta->ref_list.append_str(thd->mem_root, "const")))
            return 1;
          /*
            create_ref_for_key() handles keypart=const equalities as follows:
              - non-EXPLAIN execution will copy the "const" to lookup tuple
                immediately and will not add an element to ref.key_copy
              - EXPLAIN will put an element into ref.key_copy. Since we've
                just printed "const" for it, we should skip it here
          */
          if (thd->lex->describe)
            key_ref++;
        }
        else
        {
          if (!(eta->ref_list.append_str(thd->mem_root, (*key_ref)->name())))
            return 1;
          key_ref++;
        }
      }
    }
  }

  if (tab_type == JT_HASH_NEXT) /* full index scan + hash join */
  {
    used_index= index;
    eta->hash_next_key.set(thd->mem_root,
                           & table->key_info[index],
                           table->key_info[index].key_length);
    // psergey-todo: ^ is the above correct? are we necessarily joining on all
    // columns?
  }

  if (!key_info)
  {
    if (table_list && /* SJM bushes don't have table_list */
        table_list->schema_table &&
        table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
    {
      IS_table_read_plan *is_table_read_plan= table_list->is_table_read_plan;
      StringBuffer<64> key_name_buf;
      if (is_table_read_plan->trivial_show_command ||
          is_table_read_plan->has_db_lookup_value())
      {
        /* The "key" has the name of the column referring to the database */
        int f_idx= table_list->schema_table->idx_field1;
        LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name();
        key_name_buf.append(tmp, cs);
      }
      if (is_table_read_plan->trivial_show_command ||
          is_table_read_plan->has_table_lookup_value())
      {
        if (is_table_read_plan->trivial_show_command ||
            is_table_read_plan->has_db_lookup_value())
          key_name_buf.append(',');

        int f_idx= table_list->schema_table->idx_field2;
        LEX_CSTRING tmp= table_list->schema_table->fields_info[f_idx].name();
        key_name_buf.append(tmp, cs);
      }

      if (key_name_buf.length())
        eta->key.set_pseudo_key(thd->mem_root, key_name_buf.c_ptr_safe());
    }
  }

  /* "rows" */
  if (table_list /* SJM bushes don't have table_list */ &&
      table_list->schema_table)
  {
    /* I_S tables have rows=extra=NULL */
    eta->rows_set= false;
    eta->filtered_set= false;
  }
  else
  {
    double examined_rows= get_examined_rows();

    eta->rows_set= true;
    eta->rows= double_to_rows(examined_rows);

    /* "filtered" */
    float f= 0.0;
    if (examined_rows)
    {
      f= (float) (100.0 * records_out / examined_rows);
      set_if_smaller(f, 100.0);
    }
    eta->filtered_set= true;
    eta->filtered= f;
  }

  /* Build "Extra" field and save it */
  key_read= table->file->keyread_enabled();
  if ((tab_type == JT_NEXT || tab_type == JT_CONST) && used_index != MAX_KEY &&
      table->covering_keys.is_set(used_index))
    key_read=1;
  if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT &&
      !((QUICK_ROR_INTERSECT_SELECT*)cur_quick)->need_to_fetch_row)
    key_read=1;

  /*
    table_list is NULL-checked everywhere else in this function, so guard
    this dereference as well
  */
  if (table_list && table_list->table_function)
    eta->push_extra(ET_TABLE_FUNCTION);

  if (info)
  {
    eta->push_extra(info);
  }
  else if (packed_info & TAB_INFO_HAVE_VALUE)
  {
    if (packed_info & TAB_INFO_USING_INDEX)
      eta->push_extra(ET_USING_INDEX);
    if (packed_info & TAB_INFO_USING_WHERE)
      eta->push_extra(ET_USING_WHERE);
    if (packed_info & TAB_INFO_FULL_SCAN_ON_NULL)
      eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY);
  }
  else
  {
    uint keyno= MAX_KEY;
    if (ref.key_parts)
      keyno= ref.key;
    else if (tab_select && cur_quick)
      keyno = cur_quick->index;

    if (keyno != MAX_KEY && keyno == table->file->pushed_idx_cond_keyno &&
        table->file->pushed_idx_cond)
    {
      eta->push_extra(ET_USING_INDEX_CONDITION);
      eta->pushed_index_cond= table->file->pushed_idx_cond;
    }
    else if (cache_idx_cond)
    {
      eta->push_extra(ET_USING_INDEX_CONDITION_BKA);
      eta->pushed_index_cond= cache_idx_cond;
    }

    if (quick_type == QUICK_SELECT_I::QS_TYPE_ROR_UNION ||
        quick_type == QUICK_SELECT_I::QS_TYPE_ROR_INTERSECT ||
        quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_INTERSECT ||
        quick_type == QUICK_SELECT_I::QS_TYPE_INDEX_MERGE)
    {
      eta->push_extra(ET_USING);
    }
    if (tab_select)
    {
      if (use_quick == 2)
      {
        eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD);
        eta->range_checked_fer= new (thd->mem_root) Explain_range_checked_fer;
        if (eta->range_checked_fer)
          eta->range_checked_fer->
            append_possible_keys_stat(thd->mem_root, table, keys);
      }
      else if (tab_select->cond ||
               (cache_select && cache_select->cond))
      {
        const COND *pushed_cond= table->file->pushed_cond;

        if ((table->file->ha_table_flags() &
              HA_CAN_TABLE_CONDITION_PUSHDOWN) &&
            pushed_cond)
        {
          eta->push_extra(ET_USING_WHERE_WITH_PUSHED_CONDITION);
        }
        else
        {
          eta->where_cond= tab_select->cond;
          eta->cache_cond= cache_select? cache_select->cond : NULL;
          eta->push_extra(ET_USING_WHERE);
        }
      }
    }
    if (table_list /* SJM bushes don't have table_list */ &&
        table_list->schema_table &&
        table_list->schema_table->i_s_requested_object & OPTIMIZE_I_S_TABLE)
    {
      if (!table_list->table_open_method)
        eta->push_extra(ET_SKIP_OPEN_TABLE);
      else if (table_list->table_open_method == OPEN_FRM_ONLY)
        eta->push_extra(ET_OPEN_FRM_ONLY);
      else
        eta->push_extra(ET_OPEN_FULL_TABLE);
      /* psergey-note: the following has a bug.*/
      if (table_list->is_table_read_plan->trivial_show_command ||
          (table_list->is_table_read_plan->has_db_lookup_value() &&
           table_list->is_table_read_plan->has_table_lookup_value()))
        eta->push_extra(ET_SCANNED_0_DATABASES);
      else if (table_list->is_table_read_plan->has_db_lookup_value() ||
               table_list->is_table_read_plan->has_table_lookup_value())
        eta->push_extra(ET_SCANNED_1_DATABASE);
      else
        eta->push_extra(ET_SCANNED_ALL_DATABASES);
    }
    if (key_read)
    {
      if (quick_type == QUICK_SELECT_I::QS_TYPE_GROUP_MIN_MAX)
      {
        QUICK_GROUP_MIN_MAX_SELECT *qgs=
          (QUICK_GROUP_MIN_MAX_SELECT *) tab_select->quick;
        eta->push_extra(ET_USING_INDEX_FOR_GROUP_BY);
        eta->loose_scan_is_scanning= qgs->loose_scan_is_scanning();
      }
      else
        eta->push_extra(ET_USING_INDEX);
    }
    if (table->reginfo.not_exists_optimize)
      eta->push_extra(ET_NOT_EXISTS);

    if (quick_type == QUICK_SELECT_I::QS_TYPE_RANGE)
    {
      explain_append_mrr_info((QUICK_RANGE_SELECT*)(tab_select->quick),
                              &eta->mrr_type);
      if (eta->mrr_type.length() > 0)
        eta->push_extra(ET_USING_MRR);
    }

    if (shortcut_for_distinct)
      eta->push_extra(ET_DISTINCT);

    if (loosescan_match_tab)
    {
      eta->push_extra(ET_LOOSESCAN);
    }

    if (first_weedout_table)
    {
      eta->start_dups_weedout= true;
      eta->push_extra(ET_START_TEMPORARY);
    }
    if (check_weed_out_table)
    {
      eta->push_extra(ET_END_TEMPORARY);
      eta->end_dups_weedout= true;
    }

    else if (do_firstmatch)
    {
      /* FirstMatch is printed in both cases; only the table name differs */
      eta->push_extra(ET_FIRST_MATCH);
      if (do_firstmatch != /*join->join_tab*/ first_top_tab - 1)
      {
        TABLE *prev_table=do_firstmatch->table;
        if (prev_table->derived_select_number)
        {
          char namebuf[NAME_LEN];
          /* Derived table name generation */
          size_t len= my_snprintf(namebuf, sizeof(namebuf)-1,
                                  "<derived%u>",
                                  prev_table->derived_select_number);
          eta->firstmatch_table_name.append(namebuf, len);
        }
        else
          eta->firstmatch_table_name.append(&prev_table->pos_in_table_list->alias);
      }
    }

    for (uint part= 0; part < ref.key_parts; part++)
    {
      if (ref.cond_guards[part])
      {
        eta->push_extra(ET_FULL_SCAN_ON_NULL_KEY);
        eta->full_scan_on_null_key= true;
        break;
      }
    }

    if (cache)
    {
      eta->push_extra(ET_USING_JOIN_BUFFER);
      if (cache->save_explain_data(&eta->bka_type))
        return 1;
    }
  }

  /*
    In case this is a derived table, here we remember the number of
    subselect that used to produce it.
  */
  if (!(table_list && table_list->is_with_table_recursive_reference()))
    eta->derived_select_number= table->derived_select_number;

  /* The same for non-merged semi-joins */
  eta->non_merged_sjm_number = get_non_merged_semijoin_select();

  return 0;
}
|
|
|
|
|
|
/*
|
|
Walk through join->aggr_tables and save aggregation/grouping query plan into
|
|
an Explain_select object
|
|
|
|
@retval
|
|
0 ok
|
|
1 error
|
|
*/
|
|
|
|
bool save_agg_explain_data(JOIN *join, Explain_select *xpl_sel)
|
|
{
|
|
JOIN_TAB *join_tab=join->join_tab + join->exec_join_tab_cnt();
|
|
Explain_aggr_node *prev_node;
|
|
Explain_aggr_node *node= xpl_sel->aggr_tree;
|
|
bool is_analyze= join->thd->lex->analyze_stmt;
|
|
THD *thd= join->thd;
|
|
|
|
for (uint i= 0; i < join->aggr_tables; i++, join_tab++)
|
|
{
|
|
// Each aggregate means a temp.table
|
|
prev_node= node;
|
|
if (!(node= new (thd->mem_root) Explain_aggr_tmp_table))
|
|
return 1;
|
|
node->child= prev_node;
|
|
|
|
if (join_tab->window_funcs_step)
|
|
{
|
|
Explain_aggr_node *new_node=
|
|
join_tab->window_funcs_step->save_explain_plan(thd->mem_root,
|
|
is_analyze);
|
|
if (!new_node)
|
|
return 1;
|
|
|
|
prev_node=node;
|
|
node= new_node;
|
|
node->child= prev_node;
|
|
}
|
|
|
|
/* The below matches execution in join_init_read_record() */
|
|
if (join_tab->distinct)
|
|
{
|
|
prev_node= node;
|
|
if (!(node= new (thd->mem_root) Explain_aggr_remove_dups))
|
|
return 1;
|
|
node->child= prev_node;
|
|
}
|
|
|
|
if (join_tab->filesort)
|
|
{
|
|
Explain_aggr_filesort *eaf =
|
|
new (thd->mem_root) Explain_aggr_filesort(thd->mem_root, is_analyze, join_tab->filesort);
|
|
if (!eaf)
|
|
return 1;
|
|
prev_node= node;
|
|
node= eaf;
|
|
node->child= prev_node;
|
|
}
|
|
}
|
|
xpl_sel->aggr_tree= node;
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Save Query Plan Footprint
|
|
|
|
@note
|
|
Currently, this function may be called multiple times
|
|
|
|
@retval
|
|
0 ok
|
|
1 error
|
|
*/
|
|
|
|
int JOIN::save_explain_data_intern(Explain_query *output,
|
|
bool need_tmp_table_arg,
|
|
bool need_order_arg, bool distinct_arg,
|
|
const char *message)
|
|
{
|
|
JOIN *join= this; /* Legacy: this code used to be a non-member function */
|
|
DBUG_ENTER("JOIN::save_explain_data_intern");
|
|
DBUG_PRINT("info", ("Select %p (%u), type %s, message %s",
|
|
join->select_lex, join->select_lex->select_number,
|
|
join->select_lex->type,
|
|
message ? message : "NULL"));
|
|
DBUG_ASSERT(have_query_plan == QEP_AVAILABLE);
|
|
/* fake_select_lex is created/printed by Explain_union */
|
|
DBUG_ASSERT(join->select_lex != join->unit->fake_select_lex);
|
|
|
|
/* There should be no attempts to save query plans for merged selects */
|
|
DBUG_ASSERT(!join->select_lex->master_unit()->derived ||
|
|
join->select_lex->master_unit()->derived->is_materialized_derived() ||
|
|
join->select_lex->master_unit()->derived->is_with_table());
|
|
|
|
/* Don't log this into the slow query log */
|
|
|
|
if (message)
|
|
{
|
|
if (!(explain= new (output->mem_root)
|
|
Explain_select(output->mem_root,
|
|
thd->lex->analyze_stmt)))
|
|
DBUG_RETURN(1);
|
|
#ifndef DBUG_OFF
|
|
explain->select_lex= select_lex;
|
|
#endif
|
|
join->select_lex->set_explain_type(true);
|
|
|
|
explain->select_id= join->select_lex->select_number;
|
|
explain->select_type= join->select_lex->type;
|
|
explain->linkage= select_lex->get_linkage();
|
|
explain->using_temporary= need_tmp;
|
|
explain->using_filesort= need_order_arg;
|
|
/* Setting explain->message means that all other members are invalid */
|
|
explain->message= message;
|
|
|
|
if (select_lex->master_unit()->derived)
|
|
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
|
|
if (save_agg_explain_data(this, explain))
|
|
DBUG_RETURN(1);
|
|
|
|
output->add_node(explain);
|
|
}
|
|
else if (pushdown_query)
|
|
{
|
|
if (!(explain= new (output->mem_root)
|
|
Explain_select(output->mem_root,
|
|
thd->lex->analyze_stmt)))
|
|
DBUG_RETURN(1);
|
|
select_lex->set_explain_type(true);
|
|
|
|
explain->select_id= select_lex->select_number;
|
|
explain->select_type= select_lex->type;
|
|
explain->linkage= select_lex->get_linkage();
|
|
explain->using_temporary= need_tmp;
|
|
explain->using_filesort= need_order_arg;
|
|
explain->message= "Storage engine handles GROUP BY";
|
|
|
|
if (select_lex->master_unit()->derived)
|
|
explain->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
|
|
output->add_node(explain);
|
|
}
|
|
else
|
|
{
|
|
Explain_select *xpl_sel;
|
|
explain= xpl_sel=
|
|
new (output->mem_root) Explain_select(output->mem_root,
|
|
thd->lex->analyze_stmt);
|
|
if (!explain)
|
|
DBUG_RETURN(1);
|
|
|
|
table_map used_tables=0;
|
|
|
|
join->select_lex->set_explain_type(true);
|
|
xpl_sel->cost= best_read;
|
|
xpl_sel->select_id= join->select_lex->select_number;
|
|
xpl_sel->select_type= join->select_lex->type;
|
|
xpl_sel->linkage= select_lex->get_linkage();
|
|
xpl_sel->is_lateral= ((select_lex->get_linkage() == DERIVED_TABLE_TYPE) &&
|
|
(select_lex->uncacheable & UNCACHEABLE_DEPENDENT));
|
|
if (select_lex->master_unit()->derived)
|
|
xpl_sel->connection_type= Explain_node::EXPLAIN_NODE_DERIVED;
|
|
|
|
if (save_agg_explain_data(this, xpl_sel))
|
|
DBUG_RETURN(1);
|
|
|
|
xpl_sel->exec_const_cond= exec_const_cond;
|
|
xpl_sel->outer_ref_cond= outer_ref_cond;
|
|
xpl_sel->pseudo_bits_cond= pseudo_bits_cond;
|
|
if (tmp_having)
|
|
xpl_sel->having= tmp_having;
|
|
else
|
|
xpl_sel->having= having;
|
|
xpl_sel->having_value= having_value;
|
|
|
|
JOIN_TAB* const first_top_tab= join->first_breadth_first_tab();
|
|
JOIN_TAB* prev_bush_root_tab= NULL;
|
|
|
|
Explain_basic_join *cur_parent= xpl_sel;
|
|
|
|
for (JOIN_TAB *tab= first_explain_order_tab(join); tab;
|
|
tab= next_explain_order_tab(join, tab))
|
|
{
|
|
JOIN_TAB *saved_join_tab= NULL;
|
|
TABLE *cur_table= tab->table;
|
|
|
|
/* Don't show eliminated tables */
|
|
if (cur_table->map & join->eliminated_tables)
|
|
{
|
|
used_tables|= cur_table->map;
|
|
continue;
|
|
}
|
|
|
|
Explain_table_access *eta= (new (output->mem_root)
|
|
Explain_table_access(output->mem_root,
|
|
thd->lex->analyze_stmt));
|
|
|
|
if (!eta)
|
|
DBUG_RETURN(1);
|
|
if (tab->bush_root_tab != prev_bush_root_tab)
|
|
{
|
|
if (tab->bush_root_tab)
|
|
{
|
|
/*
|
|
We've entered an SJ-Materialization nest. Create an object for it.
|
|
*/
|
|
if (!(cur_parent=
|
|
new (output->mem_root) Explain_basic_join(output->mem_root)))
|
|
DBUG_RETURN(1);
|
|
|
|
JOIN_TAB *first_child= tab->bush_root_tab->bush_children->start;
|
|
cur_parent->select_id=
|
|
first_child->emb_sj_nest->sj_subq_pred->get_identifier();
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
We've just left an SJ-Materialization nest. We are at the join tab
|
|
that 'embeds the nest'
|
|
*/
|
|
DBUG_ASSERT(tab->bush_children);
|
|
eta->sjm_nest= cur_parent;
|
|
cur_parent= xpl_sel;
|
|
}
|
|
}
|
|
prev_bush_root_tab= tab->bush_root_tab;
|
|
|
|
cur_parent->add_table(eta, output);
|
|
if (tab->save_explain_data(eta, used_tables, distinct_arg, first_top_tab))
|
|
DBUG_RETURN(1);
|
|
|
|
if (saved_join_tab)
|
|
tab= saved_join_tab;
|
|
|
|
// For next iteration
|
|
used_tables|= cur_table->map;
|
|
}
|
|
output->add_node(xpl_sel);
|
|
}
|
|
|
|
/*
|
|
Don't try to add query plans for child selects if this select was pushed
|
|
down into a Smart Storage Engine:
|
|
- the entire statement was pushed down ("PUSHED SELECT"), or
|
|
- this derived table was pushed down ("PUSHED DERIVED")
|
|
*/
|
|
if (!select_lex->pushdown_select && select_lex->type != pushed_derived_text)
|
|
for (SELECT_LEX_UNIT *tmp_unit= join->select_lex->first_inner_unit();
|
|
tmp_unit;
|
|
tmp_unit= tmp_unit->next_unit())
|
|
if (tmp_unit->explainable())
|
|
explain->add_child(tmp_unit->first_select()->select_number);
|
|
|
|
if (select_lex->is_top_level_node())
|
|
output->query_plan_ready();
|
|
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
|
|
/*
|
|
This function serves as "shortcut point" for EXPLAIN queries.
|
|
|
|
The EXPLAIN statement executes just like its SELECT counterpart would
|
|
execute, except that JOIN::exec() will call select_describe() instead of
|
|
actually executing the query.
|
|
|
|
Inside select_describe():
|
|
- Query plan is updated with latest QEP choices made at the start of
|
|
JOIN::exec().
|
|
- the proces of "almost execution" is invoked for the children subqueries.
|
|
|
|
Overall, select_describe() is a legacy of old EXPLAIN implementation and
|
|
should be removed.
|
|
*/
|
|
|
|
static void select_describe(JOIN *join, bool need_tmp_table, bool need_order,
|
|
bool distinct,const char *message)
|
|
{
|
|
THD *thd=join->thd;
|
|
DBUG_ENTER("select_describe");
|
|
|
|
if (join->select_lex->pushdown_select)
|
|
{
|
|
/*
|
|
The whole statement was pushed down to a Smart Storage Engine. Do not
|
|
attempt to produce a query plan locally.
|
|
*/
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/* Update the QPF with latest values of using_temporary, using_filesort */
|
|
for (SELECT_LEX_UNIT *unit= join->select_lex->first_inner_unit();
|
|
unit;
|
|
unit= unit->next_unit())
|
|
{
|
|
/*
|
|
This fix_fields() call is to handle an edge case like this:
|
|
|
|
SELECT ... UNION SELECT ... ORDER BY (SELECT ...)
|
|
|
|
for such queries, we'll get here before having called
|
|
subquery_expr->fix_fields(), which will cause failure to
|
|
*/
|
|
if (unit->item && !unit->item->fixed())
|
|
{
|
|
Item *ref= unit->item;
|
|
if (unit->item->fix_fields(thd, &ref))
|
|
DBUG_VOID_RETURN;
|
|
DBUG_ASSERT(ref == unit->item);
|
|
}
|
|
|
|
if (unit->explainable())
|
|
{
|
|
if (mysql_explain_union(thd, unit, unit->result))
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
  Run the "EXPLAIN" variant of a unit (a single SELECT or a UNION-like
  operation).

  Marks every SELECT of the unit with SELECT_DESCRIBE and then either
  prepares/executes the unit (for unit operations or when a fake_select_lex
  exists) or calls mysql_select() for the single first SELECT.

  @param thd     thread handler
  @param unit    unit to explain
  @param result  result sink passed on to prepare()/mysql_select()

  @retval false  ok
  @retval true   error (a call failed or thd->is_error() is set)
*/

bool mysql_explain_union(THD *thd, SELECT_LEX_UNIT *unit, select_result *result)
{
  DBUG_ENTER("mysql_explain_union");
  bool res= 0;
  SELECT_LEX *first= unit->first_select();

  for (SELECT_LEX *sl= first; sl; sl= sl->next_select())
  {
    sl->set_explain_type(FALSE);
    sl->options|= SELECT_DESCRIBE;
  }

  if (unit->is_unit_op() || unit->fake_select_lex)
  {
    ulonglong save_options= 0;
    /*
      Remember whether the options of fake_select_lex were really saved and
      modified. Re-evaluating union_needs_tmp_table() after prepare() is not
      reliable: if its answer differs, the options would either be
      overwritten with 0 or left with SELECT_DESCRIBE set.
    */
    bool fake_options_changed= false;

    if (unit->union_needs_tmp_table() && unit->fake_select_lex)
    {
      save_options= unit->fake_select_lex->options;
      unit->fake_select_lex->select_number= FAKE_SELECT_LEX_ID; // just for initialization
      unit->fake_select_lex->type= unit_operation_text[unit->common_op()];
      unit->fake_select_lex->options|= SELECT_DESCRIBE;
      fake_options_changed= true;
    }
    if (!(res= unit->prepare(unit->derived, result,
                             SELECT_NO_UNLOCK | SELECT_DESCRIBE)))
    {
      bool is_pushed_union=
        (unit->derived && unit->derived->pushdown_derived) ||
        unit->pushdown_unit;
      if (unit->pushdown_unit)
      {
        create_explain_query_if_not_exists(thd->lex, thd->mem_root);
        if (!unit->executed)
          unit->save_union_explain(thd->lex->explain);
        List<Item> items;
        /* NOTE(review): the return value of prepare() is ignored here */
        result->prepare(items, unit);
      }
      /* A pushed-down union is not executed locally */
      if (!is_pushed_union)
        res= unit->exec();
    }

    if (fake_options_changed && unit->fake_select_lex)
      unit->fake_select_lex->options= save_options;
  }
  else
  {
    thd->lex->current_select= first;
    unit->set_limit(unit->global_parameters());
    res= mysql_select(thd, first->table_list.first, first->item_list,
                      first->where,
                      first->order_list.elements + first->group_list.elements,
                      first->order_list.first, first->group_list.first,
                      first->having, thd->lex->proc_list.first,
                      first->options | thd->variables.option_bits | SELECT_DESCRIBE,
                      result, unit, first);
  }

  DBUG_RETURN(res || thd->is_error());
}
|
|
|
|
|
|
/*
  Print the tables in [table, end) as a chain of joins.

  The first table is printed as is; every following table is preceded by its
  join keyword and followed by " on(<expr>)" if it has an ON expression.
*/

static void print_table_array(THD *thd,
                              table_map eliminated_tables,
                              String *str, TABLE_LIST **table,
                              TABLE_LIST **end,
                              enum_query_type query_type)
{
  (*table)->print(thd, eliminated_tables, str, query_type);

  TABLE_LIST **cursor= table + 1;
  for (; cursor < end; cursor++)
  {
    TABLE_LIST *curr= *cursor;

    /*
      The "eliminated_tables &&" check guards against the case of
      printing the query for CREATE VIEW. We do that without having run
      JOIN::optimize() and so will have nested_join->used_tables==0.
    */
    if (eliminated_tables)
    {
      const bool base_table_eliminated=
        curr->table && (curr->table->map & eliminated_tables);
      if (base_table_eliminated ||
          (curr->nested_join &&
           !(curr->nested_join->used_tables & ~eliminated_tables)))
      {
        /* as of 5.5, print_join doesn't put eliminated elements into array */
        DBUG_ASSERT(0);
        continue;
      }
    }

    /* JOIN_TYPE_OUTER is just a marker unrelated to real join */
    const bool is_outer_join=
      curr->outer_join & (JOIN_TYPE_LEFT|JOIN_TYPE_RIGHT);
    if (is_outer_join)
    {
      /* MySQL converts right to left joins */
      str->append(STRING_WITH_LEN(" left join "));
    }
    else if (curr->straight)
    {
      str->append(STRING_WITH_LEN(" straight_join "));
    }
    else if (curr->sj_inner_tables)
    {
      str->append(STRING_WITH_LEN(" semi join "));
    }
    else
    {
      str->append(STRING_WITH_LEN(" join "));
    }

    curr->print(thd, eliminated_tables, str, query_type);

    if (!curr->on_expr)
      continue;
    str->append(STRING_WITH_LEN(" on("));
    curr->on_expr->print(str, query_type);
    str->append(')');
  }
}
|
|
|
|
|
|
/*
|
|
Check if the passed table is
|
|
- a base table which was eliminated, or
|
|
- a join nest which only contained eliminated tables (and so was eliminated,
|
|
too)
|
|
*/
|
|
|
|
bool is_eliminated_table(table_map eliminated_tables, TABLE_LIST *tbl)
|
|
{
|
|
return eliminated_tables &&
|
|
((tbl->table && (tbl->table->map & eliminated_tables)) ||
|
|
(tbl->nested_join && !(tbl->nested_join->used_tables &
|
|
~eliminated_tables)));
|
|
}
|
|
|
|
/**
|
|
Print joins from the FROM clause.
|
|
|
|
@param thd thread handler
|
|
@param str string where table should be printed
|
|
@param tables list of tables in join
|
|
@query_type type of the query is being generated
|
|
*/
|
|
|
|
static void print_join(THD *thd,
|
|
table_map eliminated_tables,
|
|
String *str,
|
|
List<TABLE_LIST> *tables,
|
|
enum_query_type query_type)
|
|
{
|
|
/* List is reversed => we should reverse it before using */
|
|
List_iterator_fast<TABLE_LIST> ti(*tables);
|
|
TABLE_LIST **table;
|
|
DBUG_ENTER("print_join");
|
|
|
|
/*
|
|
If the QT_NO_DATA_EXPANSION flag is specified, we print the
|
|
original table list, including constant tables that have been
|
|
optimized away, as the constant tables may be referenced in the
|
|
expression printed by Item_field::print() when this flag is given.
|
|
Otherwise, only non-const tables are printed.
|
|
|
|
Example:
|
|
|
|
Original SQL:
|
|
select * from (select 1) t
|
|
|
|
Printed without QT_NO_DATA_EXPANSION:
|
|
select '1' AS `1` from dual
|
|
|
|
Printed with QT_NO_DATA_EXPANSION:
|
|
select `t`.`1` from (select 1 AS `1`) `t`
|
|
*/
|
|
const bool print_const_tables= (query_type & QT_NO_DATA_EXPANSION);
|
|
size_t tables_to_print= 0;
|
|
|
|
for (TABLE_LIST *t= ti++; t ; t= ti++)
|
|
{
|
|
/* See comment in print_table_array() about the second condition */
|
|
if (print_const_tables || !t->optimized_away)
|
|
if (!is_eliminated_table(eliminated_tables, t))
|
|
tables_to_print++;
|
|
}
|
|
if (tables_to_print == 0)
|
|
{
|
|
str->append(STRING_WITH_LEN("dual"));
|
|
DBUG_VOID_RETURN; // all tables were optimized away
|
|
}
|
|
ti.rewind();
|
|
|
|
if (!(table= thd->alloc<TABLE_LIST*>(tables_to_print)))
|
|
DBUG_VOID_RETURN; // out of memory
|
|
|
|
TABLE_LIST *tmp, **t= table + (tables_to_print - 1);
|
|
while ((tmp= ti++))
|
|
{
|
|
if (tmp->optimized_away && !print_const_tables)
|
|
continue;
|
|
if (is_eliminated_table(eliminated_tables, tmp))
|
|
continue;
|
|
*t--= tmp;
|
|
}
|
|
|
|
DBUG_ASSERT(tables->elements >= 1);
|
|
/*
|
|
Assert that the first table in the list isn't eliminated. This comes from
|
|
the fact that the first table can't be inner table of an outer join.
|
|
*/
|
|
DBUG_ASSERT(!eliminated_tables ||
|
|
!(((*table)->table && ((*table)->table->map & eliminated_tables)) ||
|
|
((*table)->nested_join && !((*table)->nested_join->used_tables &
|
|
~eliminated_tables))));
|
|
/*
|
|
If the first table is a semi-join nest, swap it with something that is
|
|
not a semi-join nest.
|
|
*/
|
|
if ((*table)->sj_inner_tables)
|
|
{
|
|
TABLE_LIST **end= table + tables_to_print;
|
|
for (TABLE_LIST **t2= table; t2!=end; t2++)
|
|
{
|
|
if (!(*t2)->sj_inner_tables)
|
|
{
|
|
tmp= *t2;
|
|
*t2= *table;
|
|
*table= tmp;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
print_table_array(thd, eliminated_tables, str, table,
|
|
table + tables_to_print, query_type);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
/**
|
|
@brief Print an index hint
|
|
|
|
@details Prints out the USE|FORCE|IGNORE index hint.
|
|
|
|
@param thd the current thread
|
|
@param[out] str appends the index hint here
|
|
@note The hint keyword (USE INDEX | FORCE INDEX | IGNORE INDEX) is
taken from the member 'type' and the index name from the member
'key_name'; they are not passed as parameters.
|
|
*/
|
|
|
|
void
|
|
Index_hint::print(THD *thd, String *str)
|
|
{
|
|
switch (type)
|
|
{
|
|
case INDEX_HINT_IGNORE: str->append(STRING_WITH_LEN("IGNORE INDEX")); break;
|
|
case INDEX_HINT_USE: str->append(STRING_WITH_LEN("USE INDEX")); break;
|
|
case INDEX_HINT_FORCE: str->append(STRING_WITH_LEN("FORCE INDEX")); break;
|
|
}
|
|
str->append(STRING_WITH_LEN(" ("));
|
|
if (key_name.length)
|
|
{
|
|
if (thd && key_name.streq(primary_key_name))
|
|
str->append(primary_key_name);
|
|
else
|
|
append_identifier(thd, str, &key_name);
|
|
}
|
|
str->append(')');
|
|
}
|
|
|
|
|
|
/**
|
|
Print table as it should be in join list.
|
|
|
|
@param str string where table should be printed
|
|
*/
|
|
|
|
void TABLE_LIST::print(THD *thd, table_map eliminated_tables, String *str,
                       enum_query_type query_type)
{
  if (nested_join)
  {
    /* A join nest: print its join list inside parentheses. */
    str->append('(');
    print_join(thd, eliminated_tables, str, &nested_join->join_list, query_type);
    str->append(')');
    return;
  }

  if (jtbm_subselect)
  {
    const bool single_select=
      (jtbm_subselect->engine->engine_type() ==
       subselect_engine::SINGLE_SELECT_ENGINE);

    str->append(STRING_WITH_LEN(" <materialize> ("));
    if (single_select)
    {
      /*
        We get here when conversion into materialization didn't finish (this
        happens when
        - The subquery is a degenerate case which produces 0 or 1 record
        - subquery's optimization didn't finish because of @@max_join_size
          limits
        - ... maybe some other cases like this
      */
      jtbm_subselect->engine->print(str, query_type);
    }
    else
    {
      /* Otherwise print the engine wrapped by the hash semi-join engine. */
      subselect_hash_sj_engine *hash_engine=
        (subselect_hash_sj_engine*) jtbm_subselect->engine;
      hash_engine->materialize_engine->print(str, query_type);
    }
    str->append(')');
    return;
  }

  /*
    Name that was printed for the table; the alias is appended further
    down only when it differs from this name.
  */
  Lex_ident_table cmp_name(empty_clex_str);

  /*
    The database prefix is printed unless the table belongs to a view in
    compact format or the caller asked to skip database names.
  */
  const bool with_db_name=
    !(belong_to_view && belong_to_view->compact_view_format) &&
    !(query_type & QT_ITEM_IDENT_SKIP_DB_NAMES);

  if (view_name.str)
  {
    // A view
    if (with_db_name)
    {
      append_identifier(thd, str, &view_db);
      str->append('.');
    }
    append_identifier(thd, str, &view_name);
    cmp_name= view_name;
  }
  else if (derived)
  {
    if (is_with_table())
    {
      /* A reference to a WITH table is printed by its name. */
      append_identifier(thd, str, &table_name);
      cmp_name= table_name;
    }
    else
    {
      // A derived table
      str->append('(');
      derived->print(str, query_type);
      str->append(')');
      cmp_name= Lex_ident_table(empty_clex_str);   // Force printing of alias
    }
  }
  else if (table_function)
  {
    /* A table function; its alias is printed right here. */
    (void) table_function->print(thd, this, str, query_type);
    str->append(' ');
    append_identifier(thd, str, &alias);
    cmp_name= alias;
  }
  else
  {
    // A normal table
    if (with_db_name)
    {
      append_identifier(thd, str, &db);
      str->append('.');
    }
    if (schema_table)
    {
      append_identifier(thd, str, &schema_table_name);
      cmp_name= Lex_ident_table(schema_table_name);
    }
    else
    {
      append_identifier(thd, str, &table_name);
      cmp_name= table_name;
    }
#ifdef WITH_PARTITION_STORAGE_ENGINE
    if (partition_names && partition_names->elements)
    {
      /* Explicit partition selection: " PARTITION (p1,p2,...)". */
      int remaining= partition_names->elements;
      List_iterator<String> name_it(*(partition_names));
      str->append(STRING_WITH_LEN(" PARTITION ("));
      while (remaining-- > 0)
      {
        String *part_name= name_it++;
        append_identifier(thd, str, part_name->ptr(), part_name->length());
        if (remaining > 0)
          str->append(',');
      }
      str->append(')');
    }
#endif /* WITH_PARTITION_STORAGE_ENGINE */
  }

  if (table && table->versioned())
    vers_conditions.print(str, query_type);

  /* Alias (and optional column name list) when it differs from the name. */
  if (!cmp_name.streq(alias))
  {
    str->append(' ');
    append_identifier_opt_casedn(thd, str, alias,
                                 lower_case_table_names == 1);

    if (column_names && (column_names->elements > 0))
      list_strlex_print(thd, str, column_names, true);
  }

  /* Index hints, each preceded by a space. */
  if (index_hints)
  {
    List_iterator<Index_hint> hint_it(*index_hints);
    for (Index_hint *hint= hint_it++; hint; hint= hint_it++)
    {
      str->append(' ');
      hint->print(thd, str);
    }
  }
}
|
|
|
|
/*
  Kinds of statements distinguished when printing a query.
  The values of all enumerators except NO_CMD are used as indexes into
  explainable_cmd_name[].
*/
enum explainable_cmd_type
{
  SELECT_CMD,
  INSERT_CMD,
  REPLACE_CMD,
  UPDATE_CMD,
  DELETE_CMD,
  NO_CMD
};
|
|
|
|
static
|
|
const LEX_CSTRING explainable_cmd_name []=
|
|
{
|
|
{STRING_WITH_LEN("select ")},
|
|
{STRING_WITH_LEN("insert ")},
|
|
{STRING_WITH_LEN("replace ")},
|
|
{STRING_WITH_LEN("update ")},
|
|
{STRING_WITH_LEN("delete ")},
|
|
};
|
|
|
|
static
|
|
const LEX_CSTRING* get_explainable_cmd_name(enum explainable_cmd_type cmd)
|
|
{
|
|
return explainable_cmd_name + cmd;
|
|
}
|
|
|
|
static
|
|
enum explainable_cmd_type get_explainable_cmd_type(THD *thd)
|
|
{
|
|
switch (thd->lex->sql_command) {
|
|
case SQLCOM_SELECT:
|
|
return SELECT_CMD;
|
|
case SQLCOM_INSERT:
|
|
case SQLCOM_INSERT_SELECT:
|
|
return INSERT_CMD;
|
|
case SQLCOM_REPLACE:
|
|
case SQLCOM_REPLACE_SELECT:
|
|
return REPLACE_CMD;
|
|
case SQLCOM_UPDATE:
|
|
case SQLCOM_UPDATE_MULTI:
|
|
return UPDATE_CMD;
|
|
case SQLCOM_DELETE:
|
|
case SQLCOM_DELETE_MULTI:
|
|
return DELETE_CMD;
|
|
default:
|
|
return SELECT_CMD;
|
|
}
|
|
}
|
|
|
|
|
|
/**
  Print this table, or, if it has a merge_underlying_list, recursively
  print every table of that list instead.

  @param thd         Current thread
  @param str         String the output is appended to
  @param query_type  Printing flags passed through to print()
*/

void TABLE_LIST::print_leaf_tables(THD *thd, String *str,
                                   enum_query_type query_type)
{
  if (!merge_underlying_list)
  {
    /* Nothing underneath: print the table itself. */
    print(thd, 0, str, query_type);
    return;
  }

  TABLE_LIST *child= merge_underlying_list;
  while (child)
  {
    child->print_leaf_tables(thd, str, query_type);
    child= child->next_local;
  }
}
|
|
|
|
|
|
/**
  Print the comma-separated list of items of this select.

  @param thd         Current thread
  @param str         String the output is appended to
  @param query_type  Printing flags passed to the items
*/

void st_select_lex::print_item_list(THD *thd, String *str,
                                    enum_query_type query_type)
{
  /*
    outer_select() can not be used here because it is for name resolution
    and will return NULL at any end of name resolution chain (view/derived)
  */
  const bool top_level= is_query_topmost(thd);
  bool need_comma= false;
  List_iterator_fast<Item> it(item_list);

  for (Item *item= it++; item; item= it++)
  {
    if (need_comma)
      str->append(',');
    need_comma= true;

    /*
      Do not print auto-generated aliases in subqueries. It has no purpose
      in a view definition or other contexts where the query is printed.
      Items without a name are printed without an alias as well.
    */
    const bool no_alias=
      (is_subquery_function() && !item->is_explicit_name()) ||
      !item->name.str;

    /*
      Do not print illegal names (if it is not top level SELECT).
      Top level view checked (and correct name are assigned),
      other cases of top level SELECT are not important, because
      it is not "table field".
    */
    const bool with_name=
      !no_alias &&
      (top_level ||
       item->is_explicit_name() ||
       !check_column_name(item->name.str));

    if (with_name)
      item->print_item_w_name(str, query_type);
    else
      item->print(str, query_type);
  }
}
|
|
|
|
|
|
/**
  Print the " set col = value,..." clause of an UPDATE.

  Columns are taken from item_list and values from thd->lex->value_list;
  the two lists are walked in parallel. Nothing is printed if either list
  is empty.

  @param thd         Current thread
  @param str         String the output is appended to
  @param query_type  Printing flags; columns are printed with
                     QT_NO_DATA_EXPANSION added
*/

void st_select_lex::print_set_clause(THD *thd, String *str,
                                     enum_query_type query_type)
{
  bool first= 1;
  List_iterator_fast<Item> it(item_list);
  List_iterator_fast<Item> vt(thd->lex->value_list);
  Item *item;
  Item *val;
  /*
    Stop as soon as either list is exhausted. A comma expression here
    would test only 'val' and could dereference a NULL 'item' if the
    lists ever differed in length.
  */
  while ((item= it++) && (val= vt++))
  {
    if (first)
    {
      str->append(STRING_WITH_LEN(" set "));
      first= 0;
    }
    else
      str->append(',');

    item->print(str, (enum_query_type) (query_type | QT_NO_DATA_EXPANSION));
    str->append(STRING_WITH_LEN(" = "));
    val->print(str, query_type);
  }
}
|
|
|
|
|
|
/**
  Print the " on duplicate key update col = value,..." clause.

  Columns are taken from thd->lex->update_list and values from
  thd->lex->value_list; the two lists are walked in parallel. Nothing is
  printed if either list is empty.

  @param thd         Current thread
  @param str         String the output is appended to
  @param query_type  Printing flags passed to the items
*/

void st_select_lex::print_on_duplicate_key_clause(THD *thd, String *str,
                                                  enum_query_type query_type)
{
  bool first= 1;
  List_iterator_fast<Item> it(thd->lex->update_list);
  List_iterator_fast<Item> vt(thd->lex->value_list);
  Item *item;
  Item *val;
  /*
    Stop as soon as either list is exhausted. A comma expression here
    would test only 'val' and could dereference a NULL 'item' if the
    lists ever differed in length.
  */
  while ((item= it++) && (val= vt++))
  {
    if (first)
    {
      str->append(STRING_WITH_LEN(" on duplicate key update "));
      first= 0;
    }
    else
      str->append(',');

    item->print(str, query_type);
    str->append(STRING_WITH_LEN(" = "));
    val->print(str, query_type);
  }
}
|
|
|
|
|
|
/**
  Append the locking clause of this select, if it has one.

  @param str  String the output is appended to
*/

void st_select_lex::print_lock_type(String *str)
{
  /* The two lock modes are mutually exclusive; at most one is printed. */
  if (select_lock == select_lock_type::FOR_UPDATE)
    str->append(STRING_WITH_LEN(" for update"));
  else if (select_lock == select_lock_type::IN_SHARE_MODE)
    str->append(STRING_WITH_LEN(" lock in share mode"));

  /* " skip locked" is appended whenever the flag is set. */
  if (unlikely(skip_locked))
    str->append(STRING_WITH_LEN(" skip locked"));
}
|
|
|
|
|
|
/**
  Print this select as SQL text.

  For the topmost select of a statement (unless QT_SELECT_ONLY is given)
  the statement kind is taken into account, so that INSERT, REPLACE,
  UPDATE and DELETE statements are printed with their own keywords and
  clauses.

  @param thd         Current thread; must not be NULL
  @param str         String the output is appended to
  @param query_type  Printing flags
*/

void st_select_lex::print(THD *thd, String *str, enum_query_type query_type)
{
  DBUG_ASSERT(thd);

  /* A table value constructor prints itself. */
  if (tvc)
  {
    tvc->print(thd, str, query_type);
    return;
  }

  /* Skip the wrapper select around a TVC when the caller asks for it. */
  if (is_tvc_wrapper && (query_type & QT_NO_WRAPPERS_FOR_TVC_IN_VIEW))
  {
    first_inner_unit()->first_select()->print(thd, str, query_type);
    return;
  }

  bool top_level= is_query_topmost(thd);
  enum explainable_cmd_type sel_type= SELECT_CMD;
  if (top_level && !(query_type & QT_SELECT_ONLY))
    sel_type= get_explainable_cmd_type(thd);

  /* sel_type does not change below, so these can be computed once. */
  const bool select_like= (sel_type == SELECT_CMD ||
                           sel_type == INSERT_CMD ||
                           sel_type == REPLACE_CMD);
  const bool update_or_delete= (sel_type == UPDATE_CMD ||
                                sel_type == DELETE_CMD);

  if (sel_type == INSERT_CMD || sel_type == REPLACE_CMD)
  {
    /* "insert into tbl(cols) " or "replace into tbl(cols) " */
    str->append(get_explainable_cmd_name(sel_type));
    str->append(STRING_WITH_LEN("into "));
    TABLE_LIST *tbl= thd->lex->query_tables;
    while (tbl->merge_underlying_list)
      tbl= tbl->merge_underlying_list;
    tbl->print(thd, 0, str, query_type);
    if (thd->lex->field_list.elements)
    {
      str->append ('(');
      List_iterator_fast<Item> it(thd->lex->field_list);
      Item *item;
      bool first= true;
      while ((item= it++))
      {
        if (first)
          first= false;
        else
          str->append(',');
        str->append(item->name);
      }
      str->append(')');
    }

    str->append(' ');

    if (thd->lex->sql_command == SQLCOM_INSERT ||
        thd->lex->sql_command == SQLCOM_REPLACE)
    {
      /* INSERT/REPLACE ... VALUES: print the value lists and stop. */
      str->append(STRING_WITH_LEN("values "));
      bool is_first_elem= true;
      List_iterator_fast<List_item> li(thd->lex->many_values);
      List_item *list;

      while ((list= li++))
      {
        if (is_first_elem)
          is_first_elem= false;
        else
          str->append(',');

        print_list_item(str, list, query_type);
      }
      if (thd->lex->update_list.elements)
        print_on_duplicate_key_clause(thd, str, query_type);
      return;
    }
  }

  /* Optional "select#N" comment in front of the select. */
  if ((query_type & QT_SHOW_SELECT_NUMBER) &&
      thd->lex->all_selects_list &&
      thd->lex->all_selects_list->link_next &&
      select_number != FAKE_SELECT_LEX_ID)
  {
    str->append(STRING_WITH_LEN("/* select#"));
    str->append_ulonglong(select_number);
    if (thd->lex->describe & DESCRIBE_EXTENDED2)
    {
      str->append('/');
      str->append_ulonglong(nest_level);

      if (master_unit()->fake_select_lex &&
          master_unit()->first_select() == this)
      {
        str->append(STRING_WITH_LEN(" Filter Select: "));
        master_unit()->fake_select_lex->print(thd, str, query_type);
      }
    }
    str->append(STRING_WITH_LEN(" */ "));
  }

  if (select_like)
    str->append(STRING_WITH_LEN("select "));

  if (join && join->cleaned)
  {
    /*
      JOIN already cleaned up so it is dangerous to print items
      because temporary tables they pointed on could be freed.
      Only "#<select number>" is printed; the number is appended as
      decimal text, the same way as in the select# comment above.
    */
    str->append('#');
    str->append_ulonglong(select_number);
    return;
  }

  /* First add options */
  if (options & SELECT_STRAIGHT_JOIN)
    str->append(STRING_WITH_LEN("straight_join "));
  if (options & SELECT_HIGH_PRIORITY)
    str->append(STRING_WITH_LEN("high_priority "));
  if (options & SELECT_DISTINCT)
    str->append(STRING_WITH_LEN("distinct "));
  if (options & SELECT_SMALL_RESULT)
    str->append(STRING_WITH_LEN("sql_small_result "));
  if (options & SELECT_BIG_RESULT)
    str->append(STRING_WITH_LEN("sql_big_result "));
  if (options & OPTION_BUFFER_RESULT)
    str->append(STRING_WITH_LEN("sql_buffer_result "));
  if (options & OPTION_FOUND_ROWS)
    str->append(STRING_WITH_LEN("sql_calc_found_rows "));
  if (this == parent_lex->first_select_lex())
  {
    /* The query cache option is printed for the first select only. */
    switch (parent_lex->sql_cache)
    {
      case LEX::SQL_NO_CACHE:
        str->append(STRING_WITH_LEN("sql_no_cache "));
        break;
      case LEX::SQL_CACHE:
        str->append(STRING_WITH_LEN("sql_cache "));
        break;
      case LEX::SQL_CACHE_UNSPECIFIED:
        break;
      default:
        DBUG_ASSERT(0);
    }
  }

  //Item List
  if (select_like)
    print_item_list(thd, str, query_type);
  /*
    from clause
    TODO: support USING/FORCE/IGNORE index
  */
  if (table_list.elements)
  {
    if (select_like)
    {
      str->append(STRING_WITH_LEN(" from "));
      /* go through join tree */
      print_join(thd, join? join->eliminated_tables: 0, str, &top_join_list,
                 query_type);
    }
    if (update_or_delete)
      str->append(get_explainable_cmd_name(sel_type));
    if (sel_type == DELETE_CMD)
    {
      /* Tables to delete from, followed by " using " if there were any. */
      str->append(STRING_WITH_LEN(" from "));
      bool first= true;
      for (TABLE_LIST *target_tbl= thd->lex->auxiliary_table_list.first;
           target_tbl;
           target_tbl= target_tbl->next_local)
      {
        if (first)
          first= false;
        else
          str->append(',');
        target_tbl->correspondent_table->print_leaf_tables(thd, str,
                                                           query_type);
      }

      if (!first)
        str->append(STRING_WITH_LEN(" using "));
    }
    if (update_or_delete)
    {
      if (join)
        print_join(thd, 0, str, &top_join_list, query_type);
      else
      {
        /* No JOIN object: print the leaf tables as a plain list. */
        bool first= true;
        List_iterator_fast<TABLE_LIST> li(leaf_tables);
        TABLE_LIST *tbl;
        while ((tbl= li++))
        {
          if (first)
            first= false;
          else
            str->append(',');
          tbl->print(thd, 0, str, query_type);
        }
      }
    }
  }
  else if (where)
  {
    /*
      "SELECT 1 FROM DUAL WHERE 2" should not be printed as
      "SELECT 1 WHERE 2": the 1st syntax is valid, but the 2nd is not.
    */
    str->append(STRING_WITH_LEN(" from DUAL "));
  }

  if (sel_type == UPDATE_CMD)
    print_set_clause(thd, str, query_type);

  // Where
  Item *cur_where= where;
  if (join)
    cur_where= join->conds;
  else if (update_or_delete)
    cur_where= thd->lex->upd_del_where;
  if (cur_where || cond_value != Item::COND_UNDEF)
  {
    str->append(STRING_WITH_LEN(" where "));
    if (cur_where)
      cur_where->print(str, query_type);
    else
      str->append(cond_value != Item::COND_FALSE ? '1' : '0');
  }

  // group by & olap
  if (group_list.elements)
  {
    str->append(STRING_WITH_LEN(" group by "));
    print_order(str, group_list.first, query_type);
    switch (olap)
    {
      case CUBE_TYPE:
        str->append(STRING_WITH_LEN(" with cube"));
        break;
      case ROLLUP_TYPE:
        str->append(STRING_WITH_LEN(" with rollup"));
        break;
      default:
        ;  //satisfy compiler
    }
  }

  // having
  Item *cur_having= having;
  if (join)
    cur_having= join->having;

  if (cur_having || having_value != Item::COND_UNDEF)
  {
    str->append(STRING_WITH_LEN(" having "));
    if (cur_having)
      cur_having->print(str, query_type);
    else
      str->append(having_value != Item::COND_FALSE ? '1' : '0');
  }

  if (order_list.elements)
  {
    str->append(STRING_WITH_LEN(" order by "));
    print_order(str, order_list.first, query_type);
  }

  // limit
  print_limit(thd, str, query_type);

  // lock type
  if (braces) /* no braces processed in
                 SELECT_LEX_UNIT::print_lock_from_the_last_select */
    print_lock_type(str);

  if ((sel_type == INSERT_CMD || sel_type == REPLACE_CMD) &&
      thd->lex->update_list.elements)
    print_on_duplicate_key_clause(thd, str, query_type);

  // returning clause
  /*
    NOTE(review): the condition requires item_list to be empty, in which
    case print_item_list() has nothing to iterate over, so as written no
    RETURNING list is printed. Confirm the intended condition.
  */
  if (sel_type == DELETE_CMD && !item_list.elements)
  {
    print_item_list(thd, str, query_type);
  }
  // PROCEDURE unsupported here
}
|
|
|
|
|
|
/**
|
|
Change the select_result object of the JOIN.
|
|
|
|
If old_result is not used, forward the call to the current
|
|
select_result in case it is a wrapper around old_result.
|
|
|
|
Call prepare() and prepare2() on the new select_result if we decide
|
|
to use it.
|
|
|
|
@param new_result New select_result object
|
|
@param old_result Old select_result object (NULL to force change)
|
|
|
|
@retval false Success
|
|
@retval true Error
|
|
*/
|
|
|
|
bool JOIN::change_result(select_result *new_result, select_result *old_result)
{
  DBUG_ENTER("JOIN::change_result");

  /*
    The current result is some other object than old_result: let it handle
    the change itself.
  */
  if (old_result != NULL && result != old_result)
    DBUG_RETURN(result->change_result(new_result));

  /* Install the new result and prepare it; any failure is an error. */
  result= new_result;
  const bool failed= (result->prepare(fields_list,
                                      select_lex->master_unit()) ||
                      result->prepare2(this));
  DBUG_RETURN(failed);
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Set allowed types of join caches that can be used for join operations
|
|
|
|
@details
|
|
The function sets a bitmap of allowed join buffers types in the field
|
|
allowed_join_cache_types of this JOIN structure:
|
|
bit 1 is set if the join buffers are allowed to be incremental
|
|
bit 2 is set if the join buffers are allowed to be hashed
|
|
bit 3 is set if the join buffers are allowed to be used for BKA
|
|
join algorithms.
|
|
The allowed types are read from system variables.
|
|
Besides the function sets maximum allowed join cache level that is
|
|
also read from a system variable.
|
|
*/
|
|
|
|
void JOIN::set_allowed_join_cache_types()
|
|
{
|
|
allowed_join_cache_types= 0;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_INCREMENTAL))
|
|
allowed_join_cache_types|= JOIN_CACHE_INCREMENTAL_BIT;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_HASHED))
|
|
allowed_join_cache_types|= JOIN_CACHE_HASHED_BIT;
|
|
if (optimizer_flag(thd, OPTIMIZER_SWITCH_JOIN_CACHE_BKA))
|
|
allowed_join_cache_types|= JOIN_CACHE_BKA_BIT;
|
|
allowed_semijoin_with_cache=
|
|
optimizer_flag(thd, OPTIMIZER_SWITCH_SEMIJOIN_WITH_CACHE);
|
|
allowed_outer_join_with_cache=
|
|
optimizer_flag(thd, OPTIMIZER_SWITCH_OUTER_JOIN_WITH_CACHE);
|
|
max_allowed_join_cache_level= thd->variables.join_cache_level;
|
|
}
|
|
|
|
|
|
/**
|
|
Save a query execution plan so that the caller can revert to it if needed,
|
|
and reset the current query plan so that it can be reoptimized.
|
|
|
|
@param save_to The object into which the current query plan state is saved
|
|
*/
|
|
|
|
void JOIN::save_query_plan(Join_plan_state *save_to)
{
  /*
    Exchange the current keyuse array with the one kept in save_to.
    (Presumably save_to->keyuse is empty here, so that the current keyuse
    becomes an empty array -- to be confirmed against the callers.)
  */
  DYNAMIC_ARRAY current_keyuse= keyuse;
  keyuse= save_to->keyuse;
  save_to->keyuse= current_keyuse;

  /* Move the per-table ref access data to save_to and clear it here. */
  for (uint idx= 0; idx < table_count; idx++)
  {
    save_to->join_tab_keyuse[idx]= join_tab[idx].keyuse;
    save_to->join_tab_checked_keys[idx]= join_tab[idx].checked_keys;
    join_tab[idx].keyuse= NULL;
    join_tab[idx].checked_keys.clear_all();
  }

  /* Copy out the chosen positions, then zero the current ones. */
  const size_t positions_size= sizeof(POSITION) * (table_count + 1);
  memcpy((uchar*) save_to->best_positions, (uchar*) best_positions,
         positions_size);
  memset((uchar*) best_positions, 0, positions_size);

  /* Save SJM nests */
  List_iterator<TABLE_LIST> nest_it(select_lex->sj_nests);
  SJ_MATERIALIZATION_INFO **saved_info= save_to->sj_mat_info;
  for (TABLE_LIST *nest= nest_it++; nest; nest= nest_it++)
    *(saved_info++)= nest->sj_mat_info;
}
|
|
|
|
|
|
/**
|
|
Reset a query execution plan so that it can be reoptimized in-place.
|
|
*/
|
|
void JOIN::reset_query_plan()
|
|
{
|
|
for (uint i= 0; i < table_count; i++)
|
|
{
|
|
join_tab[i].keyuse= NULL;
|
|
join_tab[i].checked_keys.clear_all();
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
Restore a query execution plan previously saved by the caller.
|
|
|
|
@param restore_from The object from which the current query plan state is restored.
|
|
*/
|
|
|
|
void JOIN::restore_query_plan(Join_plan_state *restore_from)
{
  /* Exchange the current keyuse array with the saved one. */
  DYNAMIC_ARRAY current_keyuse= keyuse;
  keyuse= restore_from->keyuse;
  restore_from->keyuse= current_keyuse;

  /* Put back the per-table ref access data. */
  for (uint idx= 0; idx < table_count; idx++)
  {
    join_tab[idx].checked_keys= restore_from->join_tab_checked_keys[idx];
    join_tab[idx].keyuse= restore_from->join_tab_keyuse[idx];
  }

  /* Put back the saved positions. */
  memcpy((uchar*) best_positions, (uchar*) restore_from->best_positions,
         sizeof(POSITION) * (table_count + 1));

  /* Restore SJM nests */
  List_iterator<TABLE_LIST> nest_it(select_lex->sj_nests);
  SJ_MATERIALIZATION_INFO **saved_info= restore_from->sj_mat_info;
  for (TABLE_LIST *nest= nest_it++; nest; nest= nest_it++)
    nest->sj_mat_info= *(saved_info++);
}
|
|
|
|
|
|
/**
|
|
Reoptimize a query plan taking into account an additional conjunct to the
|
|
WHERE clause.
|
|
|
|
@param added_where An extra conjunct to the WHERE clause to reoptimize with
|
|
@param join_tables The set of tables to reoptimize
|
|
@param save_to If != NULL, save here the state of the current query
|
|
plan, otherwise reuse the existing query plan structures.
|
|
|
|
@notes
|
|
Given a query plan that was already optimized taking into account some WHERE
|
|
clause 'C', reoptimize this plan with a new WHERE clause 'C AND added_where'.
|
|
The reoptimization works as follows:
|
|
|
|
1. Call update_ref_and_keys *only* for the new conditions 'added_where'
|
|
that are about to be injected into the query.
|
|
2. Expand if necessary the original KEYUSE array JOIN::keyuse to
|
|
accommodate the new REF accesses computed for the 'added_where' condition.
|
|
3. Add the new KEYUSEs into JOIN::keyuse.
|
|
4. Re-sort and re-filter the JOIN::keyuse array with the newly added
|
|
KEYUSE elements.
|
|
|
|
@retval REOPT_NEW_PLAN there is a new plan.
|
|
@retval REOPT_OLD_PLAN no new improved plan was produced, use the old one.
|
|
@retval REOPT_ERROR an irrecoverable error occurred during
|
|
reoptimization.
|
|
*/
|
|
|
|
JOIN::enum_reopt_result
JOIN::reoptimize(Item *added_where, table_map join_tables,
                 Join_plan_state *save_to)
{
  DYNAMIC_ARRAY added_keyuse;
  SARGABLE_PARAM *sargables= 0; /* Used only as a dummy parameter. */
  size_t org_keyuse_elements;

  /* Re-run the REF optimizer to take into account the new conditions. */
  if (update_ref_and_keys(thd, &added_keyuse, join_tab, table_count,
                          added_where, ~outer_join, select_lex, &sargables))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  /* The new condition gave no new ref accesses: keep the old plan. */
  if (!added_keyuse.elements)
  {
    delete_dynamic(&added_keyuse);
    return REOPT_OLD_PLAN;
  }

  if (save_to)
    save_query_plan(save_to);
  else
    reset_query_plan();

  if (!keyuse.buffer &&
      my_init_dynamic_array(thd->mem_root->psi_key, &keyuse, sizeof(KEYUSE),
                            20, 64, MYF(MY_THREAD_SPECIFIC)))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  /*
    Make room for the original and the added elements. The copies below
    write straight into keyuse.buffer, so a failed allocation must not be
    ignored.
  */
  org_keyuse_elements= save_to ? save_to->keyuse.elements : keyuse.elements;
  if (allocate_dynamic(&keyuse, org_keyuse_elements + added_keyuse.elements))
  {
    delete_dynamic(&added_keyuse);
    return REOPT_ERROR;
  }

  /* If needed, add the access methods from the original query plan. */
  if (save_to)
  {
    DBUG_ASSERT(!keyuse.elements);
    keyuse.elements= save_to->keyuse.elements;
    if (size_t e= keyuse.elements)
      memcpy(keyuse.buffer,
             save_to->keyuse.buffer, e * keyuse.size_of_element);
  }

  /* Add the new access methods to the keyuse array. */
  memcpy(keyuse.buffer + keyuse.elements * keyuse.size_of_element,
         added_keyuse.buffer,
         (size_t) added_keyuse.elements * added_keyuse.size_of_element);
  keyuse.elements+= added_keyuse.elements;
  /* added_keyuse contents is copied, and it is no longer needed. */
  delete_dynamic(&added_keyuse);

  if (sort_and_filter_keyuse(this, &keyuse, true))
    return REOPT_ERROR;
  optimize_keyuse(this, &keyuse);

  if (optimize_semijoin_nests(this, join_tables))
    return REOPT_ERROR;

  /* Re-run the join optimizer to compute a new query plan. */
  if (choose_plan(this, join_tables, 0))
    return REOPT_ERROR;

  return REOPT_NEW_PLAN;
}
|
|
|
|
|
|
/**
|
|
Cache constant expressions in WHERE, HAVING, ON conditions.
|
|
*/
|
|
|
|
void JOIN::cache_const_exprs()
|
|
{
|
|
uchar cache_flag= FALSE;
|
|
uchar *analyzer_arg= &cache_flag;
|
|
|
|
/* No need in cache if all tables are constant. */
|
|
if (const_tables == table_count)
|
|
return;
|
|
|
|
if (conds)
|
|
conds->top_level_compile(thd, &Item::cache_const_expr_analyzer, &analyzer_arg,
|
|
&Item::cache_const_expr_transformer, &cache_flag);
|
|
cache_flag= FALSE;
|
|
if (having)
|
|
having->top_level_compile(thd, &Item::cache_const_expr_analyzer,
|
|
&analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag);
|
|
|
|
for (JOIN_TAB *tab= first_depth_first_tab(this); tab;
|
|
tab= next_depth_first_tab(this, tab))
|
|
{
|
|
if (*tab->on_expr_ref)
|
|
{
|
|
cache_flag= FALSE;
|
|
(*tab->on_expr_ref)->top_level_compile(thd, &Item::cache_const_expr_analyzer,
|
|
&analyzer_arg, &Item::cache_const_expr_transformer, &cache_flag);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
Get the cost of using index keynr to read #LIMIT matching rows by calling
|
|
ha_index_next() repeatedly (either with index scan, quick or 'ref')
|
|
|
|
@detail
|
|
- If there is a quick select, we try to use it.
|
|
- If there is no quick select return the full cost from
|
|
cost_for_index_read() (Doing a full scan with up to 'limit' records)
|
|
|
|
@param pos Result from best_access_path(). Is NULL for
|
|
single-table UPDATE/DELETE
|
|
@param table Table to be sorted
|
|
@param keynr Which index to use
|
|
@param rows_limit_arg How many rows we want to read.
|
|
This may be different than what was in the original
|
|
LIMIT: the caller may have included fanouts and extra
|
|
rows needed for handling GROUP BY.
|
|
@param rows_to_scan Number of rows to scan if there is no range.
|
|
@param read_cost Full cost, including cost of WHERE.
|
|
@param read_rows Number of rows that needs to be read
|
|
|
|
@return
|
|
0 No possible range scan, cost is for index scan
|
|
1 Range scan should be used
|
|
|
|
For the moment we don't take selectivity of the WHERE clause into
|
|
account when calculating the number of rows we have to read
|
|
(except what we get from quick select).
|
|
|
|
The cost is calculated the following way:
|
|
(The selectivity is there to take into account the increased number of
|
|
rows that we have to read to find LIMIT matching rows)
|
|
*/
|
|
|
|
static bool get_range_limit_read_cost(const POSITION *pos,
|
|
const TABLE *table,
|
|
uint keynr,
|
|
ha_rows rows_limit_arg,
|
|
ha_rows rows_to_scan,
|
|
double *read_cost,
|
|
double *read_rows)
|
|
{
|
|
double rows_limit= rows2double(rows_limit_arg);
|
|
if (table->opt_range_keys.is_set(keynr))
|
|
{
|
|
/*
|
|
Start from quick select's rows and cost. These are always cheaper than
|
|
full index scan/cost.
|
|
*/
|
|
double best_rows, range_rows;
|
|
double range_cost= (double) table->opt_range[keynr].cost.fetch_cost();
|
|
best_rows= range_rows= (double) table->opt_range[keynr].rows;
|
|
|
|
if (pos)
|
|
{
|
|
double cond_selectivity;
|
|
/*
|
|
Take into count table selectivity as the number of accepted
|
|
rows for this table will be 'records_out'.
|
|
|
|
For example:
|
|
key1 BETWEEN 10 AND 1000 AND key2 BETWEEN 10 AND 20
|
|
|
|
If we are trying to do an ORDER BY on key1, we have to take into
|
|
account that using key2 we have to examine much fewer rows.
|
|
*/
|
|
best_rows= pos->records_out; // Best rows with any key/keys
|
|
/*
|
|
We assign "double range_rows" from integer #rows a few lines above
|
|
so comparison with 0.0 makes sense
|
|
*/
|
|
if (range_rows > 0.0)
|
|
cond_selectivity= best_rows / range_rows;
|
|
else
|
|
cond_selectivity= 1.0;
|
|
|
|
DBUG_ASSERT(cond_selectivity <= 1.000000001);
|
|
set_if_smaller(cond_selectivity, 1.0);
|
|
|
|
/*
|
|
We have to examine more rows in the proportion to the selectivity of the
|
|
the table
|
|
*/
|
|
rows_limit= rows_limit / cond_selectivity;
|
|
}
|
|
|
|
if (best_rows > rows_limit)
|
|
{
|
|
/*
|
|
LIMIT clause specifies that we will need to read fewer records than
|
|
quick select will return. Assume that quick select's cost is
|
|
proportional to the number of records we need to return (e.g. if we
|
|
only need 1/3rd of records, it will cost us 1/3rd of quick select's
|
|
read time)
|
|
*/
|
|
range_cost*= rows_limit / best_rows;
|
|
range_rows= rows_limit;
|
|
}
|
|
*read_cost= range_cost + range_rows * WHERE_COST_THD(table->in_use);
|
|
*read_rows= range_rows;
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
Calculate the number of rows we have to check if we are
|
|
doing a full index scan (as a suitable range scan was not available).
|
|
|
|
We assume that each of the tested indexes is not correlated
|
|
with ref_key. Thus, to select first N records we have to scan
|
|
N/selectivity(ref_key) index entries.
|
|
selectivity(ref_key) = #scanned_records/#table_records =
|
|
refkey_rows_estimate/table_records.
|
|
In any case we can't select more than #table_records.
|
|
N/(refkey_rows_estimate/table_records) > table_records
|
|
<=> N > refkey_rows_estimate.
|
|
*/
|
|
ALL_READ_COST cost= cost_for_index_read(table->in_use, table, keynr,
|
|
rows_to_scan, 0);
|
|
*read_cost= (table->file->cost(&cost) +
|
|
rows_to_scan * WHERE_COST_THD(table->in_use));
|
|
*read_rows= rows2double(rows_to_scan);
|
|
return 0;
|
|
}
|
|
|
|
|
|
/**
|
|
Find a cheaper access key than a given key
|
|
|
|
@param tab NULL or JOIN_TAB of the accessed table
|
|
@param order Linked list of ORDER BY arguments
|
|
@param table Table if tab == NULL or tab->table
|
|
@param usable_keys Key map to find a cheaper key in
|
|
@param ref_key
|
|
0 <= key < MAX_KEY - Key that is currently used for finding
|
|
row
|
|
MAX_KEY - means index_merge is used
|
|
-1 - means we're currently not using an
|
|
index to find rows.
|
|
|
|
@param select_limit LIMIT value
|
|
@param [out] new_key Key number if success, otherwise undefined
|
|
@param [out] new_key_direction Return -1 (reverse) or +1 if success,
|
|
otherwise undefined
|
|
@param [out] new_select_limit Estimate of the number of rows we have
|
|
to read to find 'select_limit' rows.
|
|
@param [out] new_used_key_parts NULL by default, otherwise return number
|
|
of new_key prefix columns if success
|
|
or undefined if the function fails
|
|
@param [out] saved_best_key_parts NULL by default, otherwise preserve the
|
|
value for further use in QUICK_SELECT_DESC
|
|
|
|
@note
|
|
This function takes into account table->opt_range_condition_rows statistic
|
|
(that is calculated by the make_join_statistics function).
|
|
However, single table procedures such as Sql_cmd_update::update_single_table()
|
|
and Sql_cmd_delete::delete_single_table()
|
|
never call make_join_statistics, so they have to update it manually
|
|
(@see get_index_for_order()).
|
|
*/
|
|
|
|
static bool
|
|
test_if_cheaper_ordering(bool in_join_optimizer,
|
|
const JOIN_TAB *tab, ORDER *order, TABLE *table,
|
|
key_map usable_keys, int ref_key,
|
|
ha_rows select_limit_arg,
|
|
int *new_key, int *new_key_direction,
|
|
ha_rows *new_select_limit, double *new_read_time,
|
|
uint *new_used_key_parts,
|
|
uint *saved_best_key_parts)
|
|
{
|
|
DBUG_ENTER("test_if_cheaper_ordering");
|
|
/*
|
|
Check whether there is an index compatible with the given order
|
|
usage of which is cheaper than usage of the ref_key index (ref_key>=0)
|
|
or a table scan.
|
|
It may be the case if ORDER/GROUP BY is used with LIMIT.
|
|
*/
|
|
ha_rows best_select_limit= HA_POS_ERROR;
|
|
JOIN *join;
|
|
uint nr;
|
|
key_map keys;
|
|
int best_key_direction= 0;
|
|
double read_time, filesort_cost;
|
|
enum sort_type filesort_type;
|
|
int best_key= -1;
|
|
double fanout;
|
|
ha_rows table_records= table->stat_records();
|
|
bool group;
|
|
const bool has_limit= (select_limit_arg != HA_POS_ERROR);
|
|
THD *thd= table->in_use;
|
|
POSITION *position;
|
|
ha_rows rows_estimate, refkey_rows_estimate;
|
|
Json_writer_object trace_wrapper(thd);
|
|
Json_writer_object trace_cheaper_ordering(
|
|
thd, "reconsidering_access_paths_for_index_ordering");
|
|
|
|
if (tab)
|
|
{
|
|
join= tab->join;
|
|
position= &join->best_positions[tab- join->join_tab];
|
|
group=join->group && order == join->group_list;
|
|
/* Take into account that records_out can be < 1.0 in case of GROUP BY */
|
|
rows_estimate= double_to_rows(position->records_out+0.5);
|
|
set_if_bigger(rows_estimate, 1);
|
|
refkey_rows_estimate= rows_estimate;
|
|
}
|
|
else
|
|
{
|
|
join= NULL;
|
|
position= 0;
|
|
refkey_rows_estimate= rows_estimate= table_records;
|
|
group= 0;
|
|
}
|
|
trace_cheaper_ordering.add("clause", group ? "GROUP BY" : "ORDER BY");
|
|
|
|
/*
|
|
If not used with LIMIT, only use keys if the whole query can be
|
|
resolved with a key; This is because filesort() is usually faster than
|
|
retrieving all rows through an index.
|
|
*/
|
|
if (select_limit_arg >= table_records)
|
|
{
|
|
keys= *table->file->keys_to_use_for_scanning();
|
|
keys.merge(table->covering_keys);
|
|
|
|
/*
|
|
We are adding here also the index specified in FORCE INDEX clause,
|
|
if any.
|
|
This is to allow users to use index in ORDER BY.
|
|
*/
|
|
if (table->force_index)
|
|
keys.merge(group ? table->keys_in_use_for_group_by :
|
|
table->keys_in_use_for_order_by);
|
|
keys.intersect(usable_keys);
|
|
}
|
|
else
|
|
keys= usable_keys;
|
|
|
|
|
|
if (join) // True if SELECT
|
|
{
|
|
uint nr= join->const_tables;
|
|
fanout= 1.0;
|
|
if (nr != join->table_count - 1) // If not last table
|
|
fanout= (join->join_record_count / position->records_out);
|
|
else
|
|
{
|
|
/* Only one table. Limit cannot be bigger than table_records */
|
|
set_if_smaller(select_limit_arg, table_records);
|
|
}
|
|
read_time= position->read_time;
|
|
}
|
|
else
|
|
{
|
|
/* Probably an update or delete. Assume we will do a full table scan */
|
|
fanout= 1.0;
|
|
read_time= table->file->cost(table->file->ha_scan_and_compare_time(rows_estimate));
|
|
set_if_smaller(select_limit_arg, table_records);
|
|
}
|
|
|
|
filesort_cost= cost_of_filesort(table, order, rows_estimate,
|
|
select_limit_arg, &filesort_type);
|
|
read_time+= filesort_cost;
|
|
|
|
/*
|
|
Calculate the selectivity of the ref_key for REF_ACCESS. For
|
|
RANGE_ACCESS we use table->opt_range_condition_rows.
|
|
*/
|
|
if (in_join_optimizer)
|
|
{
|
|
if (ref_key >= 0 && ref_key != MAX_KEY &&
|
|
join->best_positions[join->const_tables].type == JT_REF)
|
|
{
|
|
refkey_rows_estimate=
|
|
(ha_rows)join->best_positions[join->const_tables].records_read;
|
|
set_if_bigger(refkey_rows_estimate, 1);
|
|
}
|
|
}
|
|
else if (ref_key >= 0 && ref_key != MAX_KEY && tab->type == JT_REF)
|
|
{
|
|
/*
|
|
If ref access uses keypart=const for all its key parts,
|
|
and quick select uses the same # of key parts, then they are equivalent.
|
|
Reuse #rows estimate from quick select as it is more precise.
|
|
|
|
Note: we could just have used
|
|
join->best_positions[join->const_tables].records_read
|
|
here. That number was computed in best_access_path() and it already
|
|
includes adjustments based on table->opt_range[ref_key].rows.
|
|
*/
|
|
if (tab->ref.const_ref_part_map ==
|
|
make_prev_keypart_map(tab->ref.key_parts) &&
|
|
table->opt_range_keys.is_set(ref_key) &&
|
|
table->opt_range[ref_key].key_parts == tab->ref.key_parts)
|
|
refkey_rows_estimate= table->opt_range[ref_key].rows;
|
|
else
|
|
{
|
|
const KEY *ref_keyinfo= table->key_info + ref_key;
|
|
refkey_rows_estimate=
|
|
(ha_rows)ref_keyinfo->actual_rec_per_key(tab->ref.key_parts - 1);
|
|
}
|
|
set_if_bigger(refkey_rows_estimate, 1);
|
|
}
|
|
|
|
if (unlikely(thd->trace_started()))
|
|
{
|
|
if (tab)
|
|
trace_cheaper_ordering.add_table_name(tab);
|
|
else
|
|
trace_cheaper_ordering.add_table_name(table);
|
|
trace_cheaper_ordering.
|
|
add("rows_estimation", rows_estimate).
|
|
add("filesort_cost", filesort_cost).
|
|
add("read_cost", read_time).
|
|
add("filesort_type", filesort_names[filesort_type].str).
|
|
add("fanout", fanout);
|
|
}
|
|
|
|
/*
|
|
Force using an index for sorting if there was no ref key
|
|
and FORCE INDEX was used.
|
|
*/
|
|
if (table->force_index && ref_key < 0)
|
|
read_time= DBL_MAX;
|
|
|
|
Json_writer_array possible_keys(thd,"possible_keys");
|
|
for (nr=0; nr < table->s->total_keys ; nr++)
|
|
{
|
|
int direction;
|
|
ha_rows select_limit= select_limit_arg;
|
|
ha_rows estimated_rows_to_scan;
|
|
uint used_key_parts= 0;
|
|
double range_cost, range_rows;
|
|
Json_writer_object possible_key(thd);
|
|
possible_key.add("index", table->key_info[nr].name);
|
|
|
|
if (keys.is_set(nr) &&
|
|
(direction= test_if_order_by_key(join, order, table, nr,
|
|
&used_key_parts)))
|
|
{
|
|
/*
|
|
At this point we are sure that ref_key is a non-ordering
|
|
key (where "ordering key" is a key that will return rows
|
|
in the order required by ORDER BY).
|
|
*/
|
|
DBUG_ASSERT (ref_key != (int) nr);
|
|
|
|
possible_key.add("can_resolve_order", true);
|
|
possible_key.add("direction", direction);
|
|
bool is_covering= (table->covering_keys.is_set(nr) ||
|
|
table->is_clustering_key(nr));
|
|
/*
|
|
Don't use an index scan with ORDER BY without limit.
|
|
For GROUP BY without limit always use index scan
|
|
if there is a suitable index.
|
|
Why we hold to this asymmetry hardly can be explained
|
|
rationally. It's easy to demonstrate that using
|
|
temporary table + filesort could be cheaper for grouping
|
|
queries too.
|
|
*/
|
|
if (is_covering || has_limit ||
|
|
(ref_key < 0 && (group || table->force_index)))
|
|
{
|
|
double rec_per_key;
|
|
if (group)
|
|
{
|
|
/*
|
|
Used_key_parts can be larger than keyinfo->user_defined_key_parts
|
|
when using a secondary index clustered with a primary
|
|
key (e.g. as in Innodb).
|
|
See Bug #28591 for details.
|
|
*/
|
|
KEY *keyinfo= table->key_info+nr;
|
|
uint used_index_parts= keyinfo->user_defined_key_parts;
|
|
uint used_pk_parts= 0;
|
|
if (used_key_parts > used_index_parts)
|
|
used_pk_parts= used_key_parts-used_index_parts;
|
|
rec_per_key= used_key_parts ?
|
|
keyinfo->actual_rec_per_key(used_key_parts-1) : 1;
|
|
/* Take into account the selectivity of the used pk prefix */
|
|
if (used_pk_parts)
|
|
{
|
|
/*
|
|
TODO: This code need to be tested with debugger
|
|
- Why set rec_per_key to 1 if we don't have primary key data
|
|
or the full key is used ?
|
|
- If used_pk_parts == 1, we don't take into account that
|
|
the first primary key part could part of the current key.
|
|
*/
|
|
KEY *pkinfo=tab->table->key_info+table->s->primary_key;
|
|
/*
|
|
If the values of of records per key for the prefixes
|
|
of the primary key are considered unknown we assume
|
|
they are equal to 1.
|
|
*/
|
|
if (used_key_parts == pkinfo->user_defined_key_parts ||
|
|
pkinfo->rec_per_key[0] == 0)
|
|
rec_per_key= 1;
|
|
if (rec_per_key > 1)
|
|
{
|
|
rec_per_key*= pkinfo->actual_rec_per_key(used_pk_parts-1);
|
|
rec_per_key/= pkinfo->actual_rec_per_key(0);
|
|
/*
|
|
The value of rec_per_key for the extended key has
|
|
to be adjusted accordingly if some components of
|
|
the secondary key are included in the primary key.
|
|
*/
|
|
for(uint i= 1; i < used_pk_parts; i++)
|
|
{
|
|
if (pkinfo->key_part[i].field->key_start.is_set(nr))
|
|
{
|
|
/*
|
|
We presume here that for any index rec_per_key[i] != 0
|
|
if rec_per_key[0] != 0.
|
|
*/
|
|
DBUG_ASSERT(pkinfo->actual_rec_per_key(i));
|
|
rec_per_key*= pkinfo->actual_rec_per_key(i-1);
|
|
rec_per_key/= pkinfo->actual_rec_per_key(i);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
set_if_bigger(rec_per_key, 1);
|
|
/*
|
|
With a grouping query each group containing on average
|
|
rec_per_key records produces only one row that will
|
|
be included into the result set.
|
|
*/
|
|
if (select_limit > table_records/rec_per_key)
|
|
select_limit= table_records;
|
|
else
|
|
select_limit= (ha_rows) (select_limit*rec_per_key);
|
|
} /* group */
|
|
|
|
/*
|
|
If tab=tk is not the last joined table tn then to get first
|
|
L records from the result set we can expect to retrieve
|
|
only L/fanout(tk,tn) where fanout(tk,tn) says how many
|
|
rows in the record set on average will match each row tk.
|
|
Usually our estimates for fanouts are too pessimistic.
|
|
So the estimate for L/fanout(tk,tn) will be too optimistic
|
|
and as result we'll choose an index scan when using ref/range
|
|
access + filesort will be cheaper.
|
|
*/
|
|
select_limit= double_to_rows(select_limit/fanout);
|
|
set_if_bigger(select_limit, 1);
|
|
|
|
if (select_limit > refkey_rows_estimate)
|
|
estimated_rows_to_scan= table_records;
|
|
else
|
|
estimated_rows_to_scan= (ha_rows) (select_limit *
|
|
(double) table_records /
|
|
(double) refkey_rows_estimate);
|
|
|
|
bool range_scan= get_range_limit_read_cost(tab ? position : 0,
|
|
table,
|
|
nr,
|
|
select_limit,
|
|
estimated_rows_to_scan,
|
|
&range_cost,
|
|
&range_rows);
|
|
if (unlikely(possible_key.trace_started()))
|
|
{
|
|
possible_key
|
|
.add("rows_to_examine", range_rows)
|
|
.add("range_scan", range_scan)
|
|
.add("scan_cost", range_cost);
|
|
}
|
|
|
|
/*
|
|
We will try use the key if:
|
|
- If there is no ref key and no usable keys has yet been found and
|
|
there is either a group by or a FORCE_INDEX
|
|
- If the new cost is better than read_time
|
|
*/
|
|
if (range_cost < read_time)
|
|
{
|
|
read_time= range_cost;
|
|
possible_key.add("chosen", true);
|
|
best_key= nr;
|
|
if (saved_best_key_parts)
|
|
*saved_best_key_parts= used_key_parts;
|
|
if (new_used_key_parts)
|
|
*new_used_key_parts= table->s->key_info[nr].user_defined_key_parts;
|
|
best_key_direction= direction;
|
|
best_select_limit= estimated_rows_to_scan;
|
|
}
|
|
else if (unlikely(possible_key.trace_started()))
|
|
{
|
|
possible_key
|
|
.add("usable", false)
|
|
.add("cause", "cost");
|
|
}
|
|
}
|
|
else if (unlikely(possible_key.trace_started()))
|
|
{
|
|
possible_key.add("usable", false);
|
|
if (!group && select_limit == HA_POS_ERROR)
|
|
possible_key.add("cause", "order by without limit");
|
|
}
|
|
}
|
|
else if (unlikely(possible_key.trace_started()))
|
|
{
|
|
if (keys.is_set(nr))
|
|
{
|
|
possible_key.
|
|
add("can_resolve_order", false).
|
|
add("cause", "order can not be resolved by key");
|
|
}
|
|
else
|
|
{
|
|
possible_key.
|
|
add("can_resolve_order", false).
|
|
add("cause", "not usable index for the query");
|
|
}
|
|
}
|
|
}
|
|
|
|
if (best_key < 0 || best_key == ref_key)
|
|
DBUG_RETURN(FALSE);
|
|
|
|
*new_key= best_key;
|
|
*new_key_direction= best_key_direction;
|
|
*new_select_limit= has_limit ? best_select_limit : table_records;
|
|
*new_read_time= read_time;
|
|
DBUG_RETURN(TRUE);
|
|
}
|
|
|
|
|
|
/**
|
|
Find a key to apply single table UPDATE/DELETE by a given ORDER
|
|
|
|
@param order Linked list of ORDER BY arguments
|
|
@param table Table to find a key
|
|
@param select Pointer to access/update select->quick (if any)
|
|
@param limit LIMIT clause parameter
|
|
@param [out] scanned_limit How many records we expect to scan
|
|
Valid if *need_sort=FALSE.
|
|
@param [out] need_sort TRUE if filesort needed
|
|
@param [out] reverse
|
|
TRUE if the key is reversed again given ORDER (undefined if key == MAX_KEY)
|
|
|
|
@return
|
|
- MAX_KEY if no key found (need_sort == TRUE)
|
|
- MAX_KEY if quick select result order is OK (need_sort == FALSE)
|
|
- key number (either index scan or quick select) (need_sort == FALSE)
|
|
|
|
@note
|
|
Side effects:
|
|
- may deallocate or deallocate and replace select->quick;
|
|
- may set table->opt_range_condition_rows and table->quick_rows[...]
|
|
to table->file->stats.records.
|
|
*/
|
|
|
|
uint get_index_for_order(ORDER *order, TABLE *table, SQL_SELECT *select,
|
|
ha_rows limit, ha_rows *scanned_limit,
|
|
bool *need_sort, bool *reverse)
|
|
{
|
|
if (!order)
|
|
{
|
|
*need_sort= FALSE;
|
|
if (select && select->quick)
|
|
return select->quick->index; // index or MAX_KEY, use quick select as is
|
|
else
|
|
return table->file->key_used_on_scan; // MAX_KEY or index for some engine
|
|
}
|
|
|
|
if (!is_simple_order(order)) // just to cut further expensive checks
|
|
{
|
|
*need_sort= TRUE;
|
|
return MAX_KEY;
|
|
}
|
|
|
|
if (select && select->quick)
|
|
{
|
|
if (select->quick->index == MAX_KEY)
|
|
{
|
|
*need_sort= TRUE;
|
|
return MAX_KEY;
|
|
}
|
|
|
|
uint used_key_parts;
|
|
switch (test_if_order_by_key(NULL, order, table, select->quick->index,
|
|
&used_key_parts)) {
|
|
case 1: // desired order
|
|
*need_sort= FALSE;
|
|
*scanned_limit= MY_MIN(limit, select->quick->records);
|
|
return select->quick->index;
|
|
case 0: // unacceptable order
|
|
*need_sort= TRUE;
|
|
return MAX_KEY;
|
|
case -1: // desired order, but opposite direction
|
|
{
|
|
QUICK_SELECT_I *reverse_quick;
|
|
if ((reverse_quick=
|
|
select->quick->make_reverse(used_key_parts)))
|
|
{
|
|
select->set_quick(reverse_quick);
|
|
*need_sort= FALSE;
|
|
*scanned_limit= MY_MIN(limit, select->quick->records);
|
|
return select->quick->index;
|
|
}
|
|
else
|
|
{
|
|
*need_sort= TRUE;
|
|
return MAX_KEY;
|
|
}
|
|
}
|
|
}
|
|
DBUG_ASSERT(0);
|
|
}
|
|
else if (limit != HA_POS_ERROR)
|
|
{
|
|
// check if some index scan & LIMIT is more efficient than filesort
|
|
|
|
/*
|
|
Update opt_range_condition_rows since single table UPDATE/DELETE
|
|
procedures don't call make_join_statistics() and leave this
|
|
variable uninitialized.
|
|
*/
|
|
table->opt_range_condition_rows= table->stat_records();
|
|
|
|
int key, direction;
|
|
double new_cost;
|
|
if (test_if_cheaper_ordering(FALSE, NULL, order, table,
|
|
table->keys_in_use_for_order_by, -1, limit,
|
|
&key, &direction, &limit, &new_cost) &&
|
|
!is_key_used(table, key, table->write_set))
|
|
{
|
|
*need_sort= FALSE;
|
|
*scanned_limit= limit;
|
|
*reverse= (direction < 0);
|
|
return key;
|
|
}
|
|
}
|
|
*need_sort= TRUE;
|
|
return MAX_KEY;
|
|
}
|
|
|
|
|
|
/*
|
|
Count how many times the specified conditions are true for first rows_to_read
|
|
rows of the table.
|
|
|
|
@param thd Thread handle
|
|
@param rows_to_read How many rows to sample
|
|
@param table Table to use
|
|
@conds conds INOUT List of conditions and counters for them
|
|
|
|
@return Number of we've checked. It can be equal or less than rows_to_read.
|
|
0 is returned for error or when the table had no rows.
|
|
*/
|
|
|
|
ulong check_selectivity(THD *thd,
|
|
ulong rows_to_read,
|
|
TABLE *table,
|
|
List<COND_STATISTIC> *conds)
|
|
{
|
|
ulong count= 0;
|
|
COND_STATISTIC *cond;
|
|
List_iterator_fast<COND_STATISTIC> it(*conds);
|
|
handler *file= table->file;
|
|
uchar *record= table->record[0];
|
|
int error= 0;
|
|
DBUG_ENTER("check_selectivity");
|
|
|
|
DBUG_ASSERT(rows_to_read > 0);
|
|
while ((cond= it++))
|
|
{
|
|
DBUG_ASSERT(cond->cond);
|
|
DBUG_ASSERT(cond->cond->used_tables() == table->map);
|
|
cond->positive= 0;
|
|
}
|
|
it.rewind();
|
|
|
|
if (unlikely(file->ha_rnd_init_with_error(1)))
|
|
DBUG_RETURN(0);
|
|
do
|
|
{
|
|
error= file->ha_rnd_next(record);
|
|
|
|
if (unlikely(thd->killed))
|
|
{
|
|
thd->send_kill_message();
|
|
count= 0;
|
|
goto err;
|
|
}
|
|
if (unlikely(error))
|
|
{
|
|
if (error == HA_ERR_END_OF_FILE)
|
|
break;
|
|
goto err;
|
|
}
|
|
|
|
count++;
|
|
while ((cond= it++))
|
|
{
|
|
if (cond->cond->val_bool())
|
|
cond->positive++;
|
|
}
|
|
it.rewind();
|
|
|
|
} while (count < rows_to_read);
|
|
|
|
file->ha_rnd_end();
|
|
DBUG_RETURN(count);
|
|
|
|
err:
|
|
DBUG_PRINT("error", ("error %d", error));
|
|
file->ha_rnd_end();
|
|
DBUG_RETURN(0);
|
|
}
|
|
|
|
/****************************************************************************
|
|
AGGR_OP implementation
|
|
****************************************************************************/
|
|
|
|
/**
|
|
@brief Instantiate tmp table for aggregation and start index scan if needed
|
|
@todo Tmp table always would be created, even for empty result. Extend
|
|
executor to avoid tmp table creation when no rows were written
|
|
into tmp table.
|
|
@return
|
|
true error
|
|
false ok
|
|
*/
|
|
|
|
bool
|
|
AGGR_OP::prepare_tmp_table()
|
|
{
|
|
TABLE *table= join_tab->table;
|
|
JOIN *join= join_tab->join;
|
|
int rc= 0;
|
|
|
|
if (!join_tab->table->is_created())
|
|
{
|
|
if (instantiate_tmp_table(table, join_tab->tmp_table_param->keyinfo,
|
|
join_tab->tmp_table_param->start_recinfo,
|
|
&join_tab->tmp_table_param->recinfo,
|
|
join->select_options))
|
|
return true;
|
|
(void) table->file->extra(HA_EXTRA_WRITE_CACHE);
|
|
}
|
|
/* If it wasn't already, start index scan for grouping using table index. */
|
|
if (!table->file->inited && table->group &&
|
|
join_tab->tmp_table_param->sum_func_count && table->s->keys)
|
|
rc= table->file->ha_index_init(0, 0);
|
|
else
|
|
{
|
|
/* Start index scan in scanning mode */
|
|
rc= table->file->ha_rnd_init(true);
|
|
}
|
|
if (rc)
|
|
{
|
|
table->file->print_error(rc, MYF(0));
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Prepare table if necessary and call write_func to save record
|
|
|
|
@param end_of_records the end_of_record signal to pass to the writer
|
|
|
|
@return return one of enum_nested_loop_state.
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
AGGR_OP::put_record(bool end_of_records)
|
|
{
|
|
// Lasy tmp table creation/initialization
|
|
if (!join_tab->table->file->inited)
|
|
if (prepare_tmp_table())
|
|
return NESTED_LOOP_ERROR;
|
|
enum_nested_loop_state rc= (*write_func)(join_tab->join, join_tab,
|
|
end_of_records);
|
|
return rc;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Finish rnd/index scan after accumulating records, switch ref_array,
|
|
and send accumulated records further.
|
|
@return return one of enum_nested_loop_state.
|
|
*/
|
|
|
|
enum_nested_loop_state
|
|
AGGR_OP::end_send()
|
|
{
|
|
enum_nested_loop_state rc= NESTED_LOOP_OK;
|
|
TABLE *table= join_tab->table;
|
|
JOIN *join= join_tab->join;
|
|
|
|
// All records were stored, send them further
|
|
int tmp, new_errno= 0;
|
|
|
|
if ((rc= put_record(true)) < NESTED_LOOP_OK)
|
|
return rc;
|
|
|
|
if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
|
|
{
|
|
DBUG_PRINT("error",("extra(HA_EXTRA_NO_CACHE) failed"));
|
|
new_errno= tmp;
|
|
}
|
|
if ((tmp= table->file->ha_index_or_rnd_end()))
|
|
{
|
|
DBUG_PRINT("error",("ha_index_or_rnd_end() failed"));
|
|
new_errno= tmp;
|
|
}
|
|
if (new_errno)
|
|
{
|
|
table->file->print_error(new_errno,MYF(0));
|
|
return NESTED_LOOP_ERROR;
|
|
}
|
|
|
|
// Update ref array
|
|
join_tab->join->set_items_ref_array(*join_tab->ref_array);
|
|
bool keep_last_filesort_result = join_tab->filesort ? false : true;
|
|
if (join_tab->window_funcs_step)
|
|
{
|
|
if (join_tab->window_funcs_step->exec(join, keep_last_filesort_result))
|
|
return NESTED_LOOP_ERROR;
|
|
}
|
|
|
|
table->reginfo.lock_type= TL_UNLOCK;
|
|
|
|
bool in_first_read= true;
|
|
|
|
/*
|
|
Reset the counter before copying rows from internal temporary table to
|
|
INSERT table.
|
|
*/
|
|
join_tab->join->thd->get_stmt_da()->reset_current_row_for_warning(1);
|
|
while (rc == NESTED_LOOP_OK)
|
|
{
|
|
int error;
|
|
if (in_first_read)
|
|
{
|
|
in_first_read= false;
|
|
error= join_init_read_record(join_tab);
|
|
}
|
|
else
|
|
error= join_tab->read_record.read_record();
|
|
|
|
if (unlikely(error > 0 || (join->thd->is_error()))) // Fatal error
|
|
rc= NESTED_LOOP_ERROR;
|
|
else if (error < 0)
|
|
break;
|
|
else if (unlikely(join->thd->killed)) // Aborted by user
|
|
{
|
|
join->thd->send_kill_message();
|
|
rc= NESTED_LOOP_KILLED;
|
|
}
|
|
else
|
|
{
|
|
rc= evaluate_join_record(join, join_tab, 0);
|
|
}
|
|
}
|
|
|
|
if (keep_last_filesort_result)
|
|
{
|
|
delete join_tab->filesort_result;
|
|
join_tab->filesort_result= NULL;
|
|
}
|
|
|
|
// Finish rnd scn after sending records
|
|
if (join_tab->table->file->inited)
|
|
join_tab->table->file->ha_rnd_end();
|
|
|
|
return rc;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Remove marked top conjuncts of a condition
|
|
|
|
@param thd The thread handle
|
|
@param cond The condition which subformulas are to be removed
|
|
|
|
@details
|
|
The function removes all top conjuncts marked with the flag
|
|
MARKER_FULL_EXTRACTION from the condition 'cond'. The resulting
|
|
formula is returned a the result of the function
|
|
If 'cond' s marked with such flag the function returns 0.
|
|
The function clear the extraction flags for the removed
|
|
formulas
|
|
|
|
@retval
|
|
condition without removed subformulas
|
|
0 if the whole 'cond' is removed
|
|
*/
|
|
|
|
Item *remove_pushed_top_conjuncts(THD *thd, Item *cond)
|
|
{
|
|
if (cond->get_extraction_flag() == MARKER_FULL_EXTRACTION)
|
|
{
|
|
cond->clear_extraction_flag();
|
|
return 0;
|
|
}
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
{
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item= li++))
|
|
{
|
|
if (item->get_extraction_flag() == MARKER_FULL_EXTRACTION)
|
|
{
|
|
item->clear_extraction_flag();
|
|
li.remove();
|
|
}
|
|
}
|
|
switch (((Item_cond*) cond)->argument_list()->elements)
|
|
{
|
|
case 0:
|
|
return 0;
|
|
case 1:
|
|
return ((Item_cond*) cond)->argument_list()->head();
|
|
default:
|
|
return cond;
|
|
}
|
|
}
|
|
}
|
|
return cond;
|
|
}
|
|
|
|
|
|
/*
|
|
There are 5 cases in which we shortcut the join optimization process as we
|
|
conclude that the join would be a degenerate one
|
|
1) IMPOSSIBLE WHERE
|
|
2) MIN/MAX optimization (@see opt_sum_query)
|
|
3) EMPTY CONST TABLE
|
|
If a window function is present in any of the above cases then to get the
|
|
result of the window function, we need to execute it. So we need to
|
|
create a temporary table for its execution. Here we need to take in mind
|
|
that aggregate functions and non-aggregate function need not be executed.
|
|
|
|
*/
|
|
|
|
void JOIN::handle_implicit_grouping_with_window_funcs()
|
|
{
|
|
if (select_lex->have_window_funcs() && send_row_on_empty_set())
|
|
{
|
|
const_tables= top_join_tab_count= table_count= 0;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
@brief
|
|
Perform a partial cleanup for the JOIN_TAB structure
|
|
|
|
@note
|
|
this is used to cleanup resources for the re-execution of correlated
|
|
subqueries.
|
|
*/
|
|
void JOIN_TAB::partial_cleanup()
|
|
{
|
|
if (!table)
|
|
return;
|
|
|
|
if (table->is_created())
|
|
{
|
|
table->file->ha_index_or_rnd_end();
|
|
DBUG_PRINT("info", ("close index: %s.%s alias: %s",
|
|
table->s->db.str,
|
|
table->s->table_name.str,
|
|
table->alias.c_ptr()));
|
|
if (aggr)
|
|
{
|
|
int tmp= 0;
|
|
if ((tmp= table->file->extra(HA_EXTRA_NO_CACHE)))
|
|
table->file->print_error(tmp, MYF(0));
|
|
}
|
|
}
|
|
delete filesort_result;
|
|
filesort_result= NULL;
|
|
free_cache(&read_record);
|
|
}
|
|
|
|
/**
|
|
@brief
|
|
Construct not null conditions for provingly not nullable fields
|
|
|
|
@details
|
|
For each non-constant joined table the function creates a conjunction
|
|
of IS NOT NULL predicates containing a predicate for each field used
|
|
in the WHERE clause or an OR expression such that
|
|
- is declared as nullable
|
|
- for which it can proved be that it is null-rejected
|
|
- is a part of some index.
|
|
This conjunction could be anded with either the WHERE condition or with
|
|
an ON expression and the modified join query would produce the same
|
|
result set as the original one.
|
|
If a conjunction of IS NOT NULL predicates is constructed for an inner
|
|
table of an outer join OJ that is not an inner table of embedded outer
|
|
joins then it is to be anded with the ON expression of OJ.
|
|
The constructed conjunctions of IS NOT NULL predicates are attached
|
|
to the corresponding tables. They used for range analysis complementary
|
|
to other sargable range conditions.
|
|
|
|
@note
|
|
Let f be a field of the joined table t. In the context of the upper
|
|
paragraph field f is called null-rejected if any the following holds:
|
|
|
|
- t is a table of a top inner join and a conjunctive formula that rejects
|
|
rows with null values for f can be extracted from the WHERE condition
|
|
|
|
- t is an outer table of a top outer join operation and a conjunctive
|
|
formula over the outer tables of the outer join that rejects rows with
|
|
null values for can be extracted from the WHERE condition
|
|
|
|
- t is an outer table of a non-top outer join operation and a conjunctive
|
|
formula over the outer tables of the outer join that rejects rows with
|
|
null values for f can be extracted from the ON expression of the
|
|
embedding outer join
|
|
|
|
- the joined table is an inner table of a outer join operation and
|
|
a conjunctive formula over inner tables of the outer join that rejects
|
|
rows with null values for f can be extracted from the ON expression of
|
|
the outer join operation.
|
|
|
|
It is assumed above that all inner join nests have been eliminated and
|
|
that all possible conversions of outer joins into inner joins have been
|
|
already done.
|
|
*/
|
|
|
|
void JOIN::make_notnull_conds_for_range_scans()
|
|
{
|
|
DBUG_ENTER("JOIN::make_notnull_conds_for_range_scans");
|
|
|
|
if (impossible_where ||
|
|
!optimizer_flag(thd, OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN))
|
|
{
|
|
/* Complementary range analysis is not needed */
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
if (conds && build_notnull_conds_for_range_scans(this, conds,
|
|
conds->used_tables()))
|
|
{
|
|
/*
|
|
Found a IS NULL conjunctive predicate for a null-rejected field
|
|
in the WHERE clause
|
|
*/
|
|
conds= (Item*) Item_false;
|
|
cond_equal= 0;
|
|
impossible_where= true;
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
List_iterator<TABLE_LIST> li(*join_list);
|
|
TABLE_LIST *tbl;
|
|
while ((tbl= li++))
|
|
{
|
|
if (tbl->on_expr)
|
|
{
|
|
if (tbl->nested_join)
|
|
{
|
|
build_notnull_conds_for_inner_nest_of_outer_join(this, tbl);
|
|
}
|
|
else if (build_notnull_conds_for_range_scans(this, tbl->on_expr,
|
|
tbl->table->map))
|
|
{
|
|
/*
|
|
Found a IS NULL conjunctive predicate for a null-rejected field
|
|
of the inner table of an outer join with ON expression tbl->on_expr
|
|
*/
|
|
tbl->on_expr= (Item*) Item_false;
|
|
}
|
|
}
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Build not null conditions for range scans of given join tables
|
|
|
|
@param join the join for whose tables not null conditions are to be built
|
|
@param cond the condition from which not null predicates are to be inferred
|
|
@param allowed the bit map of join tables to be taken into account
|
|
|
|
@details
|
|
For each join table t from the 'allowed' set of tables the function finds
|
|
all fields whose null-rejectedness can be inferred from null-rejectedness
|
|
of the condition cond. For each found field f from table t such that it
|
|
participates at least in one index on table t a NOT NULL predicate is
|
|
constructed and a conjunction of all such predicates is attached to t.
|
|
If when looking for null-rejecting fields of t it is discovered one of its
|
|
fields has to be null-rejected and there is IS NULL conjunctive top level
|
|
predicate for this field then the function immediately returns true.
|
|
The function uses the bitmap TABLE::tmp_set to mark found null-rejected
|
|
fields of table t.
|
|
|
|
@note
|
|
Currently only top level conjuncts without disjunctive sub-formulas are
|
|
are taken into account when looking for null-rejected fields.
|
|
|
|
@retval
|
|
true if a contradiction is inferred
|
|
false otherwise
|
|
*/
|
|
|
|
static
|
|
bool build_notnull_conds_for_range_scans(JOIN *join, Item *cond,
|
|
table_map allowed)
|
|
{
|
|
THD *thd= join->thd;
|
|
DBUG_ENTER("build_notnull_conds_for_range_scans");
|
|
|
|
for (JOIN_TAB *s= join->join_tab;
|
|
s < join->join_tab + join->table_count ; s++)
|
|
{
|
|
/* Clear all needed bitmaps to mark found fields */
|
|
if ((allowed & s->table->map) &&
|
|
!(s->table->map & join->const_table_map))
|
|
bitmap_clear_all(&s->table->tmp_set);
|
|
}
|
|
|
|
/*
|
|
Find all null-rejected fields assuming that cond is null-rejected and
|
|
only formulas over tables from 'allowed' are to be taken into account
|
|
*/
|
|
if (cond->find_not_null_fields(allowed))
|
|
DBUG_RETURN(true);
|
|
|
|
/*
|
|
For each table t from 'allowed' build a conjunction of NOT NULL predicates
|
|
constructed for all found fields if they are included in some indexes.
|
|
If the construction of the conjunction succeeds attach the formula to
|
|
t->table->notnull_cond. The condition will be used to look for
|
|
complementary range scans.
|
|
*/
|
|
for (JOIN_TAB *s= join->join_tab ;
|
|
s < join->join_tab + join->table_count ; s++)
|
|
{
|
|
TABLE *tab= s->table;
|
|
List<Item> notnull_list;
|
|
Item *notnull_cond= 0;
|
|
|
|
if (!(allowed & tab->map) ||
|
|
(s->table->map && join->const_table_map))
|
|
continue;
|
|
|
|
for (Field** field_ptr= tab->field; *field_ptr; field_ptr++)
|
|
{
|
|
Field *field= *field_ptr;
|
|
if (field->part_of_key.is_clear_all())
|
|
continue;
|
|
if (!bitmap_is_set(&tab->tmp_set, field->field_index))
|
|
continue;
|
|
Item_field *field_item= new (thd->mem_root) Item_field(thd, field);
|
|
if (!field_item)
|
|
continue;
|
|
Item *isnotnull_item=
|
|
new (thd->mem_root) Item_func_isnotnull(thd, field_item);
|
|
if (!isnotnull_item)
|
|
continue;
|
|
if (notnull_list.push_back(isnotnull_item, thd->mem_root))
|
|
continue;
|
|
s->const_keys.merge(field->part_of_key);
|
|
}
|
|
|
|
switch (notnull_list.elements) {
|
|
case 0:
|
|
break;
|
|
case 1:
|
|
notnull_cond= notnull_list.head();
|
|
break;
|
|
default:
|
|
notnull_cond=
|
|
new (thd->mem_root) Item_cond_and(thd, notnull_list);
|
|
}
|
|
if (notnull_cond && !notnull_cond->fix_fields(thd, 0))
|
|
{
|
|
tab->notnull_cond= notnull_cond;
|
|
}
|
|
}
|
|
DBUG_RETURN(false);
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Build not null conditions for inner nest tables of an outer join
|
|
|
|
@param join the join for whose table nest not null conditions are to be
|
|
built
|
|
@param nest_tbl the nest of the inner tables of an outer join
|
|
|
|
@details
|
|
The function assumes that nest_tbl is the nest of the inner tables
|
|
of an outer join and so an ON expression for this outer join is
|
|
attached to nest_tbl.
|
|
The function selects the tables of the nest_tbl that are not inner
|
|
tables of embedded outer joins and then it calls
|
|
build_notnull_conds_for_range_scans() for nest_tbl->on_expr and
|
|
the bitmap for the selected tables. This call finds all fields
|
|
belonging to the selected tables whose null-rejectedness can be
|
|
inferred from the null-rejectedness of nest_tbl->on_expr. After
|
|
this the function recursively finds all null_rejected fields for
|
|
the remaining tables from the nest of nest_tbl.
|
|
*/
|
|
|
|
static
|
|
void build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join,
|
|
TABLE_LIST *nest_tbl)
|
|
{
|
|
TABLE_LIST *tbl;
|
|
table_map used_tables= 0;
|
|
List_iterator<TABLE_LIST> li(nest_tbl->nested_join->join_list);
|
|
|
|
while ((tbl= li++))
|
|
{
|
|
if (!tbl->on_expr)
|
|
used_tables|= tbl->table->map;
|
|
}
|
|
if (used_tables &&
|
|
build_notnull_conds_for_range_scans(join, nest_tbl->on_expr, used_tables))
|
|
{
|
|
nest_tbl->on_expr= (Item*) Item_false;
|
|
}
|
|
|
|
li.rewind();
|
|
while ((tbl= li++))
|
|
{
|
|
if (tbl->on_expr)
|
|
{
|
|
if (tbl->nested_join)
|
|
{
|
|
build_notnull_conds_for_inner_nest_of_outer_join(join, tbl);
|
|
}
|
|
else if (build_notnull_conds_for_range_scans(join, tbl->on_expr,
|
|
tbl->table->map))
|
|
tbl->on_expr= (Item*) Item_false;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Initialize join cache and enable keyread
|
|
*/
|
|
void JOIN::init_join_cache_and_keyread()
|
|
{
|
|
JOIN_TAB *tab;
|
|
for (tab= first_linear_tab(this, WITH_BUSH_ROOTS, WITHOUT_CONST_TABLES);
|
|
tab;
|
|
tab= next_linear_tab(this, tab, WITH_BUSH_ROOTS))
|
|
{
|
|
TABLE *table= tab->table;
|
|
switch (tab->type) {
|
|
case JT_SYSTEM:
|
|
case JT_CONST:
|
|
case JT_FT:
|
|
case JT_UNKNOWN:
|
|
case JT_MAYBE_REF:
|
|
break;
|
|
case JT_EQ_REF:
|
|
case JT_REF_OR_NULL:
|
|
case JT_REF:
|
|
if (table->covering_keys.is_set(tab->ref.key) && !table->no_keyread)
|
|
table->file->ha_start_keyread(tab->ref.key);
|
|
break;
|
|
case JT_HASH:
|
|
case JT_ALL:
|
|
case JT_RANGE:
|
|
SQL_SELECT *select;
|
|
select= tab->select ? tab->select :
|
|
(tab->filesort ? tab->filesort->select : NULL);
|
|
if (select && select->quick && select->quick->index != MAX_KEY &&
|
|
table->covering_keys.is_set(select->quick->index) &&
|
|
!table->no_keyread)
|
|
table->file->ha_start_keyread(select->quick->index);
|
|
break;
|
|
case JT_HASH_NEXT:
|
|
case JT_NEXT:
|
|
if ((tab->read_first_record == join_read_first ||
|
|
tab->read_first_record == join_read_last) &&
|
|
table->covering_keys.is_set(tab->index) &&
|
|
!table->no_keyread)
|
|
{
|
|
DBUG_ASSERT(!tab->filesort);
|
|
table->file->ha_start_keyread(tab->index);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
/* purecov: end */
|
|
}
|
|
|
|
if (table->file->keyread_enabled() &&
|
|
!table->is_clustering_key(table->file->keyread))
|
|
{
|
|
/*
|
|
Here we set the read_set bitmap for all covering keys
|
|
except CLUSTERED indexes, with all the key-parts inside the key.
|
|
This is needed specifically for an index that contains virtual column.
|
|
|
|
Example:
|
|
Lets say we have this query
|
|
SELECT b FROM t1;
|
|
|
|
and the table definition is like
|
|
CREATE TABLE t1(
|
|
a varchar(10) DEFAULT NULL,
|
|
b varchar(255) GENERATED ALWAYS AS (a) VIRTUAL,
|
|
KEY key1 (b));
|
|
|
|
So we a virtual column b and an index key1 defined on the virtual
|
|
column. So if a query uses a vcol, base columns that it
|
|
depends on are automatically added to the read_set - because they're
|
|
needed to calculate the vcol.
|
|
But if we're doing keyread, vcol is taken
|
|
from the index, not calculated, and base columns do not need to be
|
|
in the read set. To ensure this we try to set the read_set to only
|
|
the key-parts of the indexes.
|
|
|
|
Another side effect of this is
|
|
Lets say you have a query
|
|
select a, b from t1
|
|
and there is an index key1 (a,b,c)
|
|
then as key1 is covering and we would have the keyread enable for
|
|
this key, so the below call will also set the read_set for column
|
|
c, which is not a problem as we read all the columns from the index
|
|
tuple.
|
|
*/
|
|
table->mark_index_columns(table->file->keyread, table->read_set);
|
|
}
|
|
bool init_for_explain= false;
|
|
|
|
/*
|
|
Can we use lightweight initalization mode just for EXPLAINs? We can if
|
|
we're certain that the optimizer will not execute the subquery.
|
|
The optimzier will not execute the subquery if it's too expensive. For
|
|
the exact criteria, see Item_subselect::is_expensive().
|
|
Note that the subquery might be a UNION and we might not yet know if it
|
|
is expensive.
|
|
What we do know is that if this SELECT is too expensive, then the whole
|
|
subquery will be too expensive as well.
|
|
So, we can use lightweight initialization (init_for_explain=true) if this
|
|
SELECT examines more than @@expensive_subquery_limit rows.
|
|
*/
|
|
if ((select_options & SELECT_DESCRIBE) &&
|
|
get_examined_rows() >= thd->variables.expensive_subquery_limit)
|
|
{
|
|
init_for_explain= true;
|
|
}
|
|
if (tab->cache && tab->cache->init(init_for_explain))
|
|
revise_cache_usage(tab);
|
|
else
|
|
tab->remove_redundant_bnl_scan_conds();
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
@brief
|
|
Unpack temp table fields to base table fields.
|
|
*/
|
|
|
|
void unpack_to_base_table_fields(TABLE *table)
|
|
{
|
|
JOIN_TAB *tab= table->reginfo.join_tab;
|
|
for (Copy_field *cp= tab->read_record.copy_field;
|
|
cp != tab->read_record.copy_field_end; cp++)
|
|
(*cp->do_copy)(cp);
|
|
}
|
|
|
|
/*
|
|
Call item->fix_after_optimize for all items registered in
|
|
lex->fix_after_optimize
|
|
|
|
This is needed for items like ROWNUM(), which needs access to structures
|
|
created by the early optimizer pass, like JOIN
|
|
*/
|
|
|
|
static void fix_items_after_optimize(THD *thd, SELECT_LEX *select_lex)
|
|
{
|
|
List_iterator<Item> li(select_lex->fix_after_optimize);
|
|
|
|
while (Item *item= li++)
|
|
item->fix_after_optimize(thd);
|
|
}
|
|
|
|
|
|
/*
|
|
Set a limit for the SELECT_LEX_UNIT based on ROWNUM usage.
|
|
The limit is shown in EXPLAIN
|
|
*/
|
|
|
|
static bool set_limit_for_unit(THD *thd, SELECT_LEX_UNIT *unit, ha_rows lim)
|
|
{
|
|
SELECT_LEX *gpar= unit->global_parameters();
|
|
if (gpar->limit_params.select_limit != 0 &&
|
|
// limit can not be an expression but can be parameter
|
|
(!gpar->limit_params.select_limit->basic_const_item() ||
|
|
((ha_rows)gpar->limit_params.select_limit->val_int()) < lim))
|
|
return false;
|
|
|
|
Query_arena *arena, backup;
|
|
arena= thd->activate_stmt_arena_if_needed(&backup);
|
|
|
|
gpar->limit_params.select_limit=
|
|
new (thd->mem_root) Item_int(thd, lim, MAX_BIGINT_WIDTH);
|
|
if (gpar->limit_params.select_limit == 0)
|
|
return true; // EOM
|
|
|
|
unit->set_limit(gpar);
|
|
|
|
gpar->limit_params.explicit_limit= true; // to show in EXPLAIN
|
|
|
|
if (arena)
|
|
thd->restore_active_arena(arena, &backup);
|
|
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
|
|
Check possibility of LIMIT setting by rownum() of upper SELECT and do it
|
|
|
|
@note Ideal is to convert something like
|
|
SELECT ...
|
|
FROM (SELECT ...) table
|
|
WHERE rownum() < <CONSTANT>;
|
|
to
|
|
SELECT ...
|
|
FROM (SELECT ... LIMIT <CONSTANT>) table
|
|
WHERE rownum() < <CONSTANT>;
|
|
|
|
@retval true EOM
|
|
@retval false no errors
|
|
*/
|
|
|
|
bool JOIN::optimize_upper_rownum_func()
|
|
{
|
|
DBUG_ASSERT(select_lex->master_unit()->derived);
|
|
|
|
if (select_lex->master_unit()->first_select() != select_lex)
|
|
return false; // first will set parameter
|
|
|
|
if (select_lex->master_unit()->global_parameters()->
|
|
limit_params.offset_limit != NULL)
|
|
return false; // offset is set, we cannot set limit
|
|
|
|
SELECT_LEX *outer_select= select_lex->master_unit()->outer_select();
|
|
/*
|
|
Check that it is safe to use rownum-limit from the outer query
|
|
(the one that has 'WHERE rownum()...')
|
|
*/
|
|
if (outer_select == NULL ||
|
|
!outer_select->with_rownum ||
|
|
(outer_select->options & SELECT_DISTINCT) ||
|
|
outer_select->table_list.elements != 1 ||
|
|
outer_select->where == NULL ||
|
|
outer_select->where->type() != Item::FUNC_ITEM)
|
|
return false;
|
|
|
|
return process_direct_rownum_comparison(thd, unit, outer_select->where);
|
|
}
|
|
|
|
|
|
/**
|
|
Test if the predicate compares rownum() with a constant
|
|
|
|
@return 1 No or invalid rownum() compare
|
|
@return 0 rownum() is compared with a constant.
|
|
In this case *args contains the constant and
|
|
*inv_order constains 1 if the rownum() was the right
|
|
argument, like in 'WHERE 2 >= rownum()'.
|
|
*/
|
|
|
|
static bool check_rownum_usage(Item_func *func_item, longlong *limit,
|
|
bool *inv_order)
|
|
{
|
|
Item *arg1, *arg2;
|
|
*inv_order= 0;
|
|
DBUG_ASSERT(func_item->argument_count() == 2);
|
|
|
|
/* 'rownum op const' or 'const op field' */
|
|
arg1= func_item->arguments()[0]->real_item();
|
|
if (arg1->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) arg1)->functype() == Item_func::ROWNUM_FUNC)
|
|
{
|
|
arg2= func_item->arguments()[1]->real_item();
|
|
if (arg2->can_eval_in_optimize())
|
|
{
|
|
*limit= arg2->val_int();
|
|
return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR;
|
|
}
|
|
}
|
|
else if (arg1->can_eval_in_optimize())
|
|
{
|
|
arg2= func_item->arguments()[1]->real_item();
|
|
if (arg2->type() == Item::FUNC_ITEM &&
|
|
((Item_func*) arg2)->functype() == Item_func::ROWNUM_FUNC)
|
|
{
|
|
*limit= arg1->val_int();
|
|
*inv_order= 1;
|
|
return *limit <= 0 || (ulonglong) *limit >= HA_POS_ERROR;
|
|
}
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
|
|
/*
|
|
Limit optimization for ROWNUM()
|
|
|
|
Go through the WHERE clause and find out if there are any of the following
|
|
constructs on the top level:
|
|
rownum() <= integer_constant
|
|
rownum() < integer_constant
|
|
rownum() = 1
|
|
|
|
If yes, then threat the select as if 'LIMIT integer_constant' would
|
|
have been used
|
|
*/
|
|
|
|
static void optimize_rownum(THD *thd, SELECT_LEX_UNIT *unit,
|
|
Item *cond)
|
|
{
|
|
DBUG_ENTER("optimize_rownum");
|
|
|
|
if (cond->type() == Item::COND_ITEM)
|
|
{
|
|
if (((Item_cond*) cond)->functype() == Item_func::COND_AND_FUNC)
|
|
{
|
|
List_iterator<Item> li(*((Item_cond*) cond)->argument_list());
|
|
Item *item;
|
|
while ((item= li++))
|
|
optimize_rownum(thd, unit, item);
|
|
}
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
process_direct_rownum_comparison(thd, unit, cond);
|
|
DBUG_VOID_RETURN;
|
|
}
|
|
|
|
|
|
/*
  Try to turn one comparison of rownum() with a constant into a LIMIT

  @param thd   thread handler
  @param unit  the unit that may get the limit
  @param cond  the candidate predicate

  @details
    The predicate is used only if it is a function item with two arguments
    that check_rownum_usage() accepts. Recognized forms (# is the constant):
      rownum() <  #   and   # >  rownum()   => limit # - 1
      rownum() <= #   and   # >= rownum()   => limit #
      rownum() =  1   and   1 =  rownum()   => limit 1
    Everything else leaves the unit untouched.

  @return the value returned by set_limit_for_unit() if a limit was
          requested, false otherwise
*/
static bool process_direct_rownum_comparison(THD *thd, SELECT_LEX_UNIT *unit,
                                             Item *cond)
{
  DBUG_ENTER("process_direct_rownum_comparison");

  if (cond->real_type() != Item::FUNC_ITEM)
    DBUG_RETURN(false);

  Item_func *pred= (Item_func*) cond;
  if (pred->argument_count() != 2)
    DBUG_RETURN(false);                         // Not a compare functions

  longlong limit;
  bool inv;
  if (check_rownum_usage(pred, &limit, &inv))
    DBUG_RETURN(false);

  Item_func::Functype cmp_type= pred->functype();

  /* Normalize '# op rownum()' to 'rownum() op #'; '=' is symmetric */
  if (inv && cmp_type != Item_func::EQ_FUNC)
  {
    switch (cmp_type) {
    case Item_func::GT_FUNC:                    // # > rownum()
      cmp_type= Item_func::LT_FUNC;
      break;
    case Item_func::GE_FUNC:                    // # >= rownum()
      cmp_type= Item_func::LE_FUNC;
      break;
    default:
      DBUG_RETURN(false);
    }
  }

  switch (cmp_type) {
  case Item_func::LT_FUNC:                      // rownum() < #
    if (limit <= 0)
      DBUG_RETURN(false);
    DBUG_RETURN(set_limit_for_unit(thd, unit, limit - 1));
  case Item_func::LE_FUNC:                      // rownum() <= #
    DBUG_RETURN(set_limit_for_unit(thd, unit, limit));
  case Item_func::EQ_FUNC:                      // rownum() = #
    if (limit == 1)
      DBUG_RETURN(set_limit_for_unit(thd, unit, limit));
    break;
  default:
    break;
  }

  DBUG_RETURN(false);
}
|
|
|
|
/**
|
|
@brief
|
|
Transform IN predicates having equal constant elements to equalities
|
|
|
|
@param thd The context of the statement
|
|
|
|
@details
|
|
If all elements in an IN predicate are constant and equal to each other
|
|
then clause
|
|
- "a IN (e1,..,en)" can be transformed to "a = e1"
|
|
- "a NOT IN (e1,..,en)" can be transformed to "a != e1".
|
|
This means an object of Item_func_in can be replaced with an object of
|
|
Item_func_eq for IN (e1,..,en) clause or Item_func_ne for
|
|
NOT IN (e1,...,en).
|
|
Such a replacement allows the optimizer to choose a better execution plan.
|
|
|
|
This methods applies such transformation for each IN predicate of the WHERE
|
|
condition and ON expressions of this join where possible
|
|
|
|
@retval
|
|
false success
|
|
true failure
|
|
*/
|
|
bool JOIN::transform_in_predicates_into_equalities(THD *thd)
|
|
{
|
|
DBUG_ENTER("JOIN::transform_in_predicates_into_equalities");
|
|
DBUG_RETURN(transform_all_conds_and_on_exprs(
|
|
thd, &Item::in_predicate_to_equality_transformer));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Rewrite datetime comparison conditions into sargable.
|
|
See details in the description for class Date_cmp_func_rewriter
|
|
*/
|
|
|
|
bool JOIN::transform_date_conds_into_sargable()
|
|
{
|
|
DBUG_ENTER("JOIN::transform_date_conds_into_sargable");
|
|
DBUG_RETURN(transform_all_conds_and_on_exprs(
|
|
thd, &Item::date_conds_transformer));
|
|
}
|
|
|
|
|
|
/**
|
|
@brief
|
|
Transform all items in WHERE and ON expressions using a given transformer
|
|
|
|
@param thd The context of the statement
|
|
transformer Pointer to the transformation function
|
|
|
|
@details
|
|
For each item of the WHERE condition and ON expressions of the SELECT
|
|
for this join the method performs the intransformation using the given
|
|
transformation function
|
|
|
|
@retval
|
|
false success
|
|
true failure
|
|
*/
|
|
bool JOIN::transform_all_conds_and_on_exprs(THD *thd,
|
|
Item_transformer transformer)
|
|
{
|
|
if (conds)
|
|
{
|
|
conds= conds->top_level_transform(thd, transformer, (uchar *) 0);
|
|
if (!conds)
|
|
return true;
|
|
}
|
|
if (join_list)
|
|
{
|
|
if (transform_all_conds_and_on_exprs_in_join_list(thd, join_list,
|
|
transformer))
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
/**
  @brief
    Transform the ON expressions of a join list using a given transformer

  @param thd          The context of the statement
  @param join_list    The list of table references to process
  @param transformer  Pointer to the transformation function

  @details
    For every element of join_list the ON expressions inside its nested
    join (if it has one) are processed first by a recursive call; then the
    element's own ON expression, if any, is replaced with the result of its
    top-level transformation. Processing stops at the first failure.

  @retval
    false     success
    true      failure (a transformation returned NULL)
*/
bool JOIN::transform_all_conds_and_on_exprs_in_join_list(
       THD *thd, List<TABLE_LIST> *join_list, Item_transformer transformer)
{
  List_iterator<TABLE_LIST> tbl_it(*join_list);

  for (TABLE_LIST *tbl= tbl_it++; tbl; tbl= tbl_it++)
  {
    if (tbl->nested_join &&
        transform_all_conds_and_on_exprs_in_join_list(
          thd, &tbl->nested_join->join_list, transformer))
      return true;

    if (!tbl->on_expr)
      continue;

    tbl->on_expr= tbl->on_expr->top_level_transform(thd, transformer, 0);
    if (!tbl->on_expr)
      return true;
  }
  return false;
}
|
|
|
|
|
|
/*
  Invoke the *_START macro that matches the SQL command of the statement

  @param thd  thread handler; thd->lex->sql_command selects the macro and
              thd->query() is passed to it

  Only UPDATE and DELETE commands (single- and multi-table) are expected
  here; any other command hits the debug assertion.
*/
static void MYSQL_DML_START(THD *thd)
{
  switch (thd->lex->sql_command) {

  /* Single-table commands */
  case SQLCOM_UPDATE:
    MYSQL_UPDATE_START(thd->query());
    break;
  case SQLCOM_DELETE:
    MYSQL_DELETE_START(thd->query());
    break;

  /* Multi-table commands */
  case SQLCOM_UPDATE_MULTI:
    MYSQL_MULTI_UPDATE_START(thd->query());
    break;
  case SQLCOM_DELETE_MULTI:
    MYSQL_MULTI_DELETE_START(thd->query());
    break;

  default:
    DBUG_ASSERT(0);
  }
}
|
|
|
|
|
|
/*
  Invoke the *_DONE macro that matches the SQL command of the statement

  @param thd  thread handler; thd->lex->sql_command selects the macro
  @param rc   result code of the statement, 0 means success

  The row counters passed to the macros are taken from the statement only
  when rc is 0; otherwise 0 is passed.
  - UPDATE (single- and multi-table): num_found() and num_updated() of the
    result object of the Sql_cmd_dml, accessed as a multi_update.
  - DELETE: thd->get_row_count_func().
  - multi-table DELETE: num_deleted() of the result object, accessed as
    a multi_delete.

  Any other command hits the debug assertion.
*/
static void MYSQL_DML_DONE(THD *thd, int rc)
{
  switch (thd->lex->sql_command) {

  case SQLCOM_DELETE:
    MYSQL_DELETE_DONE(rc, (!rc ? (ulong) (thd->get_row_count_func()) : 0));
    break;

  case SQLCOM_DELETE_MULTI:
    MYSQL_MULTI_DELETE_DONE(
      rc,
      (!rc ?
       ((multi_delete*)(((Sql_cmd_dml*)(thd->lex->m_sql_cmd))->get_result()))
         ->num_deleted() :
       0));
    break;

  case SQLCOM_UPDATE:
    MYSQL_UPDATE_DONE(
      rc,
      (!rc ?
       ((multi_update*)(((Sql_cmd_dml*)(thd->lex->m_sql_cmd))->get_result()))
         ->num_found() :
       0),
      (!rc ?
       ((multi_update*)(((Sql_cmd_dml*)(thd->lex->m_sql_cmd))->get_result()))
         ->num_updated() :
       0));
    break;

  case SQLCOM_UPDATE_MULTI:
    MYSQL_MULTI_UPDATE_DONE(
      rc,
      (!rc ?
       ((multi_update*)(((Sql_cmd_dml*)(thd->lex->m_sql_cmd))->get_result()))
         ->num_found() :
       0),
      (!rc ?
       ((multi_update*)(((Sql_cmd_dml*)(thd->lex->m_sql_cmd))->get_result()))
         ->num_updated() :
       0));
    break;

  default:
    DBUG_ASSERT(0);
  }
}
|
|
|
|
|
|
/*
|
|
@brief Perform actions needed before locking tables for a DML statement
|
|
|
|
@param thd global context the processed statement
|
|
@returns false if success, true if error
|
|
|
|
@details
|
|
This function calls the precheck() procedure fo the processed statement,
|
|
then is opens tables used in the statement and finally it calls the function
|
|
prepare_inner() that is specific for the type of the statement.
|
|
|
|
@note
|
|
The function are used when processing:
|
|
- a DML statement
|
|
- PREPARE stmt FROM <DML "statement>"
|
|
- EXECUTE stmt when stmt is prepared from a DML statement.
|
|
*/
|
|
|
|
bool Sql_cmd_dml::prepare(THD *thd)
|
|
{
|
|
lex= thd->lex;
|
|
SELECT_LEX_UNIT *unit= &lex->unit;
|
|
|
|
DBUG_ASSERT(!is_prepared());
|
|
|
|
// Perform a coarse statement-specific privilege check.
|
|
if (precheck(thd))
|
|
goto err;
|
|
|
|
MYSQL_DML_START(thd);
|
|
|
|
lex->context_analysis_only|= CONTEXT_ANALYSIS_ONLY_DERIVED;
|
|
|
|
if (open_tables_for_query(thd, lex->query_tables, &table_count, 0,
|
|
get_dml_prelocking_strategy()))
|
|
{
|
|
if (thd->is_error())
|
|
goto err;
|
|
(void)unit->cleanup();
|
|
return true;
|
|
}
|
|
|
|
if (prepare_inner(thd))
|
|
goto err;
|
|
|
|
lex->context_analysis_only&= ~CONTEXT_ANALYSIS_ONLY_DERIVED;
|
|
|
|
set_prepared();
|
|
unit->set_prepared();
|
|
|
|
return false;
|
|
|
|
err:
|
|
DBUG_ASSERT(thd->is_error());
|
|
DBUG_PRINT("info", ("report_error: %d", thd->is_error()));
|
|
|
|
(void)unit->cleanup();
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Execute a DML statement
|
|
|
|
@param thd global context the processed statement
|
|
@returns false if success, true if error
|
|
|
|
@details
|
|
The function assumes that each type of a DML statement has its own
|
|
implementation of the virtunal functions precheck(). It is also
|
|
assumed that that the virtual function execute execute_inner() is to be
|
|
overridden by the implementations for specific commands.
|
|
|
|
@note
|
|
Currently only UPDATE and DELETE statement are executed using this function.
|
|
*/
|
|
|
|
bool Sql_cmd_dml::execute(THD *thd)
|
|
{
|
|
lex = thd->lex;
|
|
bool res;
|
|
|
|
SELECT_LEX_UNIT *unit = &lex->unit;
|
|
SELECT_LEX *select_lex= lex->first_select_lex();
|
|
|
|
if (!is_prepared())
|
|
{
|
|
/*
|
|
This is called when processing
|
|
- a DML statement
|
|
- PREPARE stmt FROM <DML "statement>"
|
|
- EXECUTE stmt when stmt is prepared from a DML statement.
|
|
The call will invoke open_tables_for_query()
|
|
*/
|
|
if (prepare(thd))
|
|
goto err;
|
|
}
|
|
else // This branch currently is never used for DML commands
|
|
{
|
|
if (precheck(thd))
|
|
goto err;
|
|
|
|
MYSQL_DML_START(thd);
|
|
|
|
if (open_tables_for_query(thd, lex->query_tables, &table_count, 0,
|
|
get_dml_prelocking_strategy()))
|
|
goto err;
|
|
}
|
|
|
|
THD_STAGE_INFO(thd, stage_init);
|
|
|
|
/*
|
|
Locking of tables is done after preparation but before optimization.
|
|
This allows to do better partition pruning and avoid locking unused
|
|
partitions. As a consequence, in such a case, prepare stage can rely only
|
|
on metadata about tables used and not data from them.
|
|
*/
|
|
if (!is_empty_query())
|
|
{
|
|
if (lock_tables(thd, lex->query_tables, table_count, 0))
|
|
goto err;
|
|
}
|
|
|
|
unit->set_limit(select_lex);
|
|
|
|
/* Perform statement-specific execution */
|
|
res = execute_inner(thd);
|
|
|
|
if (res)
|
|
goto err;
|
|
|
|
thd->push_final_warnings();
|
|
res= unit->cleanup();
|
|
|
|
/* "Unprepare" this object since unit->cleanup actually unprepares */
|
|
unprepare(thd);
|
|
|
|
THD_STAGE_INFO(thd, stage_end);
|
|
|
|
MYSQL_DML_DONE(thd, res);
|
|
|
|
return res;
|
|
|
|
err:
|
|
DBUG_ASSERT(thd->is_error() || thd->killed);
|
|
MYSQL_DML_DONE(thd, 1);
|
|
THD_STAGE_INFO(thd, stage_end);
|
|
(void)unit->cleanup();
|
|
if (is_prepared())
|
|
unprepare(thd);
|
|
|
|
return thd->is_error();
|
|
}
|
|
|
|
|
|
/**
|
|
@brief Generic implemention of optimization and execution phases
|
|
@param thd global context the processed statement
|
|
@returns false if success, true if error
|
|
|
|
@note
|
|
This implementation assumes that the processed DML statement is represented
|
|
as a SELECT_LEX or SELECT_LEX_UNIT tree with attached corresponding
|
|
JOIN structures. Any JOIN structure is constructed at the prepare phase.
|
|
When created at the top level join it is provided with an object of a class
|
|
derived from select_result_sink. The pointer to the object is saved in
|
|
the this->result field. For different types of DML statements different
|
|
derived classes are used for this object. The class of this object determines
|
|
additional specific actions performed at the phases of context analysis,
|
|
optimization and execution.
|
|
*/
|
|
|
|
bool Sql_cmd_dml::execute_inner(THD *thd)
|
|
{
|
|
SELECT_LEX_UNIT *unit = &lex->unit;
|
|
SELECT_LEX *select_lex= unit->first_select();
|
|
JOIN *join= select_lex->join;
|
|
|
|
if (join->optimize())
|
|
goto err;
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
join->conds_history= join->conds;
|
|
join->having_history= (join->having?join->having:join->tmp_having);
|
|
}
|
|
|
|
if (unlikely(thd->is_error()))
|
|
goto err;
|
|
|
|
if (join->exec())
|
|
goto err;
|
|
|
|
if (thd->lex->describe & DESCRIBE_EXTENDED)
|
|
{
|
|
select_lex->where= join->conds_history;
|
|
select_lex->having= join->having_history;
|
|
}
|
|
|
|
err:
|
|
return join->error;
|
|
}
|
|
|
|
|
|
/**
|
|
@} (end of group Query_Optimizer)
|
|
*/
|